<a href="https://colab.research.google.com/github/skurakake/MMP_public/blob/main/Convolution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 初期設定、cifar10 データのダウンロード
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load the CIFAR-10 training and test splits as (images, labels) pairs.
(x_train, t_train), (x_test, t_test) = cifar10.load_data()
print("Image size:", x_train[0].shape)
print(x_test[0].shape)

# Class names, looked up below with the integer label stored in t_train.
cifar10_labels = np.array(["airplane", "automobile", "bird", "cat",
                           "deer", "dog", "frog", "horse", "ship", "truck"])

# Pick num_image random training images and display them in a grid.
num_image = 20
num_cols = 5
# Ceiling division: derive the row count from num_image so that changing
# num_image never asks plt.subplot for a slot outside the grid.
num_rows = -(-num_image // num_cols)
rand_idx = np.random.randint(0, len(x_train), num_image)
plt.figure(figsize=(16, 16))

for position, image_idx in enumerate(rand_idx, start=1):
    plt.subplot(num_rows, num_cols, position)
    plt.imshow(x_train[image_idx])
    # Each label is stored as a length-1 array, hence the trailing [0].
    plt.title(cifar10_labels[t_train[image_idx][0]])
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)

plt.show()

In [2]:
def img2col(img, ker_h, ker_w, pad_h_value, pad_w_value, stride_h, stride_w, batch_size):
    """Unroll every convolution window of ``img`` into one column of a matrix.

    Each ``ker_h x ker_w x channels`` window is flattened with
    ``reshape(-1)`` (row, then column, then channel) and written to its own
    column, so a convolution becomes a single matrix product with a kernel
    flattened in the same order.

    Args:
        img: Array of shape ``(height, width, channels)``.
        ker_h: Kernel height.
        ker_w: Kernel width.
        pad_h_value: Accepted for interface compatibility; currently unused
            (no padding is applied).
        pad_w_value: Accepted for interface compatibility; currently unused
            (no padding is applied).
        stride_h: Vertical step between consecutive windows.
        stride_w: Horizontal step between consecutive windows.
        batch_size: Number of times the windows of ``img`` are repeated
            along the column axis.

    Returns:
        A tuple ``(cols, out_h, out_w)`` where ``cols`` is a float array of
        shape ``(ker_h * ker_w * channels, out_h * out_w * batch_size)`` and
        ``out_h`` / ``out_w`` are the output height and width.
    """
    img_h, img_w, chanel_in = img.shape

    # Output size without padding. Clamp at 0 so a kernel larger than the
    # image yields an empty result instead of a negative dimension.
    out_h = max((img_h - ker_h) // stride_h + 1, 0)
    out_w = max((img_w - ker_w) // stride_w + 1, 0)

    num_patches = out_h * out_w
    out = np.zeros((ker_h * ker_w * chanel_in, num_patches * batch_size))

    patch_index = 0
    for _ in range(batch_size):
        # Iterate over OUTPUT positions and scale by the stride to get the
        # window origin; stepping the output index by the stride would skip
        # windows whenever the stride is larger than 1.
        for out_row in range(out_h):
            top = out_row * stride_h
            for out_col in range(out_w):
                left = out_col * stride_w
                patch = img[top:top + ker_h, left:left + ker_w, :]
                # Flatten the window into the next free column.
                out[:, patch_index] = patch.reshape(-1)
                patch_index += 1

    return out, out_h, out_w

In [None]:
# Compare two storage layouts for the same 3x3 image with 2 channels.

# Channel-first: one 3x3 plane per channel -> shape (2, 3, 3).
temp_image = np.array([
    [[1, 2, 3], [5, 6, 7], [8, 9, 10]],
    [[10, 20, 30], [50, 60, 70], [80, 90, 100]],
])

# Channel-last: the per-pixel channel values sit at the innermost level
# -> shape (3, 3, 2). The original cell notes that the CIFAR-10 images
# (x_train[0]) keep their RGB values at the innermost level in the same way.
temp_image2 = np.array([
    [[1, 10], [2, 20], [3, 30]],
    [[5, 50], [6, 60], [7, 70]],
    [[8, 80], [9, 90], [10, 100]],
])

# Print each array followed by its shape.
for layout in (temp_image, temp_image2):
    print(layout)
    print(layout.shape)

In [None]:
# Convolution settings: stride and padding arguments passed to img2col.
stride_h, stride_w = 1, 1
pad_h_value, pad_w_value = 0, 0

# Kernel definitions: three 3x3 kernels (all currently hold the same values).
ker_1 = np.array([
    [-1, 2, -1],
    [-1, 2, -1],
    [-1, 2, -1],
])
ker_2 = np.array([
    [-1, 2, -1],
    [-1, 2, -1],
    [-1, 2, -1],
])
ker_3 = np.array([
    [-1, 2, -1],
    [-1, 2, -1],
    [-1, 2, -1],
])

ker_h = ker_1.shape[0]
ker_w = ker_1.shape[1]

# Stack the kernels along a new last axis -> shape (ker_h, ker_w, 3), the
# same (row, column, channel) order in which img2col flattens each window.
all_kernel = np.stack([ker_1, ker_2, ker_3], axis=-1)
print(all_kernel.shape)
print(all_kernel)

# Flatten to a 1-D weight vector for the matrix product with the columns.
all_kernel = all_kernel.reshape(-1)
print(all_kernel)


In [None]:
# For num_image randomly chosen training images, run the convolution and show
# each original image next to its filtered result.
num_image = 10
rand_idx = np.random.randint(0, len(x_train), num_image)

plt.figure(
    figsize=(4, 35),
    dpi=200,
    facecolor="lightblue",
)

batch_size = 1

for i in range(num_image):
    # NOTE(review): the grid is declared with 2 * num_image rows, but the
    # indices used below only reach slot 2 * num_image, i.e. the first
    # num_image rows; the lower half of the figure stays empty. Layout is
    # kept as in the original cell.
    plt.subplot(2 * num_image, 2, 2 * i + 1)
    image = x_train[rand_idx[i]]
    plt.imshow(image)
    plt.title(cifar10_labels[t_train[rand_idx[i]][0]])
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)

    cols, out_h, out_w = img2col(image, ker_h, ker_w, pad_h_value, pad_w_value,
                                 stride_h, stride_w, batch_size)
    # One matrix product evaluates the kernel at every window position.
    image_out = np.dot(all_kernel, cols)
    image_out = image_out.reshape(out_h, out_w)

    # Min-max normalise the response to [0, 255] before converting to uint8.
    image_float32 = image_out.astype(np.float32)
    min_value = np.min(image_float32)
    value_range = np.max(image_float32) - min_value
    if value_range > 0:
        normalized_image_out = (image_float32 - min_value) / value_range * 255
    else:
        # Constant response: dividing by a zero range would give NaN (0/0),
        # so show a uniform image instead.
        normalized_image_out = np.zeros_like(image_float32)
    normalized_image_out = normalized_image_out.astype(np.uint8)  # uint8 for display

    plt.subplot(2 * num_image, 2, 2 * i + 2)
    plt.imshow(normalized_image_out, cmap='gray')  # single-channel output
    plt.title(cifar10_labels[t_train[rand_idx[i]][0]])
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)

plt.show()