# Assignment
- [ ] Pascal VOC 데이터셋을 학습해 세그멘테이션을 하는 UNet 모델 만들기
- [ ] TensorBoard.dev를 통해 학습 결과 공유하기
- [ ] CAM 기법으로 학습된 결과 확인하기

# 데이터 불러오기

In [None]:
!pip install datasets huggingface_hub

In [None]:
import datasets
from datasets import load_dataset

# Load the 'train' split of the 'nateraw/pascal-voc-2012' dataset.
dataset = load_dataset('nateraw/pascal-voc-2012', split='train')

# 데이터셋 분석

In [None]:
# Check which container type load_dataset returned.
print(type(dataset))

In [None]:
# Print the dataset's own summary representation.
print(dataset)

In [None]:
# Number of examples. len(dataset) reads the row count directly instead of
# loading the whole 'image' column into memory just to count it.
print(len(dataset))

In [None]:
# Type of a single 'image' entry. Index the row first so only that one example
# is read rather than the entire column.
print(type(dataset[0]['image']))

In [None]:
# Raw content of the first 'image' entry (row-first access reads one example).
print(dataset[0]['image'])

In [None]:
import io
from PIL import Image

# Decode the second example's encoded image bytes with PIL. Row-first indexing
# fetches only that example instead of materializing the whole column.
image_bytes = io.BytesIO(dataset[1]['image']['bytes'])
image = Image.open(image_bytes)

In [None]:
import matplotlib.pyplot as plt

# Print the PIL image size, then display the image.
print(image.size)
plt.imshow(image)
plt.show()

In [None]:
import tensorflow as tf

# Convert the decoded image to a float32 tensor and inspect its shape/dtype.
image_tensor = tf.convert_to_tensor(image, dtype='float32')
print(image_tensor.shape)
print(image_tensor.dtype)

In [None]:
import numpy as np

def bytes_to_numpy(bytes_string, image_size=(280, 280)):
    """Decode encoded image bytes into a NumPy array of a fixed size.

    Args:
        bytes_string: Bytes of an encoded image file.
        image_size: Target size handed to ``PIL.Image.Image.resize``.

    Returns:
        The result of ``np.array`` applied to the resized PIL image.
    """
    buffer = io.BytesIO(bytes_string)
    decoded = Image.open(buffer)
    resized = decoded.resize(image_size)
    return np.array(resized)

In [None]:
# Inspect the container type returned when selecting the 'mask' column.
print(type(dataset['mask']))

In [None]:
# Raw content of the first 'mask' entry (row-first access reads one example).
print(dataset[0]['mask'])

In [None]:
# Decode the first example's mask. Row-first indexing avoids loading the whole
# 'mask' column to fetch a single entry.
mask_img = bytes_to_numpy(dataset[0]['mask']['bytes'])

In [None]:
# Shape of the decoded, resized mask array.
print(mask_img.shape)

In [None]:
# Display the mask after casting it to uint8.
plt.imshow(tf.cast(mask_img, 'uint8'))
plt.show()

# 데이터셋 만들기

In [None]:
#reference: https://d2l.ai/chapter_computer-vision/semantic-segmentation-and-dataset.html

# RGB color table for the segmentation masks. convert_to_int_labels uses the
# index of the matching row as the integer class label, so row i pairs with
# VOC_CLASSES[i] (both hold 21 entries).
VOC_COLORMAP = np.array([[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0],
                [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128],
                [64, 0, 0], [192, 0, 0], [64, 128, 0], [192, 128, 0],
                [64, 0, 128], [192, 0, 128], [64, 128, 128], [192, 128, 128],
                [0, 64, 0], [128, 64, 0], [0, 192, 0], [128, 192, 0],
                [0, 64, 128]])

# Class names, ordered to match the rows of VOC_COLORMAP.
VOC_CLASSES = ['background', 'aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
               'diningtable', 'dog', 'horse', 'motorbike', 'person',
               'potted plant', 'sheep', 'sofa', 'train', 'tv/monitor']

# Color that convert_to_int_labels maps to label 0 instead of looking it up in
# the colormap (presumably the object-outline color in the masks -- confirm).
border = np.array([224, 224, 192])
# Number of output classes, 'background' included.
num_classes = len(VOC_CLASSES)

In [None]:
def convert_to_int_labels(image_numpy):
    """Convert an RGB color-coded mask into a map of class indices.

    Every pixel is compared against the rows of ``VOC_COLORMAP``; the index of
    the matching row becomes the pixel's label. Pixels equal to ``border`` and
    pixels that match no colormap row are labelled 0.

    Args:
        image_numpy: Array whose last axis holds the 3 RGB channels.

    Returns:
        A float64 array of shape ``image_numpy.shape[:-1]`` holding one class
        index per pixel.
    """
    pixels = image_numpy.reshape(-1, 3)
    # Compare all pixels with all colormap rows in one vectorized step instead
    # of a per-pixel Python loop:
    # (num_pixels, 1, 3) == (num_colors, 3) -> (num_pixels, num_colors).
    matches = (pixels[:, np.newaxis, :] == VOC_COLORMAP).all(axis=-1)
    # argmax yields the first matching row, or 0 when nothing matches.
    labels = matches.argmax(axis=-1).astype(np.float64)
    # Border-colored pixels are always mapped to label 0.
    labels[(pixels == border).all(axis=-1)] = 0.
    return labels.reshape(image_numpy.shape[:-1])

def to_image_array(row):
    """Decode one dataset row into an image array and an integer label mask.

    Args:
        row: Mapping with 'image' and 'mask' entries, each storing the encoded
            file under a 'bytes' key.

    Returns:
        A dict with 'image' (the decoded, resized image array) and 'mask'
        (per-pixel class indices from ``convert_to_int_labels``).
    """
    mask_rgb = bytes_to_numpy(row['mask']['bytes'])
    # Return the converted values. Returning the untouched input row would turn
    # the surrounding dataset.map() call into a no-op and keep the raw bytes.
    return {
        'image': bytes_to_numpy(row['image']['bytes']),
        'mask': convert_to_int_labels(mask_rgb),
    }

In [None]:
# Apply to_image_array to every row, using 8 worker processes.
dataset = dataset.map(to_image_array, num_proc=8)

In [None]:
# Build batched (image, mask) pairs for training. Examples are shuffled so the
# batches differ between epochs, and the batch is kept small: a U-Net keeps
# full-resolution feature maps for every sample, so a batch of 256 images at
# 280x280 would not fit in typical accelerator memory.
dataset = dataset.to_tf_dataset(columns=['image'],
                                label_cols=['mask'],
                                batch_size=16,
                                shuffle=True)

In [None]:
# Look at the first batch only: print shape and dtype of its first image and
# of its first mask, then stop iterating.
for images, masks in dataset:
    for batch in (images, masks):
        sample = batch[0]
        print(sample.shape)
        print(sample.dtype)
    break

# UNet 모델 훈련

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Encoder outputs saved for the skip connections; the decoder pops them in
# reverse order.
down_samplings = []

inputs = keras.Input(shape=(280, 280, 3))

# Scale raw 0-255 pixel values to [0, 1] before the first convolution.
x = layers.Rescaling(1. / 255)(inputs)
# 280 is not divisible by 2**4: pooling four times gives 280 -> 140 -> 70 ->
# 35 -> 17, and upsampling 17 yields 34, which cannot be concatenated with the
# 35-pixel skip tensor. Pad to 288 here and crop back to 280 before the output.
x = layers.ZeroPadding2D(4)(x)

# Contracting path: two 3x3 convolutions per level, then 2x2 max pooling.
# 'same' padding keeps the spatial size so skip tensors line up in the decoder.
for num_filters in [64, 128, 256, 512]:
    x = layers.Conv2D(num_filters, 3, padding='same', activation='relu')(x)
    x = layers.Conv2D(num_filters, 3, padding='same', activation='relu')(x)
    down_samplings.append(x)
    x = layers.MaxPooling2D(2)(x)

# Bottleneck.
x = layers.Conv2D(1024, 3, padding='same', activation='relu')(x)
x = layers.Conv2D(1024, 3, padding='same', activation='relu')(x)

# Expanding path: upsample by 2, concatenate the matching encoder output, then
# apply two 3x3 convolutions.
for num_filters in [512, 256, 128, 64]:
    x = layers.Conv2DTranspose(num_filters, 2, strides=2, padding='same',
                               activation='relu')(x)
    down_conv = down_samplings.pop()
    x = layers.Concatenate(axis=-1)([down_conv, x])
    x = layers.Conv2D(num_filters, 3, padding='same', activation='relu')(x)
    x = layers.Conv2D(num_filters, 3, padding='same', activation='relu')(x)

# Remove the padding added at the input so predictions are 280x280 again.
x = layers.Cropping2D(4)(x)
# Per-pixel class probabilities.
outputs = layers.Conv2D(num_classes, 1, activation='softmax')(x)

model = keras.Model(inputs, outputs)
# Masks hold integer class indices, hence the sparse categorical loss.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.summary()

In [None]:
callbacks = [
    # fit() below receives no validation data, so the default monitored
    # quantity 'val_loss' is never available and save_best_only would skip
    # every save. Track the training loss so the best checkpoint is written.
    keras.callbacks.ModelCheckpoint('UNet_Pascal_VOC.keras',
                                    monitor='loss',
                                    save_best_only=True),
    # Write training logs to ./logs for TensorBoard.
    keras.callbacks.TensorBoard('./logs')
]

history = model.fit(dataset, epochs=20, callbacks=callbacks)

# TensorBoard.dev로 학습 결과 공유하기

In [None]:
!pip install -U tensorboard

In [None]:
# Upload the contents of the 'logs' directory with the given name/description.
# NOTE(review): the TensorBoard.dev hosting service appears to have been
# discontinued, so `tensorboard dev upload` may no longer work -- confirm.
!tensorboard dev upload --logdir logs \
    --name "AIFFLE Main Quest 2 by SteelBear" \
    --description "Pascal VOC semantic segmentation with UNet"

# Class Activation Map을 통해 모델 검증하기