In [None]:
import pandas as pd
import os
import subprocess
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.keras import layers, models, applications
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from pathlib import Path


# Helpers


In [None]:
def plotHistory(history):
    """Plot accuracy and loss curves of a finished Keras training run.

    Two separate figures are produced: training vs. validation accuracy, and
    training vs. validation loss, both against the epoch index.

    Args:
        history: Object exposing a ``history`` dict (as returned by
            ``model.fit``) with per-epoch lists under ``'loss'``,
            ``'val_loss'`` and either ``'acc'``/``'val_acc'`` or
            ``'accuracy'``/``'val_accuracy'``.

    Raises:
        KeyError: If an expected metric is missing, e.g. when the model was
            fitted without validation data.
    """
    metrics = history.history

    # The key follows the metric name passed to compile(): this notebook uses
    # "acc", but a model compiled with "accuracy" is accepted as well.
    acc_key = 'acc' if 'acc' in metrics else 'accuracy'
    acc = metrics[acc_key]
    val_acc = metrics['val_' + acc_key]
    loss = metrics['loss']
    val_loss = metrics['val_loss']

    epochs = range(len(acc))

    # Each pair of curves gets its own explicitly created figure, so nothing
    # is drawn onto a figure that was current before this call.
    _, acc_ax = plt.subplots()
    acc_ax.plot(epochs, acc, label="Training accuracy")
    acc_ax.plot(epochs, val_acc, label="Validation accuracy")
    acc_ax.legend()
    acc_ax.set_title('Training and validation accuracy')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy')

    _, loss_ax = plt.subplots()
    loss_ax.plot(epochs, loss, label="Training loss")
    loss_ax.plot(epochs, val_loss, label="Validation loss")
    loss_ax.legend()
    loss_ax.set_title('Training and validation loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')


In [None]:
# Directory read by purge_checkpoints(). Later cells rebind this name to a
# per-experiment sub-directory before calling purge_checkpoints().
checkpoint_path = './checkpoints/lab2/'


def get_cp_callback(path):
    """Create a Keras ``ModelCheckpoint`` callback that writes to ``path``.

    The callback is configured to store weights only, to save once per epoch
    and to log each save (``verbose=1``).

    Args:
        path: Value forwarded as the callback's ``filepath``.

    Returns:
        The configured ``tf.keras.callbacks.ModelCheckpoint`` instance.
    """
    checkpoint_options = {
        'filepath': path,
        'save_weights_only': True,
        'save_freq': "epoch",
        'verbose': 1,
    }
    return tf.keras.callbacks.ModelCheckpoint(**checkpoint_options)


def purge_checkpoints(path=None):
    """Empty a checkpoint directory, leaving the (now empty) directory behind.

    Nothing happens when the directory does not exist.

    Args:
        path: Directory to purge. Defaults to the module-level
            ``checkpoint_path`` as it is bound at call time, which keeps the
            existing ``purge_checkpoints()`` calls working unchanged.

    Raises:
        OSError: If the directory cannot be removed or re-created.
    """
    # Local import keeps this cell self-contained.
    import shutil

    target = checkpoint_path if path is None else path
    if os.path.isdir(target):
        # Pure-Python equivalents of `rm -rf` / `mkdir`: portable across
        # platforms, and failures raise instead of being silently ignored.
        shutil.rmtree(target)
        os.makedirs(target, exist_ok=True)


# Data processing


In [None]:
dataset_path = Path(r'./data/lab2')

# Collect every PNG below the dataset root. The glob order is not guaranteed,
# so the paths are sorted to keep the row order (and therefore the seeded
# train/validation/test split) the same from run to run.
file_path = sorted(dataset_path.glob(r'**/*.png'))

# The label of an image is the name of the folder that directly contains it.
labels = [image_file.parent.name for image_file in file_path]


In [None]:
# Turn the collected paths and folder labels into one table with an
# 'image' column (path as a string) and a 'label' column.
file_path = pd.Series(file_path).astype(str)
labels = pd.Series(labels)

df = pd.DataFrame({'image': file_path, 'label': labels})

df.head()


In [None]:
# Show the first images of the table in a 3x5 grid, titled with their labels.
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(15, 10),
                         subplot_kw={'xticks': [], 'yticks': []})

# Pair axes with rows instead of indexing df by position: with fewer images
# than grid cells the remaining axes simply stay empty rather than raising.
preview = df.head(axes.size)
for ax, image_file, label in zip(axes.flat, preview['image'], preview['label']):
    ax.imshow(plt.imread(image_file))
    ax.set_title(label)

plt.show()


In [None]:
# Discard every row whose label ends in 'GT', renumber the remaining rows,
# then show how many images are left per label.
is_gt_label = df['label'].str.endswith('GT')
df = df.loc[~is_gt_label].reset_index(drop=True)
df['label'].value_counts()

In [None]:
# train_test_split is already imported in the notebook's first cell.
# 20% of the rows are held out for testing; 30% of the remaining rows become
# the validation set. Both splits share one seed so they are repeatable.
SPLIT_SEED = 30

x_train_val, x_test = train_test_split(df, test_size=0.2, random_state=SPLIT_SEED)
x_train, x_val = train_test_split(x_train_val, test_size=0.3, random_state=SPLIT_SEED)

print("Shape of training data", x_train.shape)
print("Shape of test_ds data", x_test.shape)
print("Shape of validation data", x_val.shape)


# Initial model

In [None]:
# Baseline input pipeline: a generator built with default arguments that
# resizes every image to 200x200 RGB and one-hot encodes the label.
image_data_generator = ImageDataGenerator()

# Pin one class list for all three splits. Without it each
# flow_from_dataframe call infers the classes from its own frame, so a split
# that happens to miss a label would get a different label -> index mapping
# (and one-hot width) than the training split.
class_names = sorted(df['label'].unique())

flow_kwargs = dict(x_col='image',
                   y_col='label',
                   target_size=(200, 200),
                   color_mode='rgb',
                   class_mode='categorical',
                   classes=class_names)

train_ds = image_data_generator.flow_from_dataframe(dataframe=x_train,
                                                    **flow_kwargs)

test_ds = image_data_generator.flow_from_dataframe(dataframe=x_test,
                                                   **flow_kwargs)

val_ds = image_data_generator.flow_from_dataframe(dataframe=x_val,
                                                  **flow_kwargs)


In [None]:
input_shape = (200, 200, 3)
checkpoint_path = "./checkpoints/lab2/init/"
# Clears the directory named by the checkpoint_path just assigned above.
purge_checkpoints()

# Size the softmax layer from the classes the training iterator produces
# instead of hard-coding the class count.
num_classes = len(train_ds.class_indices)

# Two conv + max-pool stages, then a 256-unit dense layer with 20% dropout
# and a softmax classifier.
model = models.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPool2D(pool_size=(2, 2)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='softmax')
])


In [None]:
# Train the baseline model for 10 epochs with Adam and categorical
# cross-entropy, checkpointing weights through get_cp_callback.
model.compile(loss='categorical_crossentropy',
              optimizer="adam",
              metrics=["acc"])

checkpoint_cb = get_cp_callback(checkpoint_path)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[checkpoint_cb],
    workers=4,
    use_multiprocessing=True,
)

# Plot the learning curves, then save the trained model.
plotHistory(history)
model.save('./models/lab2/init/')


# Augmentation

In [None]:
batch_size = 64

# Training stream: pixel values scaled by 1/255 plus random rotation, shift,
# shear, zoom and horizontal flip.
image_data_generator = ImageDataGenerator(rescale=1./255,
                                          rotation_range=40,
                                          width_shift_range=0.2,
                                          height_shift_range=0.2,
                                          shear_range=0.2,
                                          zoom_range=0.2,
                                          horizontal_flip=True,
                                          fill_mode='nearest')

# Validation and test images get the same rescaling but no random
# transforms, so the reported metrics are measured on unmodified images.
eval_data_generator = ImageDataGenerator(rescale=1./255)

# One shared class list keeps the label -> index mapping identical across
# splits even if a split happens to miss a label.
class_names = sorted(df['label'].unique())

flow_kwargs = dict(x_col='image',
                   y_col='label',
                   target_size=(200, 200),
                   color_mode='rgb',
                   class_mode='categorical',
                   classes=class_names,
                   batch_size=batch_size)

train_ds = image_data_generator.flow_from_dataframe(dataframe=x_train,
                                                    **flow_kwargs)

test_ds = eval_data_generator.flow_from_dataframe(dataframe=x_test,
                                                  **flow_kwargs)

val_ds = eval_data_generator.flow_from_dataframe(dataframe=x_val,
                                                 **flow_kwargs)


In [None]:
input_shape = (200, 200, 3)
checkpoint_path = "./checkpoints/lab2/augm/"
# Clears the directory named by the checkpoint_path just assigned above.
purge_checkpoints()

# Size the softmax layer from the classes the training iterator produces
# instead of hard-coding the class count.
num_classes = len(train_ds.class_indices)

# Same architecture as the baseline: two conv + max-pool stages, a 256-unit
# dense layer with 20% dropout, and a softmax classifier.
model = models.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPool2D(pool_size=(2, 2)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='softmax')
])

# model.summary()


In [None]:
# Train on the augmented stream for 10 epochs with Adam and categorical
# cross-entropy, checkpointing weights through get_cp_callback.
model.compile(loss='categorical_crossentropy',
              optimizer="adam",
              metrics=["acc"])

checkpoint_cb = get_cp_callback(checkpoint_path)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[checkpoint_cb],
    workers=4,
    use_multiprocessing=True,
)

# Plot the learning curves, then save the trained model.
plotHistory(history)
model.save('./models/lab2/augm/')

# 100x100 Image size

In [None]:
batch_size = 64

# Training stream: pixel values scaled by 1/255 plus random rotation, shift,
# shear, zoom and horizontal flip.
image_data_generator = ImageDataGenerator(rescale=1./255,
                                          rotation_range=40,
                                          width_shift_range=0.2,
                                          height_shift_range=0.2,
                                          shear_range=0.2,
                                          zoom_range=0.2,
                                          horizontal_flip=True,
                                          fill_mode='nearest')

# Validation and test images get the same rescaling but no random
# transforms, so the reported metrics are measured on unmodified images.
eval_data_generator = ImageDataGenerator(rescale=1./255)

# One shared class list keeps the label -> index mapping identical across
# splits even if a split happens to miss a label.
class_names = sorted(df['label'].unique())

# Images are downsized to 100x100 for this experiment.
flow_kwargs = dict(x_col='image',
                   y_col='label',
                   target_size=(100, 100),
                   color_mode='rgb',
                   class_mode='categorical',
                   classes=class_names,
                   batch_size=batch_size)

train_ds = image_data_generator.flow_from_dataframe(dataframe=x_train,
                                                    **flow_kwargs)

test_ds = eval_data_generator.flow_from_dataframe(dataframe=x_test,
                                                  **flow_kwargs)

val_ds = eval_data_generator.flow_from_dataframe(dataframe=x_val,
                                                 **flow_kwargs)

In [None]:
input_shape = (100, 100, 3)
checkpoint_path = "./checkpoints/lab2/100x100/"
# Clears the directory named by the checkpoint_path just assigned above.
purge_checkpoints()

# Size the softmax layer from the classes the training iterator produces
# instead of hard-coding the class count.
num_classes = len(train_ds.class_indices)

# Same layer stack as the 200x200 models, fed with 100x100 inputs.
model = models.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPool2D(pool_size=(2, 2)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPool2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='softmax')
])

# model.summary()

In [None]:
# Train the 100x100 model for 20 epochs with Adam and categorical
# cross-entropy. Only the checkpoint callback is registered, so no early
# stopping takes place and all 20 epochs run.
model.compile(loss='categorical_crossentropy',
              optimizer="adam",
              metrics=["acc"])

checkpoint_cb = get_cp_callback(checkpoint_path)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=[checkpoint_cb],
)

# Plot the learning curves, then save the trained model.
plotHistory(history)
model.save('./models/lab2/100x100/')


# Transfer learning (ResNet)

In [None]:
batch_size = 64

# The pretrained ResNet50V2 backbone expects inputs prepared by its own
# preprocess_input (pixels scaled to [-1, 1]), so that function replaces the
# plain 1/255 rescale. Random augmentation is applied to training data only.
resnet_preprocess = applications.resnet_v2.preprocess_input

image_data_generator = ImageDataGenerator(preprocessing_function=resnet_preprocess,
                                          rotation_range=40,
                                          width_shift_range=0.2,
                                          height_shift_range=0.2,
                                          shear_range=0.2,
                                          zoom_range=0.2,
                                          horizontal_flip=True,
                                          fill_mode='nearest')

# Validation and test images get the same preprocessing but no random
# transforms, so the reported metrics are measured on unmodified images.
eval_data_generator = ImageDataGenerator(preprocessing_function=resnet_preprocess)

# One shared class list keeps the label -> index mapping identical across
# splits even if a split happens to miss a label.
class_names = sorted(df['label'].unique())

flow_kwargs = dict(x_col='image',
                   y_col='label',
                   target_size=(200, 200),
                   color_mode='rgb',
                   class_mode='categorical',
                   classes=class_names,
                   batch_size=batch_size)

train_ds = image_data_generator.flow_from_dataframe(dataframe=x_train,
                                                    **flow_kwargs)

test_ds = eval_data_generator.flow_from_dataframe(dataframe=x_test,
                                                  **flow_kwargs)

val_ds = eval_data_generator.flow_from_dataframe(dataframe=x_val,
                                                 **flow_kwargs)

In [None]:
input_shape = (200, 200, 3)
checkpoint_path = "./checkpoints/lab2/resn/"
# Clears the directory named by the checkpoint_path just assigned above.
purge_checkpoints()

# ResNet50V2 without its classification head, used as a frozen feature
# extractor (its weights are excluded from training).
base_model = applications.ResNet50V2(include_top=False,
                                      input_shape=input_shape)
base_model.trainable = False

# Size the softmax layer from the classes the training iterator produces
# instead of hard-coding the class count.
num_classes = len(train_ds.class_indices)

# New trainable head: flatten the backbone output, one 128-unit dense layer,
# then the softmax classifier.
x = layers.Flatten()(base_model.output)
x = layers.Dense(128, activation="relu")(x)
output_layer = layers.Dense(num_classes, activation='softmax')(x)

model = models.Model(inputs=base_model.inputs,
                     outputs=output_layer)



In [None]:
# Train the ResNet-based model for at most 10 epochs with Adam and
# categorical cross-entropy.
model.compile(loss='categorical_crossentropy',
              optimizer="adam",
              metrics=["acc"])

checkpoint_cb = get_cp_callback(checkpoint_path)

# Stop once validation loss has failed to improve for one epoch and roll the
# model back to its best weights.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                     patience=1,
                                                     restore_best_weights=True)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    callbacks=[checkpoint_cb, early_stopping_cb],
)

# Plot the learning curves, then save the trained model.
plotHistory(history)
model.save('./models/lab2/resn/')


# Transfer learning (MobileNet)

In [None]:
batch_size = 64

# The pretrained MobileNetV2 backbone expects inputs prepared by its own
# preprocess_input (pixels scaled to [-1, 1]) rather than raw 0-255 values.
# No random augmentation is configured, so one generator serves all splits.
image_data_generator = ImageDataGenerator(
    preprocessing_function=applications.mobilenet_v2.preprocess_input)

# One shared class list keeps the label -> index mapping identical across
# splits even if a split happens to miss a label.
class_names = sorted(df['label'].unique())

flow_kwargs = dict(x_col='image',
                   y_col='label',
                   target_size=(200, 200),
                   color_mode='rgb',
                   class_mode='categorical',
                   classes=class_names,
                   batch_size=batch_size)

train_ds = image_data_generator.flow_from_dataframe(dataframe=x_train,
                                                    **flow_kwargs)

test_ds = image_data_generator.flow_from_dataframe(dataframe=x_test,
                                                   **flow_kwargs)

val_ds = image_data_generator.flow_from_dataframe(dataframe=x_val,
                                                  **flow_kwargs)


In [None]:
input_shape = (200, 200, 3)
checkpoint_path = "./checkpoints/lab2/mobn/"
# Clears the directory named by the checkpoint_path just assigned above.
purge_checkpoints()

# MobileNetV2 without its classification head, used as a frozen feature
# extractor (its weights are excluded from training).
base_model = applications.MobileNetV2(include_top=False,
                                      input_shape=input_shape)
base_model.trainable = False

# Size the softmax layer from the classes the training iterator produces
# instead of hard-coding the class count.
num_classes = len(train_ds.class_indices)

# New trainable head: flatten the backbone output, one 128-unit dense layer,
# then the softmax classifier.
x = layers.Flatten()(base_model.output)
x = layers.Dense(128, activation="relu")(x)
output_layer = layers.Dense(num_classes, activation='softmax')(x)

model = models.Model(inputs=base_model.inputs,
                     outputs=output_layer)


In [None]:
# Train the MobileNet-based model for 5 epochs with Adam and categorical
# cross-entropy, checkpointing weights through get_cp_callback.
model.compile(loss='categorical_crossentropy',
              optimizer="adam",
              metrics=["acc"])

checkpoint_cb = get_cp_callback(checkpoint_path)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5,
    callbacks=[checkpoint_cb],
)

# Plot the learning curves, then save the trained model.
plotHistory(history)
model.save('./models/lab2/mobn/')