In [None]:
import cv2
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
plt.style.use('seaborn-poster')
import os
import random
import gc
import pickle

In [None]:
# Number of target classes. The code below loads and labels an additional
# 'unclear' class only when this is set to 3.
num_classes = 2

## Read in the images

In [None]:
# Root folder that holds one sub-folder of images per class.
train_dir = '../vm-datasets/training_data/'

# Paths of the two classes that are always present.
train_damage = glob.glob(train_dir + 'damage/*jpg')
train_other = glob.glob(train_dir + 'other/*jpg')
print(
    'There are %d damage image, %d non-damage images'
    % (len(train_damage), len(train_other))
)

# The third class is only collected in the 3-class setup.
if num_classes == 3:
    train_unclear = glob.glob(train_dir + 'unclear/*jpg')
    print('with %d unclear images.' % len(train_unclear))

## Prepare data

In [None]:
# Pool the per-class path lists into a single list of training images.
train_imgs = train_damage + train_other

if num_classes == 3:
    train_imgs += train_unclear

# glob returns paths in arbitrary order, so sort first: otherwise the seeded
# shuffle below would still start from a machine-dependent ordering.
train_imgs.sort()

# Seed the generator that random.shuffle actually uses. np.random.seed does
# not affect Python's `random` module, so seeding only NumPy left the shuffle
# non-reproducible.
random.seed(42)
np.random.seed(42)  # kept so NumPy-based randomness stays seeded as before
random.shuffle(train_imgs)

print('We have %d total images:'%len(train_imgs))

In [None]:
# Size (rows x columns) every image is resized to, and the number of colour
# channels per image.
nrows, ncolumns, channels = 150, 150, 3

In [None]:
from tqdm import tqdm
def _label_for_image(image_path, num_classes):
    """Return the integer class label (damage=1, other=0, unclear=2) for one image path."""
    labels = {'damage': 1, 'other': 0}
    if num_classes == 3:
        labels['unclear'] = 2

    # Preferred source of truth: the folder the image sits in (<class>/<file>),
    # so a class name that happens to appear in a file name cannot win.
    folder = os.path.basename(os.path.dirname(image_path))
    if folder in labels:
        return labels[folder]

    # Fallback for paths with a different layout: substring match, in the same
    # precedence order the labels were checked before (damage, other, unclear).
    for class_name in labels:
        if class_name in image_path:
            return labels[class_name]

    raise ValueError('Cannot derive a class label from path: %r' % (image_path,))


def read_and_process_image(list_of_images, num_classes):
    """Load, resize and label a list of image files.

    Each image is read with OpenCV, resized to ``nrows`` x ``ncolumns``
    (module-level settings) and converted from BGR to RGB channel order.

    Parameters
    ----------
    list_of_images : list of str
        Paths of the image files to load.
    num_classes : int
        2 for damage/other; 3 additionally accepts the 'unclear' class.

    Returns
    -------
    X : list
        The processed RGB images, one entry per successfully read file.
    y : list of int
        The label of each entry in ``X`` (same length, same order).

    Raises
    ------
    ValueError
        If no class label can be derived from a path. Previously such an image
        was appended to ``X`` without a label, silently misaligning X and y.
    """
    X = []
    y = []
    unreadable = []

    for image in tqdm(list_of_images):
        # Resolve the label first so exactly one label is stored per image.
        label = _label_for_image(image, num_classes)

        bgr_img = cv2.imread(image, cv2.IMREAD_COLOR)
        if bgr_img is None:
            # cv2.imread signals a missing/corrupt file by returning None;
            # skip it instead of failing inside cv2.resize.
            unreadable.append(image)
            continue

        # cv2.resize takes dsize as (width, height), i.e. (columns, rows).
        resized = cv2.resize(bgr_img, (ncolumns, nrows), interpolation=cv2.INTER_CUBIC)

        # OpenCV loads images as BGR; switch to RGB.
        X.append(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB))
        y.append(label)

    if unreadable:
        print('Skipped %d unreadable image(s): %s' % (len(unreadable), unreadable))

    return X, y

In [None]:
# Load all training images and turn both returned lists (images, labels)
# into NumPy arrays.
X, y = map(np.array, read_and_process_image(train_imgs, num_classes=num_classes))

## Calculate the class weights and convert the labels to categorical

In [None]:
from sklearn.utils import class_weight

# Per-class weights in 'balanced' mode, one weight per entry of np.unique(y).
# The arguments are passed by keyword: recent scikit-learn releases no longer
# accept them positionally, while keyword form works on older releases too.
class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                  classes=np.unique(y),
                                                  y=y)

In [None]:
# Cast the pixel data to 32-bit floats.
X = X.astype('float32')

# One-hot encode the integer labels (one column per class).
y = tf.keras.utils.to_categorical(y, num_classes=num_classes, dtype='float32')

## Split data

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for validation; random_state pins the split.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [None]:
import tensorflow.keras as keras
from keras import layers
from keras import models
from keras import optimizers
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
from keras.callbacks import ModelCheckpoint

In [None]:
# Every generator rescales pixel values by 1/255; only the training generator
# additionally augments (random shear and horizontal flips).
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    horizontal_flip=True,
)
val_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Mini-batch size used by the generators, and the sample count of each split.
batch_size = 16
ntrain, nval = len(X_train), len(X_val)

In [None]:
# Wrap the in-memory arrays in batch iterators.
train_generator = train_datagen.flow(
    X_train,
    y_train,
    batch_size=batch_size,
)
val_generator = val_datagen.flow(
    X_val,
    y_val,
    batch_size=batch_size,
)
#test_generator = test_datagen.flow(X_test, y_test, batch_size=batch_size)

In [None]:
from keras.applications import VGG19

# VGG19 convolutional base with ImageNet weights and without its classifier
# head. The input shape is built from the image-size settings rather than
# repeated literals, so it cannot drift from the size the images are resized to.
conv_base = VGG19(weights='imagenet',
                  include_top=False,
                  input_shape=(nrows, ncolumns, channels))

In [None]:
from keras import models
from keras import layers

# Classifier: VGG19 conv base -> flatten -> 256-unit ReLU layer -> softmax
# over the classes.
model = models.Sequential([
    conv_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(num_classes, activation='softmax'),
])

In [None]:
# Take a look at the first layer of `model`, i.e. the VGG19 conv base added above.
model.layers[0].summary()

In [None]:
# Summary of the complete model (conv base plus the dense layers on top).
model.summary()

In [None]:
# Report how many weight tensors are trainable at this point.
print('This is the number of trainable weights before freezing the conv base:',
      len(model.trainable_weights))


In [None]:
# Fine-tuning setup: only the listed block5 convolution layers stay trainable,
# every other layer of the conv base is frozen.
conv_base.trainable = True

# Naming the layers explicitly makes it clear which ones will be re-trained.
layers_to_be_trained = ['block5_conv1', 'block5_conv2', 'block5_conv3', 'block5_conv4']

for layer in conv_base.layers:
    layer.trainable = layer.name in layers_to_be_trained

# Double check: show every layer together with its trainable flag.
[(lyr.name, lyr.trainable) for lyr in conv_base.layers]

In [None]:
# Separate output folders for the 2-class and the 3-class experiment.
if num_classes == 3:
    output_path = '../outputs/working/3_class_VGG19/'
else:
    output_path = '../outputs/working/2_class_VGG19/'

# Create the folder up front: the checkpoints, the figures and the pickled
# history are all written into it and would fail if it does not exist.
os.makedirs(output_path, exist_ok=True)

# Two checkpoints, each keeping only improvements of its monitored quantity
# (validation loss and validation accuracy respectively).
checkpoint_val_loss = ModelCheckpoint(output_path + 'Best_loss_model-{epoch:03d}-{loss:03f}-{val_loss:03f}.h5', verbose=1, monitor='val_loss',save_best_only=True, mode='auto')
checkpoint_val_acc = ModelCheckpoint(output_path + 'Best_acc_model-{epoch:03d}-{acc:03f}-{val_acc:03f}.h5', verbose=1, monitor='val_acc',save_best_only=True, mode='auto')

In [None]:
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-5),
              metrics=['acc'])

# Keras documents `class_weight` as a dict {class index: weight}. The weights
# from compute_class_weight come as an array ordered like np.unique(y), so
# position i belongs to class i (assumes every class 0..num_classes-1 occurs
# in the data). An existing dict is passed through unchanged.
if isinstance(class_weights, dict):
    class_weight_map = class_weights
else:
    class_weight_map = {idx: float(w) for idx, w in enumerate(class_weights)}

# Derive the number of validation batches from the validation set size
# (ceil division keeps the last partial batch), so each epoch's validation
# metrics cover the whole validation split instead of a fixed 50 batches.
validation_steps = (nval + batch_size - 1) // batch_size

history = model.fit_generator(
      train_generator,
      steps_per_epoch=100,   # fixed number of training batches per epoch
      epochs=50,
      class_weight=class_weight_map,
      validation_data=val_generator,
      validation_steps=validation_steps,
      callbacks=[checkpoint_val_loss, checkpoint_val_acc])

In [None]:
# Training vs. validation accuracy per epoch, saved as a PDF.
acc = history.history['acc']
val_acc = history.history['val_acc']
epochs = range(1, len(acc) + 1)

fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(epochs, acc, 'b', label='Training accuracy')
ax.plot(epochs, val_acc, 'r', label='Validation accuracy')
ax.set_xlabel('Training Epoch')
ax.set_ylabel('Accuracy')
ax.legend()
fig.savefig(output_path + 'training_curve.pdf')

In [None]:
# Training vs. validation loss per epoch, saved as a PDF.
loss = history.history['loss']
val_loss = history.history['val_loss']
# Build the epoch axis from this cell's own data; it used to rely on `acc`,
# a variable that only exists after the accuracy cell has been run.
epochs = range(1, len(loss) + 1)
plt.figure(figsize=(12, 8))
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.legend()
plt.xlabel('Training Epoch')
plt.ylabel('Loss')
plt.savefig(output_path + 'training_loss_curve.pdf')

In [None]:
# Pickle the per-epoch metrics dict (history.history) into the output folder.
history_file = output_path + 'trainHistoryDict.pkl'
with open(history_file, 'wb') as handle:
    pickle.dump(history.history, handle)