In [None]:
# Pinned install of image-classifiers; presumably this supplies the
# `classification_models` package imported in the next cell — confirm.
!pip install image-classifiers==1.0.0b1

In [None]:
# Library imports
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from skimage.io import imread
from skimage.transform import resize
from sklearn.metrics import accuracy_score
import re
from classification_models.tfkeras import Classifiers

In [None]:
# Model/training variables.
# BATCH_SIZE must evenly divide the number of training, validation, and test records:
# train_model derives its per-epoch step counts with floor division
# (len(labels) // BATCH_SIZE), which does not count a trailing partial batch.
BATCH_SIZE = 30

In [None]:
# https://medium.com/@mrgarg.rajat/training-on-large-datasets-that-dont-fit-in-memory-in-keras-60a974785d71
class My_Custom_Generator(tf.keras.utils.Sequence) :
  """Keras Sequence that loads images from disk one batch at a time.

  Lets training run over an image set that does not fit in memory: only the
  files belonging to the requested batch are read, resized and normalised.

  Args:
    image_filenames: sliceable collection of image paths; each entry is passed
      to `imread` via `str()`.
    labels: sliceable collection of labels, aligned with `image_filenames`.
    batch_size: number of records per batch.
    IMG_HEIGHT: height in pixels that every image is resized to.
    IMG_WIDTH: width in pixels that every image is resized to.
  """

  def __init__(self, image_filenames, labels, batch_size, IMG_HEIGHT, IMG_WIDTH) :
    # Let the Keras base class initialise itself (newer Keras versions warn otherwise).
    super().__init__()
    self.image_filenames = image_filenames
    self.labels = labels
    self.batch_size = batch_size
    self.IMG_HEIGHT = IMG_HEIGHT
    self.IMG_WIDTH = IMG_WIDTH


  def __len__(self) :
    """Return the number of batches, counting a final partial batch."""
    # Built-in int: the `np.int` alias was removed in NumPy 1.24.
    return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))


  def _load_image(self, file_name) :
    """Read one image and return it as an (IMG_HEIGHT, IMG_WIDTH, 3) array scaled by 1/255."""
    image = imread(str(file_name))
    if image.ndim == 2 :
      # Grayscale: replicate the single channel so the model still gets 3 channels.
      image = np.stack([image] * 3, axis=-1)
    elif image.shape[-1] == 4 :
      # RGBA: drop the alpha channel.
      image = image[..., :3]
    # skimage's resize interpolates the picture. np.resize (used previously) only
    # repeats/truncates the flattened pixel buffer and scrambles any image that is
    # not already at the target size. preserve_range keeps the original value
    # range so the division by 255 below stays correct.
    resized = resize(image, (self.IMG_HEIGHT, self.IMG_WIDTH, 3), preserve_range=True)
    return resized / 255.0


  def __getitem__(self, idx) :
    """Return `(images, labels)` for batch number `idx`.

    `images` is an array of the batch's resized, normalised pictures and
    `labels` is the matching slice of labels converted to an array.
    """
    start = idx * self.batch_size
    stop = (idx + 1) * self.batch_size
    batch_x = self.image_filenames[start:stop]
    batch_y = self.labels[start:stop]

    images = np.array([self._load_image(file_name) for file_name in batch_x])
    return images, np.array(batch_y)

In [None]:
def load_data():
    with open('/content/drive/My Drive/Data/final-book30-labels-train.csv', mode='r', encoding='utf-8', errors='ignore') as f:
        train_labels = pd.read_csv(f, delimiter=",", header=None, names=['record', 'Filename', 'Category ID'])

    with open('/content/drive/My Drive/Data/final-book30-labels-valid.csv', mode='r', encoding='utf-8', errors='ignore') as f:
        valid_labels = pd.read_csv(f, delimiter=",", header=None, names=['record', 'Filename', 'Category ID'])

    train_labels = train_labels.assign(Full_Filename = '/content/padded/'+ train_labels["Filename"])
    valid_labels = valid_labels.assign(Full_Filename = '/content/valid_padded/padded/'+ valid_labels["Filename"])

    print('Loading data')
     # Load actual data
    zip_path = '/content/drive/My Drive/images/Train/padded.zip'
    !cp "{zip_path}" .
    !unzip -q "padded.zip" 
    !rm "padded.zip" 

    zip_path = '/content/drive/My Drive/images/Valid/padded.zip'
    !cp "{zip_path}" .
    !unzip -q "padded.zip"  -d "valid_padded" 
    !rm "padded.zip" 

    return train_labels, valid_labels


def train_model(base_model, IMG_HEIGHT, IMG_WIDTH, folder, train_labels, valid_labels,
                num_classes=30, epochs=200, patience=20):
    """
    Train a softmax classification head on top of a frozen base model, with early stopping,
        best-validation-accuracy weight snapshots and a CSV training log. If the model folder
        already holds numbered weight snapshots, training resumes from the highest-numbered one.
    arg:
        base_model - Keras model used as the feature extractor; it is frozen (trainable = False)
        IMG_HEIGHT - image height handed to the batch generators
        IMG_WIDTH - image width handed to the batch generators
        folder - sub-folder of '/content/drive/My Drive/Models/' for snapshots and the log
        train_labels - DataFrame with 'Full_Filename' and 'Category ID' columns (training set)
        valid_labels - DataFrame with 'Full_Filename' and 'Category ID' columns (validation set)
        num_classes - number of units in the softmax output layer (default 30)
        epochs - final epoch number to train up to (default 200)
        patience - early-stopping patience, in epochs, on val_accuracy (default 20)
    returns:
        the History object returned by model.fit
    """

    print('Prepping model')
    my_training_batch_generator = My_Custom_Generator(train_labels["Full_Filename"], train_labels["Category ID"], BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH)
    my_validation_batch_generator = My_Custom_Generator(valid_labels["Full_Filename"], valid_labels["Category ID"], BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH)

    # Only the new classification head is trained.
    base_model.trainable = False

    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer='Adam',
                  loss='SparseCategoricalCrossentropy',
                  metrics=['accuracy', tf.keras.metrics.SparseTopKCategoricalAccuracy(3)])

    # Make sure the output folder exists before listing it or logging into it.
    model_dir = f'/content/drive/My Drive/Models/{folder}/'
    os.makedirs(model_dir, exist_ok=True)

    # early stopping and checkpoints
    es = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=patience)
    mc = tf.keras.callbacks.ModelCheckpoint(model_dir + '{epoch:02d}.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True, save_weights_only=True)
    cl = tf.keras.callbacks.CSVLogger(model_dir + 'model_history_log.csv', append = True)

    # Find the newest snapshot. Compare epoch numbers as integers: a plain string
    # max() would rank '99.h5' above '100.h5'. Only purely numeric '<epoch>.h5'
    # names are considered, so unrelated .h5 files cannot break the int() parse.
    pat = re.compile(r'^(\d+)\.h5$')
    checkpoints = {}
    for file_name in os.listdir(model_dir):
        found = pat.match(file_name)
        if found:
            checkpoints[int(found.group(1))] = file_name

    if checkpoints:
        epoch = max(checkpoints)
        model.load_weights(model_dir + checkpoints[epoch])
    else:
        epoch = 0

    # NOTE: the checkpoint callback starts without a best score after a resume, so
    # the first resumed epoch is always saved regardless of earlier results.
    print('Training model')
    # Model.fit accepts Sequence generators directly; fit_generator is deprecated.
    history = model.fit(my_training_batch_generator,
                        validation_data = my_validation_batch_generator,
                        steps_per_epoch = int(len(train_labels) // BATCH_SIZE),
                        validation_steps = int(len(valid_labels) // BATCH_SIZE),
                        initial_epoch = epoch,
                        epochs = epochs,
                        verbose = 1,
                        callbacks = [es, mc, cl])

    return history


In [None]:
# Delete the image folders from a previous load_data() run — presumably so the
# archives can be extracted again without hitting existing files.
!rm -r padded
!rm -r valid_padded

In [None]:
# Read the label tables and unpack the image archives once; every model below reuses them.
train_labels, valid_labels = load_data()

Loading data


In [None]:
## MobileNetV2
# ImageNet-pretrained base without its classification head (include_top=False);
# train_model freezes it and trains a new softmax head on 224x224 RGB inputs.
# Snapshots and the training log go to the 'mobilenet' model folder.
base_model = tf.keras.applications.MobileNetV2(input_shape=(224, 224, 3),
                                               include_top=False,
                                               weights='imagenet')
mob_hist = train_model(base_model, 224, 224, 'mobilenet', train_labels, valid_labels)

Prepping model
Training model
Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/1000
Epoch 00001: val_accuracy improved from -inf to 0.14561, saving model to /content/drive/My Drive/Models/mobilenet/01.h5
Epoch 2/1000
Epoch 00002: val_accuracy improved from 0.14561 to 0.14795, saving model to /content/drive/My Drive/Models/mobilenet/02.h5
Epoch 3/1000
Epoch 00003: val_accuracy did not improve from 0.14795
Epoch 4/1000
Epoch 00004: val_accuracy improved from 0.14795 to 0.15984, saving model to /content/drive/My Drive/Models/mobilenet/04.h5
Epoch 5/1000
Epoch 00005: val_accuracy did not improve from 0.15984
Epoch 6/1000
Epoch 00006: val_accuracy did not improve from 0.15984
Epoch 7/1000
Epoch 00007: val_accuracy did not improve from 0.15984
Epoch 8/1000
Epoch 00008: val_accuracy did not improve from 0.15984
Epoch 9/1000
Epoch 00009: val_accuracy did not improve from 0.15984
Epoch 10/1000
Epoch 00010: val_accuracy improved from 0.15984 to 0.16842, saving 

KeyboardInterrupt: ignored

In [None]:
## InceptionResnetV2
# ImageNet-pretrained base without its classification head (include_top=False);
# this model is built for 299x299 RGB inputs, so the generators resize to 299x299.
# Snapshots and the training log go to the 'inception_resnetv2' model folder.
base_model = tf.keras.applications.InceptionResNetV2(input_shape=(299, 299, 3),
                                               include_top=False,
                                               weights='imagenet')
inc_hist = train_model(base_model, 299, 299, 'inception_resnetv2', train_labels, valid_labels)

Prepping model
Training model
Epoch 47/1000
Epoch 00047: val_accuracy improved from -inf to 0.27212, saving model to /content/drive/My Drive/Models/inception_resnetv2/47.h5
Epoch 48/1000
Epoch 00048: val_accuracy did not improve from 0.27212
Epoch 49/1000
Epoch 00049: val_accuracy did not improve from 0.27212
Epoch 50/1000
Epoch 00050: val_accuracy did not improve from 0.27212
Epoch 51/1000
Epoch 00051: val_accuracy did not improve from 0.27212
Epoch 52/1000
Epoch 00052: val_accuracy improved from 0.27212 to 0.27856, saving model to /content/drive/My Drive/Models/inception_resnetv2/52.h5
Epoch 53/1000
Epoch 00053: val_accuracy did not improve from 0.27856
Epoch 54/1000
Epoch 00054: val_accuracy improved from 0.27856 to 0.27914, saving model to /content/drive/My Drive/Models/inception_resnetv2/54.h5
Epoch 55/1000
Epoch 00055: val_accuracy did not improve from 0.27914
Epoch 56/1000
Epoch 00056: val_accuracy improved from 0.27914 to 0.28402, saving model to /content/drive/My Drive/Models/

In [None]:
# ResNeXt has to come from another package (classification_models) rather than
# tf.keras.applications.
# NOTE(review): the `preprocess_input` returned here is not used in this cell or in
# train_model — confirm whether the model expects it to be applied.
resnext50, preprocess_input = Classifiers.get('resnext50')
base_model = resnext50((224, 224, 3), weights='imagenet', include_top = False)

# Snapshots and the training log go to the 'resnext' model folder.
res_hist = train_model(base_model, 224, 224, 'resnext', train_labels, valid_labels)

Prepping model
Training model
Epoch 85/200
Epoch 00085: val_accuracy improved from -inf to 0.12417, saving model to /content/drive/My Drive/Models/resnext/85.h5
Epoch 86/200
Epoch 00086: val_accuracy did not improve from 0.12417
Epoch 87/200
Epoch 00087: val_accuracy did not improve from 0.12417
Epoch 88/200
Epoch 00088: val_accuracy did not improve from 0.12417
Epoch 89/200
Epoch 00089: val_accuracy did not improve from 0.12417
Epoch 90/200
Epoch 00090: val_accuracy did not improve from 0.12417
Epoch 91/200
Epoch 00091: val_accuracy did not improve from 0.12417
Epoch 92/200
Epoch 00092: val_accuracy improved from 0.12417 to 0.12807, saving model to /content/drive/My Drive/Models/resnext/92.h5
Epoch 93/200
Epoch 00093: val_accuracy did not improve from 0.12807
Epoch 94/200
Epoch 00094: val_accuracy did not improve from 0.12807
Epoch 95/200
Epoch 00095: val_accuracy improved from 0.12807 to 0.13119, saving model to /content/drive/My Drive/Models/resnext/95.h5
Epoch 96/200
Epoch 00096: v

KeyboardInterrupt: ignored