In [1]:
# Install the pinned `image-classifiers` release; the `classification_models`
# import below (used for the ResNeXt50 backbone) presumably comes from this
# package — confirm.
!pip install image-classifiers==1.0.0b1

Collecting image-classifiers==1.0.0b1
  Downloading https://files.pythonhosted.org/packages/d0/15/c51837c7009063ab9e4d3654eb32a92838fe515023cc7862e06857c9d19b/image_classifiers-1.0.0b1.tar.gz
Building wheels for collected packages: image-classifiers
  Building wheel for image-classifiers (setup.py) ... [?25l[?25hdone
  Created wheel for image-classifiers: filename=image_classifiers-1.0.0b1-cp36-none-any.whl size=19956 sha256=4d19cda30fd16eee849ca91e41142ab38cc66763bb706fafe4381102c3a0cb25
  Stored in directory: /root/.cache/pip/wheels/a4/22/b6/715c09496e4a64024b00a695e07a2b5804286f4840852fbcd3
Successfully built image-classifiers
Installing collected packages: image-classifiers
Successfully installed image-classifiers-1.0.0b1


In [2]:
# Library imports
import numpy as np
import pandas as pd
import os
import tensorflow as tf
from skimage.io import imread
from skimage.transform import resize
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import re
from classification_models.tfkeras import Classifiers

In [3]:
# Shared evaluation settings.
BATCH_SIZE = 30               # number of images fed to the model per batch
IMG_HEIGHT = IMG_WIDTH = 224  # image height / width in pixels

In [4]:
# Batch generator adapted from:
# https://medium.com/@mrgarg.rajat/training-on-large-datasets-that-dont-fit-in-memory-in-keras-60a974785d71


class My_Custom_Generator(tf.keras.utils.Sequence) :
  """Keras ``Sequence`` that loads images from disk one batch at a time.

  Each batch is a tuple ``(images, labels)`` where ``images`` is a float array
  of the files read with ``imread``, reshaped to
  ``(IMG_HEIGHT, IMG_WIDTH, 3)`` and divided by 255, and ``labels`` is the
  matching slice of ``labels`` as a NumPy array.

  Parameters
  ----------
  image_filenames : sliceable sequence of paths (anything ``str()``-able)
  labels : sliceable sequence, positionally aligned with ``image_filenames``
  batch_size : int, number of images per batch
  IMG_HEIGHT, IMG_WIDTH : int, target image size
  """

  def __init__(self, image_filenames, labels, batch_size, IMG_HEIGHT, IMG_WIDTH) :
    self.image_filenames = image_filenames
    self.labels = labels
    self.batch_size = batch_size
    self.IMG_HEIGHT = IMG_HEIGHT
    self.IMG_WIDTH = IMG_WIDTH


  def __len__(self) :
    """Number of batches, counting a final partial batch."""
    # The `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `int` gives the same value and is what `len()` expects.
    return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))


  def __getitem__(self, idx) :
    """Return batch number ``idx`` as ``(images, labels)``."""
    start = idx * self.batch_size
    stop = (idx + 1) * self.batch_size
    batch_x = self.image_filenames[start:stop]
    batch_y = self.labels[start:stop]

    # NOTE(review): np.resize does not interpolate - it repeats or truncates
    # the flattened pixel data to fill the requested shape. It is a no-op only
    # when the file already has exactly this shape. skimage's `resize` may
    # have been intended; confirm against the training preprocessing before
    # changing, since the saved weights depend on it.
    images = [
        np.resize(imread(str(file_name)), (self.IMG_HEIGHT, self.IMG_WIDTH, 3))
        for file_name in batch_x
    ]
    return np.array(images)/255.0, np.array(batch_y)

In [5]:

def top_n_accuracy(preds, truths, n, model, sparse = True):
    """Fraction of samples whose true class is among the ``n`` top-scoring classes.

    Adapted from
    https://stackoverflow.com/questions/32461246/how-to-get-top-3-or-top-n-predictions-using-sklearns-sgdclassifier/48572046

    Parameters
    ----------
    preds : array-like, shape (n_samples, n_classes)
        Per-class scores; a higher score means a more likely class.
    truths : array-like
        Integer class ids of shape (n_samples,) when ``sparse`` is True,
        otherwise a one-hot / score matrix of shape (n_samples, n_classes).
    n : int
        How many of the highest-scoring classes count as a hit.
    model : any
        Label copied verbatim into the ``'Model'`` field of the result.
    sparse : bool
        Whether ``truths`` already holds class ids.

    Returns
    -------
    dict with keys ``'Model'``, ``'N'``, ``'Class'`` (always ``'Total'``) and
    ``'Result'`` (the accuracy as a float).

    Raises
    ------
    ValueError
        If ``preds`` and ``truths`` describe a different number of samples.
    ZeroDivisionError
        If there are no samples.
    """
    # argsort is ascending, so the last n columns hold the n best classes.
    best_n = np.argsort(np.asarray(preds), axis=1)[:,-n:]
    # Convert to a plain array so rows are matched by position; indexing a
    # pandas Series with ts[i] is label-based and breaks on a non-default index.
    if sparse:
        ts = np.asarray(truths).ravel()
    else:
        ts = np.argmax(np.asarray(truths), axis=1)
    if ts.shape[0] != best_n.shape[0]:
        raise ValueError(
            f'preds has {best_n.shape[0]} rows but truths has {ts.shape[0]} entries')
    # A row is a hit when its true class appears anywhere in its top-n columns.
    hits = (best_n == ts[:, None]).any(axis=1)
    successes = int(np.count_nonzero(hits))
    return {'Model': model, 'N': n, 'Class': 'Total', 'Result': float(successes)/ts.shape[0]}

def top_n_recall_per_class(preds, truths, n, classes, model, sparse = True):
    """Per-class top-``n`` recall.

    For each class, the share of its samples whose true class id is among the
    ``n`` highest-scoring predicted classes.

    Parameters
    ----------
    preds : array-like, shape (n_samples, n_classes)
        Per-class scores; a higher score means a more likely class.
    truths : array-like
        Integer class ids of shape (n_samples,) when ``sparse`` is True,
        otherwise a one-hot / score matrix of shape (n_samples, n_classes).
        Class ids are used as positions into ``classes``.
    n : int
        How many of the highest-scoring classes count as a hit.
    classes : sized iterable
        One display label per class id, in class-id order.
    model : any
        Label copied verbatim into the ``'Model'`` field of every record.
    sparse : bool
        Whether ``truths`` already holds class ids.

    Returns
    -------
    list of dict, one per entry of ``classes``, with keys ``'Model'``, ``'N'``,
    ``'Class'`` and ``'Result'``. ``'Result'`` is NaN for a class that has no
    samples in ``truths`` (recall is undefined there).

    Raises
    ------
    ValueError
        If ``preds`` and ``truths`` describe a different number of samples.
    """
    classes = list(classes)
    n_classes = len(classes)
    # argsort is ascending, so the last n columns hold the n best classes.
    best_n = np.argsort(np.asarray(preds), axis=1)[:,-n:]
    # Convert to a plain array so rows are matched by position; indexing a
    # pandas Series with ts[i] is label-based and breaks on a non-default index.
    if sparse:
        ts = np.asarray(truths).ravel()
    else:
        ts = np.argmax(np.asarray(truths), axis=1)
    if ts.shape[0] != best_n.shape[0]:
        raise ValueError(
            f'preds has {best_n.shape[0]} rows but truths has {ts.shape[0]} entries')
    successes = [0]*n_classes
    class_count = [0]*n_classes
    for label, top in zip(ts, best_n):
        class_count[label] += 1
        if label in top:
            successes[label] += 1
    # Avoid ZeroDivisionError for classes absent from `truths`.
    recalls = [float(hit)/float(total) if total else float('nan')
               for hit, total in zip(successes, class_count)]
    return [{'Model': model, 'N': n, 'Class': k, 'Result': v} for k, v in zip(classes, recalls)]

In [6]:
def load_test_data():
    with open('/content/drive/My Drive/Data/final-book30-labels-test.csv', mode='r', encoding='utf-8', errors='ignore') as f:
        test_labels = pd.read_csv(f, delimiter=",", header=None, names=['record', 'Filename', 'Category ID'])

    test_labels = test_labels.assign(Full_Filename = '/content/padded/'+ test_labels["Filename"])

    print('Loading data')
     # Load actual data
    zip_path = '/content/drive/My Drive/images/Test/padded.zip'
    !cp "{zip_path}" .
    !unzip -q "padded.zip" 
    !rm "padded.zip" 

    return test_labels

def test_data_pred(base_model, IMG_HEIGHT, IMG_WIDTH, folder, test_labels):
    """Predict class probabilities for every test image with saved weights.

    A 30-way softmax head is stacked on the frozen ``base_model``, the weights
    are restored from the ``.h5`` file with the lexicographically greatest
    name in ``/content/drive/My Drive/Models/{folder}/``, and predictions are
    produced for all rows of ``test_labels``.

    Parameters
    ----------
    base_model : Keras model used as the feature extractor; its ``trainable``
        flag is set to False (the passed object is modified).
    IMG_HEIGHT, IMG_WIDTH : int, image size handed to the batch generator.
    folder : str, sub-directory of the models directory holding the weights.
    test_labels : DataFrame with ``Full_Filename`` and ``Category ID`` columns.

    Returns
    -------
    (y_pred, labels) : the array returned by ``model.predict`` and the
        ``Category ID`` column of ``test_labels``.

    Raises
    ------
    FileNotFoundError
        If the weights directory is missing or contains no ``.h5`` file.
    """
    my_test_batch_generator = My_Custom_Generator(test_labels["Full_Filename"], test_labels["Category ID"], BATCH_SIZE, IMG_HEIGHT, IMG_WIDTH)
    print('Loading model')

    base_model.trainable = False

    # NOTE(review): the head must match the architecture the weights were
    # saved from; 30 is presumably the number of categories - confirm.
    model = tf.keras.Sequential([
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(30, activation='softmax')
    ])

    model.compile(optimizer='Adam',
              loss='SparseCategoricalCrossentropy',
              metrics=['accuracy', tf.keras.metrics.SparseTopKCategoricalAccuracy(3)])

    weights_dir = f'/content/drive/My Drive/Models/{folder}/'
    # Plain suffix test instead of a regex built from an f-string containing
    # the invalid escape sequence '\.'.
    files_cut = [name for name in os.listdir(weights_dir) if name.endswith('.h5')]
    if not files_cut:
        raise FileNotFoundError(f'No .h5 weight file found in {weights_dir}')
    model_weights = max(files_cut)

    model.load_weights(f'{weights_dir}{model_weights}')

    print('Making predition')

    # Run over every batch of the generator. The previous
    # `len(test_labels) // BATCH_SIZE` dropped the final partial batch whenever
    # the sample count was not a multiple of BATCH_SIZE, leaving y_pred shorter
    # than the returned labels.
    y_pred = model.predict(my_test_batch_generator, steps = len(my_test_batch_generator), verbose = 1)

    return y_pred, test_labels["Category ID"]

In [7]:
# Read the test label table and unpack the test images onto local disk.
test_labels = load_test_data()

Loading data


In [8]:
## MobileNetV2
# Headless ImageNet backbone; test_data_pred adds the classification head and
# loads the saved weights from the 'mobilenet' folder.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
)
mnet_preds, test_actual = test_data_pred(
    base_model,
    IMG_HEIGHT=224,
    IMG_WIDTH=224,
    folder='mobilenet',
    test_labels=test_labels,
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
Loading model
Making predition


In [17]:
## InceptionResnetV2
# Headless ImageNet backbone at 299x299; test_data_pred adds the classification
# head and loads the saved weights from the 'inception_resnetv2' folder.
base_model = tf.keras.applications.InceptionResNetV2(
    input_shape=(299, 299, 3),
    include_top=False,
    weights='imagenet',
)
incep_preds, test_actual = test_data_pred(
    base_model,
    IMG_HEIGHT=299,
    IMG_WIDTH=299,
    folder='inception_resnetv2',
    test_labels=test_labels,
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Loading model
Making predition


In [18]:
# ResNeXt50 is not fetched from tf.keras.applications here; it comes from the
# classification_models package instead.
resnext50, preprocess_input = Classifiers.get('resnext50')
base_model = resnext50(
    (224, 224, 3),
    weights='imagenet',
    include_top=False,
)

resnext_preds, test_actual = test_data_pred(
    base_model,
    IMG_HEIGHT=224,
    IMG_WIDTH=224,
    folder='resnext',
    test_labels=test_labels,
)

Downloading data from https://github.com/qubvel/classification_models/releases/download/0.0.1/resnext50_imagenet_1000_no_top.h5
Loading model
Making predition


In [40]:
# One CSV per model: the prediction columns plus the source file name.
mnet_df = pd.DataFrame(mnet_preds).assign(Filename=test_labels['Filename'])
mnet_df.to_csv(
    '/content/drive/My Drive/Data/Predictions/mobilenetV2_preds.csv',
    header=True,
    index=False,
)

incep_df = pd.DataFrame(incep_preds).assign(Filename=test_labels['Filename'])
incep_df.to_csv(
    '/content/drive/My Drive/Data/Predictions/InceptionResnetV2_preds.csv',
    header=True,
    index=False,
)

resnext_df = pd.DataFrame(resnext_preds).assign(Filename=test_labels['Filename'])
resnext_df.to_csv(
    '/content/drive/My Drive/Data/Predictions/Resnext_preds.csv',
    header=True,
    index=False,
)

In [36]:
# Collect top-1 .. top-5 overall accuracy and per-class recall for each model.
# Record order: for every N, each model's total accuracy followed by its
# per-class recalls.
results = []
model_predictions = [
    ('MobileNetV2', mnet_preds),
    ('InceptionResnetV2', incep_preds),
    ('ResneXt50', resnext_preds),
]
for top_n in range(1, 6):
    for model_name, model_preds in model_predictions:
        results.append(top_n_accuracy(model_preds, test_actual, top_n, model_name))
        results += top_n_recall_per_class(model_preds, test_actual, top_n, range(30), model_name)

In [37]:
# Build the table directly from the list of result records. Concatenating one
# Series per record and transposing left every column as object dtype; this
# keeps the same rows and column order with proper numeric dtypes.
# Missing values are still replaced by 0, as before.
df = pd.DataFrame(results).fillna(0)

In [39]:
# Write the combined top-N results table to Drive (header row, no index column).
df.to_csv('/content/drive/My Drive/Data/Predictions/all_top_n_results.csv', header = True, index = False)