**TRANSFER LEARNING**

In [1]:
import numpy as np
import pandas as pd
import pickle
import os
from PIL import Image
import h5py
from keras.utils.io_utils import HDF5Matrix
import math
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# !pip install pydrive

In [0]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [0]:
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Input, Dropout
from keras.callbacks import ModelCheckpoint
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.optimizers import SGD, Adam
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score,recall_score,precision_score, roc_auc_score
from sklearn.utils import class_weight
import sklearn
import keras

In [0]:
def getCM(y_true,y_pred, threshold):
    y = list(map(lambda x: 1 if x>threshold else 0,y_pred))
    return confusion_matrix(y_true,y), accuracy_score(y_true,y),  sklearn.metrics.precision_score(y_true,y), sklearn.metrics.recall_score(y_true,y), f1_score(y_true,y), roc_auc_score(y_true,y_pred)

def f_score(y_true, y_pred, threshold=0.2, beta=2):

    tp = tp_score(y_true, y_pred, threshold)
    fp = fp_score(y_true, y_pred, threshold)
    fn = fn_score(y_true, y_pred, threshold)

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return (1+beta**2) * ((precision * recall) / ((beta**2)*precision + recall))


def tp_score(y_true, y_pred, threshold=0.1):

    tp_3d = K.concatenate(
        [
            K.cast(K.expand_dims(K.flatten(y_true)), 'bool'),
            K.cast(K.expand_dims(K.flatten(K.greater(y_pred, K.constant(threshold)))), 'bool'),
            K.cast(K.ones_like(K.expand_dims(K.flatten(y_pred))), 'bool')
        ], axis=1
    )

    tp = K.sum(K.cast(K.all(tp_3d, axis=1), 'int32'))

    return tp


def fp_score(y_true, y_pred, threshold=0.1):

    fp_3d = K.concatenate(
        [
            K.cast(K.expand_dims(K.flatten(K.abs(y_true - K.ones_like(y_true)))), 'bool'),
            K.cast(K.expand_dims(K.flatten(K.greater(y_pred, K.constant(threshold)))), 'bool'),
            K.cast(K.ones_like(K.expand_dims(K.flatten(y_pred))), 'bool')
        ], axis=-1
    )

    fp = K.sum(K.cast(K.all(fp_3d, axis=1), 'int32'))

    return fp


def fn_score(y_true, y_pred, threshold=0.1):

    fn_3d = K.concatenate(
        [
            K.cast(K.expand_dims(K.flatten(y_true)), 'bool'),
            K.cast(K.expand_dims(K.flatten(K.abs(K.cast(K.greater(y_pred, K.constant(threshold)), 'float') - K.ones_like(y_pred)))), 'bool'),
            K.cast(K.ones_like(K.expand_dims(K.flatten(y_pred))), 'bool')
        ], axis=1
    )

    fn = K.sum(K.cast(K.all(fn_3d, axis=1), 'int32'))

    return fn


def precision_score(y_true, y_pred, threshold=0.1):

    tp = tp_score(y_true, y_pred, threshold)
    fp = fp_score(y_true, y_pred, threshold)

    return tp / (tp + fp)


def recall_score(y_true, y_pred, threshold=0.1):

    tp = tp_score(y_true, y_pred, threshold)
    fn = fn_score(y_true, y_pred, threshold)

    return tp / (tp + fn)

In [0]:
path = "gdrive/My Drive/NeuralNetCapstone/"

In [8]:
total_length = 0
f = h5py.File(path + 'all_images.h5', mode='r')
total_length += len(f['dataset_1'])
print (total_length)
f = h5py.File(path + 'added_images_1.h5', mode='r')
total_length += len(f['dataset_1'])
print (total_length)
f = h5py.File(path + 'added_images_2.h5', mode='r')
total_length += len(f['dataset_1'])

51759
66759


In [9]:
np.random.seed(0)
idxs = np.random.permutation(total_length)
t_idxs = idxs[:math.floor(total_length*0.8)]
test_idxs = idxs[math.floor(total_length*0.8):]
train_length = len(t_idxs)
train_idxs = t_idxs[:math.floor(train_length*0.8)]
val_idxs = t_idxs[math.floor(train_length*0.8):]

print ("Train set:", train_idxs.shape)
print ("Validation set:", val_idxs.shape)
print ("Test set:", test_idxs.shape)

Train set: (54932,)
Validation set: (13734,)
Test set: (17167,)


In [0]:
#Get y labels from datafile
datasheetfile = "final_df.csv"
df = pd.read_csv(os.path.join(path,datasheetfile))
all_classes = df.columns.values[-14:]
y_true = df[all_classes].values

In [0]:
def getLabels(augment,*args):
    stack = args[0]
    if augment:
        for each in args[1:]:
            stack = np.concatenate((stack,each))    
    return stack

In [0]:
f1 = h5py.File(path +'added_labels_1.h5', 'r')
X1 = f1['dataset_1']
added_labels1 = np.array(X1.value)

f1 = h5py.File(path +'added_labels_2.h5', 'r')
X1 = f1['dataset_1']
added_labels2 = np.array(X1.value)


In [0]:
total_labels = getLabels(True,y_true,added_labels1, added_labels2)

In [14]:
total_labels.shape

(85833, 14)

In [0]:
class DataGenerator(keras.utils.Sequence):

    def __init__(self, *args):
        # Arguments would be : AugmentationBool ,labels, batch_size, idxs, all_images_path, *added_images_paths
        self.augment = args[0]
        self.all_images = HDF5Matrix(args[4],'dataset_1')
        self.added_images1 = HDF5Matrix(args[5],'dataset_1')
        self.added_images2 = HDF5Matrix(args[6],'dataset_1')
        self.labels = args[1]
        self.batch_size = args[2]
        self.idxs = args[3]

    def __len__(self):
        return len(self.idxs)//self.batch_size

    def __getitem__(self, idx):
        send_idxs = self.idxs[idx * self.batch_size:min((idx + 1) * self.batch_size,len(self.labels))]
        batch_x = list()

        if self.augment:
            for each in send_idxs:
                if each < self.all_images.shape[0]:
                    batch_x.append(self.all_images[each])
                elif each >= self.all_images.shape[0] and each < self.added_images1.shape[0]:
                    batch_x.append(self.all_images1[each]-self.all_images.shape[0])
                else:
                    batch_x.append(self.added_images2[each-(self.all_images.shape[0]+self.added_images1.shape[0])])
        else:
            for each in send_idxs:
                batch_x.append(self.all_images[each])
                
        batch_y = self.labels[send_idxs]
        return np.array(batch_x), batch_y
    
    def getTestLabels(self):
        return self.labels[self.idxs]
    
    def getTestData(self):
        
        self.idxs.sort()
        batch_x = list()
        
        if self.augment:
            for each in self.idxs:
                if each < self.all_images.shape[0]:
                    batch_x.append(self.all_images[each])
                elif each >= self.all_images.shape[0] and each < self.added_images1.shape[0]:
                    batch_x.append(self.all_images1[each]-self.all_images.shape[0])
                else:
                    batch_x.append(self.added_images2[each-(self.all_images.shape[0]+self.added_images1.shape[0])])
        else:
            for each in self.idxs:
                batch_x.append(self.all_images[each])
                
        return np.array(batch_x)
        
    

In [0]:
#  labels, batch_size, idxs, all_images_path, added_images_path
train_dg = DataGenerator(True, total_labels, 128, train_idxs, path+"all_images.h5",path+"added_images_1.h5",path+"added_images_2.h5")
val_dg = DataGenerator(True, total_labels, 128, val_idxs, path+"all_images.h5",path+"added_images_1.h5",path+"added_images_2.h5")
train_val_dg = DataGenerator(True, total_labels, 128, t_idxs, path+"all_images.h5",path+"added_images_1.h5",path+"added_images_2.h5")
test_dg = DataGenerator(True, total_labels, 1, test_idxs, path+"all_images.h5",path+"added_images_1.h5",path+"added_images_2.h5")

In [0]:
# Free memory
import gc
del arr
gc.collect()

**PRE - TRAINED INCEPTION V3**

In [0]:
def PreTrained_IV3():
    
    inputs = Input(shape=(128, 128, 3))  
    base_model = InceptionV3(input_tensor = inputs, weights='imagenet', include_top=False)
    # Add a global spatial average pooling layer
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
#     # fully-connected layer
#     x = Dense(1024, activation='sigmoid')(x)
    x = Dropout(rate = 0.2)(x)
    # and a logistic layer for 14 classes
    predictions = Dense(14, activation='sigmoid')(x)
    
    # this is the model we will train
    final_model = Model(inputs=base_model.input, outputs=predictions)
    
    return base_model,final_model
    
    

In [0]:
def TrainInceptionV3(base_model,final_model, train_dg, val_dg, final):
    print ("Training with pretrained-weights")
    filepath = ""

    if final:
        filepath="final_v3weights-improvement-{epoch:02d}-{f_score:.2f}.hdf5"
    else:
        filepath="v3weights-improvement-{epoch:02d}-{val_f_score:.2f}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_f_score', verbose=1, save_best_only=True, mode='max')
    callbacks_list = [checkpoint]
        
    
    final_model.compile(optimizer=Adam(lr=0.001), loss = 'binary_crossentropy',metrics=[f_score])
    if final:
        history = final_model.fit_generator(train_dg,epochs = 5, callbacks=callbacks_list)
        return final_model,history.history
    else:
        history = final_model.fit_generator(train_dg,epochs = 10,validation_data = val_dg, callbacks=callbacks_list)
        return final_model,history.history

In [0]:
K.clear_session()
base_model_i3, final_model_i3 = PreTrained_IV3()

######### TRAIN - MODEL FOR HYPER PARAMETER TUNING ##############
final_model_i3,history = TrainInceptionV3(base_model_i3,final_model_i3,train_dg, val_dg, False)

######### FINAL TRAINING IV3 ################## 
# final_model_i3,history = TrainInceptionV3(base_model_i3,final_model_i3, train_val_dg,val_dg, True)


In [0]:
fig,ax = plt.subplots()
plt.plot(list(range(10)),history['f_score'], color = 'b',label = 'Training F score')
plt.plot(list(range(10)),history['val_f_score'], color = 'r', label = 'Validation F score')
plt.legend()
plt.show()

In [0]:
# with open(path+ 'history_objects/IV3/with_augmentation/'+'history_128.pkl','wb') as f:
#     pickle.dump(history, f, protocol=pickle.HIGHEST_PROTOCOL)
    
# upload = drive.CreateFile({'title': 'weights_05_0.46_128.hdf5'})
# upload.SetContentFile('v3weights-improvement-05-0.46.hdf5')
# upload.Upload()


final_model_i3.save_weights(path+ 'history_objects/IV3/with_augmentation/' + 'final_iv3.h5')


In [0]:
final_model_i3.load_weights('v3weights-improvement-03-0.55.hdf5')

In [0]:
Xtest = test_dg.getTestData()

In [0]:
ypred = final_model_i3.predict(Xtest)
ypred.shape

In [0]:
ytest = test_dg.getTestLabels()
ytest.shape

In [0]:
K.eval(f_score(ytest,ypred))

In [0]:
for each in range(14):
    CM = getCM(ytest[:,each],ypred[:,each],0.2)
    print (all_classes[each])
    print (CM[0])
    print ("Accuracy:{0}, Precision:{1}, Recall:{2}, F1-Score:{3}, AUC Score:{4} ".format(CM[1],CM[2],CM[3],CM[4], CM[5]))
    print ("------------------------")

**PRE - TRAINED VGG16**

In [0]:
def PreTrained_VGG16():
    
    inputs = Input(shape=(128, 128, 3))  
    base_model = VGG16(input_tensor = inputs, weights='imagenet', include_top=False)
    # Add a global spatial average pooling layer
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
#     # fully-connected layer
#     x = Dense(1024, activation='sigmoid')(x)
    x = Dropout(rate = 0.2)(x)
    # and a logistic layer for 14 classes
    predictions = Dense(14, activation='sigmoid')(x)
    
    # this is the model we will train
    final_model = Model(inputs=base_model.input, outputs=predictions)
    
    return base_model,final_model
    
    

In [0]:
def TrainVGG16(base_model,final_model, train_dg, val_dg, final):
    
    print ("Training with pretrained-weights")
    
    # Freeze the layers except the last 4 layers
    for layer in base_model.layers[:-4]:
        layer.trainable = False

    if final:
        filepath="final_vgg16weights-improvement-{epoch:02d}-{f_score:.2f}.hdf5"
    else:
        filepath="vgg16weights-improvement-{epoch:02d}-{val_f_score:.2f}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_f_score', verbose=1, save_best_only=True, mode='max')
    callbacks_list = [checkpoint]
    final_model.compile(optimizer=Adam(lr=0.0001), loss = 'binary_crossentropy',metrics=[f_score])
    if final:
        history = final_model.fit_generator(train_dg ,epochs = 8, callbacks=callbacks_list)
        return final_model,history.history
    else:
        history = final_model.fit_generator(train_dg,validation_data = val_dg,epochs = 10, callbacks=callbacks_list)
        return final_model,history.history

In [20]:
K.clear_session()

base_model_vgg, final_model_vgg = PreTrained_VGG16()

######### TRAIN - MODEL FOR HYPER PARAMETER TUNING ##############
# final_model_vgg,history = TrainVGG16(base_model_vgg,final_model_vgg,train_dg, val_dg, False)

######### FINAL TRAINING VGG16 ##################
final_model_vgg,history = TrainVGG16(base_model_vgg,final_model_vgg,train_val_dg,val_dg, final = True)


Training with pretrained-weights
Epoch 1/8
Epoch 2/8




Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [0]:
fig,ax = plt.subplots()
plt.plot(list(range(10)),history['f_score'], color = 'b',label = 'Training F score')
plt.plot(list(range(10)),history['val_f_score'], color = 'r', label = 'Validation F score')
plt.legend()
plt.show()

In [0]:
# with open(path+ 'history_objects/VGG16/with_augmentation/'+'history_128.pkl','wb') as f:
#     pickle.dump(history, f, protocol=pickle.HIGHEST_PROTOCOL)
    
# upload = drive.CreateFile({'title': 'weights_08_0.41_128.hdf5'})
# upload.SetContentFile('vgg16weights-improvement-08-0.41.hdf5')
# upload.Upload()


final_model_vgg.save_weights(path+ 'history_objects/VGG16/with_augmentation/' + 'final_vgg16.h5')


In [0]:
final_model_vgg.load_weights('vgg16weights-improvement-06-0.52.hdf5')

In [0]:
Xtest = test_dg.getTestData()

In [23]:
ypred = final_model_vgg.predict(Xtest)
ypred.shape

(17167, 14)

In [24]:
ytest = test_dg.getTestLabels()
ytest.shape

(17167, 14)

In [25]:
K.eval(f_score(ytest,ypred))

0.38797832798904486

In [0]:
for each in range(14):
    CM = getCM(ytest[:,each],ypred[:,each],0.2)
    print (all_classes[each])
    print (CM[0])
    print ("Accuracy:{0}, Precision:{1}, Recall:{2}, F1-Score:{3}, AUC Score:{4} ".format(CM[1],CM[2],CM[3],CM[4], CM[5]))
    print ("------------------------")

In [0]:
# #Upload weights on drive
# upload = drive.CreateFile({'title': 'weights_irv2_final_freezed.h5'})
# upload.SetContentFile('final_irv2.h5')
# upload.Upload()