## Transfer learning using VGG Net

In [None]:
%matplotlib inline
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.layers import Input, Flatten, Dense
from keras.models import Model
import numpy as np
from keras.utils import np_utils
from keras import backend as K

# Use channels-last (height, width, channels) tensors, matching Input(shape=(32,32,3)) below.
# set_image_data_format is the Keras 2 spelling of the legacy set_image_dim_ordering('tf').
K.set_image_data_format('channels_last')

In [73]:
# Load the convolutional part of a VGG16 network trained on ImageNet (no classifier head).
model_vgg16_conv = VGG16(weights='imagenet', include_top=False)
model_vgg16_conv.summary()

# Freeze the pre-trained base so that only the new dense head is trained.
model_vgg16_conv.trainable = False

# Input format: 32x32 RGB images, channels last.
# (Named image_input rather than `input` so the Python builtin is not shadowed.)
image_input = Input(shape=(32, 32, 3), name='image_input')

# Run the input through the VGG16 base.
output_vgg16_conv = model_vgg16_conv(image_input)

# Add the fully-connected classification head (4 output classes).
# No input_shape hint on Flatten: in the functional API the shape comes from the tensor it is called on.
x = Flatten(name='flatten')(output_vgg16_conv)
x = Dense(1024, activation='relu', name='fc1')(x)
x = Dense(512, activation='relu', name='fc2')(x)
x = Dense(4, activation='softmax', name='predictions')(x)

# Assemble the full model (Keras 2 keyword names: inputs / outputs).
my_model = Model(inputs=image_input, outputs=x)

# Mark the first two layers (the input layer and the VGG16 base) as non-trainable.
# This has to happen BEFORE compile(): changes to `trainable` made afterwards are
# ignored until the model is compiled again.
for layer in my_model.layers[:2]:
    layer.trainable = False

# In the summary the VGG16 base shows up as a single nested layer; its weights are frozen.
my_model.summary()
my_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_27 (InputLayer)        (None, None, None, 3)     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)   147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, None, None, 128)   0         
__________



## Preprocessing data

In [35]:
from PIL import Image
import os
import numpy as np
from os import listdir
from scipy.misc import imshow
import scipy.misc
from numpy import *
import PIL

size = 32
def preprocess_image(infilename,size):
    """Resize the image file at `infilename` to `size` x `size` pixels, in place.

    The image is first shrunk with `thumbnail` (aspect ratio preserved, Lanczos
    filter) and then resized to an exact `size` x `size` square. The result
    OVERWRITES the original file.

    Args:
        infilename: path of the image file to read and overwrite.
        size: edge length, in pixels, of the square output image.
    """
    # The context manager closes the source file before it is overwritten below.
    with Image.open(infilename) as data:
        # Image.LANCZOS is the same filter as the removed Image.ANTIALIAS alias.
        data.thumbnail((size, size), Image.LANCZOS)
        img = data.resize((size, size))
    # Save through PIL directly; scipy.misc.imsave no longer exists in current SciPy.
    img.save(infilename)
    
def load_image( infilename ) :
    """Read the image file at `infilename` and return it as an RGB numpy array.

    The image is converted to mode "RGB" before conversion to an array, so the
    result always has three channels. No resizing is done here.

    Args:
        infilename: path of the image file to read.

    Returns:
        numpy array holding the RGB pixel data of the image.
    """
    # PIL replaces scipy.misc.imread (removed from current SciPy); the context
    # manager makes sure the file handle is closed once the pixels are copied.
    with Image.open(infilename) as img:
        return np.array(img.convert("RGB"))

# One directory per class; the position of a directory in this list is its class label.
path = ['Dataset/imgflip_images', 'Dataset/greetings_images','Dataset/scanned_documents','Dataset/imp_images']

Y = []
X = []
# List of (image array, label as float, file path) tuples.
data = []

for label, p in enumerate(path):
    # sorted() makes the traversal order independent of the filesystem's listing order.
    for files in sorted(listdir(p)):
        file_path = p+'/'+files
        try:
            t = load_image(file_path)
        except Exception as exc:
            # Skip unreadable files but report why; unlike a bare `except:`, this
            # does not swallow KeyboardInterrupt / SystemExit.
            print("error reading file:"+ files + " (" + str(exc) + ")")
            continue
        data.append((t, float(label), file_path))

In [36]:
import random
import matplotlib.pyplot as plt

# Shuffle with a dedicated, fixed-seed generator: the same `data` order always
# produces the same shuffled order, and the global `random` state is left alone.
random.Random(7).shuffle(data)

# Split the (image, label, path) tuples into parallel lists.
X = [d[0] for d in data]
Y = [d[1] for d in data]
# Maps a position in the shuffled X / Y lists to the file the sample came from.
imageNameDict = {i: d[2] for i, d in enumerate(data)}

In [37]:
from keras.utils import np_utils
from keras import backend as K

In [48]:
image_size = 32
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

# First 80% of the samples are used for training, the remainder for testing.
totalSize = len(X)
trainingSize = int(0.8*totalSize)

# Normalize pixel values from 0-255 to 0-1.
X_train = np.array(X[:trainingSize]) / 255.0
X_test = np.array(X[trainingSize:]) / 255.0
y_train = np.array(Y[:trainingSize])
y_test = np.array(Y[trainingSize:])

# One-hot encode the labels. The class count is taken from ALL labels and passed
# explicitly, so both splits get the same number of columns even if one of them
# happens to lack the highest class index.
num_classes = int(np.max(Y)) + 1
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

## Sanity Check

In [49]:
# Shape of one training sample, then the number of training samples, one per line.
print(X_train[0].shape, len(X_train), sep="\n")

(32, 32, 3)
15742


## Fit the new model

In [50]:
from keras.callbacks import History 
# Callback instance handed to my_model.fit(callbacks=[history]) in the training cell;
# the plotting cell later reads the recorded metrics from history.history.
history = History()

In [74]:
epochs = 20
weights_file = 'models/vgg_exp1.h5'
if not os.path.exists(weights_file):
    # Create the target directory up front so that save_weights cannot fail
    # (and throw away the training run) because the directory is missing.
    weights_dir = os.path.dirname(weights_file)
    if weights_dir and not os.path.isdir(weights_dir):
        os.makedirs(weights_dir)
    my_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=epochs, batch_size=200, callbacks = [history])
    my_model.save_weights(weights_file)
else:
    # Weights from an earlier run exist: reuse them instead of training again.
    my_model.load_weights(weights_file)


Train on 15742 samples, validate on 3936 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fa50e63a908>

## Plots

In [None]:
# Per-epoch metrics recorded by the History callback during my_model.fit.
# The `history` attribute is only present once training has run with this callback;
# if the weights were loaded from disk instead, fall back to empty curves.
recorded = getattr(history, 'history', None) or {}
if not recorded:
    print("No training history available (weights were loaded, not trained); plots will be empty.")

loss = recorded.get('loss', [])
val_loss = recorded.get('val_loss', [])
# Older Keras 2 releases log accuracy as 'acc'; newer ones use 'accuracy'.
acc = recorded.get('acc', recorded.get('accuracy', []))
val_acc = recorded.get('val_acc', recorded.get('val_accuracy', []))

my_model.save_weights(weights_file)

import matplotlib.pyplot as plt
def plot(label1,label2,xlabel,ylabel, train_values=None, val_values=None):
    """Plot two per-epoch curves (by default training and validation accuracy).

    Mind the parameter roles: `label1` / `label2` are the AXIS labels, whereas
    `xlabel` / `ylabel` are the LEGEND entries of the two curves.

    Args:
        label1: text for the x axis.
        label2: text for the y axis.
        xlabel: legend entry for the first (solid green) curve.
        ylabel: legend entry for the second (dashed red) curve.
        train_values: values of the first curve; defaults to the global `acc`.
        val_values: values of the second curve; defaults to the global `val_acc`.
    """
    if train_values is None:
        train_values = acc
    if val_values is None:
        val_values = val_acc
    # Epochs are numbered from 1.
    X_axis = range(1,len(train_values)+1)
    plt.plot(X_axis, train_values, marker='o', linestyle='-', color='g',label=xlabel)
    plt.plot(X_axis, val_values, marker='o', linestyle='--', color='r',label=ylabel)
    plt.xlabel(label1)
    plt.ylabel(label2)
    plt.legend(loc='lower right', shadow=True)
    plt.show()

In [None]:
# label1/label2 are the axis labels; xlabel/ylabel are the legend entries of the two curves.
plot(
    label1='No. of epochs',
    label2='Accuracy',
    xlabel="Train Accuracy",
    ylabel="Test Accuracy",
)

## Confusion Matrix

In [None]:
import numpy as np
predict = model.predict(X_test, verbose=1)
predict1 = np.argmax(predict,axis=1)
y_test1 = np.argmax(y_test,axis=1)

import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

cm= confusion_matrix(y_test1,predict1)
print(cm)
sn.set()
import matplotlib.pyplot as plt
%matplotlib inline
# plt.imshow(cm, cmap='binary')
pd.options.display.float_format = '{:.2f}'.format
categories = ["memes",'greetings','scanned','misc']
np.set_printoptions(suppress=True)
df_cm = pd.DataFrame(cm, index = [i for i in categories],
                  columns = [i for i in categories])
plt.figure(figsize = (10,7))
sn.heatmap(df_cm, annot=True, fmt="d")

In [None]:
# Tuple holding one array: the positions where the predicted class differs from the true class.
misclassified = np.nonzero(y_test1 != predict1)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline  
misclassified_files = []
for i in misclassified[0]:
    print("backup/"+imageNameDict[i])
    misclassified_files.append("backup/"+imageNameDict[i])

In [75]:
# Show the first misclassified test image with its true and predicted class names.
if misclassified_files:
    first_idx = int(misclassified[0][0])
    img = Image.open(misclassified_files[0])
    plt.imshow(img)
    # y_test rows are one-hot arrays and cannot be concatenated to a string;
    # use the class indices (y_test1 / predict1) to look up readable names instead.
    print("Label:" + categories[y_test1[first_idx]] + ", predicted:" + categories[predict1[first_idx]])
else:
    print("No misclassified test images.")

NameError: name 'misclassified_files' is not defined