# Assignment 4: Transfer Learning

#### Student Name: Ryan Richardson

In this assignment,  you will use a pretrained model such as VGG16 to perform a new classification task. We will use The dataset (https://www.kaggle.com/c/aptos2019-blindness-detection) consisted of 3,662 labeled, high resolution color images for the training set, and 1,928 unlabeled for the test set. 
Images are classified into 5 groups according to the severity of DR present. Label 0 represents the control group. Labels 1–4 represent mild, moderate, severe, and proliferative DR, respectively.


In [1]:

import pandas as pd
from keras.applications import VGG16
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator


## Load the data

In [2]:
data_path="data"
df = pd.read_csv(data_path+"/train.csv")
df.head()

Unnamed: 0,id_code,diagnosis
0,000c1434d8d7,2
1,001639a390f0,4
2,0024cdab0c1e,1
3,002c21358ce6,0
4,005b95c28852,0


In [3]:
df['id_code'] = df['id_code'].astype(str)+'.png'
df['diagnosis'] = df['diagnosis'].astype('str')

#### Create a data generator

In [4]:

BATCH_SIZE = 16
IMAGE_SIZE = (224,224)

datagen=ImageDataGenerator(
    rescale=1./255, 
    validation_split=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

train_gen=datagen.flow_from_dataframe(
    dataframe=df,
    directory=data_path+"/train_images",
    x_col="id_code",
    y_col="diagnosis",
    batch_size=BATCH_SIZE,
    shuffle=True,
    class_mode="categorical",
    target_size=IMAGE_SIZE,
    subset='training')

test_gen=datagen.flow_from_dataframe(
    dataframe=df,
    directory=data_path+"/train_images",
    x_col="id_code",
    y_col="diagnosis",
    batch_size=BATCH_SIZE,
    shuffle=True,
    class_mode="categorical", 
    target_size=IMAGE_SIZE,
    subset='validation')

Found 2930 validated image filenames belonging to 5 classes.
Found 732 validated image filenames belonging to 5 classes.


#### Load the VGG16 Model 

In [5]:

# Load the pre-trained VGG16 model, including the top (classification) layer
model = VGG16(weights='imagenet', include_top=True)

# Print the model summary
model.summary()


Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

## Define New Model

In [6]:
base_model = VGG16(weights='imagenet', include_top=False)
vgg_output = base_model.get_layer('block4_pool').output

#### Add your own Layers

Use up to the the block4_pool layer of the vgg16 model and add 2 Fully Connected layers of your own at the end:

In [7]:
gap_output = GlobalAveragePooling2D()(vgg_output)

fc1 = Dense(256, activation='relu')(gap_output)
output = Dense(5, activation='softmax')(fc1)
modified_model = Model(inputs=base_model.input, outputs=output)
modified_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 block1_conv1 (Conv2D)       (None, None, None, 64)    1792      
                                                                 
 block1_conv2 (Conv2D)       (None, None, None, 64)    36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, None, None, 64)    0         
                                                                 
 block2_conv1 (Conv2D)       (None, None, None, 128)   73856     
                                                                 
 block2_conv2 (Conv2D)       (None, None, None, 128)   147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, None, None, 128)   0     

#### Freeze the vgg16 layers

In [8]:
for layer in base_model.layers:
    layer.trainable = False


modified_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 block1_conv1 (Conv2D)       (None, None, None, 64)    1792      
                                                                 
 block1_conv2 (Conv2D)       (None, None, None, 64)    36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, None, None, 64)    0         
                                                                 
 block2_conv1 (Conv2D)       (None, None, None, 128)   73856     
                                                                 
 block2_conv2 (Conv2D)       (None, None, None, 128)   147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, None, None, 128)   0     

In [16]:
callback = []
callback += [EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min')]
callback += [ModelCheckpoint('atops.h5',save_best_only=True,mode='min')]
modified_model.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy'])

## Train the  Model


In [None]:
modified_model.fit_generator(generator=train_gen,
                             steps_per_epoch=len(train_gen),
                             validation_data=test_gen,
                             validation_steps=len(test_gen),
                             epochs=50,
                             callbacks=callback,
                             use_multiprocessing=False,
                             verbose=1)

  modified_model.fit_generator(generator=train_gen,


Epoch 1/50