# Classifieur VGG16 par fine tuning

source : https://thebinarynotes.com/transfer-learning-keras-vgg16/

In [1]:
import pandas as pd
import numpy as np
from sklearn import model_selection, preprocessing
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras import layers
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Activation,GlobalMaxPooling2D
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.applications import VGG16
from keras.models import Model

In [2]:
# Constants
image_width = 224
image_height = 224
channels = 3
batch_size = 64
 
# Data directories
train_dir = "data/train"
test_dir = "data/test"

In [3]:
# Preparing dataframes
data = pd.read_csv("data/data.csv")

data = data[["image", "category"]]

In [4]:
data

Unnamed: 0,image,category
0,55b85ea15a1536d46b7190ad6fff8ce7.jpg,Home Furnishing
1,7b72c92c2f6c40268628ec5f14c6d590.jpg,Baby Care
2,64d5d4a258243731dc7bbb1eef49ad74.jpg,Baby Care
3,d4684dcdc759dd9cdf41504698d737d8.jpg,Home Furnishing
4,6325b6870c54cd47be6ebfbffa620ec7.jpg,Home Furnishing
...,...,...
1045,958f54f4c46b53c8a0a9b8167d9140bc.jpg,Baby Care
1046,fd6cbcc22efb6b761bd564c28928483c.jpg,Baby Care
1047,5912e037d12774bb73a2048f35a00009.jpg,Baby Care
1048,c3edc504d1b4f0ba6224fa53a43a7ad6.jpg,Baby Care


In [5]:
# splitting images into test and train directory

import shutil

train_data = pd.DataFrame()
test_data = pd.DataFrame()

sample_data = pd.read_csv("data/sample_data.csv")
for ind in data.index:
    if data.loc[ind, "image"] in sample_data["image"].values:
        shutil.copy("data/Images/"+data.loc[ind, "image"], "data/test/")
        test_data = test_data.append(data.loc[ind])
    else :
        shutil.copy("data/Images/"+data.loc[ind, "image"], "data/train/")
        train_data = train_data.append(data.loc[ind])

In [6]:
test_data.groupby(by="category").size()

category
Baby Care                     15
Beauty and Personal Care      15
Computers                     15
Home Decor & Festive Needs    15
Home Furnishing               15
Kitchen & Dining              15
Watches                       15
dtype: int64

In [7]:
# Splitting train and validation data
train_df, validation_df = model_selection.train_test_split(train_data, test_size=0.1)
train_df = train_df.reset_index(drop=True)
validation_df = validation_df.reset_index(drop=True)

In [8]:
# Creating train generator
train_datagen = ImageDataGenerator(
    rotation_range=15,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    width_shift_range=0.1,
    height_shift_range=0.1
)
 
train_generator = train_datagen.flow_from_dataframe(
    train_df, 
    train_dir, 
    x_col='image',
    y_col='category',
    class_mode='categorical',
    target_size=(image_height, image_width),
    batch_size=batch_size
)

# Creating Validation Generator
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_dataframe(
    validation_df, 
    train_dir, 
    x_col='image',
    y_col='category',
    class_mode='categorical',
    target_size=(image_height, image_width),
    batch_size=batch_size
)

Found 850 validated image filenames belonging to 7 classes.
Found 95 validated image filenames belonging to 7 classes.


In [9]:
# Loading Model
pretrained_model = VGG16(input_shape=(image_height, image_width, channels), include_top=False, weights="imagenet")
pretrained_model.summary()
 
# Freezing the layers
for layer in pretrained_model.layers[:15]:
    layer.trainable = False
for layer in pretrained_model.layers[15:]:
    layer.trainable = True

# Modification of pretrained model
last_layer = pretrained_model.get_layer('block5_pool')
last_output = last_layer.output
 
x = GlobalMaxPooling2D()(last_output)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = layers.Dense(7, activation='sigmoid')(x)
 
# Creating a new model
model = Model(pretrained_model.input, x)
 
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])
 
model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [10]:
# Training model
n_training_samples = len(train_df)
n_validation_samples = len(validation_df)
 
history = model.fit_generator(
    train_generator,
    epochs=20,
    validation_data=validation_generator,
    validation_steps=n_validation_samples//batch_size,
    steps_per_epoch=n_training_samples//batch_size)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/20



Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [11]:
nb_samples = test_data.shape[0]
test_gen = ImageDataGenerator(rescale=1./255)
 
test_generator = test_gen.flow_from_dataframe(
    test_data, 
    test_dir, 
    x_col='image',
    y_col=None,
    class_mode=None,
    batch_size=batch_size,
    target_size=(image_height, image_width),
    shuffle=False
)
 
# Testing Model
predict = model.predict_generator(test_generator, steps=np.ceil(nb_samples/batch_size))


Found 105 validated image filenames.
Instructions for updating:
Please use Model.predict, which supports generators.


In [12]:
predicted_class_indices=np.argmax(predict,axis=1)
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

In [13]:
# performance of the model

from sklearn import metrics

metrics.accuracy_score(predictions, sample_data["category"])

0.12380952380952381