In [None]:
import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras import layers 
import pandas as pd
import numpy as np

In [None]:
def preprocess_data(X, Y, num_classes=10):
  """Prepare images and integer labels for a ResNet50-based classifier.

  Args:
    X: image batch handed to `keras.applications.resnet50.preprocess_input`.
    Y: integer class labels handed to `keras.utils.to_categorical`.
    num_classes: number of columns in the one-hot label matrix. Defaults to
      10, the value this function previously hard-coded.

  Returns:
    Tuple `(XP, YP)`: the preprocessed images and the one-hot encoded labels.
  """
  # NOTE(review): the Keras docs state that preprocess_input may overwrite X
  # in place when its dtype is compatible -- pass a copy if the raw array is
  # still needed by the caller.
  XP = keras.applications.resnet50.preprocess_input(X)
  YP = keras.utils.to_categorical(Y, num_classes)
  return XP, YP

In [None]:
# Load CIFAR-10 and unpack it into (images, labels) pairs for the train and test splits.
(x_train, y_train),(x_test, y_test) = keras.datasets.cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
# Print the shapes of each split before and after preprocessing so the effect
# on the images and labels is visible.
# NOTE(review): x_train/y_train and x_test/y_test are rebound to the
# preprocessed values, so re-running this cell without first re-running the
# load cell would preprocess the data a second time.
print(x_train.shape, y_train.shape)
x_train, y_train = preprocess_data(x_train,y_train)
print(x_train.shape, y_train.shape)
print("***************")
print(x_test.shape, y_test.shape)
x_test, y_test = preprocess_data(x_test,y_test)
print(x_test.shape, y_test.shape)


(50000, 32, 32, 3) (50000, 1)
(50000, 32, 32, 3) (50000, 10)
***************
(10000, 32, 32, 3) (10000, 1)
(10000, 32, 32, 3) (10000, 10)


In [None]:
# Symbolic input of shape (224, 224, 3); it is passed to ResNet50 as its input tensor.
inputs = tf.keras.Input(shape=(224,224,3))

In [None]:
# Pretrained ResNet50 backbone: ImageNet weights, no classification head,
# built on top of the `inputs` tensor.
res = keras.applications.ResNet50(
    weights="imagenet",
    include_top=False,
    input_tensor=inputs,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Freeze the leading part of the backbone so its pretrained weights are not updated.
for layer in res.layers[:143]:
  layer.trainable = False
# The slice end is exclusive: layers 0 through 142 are frozen; this loop does not touch layers from index 143 onward.

In [None]:
# List every backbone layer with its index, name and trainable flag to check the freeze.
for idx, backbone_layer in enumerate(res.layers):
  print(idx, backbone_layer.name, "-", backbone_layer.trainable)

0 input_1 - False
1 conv1_pad - False
2 conv1_conv - False
3 conv1_bn - False
4 conv1_relu - False
5 pool1_pad - False
6 pool1_pool - False
7 conv2_block1_1_conv - False
8 conv2_block1_1_bn - False
9 conv2_block1_1_relu - False
10 conv2_block1_2_conv - False
11 conv2_block1_2_bn - False
12 conv2_block1_2_relu - False
13 conv2_block1_0_conv - False
14 conv2_block1_3_conv - False
15 conv2_block1_0_bn - False
16 conv2_block1_3_bn - False
17 conv2_block1_add - False
18 conv2_block1_out - False
19 conv2_block2_1_conv - False
20 conv2_block2_1_bn - False
21 conv2_block2_1_relu - False
22 conv2_block2_2_conv - False
23 conv2_block2_2_bn - False
24 conv2_block2_2_relu - False
25 conv2_block2_3_conv - False
26 conv2_block2_3_bn - False
27 conv2_block2_add - False
28 conv2_block2_out - False
29 conv2_block3_1_conv - False
30 conv2_block3_1_bn - False
31 conv2_block3_1_relu - False
32 conv2_block3_2_conv - False
33 conv2_block3_2_bn - False
34 conv2_block3_2_relu - False
35 conv2_block3_3_conv - 

In [None]:
# Target size handed to tf.image.resize in the model's first layer.
to_res = (224, 224)

In [None]:
# Classifier: resize inputs to `to_res`, run the ResNet50 backbone, then a
# stack of BatchNorm -> Dense -> Dropout stages ending in a 10-way softmax.
model = tf.keras.Sequential([
    layers.Lambda(lambda image: tf.image.resize(image, to_res)),
    res,
    layers.Flatten(),
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.BatchNormalization(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.BatchNormalization(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.BatchNormalization(),
    layers.Dense(10, activation='softmax'),
])

In [None]:
# Compile for one-hot labels with a low learning rate, as recommended for
# fine-tuning on top of pretrained weights.
# `learning_rate` replaces the deprecated `lr` keyword of the optimizer.
model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.RMSprop(learning_rate=2e-5),
                  metrics=['accuracy'])


  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [None]:
# Train for 10 epochs with mini-batches of 32 and keep the per-epoch metrics.
# NOTE(review): the test split is also used as validation data here, so the
# validation metrics do not come from data separate from the final evaluation.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    verbose=1,
    validation_data=(x_test, y_test),
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Show the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda (Lambda)              (None, 224, 224, 3)       0         
_________________________________________________________________
resnet50 (Functional)        (None, 7, 7, 2048)        23587712  
_________________________________________________________________
flatten (Flatten)            (None, 100352)            0         
_________________________________________________________________
batch_normalization (BatchNo (None, 100352)            401408    
_________________________________________________________________
dense (Dense)                (None, 256)               25690368  
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 256)               1

In [None]:
# Final loss and accuracy on the test split.
model.evaluate(x=x_test, y=y_test, batch_size=128, verbose=1)



[0.2086568921804428, 0.9449999928474426]

# Some ideas to take into consideration when using transfer learning:
1- We confirmed that ResNet50 works best with input images of 224 x 224. As CIFAR-10 has 32 x 32 images, it was necessary to resize them. With this adjustment alone, the model can achieve high accuracy; I think it was the most important change for ResNet50.

2- A good recommendation when building a model using transfer learning is to first test optimizers to get low bias and good results on the training set, then look for regularizers if you see overfitting on the validation set.

3- The discussion over freezing layers of the pretrained model continues. Freezing reduces computation time and overfitting but lowers accuracy. When the new dataset is very different from the dataset used for pretraining, it may be necessary to leave more layers trainable for adjustment.

4- On the selection of hyperparameters, it is important for transfer learning to use a low learning rate to take advantage of the weights of the pretrained model. This choice, like the choice of optimizer (SGD, Adam, RMSprop), will impact the number of epochs needed to get a successfully trained model.