# Fine Tuning of ResNet50

We want to fine-tune ResNet50 to classify pictorial genres.

We then want to save the model.

## Libraries

In [None]:
!pip install tensorflow-addons

In [None]:
import tensorflow_addons as tfa

In [1]:
from tensorflow.keras.models import Model
from keras.applications.resnet import ResNet50 
from keras.applications.resnet import preprocess_input as preprocess_input_resnet
from tensorflow.keras.preprocessing import image as image_resnet


import tensorflow as tf
from keras.layers import *
import keras

from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
import numpy as np
import pandas as pd
import cv2 as cv

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import os

import itertools

from IPython.display import clear_output

## Setup

In [3]:
# Mount Google Drive into the Colab runtime; the data paths used by this
# notebook are built on top of root_dir.
from google.colab import drive
# force_remount=True is passed so that re-running this cell mounts the drive again.
drive.mount('/content/drive', force_remount=True)
root_dir = '/content/drive/MyDrive'

Mounted at /content/drive


In [4]:
# Dataset locations on the mounted drive. Every path keeps its trailing slash
# because later cells build file names by plain string concatenation.
base_dir = f"{root_dir}/Painting/data/"
data_folder = f"{base_dir}raw/dataset/"
train_folder, test_folder = (f"{data_folder}{split}/" for split in ("train", "test"))

## Dataset

The dataset is taken from Kaggle and is available at the following link: https://www.kaggle.com/c/painter-by-numbers/data .

We have to resize all the images to (224, 224), the input size used for ResNet50 in this notebook.

In [5]:
def preprocess_cv2_image_resnet(image):
  """Convert an OpenCV image into a ResNet50-ready batch containing one image.

  Args:
    image: image array as loaded by OpenCV; it is converted with
      COLOR_BGR2RGB, so BGR channel order is expected.

  Returns:
    The output of the ResNet ``preprocess_input`` applied to the resized
    224x224 RGB image with a leading batch axis added.
  """
  resized = cv.resize(image, (224, 224))
  rgb = cv.cvtColor(resized, cv.COLOR_BGR2RGB)
  # Round-trip through PIL so Keras' img_to_array produces the pixel array.
  pixels = image_resnet.img_to_array(Image.fromarray(rgb))
  batch = np.expand_dims(pixels, axis = 0)
  return preprocess_input_resnet(batch)

In [6]:
# Load the Kaggle metadata and keep only the columns the genre classifier
# needs; rows without a genre label are discarded.
df = (
    pd.read_csv(data_folder + "all_data_info.csv")
    .rename(columns={"new_filename": "filename"})
    .drop(columns=["pixelsx", "pixelsy", "size_bytes", "artist_group", "source",
                   "artist", "style", "date", "title"])
    .dropna(subset=["genre"])
    # Renumber rows 0..n-1: the lookup helpers below index rows by position.
    .reset_index(drop=True)
)

# save memory
df["genre"] = df["genre"].astype("category")

print(df.memory_usage(deep=True))
# DataFrame.info() prints its report itself and returns None, so it is not
# wrapped in print() (that would emit a stray "None" line).
df.info()

# Persist the reduced table (the row index is written as the first column).
df.to_csv(data_folder + "not_all_data_info.csv")

Index           128
genre        105957
in_train     101996
filename    6723978
dtype: int64
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101996 entries, 0 to 101995
Data columns (total 3 columns):
 #   Column    Non-Null Count   Dtype   
---  ------    --------------   -----   
 0   genre     101996 non-null  category
 1   in_train  101996 non-null  bool    
 2   filename  101996 non-null  object  
dtypes: bool(1), category(1), object(1)
memory usage: 997.6+ KB
None


In [7]:
# Keep wide frames on a single line when printed, then show the column
# names followed by the first rows.
pd.set_option('display.expand_frame_repr', False)
for preview in (df.columns, df.head()):
  print(preview)

Index(['genre', 'in_train', 'filename'], dtype='object')
                      genre  in_train    filename
0                  abstract      True  102257.jpg
1                  abstract      True   75232.jpg
2     mythological painting      True   29855.jpg
3                  abstract      True   62252.jpg
4  bird-and-flower painting     False   49823.jpg


In [8]:
def get_image_index(filename, frame=None):
  """Return the row position of `filename` in the metadata table.

  Args:
    filename: value to look for in the "filename" column.
    frame: optional DataFrame with a "filename" column; defaults to the
      notebook-level `df`.

  Returns:
    The 0-based position of the first matching row as an int, or -1 when no
    row matches. With the default 0..n-1 index (as produced by
    reset_index(drop=True)) the position is also the index label.
  """
  if frame is None:
    frame = df
  # One vectorized comparison instead of a Python-level loop over every row.
  hits = np.flatnonzero((frame["filename"] == filename).to_numpy())
  return int(hits[0]) if hits.size else -1

In [9]:
def get_genre_by_filename(filename):
  """Return the genre recorded in `df` for the given image file name.

  Args:
    filename: value to look for in the "filename" column of `df`.

  Returns:
    The "genre" value of the first row whose filename matches.

  Raises:
    KeyError: if no row of `df` has this filename.
  """
  index = get_image_index(filename)
  if index == -1:
    # Without this guard the -1 sentinel would be used as a row label and
    # fail with an uninformative "KeyError: -1".
    raise KeyError("No metadata row for file: {}".format(filename))
  return df["genre"][index]

In [10]:
def preprocess_images_from_folder(folder, save_folder_name):
  """Preprocess every image in `folder` and store the results grouped by genre.

  Each file is read with OpenCV, passed through preprocess_cv2_image_resnet
  and written with np.save to
  <parent of folder>/<save_folder_name>/<genre>/<filename>.
  Files that cannot be processed, or that have no row in `df`, are skipped.

  NOTE(review): np.save appends ".npy" to the file name, so the outputs are
  NumPy array files rather than image files. Keras' flow_from_directory only
  lists image formats, so confirm how these files are meant to be loaded.

  Args:
    folder: directory containing the source images.
    save_folder_name: name of the output directory, created next to `folder`.

  Returns:
    True once every file has been visited.
  """
  parent_folder = os.path.abspath(os.path.join(folder, os.pardir))
  save_folder = os.path.join(parent_folder, save_folder_name)
  os.makedirs(save_folder, exist_ok=True)

  # Build the filename -> genre lookup once instead of scanning df for every
  # file. Iterating in reverse makes the first occurrence win, which matches
  # a top-to-bottom scan of the table.
  genre_by_filename = dict(zip(df["filename"][::-1], df["genre"][::-1]))

  filenames = os.listdir(folder)
  N = len(filenames)
  # Collected and reported at the end: messages printed inside the loop are
  # erased by the next clear_output call.
  failed = []

  for n_file, filename in enumerate(filenames):
    try:
      # cv.imread yields None for unreadable files; the preprocessing call
      # then raises and the file is recorded as failed.
      img = preprocess_cv2_image_resnet(cv.imread(os.path.join(folder, filename)))
    except Exception as e:
      print(str(e))
      print("Problem with image: " +filename)
      failed.append(filename)
      img = None

    if n_file % 10 == 0:
      clear_output(wait=True)
      print("{} / {} " .format(n_file, N))

    if img is None or filename not in genre_by_filename:
      continue

    genre_folder = os.path.join(save_folder, genre_by_filename[filename])
    os.makedirs(genre_folder, exist_ok=True)
    np.save(os.path.join(genre_folder, filename), img)

  clear_output(wait=True)
  print("{} / {} " .format(N, N))
  if failed:
    print("Problem with {} image(s): {}".format(len(failed), ", ".join(failed)))
  return True

In [11]:
#preprocess_images_from_folder( train_folder, "resized_train")

In [None]:
# Preprocess every file in test_folder and store the results, grouped by
# genre, in a "resized_test" directory.
preprocess_images_from_folder( test_folder, "resized_test")

21410 / 23817 


In [None]:
# preprocess_images_from_folder creates its output directory in the *parent*
# of the folder it reads, i.e. directly under data_folder, not inside
# train/ or test/.
train_dir = os.path.join(data_folder, "resized_train")
test_dir = os.path.join(data_folder, "resized_test")

In [None]:
# Share of the training images held out for validation. Without a
# validation_split on the generator, a flow requested with
# subset='validation' has no images to draw from.
validation_split = 0.2

# NOTE(review): no preprocessing_function is set, so images are fed as
# loaded; confirm whether the ResNet preprocess_input should be applied here.
train_datagen = ImageDataGenerator(validation_split=validation_split)
test_datagen = ImageDataGenerator()

In [None]:
batch_size = 32

num_classes = 42 #n_genre
input_shape = (224, 224, 3)
# flow_from_directory expects target_size as (height, width); the channel
# count is derived from color_mode.
target_size = input_shape[:2]

# class_mode is left at its default ('categorical'), i.e. one-hot labels.
train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=target_size,
        color_mode='rgb',
        batch_size=batch_size,
        subset='training',
        )

validation_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=target_size,
        color_mode='rgb',
        batch_size=batch_size,
        subset='validation',
        )

# Reuse the training class order so that label indices mean the same genre
# in the test set even if some genre folders are missing there.
class_names = sorted(train_generator.class_indices,
                     key=train_generator.class_indices.get)

test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=target_size,
        color_mode='rgb',
        batch_size=batch_size,
        classes=class_names,
        # Keep file order fixed so predictions line up with test_generator.labels.
        shuffle=False,
        )

## Models

In [None]:
# ImageNet-pretrained ResNet50 without its classification top, fed through
# an explicit 224x224 RGB input tensor.
resnet_input = Input(shape=(224, 224, 3))
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=resnet_input,
)

In [None]:
# New classification head stacked on the ResNet50 feature maps: pooling,
# flatten, one hidden dense layer with dropout, and a softmax over the genres.
head_layers = [
    AveragePooling2D(pool_size=(7, 7)),
    Flatten(name="flatten"),
    Dense(256, activation="relu"),
    Dropout(0.5),
    Dense(num_classes, activation="softmax"),
]

head_model = base_model.output
for head_layer in head_layers:
  head_model = head_layer(head_model)

In [None]:
# Full network: the ResNet50 backbone input wired through to the new head.
model = Model(
    inputs=base_model.input,
    outputs=head_model,
)

In [None]:
# Freeze every backbone layer so that only the newly added head is trained.
for backbone_layer in base_model.layers:
  backbone_layer.trainable = False

In [None]:
# F1 metrics from TensorFlow Addons, one per averaging mode.
f1_micro, f1_macro = (
    tfa.metrics.F1Score(num_classes=num_classes, average=averaging)
    for averaging in ('micro', 'macro')
)

In [None]:
# Compile with categorical cross-entropy (the alternative would be
# sparse_categorical_crossentropy) and track accuracy plus both F1 variants.
compile_options = dict(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy", f1_micro, f1_macro],
)
model.compile(**compile_options)

## Train

In [None]:
# Fit the head on the training generator, validating after each epoch.
epochs = 50

history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=epochs,
)

In [None]:
# Persist the trained model next to the dataset.
model.save(f"{data_folder}resnet_finetuing_model")

In [None]:
from matplotlib import pyplot as plt
#x_plot = list(range(1,epochs+1))

def plot_history(network_history):
    """Plot training and validation curves for loss and accuracy.

    Args:
        network_history: object whose ``history`` attribute is a dict holding
            the per-epoch lists 'loss', 'val_loss', 'accuracy' and
            'val_accuracy' (e.g. the value returned by ``model.fit``).

    Draws two figures (loss, then accuracy) and shows them.
    """
    # Read from the argument, not from a notebook-level variable, so the
    # function plots whichever history it is given.
    logs = network_history.history
    x_plot = list(range(1, len(logs['loss']) + 1))

    panels = (
        ('loss', 'Loss', {}),
        ('accuracy', 'Accuracy', {'loc': 'lower right'}),
    )
    for metric, y_label, legend_options in panels:
        plt.figure()
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.plot(x_plot, logs[metric])
        plt.plot(x_plot, logs['val_' + metric])
        plt.legend(['Training', 'Validation'], **legend_options)
    plt.show()

In [None]:
# Draw the loss and accuracy curves recorded during training.
plot_history(history)

In [None]:
eval_result = model.evaluate(test_generator)
# evaluate returns the loss followed by every compiled metric (accuracy and
# the two F1 scores here), so label each value with its own name.
for metric_name, metric_value in zip(model.metrics_names, eval_result):
  print("test {}: {}".format(metric_name, metric_value))

In [None]:
# Class-probability predictions for the test images.
# NOTE(review): rows follow the generator's iteration order; they only line
# up with test_generator.labels if the generator does not shuffle - confirm.
test_pred = model.predict(test_generator)

In [None]:
# Predicted class index = position of the highest score in each prediction.
y_pred = [np.argmax(pred) for pred in test_pred]

In [None]:
from sklearn.metrics import f1_score

# Ground-truth class indices as stored on the test generator.
y_true = test_generator.labels

# Macro-averaged F1 over all classes.
f1_test = f1_score(y_true, y_pred, average='macro')
print('Average f1_score: {} \n' .format(f1_test) )

print('F1-SCORE FOR EACH CLASS')
print('-----------------------')
# average=None yields one score per class, listed by class index.
av_f1_score = f1_score(y_true, y_pred, average=None)
for i, class_f1 in enumerate(av_f1_score):
  print('{} : {} '.format( i, class_f1))

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Confusion matrix of true vs. predicted class indices.
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()

# Hide the x axis of the current axes.
frame1 = plt.gca()
x_axis = frame1.axes.get_xaxis()
x_axis.set_visible(False)
plt.show()