In [1]:
# ========================================================
# To train and test a classifier using Transfer Learning.
# =======================================================

#--- Import necessary modules from Python libraries.
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.applications import vgg16, mobilenet
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import Accuracy, Precision, Recall, AUC
import matplotlib.pyplot as plt
import numpy as np
import cv2, os, pickle
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

2024-11-24 14:44:47.455186: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-24 14:44:47.476890: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732437887.500348    7774 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732437887.506043    7774 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-24 14:44:47.530938: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
def test_classifier(storage_dir, testX, testY, num_preview = 10):
    """Load the saved classifier, print a few predictions and evaluate it.

    Args:
        storage_dir: Directory prefix (including its trailing separator) that
            contains 'VGG16_Classifier.weights.keras'; the file name is
            appended by plain string concatenation.
        testX: Test inputs passed to ``model.predict`` and ``model.evaluate``.
        testY: One-hot encoded test labels; ``argmax`` over the last axis is
            used to recover the integer class ids.
        num_preview: Maximum number of (original, predicted) label pairs to
            print. Clamped to the number of available predictions so that a
            small test set does not raise an IndexError.

    Returns:
        The value returned by ``model.evaluate(testX, testY)`` (loss followed
        by the metrics given to ``compile`` below).
    """
    #--- Load trained model (full model saved in the .keras format)
    model_path = storage_dir + 'VGG16_Classifier.weights.keras'
    model = load_model(model_path)

    #--- Compile model when we need metrics not mentioned while training
    model.compile(loss = 'categorical_crossentropy', metrics = ['accuracy', Precision()])

    #--- Predict model's output
    predictedY = np.argmax(model.predict(testX), axis = -1)
    int_testY = np.argmax(testY, axis = -1)

    #--- Show at most `num_preview` rows, never more than what is available
    n = max(0, min(num_preview, predictedY.shape[0]))
    print('Original_Y 	Predicted_Y')
    print('========== 	===========')
    for true_label, predicted_label in zip(int_testY[:n], predictedY[:n]):
        print('{}                 {}'.format(true_label, predicted_label))

    #--- Evaluate model performance
    test_metrics = model.evaluate(testX, testY)

    return test_metrics

In [3]:
def train_classifier(storage_dir, trainData, valX, valY):
    """Train the classifier in two phases: warm-up, then fine-tuning.

    Phase 1 (warm-up) trains with every layer except the last five frozen.
    Phase 2 (fine-tuning) additionally unfreezes ``model.layers[-7:-5]`` and
    trains with checkpointing, early stopping and learning-rate reduction,
    all monitoring ``val_loss``.

    Args:
        storage_dir: Directory prefix (including its trailing separator)
            where the best model and the performance artefacts are written.
        trainData: Training data passed directly to ``model.fit``.
        valX: Validation inputs.
        valY: Validation labels.

    Returns:
        The ``History`` object returned by the fine-tuning ``fit`` call.
    """
    #--- Build model
    model = build_model()
    model.summary(show_trainable = True)

    #--- Freeze backbone
    for layer in model.layers[:-5]:
        layer.trainable = False
    # Keras documents that changes to `trainable` are only guaranteed to be
    # taken into account after compile() is called again, so recompile with
    # the same loss/metrics the model was built with.
    model.compile(loss = 'categorical_crossentropy', metrics = ['accuracy'])
    model.summary(show_trainable = True)

    #--- Train model
    model.fit(trainData, validation_data = (valX, valY), epochs = WARMUP_EPOCHS) #--- Warm-up training

    #--- Unfreeze some Convolutional layers of backbone for fine-tuning
    for layer in model.layers[-7:-5]:
        layer.trainable = True
    # Recompile so the newly trainable layers are actually updated during
    # fine-tuning. NOTE: this also starts fine-tuning with a fresh optimizer
    # state.
    model.compile(loss = 'categorical_crossentropy', metrics = ['accuracy'])
    model.summary(show_trainable = True)

    #--- Callbacks
    model_path = storage_dir + 'VGG16_Classifier.weights.keras'
    callbacks = [
        ModelCheckpoint(model_path, monitor = "val_loss", mode = 'min', save_best_only = True, save_weights_only = False),
        EarlyStopping(monitor = "val_loss", mode = 'min', patience = EARLY_STOP_PATIENCE),
        ReduceLROnPlateau(monitor = "val_loss", mode = 'min', factor = LR_REDUCE_FACTOR, patience = LR_REDUCE_PATIENCE)
    ]

    #--- Train model
    hist = model.fit(trainData, validation_data = (valX, valY), epochs = EPOCHS, callbacks = callbacks) #--- Fine-tuning

    #--- Save history
    performance_path = storage_dir + 'TrainVal_'
    save_model_performance(performance_path, hist)

    return hist

In [4]:
def _plot_history_curve(x_axis, train_values, val_values, y_label, title, legend, out_path):
    """Plot one training/validation curve pair against epochs and save it."""
    plt.rcParams.update({'font.size': 22})
    plt.figure(figsize = (20, 20))
    plt.plot(x_axis, train_values, 'k.--', linewidth = 2, markersize = 12)
    plt.plot(x_axis, val_values, 'g*--', linewidth = 2, markersize = 12)
    # Epochs run along the x axis; the tracked quantity is on the y axis.
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.title(title)
    plt.xticks(rotation = 90)
    plt.legend(legend)
    plt.savefig(out_path)
    plt.close()


def save_model_performance(performance_path, history):
    """Persist the training history and save loss/accuracy progress plots.

    Writes three files, each named by appending a suffix to
    ``performance_path``: 'PerformanceDict.pkl' (pickled ``history.history``),
    'Loss.jpg' and 'accuracy.jpg'.

    Args:
        performance_path: Path prefix for the output files.
        history: Object whose ``history`` attribute is a dict containing the
            keys 'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    """
    #--- Save history into a dictionary
    hist_dict = history.history
    with open(performance_path + 'PerformanceDict.pkl', 'wb') as f:
        pickle.dump(hist_dict, f)

    #--- Plot progress graphs
    x_axis = np.arange(len(hist_dict['loss']))

    # Plot loss
    _plot_history_curve(
        x_axis,
        hist_dict['loss'],
        hist_dict['val_loss'],
        'Loss',
        'Training and Validation Loss',
        ['training_loss', 'validation_loss'],
        performance_path + 'Loss.jpg',
    )

    # Plot accuracy
    metric = 'accuracy'
    _plot_history_curve(
        x_axis,
        hist_dict[metric],
        hist_dict['val_' + metric],
        'Accuracy',
        'Training and Validation Accuracy',
        ['training_' + metric, 'validation_' + metric],
        performance_path + metric + '.jpg',
    )


In [5]:
def process_data(val_size = 0.2, batch_size = 32, random_state = None):
    """Load CIFAR-10 and prepare training, validation and test data.

    Steps: load the dataset, split the training portion into train and
    validation sets, resize every image with ``resize_images``, apply the
    VGG16 input preprocessing, one-hot encode the labels and wrap the
    training set in an augmenting generator.

    Args:
        val_size: Fraction of the original training set held out for
            validation (forwarded to ``train_test_split`` as ``test_size``).
        batch_size: Batch size of the augmented training iterator.
        random_state: Optional seed for the train/validation split and for
            the augmentation iterator. ``None`` keeps the previous unseeded
            behaviour.

    Returns:
        ``(trainData, (valX, valY), (testX, testY))`` where ``trainData`` is
        the iterator produced by ``ImageDataGenerator.flow``.
    """
    def report(kind):
        # Print shape/dtype of the train, val and test arrays of one kind ('X' or 'Y').
        arrays = {'X': (trainX, valX, testX), 'Y': (trainY, valY, testY)}[kind]
        for prefix, arr in zip(('train', 'val', 'test'), arrays):
            name = prefix + kind
            print('{0}.shape: {1}, {0}.dtype: {2}'.format(name, arr.shape, arr.dtype))

    #-- Load data
    (trainX_full, trainY_full), (testX, testY) = cifar10.load_data()

    # Split the training data into training and validation sets
    trainX, valX, trainY, valY = train_test_split(
        trainX_full, trainY_full, test_size = val_size, random_state = random_state
    )
    report('X')

    #--- Resize images to the side length expected by the model input
    trainX = resize_images(trainX)
    valX = resize_images(valX)
    testX = resize_images(testX)
    report('X')

    #--- Preprocess imageset according to the preprocess procedure of pre-trained model
    trainX = vgg16.preprocess_input(trainX)
    valX = vgg16.preprocess_input(valX)
    testX = vgg16.preprocess_input(testX)
    report('X')

    #--- Turn y as one-hot-encoding
    report('Y')
    trainY = to_categorical(trainY, NUM_CLASSES)
    valY = to_categorical(valY, NUM_CLASSES)
    testY = to_categorical(testY, NUM_CLASSES)
    report('Y')

    #--- Apply data augmentation to the training set
    # No featurewise/ZCA option is enabled, so the generator has no statistics
    # to learn and datagen.fit() is intentionally not called (it would only
    # make a full copy of the training array).
    datagen = ImageDataGenerator(
        rotation_range = 15,
        width_shift_range = 0.1,
        height_shift_range = 0.1,
        horizontal_flip = True,
    )

    # Iterator yielding augmented (images, labels) batches
    trainData = datagen.flow(trainX, trainY, batch_size = batch_size, seed = random_state)

    return trainData, (valX, valY), (testX, testY)

In [6]:
# def convert_3D_to_4D(x):
# 	n, h, w = x.shape
# 	x4D = np.zeros((n, IMG_SIZE, IMG_SIZE, 3), dtype = np.uint8)
# 	for i in range(n):
# 		#--- Resize image
# 		resized_img = cv2.resize(x[i], (IMG_SIZE, IMG_SIZE))
		
# 		#--- Convert 2D image into 3D image
# 		x4D[i] = cv2.cvtColor(resized_img, cv2.COLOR_GRAY2RGB) 
# 	return x4D

In [7]:
def resize_images(images):
    """Resize every image in a batch to IMG_SIZE x IMG_SIZE.

    Args:
        images: Array-like batch indexed along its first axis; each item is
            handed to ``cv2.resize``.

    Returns:
        A new ``uint8`` array of shape (len(batch), IMG_SIZE, IMG_SIZE, 3).
    """
    count = images.shape[0]
    target_size = (IMG_SIZE, IMG_SIZE)
    output = np.zeros((count,) + target_size + (3,), dtype=np.uint8)
    for index in range(count):
        # Assignment into the uint8 buffer casts the resized image if needed.
        output[index] = cv2.resize(images[index], target_size)
    return output

In [8]:
def build_model():
    """Build and compile a VGG16-based classifier.

    The ImageNet-pretrained VGG16 backbone (without its top) is followed by
    Flatten, Dense(128, relu), Dense(64, relu) and a softmax output layer
    with ``NUM_CLASSES`` units.

    Returns:
        A compiled Keras ``Model`` using categorical cross-entropy loss and
        the 'accuracy' metric (optimizer left at the Keras default).
    """
    #--- Load a pre-trained backbone
    base_model = vgg16.VGG16(include_top = False, weights = 'imagenet', input_shape = (IMG_SIZE, IMG_SIZE, 3))
    base_model.summary(show_trainable = True)

    #--- Build a new model based on loaded backbone
    inputs = base_model.input
    x = base_model.output
    x = layers.Flatten()(x)
    x = layers.Dense(128, activation = 'relu')(x)
    x = layers.Dense(64, activation = 'relu')(x)
    # Output width follows NUM_CLASSES so it stays in sync with the one-hot
    # label encoding used elsewhere in the notebook.
    outputs = layers.Dense(NUM_CLASSES, activation = 'softmax')(x)
    model = Model(inputs, outputs)

    #--- Compile model
    model.compile(loss = 'categorical_crossentropy', metrics = ['accuracy'])

    return model

In [9]:
#--- Fixed terms
# Base directory; the driver cell appends 'aug/' to it to form the storage directory.
# NOTE(review): hard-coded absolute local path — adjust before running on another machine.
WORKING_DIR = '/home/mursalin/m3c/computer-vision/task/'  
# Side length (pixels) used by resize_images and as the VGG16 input_shape in build_model.
IMG_SIZE = 32
# `patience` of the EarlyStopping callback (monitors val_loss).
EARLY_STOP_PATIENCE = 50
# `patience` of the ReduceLROnPlateau callback (monitors val_loss).
LR_REDUCE_PATIENCE = 10
LR_REDUCE_FACTOR = 0.8 #--- new_lr = old_lr * LR_REDUCE_FACTOR
# Number of target classes; used for one-hot encoding of the labels in process_data.
NUM_CLASSES = 10
# Epochs for the warm-up fit (first model.fit call in train_classifier).
WARMUP_EPOCHS = 10
# Maximum epochs for the fine-tuning fit (second model.fit call in train_classifier).
EPOCHS = 100
# Currently unused; process_data feeds the training iterator with a batch size of 32.
# BATCH_SIZE = 128

In [10]:
#--- Create a directory to store model and figures
# The trailing separator is required: downstream code builds file paths by
# concatenating file names directly onto storage_dir.
storage_dir = WORKING_DIR + 'aug/'
if os.path.isdir(storage_dir):
    print(storage_dir + ' exists.')
# exist_ok avoids the check-then-create race and is a no-op when the
# directory is already there.
os.makedirs(storage_dir, exist_ok = True)

#--- Prepare data
trainData, (valX, valY), (testX, testY) = process_data()

#--- Train a classifier using Transfer learning
history = train_classifier(storage_dir, trainData, valX, valY)

#--- Test trained classifier
test_metrics = test_classifier(storage_dir, testX, testY)


trainX.shape: (40000, 32, 32, 3), trainX.dtype: uint8
valX.shape: (10000, 32, 32, 3), valX.dtype: uint8
testX.shape: (10000, 32, 32, 3), testX.dtype: uint8
trainX.shape: (40000, 32, 32, 3), trainX.dtype: uint8
valX.shape: (10000, 32, 32, 3), valX.dtype: uint8
testX.shape: (10000, 32, 32, 3), testX.dtype: uint8
trainX.shape: (40000, 32, 32, 3), trainX.dtype: float32
valX.shape: (10000, 32, 32, 3), valX.dtype: float32
testX.shape: (10000, 32, 32, 3), testX.dtype: float32
trainY.shape: (40000, 1), trainY.dtype: uint8
valY.shape: (10000, 1), valY.dtype: uint8
testY.shape: (10000, 1), testY.dtype: uint8
trainY.shape: (40000, 10), trainY.dtype: float64
valY.shape: (10000, 10), valY.dtype: float64
testY.shape: (10000, 10), testY.dtype: float64


I0000 00:00:1732437895.660563    7774 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 18979 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:01:00.0, compute capability: 8.9


Epoch 1/10


  self._warn_if_super_not_called()
I0000 00:00:1732437898.303225    7917 service.cc:148] XLA service 0x73b1500025e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1732437898.303288    7917 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2024-11-24 14:44:58.326178: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1732437898.544481    7917 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  11/1250[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m19s[0m 16ms/step - accuracy: 0.1422 - loss: 10.5166

I0000 00:00:1732437900.924836    7917 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 15ms/step - accuracy: 0.4289 - loss: 2.2799 - val_accuracy: 0.6016 - val_loss: 1.1884
Epoch 2/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 13ms/step - accuracy: 0.5905 - loss: 1.2205 - val_accuracy: 0.6105 - val_loss: 1.2359
Epoch 3/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 13ms/step - accuracy: 0.6219 - loss: 1.1356 - val_accuracy: 0.6392 - val_loss: 1.1275
Epoch 4/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 13ms/step - accuracy: 0.6340 - loss: 1.0924 - val_accuracy: 0.6429 - val_loss: 1.1043
Epoch 5/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 13ms/step - accuracy: 0.6422 - loss: 1.0725 - val_accuracy: 0.6490 - val_loss: 1.0941
Epoch 6/10
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 13ms/step - accuracy: 0.6503 - loss: 1.0568 - val_accuracy: 0.6434 - val_loss: 1.1659
Epoch 7/10
[1m

Epoch 1/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 13ms/step - accuracy: 0.6671 - loss: 1.0151 - val_accuracy: 0.6557 - val_loss: 1.1676 - learning_rate: 0.0010
Epoch 2/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 13ms/step - accuracy: 0.6745 - loss: 0.9959 - val_accuracy: 0.6570 - val_loss: 1.1376 - learning_rate: 0.0010
Epoch 3/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 13ms/step - accuracy: 0.6726 - loss: 1.0046 - val_accuracy: 0.6614 - val_loss: 1.1269 - learning_rate: 0.0010
Epoch 4/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 12ms/step - accuracy: 0.6779 - loss: 0.9887 - val_accuracy: 0.6510 - val_loss: 1.2023 - learning_rate: 0.0010
Epoch 5/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 13ms/step - accuracy: 0.6753 - loss: 0.9910 - val_accuracy: 0.6592 - val_loss: 1.1416 - learning_rate: 0.0010
Epoch 6/100
[1m1250/1250[0m [32m━━━━━━━━━━

  saveable.load_own_variables(weights_store.get(inner_path))


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step
Original_Y 	Predicted_Y
3                 3
8                 8
8                 8
0                 0
6                 6
6                 6
1                 1
6                 6
3                 6
1                 1
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.6656 - loss: 1.1469 - precision: 0.7470
