# Transfer learning, finetune VGG16

It has 16 layers with learnable weights, including 13 convolutional layers and 3 fully connected layers.<br>
Pre-trained Weights: The model is pre-trained on the ImageNet dataset, which contains 1.2 million images and 1000 classes.<br>
It is widely used for image classification tasks and can be fine-tuned for specific tasks such as brain tumor classification.

### 2.1 Preprocessing data

In [1]:
import cv2
import os
import random
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from keras.utils import normalize
from PIL import Image
from sklearn.model_selection import train_test_split

  _warn(("h5py is running against HDF5 {0} when it was built against {1}, "


In [3]:
seed = 80
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

#### Reading and nomalize dataset

In [4]:
INPUT_SIZE = 64
image_directory = r'E:/Project/2024 Project/BrainTumor_Lam/datasets/'

no_dir = os.listdir(image_directory + 'sort_crop_no/')
yes_dir = os.listdir(image_directory + 'sort_crop_yes/')

no_H = os.listdir(image_directory + 'test_data_no/')
yes_H = os.listdir(image_directory + 'test_data_yes/')

data_set,label = [],[]

for i , image_name in enumerate(no_dir) :
    if(image_name.split('.')[1] == 'jpg') :
        image=cv2.imread(image_directory+ 'sort_crop_no/' + image_name)
        # crop_image(image)
        image = Image.fromarray(image, 'RGB')
        image = image.resize((INPUT_SIZE, INPUT_SIZE))
        data_set.append(np.array(image))
        label.append(0)


for i , image_name in enumerate(yes_dir) :
    if(image_name.split('.')[1] == 'jpg') :
        image=cv2.imread(image_directory+ 'sort_crop_yes/' + image_name)
        # crop_image(image)
        image = Image.fromarray(image, 'RGB')
        image = image.resize((INPUT_SIZE,INPUT_SIZE))
        data_set.append(np.array(image))
        label.append(1)

for image_name in no_H:
    if image_name.endswith('.jpg'):
        image = cv2.imread(image_directory + 'test_data_no/' + image_name)
        image = Image.fromarray(image, 'RGB')
        image = image.resize((INPUT_SIZE, INPUT_SIZE))
        data_set.append(np.array(image))
        label.append(0)

for image_name in yes_H:
    if image_name.endswith('.jpg'):
        image = cv2.imread(image_directory + 'test_data_yes/' + image_name)
        image = Image.fromarray(image, 'RGB')
        image = image.resize((INPUT_SIZE, INPUT_SIZE))
        data_set.append(np.array(image))
        label.append(1)


print(len(data_set))
print(len(label))

3242
3242


In [5]:
data_set = np.array(data_set)
label = np.array(label)

#### Splitting the data into training and testing

In [6]:
x_train,x_test,y_train,y_test = train_test_split(
    data_set,label,
    test_size=0.2,
    random_state=99
    )
x_train,x_val,y_train,y_val = train_test_split(
        x_train,y_train,
    test_size=0.25,
    random_state=9
)

In [7]:
print(f'X train shape: {x_train.shape}\nY train shape: {y_train.shape}\nX test shape: {x_test.shape}\nY test shape: {y_test.shape})')

X train shape: (1944, 64, 64, 3)
Y train shape: (1944,)
X test shape: (649, 64, 64, 3)
Y test shape: (649,))


#### Nomalize

In [9]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import joblib 
# using scaler of nb4
scaler = joblib.load('scaler.pkl') 
# Reshape data to fit with MinMaxScaler
x_train_reshaped = x_train.reshape(-1, x_train.shape[-1])
x_test_reshaped = x_test.reshape(-1, x_test.shape[-1])
x_val_reshaped = x_val.reshape(-1, x_val.shape[-1])

x_train_reshaped = scaler.fit_transform(x_train_reshaped)
x_test_reshaped = scaler.transform(x_test_reshaped)
x_val_reshaped = scaler.transform(x_val_reshaped)

# Reshape to original shape
x_train = x_train_reshaped.reshape(x_train.shape)
x_test = x_test_reshaped.reshape(x_test.shape)
x_val = x_val_reshaped.reshape(x_val.shape)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Save new scaler method to using with new image in the future

In [10]:
import joblib

# Store scaler element
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

### Loading pre-trained model

In [11]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [12]:
# Load the VGG16 model with pre-trained weights, excluding the top fully connected layers
base_model = VGG16(
    weights=None, # bo hoac none
    include_top=False, 
    input_shape=(64, 64, 3)
    )

# Freeze the convolutional base
for layer in base_model.layers:
    layer.trainable = True 


In [13]:
# Add custom top layers for classification
x = Flatten()(base_model.output)
x = Dense(512, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)  #binary classification 
# note 1 dense +1 activation

In [14]:
# Create the new model
model = Model(inputs=base_model.input, outputs=x)

In [15]:
model.summary()

In [17]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

model.compile(
    optimizer=Adam(learning_rate=1e-4, amsgrad=True),
    loss='binary_crossentropy', 
    metrics=['accuracy']
)

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',  # Monitor validation loss
    patience=3,          
    restore_best_weights=True  
)

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',   
    factor=0.1,           
    patience=3,          
    verbose=1,            
    min_lr=1e-6          
)

# Train the model
history = model.fit(
    x_train, 
    y_train, 
    epochs=10, 
    batch_size=32, 
    shuffle=False,
    validation_data=(x_test, y_test),
    callbacks=[early_stopping, reduce_lr]  
)


Epoch 1/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 1s/step - accuracy: 0.5201 - loss: 0.6918 - val_accuracy: 0.7304 - val_loss: 0.6359 - learning_rate: 1.0000e-04
Epoch 2/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 1s/step - accuracy: 0.7558 - loss: 0.5340 - val_accuracy: 0.8860 - val_loss: 0.2954 - learning_rate: 1.0000e-04
Epoch 3/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 1s/step - accuracy: 0.8314 - loss: 0.3832 - val_accuracy: 0.8860 - val_loss: 0.2728 - learning_rate: 1.0000e-04
Epoch 4/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 1s/step - accuracy: 0.8661 - loss: 0.3174 - val_accuracy: 0.9122 - val_loss: 0.2260 - learning_rate: 1.0000e-04
Epoch 5/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 1s/step - accuracy: 0.8829 - loss: 0.2924 - val_accuracy: 0.9183 - val_loss: 0.1969 - learning_rate: 1.0000e-04
Epoch 6/10
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

Test with new data

In [18]:
y_test_pred = model.predict(x_test)
y_pred = (y_test_pred >0.5).astype(int)
y_test_reshape = y_test.reshape(-1,1)
print("Accuracy in test set:", accuracy_score(y_test_reshape, y_pred))
print('Accuracy in validation set:',history.history['val_accuracy'][-1])

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 254ms/step
Accuracy in test set: 0.9183359013867488
Accuracy in validation set: 0.8983050584793091



Name: Transfer_learning<br>
Accuracy in test set: 0.9183359013867488<br>

In [19]:
save_path = 'E:/Project/2024 Project/BrainTumor_Lam/models/transfer_learning.h5'
model.save(save_path)

