<a href="https://colab.research.google.com/github/seecode4/seeRepo1/blob/main/Student_MLE_MiniProject_Fine_Tuning_m3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.backend import resize_images
from sklearn.model_selection import train_test_split

from tensorflow import keras as K
from tensorflow.keras.optimizers import Adagrad
from tensorflow.keras.layers import Input, Lambda, Flatten
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.initializers import RandomNormal, RandomUniform

In [None]:
# Load the CIFAR-10 dataset
cifar_home = 'https://www.cs.toronto.edu/~kriz/cifar.html'
# from tensorflow.keras.datasets import cifar10
%cd /content
!pwd
!rm -fr cifar*
!ls
print('---------')
(x_train_val, y_train_val), (x_test, y_test) = cifar10.load_data()
assert x_train_val.shape == (50000, 32, 32, 3)
assert x_test.shape == (10000, 32, 32, 3)
assert y_train_val.shape == (50000, 1)
assert y_test.shape == (10000, 1)
print(f'x_train_val type: {type(x_train_val)}, ndim:{x_train_val.ndim}, shape:{x_train_val.shape}')
print(f'{x_train_val[0][0][0:10]}')

/content
/content
sample_data
---------
Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
x_train_val type: <class 'numpy.ndarray'>, ndim:4, shape:(50000, 32, 32, 3)
[[ 59  62  63]
 [ 43  46  45]
 [ 50  48  43]
 [ 68  54  42]
 [ 98  73  52]
 [119  91  63]
 [139 107  75]
 [145 110  80]
 [149 117  89]
 [149 120  93]]


In [None]:
# Turn the (N, 1) class-id columns into (N, num_classes) one-hot matrices.
num_classes = 10
y_train_val_1hot, y_test_1hot = (
    to_categorical(class_ids, num_classes)
    for class_ids in (y_train_val, y_test)
)
# Both label matrices should now have `num_classes` columns.
print(y_train_val_1hot.shape, y_test_1hot.shape)

(50000, 10) (10000, 10)


In [None]:
# Normalize the pixel values to [0, 1]
# Cast to float32 *before* dividing: dividing an integer array by 255 makes
# NumPy produce float64, which doubles the memory of both image arrays for
# no benefit (Keras trains in float32 by default).
# NOTE(review): the ImageNet VGG16 weights used later were trained with
# `tf.keras.applications.vgg16.preprocess_input` rather than [0, 1] scaling;
# consider switching if accuracy matters more than following the brief.
np.set_printoptions(precision=4)
X_train_val = x_train_val.astype('float32') / 255.0
print(f'X_train_val:\n {X_train_val[0][0][0:10]}...')
X_test = x_test.astype('float32') / 255.0
print(f'X_test:\n {X_test[0][0][0:10]}...')

X_train_val:
 [[0.2314 0.2431 0.2471]
 [0.1686 0.1804 0.1765]
 [0.1961 0.1882 0.1686]
 [0.2667 0.2118 0.1647]
 [0.3843 0.2863 0.2039]
 [0.4667 0.3569 0.2471]
 [0.5451 0.4196 0.2941]
 [0.5686 0.4314 0.3137]
 [0.5843 0.4588 0.349 ]
 [0.5843 0.4706 0.3647]]...
X_test:
 [[0.6196 0.4392 0.1922]
 [0.6235 0.4353 0.1843]
 [0.6471 0.4549 0.2   ]
 [0.651  0.4627 0.2078]
 [0.6275 0.4392 0.1804]
 [0.6118 0.4275 0.1608]
 [0.6353 0.451  0.1843]
 [0.6235 0.4431 0.1765]
 [0.6196 0.4353 0.1725]
 [0.6235 0.4431 0.1608]]...


In [None]:
# Reproducibility setup: one shared seed plus deterministic TensorFlow ops.
SEED = 42

# Ask TensorFlow to use deterministic op implementations.
tf.config.experimental.enable_op_determinism()

# Seed NumPy and TensorFlow explicitly, then call the Keras convenience
# helper with the same seed.
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.keras.utils.set_random_seed(SEED)

In [None]:
# Build a CIFAR-10 classifier: frozen ImageNet VGG16 convolutional stack
# followed by a small trainable dense head.
base_model = VGG16(
    weights='imagenet',  # load weights pretrained on ImageNet
    include_top=False,   # exclude the ImageNet classifier head
    input_shape=(32, 32, 3),
    pooling='max'        # appends ONE global max-pooling layer after the last conv block
)
# Freeze all pretrained weights and biases so only the new head is trained.
# Setting `trainable` on the model applies it to every contained layer.
base_model.trainable = False
# base_model.summary()

model3 = Sequential()
# NOTE(review): stddev=1.0 is much wider than Keras' default Dense
# initializer (glorot_uniform) and may saturate the softmax early in
# training; kept as-is, consider the default.
initializer = RandomNormal(mean=0.0, stddev=1.0, seed=SEED)
# Reuse every VGG16 layer except the trailing global max-pooling layer,
# which is replaced by global average pooling below.
for layer in base_model.layers[:-1]:
    model3.add(layer)

# model3.add(Flatten())
model3.add(GlobalAveragePooling2D(data_format='channels_last'))
model3.add(Dense(256, activation="relu", kernel_initializer='he_normal'))

# Try to prevent overfitting
model3.add(tf.keras.layers.Dropout(.5))
model3.add(Dense(256, activation="relu", kernel_initializer='he_normal'))

# Final classification layer: one softmax unit per CIFAR-10 class
model3.add(Dense(num_classes, activation='softmax', kernel_initializer=initializer))

# Compile the model - use Adagrad optimizer
model3.compile(Adagrad(learning_rate=0.001, use_ema=True, ema_momentum=0.9),
               loss='categorical_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so do not wrap it in
# print() (that would emit a stray "None" after the table).
model3.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 block1_conv1 (Conv2D)       (None, 32, 32, 64)        1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 32, 32, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 16, 16, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 16, 16, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 16, 16, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 8, 8, 128)         0         
                                                                 
 block3_conv1 (Conv2D)       (None, 8, 8, 256)        

In [None]:
# # Data Augmentation (disabled: `datagen_aug` is not used by the training cells shown in this notebook)
# datagen_aug = ImageDataGenerator(rotation_range=20,
#                              width_shift_range=0.2,
#                              height_shift_range=0.2,
#                              horizontal_flip=True
#                              )
# datagen_aug.fit(X_train_val)

In [None]:
%%time
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint
# def lr_scheduler(epoch):
#     return 0.002 * (0.5 ** (epoch // 20))
# reduce_lr = LearningRateScheduler(lr_scheduler)

# Define a callback to monitor val_accuracy
monitor_acc = EarlyStopping(monitor='accuracy',
                       min_delta=0.001, patience=4)

# Train the model
print(X_train_val.shape, y_train_val_1hot.shape)
model3.fit(X_train_val, y_train_val_1hot, epochs=50, batch_size=64,
          validation_split=0.2, validation_freq=2, verbose=1,
          callbacks= [monitor_acc])

(50000, 32, 32, 3) (50000, 10)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
CPU times: user 13h 36min 49s, sys: 14h 19min 55s, total: 1d 3h 56min 45s
Wall time: 31min 32s


<keras.src.callbacks.History at 0x7e99d43cdb10>

In [None]:
# Continue training the same model3 instance for up to 50 more epochs.
# Validation runs every epoch (validation_freq=1) so val_* metrics are logged
# for each epoch and are available to any callback that monitors them.
print(X_train_val.shape, y_train_val_1hot.shape)
model3.fit(X_train_val, y_train_val_1hot, epochs=50, batch_size=64,
           validation_split=0.2, validation_freq=1, verbose=1,
           callbacks=[monitor_acc])

(50000, 32, 32, 3) (50000, 10)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50


<keras.src.callbacks.History at 0x7e99d428afb0>

In [None]:
# Another continuation run on the same model3 instance (up to 50 epochs).
# Validation runs every epoch (validation_freq=1) so val_* metrics are logged
# for each epoch and are available to any callback that monitors them.
print(X_train_val.shape, y_train_val_1hot.shape)
model3.fit(X_train_val, y_train_val_1hot, epochs=50, batch_size=64,
           validation_split=0.2, validation_freq=1, verbose=1,
           callbacks=[monitor_acc])

(50000, 32, 32, 3) (50000, 10)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50


<keras.src.callbacks.History at 0x7e99c447dc60>

In [None]:
# Continuation run with a much larger batch (1024 instead of 64) on the same
# model3 instance. Validation runs every epoch (validation_freq=1) so val_*
# metrics are logged for each epoch and are available to any callback that
# monitors them.
print(X_train_val.shape, y_train_val_1hot.shape)
model3.fit(X_train_val, y_train_val_1hot, epochs=50, batch_size=1024,
           validation_split=0.2, validation_freq=1, verbose=1,
           callbacks=[monitor_acc])

(50000, 32, 32, 3) (50000, 10)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50


<keras.src.callbacks.History at 0x7e99c42a72b0>

In [None]:
# Score the held-out test images with the trained model.
from sklearn.metrics import roc_auc_score, confusion_matrix

# One row of class scores per test image.
y_test_pred = model3.predict(X_test)
# The one-hot targets and the predictions should have matching shapes.
print(*(arr.shape for arr in (y_test_1hot, y_test_pred)))

(10000, 10) (10000, 10)


In [None]:
# ROC AUC computed from the one-hot targets and the predicted class scores.
test_score = roc_auc_score(y_true=y_test_1hot, y_score=y_test_pred)
print("roc_auc_score:", test_score)

# Collapse scores to hard class ids (highest score per row) and flatten the
# (N, 1) integer labels so both are 1-D vectors of equal length.
y_test_pred_val = y_test_pred.argmax(axis=1).reshape(-1)
y_test_val = y_test.reshape(-1)
print(y_test_val.shape, y_test_pred_val.shape)
print(type(y_test), type(y_test_pred_val))

# Confusion matrix built with true labels first, predicted labels second.
cm = confusion_matrix(y_true=y_test_val, y_pred=y_test_pred_val)
print(cm)

roc_auc_score: 0.8899618222222221
(10000,) (10000,)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
[[542  40 104  33  34   6  13  21 141  66]
 [ 23 577  19  39  13  18  35  26  47 203]
 [ 69  32 405  61 189  48 114  38  21  23]
 [ 12  69  86 313 133 107 122  39  22  97]
 [ 18  17  72  43 621  20  69  71  36  33]
 [  9  38 110 195 110 347  53  85   9  44]
 [  8  38  61  68 148  30 601   6  13  27]
 [ 13  22  74  53 166  46  12 495  16 103]
 [ 82  83  19  18  38   2  11   6 644  97]
 [ 32 156  14  38  35   8  23  40  57 597]]


A summary of these results is in Student_MLE_MiniProject_Fine_Tuning_m3a.ipynb.