<a href="https://colab.research.google.com/github/sultanmr/cifar-resnet/blob/main/train_cifar10_resnet50.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

1. Import Libraries

# 1. Import Libraries

In [None]:
import datetime

import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Dense,
    GlobalAveragePooling2D,
    Input,
    BatchNormalization,
    Flatten,
    Activation,
    Dropout
)
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import (
    EarlyStopping,
    ReduceLROnPlateau,
    TensorBoard
)
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.regularizers import l2


# 2. Load and Limit the Dataset

In [None]:
# Load the CIFAR-10 train/test splits as returned by Keras.
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()

# Sprint requirement: train on a reduced set, keeping only the first `n`
# training samples. The test split is left untouched.
n = 10000
train_images, train_labels = train_images[:n], train_labels[:n]

# 3. Preprocess the Data

In [None]:
# Rescale pixel values to float32 by dividing by 255 (the images stay in
# colour -- this is a rescale, not a grayscale conversion).
# NOTE(review): this cell overwrites its own inputs, so running it twice
# rescales and re-encodes the data a second time -- re-run from the load cell.
train_images, test_images = (
    split.astype('float32') / 255.0 for split in (train_images, test_images)
)

# One-hot encode the integer class labels into 10 columns.
train_labels, test_labels = (
    to_categorical(labels, num_classes=10) for labels in (train_labels, test_labels)
)


# 4. Set Up the ResNet50 Base Model

In [None]:
# ImageNet-pretrained ResNet50 used as a feature extractor.
# include_top=False drops the original ImageNet classification layer so a
# custom 10-class head can be attached instead.
# NOTE(review): the inputs fed to this model are divided by 255 in the
# preprocessing cell; the ImageNet weights are normally paired with
# keras.applications.resnet.preprocess_input -- confirm this is intended.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(32, 32, 3),
)

# Freeze the backbone so that only the new head is trained at first.
base_model.trainable = False

# 5. Build the Custom Head

In [None]:
 # Classification head on top of the ResNet50 backbone: global average
 # pooling, then three L2-regularised Dense -> BatchNorm -> Dropout stages
 # (512, 256, 128 units), then a 10-way softmax output.
 # NOTE(review): this cell keeps the original one-space leading indent.
 # IPython appears to tolerate it, but it is not valid plain Python if the
 # notebook is exported to a script -- consider dedenting the whole cell.
 model = Sequential([
     base_model,
     GlobalAveragePooling2D(),
     *(
         layer
         for units in (512, 256, 128)
         for layer in (
             Dense(units, activation='relu', kernel_regularizer=l2(0.01)),
             BatchNormalization(),
             Dropout(0.5),
         )
     ),
     Dense(10, activation='softmax'),
 ])

# 6. Compile the Model

In [None]:
# The labels are one-hot encoded over several classes and the model outputs a
# softmax distribution, so categorical cross-entropy is the matching loss.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# 7. Train the Head

In [None]:
# Phase 1: train only the new head (the backbone is frozen), holding out 20%
# of the training arrays for validation.
history = model.fit(
    x=train_images,
    y=train_labels,
    epochs=10,
    validation_split=0.2,
)

Epoch 1/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 536ms/step - accuracy: 0.2725 - loss: 2.1523 - val_accuracy: 0.3055 - val_loss: 6.8729
Epoch 2/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 509ms/step - accuracy: 0.4392 - loss: 1.6621 - val_accuracy: 0.3700 - val_loss: 1.7948
Epoch 3/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 524ms/step - accuracy: 0.5114 - loss: 1.4413 - val_accuracy: 0.4570 - val_loss: 1.6192
Epoch 4/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m136s[0m 542ms/step - accuracy: 0.5768 - loss: 1.2472 - val_accuracy: 0.5820 - val_loss: 1.2667
Epoch 5/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 531ms/step - accuracy: 0.6329 - loss: 1.0777 - val_accuracy: 0.5325 - val_loss: 1.4484
Epoch 6/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 537ms/step - accuracy: 0.7082 - loss: 0.9202 - val_accuracy: 0.5285 - val_loss: 1.6134
Epoc

In [None]:
# `TensorBoard` and `datetime` are imported in the imports cell at the top of
# the notebook.

# Give each run its own timestamped log directory so that separate runs do not
# write into the same folder.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# TensorBoard callback writing to that directory; histogram_freq=1 requests
# histograms every epoch.
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# 8. Unfreeze and Train the Whole Model

In [None]:
# Phase 2: unfreeze the ResNet50 backbone and fine-tune the whole network.
# NOTE(review): unfreezing also makes the backbone's BatchNormalization layers
# trainable; the Keras fine-tuning guide recommends keeping them in inference
# mode -- confirm whether that should be applied here.
base_model.trainable = True

# Recompile after changing `trainable`, with a much smaller learning rate than
# the default used for the head.
model.compile(
    optimizer=Adam(learning_rate=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

history_fine = model.fit(
    x=train_images,
    y=train_labels,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stopping, tensorboard_callback],
)

Epoch 1/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m225s[0m 677ms/step - accuracy: 0.9428 - loss: 0.2314 - val_accuracy: 0.7155 - val_loss: 0.9547
Epoch 2/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 583ms/step - accuracy: 0.9496 - loss: 0.2025 - val_accuracy: 0.7165 - val_loss: 0.9724
Epoch 3/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 588ms/step - accuracy: 0.9560 - loss: 0.1826 - val_accuracy: 0.7160 - val_loss: 0.9816
Epoch 4/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 576ms/step - accuracy: 0.9565 - loss: 0.1764 - val_accuracy: 0.7200 - val_loss: 0.9871
Epoch 5/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 570ms/step - accuracy: 0.9613 - loss: 0.1662 - val_accuracy: 0.7190 - val_loss: 0.9956
Epoch 6/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 585ms/step - accuracy: 0.9578 - loss: 0.1664 - val_accuracy: 0.7165 - val_loss: 1.0040


# 9. Evaluate the Model

In [None]:
# Score the fine-tuned model on the CIFAR-10 test split; with the single
# 'accuracy' metric compiled in, evaluate() yields (loss, accuracy).
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels)

# Report accuracy as a percentage with two decimals.
print(f"Test accuracy:{test_acc*100:.2f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 140ms/step - accuracy: 0.7254 - loss: 0.9398
Test accuracy:72.40


# 10. Save the Model and Training Artifacts

In [None]:
# View results: https://dagshub.com/sultanmr/my-first-repo.mlflow/#/experiments/2/runs/bfc550c5403b44c0a980c0629be2de58/artifacts
# The saved model is loaded by mlflow-dagshub.py (DagsHub/MLflow logging) and
# by app.py (Streamlit UI).
model.save('resnet50_model.h5')

# Per-epoch training/validation curves. Despite the name, this holds the
# fine-tuning phase only (`history_fine`); the head-training `history` is not
# included.
full_history = {
    metric: history_fine.history[metric]
    for metric in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
}

# Save the training curves so mlflow-dagshub.py can plot loss and accuracy on DagsHub.
np.savez('history.npz', **full_history)

# Save the preprocessed test images and one-hot labels so mlflow-dagshub.py
# can build the confusion matrix.
np.savez('test_data.npz', images=test_images, labels=test_labels)



In [None]:
# Learning curves for the fine-tuning phase (the contents of `full_history`):
# accuracy on the left, loss on the right.
# `plt` is already imported in the imports cell at the top of the notebook.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(10, 4))

# Accuracy
ax_acc.plot(full_history['accuracy'], label='Train Acc')
ax_acc.plot(full_history['val_accuracy'], label='Val Acc')
ax_acc.set(title='Accuracy', xlabel='Epoch', ylabel='Accuracy')
ax_acc.legend()

# Loss
ax_loss.plot(full_history['loss'], label='Train Loss')
ax_loss.plot(full_history['val_loss'], label='Val Loss')
ax_loss.set(title='Loss', xlabel='Epoch', ylabel='Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()
