# Problem 1

# Problem 2

In [12]:
from keras.datasets import cifar10
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input
from keras.optimizers import Adam
from keras.models import Model
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.applications import VGG16
import keras
import mlflow
import mlflow.keras

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import matplotlib
from sklearn.metrics import confusion_matrix


# Use the 'ggplot' style sheet for the matplotlib figures drawn in this notebook.
matplotlib.style.use('ggplot')

In [13]:
# Load CIFAR-10 and carve a small training set and a validation set out of the
# official training split; labels are one-hot encoded for categorical cross-entropy.
NUM_CLASSES = 10

(x_train_val, y_train_val), (x_test, y_test) = cifar10.load_data()

# split further into training and validation sets:
# the first n_train images train the model, the next n_val images validate it
n_train = 4_000
n_val = 1_000
x_train = x_train_val[:n_train]
x_val = x_train_val[n_train:n_train + n_val]

# Pass num_classes explicitly. Without it, to_categorical infers the width from
# the largest label present in each slice, so a small slice that happens to miss
# the highest class would produce targets narrower than the 10-way output layer.
y_train = to_categorical(y_train_val[:n_train], num_classes=NUM_CLASSES)
y_val = to_categorical(y_train_val[n_train:n_train + n_val], num_classes=NUM_CLASSES)
y_test = to_categorical(y_test, num_classes=NUM_CLASSES)


In [17]:
# Build a transfer-learning classifier: a frozen VGG16 convolutional base
# (ImageNet weights, no classification head) followed by a small trainable
# dense head that outputs probabilities for the 10 classes.
inputs = Input((32, 32, 3))
x = keras.applications.vgg16.preprocess_input(inputs)

# Keep a reference to the VGG16 model itself and freeze it BEFORE calling it.
# `trainable` must be set on the model/layer object; setting it on the tensor
# returned by calling the model has no effect and leaves every VGG16 weight
# trainable.
vgg_model = VGG16(include_top=False, weights='imagenet')
vgg_model.trainable = False
vgg_features = vgg_model(x)

# Trainable classification head on top of the frozen features.
flatten = Flatten()(vgg_features)
dense = Dense(128, activation='relu')(flatten)
output = Dense(10, activation='softmax')(dense)

pretrained_model = Model(inputs=inputs, outputs=output)

pretrained_model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
# The summary should now report the VGG16 parameters as non-trainable.
pretrained_model.summary()


In [18]:
# Fit the model; MLflow autologging records the run.
mlflow.keras.autolog()

# Stop when the validation loss has not improved for 10 consecutive epochs and
# roll the model back to the weights from its best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    restore_best_weights=True,
)
callbacks = [early_stopping]

history = pretrained_model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val),
    epochs=100,
    batch_size=128,
    callbacks=callbacks,
)

2024/04/25 19:14:14 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'c90f9633bacc42df9d69b5b15919dcc7', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current keras workflow


Epoch 1/100
22/32 ━━━━━━━━━━━━━━━━━━━━ 31s 3s/step - accuracy: 0.1065 - loss: 19.7298

KeyboardInterrupt: 

In [None]:

# Plot the per-epoch training and validation accuracy recorded in `history`.
fig, ax = plt.subplots(figsize=(10, 5))

accuracy_curves = (
    ('accuracy', 'Training Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
)
for metric_key, curve_label in accuracy_curves:
    ax.plot(history.history[metric_key], label=curve_label)

ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.set_title('Training and Validation Accuracy')
ax.legend()
plt.show()

In [None]:

# Predict on validation data and convert both the predicted probabilities and
# the one-hot targets to integer class indices.
y_pred = pretrained_model.predict(x_val, verbose=0)
y_pred_classes = np.argmax(y_pred, axis=1)
y_val_classes = np.argmax(y_val, axis=1)

# Display names, listed in class-index order (index 0 -> 'airplane', ...).
class_labels = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Create confusion matrix and plot as a heatmap.
# `labels` is passed explicitly so the matrix always has one row/column per
# class, in class-index order. Without it, a class that appears in neither the
# true nor the predicted labels is dropped and the tick labels would shift.
cm = confusion_matrix(y_val_classes, y_pred_classes, labels=np.arange(len(class_labels)))

plt.figure(figsize=(10, 10))
# Let seaborn place the tick labels at the cell centres instead of positioning
# them by hand.
sns.heatmap(cm, xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.xticks(rotation=45)
plt.yticks(rotation=45)
plt.show()
