In [15]:
# Mount Google Drive

from google.colab import drive

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [16]:
# import os

# os.chdir("gdrive/MyDrive")

In [17]:
%cd ./'Colab Notebooks'

[Errno 2] No such file or directory: './Colab Notebooks'
/content/gdrive/MyDrive/Colab Notebooks


In [18]:
%pwd

'/content/gdrive/My Drive/Colab Notebooks'

In [19]:
# setup, importing libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import classification_report as cr, confusion_matrix as cm

print("TensorFlow version:", tf.__version__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

TensorFlow version: 2.15.0
Num GPUs Available:  0


In [20]:
# loading/preprocessing data

# file paths
train_path = "/content/gdrive/MyDrive/data/mnist_train.csv"
test_path = "/content/gdrive/MyDrive/data/mnist_test.csv"

# loading data
train_data = pd.read_csv(train_path)
test_data = pd.read_csv(test_path)

# Print to check the upload
print("First 5 rows of training data:")
print(train_data.head())
print("First 5 rows of test data:")
print(test_data.head())

# separating features and labels
X_train = train_data.iloc[:, 1:].values / 255.0 # Normalizing pixel values
Y_train = train_data.iloc[:, 0].values
X_test = test_data.iloc[:, 1:].values / 255.0 # Normalizing pixel values
Y_test = test_data.iloc[:, 0].values

# Print the shape of the datasets
print("Shape of X_train:", X_train.shape)
print("Shape of Y_train:", Y_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of Y_test:", Y_test.shape)

Y_train = tf.keras.utils.to_categorical(Y_train, num_classes=10)
Y_test = tf.keras.utils.to_categorical(Y_test, num_classes=10)

# Print the first 5 one-hot encoded labels to check
print("First 5 one-hot encoded labels for Y_train:")
print(Y_train[:5])

First 5 rows of training data:
   label  1x1  1x2  1x3  1x4  1x5  1x6  1x7  1x8  1x9  ...  28x19  28x20  \
0      5    0    0    0    0    0    0    0    0    0  ...      0      0   
1      0    0    0    0    0    0    0    0    0    0  ...      0      0   
2      4    0    0    0    0    0    0    0    0    0  ...      0      0   
3      1    0    0    0    0    0    0    0    0    0  ...      0      0   
4      9    0    0    0    0    0    0    0    0    0  ...      0      0   

   28x21  28x22  28x23  28x24  28x25  28x26  28x27  28x28  
0      0      0      0      0      0      0      0      0  
1      0      0      0      0      0      0      0      0  
2      0      0      0      0      0      0      0      0  
3      0      0      0      0      0      0      0      0  
4      0      0      0      0      0      0      0      0  

[5 rows x 785 columns]
First 5 rows of test data:
   label  1x1  1x2  1x3  1x4  1x5  1x6  1x7  1x8  1x9  ...  28x19  28x20  \
0      7    0    0    0  

In [21]:
# Defining Neural Network model
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=( 784,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# verifying model architecture
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 128)               100480    
                                                                 
 dense_4 (Dense)             (None, 64)                8256      
                                                                 
 dense_5 (Dense)             (None, 10)                650       
                                                                 
Total params: 109386 (427.29 KB)
Trainable params: 109386 (427.29 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [22]:
# Compiling model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
print("Model compiled succesfully")

Model compiled succesfully


In [23]:
from tensorflow.keras.callbacks import ModelCheckpoint


# filepath: Where to save the model file.
# monitor: Quantity to monitor (e.g., 'val_loss').
# save_best_only: If True, saves only the best model (based on the monitored quantity).
# save_weights_only: If True, saves only the model weights instead of the full model.
# mode: One of {'auto', 'min', 'max'}; decides whether to minimize or maximize the monitored quantity.
# verbose: Verbosity mode.
checkpoint = ModelCheckpoint('model_checkpoint.h5',
                             monitor='val_loss',
                             save_best_only=True,
                             save_weights_only=False,
                             mode='min',
                             verbose=1)

# Training the model
history = model.fit(X_train, Y_train, epochs=10, batch_size=32,
                    validation_data=(X_test, Y_test), callbacks=[checkpoint])

# Print the keys of the training history to check the training process
print("Training history keys:", history.history.keys())

# weights_specific_layer = model.get_layer('layer_name').get_weights()
weights_all = model.get_weights()

# Summary of the model
model.summary()

# Save model weights
# Checkpointing: You can save the current state of the model's weights during or after training. This is useful for resuming training or for future inference without needing to retrain the model.
# Experimentation: When running multiple experiments, you can save weights at different stages to compare performance.
# Deployment: Saved weights can be loaded into the model for deployment in a production environment.
model.save_weights('model_weights.h5')

# Load model weights
# Resuming Training: If training was interrupted, you can resume from the last saved state.
# Inference: Load pre-trained weights to use the model for making predictions without needing to retrain.
# Testing and Validation: Load specific saved weights to validate or test the model performance under those specific conditions.
# model.load_weights('model_weights.h5')

# Google Colab saves training weights. Restart session to train afresh.

# During an epoch, the model iterates over each batch of the dataset.
# For each batch, the following steps occur -
# Forward Pass: The model makes predictions on the batch of samples.
# Loss Calculation: The loss function calculates the error between the predictions and the true labels.
# Backward Pass: Backpropagation computes the gradients of the loss with respect to the model's weights.
# Weight Update: The optimizer updates the model's weights based on the gradients.

# History object, which is stored in the history variable. This object contains a record of training loss values
# and metrics values at successive epochs, as well as validation loss values and validation metrics values (if applicable).

# History object can be used to plot training and validation metrics

# import matplotlib.pyplot as plt

# # Plot training & validation accuracy values
# plt.plot(history.history['accuracy'])
# plt.plot(history.history['val_accuracy'])
# plt.title('Model accuracy')
# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Validation'], loc='upper left')
# plt.show()

# # Plot training & validation loss values
# plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
# plt.title('Model loss')
# plt.ylabel('Loss')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Validation'], loc='upper left')
# plt.show()


Epoch 1/10
Epoch 1: val_loss improved from inf to 0.12710, saving model to model_checkpoint.h5


  saving_api.save_model(


Epoch 2/10
Epoch 2: val_loss improved from 0.12710 to 0.09540, saving model to model_checkpoint.h5
Epoch 3/10
Epoch 3: val_loss improved from 0.09540 to 0.08939, saving model to model_checkpoint.h5
Epoch 4/10
Epoch 4: val_loss improved from 0.08939 to 0.07069, saving model to model_checkpoint.h5
Epoch 5/10
Epoch 5: val_loss did not improve from 0.07069
Epoch 6/10
Epoch 6: val_loss did not improve from 0.07069
Epoch 7/10
Epoch 7: val_loss did not improve from 0.07069
Epoch 8/10
Epoch 8: val_loss did not improve from 0.07069
Epoch 9/10
Epoch 9: val_loss did not improve from 0.07069
Epoch 10/10
Epoch 10: val_loss did not improve from 0.07069
Training history keys: dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 128)               100480    
                                                  

In [24]:
# evaluating the model
train_loss, train_accuracy = model.evaluate(X_train, Y_train, verbose=0)
test_loss, test_accuracy = model.evaluate(X_test, Y_test, verbose=0)

print(f"Training Accuracy : {train_accuracy}")
print(f"Test Accuracy : {test_accuracy}")

Training Accuracy : 0.9949333071708679
Test Accuracy : 0.9767000079154968


In [None]:
# Generating predictions
train_predictions = model.predict(X_train)
test_predictions = model.predict(X_test)

# np.argmax is a NumPy function that returns the indices of the maximum values along a
# specified axis. Models often output probabilities for each class in classification problems.
# To convert these probabilities into actual class predictions, np.argmax is used to find the
# class with the highest probability
train_pred_classes = np.argmax(train_predictions, axis=1)
test_pred_classes = np.argmax(test_predictions, axis=1)

Y_train_classes = np.argmax(Y_train, axis=1)
Y_test_classes = np.argmax(Y_test, axis=1)

# Print the first 5 predictions and their corresponding true labels for training and test data
print("First 5 training predictions:", train_pred_classes[:5])
print("First 5 training true labels:", Y_train_classes[:5])
print("First 5 test predictions:", test_pred_classes[:5])
print("First 5 test true labels:", Y_test_classes[:5])

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# Cell 8: Classification Report and Confusion Matrix for Training Data
print("Training Classification Report:")
print(classification_report(Y_train_classes, train_pred_classes))

print("Training Confusion Matrix:")
print(confusion_matrix(Y_train_classes, train_pred_classes))