# Train MNIST 

MNIST is the "Hello World" of image classification.
This notebook is almost a carbon copy of [this](https://colab.research.google.com/github/rpi-techfundamentals/fall2018-materials/blob/master/10-deep-learning/04-pytorch-mnist.ipynb#scrollTo=p_K4RdAT4nGA) notebook by Analytics Dojo with some intentional errors built in.

In [None]:
#Import Libraries


import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable



## 1. Hyperparameter Adjustments
- **EPOCHS**: Increased from 2 to 30 to provide the model with more opportunities to learn from the data, which should improve overall performance.
- **LEARNING_RATE**: Reduced from 0.01 to 0.001. This change allows for smaller, more controlled parameter updates, leading to a more stable learning process.

In [None]:

# Root directory handed to datasets.MNIST for the dataset files.
DATA_DIR = '../data'

# Mini-batch sizes for the training and test loaders.
TRAIN_BATCH = 256
TEST_BATCH = 1000

# Optimisation settings for the SGD training loop.
EPOCHS = 30
LEARNING_RATE = 0.001
MOMENTUM = 0.5

# Seed value reserved for reproducible runs.
RANDOM_SEED = 42

# When True, the model and every batch are moved to the GPU via .cuda().
CUDA = False

# Pair of 1-tuples unpacked positionally into transforms.Normalize.
IMAGE_NORM = ((0.1307,), (0.3081,))


In [None]:
#load the data
def get_data_loader(is_train, batch_size, download=False):
    """Build a shuffled DataLoader over one split of MNIST.

    Args:
        is_train: Forwarded as ``train=`` to ``datasets.MNIST``; True selects
            the training split, False the test split.
        batch_size: Number of samples per batch.
        download: Forwarded as ``download=`` to ``datasets.MNIST``.

    Returns:
        A ``torch.utils.data.DataLoader`` created with ``shuffle=True``.
    """
    # Convert images to tensors, then normalise with the IMAGE_NORM values.
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(*IMAGE_NORM),
    ])
    mnist = datasets.MNIST(
        DATA_DIR, train=is_train, download=download, transform=preprocess)
    return torch.utils.data.DataLoader(
        mnist, batch_size=batch_size, shuffle=True)
    

# Training split; this is the only call that requests a download.
train_loader = get_data_loader(
    is_train=True, batch_size=TRAIN_BATCH, download=True)
# Test split; uses the default download=False.
test_loader = get_data_loader(is_train=False, batch_size=TEST_BATCH)


## 2. Modifications to the Model Architecture
- In the original code, `NUM_CLASSES` was incorrectly set to 0, which would cause issues with classification. This was fixed by setting it to 10, matching the number of classes in the MNIST dataset.


In [None]:


class Net(nn.Module):
    """Small convolutional classifier for single-channel images.

    Two convolution/pooling stages are followed by two fully connected
    layers. ``forward`` returns one row of log-probabilities per image.

    Args:
        num_classes: Size of the output layer. Defaults to 10, the number
            of digit classes in MNIST.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()  # Dropout applied to conv2's output
        # 320 = 20 channels * 4 * 4, the feature size produced by a 28x28 input.
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, num_classes)

    def forward(self, x):
        """Return log-probabilities of shape (batch, num_classes) for ``x``."""
        # Convolutional Layer/Pooling Layer/Activation
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        # Convolutional Layer/Dropout/Pooling Layer/Activation
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        # Flatten per sample. Unlike view(-1, 320), this keeps the batch
        # dimension fixed, so a wrongly sized input fails at fc1 instead of
        # being silently reshaped into a different batch size.
        x = x.flatten(start_dim=1)
        # Fully Connected Layer/Activation
        x = F.relu(self.fc1(x))
        # Functional dropout must be told explicitly whether we are training.
        x = F.dropout(x, training=self.training)
        # Fully Connected Layer (raw class scores)
        x = self.fc2(x)
        # log_softmax yields log-probabilities, the input F.nll_loss expects.
        return F.log_softmax(x, dim=1)


## 3. Early Stopping Mechanism
- An **EarlyStopping** class has been introduced to monitor validation loss and halt the training process if no improvement is observed for a specified number of epochs (patience=5). This mechanism enhances training efficiency by preventing overfitting and unnecessary computation beyond the point of diminishing returns.

In [None]:
# Define Early Stopping
class EarlyStopping:
    """Track a validation loss and flag when it has stopped improving."""

    def __init__(self, patience=5, verbose=False):
        # Number of consecutive non-improving steps that triggers the stop flag.
        self.patience = patience
        # When True, a message is printed each time the stop condition is met.
        self.verbose = verbose
        # Lowest loss seen so far; starts at infinity so the first step improves.
        self.best_loss = float('inf')
        # Count of steps since the last strict improvement.
        self.epochs_no_improve = 0
        # Set to True once patience is exhausted; never reset by step().
        self.should_stop = False

    def step(self, val_loss):
        """Record one validation loss and update the stop flag.

        A loss strictly below the best so far resets the counter; anything
        else (including an equal loss) counts as no improvement.
        """
        improved = val_loss < self.best_loss
        if improved:
            self.best_loss, self.epochs_no_improve = val_loss, 0
            return

        self.epochs_no_improve += 1
        if self.epochs_no_improve < self.patience:
            return

        self.should_stop = True
        if self.verbose:
            print(f'Early stopping triggered after {self.patience} epochs.')


## 4. Implementation of Enhanced Error Metrics
- A custom **ErrorMetrics** function has been implemented to compute a range of evaluation metrics, including **accuracy**, **F1 score**, **precision**, and **recall**, using `sklearn`. These metrics provide a more comprehensive assessment of model performance, moving beyond the simple evaluation of loss and accuracy.

In [None]:
import numpy as np
def ErrorMetrics(realVec, estiVec):
    """Compute accuracy and macro-averaged F1, precision and recall.

    Args:
        realVec: Tensor of ground-truth labels.
        estiVec: Tensor of predicted labels.

    Returns:
        Dict with keys ``'Acc'``, ``'F1'``, ``'P'`` and ``'Recall'``.
    """
    # Move to host memory, detach from autograd and convert to float arrays.
    y_true = realVec.cpu().detach().numpy().astype(float)
    y_pred = estiVec.cpu().detach().numpy().astype(float)

    from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

    # Shared options: unweighted mean over classes; a class with no samples
    # contributes 0 instead of raising a warning.
    macro = dict(average='macro', zero_division=0)
    return {
        'Acc': accuracy_score(y_true, y_pred),
        'F1': f1_score(y_true, y_pred, **macro),
        'P': precision_score(y_true, y_pred, **macro),
        'Recall': recall_score(y_true, y_pred, **macro),
    }

## 5. Loss Function and Routine Updates
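- **Loss Function**: The loss function has been updated to use `reduction='sum'`, which sums the per-sample losses within each batch instead of averaging them. Because the gradient then scales with the number of samples in the batch, the effective step size is the learning rate multiplied by the batch size.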

- **Training Routine**: The training loop output has been made more readable with separator lines, making it easier to monitor progress during training.

- **Testing Routine**: The testing loop now calculates not only the loss but also additional metrics like **accuracy**, **F1 score**, **precision**, and **recall**. This gives a more detailed view of the model's performance, especially when dealing with imbalanced data.
This version

In [None]:

def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    Args:
        epoch: Epoch number, used only in the progress messages.
    """
    model.train()
    print('-' * 100)
    total_samples = len(train_loader.dataset)
    num_batches = len(train_loader)

    for step, (inputs, labels) in enumerate(train_loader):
        if CUDA:
            inputs = inputs.cuda()
            labels = labels.cuda()

        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()
        log_probs = model(inputs)
        # Negative log-likelihood summed (not averaged) over the batch.
        batch_loss = F.nll_loss(log_probs, labels, reduction='sum')
        # Backpropagate, then apply one optimizer update.
        batch_loss.backward()
        optimizer.step()

        # Report progress only on every 50th batch.
        if step % 50 != 0:
            continue
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(inputs), total_samples,
            100. * step / num_batches, batch_loss.item()))

def test():
    """Evaluate ``model`` on ``test_loader`` and print loss and metrics.

    Reads the module-level ``model``, ``test_loader``, ``CUDA`` and ``epoch``
    (the loop variable of the training loop, used only in the printed
    summary).

    Returns:
        The average negative log-likelihood per test sample.
    """
    model.eval()
    test_loss = 0
    all_targets = []
    all_preds = []

    # Evaluation needs no gradients; skipping autograd saves memory and time.
    with torch.no_grad():
        for data, target in test_loader:
            if CUDA:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # Sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # Index of the max log-probability is the predicted class.
            all_preds.append(output.argmax(dim=1))
            all_targets.append(target)

    test_loss /= len(test_loader.dataset)

    # Compute metrics once over the whole test set rather than only the last
    # batch. Ground truth goes first and predictions second, otherwise
    # precision and recall are exchanged.
    results = ErrorMetrics(torch.cat(all_targets), torch.cat(all_preds))

    print(f'\nTest set: Average loss: {test_loss:.4f}\n')
    print(f'Epoch:{epoch} Accuracy={results["Acc"] * 100:.2f}% F1={results["F1"] * 100:.2f}% Precision={results["P"] * 100:.2f}% Recall={results["Recall"] * 100:.2f}% \n')

    return test_loss




## 6. Model Initialization and Early Stopping

In [None]:
# Seed PyTorch's RNG before building the model so weight initialisation
# is reproducible.
torch.manual_seed(RANDOM_SEED)

model = Net()
if CUDA:
    model.cuda()

optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)

# Initialize early stopping
early_stopping = EarlyStopping(patience=5, verbose=True)

for epoch in range(1, EPOCHS + 1):
    train(epoch)
    val_loss = test()
    # Early stopping can only be stepped when test() reports a loss value.
    if val_loss is not None:
        early_stopping.step(val_loss)
        if early_stopping.should_stop:
            break
