# **LeNet-5 for Fashion-MNIST**

## **Initialization**

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt
import os

In [None]:
from google.colab import drive

# Mount Google Drive at /content/gdrive; PATH below points inside this mount.
drive.mount("/content/gdrive")

# Root folder used by this notebook for data, saved models and result plots.
PATH = "/content/gdrive/MyDrive/ex1_313581803_314882861"

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

Mounted at /content/gdrive


### **Load Data**
Get the Fashion-MNIST data (see https://github.com/zalandoresearch/fashion-mnist).

In [None]:
# Fix the global torch seed so the random train/validation split below is repeatable.
torch.manual_seed(42)

# Both splits live under the same root and share the same (stateless) transform.
_data_root = f'{PATH}/Data/FashionMNIST'
_to_tensor = transforms.Compose([transforms.ToTensor()])

total_train_set = datasets.FashionMNIST(
    root=_data_root,
    train=True,
    download=True,
    transform=_to_tensor,
)
test_set = datasets.FashionMNIST(
    root=_data_root,
    train=False,
    download=True,
    transform=_to_tensor,
)

# Hold out 10,000 training samples for validation; the other 50,000 are used for training.
train_set, val_set = random_split(total_train_set, [50000, 10000])

### **Network Architecture**

Implement LeNet-5 for Fashion-MNIST.


In [None]:
class FashionLeNet(nn.Module):
    """LeNet-5 style CNN with optional dropout and optional input batch norm.

    Two conv/ReLU/max-pool stages are followed by three fully connected
    layers producing 10 class scores. The first linear layer expects
    16*5*5 features, which is what the conv stages yield for 1x28x28 inputs.

    Args:
        drop_p: Dropout probability applied after each of the two hidden
            fully connected layers (0 disables dropout).
        bn_flag: When True, a BatchNorm2d over the single input channel is
            applied before the first convolution.

    Attributes:
        criterion: CrossEntropyLoss with ``reduction='sum'`` (loss is summed
            over the batch, not averaged).
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, padding=0):
        """Build a 5x5 convolution -> ReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=5, padding=padding),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    @staticmethod
    def _dense_stage(in_features, out_features, drop_p):
        """Build a linear -> ReLU -> dropout stage."""
        return nn.Sequential(
            nn.Linear(in_features=in_features, out_features=out_features),
            nn.ReLU(),
            nn.Dropout(drop_p),
        )

    def __init__(self, drop_p=0, bn_flag=False):
        super().__init__()
        # Attribute names and Sequential indices define the state_dict keys,
        # so they must stay stable for previously saved checkpoints to load.
        self.batch_norm_layer = nn.BatchNorm2d(1) if bn_flag else None
        self.layer_1 = self._conv_stage(1, 6, padding=2)
        self.layer_2 = self._conv_stage(6, 16)
        self.layer_3 = self._dense_stage(16 * 5 * 5, 120, drop_p)
        self.layer_4 = self._dense_stage(120, 84, drop_p)
        self.layer_5 = nn.Sequential(
            nn.Linear(in_features=84, out_features=10)
        )
        self.criterion = nn.CrossEntropyLoss(reduction='sum')

    def forward(self, X):
        """Return the raw class scores (logits) for a batch of images ``X``."""
        features = X
        if self.batch_norm_layer is not None:
            features = self.batch_norm_layer(features)
        features = self.layer_2(self.layer_1(features))
        # Flatten everything except the batch dimension for the linear layers.
        flat = features.view(X.shape[0], -1)
        return self.layer_5(self.layer_4(self.layer_3(flat)))


In [None]:
def fit(model, train_loader, val_loader, file_name, max_epochs=50, save_model=False):
    """Train ``model`` and record per-epoch loss/accuracy on both loaders.

    The function relies on module-level names: ``optimizer`` (must already be
    bound to an optimizer over ``model``'s parameters), ``DEVICE``, ``PATH``,
    ``evaluation`` and ``generate_plots``.

    Args:
        model: Network exposing a ``criterion`` attribute used as the loss.
        train_loader: Iterable of ``(samples, labels)`` training batches.
        val_loader: Iterable of ``(samples, labels)`` validation batches.
        file_name: Name (may include a sub-directory) used for the plot and,
            when ``save_model`` is set, for the checkpoint under
            ``{PATH}/Models``.
        max_epochs: Number of epochs to run; must be at least 1 because the
            last epoch's results are returned.
        save_model: When True, the state dict is saved every time the
            validation accuracy improves on the best value seen so far.

    Returns:
        ``(train_result, val_result)`` of the LAST epoch, each a
        ``(loss, accuracy)`` tuple as produced by ``evaluation``. Note that a
        saved checkpoint corresponds to the best validation epoch, which is
        not necessarily the last one.
    """
    train_results = []
    val_results = []
    best_acc = 0
    for epoch in range(max_epochs):
        # evaluation() switches the model to eval mode, so training mode has
        # to be re-enabled at the start of EVERY epoch; otherwise Dropout and
        # BatchNorm would only be active during the first epoch.
        model.train()
        for samples, labels in train_loader:
            # Forward pass
            pred = model(samples.to(DEVICE))
            batch_loss = model.criterion(pred, labels.to(DEVICE))

            # Propagating the loss backward and optimizing the parameters
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        train_results.append(evaluation(model, train_loader))
        val_results.append(evaluation(model, val_loader))
        # Checkpoint only on a strict improvement of the validation accuracy.
        if save_model and val_results[-1][1] > best_acc:
            torch.save(model.state_dict(), f"{PATH}/Models/{file_name}.pt")
            best_acc = val_results[-1][1]
        print(f"epoch: {epoch}, train loss: {train_results[-1][0]}, validation loss: {val_results[-1][0]} \
        train accuracy: {train_results[-1][1]}, validation accuracy: {val_results[-1][1]}")
    generate_plots(train_results, val_results, file_name)
    return train_results[-1], val_results[-1]


def evaluation(model, data_loader):
    """Compute the mean loss and the accuracy of ``model`` over ``data_loader``.

    The model is evaluated in eval mode with gradients disabled, and its
    previous train/eval mode is restored afterwards so that calling this
    function in the middle of training does not silently disable Dropout or
    BatchNorm updates for the following epochs.

    Args:
        model: Network exposing a ``criterion`` attribute; the criterion is
            expected to return a loss summed over the batch, since the total
            is divided by the number of samples.
        data_loader: Iterable of ``(samples, labels)`` batches. It must yield
            at least one sample, otherwise the final division fails.

    Returns:
        ``(mean_loss, accuracy)`` as Python floats.
    """
    was_training = model.training
    # Move batches to wherever the model actually lives; fall back to the
    # notebook-wide DEVICE for a model without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else DEVICE

    corr = 0
    total_loss = 0
    size = 0

    model.eval()
    try:
        with torch.no_grad():
            for samples, labels in data_loader:
                size += len(labels)
                samples, labels = samples.to(device), labels.to(device)
                outputs = model(samples)
                # Index of the largest score per row is the predicted class.
                pred = torch.max(outputs, 1)[1]
                corr += (pred == labels).sum()
                total_loss += model.criterion(outputs, labels)
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)
    return (float(total_loss) / size, float(corr) / size)


def generate_plots(train_results, val_results, file_name):
    """Save side-by-side loss and accuracy curves to ``{PATH}/Results``.

    Args:
        train_results: Sequence of per-epoch ``(loss, accuracy)`` pairs for
            the training data.
        val_results: Sequence of per-epoch ``(loss, accuracy)`` pairs for the
            validation data.
        file_name: Output name without extension, relative to
            ``{PATH}/Results``; it may contain a sub-directory.
    """
    out_path = f'{PATH}/Results/{file_name}.png'
    # file_name can include a sub-directory, so create the image's full parent
    # directory (not just "Results"); exist_ok avoids the check-then-create race.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # Convert once; column 0 holds the loss, column 1 the accuracy.
    train_arr = np.array(train_results)
    val_arr = np.array(val_results)

    fig, axs = plt.subplots(1, 2, figsize=(50, 15))
    for ax, column, metric in zip(axs, (0, 1), ("loss", "accuracy")):
        ax.set_xlabel("epoch")
        ax.set_ylabel(metric)
        ax.plot(train_arr[:, column], label=f'training {metric}', linewidth=10.0)
        ax.plot(val_arr[:, column], label=f'validation {metric}', linewidth=10.0)
        ax.set_title(f'{metric.capitalize()} per epoch')
        ax.legend()
        ax.grid()

    plt.savefig(out_path, bbox_inches='tight')
    # Close this specific figure so repeated calls do not accumulate open figures.
    plt.close(fig)



## **Find Hyperparameters**

In [None]:
# Grid search over learning rate and batch size for the baseline model.
lr_list = np.arange(0.0005, 0.001, 0.00005)
batch_size = [128, 256, 64]
# Maps (batch_size, learning_rate) -> (train_result, val_result) returned by fit().
results = {}

MODE = "Original"

# fit()/generate_plots() write into these per-mode folders.
os.makedirs(f"{PATH}/Results/{MODE}", exist_ok=True)
os.makedirs(f"{PATH}/Models/{MODE}", exist_ok=True)

for bs in batch_size:
    train_loader = DataLoader(train_set, batch_size=bs, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=bs, shuffle=False)
    for lr in lr_list:
        # A fresh model per configuration so runs do not share weights.
        model = FashionLeNet().to(DEVICE)
        # fit() reads this module-level `optimizer`, so it must be rebound
        # here before every call.
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        print(f"Start fit model with Batch Size: {bs}, learning_rate: {lr}")
        results[(bs, lr)] = fit(model, train_loader, val_loader, f"{MODE}/{bs}_{lr}", save_model=False)

# Use "/" as the separator: the previous "\res_dic" contained the escape "\r"
# (carriage return), which corrupted the output path.
np.save(f"{PATH}/res_dic", results, allow_pickle=True)
   

## **Train Models**

In [None]:
# Hyperparameters shared by every training cell below.
LEARNING_RATE = 0.0007
BATCH_SIZE = 256

# One name per trained variant; also used as the sub-folder name for the
# variant's checkpoint and plots.
MODES = ["Original", "Dropout", "L2", "BatchNormalization"]

# Training batches are reshuffled each epoch; validation order is kept fixed.
train_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=BATCH_SIZE, shuffle=False)

def fit_model_for_mode(mode, model, optimizer, save_model=True):
    """Train ``model`` for one variant and store its plots and checkpoint.

    Output folders ``{PATH}/Results/{mode}`` and ``{PATH}/Models/{mode}`` are
    created if missing, then ``fit`` is run on the module-level
    ``train_loader`` / ``val_loader``.

    Args:
        mode: Variant name; used as the sub-folder for plots and checkpoints.
        model: Network to train.
        optimizer: Optimizer for ``model``. NOTE(review): ``fit`` does not take
            an optimizer argument and reads the module-level ``optimizer``
            instead, so the caller must also bind that global to this object.
        save_model: Whether ``fit`` should checkpoint the best-validation
            weights. Defaults to True because ``evaluate_trained_model`` later
            loads ``{PATH}/Models/{mode}/{BATCH_SIZE}_{LEARNING_RATE}.pt``,
            which was previously never written by this function.

    Returns:
        Whatever ``fit`` returns: the last epoch's train and validation
        ``(loss, accuracy)`` tuples.
    """
    for sub_dir in ("Results", "Models"):
        os.makedirs(f"{PATH}/{sub_dir}/{mode}", exist_ok=True)
    return fit(
        model,
        train_loader,
        val_loader,
        f"{mode}/{BATCH_SIZE}_{LEARNING_RATE}",
        save_model=save_model,
    )

def evaluate_trained_model(mode):
    """Load the saved checkpoint for ``mode`` and print train/test accuracy.

    Uses the module-level ``train_loader`` and ``test_loader`` that are bound
    at call time, together with ``PATH``, ``BATCH_SIZE``, ``LEARNING_RATE``
    and ``DEVICE``.

    Args:
        mode: Variant name; selects the checkpoint folder and, for
            ``'BatchNormalization'``, the architecture with the batch-norm
            layer so the state dict keys match.
    """
    # Only the batch-norm variant has extra parameters; dropout adds none, so
    # the default architecture can load every other variant's weights.
    model = FashionLeNet(bn_flag=(mode == 'BatchNormalization')).to(DEVICE)
    # map_location lets a checkpoint saved on a GPU runtime be loaded when
    # only a CPU (or a different device) is available.
    state = torch.load(
        f"{PATH}/Models/{mode}/{BATCH_SIZE}_{LEARNING_RATE}.pt",
        map_location=DEVICE,
    )
    model.load_state_dict(state)
    _, train_accuracy = evaluation(model, train_loader)
    _, test_accuracy = evaluation(model, test_loader)
    print(f"Model: {mode}, Train Accuracy: {train_accuracy}, Test Accuracy: {test_accuracy}")


### **Original**

In [None]:
# Baseline: plain network, no dropout, weight decay or batch normalization.
mode = MODES[0]

model = FashionLeNet()
model = model.to(DEVICE)
# `optimizer` must be a module-level name: fit() looks it up globally.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
results = fit_model_for_mode(mode=mode, model=model, optimizer=optimizer)
   

### **Dropout**

In [None]:
# Dropout variant: p=0.5 after each of the two hidden fully connected layers.
mode = MODES[1]

model = FashionLeNet(drop_p=0.5)
model = model.to(DEVICE)
# `optimizer` must be a module-level name: fit() looks it up globally.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
results = fit_model_for_mode(mode=mode, model=model, optimizer=optimizer)

### **Weight Decay**

In [None]:
# Weight-decay variant: baseline network, L2 penalty applied through Adam's weight_decay.
mode = MODES[2]

model = FashionLeNet()
model = model.to(DEVICE)
# `optimizer` must be a module-level name: fit() looks it up globally.
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=0.01,
)
results = fit_model_for_mode(mode=mode, model=model, optimizer=optimizer)

### **Batch Normalization**

In [None]:
# Batch-normalization variant: BatchNorm2d is applied to the input channel.
mode = MODES[3]

model = FashionLeNet(bn_flag=True)
model = model.to(DEVICE)
# `optimizer` must be a module-level name: fit() looks it up globally.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
results = fit_model_for_mode(mode=mode, model=model, optimizer=optimizer)


### **Evaluations**
Compute train and test accuracy for the previously saved models.


In [None]:
# Rebind train_loader to the complete training set (before the train/validation
# split); evaluate_trained_model() reads train_loader and test_loader globally.
train_loader = DataLoader(dataset=total_train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, shuffle=False)

# Report accuracy for every trained variant.
for mode in MODES:
    evaluate_trained_model(mode)

Model: Original, Train Accuracy: 0.9385166666666667, Test Accuracy: 0.9071
Model: Dropout, Train Accuracy: 0.9362666666666667, Test Accuracy: 0.9029
Model: L2, Train Accuracy: 0.9298833333333333, Test Accuracy: 0.9012
Model: BatchNormalization, Train Accuracy: 0.9318166666666666, Test Accuracy: 0.9013
