<a href="https://colab.research.google.com/github/saimathura/deeplearning-MDSC302/blob/main/22233_assignment_6_optimization_MDSC_302(P).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Prerequisite Code



In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# FashionMNIST training split, stored under ./data; each sample is passed
# through the ToTensor transform.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# FashionMNIST test split, configured the same way as the training split.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

# Serve both splits in mini-batches of 64 samples.
train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

class NeuralNetwork(nn.Module):
    """Fully connected classifier: 28*28 inputs -> 512 -> 512 -> 10 logits."""

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # The layers are listed first and then unpacked, so the Sequential
        # container still numbers them 0..4.
        layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` and return the raw output (logits) of the layer stack."""
        return self.linear_relu_stack(self.flatten(x))


In [None]:
# Create the NeuralNetwork instance that the later cells train, evaluate and save.
model = NeuralNetwork()

# Hyperparameters
Hyperparameters are adjustable parameters that let you control the model optimization process. Different hyperparameter values can impact model training and convergence rates (read more about hyperparameter tuning).

We define the following hyperparameters for training:
Number of Epochs - the number of times to iterate over the dataset

Batch Size - the number of data samples propagated through the network before the parameters are updated

Learning Rate - how much to update the model's parameters at each batch/epoch. Smaller values yield slow learning speed, while large values may result in unpredictable behavior during training.

In [None]:
# Step size passed as lr= to the SGD optimizer.
learning_rate = 1e-3
# NOTE(review): the DataLoaders are created with a literal batch_size = 64 and
# do not read this variable; keep the two values in sync if either changes.
batch_size = 64
# NOTE(review): reassigned to 10 in the training cell before the loop runs.
epochs = 5

# Optimization Loop
Once we set our hyperparameters, we can then train and optimize our model with an optimization loop. Each iteration of the optimization loop is called an epoch.

Each epoch consists of two main parts:
The Train Loop - iterate over the training dataset and try to converge to optimal parameters.

The Validation/Test Loop - iterate over the test dataset to check if model performance is improving.

# Loss Function
When presented with some training data, our untrained network is likely not to give the correct answer. The loss function measures the degree of dissimilarity between the obtained result and the target value, and it is the loss function that we want to minimize during training. To calculate the loss, we make a prediction using the inputs of our given data sample and compare it against the true data label value.

Common loss functions include nn.MSELoss (Mean Square Error) for regression tasks, and nn.NLLLoss (Negative Log Likelihood) for classification. nn.CrossEntropyLoss combines nn.LogSoftmax and nn.NLLLoss.

In [None]:
# Initialize the loss function: cross-entropy computed on the raw logits returned by the model.
loss_fn = nn.CrossEntropyLoss()

In [None]:
# SGD optimizer over all parameters of `model`, using the learning rate set in the hyperparameter cell.
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

# We are using Stochastic Gradient Descent (SGD)

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one training epoch over ``dataloader``.

    Every 100 batches (starting with the first) the current loss and the
    number of samples processed so far are printed.

    Args:
        dataloader: Iterable of ``(X, y)`` batches. It must expose
            ``.dataset`` so that the total sample count can be reported.
        model: Module to train. It is switched to training mode and is the
            module whose forward pass is evaluated. This parameter used to
            be misspelled, which made the loop silently train the
            module-level ``model`` instead of the argument.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar loss tensor.
        optimizer: Optimizer that applies the parameter update after each
            backward pass.
    """
    size = len(dataloader.dataset)
    # Running count of processed samples; stays exact when a batch is
    # shorter than the others.
    seen = 0

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagate, apply the update, then clear the gradients so they
        # do not accumulate into the next batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        seen += len(X)
        if batch % 100 == 0:
            print(f"loss: {loss.item():>7f} [{seen:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X,y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

        test_loss /= num_batches
        correct /= size
        print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# Re-creates the loss function and the optimizer (both were already defined in
# earlier cells) before training starts.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Overrides the epochs = 5 value from the hyperparameter cell.
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    # One pass over the training data, then an evaluation on the test data.
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.294838 [   6460000]
loss: 2.279994 [ 646460000]
loss: 2.260392 [1286460000]
loss: 2.263224 [1926460000]
loss: 2.248012 [2566460000]
loss: 2.213525 [3206460000]
loss: 2.225666 [3846460000]
loss: 2.186835 [4486460000]
loss: 2.190080 [5126460000]
loss: 2.156047 [5766460000]
Test Error: 
 Accuracy: 49.3%, Avg loss: 2.149728 

Epoch 2
-------------------------------
loss: 2.163769 [   6460000]
loss: 2.146467 [ 646460000]
loss: 2.090441 [1286460000]
loss: 2.112086 [1926460000]
loss: 2.057679 [2566460000]
loss: 1.992190 [3206460000]
loss: 2.026538 [3846460000]
loss: 1.943049 [4486460000]
loss: 1.957940 [5126460000]
loss: 1.875282 [5766460000]
Test Error: 
 Accuracy: 58.4%, Avg loss: 1.878058 

Epoch 3
-------------------------------
loss: 1.914753 [   6460000]
loss: 1.877289 [ 646460000]
loss: 1.765360 [1286460000]
loss: 1.809445 [1926460000]
loss: 1.694783 [2566460000]
loss: 1.642719 [3206460000]
loss: 1.664721 [3846460000]
loss: 1.567552 [4486

# Saving the Models in Different ways


In [None]:
import torchvision.models as models

# Saving the weights


In [None]:
# Save only the model's state_dict (its learned parameters), not the model object itself.
torch.save(model.state_dict(), 'model_weights.pth')

# loading model with weights

In [None]:
# Restore the saved parameters into the existing model instance.
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot run arbitrary code while it is being loaded.
model.load_state_dict(torch.load('model_weights.pth', weights_only=True))
# Switch to evaluation mode; as the last expression of the cell this also
# displays the module structure.
model.eval()

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

# Model State Saving


In [None]:
# Save the whole model object (class reference plus parameters) instead of only the state_dict.
torch.save(model, 'model.pth')

In [None]:
# 'model.pth' holds a fully pickled model object rather than a state_dict, so
# it has to be loaded with weights_only=False: since PyTorch 2.6 the default is
# True, which rejects such files. The NeuralNetwork class must be defined when
# this runs.
# NOTE(security): unpickling can execute arbitrary code; only load files from
# a trusted source this way.
model = torch.load('model.pth', weights_only=False)