# Introduction

*Deep Learning workflows*

1. Data
2. Create a Model
3. Optimize the model parameters (finding the best weights)
4. Save the trained model

# Importing the libraries

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# Download the data

In [None]:
# Load both FashionMNIST splits from ./data (download=True fetches them when
# missing); ToTensor() converts every image to a tensor.
training_data = datasets.FashionMNIST(
  root='data',
  train=True,
  download=True,
  transform=ToTensor(),
)
test_data = datasets.FashionMNIST(
  root='data',
  train=False,
  download=True,
  transform=ToTensor(),
)

In [None]:
# Bare expression: as the last line of a notebook cell it displays the training dataset's repr.
training_data

In [None]:
# Bare expression: as the last line of a notebook cell it displays the test dataset's repr.
test_data

# Batching of this data

In [None]:
batch_size = 64  # samples per mini-batch
# Reshuffle the training samples at every epoch so SGD does not see the exact
# same mini-batches, in the same order, on each pass over the data.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
# Evaluation needs no shuffling, so the test loader keeps the dataset order.
test_dataloader = DataLoader(test_data, batch_size=batch_size)

In [None]:
# Pull a single batch from the test loader to inspect the tensor shapes.
for x,y in test_dataloader:
  print("Input Image Shape : ",x.shape)
  print("Label Shape : ",y.shape)
  break  # only the first batch is needed

Input Image Shape :  torch.Size([64, 1, 28, 28])
Batch Size (64): This is the number of samples in each batch. Neural networks are usually trained on batches of samples rather than one sample at a time, which improves efficiency.

Number of Channels (1): For grayscale or black-and-white images, the number of channels is typically 1. For color images, you would typically have 3 channels (red, green, and blue).

Height (28): This is the height of the image in pixels.

Width (28): This is the width of the image in pixels.

Label Shape :  torch.Size([64]) -- Label


## Creating the Model

In [None]:
# Pick the compute device: 'cuda' when PyTorch reports a usable GPU, otherwise 'cpu'.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
device

In [None]:
class NeuralNetwork(nn.Module): #nn.Module is a parent class
  """Fully connected classifier: flatten -> Linear/ReLU -> Linear/ReLU -> Linear.

  Args:
    in_features: Number of values per sample once flattened. Defaults to
      28*28 = 784, i.e. one 28x28 single-channel image.
    hidden_features: Width of the two hidden layers. Defaults to 512.
    num_classes: Number of output scores (one per class). Defaults to 10.
  """
  def __init__(self, in_features=28*28, hidden_features=512, num_classes=10):
    super().__init__()
    # nn.Flatten keeps the batch dimension and flattens the rest:
    # (N, 1, 28, 28) -> (N, 784)
    self.flatten = nn.Flatten()
    self.linear1 = nn.Linear(in_features, hidden_features)
    self.linear2 = nn.Linear(hidden_features, hidden_features)
    self.linear3 = nn.Linear(hidden_features, num_classes)
    self.relu = nn.ReLU()
  def forward(self,x): #is always used to pass the inputs to the neural network
    """Return the raw class scores (logits) for a batch `x`.

    No softmax is applied here; the output of the last linear layer is
    returned as-is with shape (N, num_classes).
    """
    x = self.flatten(x)
    x = self.linear1(x)
    x = self.relu(x)
    x = self.linear2(x)
    x = self.relu(x)
    x = self.linear3(x)
    return x

In [None]:
# Build the network and move its parameters to the selected device in one step
# (.to() on a module returns the module itself).
model = NeuralNetwork().to(device)

# Optimization - Gradient Descent + Backpropagation

In [None]:
# Objective: cross-entropy computed on the model's output scores.
loss_fn = nn.CrossEntropyLoss()
# Optimizer: plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(
  params=model.parameters(),
  lr=1e-3,
)

In [None]:
def train(dataloader,model,loss_fn,optimizer):
  """Run one pass of gradient-descent updates over `dataloader`.

  For every batch: move it to the module-level `device`, run the forward
  pass, compute the loss, backpropagate, apply the optimizer step and reset
  the gradients. The loss is printed for batch 0 and every 100th batch after.
  """
  model.train()  # switch the model to training mode
  for step, (inputs, targets) in enumerate(dataloader):
    # Batch tensors must live on the same device as the model.
    inputs, targets = inputs.to(device), targets.to(device)

    # Forward pass and loss in one expression.
    loss = loss_fn(model(inputs), targets)

    # Backward pass, then the weight update: W_new = W_old - lr * dL/dW.
    loss.backward()
    optimizer.step()
    # Clear the gradients so the next batch does not accumulate onto them.
    optimizer.zero_grad()

    if step % 100:
      continue
    print(f'Loss of the Model{loss.item()}')

In [None]:
def test(dataloader,model,loss_fn):
  """Evaluate `model` on `dataloader` and print accuracy and average loss.

  The loss is averaged over the number of batches; accuracy is the number of
  correct predictions divided by the dataset size. Batches are moved to the
  module-level `device`. Nothing is returned; the metrics are printed.
  """
  model.eval()  # switch the model to evaluation mode
  batch_count = len(dataloader)
  loss_sum = 0
  hits = 0
  with torch.no_grad():  # gradients are not needed for evaluation
    for inputs, targets in dataloader:
      inputs, targets = inputs.to(device), targets.to(device)

      scores = model(inputs)
      loss_sum += loss_fn(scores, targets).item()

      # A prediction is correct when the highest-scoring class equals the label.
      matches = scores.argmax(1) == targets
      hits += matches.type(torch.float).sum().item()
  test_loss = loss_sum / batch_count
  correct = hits / len(dataloader.dataset)

  print(f'Test Accuracy {100*correct}, Avg_loss : {test_loss}')

# model.train() --
 This method is used to set the model in training mode. When the model is in training mode, certain layers (e.g., dropout layers) may behave differently compared to when the model is in evaluation mode. During training, dropout layers are active, introducing randomness to the network by randomly dropping units.
# Dropout Layers
Dropout is a regularization technique used in neural networks during training to prevent overfitting. It involves randomly setting a fraction of input units to zero at each update during training time, which helps prevent the network from relying too much on specific units. Dropout is typically applied to the input or hidden layers of a neural network.
# During Training
For each training batch, dropout randomly sets a fraction (e.g., 20%) of the input units to zero.
This means that the contribution of those units is temporarily removed, and the network must learn to rely on the remaining units to make predictions.
# model.eval()
This method is used to set the model in evaluation mode. In evaluation mode, layers like dropout layers are typically deactivated.
# During Testing or Inference:
During testing or inference, dropout is turned off, and all units are used.
The idea is that during testing, you want the model to use all the learned information for making predictions without introducing randomness.


# Training Phase

In [None]:
# Alternate one training pass and one evaluation pass per epoch.
epochs = 5  # number of full passes over the training data
for t in range(epochs):
  print(f'Epoch {t+1}')  # t is zero-based; print a one-based epoch number
  train(train_dataloader,model,loss_fn,optimizer)  # update the weights on the training set
  test(test_dataloader,model,loss_fn)  # print accuracy and average loss on the test set

# Save the Model

In [None]:
# Save only the learned parameters (the state_dict), not the model object itself.
# NOTE(review): "/content" looks like a Colab-style absolute path; confirm it exists where this runs.
torch.save(model.state_dict(),"/content/data.pth")

# Loading the saved weights

In [None]:
# A state_dict holds only parameter tensors, so rebuild the architecture first.
model = NeuralNetwork().to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads when only a CPU is available.
model.load_state_dict(torch.load("/content/data.pth", map_location=device))

# Predictions

In [None]:
# Human-readable names, indexed by the integer class label.
classes = ["T-shirt/top","Trouser","Pullover","Dress","Coat","Sandal","Shirt","Sneaker","Bag","Ankle Boot"]

model.eval()
# First test sample: X is the image tensor, y its integer label.
X, y = test_data[0]

with torch.no_grad():
  X = X.to(device)
  # The model works on batches, so add an explicit batch dimension of size 1
  # instead of relying on the single channel axis being taken for the batch axis.
  pred = model(X.unsqueeze(0))
  # .item() turns the 0-d index tensor into a plain int for list indexing.
  predicted,actual = classes[pred[0].argmax(0).item()],classes[y]
  print(f'Predicted {predicted}')
  print(f'Actual {actual}')

In [None]:
# Bare expression: displays whatever `y` currently holds (after the prediction cell, the label of test_data[0]).
y