In [None]:
'''
Deep Learning workflows
1. Data
2. Create a model
3. Optimize model parameters (finding the best weights)
4. Save the trained model
'''

In [24]:
#importing libraries
import torch  #pytorch library that helps in building the deep learning algorithms
from torch import nn  #nn- neural networks
from torch.utils.data import DataLoader #performs the process of batching by loading the data
from torchvision import datasets #downloads data
from torchvision.transforms import ToTensor #data should be transformed to tensor
#In dataloader we pass dataset which is combination of (X,Y) datapoints 

In [25]:
# Fetch FashionMNIST, one of the datasets bundled with torchvision.
# Both splits live under ./data, are downloaded when missing, and have every
# image converted to a tensor by ToTensor(); only the `train` flag differs.
fashion_mnist_options = dict(root='data', download=True, transform=ToTensor())
training_data = datasets.FashionMNIST(train=True, **fashion_mnist_options)
test_data = datasets.FashionMNIST(train=False, **fashion_mnist_options)

In [26]:
# Echo the training split so its summary (size, root, split, transform) is shown as the cell output.
training_data

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [27]:
# Echo the test split so its summary (size, root, split, transform) is shown as the cell output.
test_data

Dataset FashionMNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [28]:
# Batching: wrap each dataset in a DataLoader that yields mini-batches of
# `batch_size` samples.
batch_size=64
# Reshuffle the training samples at the start of every epoch so that SGD does
# not see the exact same batches in the exact same order each time.
train_dataloader= DataLoader(training_data,batch_size=batch_size,shuffle=True)
# The evaluation metrics are sums over the whole split, so the test loader is
# left in its default sequential order.
test_dataloader= DataLoader(test_data,batch_size=batch_size)

In [29]:
# Peek at the first test batch only, to see what the loader yields.
# Images come out as (batch_size, channels, height, width); FashionMNIST is
# grayscale, so channels == 1. Labels come out as one class index per image.
first_batch = next(iter(test_dataloader), None)
if first_batch is not None:
    x, y = first_batch
    print (x.shape)
    print (y.shape)

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [30]:
# Choose where the model and tensors will live: the GPU when CUDA is usable
# on this machine, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device  # bare expression so the notebook displays the chosen device

'cuda'

In [31]:
class NeuralNetwork(nn.Module):
    """Three-layer fully connected classifier for 28x28 images.

    Pipeline: Flatten -> Linear(784, 512) -> ReLU -> Linear(512, 512) -> ReLU
    -> Linear(512, 10).

    The final layer returns raw scores (logits) for the 10 classes. No softmax
    is applied here because ``nn.CrossEntropyLoss`` performs that step itself.
    """

    def __init__(self):
        """Declare the layers that make up the architecture."""
        super().__init__()  # let nn.Module set up its own bookkeeping first
        # Collapses every dimension after the batch axis: a 28x28 image
        # becomes a vector of 28 * 28 = 784 values.
        self.flatten = nn.Flatten()
        # Input layer: 784 pixel values -> 512 features.
        self.linear1 = nn.Linear(28 * 28, 512)
        # Hidden layer: 512 -> 512 features.
        self.linear2 = nn.Linear(512, 512)
        # Output layer: 512 features -> one score per class (10 classes).
        self.linear3 = nn.Linear(512, 10)
        # Stateless activation, reused after each of the first two linear layers.
        self.relu = nn.ReLU()

    def forward(self, x):
        """Forward propagation: map a batch of images to a batch of logits.

        Args:
            x: batch of images; everything after the first (batch) axis is
                flattened and must total 784 values per sample.

        Returns:
            Tensor of shape (batch, 10) holding un-normalised class scores.
        """
        flat = self.flatten(x)
        hidden = self.relu(self.linear1(flat))
        hidden = self.relu(self.linear2(hidden))
        # No activation on the last layer: the loss function expects logits.
        return self.linear3(hidden)

In [32]:
# Build the network and move all of its parameters onto the selected device
# (.to() returns the same module, so this can be done in one expression).
model = NeuralNetwork().to(device)

In [33]:
# Optimisation setup (gradient descent + backpropagation): the loss to minimise
# and the rule used to update the weights.
# CrossEntropyLoss applies the softmax step to the raw logits itself, which is
# why the network's forward pass ends without an activation.
loss_fn = nn.CrossEntropyLoss()
# Plain stochastic gradient descent over every model parameter, learning rate 0.001.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)

In [34]:
'''
Steps in gradient descent:
1. Batch of input
2. Pass it to model
3. Compute loss fn
4. Update weights
'''

'\nSteps in gradient descent:\n1. Batch of input\n2. Pass it to model\n3. Compute loss fn\n4. Update weights\n'

In [38]:
#training
def train(dataloader,model,loss_fn,optimizer):
    """Run one epoch of mini-batch gradient descent over ``dataloader``.

    For every batch: forward pass, loss computation, backpropagation and one
    optimizer step. The loss of every 100th batch (starting with the first)
    is printed.

    Args:
        dataloader: iterable yielding ``(inputs, targets)`` tensor pairs.
        model: the ``nn.Module`` to train; it is switched to training mode.
        loss_fn: callable ``loss_fn(predictions, targets)`` returning a scalar
            tensor that supports ``backward()``.
        optimizer: optimizer that owns ``model``'s parameters.

    Returns:
        None. ``model`` is updated in place.
    """
    model.train() #putting model in training mode

    # Send each batch to wherever the model's parameters live instead of
    # relying on a notebook-level global, so the function still works when the
    # model was moved after that global was set. A parameter-less model gives
    # no device to target, in which case batches are left where they are.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    for batch, (x,y) in enumerate(dataloader): #(x,y) is one batch of inputs and targets
        if model_device is not None:
            x = x.to(model_device)
            y = y.to(model_device)

        #forward pass: running the model invokes its forward() method
        pred = model(x)
        loss = loss_fn(pred,y)

        #backpropagation
        # Clear gradients BEFORE backward(): backward() adds to whatever is
        # already stored in .grad, so anything left over (e.g. from an
        # interrupted cell or a manual backward() call) would otherwise be
        # mixed into this batch's update.
        optimizer.zero_grad()
        loss.backward() #computes dL/dw for every parameter by replaying the forward graph in reverse
        optimizer.step() #updates the parameters from those gradients, i.e. Wnew = Wold - lr*dL/dw

        if batch %100==0:
            print(f'Loss of model {loss.item()}')

In [39]:
def test(dataloader,model,loss_fn):
  model.eval() #it is in test mode
  test_loss, correct = 0,0
  size = len(dataloader.dataset)
  with torch.no_grad(): #What ever is inside this steps no gradient should be applied
    for batch,(X,y) in enumerate(dataloader):
      X,y = X.to(device), y.to(device)
      pred = model(X) #10x1 vector
      # print (pred.shape)
      # print (pred.argmax(1))
      # print (y)
      # print ((pred.argmax(1) == y).type(torch.float).sum().item())
      # break
      test_loss += loss_fn(pred,y).item()
      '''
      pred.argmax(1) -- position of the maximum probability
      torch.float
      torch.int
      torch.float32
      '''
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()
  print (f'Total correct {correct} out of {size}')
  Accuracy = correct/size
  print (f'Accuracy : {Accuracy*100}')

In [40]:
# Number of full passes over the training set.
epochs = 3

# Each epoch: one training pass over the training loader, then an evaluation
# pass over the test loader to see how the model is progressing.
for epoch_idx in range(epochs):
    print ('Epoch---------------------------')
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)

Epoch---------------------------
Loss of model 2.3020730018615723
Loss of model 2.2868471145629883
Loss of model 2.272218942642212
Loss of model 2.2659924030303955
Loss of model 2.245800495147705
Loss of model 2.2092368602752686
Loss of model 2.2268576622009277
Loss of model 2.1817915439605713
Loss of model 2.17868971824646
Loss of model 2.158397674560547
Total correct 4208.0 out of 10000
Accuracy : 42.08
Epoch---------------------------
Loss of model 2.149186849594116
Loss of model 2.1359856128692627
Loss of model 2.0842831134796143
Loss of model 2.10636568069458
Loss of model 2.0498125553131104
Loss of model 1.9909969568252563
Loss of model 2.0289971828460693
Loss of model 1.9410905838012695
Loss of model 1.946912407875061
Loss of model 1.8925796747207642
Total correct 5788.0 out of 10000
Accuracy : 57.879999999999995
Epoch---------------------------
Loss of model 1.9046450853347778
Loss of model 1.869931697845459
Loss of model 1.7598949670791626
Loss of model 1.8076472282409668
Loss

In [43]:
#Predictions
# Human-readable label for each class index (list position == class id).
classes = ["T-shirt/top","Trouser","Pullover","Dress","Coat","Sandal","Shirt","Sneaker","Bag","Ankle Boot"]

model.eval()
# First test sample: image tensor and its integer class label.
X,y = test_data[0]

with torch.no_grad():
  X = X.to(device)
  # The model expects a batch. Add an explicit leading batch axis instead of
  # relying on nn.Flatten treating the single channel axis as the batch axis,
  # which would silently misbehave for images with more than one channel.
  pred = model(X.unsqueeze(0))
  # pred has one row of class scores; take the best-scoring index as a plain
  # Python int before using it to index the label list.
  predicted,actual = classes[pred[0].argmax(0).item()],classes[y]
  print(f'Predicted {predicted}')
  print(f'Actual {actual}')

Predicted Ankle Boot
Actual Ankle Boot
