In [5]:
#loading database
from torchvision import datasets
import os
import numpy as np

In [6]:
root_directory = os.path.join(os.getcwd(),'Dataset')

In [7]:
train_data = datasets.MNIST(root_directory, train=False,download=True)

In [8]:
x_train = np.array(list(map(lambda x : np.array(x[0]) , train_data)))
y_train = np.array(list(map(lambda x : x[1] , train_data)))

In [9]:
import gc
del train_data
gc.collect()

290

In [10]:
import matplotlib.pyplot as plt

In [11]:
from torch.utils.data import DataLoader,TensorDataset
from torchvision import transforms
from torch import Tensor
import torch

In [12]:
x_train = x_train/255 - 0.5
y_train = Tensor(y_train)
x_train = Tensor(x_train)

In [15]:
x_train = x_train.view(-1,1,28,28)

In [16]:
dataset = TensorDataset(x_train,y_train)
dataloader = DataLoader(dataset,batch_size=8)

In [17]:
del x_train,y_train
del dataset
gc.collect()

23187

### Model

In [18]:
from torch import nn

In [29]:
class Net(nn.Module):
    
    def __init__(self):
        super(Net,self).__init__()
        self.seq_conv = nn.Sequential(
            nn.Conv2d(1,32,3),
            nn.ReLU(),
            nn.Conv2d(32,64,3),
            nn.ReLU()
        )
        self.seq_lin = nn.Sequential(
            nn.Linear(24*24*64,128),
            nn.ReLU(),
            nn.Linear(128,32),
            nn.ReLU(),
            nn.Linear(32,10),
            nn.Softmax(dim=1)
        )
    
    def forward(self,x):
        
        x = self.seq_conv(x)
        x_shape = x.shape
        x = x.view(-1,x_shape[1]*x_shape[2]*x_shape[3])
        x = self.seq_lin(x)
        return x
        

In [30]:
sample_input = Tensor(np.ndarray(2*28*28).reshape(2,1,28,28))


In [31]:
net = Net()

In [32]:
from torchsummary import summary
summary(net,input_size=(1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 26, 26]             320
              ReLU-2           [-1, 32, 26, 26]               0
            Conv2d-3           [-1, 64, 24, 24]          18,496
              ReLU-4           [-1, 64, 24, 24]               0
            Linear-5                  [-1, 128]       4,718,720
              ReLU-6                  [-1, 128]               0
            Linear-7                   [-1, 32]           4,128
              ReLU-8                   [-1, 32]               0
            Linear-9                   [-1, 10]             330
          Softmax-10                   [-1, 10]               0
Total params: 4,741,994
Trainable params: 4,741,994
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.90
Params size (MB): 18.09
Estima

### loss function and optimizer

In [78]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters())

In [79]:
for x,y in dataloader:
    out = net(x)
    loss = loss_fn(out,y.long())
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    

tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4627, grad_fn=<NllLossBackward>)
tensor(1.4616, grad_fn=<NllLossBackward>)
tensor(1.4773, grad_fn=<NllLossBackward>)
tensor(1.5382, grad_fn=<NllLossBackward>)
tensor(1.4618, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.7105, grad_fn=<NllLossBackward>)
tensor(1.4620, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.5858, grad_fn=<NllLossBackward>)
tensor(1.5487, grad_fn=<NllLossBackward>)
tensor(1.5826, grad_fn=<NllLossBackward>)
tensor(1.7097, grad_fn=<NllLossBackward>)
tensor(1.6088, grad_fn=<NllLossBackward>)
tensor(1.5921, grad_fn=<NllLossBackward>)
tensor(1.8315, grad_fn=<NllLossBackward>)
tensor(1.5076, grad_fn=<NllLossBackward>)
tensor(1.9570, grad_fn=<NllLossBackward>)
tensor(1.4619, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4991, grad_fn=<NllLossBackward>)
tensor(1.8299, grad_fn=<NllLossBackward>)
tensor(1.5864, grad_fn=<NllLossBac

tensor(1.9568, grad_fn=<NllLossBackward>)
tensor(1.7080, grad_fn=<NllLossBackward>)
tensor(1.5905, grad_fn=<NllLossBackward>)
tensor(1.4984, grad_fn=<NllLossBackward>)
tensor(1.4814, grad_fn=<NllLossBackward>)
tensor(1.7143, grad_fn=<NllLossBackward>)
tensor(1.5862, grad_fn=<NllLossBackward>)
tensor(1.4625, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.5109, grad_fn=<NllLossBackward>)
tensor(1.5913, grad_fn=<NllLossBackward>)
tensor(1.5861, grad_fn=<NllLossBackward>)
tensor(1.7101, grad_fn=<NllLossBackward>)
tensor(1.8347, grad_fn=<NllLossBackward>)
tensor(1.8276, grad_fn=<NllLossBackward>)
tensor(1.5887, grad_fn=<NllLossBackward>)
tensor(1.5857, grad_fn=<NllLossBackward>)
tensor(1.5857, grad_fn=<NllLossBackward>)
tensor(1.7095, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4624, grad_fn=<NllLossBackward>)
tensor(1.7103, grad_fn=<NllLossBackward>)
tensor(1.4620, grad_fn=<NllLossBackward>)
tensor(1.9597, grad_fn=<NllLossBac

tensor(1.8337, grad_fn=<NllLossBackward>)
tensor(1.6426, grad_fn=<NllLossBackward>)
tensor(1.5863, grad_fn=<NllLossBackward>)
tensor(1.7034, grad_fn=<NllLossBackward>)
tensor(1.7123, grad_fn=<NllLossBackward>)
tensor(1.8332, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.7145, grad_fn=<NllLossBackward>)
tensor(1.7159, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.8356, grad_fn=<NllLossBackward>)
tensor(1.5934, grad_fn=<NllLossBackward>)
tensor(1.5816, grad_fn=<NllLossBackward>)
tensor(1.6934, grad_fn=<NllLossBackward>)
tensor(1.6138, grad_fn=<NllLossBackward>)
tensor(1.5861, grad_fn=<NllLossBackward>)
tensor(1.5861, grad_fn=<NllLossBackward>)
tensor(1.4614, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.8312, grad_fn=<NllLossBackward>)
tensor(1.7077, grad_fn=<NllLossBackward>)
tensor(1.7277, grad_fn=<NllLossBackward>)
tensor(1.7053, grad_fn=<NllLossBackward>)
tensor(1.6957, grad_fn=<NllLossBac

tensor(2.0828, grad_fn=<NllLossBackward>)
tensor(1.9586, grad_fn=<NllLossBackward>)
tensor(1.7040, grad_fn=<NllLossBackward>)
tensor(1.5887, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.5823, grad_fn=<NllLossBackward>)
tensor(1.7010, grad_fn=<NllLossBackward>)
tensor(1.7082, grad_fn=<NllLossBackward>)
tensor(1.5862, grad_fn=<NllLossBackward>)
tensor(1.4636, grad_fn=<NllLossBackward>)
tensor(1.9570, grad_fn=<NllLossBackward>)
tensor(1.8355, grad_fn=<NllLossBackward>)
tensor(1.8510, grad_fn=<NllLossBackward>)
tensor(1.5838, grad_fn=<NllLossBackward>)
tensor(1.8300, grad_fn=<NllLossBackward>)
tensor(1.7112, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.8325, grad_fn=<NllLossBackward>)
tensor(1.5374, grad_fn=<NllLossBackward>)
tensor(1.7064, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4617, grad_fn=<NllLossBackward>)
tensor(1.5460, grad_fn=<NllLossBackward>)
tensor(1.5860, grad_fn=<NllLossBac

tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.6864, grad_fn=<NllLossBackward>)
tensor(1.7093, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4617, grad_fn=<NllLossBackward>)
tensor(1.4614, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4872, grad_fn=<NllLossBackward>)
tensor(1.4651, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.5437, grad_fn=<NllLossBackward>)
tensor(1.4870, grad_fn=<NllLossBackward>)
tensor(1.4881, grad_fn=<NllLossBackward>)
tensor(1.4656, grad_fn=<NllLossBackward>)
tensor(1.4617, grad_fn=<NllLossBackward>)
tensor(1.4654, grad_fn=<NllLossBackward>)
tensor(1.4621, grad_fn=<NllLossBackward>)
tensor(1.6048, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.5872, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBac

tensor(1.4701, grad_fn=<NllLossBackward>)
tensor(1.5280, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.5750, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.5862, grad_fn=<NllLossBackward>)
tensor(1.5829, grad_fn=<NllLossBackward>)
tensor(1.4721, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4629, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.6056, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4643, grad_fn=<NllLossBackward>)
tensor(1.5861, grad_fn=<NllLossBackward>)
tensor(1.8347, grad_fn=<NllLossBackward>)
tensor(1.5198, grad_fn=<NllLossBackward>)
tensor(1.4620, grad_fn=<NllLossBac

tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.5825, grad_fn=<NllLossBackward>)
tensor(1.4628, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.5862, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.5862, grad_fn=<NllLossBackward>)
tensor(1.5225, grad_fn=<NllLossBackward>)
tensor(1.4709, grad_fn=<NllLossBackward>)
tensor(1.8235, grad_fn=<NllLossBackward>)
tensor(1.5862, grad_fn=<NllLossBackward>)
tensor(1.5394, grad_fn=<NllLossBackward>)
tensor(1.5861, grad_fn=<NllLossBackward>)
tensor(1.5845, grad_fn=<NllLossBackward>)
tensor(1.5490, grad_fn=<NllLossBackward>)
tensor(1.4612, grad_fn=<NllLossBackward>)
tensor(1.4631, grad_fn=<NllLossBac

In [110]:
from IPython.display import clear_output

In [120]:
epochs = 2
for epoch in range(epochs):
    loss_t = 0
    accuracy = 0
    for inx,(x,y) in enumerate(dataloader):
        
        if(inx%100==0):
            clear_output()
            print('epoch {} : {} / {}'.format(epoch+1,inx,len(dataloader)))
        out = net(x)
        if(not all(np.argmax(out.detach().numpy(),axis=1)==y.numpy())):
            accuracy+=sum((np.argmax(out.detach().numpy(),axis=1) == y.numpy()))
        loss = loss_fn(out,y.long())
        loss_t+=loss.item()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
    loss_t/=len(dataloader)
    print(loss_t)
    input()

epoch 2 : 1200 / 1250
1.5009912710189819

