In [1]:
import torch
from torch import nn
from torchvision import datasets
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor

In [2]:
# get data
train=datasets.MNIST(train=True,download=True,transform=ToTensor(),root="data")
test=datasets.MNIST(train=False,download=True,transform=ToTensor(),root="data")

In [71]:
#Create Data Loader
train_set=DataLoader(dataset=train,batch_size=64)
test_set=DataLoader(dataset=test,batch_size=64)

In [70]:
for x,y in train_set:
    print(x.shape)
    print(y.shape)
    break

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [5]:
# import matplotlib.pyplot as plt
# plt.imshow(train_set.dataset.train_data[0])


In [72]:
train_set.dataset.classes

['0 - zero',
 '1 - one',
 '2 - two',
 '3 - three',
 '4 - four',
 '5 - five',
 '6 - six',
 '7 - seven',
 '8 - eight',
 '9 - nine']

In [7]:
# create architechture

In [42]:
class network(nn.Module):
    def __init__(self):
        super(network,self).__init__()
        self.flatten=nn.Flatten()
        self.linear_stack=nn.Sequential(
            nn.Linear(28*28,512),
            nn.ReLU(),
            nn.Linear(512,512),
            nn.ReLU(),
            nn.Linear(512,10)
            
        )
    def forward(self,x):
        x=self.flatten(x)
        logits=self.linear_stack(x)
        return logits

In [45]:
model=network().to("cpu")
model

network(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [35]:
model.parameters()

<generator object Module.parameters at 0x0000028B604FA340>

In [54]:
optimizer=torch.optim.SGD(model.parameters())
loss_fn=nn.CrossEntropyLoss()


In [55]:
def train_process(model,dataloader,optimizer,loss):
    size=len(dataloader.dataset)
    model.train()
    for batch,(x,y) in enumerate(dataloader):
        x,y=x.to("cpu"),y.to("cpu")
        pred=model(x)
        loss=loss_fn(pred,y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if batch%100==0:
            l=loss.item()
            curr=batch*len(x)
            print(f"loss={round(l,4)},batch={curr}/{size}")
 

In [68]:
def test_process(model,dataloader,loss_fn):
    size=len(dataloader.dataset)
    batches=len(dataloader)
    test_loss=0
    correct=0
    model.eval()
    with torch.no_grad():
        for x,y in dataloader:
            pred=model(x)
            loss=loss_fn(pred,y)
            test_loss+=loss.item()

    test_loss = test_loss/batches
    print(test_loss,"avg test loss")


In [73]:
for i in range(10):
    print(f"Epoch={i+1}")
    train_process(model,train_set,optimizer,loss_fn)
    test_process(model,train_set,loss_fn)
    


Epoch=1
loss=1.6719,batch=0/60000
loss=1.5874,batch=6400/60000
loss=1.6636,batch=12800/60000
loss=1.4764,batch=19200/60000
loss=1.5116,batch=25600/60000
loss=1.5086,batch=32000/60000
loss=1.3886,batch=38400/60000
loss=1.5734,batch=44800/60000
loss=1.4134,batch=51200/60000
loss=1.3293,batch=57600/60000
1.3544045718494 avg test loss
Epoch=2
loss=1.375,batch=0/60000
loss=1.2656,batch=6400/60000
loss=1.3547,batch=12800/60000
loss=1.1781,batch=19200/60000
loss=1.1971,batch=25600/60000
loss=1.2039,batch=32000/60000
loss=1.0848,batch=38400/60000
loss=1.2991,batch=44800/60000
loss=1.1455,batch=51200/60000
loss=1.0685,batch=57600/60000
1.090835335539348 avg test loss
Epoch=3
loss=1.1303,batch=0/60000
loss=1.0092,batch=6400/60000
loss=1.0937,batch=12800/60000
loss=0.9548,batch=19200/60000
loss=0.963,batch=25600/60000
loss=0.9729,batch=32000/60000
loss=0.8638,batch=38400/60000
loss=1.0799,batch=44800/60000
loss=0.9564,batch=51200/60000
loss=0.8891,batch=57600/60000


KeyboardInterrupt: 

In [80]:
torch.save(model.state_dict(),"mode.pt")

In [81]:
model = network()
model.load_state_dict(torch.load("mode.pt"))

<All keys matched successfully>

In [85]:
## Prediction

classes = [ 'zero',
 'one',
 'two',
 'three',
 'four',
 'five',
 'six',
 'seven',
 'eight',
 'nine'
]

model.eval()
x, y = test[1][0], test[1][1]
with torch.no_grad():
    pred = model(x)
    predicted, actual = classes[pred[0].argmax(0)], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')

Predicted: "two", Actual: "two"
