In [1]:
from fastai import *
from fastai.vision.all import *
import seaborn as sns
from fastbook import *

In [2]:
#downloading the MNIST dataset
path = untar_data(URLs.MNIST)

In [3]:
path, path.ls()

(Path('/storage/data/mnist_png'),
 (#2) [Path('/storage/data/mnist_png/training'),Path('/storage/data/mnist_png/testing')])

In [4]:
#Getting the training datasets ready
'''
Important info:
#training images = 60,000
Each of the categories having roughly equal distribution: train_y.unique(return_counts=True)
'''
train_images_list = get_image_files(path/'training')
train_x_list = [tensor(Image.open(img_path)) for img_path in train_images_list]
train_y_list = [int(img_path.parent.name) for img_path in train_images_list]
train_x = (torch.stack(train_x_list).float()/255).view(-1,28*28)
train_y = tensor(train_y_list).view(-1,1)

train_x.shape, train_y.shape

(torch.Size([60000, 784]), torch.Size([60000, 1]))

In [5]:
train_dset = list(zip(train_x, train_y))

In [7]:
#Getting the validation datasets ready
'''
Important info:
#validation images = 10,000
Each of the categories having roughly equal distribution: valid_y.unique(return_counts=True)
'''
valid_images_list = get_image_files(path/'testing')
valid_x_list = [tensor(Image.open(img_path)) for img_path in valid_images_list]
valid_y_list = [int(img_path.parent.name) for img_path in valid_images_list]
valid_x = (torch.stack(valid_x_list).float()/255).view(-1,28*28)
valid_y = tensor(valid_y_list).view(-1,1)

valid_x.shape, valid_y.shape

(torch.Size([10000, 784]), torch.Size([10000, 1]))

In [None]:
valid_dset = list(zip(valid_x, valid_y))

#### Using fastai packages

In [8]:
dls = ImageDataLoaders.from_folder(path, train='training',valid='testing')
learn = cnn_learner(dls, resnet18, pretrained=False,
                    loss_func=F.cross_entropy, metrics=accuracy, n_out=10)
learn.fit_one_cycle(1, 0.1)

epoch,train_loss,valid_loss,accuracy,time
0,0.09094,3.860469,0.9833,02:04


#### Manual SGD & Model training 

In [10]:
train_dl = DataLoader(train_dset, batch_size=64)
#valid_dl = DataLoader(valid_dset, batch_size=64)

In [11]:
# function to calculate loss
def mnist_loss(pred, actual):
    l = nn.CrossEntropyLoss()
    return l(pred, actual.squeeze())

# function to calculate gradient
def calc_grad(xb, yb, model):
    pred = model(xb)
    loss = mnist_loss(pred, yb)
    loss.backward()    
    return loss

# function to define accuracy
def batch_accuracy(pred, actual):
    digit_pred = pred.max(dim=1)[1]
    return (digit_pred==actual).float().mean()

#function to train 1 epoch and print average batch loss
def train_epoch(model):
    batch_loss = []
    for xb,yb in train_dl:
        batch_loss.append(calc_grad(xb, yb, model))
        opt.step()
        opt.zero_grad()
    print('Average batch loss: ', tensor(batch_loss).mean())

In [12]:
#Optimizer
class BasicOptim:
    def __init__(self,params,lr): self.params,self.lr = list(params),lr

    def step(self, *args, **kwargs):
        for p in self.params: p.data -= p.grad.data * self.lr

    def zero_grad(self, *args, **kwargs):
        for p in self.params: p.grad = None

In [13]:
#Simple 2 activations function NN
simple_net = nn.Sequential(
    nn.Linear(28*28,100),
    nn.ReLU(),
    nn.Linear(100,30),
    nn.ReLU(),
    nn.Linear(30,10)
)

In [14]:
#random accuracy
batch_accuracy(simple_net(valid_x),valid_y)

tensor(0.1007)

In [15]:
opt = BasicOptim(simple_net.parameters(), lr=0.04)

In [16]:
#function to train model for multiple epochs
def train_model(model,epochs):
    for i in range(epochs):
        train_epoch(model)
        print('epoch', i, ': ', batch_accuracy(model(valid_x),valid_y))

In [17]:
#model training call
train_model(simple_net,60)

Average batch loss:  tensor(0.2343)
epoch 0 :  tensor(0.0974)
Average batch loss:  tensor(0.2014)
epoch 1 :  tensor(0.0974)
Average batch loss:  tensor(0.1632)
epoch 2 :  tensor(0.0974)
Average batch loss:  tensor(0.1209)
epoch 3 :  tensor(0.0975)
Average batch loss:  tensor(0.1023)
epoch 4 :  tensor(0.0976)
Average batch loss:  tensor(0.0945)
epoch 5 :  tensor(0.0978)
Average batch loss:  tensor(0.0923)
epoch 6 :  tensor(0.0980)
Average batch loss:  tensor(0.0886)
epoch 7 :  tensor(0.0981)
Average batch loss:  tensor(0.0863)
epoch 8 :  tensor(0.0985)
Average batch loss:  tensor(0.0815)
epoch 9 :  tensor(0.0986)
Average batch loss:  tensor(0.0753)
epoch 10 :  tensor(0.0987)
Average batch loss:  tensor(0.0694)
epoch 11 :  tensor(0.0988)
Average batch loss:  tensor(0.0665)
epoch 12 :  tensor(0.0988)
Average batch loss:  tensor(0.0667)
epoch 13 :  tensor(0.0989)
Average batch loss:  tensor(0.0653)
epoch 14 :  tensor(0.0990)
Average batch loss:  tensor(0.0647)
epoch 15 :  tensor(0.0990)
Av