In [59]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [60]:

# Training split of MNIST (train=True). The files are stored under
# ./mnist_data (downloaded when requested via download=True) and every image
# is converted to a tensor by ToTensor().
mnist_train = datasets.MNIST(
    root="./mnist_data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# Test split of MNIST (train=False), loaded with the same transform.
mnist_test = datasets.MNIST(
    root="./mnist_data",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

In [61]:

# Number of samples in each split: 60000 training images, 10000 test images.
print(len(mnist_train), len(mnist_test))

# Every sample is an (image, label) pair. The first training image has shape
# [1, 28, 28] (one greyscale channel, 28x28 pixels) and its label, i.e. the
# digit shown in the image, is 5.
sample_image, sample_label = mnist_train[0]
print(sample_image.shape, sample_label)

60000 10000
torch.Size([1, 28, 28]) 5


In [62]:
# Hyperparameter: number of samples per batch.
batch_size = 32

# Training loader: batches of `batch_size` samples, reshuffled every epoch.
train_loader = DataLoader(
    dataset=mnist_train,
    batch_size=batch_size,
    shuffle=True,
)

# Test loader: same batch size, but samples keep their original order.
test_loader = DataLoader(
    dataset=mnist_test,
    batch_size=batch_size,
    shuffle=False,
)

In [63]:

# Pull a single batch from the training loader as a sanity check.
train_iter = iter(train_loader)
first_batch = next(train_iter)
imgs, labels = first_batch

# One batch holds 32 images of shape 1x28x28 and 32 labels (one per image).
print(imgs.shape, labels.shape)

torch.Size([32, 1, 28, 28]) torch.Size([32])


In [64]:
import torch
from torch import nn
from torch.nn import functional as F
from typing import List

class MLPClassifier(nn.Module):
    """Multi-layer perceptron classifier with GELU-activated hidden layers.

    The input is flattened to ``input_dimension`` features per sample, passed
    through one ``nn.Linear`` + GELU per entry of ``hidden_layer_sizes``, and
    finally through a linear output layer that produces ``output_dimension``
    raw scores (logits) per sample. No softmax is applied.
    """

    def __init__(self, input_dimension: int, hidden_layer_sizes: List[int], output_dimension: int):
        """Build the hidden layers and the output layer.

        Args:
            input_dimension: number of features per sample after flattening.
            hidden_layer_sizes: width of each hidden layer, in order. May be
                empty, in which case the model is a single linear layer.
            output_dimension: number of output scores (classes) per sample.
        """
        super().__init__()
        self.input_dimension = input_dimension
        self.output_dimension = output_dimension
        self.hidden_layers = nn.ModuleList()
        previous_size = input_dimension
        for hidden_size in hidden_layer_sizes:
            self.hidden_layers.append(nn.Linear(previous_size, hidden_size))
            previous_size = hidden_size

        # Size the output layer from the argument instead of a hard-coded 10,
        # so the requested number of classes is actually honoured.
        self.output_linear = nn.Linear(previous_size, output_dimension)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the forward pass and return the logits.

        Args:
            x: input batch; it is reshaped to ``(-1, input_dimension)``, so
                its total element count must be a multiple of
                ``input_dimension``.

        Returns:
            Tensor of shape ``(rows, output_dimension)`` holding raw scores,
            where ``rows`` is the first dimension after the reshape.
        """
        # Flatten each sample to a 1-D feature vector. reshape() behaves like
        # view() here but also accepts non-contiguous inputs.
        x = x.reshape(-1, self.input_dimension)
        for layer in self.hidden_layers:
            x = F.gelu(layer(x))  # linear transform followed by GELU activation

        logits = self.output_linear(x)
        return logits

In [65]:
# Train on the CPU; a CUDA device could be selected here instead.
device = torch.device("cpu")

# Each image is flattened to 1 * 28 * 28 = 784 input features.
input_dimension = 1 * 28 * 28
# Size of the output layer: 10 outputs.
output_dimension = 10

# Three hidden layers with 128 units each.
hidden_layer_size = [128, 128, 128]
# Number of passes over the training data.
num_epochs = 5
learning_rate = 0.01
# Multiplicative factor applied to the learning rate by the scheduler
# (0.5 halves it at every scheduler step).
lr_decay_rate = 0.5

# Build the model and move it to the selected device.
model = MLPClassifier(
    input_dimension=input_dimension,
    hidden_layer_sizes=hidden_layer_size,
    output_dimension=output_dimension,
).to(device)

# Loss function: cross-entropy between the model outputs and the labels.
criterion = nn.CrossEntropyLoss()
# AdamW optimizer over all model parameters, starting at `learning_rate`.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
# With step_size=1 the learning rate is multiplied by `lr_decay_rate` on every
# call to lr_scheduler.step().
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=1,
    gamma=lr_decay_rate,
)


In [66]:
output = model(imgs) # forward pass of the sample batch through the not-yet-trained model

# Shapes printed when the commented-out print(x.shape) lines in forward() are
# enabled (the input is flattened from 1x28x28 to a 784-long vector):
#torch.Size([32, 1, 28, 28]) raw input batch
#torch.Size([32, 784]) after flattening, 1*28*28 = 784 input features
#torch.Size([32, 128]) after the 1st hidden layer
#torch.Size([32, 128]) after the 2nd hidden layer
#torch.Size([32, 128]) after the 3rd hidden layer
#torch.Size([32, 10]) output layer: 10 scores for each of the 32 images

In [67]:

# Turn the raw scores of the first image in the batch into probabilities over
# the 10 outputs; the 10 values sum to 1 and the largest one marks the
# predicted class.
output[0].softmax(dim=0)


tensor([0.1067, 0.0963, 0.0901, 0.1055, 0.0962, 0.1008, 0.0952, 0.1009, 0.1038,
        0.1044], grad_fn=<SoftmaxBackward0>)

In [68]:
from typing import Tuple

# Evaluation helper: measures accuracy and loss of a model on a data loader.
def evaluate(
    model: nn.Module,
    test_loader: DataLoader,
    criterion: nn.Module,
    device: torch.device,
) -> Tuple[float, float]:
    """Compute the accuracy and the mean loss of ``model`` over ``test_loader``.

    The model is switched to evaluation mode for the duration of the call and
    its previous train/eval mode is restored afterwards, so the result does
    not depend on the mode the caller left the model in. Gradients are not
    tracked.

    Args:
        model: network mapping a batch of inputs to per-class scores of shape
            ``(batch_size, num_classes)``.
        test_loader: loader yielding ``(inputs, labels)`` batches.
        criterion: loss called as ``criterion(outputs, labels)``. Its value is
            weighted by the batch size, which assumes it returns the mean
            loss over the batch.
        device: device the inputs and labels are moved to before the forward
            pass.

    Returns:
        ``(accuracy, loss)``: the fraction of correctly classified samples and
        the per-sample mean loss, in that order.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    correct = 0  # number of correctly classified samples
    loss_sum = 0.0  # sum over batches of (batch mean loss * batch size)
    total = 0  # number of samples seen

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for input_images, labels in test_loader:
                input_images = input_images.to(device)
                labels = labels.to(device)

                outputs = model(input_images)
                # Predicted class = index of the highest score in each row.
                predicted = torch.argmax(outputs, dim=1)

                # The last batch may be smaller, so weight by the actual size.
                n_samples = labels.size(0)
                total += n_samples

                correct += (predicted == labels).sum().item()
                loss_sum += criterion(outputs, labels).item() * n_samples
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    if total == 0:
        raise ValueError("evaluate() received a data loader that yields no samples")

    return correct / total, loss_sum / total

In [69]:

# Evaluate the model on the test loader before any training has happened.
# evaluate() returns (accuracy, loss).
evaluate(model, test_loader, criterion, device)

# Debug trace kept from an earlier run. NOTE(review): it looks like the
# commented-out print() calls and the `break` in evaluate()/forward() were
# active at the time, so only the first batch was processed - confirm.
#torch.Size([32, 1, 28, 28])- input batch inside evaluate()
#torch.Size([32]) labels of the batch
#torch.Size([32, 1, 28, 28]) input as seen by forward()
#torch.Size([32, 784]) after flattening
#torch.Size([32, 128]) hidden layer 1
#torch.Size([32, 128]) hidden layer 2
#torch.Size([32, 128]) hidden layer 3
#torch.Size([32, 10]) output of forward()
#torch.Size([32, 10]) outputs inside evaluate()
# predicted classes (all 0 before training):
#tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#        0, 0, 0, 0, 0, 0, 0, 0])
# predicted == labels, element by element:
#tensor([False, False, False,  True, False, False, False, False, False, False,
#         True, False, False,  True, False, False, False, False, False, False,
#       False, False, False, False, False,  True, False, False,  True, False,
#        False, False])
#tensor(5) number of correct predictions in the batch
#2.3018720149993896 # loss of that batch, before training
# returned (accuracy, loss); 0.15625 = 5 correct out of 32
#(0.15625, 2.3018720149993896)


(0.0884, 2.305179496765137)

In [70]:

# Baseline: accuracy and loss on the test set before any training step.
eval_accuracy, eval_loss = evaluate(model, test_loader, criterion, device)
print(f"Test initial - accuracy: {eval_accuracy:.4f}  loss: {eval_loss:.4f}") #from eval

# Train for `num_epochs` passes over the training data.
for epoch in range(num_epochs):
    # Running totals for this epoch; converted to per-sample means below.
    train_accuracy = 0.0
    train_loss = 0.0

    model.train() # put the model in training mode for this epoch
    for inputs, labels in train_loader: # loop over each batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients before the backward pass so nothing left over from a
        # previous batch (or an interrupted earlier run of this cell) is
        # accumulated into this update.
        optimizer.zero_grad()

        outputs = model(inputs) # forward pass
        loss = criterion(outputs, labels) # batch loss
        loss.backward() # compute gradients

        optimizer.step() # update the weights using the gradients

        # Use a separate name so the notebook-level `batch_size`
        # hyperparameter is not overwritten by the size of the last batch.
        current_batch_size = inputs.size(0)
        train_loss += loss.item() * current_batch_size # sum of per-sample losses

        predicted = torch.argmax(outputs, dim=1) # predicted class per sample
        train_accuracy += (predicted == labels).sum().item() # count of correct predictions

    lr_scheduler.step() # update the learning rate once per epoch

    # Turn the epoch totals into means over the whole training set.
    train_accuracy /= len(train_loader.dataset)
    train_loss /= len(train_loader.dataset)

    model.eval() # switch to evaluation mode before measuring test metrics
    eval_accuracy, eval_loss = evaluate(model,test_loader, criterion, device)

    print(f"Train epoch - {epoch}/ {num_epochs} - accuracy: {train_accuracy:.4f}  loss: {train_loss:.4f}")
    print(f"Test epoch - {epoch}/ {num_epochs} - accuracy: {eval_accuracy:.4f}  loss: {eval_loss:.4f}")

        

Test initial - accuracy: 0.0884  loss: 2.3052
Train epoch - 0/ 5 - accuracy: 0.9143  loss: 0.3198
Test epoch - 0/ 5 - accuracy: 0.9410  loss: 0.2273
Train epoch - 1/ 5 - accuracy: 0.9622  loss: 0.1363
Test epoch - 1/ 5 - accuracy: 0.9648  loss: 0.1298
Train epoch - 2/ 5 - accuracy: 0.9766  loss: 0.0805
Test epoch - 2/ 5 - accuracy: 0.9702  loss: 0.1135
Train epoch - 3/ 5 - accuracy: 0.9846  loss: 0.0523
Test epoch - 3/ 5 - accuracy: 0.9774  loss: 0.0906
Train epoch - 4/ 5 - accuracy: 0.9899  loss: 0.0340
Test epoch - 4/ 5 - accuracy: 0.9789  loss: 0.0899


In [71]:
#Test epoch - 4/ 5 - accuracy: 0.9688  loss: 0.1788 - 0.02 ,0.8
#-0.01,0.7

#Test initial - accuracy: 0.1562  loss: 2.2850
#Train epoch - 0/ 5 - accuracy: 0.8824  loss: 0.4533
#Test epoch - 0/ 5 - accuracy: 0.9375  loss: 0.2239
#Train epoch - 1/ 5 - accuracy: 0.9392  loss: 0.2273
#Test epoch - 1/ 5 - accuracy: 1.0000  loss: 0.0639
#Train epoch - 2/ 5 - accuracy: 0.9580  loss: 0.1554
#Test epoch - 2/ 5 - accuracy: 0.9688  loss: 0.0591
#Train epoch - 3/ 5 - accuracy: 0.9681  loss: 0.1103
#Test epoch - 3/ 5 - accuracy: 0.9688  loss: 0.0454
#Train epoch - 4/ 5 - accuracy: 0.9745  loss: 0.0856
#Test epoch - 4/ 5 - accuracy: 1.0000  loss: 0.0116
#Test epoch - 4/ 5 - accuracy: 1.0000  loss: 0.0116 - 0.02, 0.5