## Step by step 
- 1 . Checking device , GPU support CUDA ? CPU .
- 2 . Preparing dataset (MNIST).
- 3 . Define model .
- 4 . Creating Entropy loss function .
- 5 . Training model .
- 6 . Testing .
- 7 . Saving parameters .
- 8 . Prediction .


In [7]:
# 1. Checking device 
import torch

# Check GPU , CPU respectively 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device :", device)


Device : cpu


### 2. Loading database from Pytorch library 
* MNIST 

In [8]:
# import libraries pytorch , numpy , pandas ....

# basic libraries 
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import torch

# more 
import torchvision 
import torchvision.transforms as transforms 

In [9]:
# const values

mean = 0.1307 
std = 0.3081 
# well -known values , standard for dataset

In [10]:
# Define transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((mean,), (std ,))  # MNIST mean and std
])

# Download and load training dataset
trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                     download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                         shuffle=True, num_workers=2)

# Download and load test dataset
testset = torchvision.datasets.MNIST(root='./data', train=False,
                                    download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=1000,
                                        shuffle=False, num_workers=2)

In [11]:
print(f"length of trainloader : {len(trainloader)}")

print(f"length of trainset {len(trainset)}")


print(trainset[0][1])

print(f"size of each image {len(trainset[0])} x {len(trainset[0][0])} x {len(trainset[0][0][0])} x {len(trainset[0][0][0][0])}")
print(f"(image , label ) ; the number of channel ; height ; width respectively ")



length of trainloader : 938
length of trainset 60000
5
size of each image 2 x 1 x 28 x 28
(image , label ) ; the number of channel ; height ; width respectively 


In [12]:
# declare libraries 
import torch 
import torchvision 
import torch.nn.functional as F 
import torch.nn as nn 

### 3. Define model

#### CNN more complex

In [13]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN  , self).__init__()
        self.model = nn.Sequential(
            # Layer 1st 
            nn.Conv2d(1 , 32 , 3 ), # (1 , 28 , 28 ) -> (32 , 26 , 26)
            nn.ReLU(),
            nn.MaxPool2d(2 , 2),    # (32 , 26 , 26) -> (32 , 13 , 13)

            # Layer 2nd 
            nn.Conv2d(32 , 64 , 3 ),    # (32 , 13 , 13) -> (64 , 11 , 11)
            # nn.ReLU(),
            # nn.MaxPool2d(2 , 2),     # ( 64 , 5 , 5)
            nn.BatchNorm2d(64),         # (64 , 11, 11)
            nn.ReLU(),                  # (64 , 11 , 11)

            # Layer 3rd 
            nn.Conv2d(64 , 128 , 3),    # (128 , 9 , 9)
            nn.ReLU() ,
            nn.MaxPool2d(2 , 2),          #  ( 128 , 4 , 4)

            # Layer 4th
            nn.Flatten() , # (128 * 4 * 4)
            nn.Linear(128 * 4 * 4 , 256),  # Fully Connected 1
            nn.Linear(256 , 128 ), 
            nn.ReLU() , 
            nn.Linear( 128 , 10)
        )

    def forward(self , x):
        return self.model(x) 
    
model = CNN().to(device)
print(model)

CNN(
  (model): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Flatten(start_dim=1, end_dim=-1)
    (10): Linear(in_features=2048, out_features=256, bias=True)
    (11): Linear(in_features=256, out_features=128, bias=True)
    (12): ReLU()
    (13): Linear(in_features=128, out_features=10, bias=True)
  )
)


#### Simple CNN  

In [8]:
# class simpleCNN(nn.Module):
#     def __init__(self):
#         super(simpleCNN  , self).__init__()
#         self.model = nn.Sequential(
#             # Layer 1st 
#             nn.Conv2d(1 , 32 , 3 ), # (1 , 28 , 28 ) -> (32 , 26 , 26)
#             nn.ReLU(),
#             nn.MaxPool2d(2 , 2),    # (32 , 26 , 26) -> (32 , 13 , 13)

#             # Layer 2nd 
#             nn.Conv2d(32 , 64 , 3 ),    # (32 , 13 , 13) -> (64 , 11 , 11)
#             nn.ReLU(),
#             nn.MaxPool2d(2 , 2),     # ( 64 , 5 , 5)

#             # Layer 3rd 
#             nn.Flatten() , # (64 , 5 , 5 ) -> ( 64 * 5 * 5)
#             nn.Linear(64 * 5 * 5, 128),  # Fully Connected 1
#             nn.ReLU() , 
#             nn.Linear( 128 , 10)
#         )

#     def forward(self , x):
#         return self.model(x) 
    
# model = simpleCNN().to(device)
# print(model)

### 4. Creating Entropy Loss function

In [9]:
# init parameters 
learning_rate = 0.001 

In [None]:
import torch.optim as optim

# using entropy loss function 
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam với learning rate = 0.001
optimizer = optim.Adam(model.parameters(), lr = learning_rate )


### 5. Training model
* Forward pass 
* Calculate loss 
* Backward pass 
* Update parameters

In [70]:
num_epochs = 15 

for epoch in range(num_epochs):     # 5 times 
    model.train()   # training mode for model 
    running_loss = 0.0
    correct = 0 
    total = 0

    # batch 
    for images , labels in trainloader :
        images , labels = images.to(device) , labels.to(device)     # convert image and label to device 

        # Forward pass 
        outputs = model(images)

        # Calculate loss value 
        loss = criterion(outputs , labels)

        # Backward pass 
        optimizer.zero_grad()   # setup zero to re compute 
        loss.backward()         # compute gradient 
        optimizer.step()        # update parameters 

        running_loss += loss.item()

        # Counting true value 
        _, predicted = torch.max(outputs , 1)
        total += labels.size(0)
        correct += (predicted == labels ).sum().item()
            
        accuracy = 100 * correct / total
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(trainloader):.4f}, Accuracy: {accuracy:.2f}%')
    
print('Training Finished!')


Epoch [1/15], Loss: 0.0000, Accuracy: 98.44%
Epoch [1/15], Loss: 0.0000, Accuracy: 98.44%
Epoch [1/15], Loss: 0.0000, Accuracy: 98.96%
Epoch [1/15], Loss: 0.0001, Accuracy: 99.22%
Epoch [1/15], Loss: 0.0001, Accuracy: 98.75%
Epoch [1/15], Loss: 0.0002, Accuracy: 98.44%
Epoch [1/15], Loss: 0.0003, Accuracy: 97.99%
Epoch [1/15], Loss: 0.0003, Accuracy: 98.24%
Epoch [1/15], Loss: 0.0003, Accuracy: 98.44%
Epoch [1/15], Loss: 0.0003, Accuracy: 98.59%
Epoch [1/15], Loss: 0.0004, Accuracy: 98.58%
Epoch [1/15], Loss: 0.0004, Accuracy: 98.70%
Epoch [1/15], Loss: 0.0004, Accuracy: 98.80%
Epoch [1/15], Loss: 0.0004, Accuracy: 98.88%
Epoch [1/15], Loss: 0.0004, Accuracy: 98.96%
Epoch [1/15], Loss: 0.0004, Accuracy: 98.93%
Epoch [1/15], Loss: 0.0004, Accuracy: 98.99%
Epoch [1/15], Loss: 0.0005, Accuracy: 99.05%
Epoch [1/15], Loss: 0.0005, Accuracy: 99.10%
Epoch [1/15], Loss: 0.0005, Accuracy: 99.14%
Epoch [1/15], Loss: 0.0005, Accuracy: 99.18%
Epoch [1/15], Loss: 0.0005, Accuracy: 99.22%
Epoch [1/1

### 6. Testing 

In [14]:
# Testing on test set  
model.eval()

correct = 0
total = 0

with torch.no_grad():  
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)   # load to device 

        # predict
        outputs = model(images)
        
        # get highest value 
        _, predicted = torch.max(outputs, 1)

        """
        explain torch.max
        outputs = torch.tensor([
    [0.1, 0.3, 0.6, 0.0],  # Ảnh 1: xác suất lớp 2 cao nhất (0.6)
    [0.7, 0.2, 0.1, 0.0],  # Ảnh 2: xác suất lớp 0 cao nhất (0.7)
    [0.1, 0.8, 0.05, 0.05] # Ảnh 3: xác suất lớp 1 cao nhất (0.8)
])
        """
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# calculate accuracy
accuracy = 100 * correct / total
print(f'Test Accuracy: {accuracy:.2f}%')

Test Accuracy: 9.00%


### 7. Saving parameters model

In [15]:
# Lưu mô hình đã huấn luyện vào một file
PATH = './simple_cnn.pth'
torch.save(model.state_dict(), PATH)
print("Save Done!")


Save Done!


In [16]:

model = CNN().to(device)

model.load_state_dict(torch.load(PATH))

model.eval()

print("Done")


Done


  model.load_state_dict(torch.load(PATH))


### 8. Prediction 

In [17]:
def min_max_normalize(tensor, min_value=0.0, max_value=1.0):
    tensor_min = tensor.min()
    tensor_max = tensor.max()
    
    # Apply Min-Max Normalization
    normalized_tensor = (tensor - tensor_min) / (tensor_max - tensor_min)
    # Scale to the desired range
    normalized_tensor = normalized_tensor * (max_value - min_value) + min_value
    return normalized_tensor

def probability( outputs ):
    norm = min_max_normalize(outputs)
    return norm / norm.sum() 


In [27]:
from PIL import Image
import torchvision.transforms as transforms


image = Image.open('data/number2.png')  # your image path 

# Tranform like input training data 
# Define transformations
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # (1 channel)
    transforms.Resize((28, 28)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # 1 channel
])




image = transform(image).unsqueeze(0)  #  (batch size = 1)

model.eval()  # test mode
image = image.to(device) 
output = model(image)

# get label
# value , predicted = torch.max(output, 1)

# print(f'Predict: {predicted.item()}')
# print(f'accuracy : {value}')

print(output)


# print(f"OUTPUT : {output}")
# print(min_max_normalize(output))
# print(f"Softmax output : {F.softmax(output)}")

predictions = probability(output)

# print("en")
# print(predictions[0][0])

for i in range(10):
    print(f"Digits {i} [{predictions[0][i] * 100} %]")


tensor([[ 0.0095, -0.0362,  0.0564, -0.0345,  0.0291, -0.0200,  0.0147,  0.0320,
          0.0913, -0.0286]], grad_fn=<AddmmBackward0>)
Digits 0 [9.601054191589355 %]
Digits 1 [0.0 %]
Digits 2 [19.462858200073242 %]
Digits 3 [0.36953553557395935 %]
Digits 4 [13.73012924194336 %]
Digits 5 [3.4069597721099854 %]
Digits 6 [10.698058128356934 %]
Digits 7 [14.331719398498535 %]
Digits 8 [26.788795471191406 %]
Digits 9 [1.6108887195587158 %]
