# Overview of Some Deep Learning Libraries

## 1.2 Python Libraries

In [4]:
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import iterators, optimizers, training, Chain
from chainer.datasets import mnist

# NOTE(review): the recorded run of this cell ended with
# "AttributeError: `np.sctypes` was removed in the NumPy 2.0 release".
# Presumably Chainer requires NumPy < 2 -- confirm the environment before re-running.

# Load MNIST dataset; get_mnist() is unpacked into a (train, test) pair.
# `test` is not used anywhere in the visible part of this cell.
train, test = mnist.get_mnist()

# Hyperparameters
batchsize = 128  # batch size handed to the training iterator
max_epoch = 10   # used as the (max_epoch, 'epoch') stop trigger of the Trainer

# Iterator over the training split, yielding `batchsize` examples at a time
train_iter = iterators.SerialIterator(train, batchsize)

# MLP model definition
class MLP(Chain):
    """Three-layer fully connected network built from Chainer links.

    Args:
        n_mid_units: Width of each of the two hidden layers.
        n_out: Number of output units (one per class).
    """

    def __init__(self, n_mid_units=100, n_out=10):
        super().__init__()
        # Links must be registered inside init_scope() so that Chainer tracks
        # their parameters. The input size is passed as None for every layer.
        with self.init_scope():
            self.l1 = L.Linear(None, n_mid_units)  # input -> hidden
            self.l2 = L.Linear(None, n_mid_units)  # hidden -> hidden
            self.l3 = L.Linear(None, n_out)        # hidden -> output

    def forward(self, x):
        """Return the raw output of the last linear layer for batch `x`."""
        hidden = F.relu(self.l1(x))
        hidden = F.relu(self.l2(hidden))
        logits = self.l3(hidden)
        return logits

# Create model and wrap with Classifier (adds softmax + loss calc)
model = MLP()
model = L.Classifier(model)

# Set up optimizer: MomentumSGD with its default hyperparameters (no arguments
# are passed), attached to the wrapped model.
optimizer = optimizers.MomentumSGD()
optimizer.setup(model)

# Set up updater: ties the training iterator to the optimizer.
updater = training.updaters.StandardUpdater(train_iter, optimizer)

# Set up trainer: stops after `max_epoch` epochs; 'mnist_result' is passed as `out`.
# No extensions are registered here, so no per-epoch reporting is configured
# in the visible code.
trainer = training.Trainer(updater, (max_epoch, 'epoch'), out='mnist_result')

# Run training
trainer.run()

AttributeError: `np.sctypes` was removed in the NumPy 2.0 release. Access dtypes explicitly instead.

## 1.3 PyTorch and TensorFlow

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """LeNet-5-style CNN producing log-probabilities over 10 classes.

    The network takes single-channel images. `linear4` expects exactly 120
    features after flattening, so `conv3` must reduce the feature map to 1x1;
    this holds for 28x28 inputs (28 -> 28 -> 14 -> 10 -> 5 -> 1).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (tanh is applied in forward()).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)
        self.pool1 = nn.AvgPool2d(2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.pool2 = nn.AvgPool2d(2, stride=2)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
        # Classifier head.
        self.flatten = nn.Flatten()
        self.linear4 = nn.Linear(in_features=120, out_features=84)
        self.linear5 = nn.Linear(in_features=84, out_features=10)
        # Despite the attribute name, this is a LogSoftmax over the class dimension.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Map a batch of images `x` to per-class log-probabilities."""
        feats = self.pool1(torch.tanh(self.conv1(x)))
        feats = self.pool2(torch.tanh(self.conv2(feats)))
        feats = self.flatten(torch.tanh(self.conv3(feats)))
        hidden = torch.tanh(self.linear4(feats))
        logits = self.linear5(hidden)
        return self.softmax(logits)

# Instantiate the model and print its layer structure
model = Model()
print(model)

Model(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool1): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (pool2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (conv3): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear4): Linear(in_features=120, out_features=84, bias=True)
  (linear5): Linear(in_features=84, out_features=10, bias=True)
  (softmax): LogSoftmax(dim=1)
)


In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import datetime

# ==== 1. Model LeNet-5 ====
# Same layer stack as a LeNet-5-style CNN, expressed as nn.Sequential.
# The first Linear expects 120 features, so the last conv must produce a 1x1
# map; that holds for 1x28x28 inputs (28 -> 28 -> 14 -> 10 -> 5 -> 1).
model = nn.Sequential(
    # Block 1: 1 -> 6 channels; padding=2 keeps the spatial size, pooling halves it
    nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),
    nn.Tanh(),
    nn.AvgPool2d(kernel_size=2, stride=2),

    # Block 2: 6 -> 16 channels, unpadded 5x5 conv, then pooling
    nn.Conv2d(6, 16, kernel_size=5),
    nn.Tanh(),
    nn.AvgPool2d(kernel_size=2, stride=2),

    # Block 3: 16 -> 120 channels, then flatten for the linear layers
    nn.Conv2d(16, 120, kernel_size=5),
    nn.Tanh(),
    nn.Flatten(),

    # Classifier head: 120 -> 84 -> 10, emitting log-probabilities
    nn.Linear(120, 84),
    nn.Tanh(),
    nn.Linear(84, 10),
    nn.LogSoftmax(dim=1)
)

# ==== 2. DataLoader for MNIST ====
# ToTensor is the only preprocessing step applied to the images.
transform = transforms.ToTensor()

# Both splits are stored under ./data and downloaded if missing (download=True).
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset  = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Batches of 64; only the training loader is shuffled.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=64, shuffle=False)

# ==== 3. Training function ====
def training_loop(model, optimizer, loss_fn, train_loader, val_loader=None, n_epochs=10):
    """Train `model` and, when validating, restore the best-scoring weights.

    Each epoch runs one pass over `train_loader`, printing a timestamped
    status line with the mean training loss per batch. If `val_loader` is
    given (and non-empty), the mean validation loss is also computed and
    printed, and the weights of the epoch with the lowest validation loss are
    snapshotted. After the last epoch those weights are loaded back into
    `model`.

    Args:
        model: Module to train; it is modified in place.
        optimizer: Optimizer already bound to `model`'s parameters.
        loss_fn: Callable `loss_fn(output, target)` returning a scalar loss tensor.
        train_loader: Sized iterable of `(data, target)` batches.
        val_loader: Optional sized iterable of `(data, target)` batches. A
            falsy value (None or an empty loader) disables validation.
        n_epochs: Number of passes over `train_loader`.

    Returns:
        None. The model is left in eval mode after any completed epoch.
    """
    # Local import keeps this cell self-contained without touching the import block.
    import copy

    best_loss, best_epoch = np.inf, -1
    # Stays None until a validation pass improves on best_loss. This lets the
    # end of the function distinguish "never validated" from "best found".
    best_state = None

    for epoch in range(n_epochs):
        model.train()
        train_loss = 0
        for data, target in train_loader:
            output = model(data)
            loss = loss_fn(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        model.eval()
        status = (f"{str(datetime.datetime.now())} - Epoch {epoch} - "
                  f"Training Loss: {train_loss / len(train_loader):.4f}")

        if val_loader:
            val_loss = 0
            with torch.no_grad():
                for data, target in val_loader:
                    output = model(data)
                    loss = loss_fn(output, target)
                    val_loss += loss.item()
            val_loss_avg = val_loss / len(val_loader)
            status += f", Validation Loss: {val_loss_avg:.4f}"

            if val_loss_avg < best_loss:
                best_loss = val_loss_avg
                best_epoch = epoch
                # state_dict() returns references to the live parameter tensors,
                # which later optimizer steps overwrite in place. A deep copy is
                # required to keep a real snapshot of this epoch.
                best_state = copy.deepcopy(model.state_dict())

        print(status)

    if best_state is None:
        # No validation result was recorded: keep the trained weights as they
        # are instead of reporting a meaningless "inf at epoch -1".
        return

    print(f"\n✅ Best Validation Loss: {best_loss:.4f} at epoch {best_epoch}")
    model.load_state_dict(best_state)

# ==== 4. Run training ====
# Adam with its default hyperparameters; NLLLoss matches the LogSoftmax
# output layer of the model defined above.
optimizer = optim.Adam(model.parameters())
criterion = nn.NLLLoss()

# NOTE(review): the MNIST test split is passed as the validation loader, so the
# "best" epoch is selected on test data -- confirm this is intended.
training_loop(model, optimizer, criterion, train_loader, test_loader, n_epochs=3)

2025-04-11 12:07:01.076087 - Epoch 0 - Training Loss: 0.3110, Validation Loss: 0.1196
2025-04-11 12:07:22.520595 - Epoch 1 - Training Loss: 0.0964, Validation Loss: 0.0742
2025-04-11 12:07:42.806444 - Epoch 2 - Training Loss: 0.0643, Validation Loss: 0.0593

✅ Best Validation Loss: 0.0593 at epoch 2
