In [1]:
import torch
import torch.nn
import torch.optim as optim
import numpy as np
import glob
import cv2
import torch.nn.functional as F
from torch.autograd import Variable

import torchvision
import torchvision.transforms as transform
from torch.nn import CrossEntropyLoss, Dropout, Softmax, Linear, Conv2d,LayerNorm
import matplotlib.pyplot as plt
from torchsummary import summary

In [2]:
import torch
import torch.nn as nn

def compare_bn(bn1, bn2):
    """Print any differences between the state of two batch-norm layers.

    The running mean and running variance are always compared. The affine
    ``weight`` and ``bias`` are compared only when both layers are affine.
    Every mismatch is printed together with both values; when nothing
    differs a single confirmation line is printed instead.

    Args:
        bn1: first batch-norm module.
        bn2: second batch-norm module.
    """
    def differs(attr, template):
        # Report (and flag) one attribute if the two layers disagree on it.
        if torch.allclose(getattr(bn1, attr), getattr(bn2, attr)):
            return False
        print(template.format(getattr(bn1, attr), getattr(bn2, attr)))
        return True

    mismatches = [
        differs('running_mean', 'Diff in running_mean: {} vs {}'),
        differs('running_var', 'Diff in running_var: {} vs {}'),
    ]
    if bn1.affine and bn2.affine:
        mismatches.append(differs('weight', 'Diff in weight: {} vs {}'))
        mismatches.append(differs('bias', 'Diff in bias: {} vs {}'))

    if not any(mismatches):
        print('All parameters are equal!')

class MyBatchNorm2d(nn.BatchNorm2d):
    """Hand-written 2D batch normalisation that mirrors ``nn.BatchNorm2d``.

    The constructor arguments are forwarded unchanged to ``nn.BatchNorm2d``,
    so the parameters and buffers (and therefore the ``state_dict``) are the
    ones created by the parent class. Only ``forward`` is re-implemented.
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1,
                 affine=True, track_running_stats=True):
        super(MyBatchNorm2d, self).__init__(num_features, eps, momentum, affine, track_running_stats)

    def forward(self, input):
        """Normalise ``input`` per channel and apply the optional affine map.

        Args:
            input: 4D tensor; statistics are reduced over dimensions 0, 2 and 3,
                leaving one value per entry of dimension 1.

        Returns:
            Tensor of the same shape as ``input``.
        """
        self._check_input_dim(input)

        # Batch statistics are used while training, and also in eval mode when
        # the layer keeps no running statistics (track_running_stats=False
        # leaves the running buffers as None).
        use_batch_stats = (
            self.training or self.running_mean is None or self.running_var is None
        )

        if use_batch_stats:
            mean = input.mean([0, 2, 3])
            var = input.var([0, 2, 3], unbiased=False)
        else:
            mean = self.running_mean
            var = self.running_var

        # Running statistics are only touched when they exist and we are training.
        if self.training and self.track_running_stats:
            if self.num_batches_tracked is not None:
                self.num_batches_tracked += 1
            if self.momentum is None:
                # momentum=None means a cumulative moving average.
                exponential_average_factor = 1.0 / float(self.num_batches_tracked)
            else:
                exponential_average_factor = self.momentum

            # Number of values that contributed to each channel statistic.
            n = input.numel() / input.size(1)
            with torch.no_grad():
                self.running_mean = (
                    exponential_average_factor * mean
                    + (1 - exponential_average_factor) * self.running_mean
                )
                # The running variance stores the unbiased estimate, hence n / (n - 1).
                self.running_var = (
                    exponential_average_factor * var * n / (n - 1)
                    + (1 - exponential_average_factor) * self.running_var
                )

        input = (input - mean[None, :, None, None]) / torch.sqrt(var[None, :, None, None] + self.eps)

        if self.affine:
            input = self.weight[None, :, None, None] * input + self.bias[None, :, None, None]

        return input


# Build the custom layer and the reference layer with identical settings.
my_bn = MyBatchNorm2d(3, affine=True)
bn = nn.BatchNorm2d(3, affine=True)
compare_bn(my_bn, bn)

# Copy the reference state into the custom layer and compare again.
my_bn.load_state_dict(bn.state_dict())
compare_bn(my_bn, bn)

# First pass runs both layers in training mode (the default after construction),
# second pass switches both to eval mode before feeding the same kind of data.
for switch_to_eval in (False, True):
    if switch_to_eval:
        my_bn.eval()
        bn.eval()
    for _ in range(10):
        # Random per-batch scale and offset so the statistics vary between batches.
        scale = torch.randint(1, 10, (1,)).float()
        bias = torch.randint(-10, 10, (1,)).float()
        x = torch.randn(10, 3, 100, 100) * scale + bias
        out1, out2 = my_bn(x), bn(x)
        compare_bn(my_bn, bn)
        print('Max diff: ', (out1 - out2).abs().max())


All parameters are equal!
All parameters are equal!
All parameters are equal!
Max diff:  tensor(4.7684e-07, grad_fn=<MaxBackward1>)
All parameters are equal!
Max diff:  tensor(4.7684e-07, grad_fn=<MaxBackward1>)
All parameters are equal!
Max diff:  tensor(9.5367e-07, grad_fn=<MaxBackward1>)
All parameters are equal!
Max diff:  tensor(4.7684e-07, grad_fn=<MaxBackward1>)
All parameters are equal!
Max diff:  tensor(4.7684e-07, grad_fn=<MaxBackward1>)
All parameters are equal!
Max diff:  tensor(9.5367e-07, grad_fn=<MaxBackward1>)
All parameters are equal!
Max diff:  tensor(7.1526e-07, grad_fn=<MaxBackward1>)
All parameters are equal!
Max diff:  tensor(4.7684e-07, grad_fn=<MaxBackward1>)
All parameters are equal!
Max diff:  tensor(4.7684e-07, grad_fn=<MaxBackward1>)
All parameters are equal!
Max diff:  tensor(4.7684e-07, grad_fn=<MaxBackward1>)
All parameters are equal!
Max diff:  tensor(9.5367e-07, grad_fn=<MaxBackward1>)
All parameters are equal!
Max diff:  tensor(9.5367e-07, grad_fn=<Max

In [3]:
import torch
import torchvision
import torchvision.transforms as transforms

def load_data(data_dir="./data"):
    """Return the CIFAR-10 training and test datasets.

    Both splits are downloaded into ``data_dir`` when missing and share one
    preprocessing pipeline: conversion to a tensor followed by per-channel
    normalisation with mean 0.5 and standard deviation 0.5.

    Args:
        data_dir: directory used as the dataset root.

    Returns:
        A ``(trainset, testset)`` tuple of ``torchvision.datasets.CIFAR10``.
    """
    channel_mean = (0.5, 0.5, 0.5)
    channel_std = (0.5, 0.5, 0.5)
    pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(channel_mean, channel_std),
    ])

    # Training split first, then the held-out split, with the same pipeline.
    splits = [
        torchvision.datasets.CIFAR10(
            root=data_dir, train=is_train, download=True, transform=pipeline
        )
        for is_train in (True, False)
    ]
    return tuple(splits)

In [4]:
# Load (downloading if necessary) the CIFAR-10 train and test splits under ./data.
trainset, testset = load_data(data_dir = './data')

100%|██████████| 170M/170M [00:14<00:00, 11.9MB/s]


In [5]:
# Take the first training sample to inspect what the dataset yields.
image, label = trainset[0]


In [6]:
# Check the Python type of the transformed image.
print(type(image))


<class 'torch.Tensor'>


In [7]:
# Show the dimensions of a single image tensor.
image.shape

torch.Size([3, 32, 32])

In [8]:
# Display the first slice along the leading dimension (the slice keeps that dimension).
image[:1]

tensor([[[-0.5373, -0.6627, -0.6078,  ...,  0.2392,  0.1922,  0.1608],
         [-0.8745, -1.0000, -0.8588,  ..., -0.0353, -0.0667, -0.0431],
         [-0.8039, -0.8745, -0.6157,  ..., -0.0745, -0.0588, -0.1451],
         ...,
         [ 0.6314,  0.5765,  0.5529,  ...,  0.2549, -0.5608, -0.5843],
         [ 0.4118,  0.3569,  0.4588,  ...,  0.4431, -0.2392, -0.3490],
         [ 0.3882,  0.3176,  0.4039,  ...,  0.6941,  0.1843, -0.0353]]])

In [9]:
from torch.utils.data import Dataset

In [10]:
class MyNet(Dataset):
    """Thin ``Dataset`` wrapper that forwards to an underlying dataset.

    Despite its name this is not a network: it only exposes the length and
    the ``(image, label)`` pairs of the dataset it wraps.
    """

    def __init__(self, dataset):
        # The wrapped dataset; expected to yield two-element samples.
        self.dataset = dataset

    def __len__(self):
        """Number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as an ``(image, label)`` tuple."""
        img, target = self.dataset[idx]
        return (img, target)

In [11]:
# Wrap both CIFAR-10 splits in the pass-through Dataset defined above.
train_set = MyNet(trainset)
test_set = MyNet(testset)

In [12]:
from torch.utils.data import DataLoader

In [13]:
# Shuffle only the training data. Evaluation does not benefit from shuffling,
# and a fixed order keeps per-batch evaluation results reproducible.
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

In [14]:
class Net_NN(nn.Module):
    """Small CNN classifier: two conv/batch-norm/pool stages and three linear layers.

    Args:
        pdrop: dropout probability applied after the first two linear layers.
        in_channels: number of channels of the input images (default 3).
        num_classes: number of output logits (default 10).

    The first linear layer expects ``32 * 8 * 8`` features, i.e. 32x32 inputs
    that have been halved twice by the two 2x2 max-pool layers.
    """

    def __init__(self, pdrop=0.4, in_channels=3, num_classes=10):
        super(Net_NN, self).__init__()
        self.dropout = nn.Dropout(pdrop)
        self.relu = nn.ReLU()

        # Stage 1: in_channels -> 16 feature maps, spatial size halved by pooling.
        self.cnn1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 16 -> 32 feature maps, spatial size halved again.
        self.cnn2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(32)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head.
        self.linear1 = nn.Linear(in_features=32 * 8 * 8, out_features=256)
        self.linear2 = nn.Linear(in_features=256, out_features=128)
        self.linear3 = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images.

        Args:
            x: tensor of shape (batch, in_channels, 32, 32).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised scores.
        """
        x = self.cnn1(x)       # (B, in_channels, 32, 32) -> (B, 16, 32, 32)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.pool1(x)      # (B, 16, 32, 32) -> (B, 16, 16, 16)

        x = self.cnn2(x)       # (B, 32, 16, 16)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.pool2(x)      # (B, 32, 8, 8)

        # Flatten everything except the batch dimension.
        x = torch.flatten(x, 1)

        tensor_x = self.linear1(x)
        tensor_x = self.relu(tensor_x)
        tensor_x = self.dropout(tensor_x)

        tensor_x = self.linear2(tensor_x)
        tensor_x = self.relu(tensor_x)
        tensor_x = self.dropout(tensor_x)

        tensor_x = self.linear3(tensor_x)
        return tensor_x







In [15]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using:", device)

Using: cuda


In [16]:
# Model, loss, optimizer and bookkeeping for the baseline training run.
model_cnn = Net_NN(pdrop=0.4).to(device)
# NOTE: `critetion` is a misspelling of "criterion"; the training loop refers to it by this name.
critetion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_cnn.parameters(), lr = 0.001)
epochs = 10
# Per-epoch average losses, appended to by the training loop.
train_loss = []
test_loss = []

In [17]:
# Each epoch: one optimisation pass over the training loader, then one
# gradient-free pass over the test loader. Both averages are per batch.
for epoch in range(epochs):
    # ---- training pass ----
    model_cnn.train()
    loss_sum = 0.0
    for img, label in train_loader:
        img, label = img.to(device), label.to(device)

        optimizer.zero_grad()
        loss = critetion(model_cnn(img), label)
        loss.backward()
        optimizer.step()
        loss_sum += loss.item()

    avg_training_loss = loss_sum / len(train_loader)
    train_loss.append(avg_training_loss)
    print(f"Epoch {epoch+1}/{epochs} | Train Loss: {avg_training_loss:.4f}")

    # ---- evaluation pass ----
    model_cnn.eval()
    eval_sum = 0
    with torch.no_grad():
        for img, label in test_loader:
            img, label = img.to(device), label.to(device)
            eval_sum += critetion(model_cnn(img), label).item()

    avg_test_loss = eval_sum / len(test_loader)
    test_loss.append(avg_test_loss)
    print(f"Epoch {epoch+1}/{epochs} | Test Loss: {avg_test_loss:.4f}")


Epoch 1/10 | Train Loss: 1.5473
Epoch 1/10 | Test Loss: 1.2203
Epoch 2/10 | Train Loss: 1.2297
Epoch 2/10 | Test Loss: 1.0827
Epoch 3/10 | Train Loss: 1.1085
Epoch 3/10 | Test Loss: 0.9934
Epoch 4/10 | Train Loss: 1.0338
Epoch 4/10 | Test Loss: 0.9172
Epoch 5/10 | Train Loss: 0.9687
Epoch 5/10 | Test Loss: 0.9023
Epoch 6/10 | Train Loss: 0.9217
Epoch 6/10 | Test Loss: 0.8579
Epoch 7/10 | Train Loss: 0.8790
Epoch 7/10 | Test Loss: 0.8429
Epoch 8/10 | Train Loss: 0.8424
Epoch 8/10 | Test Loss: 0.8499
Epoch 9/10 | Train Loss: 0.8167
Epoch 9/10 | Test Loss: 0.8178
Epoch 10/10 | Train Loss: 0.7790
Epoch 10/10 | Test Loss: 0.8098


In [18]:
# Persist only the learned parameters and buffers (the state dict), not the module object.
torch.save(model_cnn.state_dict(), "cnn_model.pth")


In [19]:
from google.colab import drive

# Try to unmount first in case a previous mount is stuck:
!fusermount -u /content/drive 2>/dev/null

# Mount Google Drive at /content/drive, forcing a fresh mount.
drive.mount('/content/drive', force_remount=True)



Mounted at /content/drive


In [20]:

# List the working directory to confirm the checkpoint file was written.
!ls -lh


total 2.2M
-rw-r--r-- 1 root root 2.2M Oct 22 11:00 cnn_model.pth
drwxr-xr-x 3 root root 4.0K Oct 22 10:57 data
drwx------ 5 root root 4.0K Oct 22 11:00 drive
drwxr-xr-x 1 root root 4.0K Oct 20 20:02 sample_data


In [21]:
import shutil

# Move the checkpoint with Python rather than `!mv`: a failed shell command is
# silently ignored by the notebook, so the success message below would be
# printed even when nothing was moved. shutil.move raises on failure instead.
shutil.move("cnn_model.pth", "/content/drive/MyDrive/cnn_model.pth")
print("✅ Model moved to Google Drive!")

✅ Model moved to Google Drive!


In [22]:
!pip install ray[tune]


Collecting ray[tune]
  Downloading ray-2.50.1-cp312-cp312-manylinux2014_x86_64.whl.metadata (21 kB)
Collecting click!=8.3.0,>=7.0 (from ray[tune])
  Downloading click-8.2.1-py3-none-any.whl.metadata (2.5 kB)
Collecting tensorboardX>=1.9 (from ray[tune])
  Downloading tensorboardx-2.6.4-py3-none-any.whl.metadata (6.2 kB)
Downloading click-8.2.1-py3-none-any.whl (102 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.2/102.2 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tensorboardx-2.6.4-py3-none-any.whl (87 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ray-2.50.1-cp312-cp312-manylinux2014_x86_64.whl (71.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.1/71.1 MB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tensorboardX, click, ray
  Attempting uninstall: click
    Found existing installation: click 8

In [23]:
from ray import tune

def train_ray(config):
    """Ray Tune trainable: train ``Net_NN`` on the full dataset and report test metrics.

    NOTE: a later cell in this notebook defines ``train_ray`` again; whichever
    definition was executed last is the one Ray Tune will run.

    Args:
        config: dict with the keys ``"pdrop"``, ``"lr"``, ``"batch_size"``
            and ``"epochs"``.

    Reports, after every epoch, a metrics dict with the mean per-batch test
    ``"loss"`` and the test ``"accuracy"``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = Net_NN(pdrop=config["pdrop"]).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])
    criterion = nn.CrossEntropyLoss()

    train_loader = DataLoader(train_set, batch_size=config["batch_size"], shuffle=True)
    test_loader  = DataLoader(test_set,  batch_size=config["batch_size"], shuffle=False)

    for epoch in range(config["epochs"]):
        model.train()
        for img, label in train_loader:
            img, label = img.to(device), label.to(device)
            optimizer.zero_grad()
            loss = criterion(model(img), label)
            loss.backward()
            optimizer.step()

        model.eval()
        test_loss = 0
        correct = 0
        with torch.no_grad():
            for img, label in test_loader:
                img, label = img.to(device), label.to(device)
                outputs = model(img)
                loss = criterion(outputs, label)
                test_loss += loss.item()
                preds = outputs.argmax(dim=1)
                correct += (preds == label).sum().item()

        # Ray 2.x expects the metrics as a single dict, not as keyword arguments.
        tune.report({
            "loss": test_loss / len(test_loader),
            "accuracy": correct / len(test_set),
        })


In [30]:
import os, torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import DataLoader, Subset
from ray import tune
from ray.tune.schedulers import ASHAScheduler
# Cap the number of samples used for tuning (presumably to keep each trial short).
N_TRAIN = min(10000, len(train_set))
N_TEST  = min(2000, len(test_set))
# Take the first N_TRAIN / N_TEST samples of each split, in dataset order.
train_small = Subset(train_set, range(N_TRAIN))
test_small  = Subset(test_set,  range(N_TEST))
# Display the resulting subset sizes.
len(train_small), len(test_small)




(10000, 2000)

In [31]:
def train_ray(config):
    """Ray Tune trainable: train ``Net_NN`` on the reduced subsets and report test metrics.

    Args:
        config: dict with the keys ``"batch_size"``, ``"pdrop"``, ``"optimizer"``
            (``"adam"`` selects Adam, anything else selects SGD with momentum 0.9),
            ``"lr"`` and ``"epochs"``.

    Reports, after every epoch, a metrics dict with the mean per-batch test
    ``"loss"`` and the test ``"accuracy"`` on ``test_small``.
    """
    # Resolve the device inside the trial instead of relying on a notebook global.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Pinned host memory only helps when batches are copied to a CUDA device.
    pin = device.type == "cuda"

    train_loader = DataLoader(train_small, batch_size=config["batch_size"], shuffle=True, num_workers=2, pin_memory=pin)
    test_loader  = DataLoader(test_small,  batch_size=config["batch_size"], shuffle=False, num_workers=2, pin_memory=pin)

    # Only pass `pdrop`: it is the one constructor argument Net_NN is guaranteed to accept.
    model = Net_NN(pdrop=config["pdrop"]).to(device)
    if config["optimizer"] == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])
    else:
        optimizer = torch.optim.SGD(model.parameters(), lr=config["lr"], momentum=0.9)

    criterion = nn.CrossEntropyLoss()

    for epoch in range(config["epochs"]):
        model.train()
        for img, label in train_loader:
            img, label = img.to(device), label.to(device)
            optimizer.zero_grad(set_to_none=True)
            out = model(img)
            loss = criterion(out, label)
            loss.backward()
            optimizer.step()

        model.eval()
        test_loss, correct = 0.0, 0
        with torch.no_grad():
            for img, label in test_loader:
                img, label = img.to(device), label.to(device)
                out = model(img)
                test_loss += criterion(out, label).item()
                pred = out.argmax(dim=1)
                correct += (pred == label).sum().item()

        avg_test_loss = test_loss / len(test_loader)
        acc = correct / len(test_small)

        # Ray 2.x expects the metrics as a single dict, not as keyword arguments.
        tune.report({"loss": avg_test_loss, "accuracy": acc})

In [34]:
import ray
# Declare one GPU to Ray. ignore_reinit_error makes the cell safe to re-run
# after Ray has already been initialised in this kernel.
ray.init(num_gpus=1, ignore_reinit_error=True)


In [None]:

# Hyperparameter search space; "epochs" is fixed, the rest is sampled per trial.
search_space = {
    "lr": tune.loguniform(1e-4, 1e-2),
    "batch_size": tune.choice([64, 128]),
    "pdrop": tune.uniform(0.2, 0.55),
    "optimizer": tune.choice(["adam"]),
    "epochs": 5,
}

import ray
# Make sure Ray knows about the GPU; harmless if Ray is already initialised.
ray.init(num_gpus=1, ignore_reinit_error=True)
# The scheduler stops poorly performing trials early, judged by the reported "loss".
scheduler = ASHAScheduler(metric="loss", mode="min")
analysis = tune.run(
    train_ray,
    config=search_space,
    num_samples=4,   # few samples for a quick first run; increase once it works
    scheduler=scheduler,
    resources_per_trial={"cpu": 1, "gpu": 1},  # reserve exactly one GPU per trial
    verbose=1
)

print("== Best result ==")
# metric/mode were given to the scheduler, not to tune.run, so the analysis has
# no default metric; name them explicitly instead of using `best_config`.
print(analysis.get_best_config(metric="loss", mode="min"))



2025-10-22 11:13:32,938	INFO worker.py:1851 -- Calling ray.init() again after it has already been called.


+------------------------------------------------------------------+
| Configuration for experiment     train_ray_2025-10-22_11-13-35   |
+------------------------------------------------------------------+
| Search algorithm                 BasicVariantGenerator           |
| Scheduler                        AsyncHyperBandScheduler         |
| Number of trials                 4                               |
+------------------------------------------------------------------+

View detailed results here: /root/ray_results/train_ray_2025-10-22_11-13-35
To visualize your results with TensorBoard, run: `tensorboard --logdir /tmp/ray/session_2025-10-22_11-13-01_452062_1076/artifacts/2025-10-22_11-13-35/train_ray_2025-10-22_11-13-35/driver_artifacts`

Trial status: 4 PENDING
Current time: 2025-10-22 11:13:49. Total running time: 14s
Logical resource usage: 0/2 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:T4)
+------------------------------------------------------------------------------------