In [None]:
import numpy as np
import torch
import torch.optim as optim
from torch import nn
from torch.nn import functional as F
from PIL import Image
import torchvision
from torchvision import datasets, transforms
import pandas as pd
from torch.optim.lr_scheduler import StepLR
import matplotlib.pyplot as plt
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader, random_split, ConcatDataset

In [None]:
!pip install huggingface_hub
from huggingface_hub import login

# Replace "YOUR_TOKEN_HERE" with your actual token
login(token="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")



In [None]:
!pip install datasets



In [None]:
from datasets import load_dataset

# Load the Tiny ImageNet dataset by its Hugging Face Hub identifier.
ds = load_dataset(path="zh-plus/tiny-imagenet")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
# Display the (rows, columns) shape of every split in the loaded dataset.
ds.shape

{'train': (100000, 2), 'valid': (10000, 2)}

In [None]:
# Pick the GPU when CUDA is usable, otherwise fall back to the CPU.
# `is_available` must be *called*: the bare function object is always truthy,
# which would select "cuda" even on a machine without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Display the selected compute device.
device

device(type='cuda')

In [None]:
# Standard ImageNet per-channel mean / std, shared by every pipeline below so
# the four Normalize steps cannot drift apart.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training pipeline for colour images: resize, random 224x224 crop, then an
# optional augmentation group, then tensor conversion and normalisation.
transform_train_rgb = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224),
    transforms.RandomApply([
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    ], p=0.5),  # apply this augmentation group with probability 0.5
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Evaluation pipeline for colour images: deterministic resize + centre crop.
transform_test_rgb = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Training pipeline for the grayscale view of the images.
# The previous version never converted to grayscale (its trailing Lambda only
# fired for 1-channel tensors, which never occur because the dataset converts
# every image to RGB), so it produced colour images. Grayscale(3) is applied
# after the colour jitter and keeps three channels, matching the grayscale
# evaluation pipeline and the 3-channel Normalize statistics.
transform_train_gray = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.Grayscale(num_output_channels=3),  # gray image replicated to 3 channels
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Evaluation pipeline for the grayscale view: deterministic crop, then a
# 3-channel grayscale conversion.
transform_test_gray = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.Grayscale(num_output_channels=3),  # gray image replicated to 3 channels
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Định nghĩa Dataset cho ảnh RGB và grayscale
class ImageNetHardDataset(Dataset):
    """Map-style dataset over an indexable collection of image/label records.

    Each element of ``data`` must support ``record['image']`` (an object with a
    PIL-style ``convert`` method) and ``record['label']``.

    Args:
        data: Indexable, sized collection of records.
        transform: Callable applied to the RGB image, or a falsy value to
            return the converted image unchanged.
    """

    def __init__(self, data, transform):
        self.data = data
        self.transform = transform

    def __len__(self):
        """Return the number of records in the wrapped collection."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the record at position ``idx``."""
        # Fetch the record once instead of indexing ``self.data`` separately
        # for the image and the label; presumably each lookup on the backing
        # dataset re-materialises the whole row, so this halves that work.
        record = self.data[idx]
        image = record['image'].convert('RGB')  # force 3 channels
        label = record['label']
        if self.transform:
            image = self.transform(image)
        return image, label


# Chia tập dữ liệu thành train và test
# 80/20 split of ds['train']. The split is driven by a seeded generator so
# that re-running the notebook yields the same train/test partition.
train_size = int(0.8 * len(ds['train']))
test_size = len(ds['train']) - train_size
split_generator = torch.Generator().manual_seed(42)
train_data, test_data = random_split(
    ds['train'], [train_size, test_size], generator=split_generator
)

# Wrap each subset twice: once with the RGB pipeline, once with the grayscale one.
train_dataset_rgb = ImageNetHardDataset(train_data, transform_train_rgb)
train_dataset_gray = ImageNetHardDataset(train_data, transform_train_gray)
test_dataset_rgb = ImageNetHardDataset(test_data, transform_test_rgb)
test_dataset_gray = ImageNetHardDataset(test_data, transform_test_gray)


# Concatenate the RGB and grayscale views, so every sample appears twice.
train_dataset = ConcatDataset([train_dataset_rgb, train_dataset_gray])
test_dataset = ConcatDataset([test_dataset_rgb, test_dataset_gray])

# Batch the data; only the training loader is shuffled.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [None]:
from torchvision import models

# GoogLeNet initialised with torchvision's default pretrained weights.
model = models.googlenet(weights=models.GoogLeNet_Weights.DEFAULT)

In [None]:
# Mark every GoogLeNet parameter as trainable (full fine-tuning).
for param in model.parameters():
    param.requires_grad_(True)

In [None]:
# Swap the final fully connected layer for a fresh 200-output classifier
# (presumably one output per Tiny ImageNet class — confirm against the labels).
num_features = model.fc.in_features
model.fc = nn.Linear(in_features=num_features, out_features=200)

In [None]:
# Move GoogLeNet to the selected compute device.
model = model.to(device=device)

In [None]:
# Training configuration.
num_epochs = 1
criterion = nn.CrossEntropyLoss()
# NOTE: this optimizer tracks only the parameters of `model` (GoogLeNet).
optimizer = optim.Adam(params=model.parameters(), lr=1e-2)

In [None]:
def train(model, train_loader, optimizer, criterion, device, epoch):
    """Run one training epoch over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer stepped once per batch. It must have been built
            from the parameters of ``model``; otherwise ``model`` is not updated.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        device: Device the batches are moved to before the forward pass.
        epoch: Zero-based epoch index, used only for the progress-bar label.

    Returns:
        ``(mean_batch_loss, accuracy_percent)``. An empty loader returns
        ``(0.0, 0.0)`` instead of raising ``ZeroDivisionError``.
    """
    model.train()  # enable training-mode behaviour
    total_loss = 0
    correct = 0
    total = 0
    num_batches = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch + 1}", leave=False)

    for batch_idx, (inputs, targets) in enumerate(progress_bar):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        num_batches = batch_idx + 1
        total_loss += loss.item()
        predicted = outputs.argmax(dim=1)  # index of the highest logit per sample
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        # Running averages shown next to the progress bar.
        progress_bar.set_postfix(
            loss=f"{total_loss / num_batches:.3f}",
            acc=f"{100. * correct / total:.2f}%"
        )

    # Nothing was processed: report zeros rather than dividing by zero.
    if num_batches == 0 or total == 0:
        return 0.0, 0.0

    return total_loss / num_batches, 100. * correct / total

In [None]:
# Fine-tune GoogLeNet and report the per-epoch training metrics.
for epoch in range(num_epochs):
    train_loss, train_acc = train(
        model=model,
        train_loader=train_loader,
        optimizer=optimizer,
        criterion=criterion,
        device=device,
        epoch=epoch,
    )
    print(f"Epoch {epoch + 1}: Train Loss = {train_loss:.3f}, Train Acc = {train_acc:.2f}%")

                                                                                   

Epoch 1: Train Loss = 4.969, Train Acc = 2.83%




GoogLeNet: train loss = 4.969, train acc = 2.83%

Model AlexNet

In [None]:
# AlexNet initialised with torchvision's default pretrained weights.
model1 = models.alexnet(weights=models.AlexNet_Weights.DEFAULT)

In [None]:
# Mark every AlexNet parameter as trainable (full fine-tuning).
for param in model1.parameters():
    param.requires_grad_(True)

In [None]:
# torchvision's AlexNet has no `.fc` attribute: its head is the `classifier`
# Sequential, whose last Linear layer sits at index 6. Accessing `model1.fc`
# raises AttributeError and leaves the original 1000-class head in place.
# Replace that last layer with a fresh 200-output classifier
# (presumably one output per Tiny ImageNet class — confirm against the labels).
num_features = model1.classifier[6].in_features
model1.classifier[6] = nn.Linear(num_features, 200)

In [None]:
# Move AlexNet to the selected compute device.
model1 = model1.to(device=device)

In [None]:
# The shared `optimizer` was built from GoogLeNet's parameters (`model`), so
# stepping it never updates AlexNet. Build a dedicated optimizer for `model1`.
# NOTE(review): lr=0.01 mirrors the GoogLeNet run; it looks high for
# fine-tuning with Adam — consider lowering it.
optimizer1 = optim.Adam(model1.parameters(), lr=0.01)

for epoch in range(num_epochs):
    train_loss, train_acc = train(model1, train_loader, optimizer1, criterion, device, epoch)
    print(f"Epoch {epoch + 1}: Train Loss = {train_loss:.3f}, Train Acc = {train_acc:.2f}%")

                                                                                    

Epoch 1: Train Loss = 10.496, Train Acc = 0.06%




AlexNet: train loss = 10.496, train acc = 0.06%

In [None]:
# ResNet-50 initialised with torchvision's default pretrained weights.
model2 = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

In [None]:
# Mark every ResNet-50 parameter as trainable (full fine-tuning).
for param in model2.parameters():
    param.requires_grad_(True)

In [None]:
# Swap the final fully connected layer for a fresh 200-output classifier
# (presumably one output per Tiny ImageNet class — confirm against the labels).
num_features = model2.fc.in_features
model2.fc = nn.Linear(in_features=num_features, out_features=200)

In [None]:
# Move ResNet-50 to the selected compute device.
model2 = model2.to(device=device)

In [None]:
# The shared `optimizer` was built from GoogLeNet's parameters (`model`), so
# stepping it never updates ResNet-50. Build a dedicated optimizer for `model2`.
# NOTE(review): lr=0.01 mirrors the GoogLeNet run; it looks high for
# fine-tuning with Adam — consider lowering it.
optimizer2 = optim.Adam(model2.parameters(), lr=0.01)

for epoch in range(num_epochs):
    train_loss, train_acc = train(model2, train_loader, optimizer2, criterion, device, epoch)
    print(f"Epoch {epoch + 1}: Train Loss = {train_loss:.3f}, Train Acc = {train_acc:.2f}%")

                                                                                   

Epoch 1: Train Loss = 5.307, Train Acc = 0.47%




ResNet-50: train loss = 5.307, train acc = 0.47%