## 1. Prepare python script for running dual GPU:

#### Prepare dataset:

Images after preprocessing:
- Remove noisy images
- Cropping using YOLO

In [1]:
%%writefile dataset_loading.py
from torch.utils.data import Dataset, Subset
import os
from PIL import Image
import torchvision.transforms as transforms

class FashionDataset(Dataset):
  def __init__(self, image_dir):
    """
      image_dir=/content/drive/MyDrive/data
      image_dir
          |____ ao_hoodie_nam
          |____ ao_lien_quan
          ......
          |____ trung_nien_nu
    """
    self.image_dir = image_dir

    # read image and load label
    self.categories = sorted([folder for folder in os.listdir(image_dir) if folder != 'quan_ao_nam_trung_nien'])
    labels = {k:v for k, v in zip(self.categories, range(len(self.categories)))}
    self.num_classes = len(self.categories)
    self.images = []
    self.labels = []
    for category in self.categories:
      image_files = [f for f in os.listdir(os.path.join(image_dir, category)) if f.endswith(('.jpg', '.png'))]
      self.images.extend(image_files)
      self.labels.extend([labels[category]]*len(image_files))

  def __len__(self):
    return len(self.images)

  def __getitem__(self, idx):
    img_file = self.images[idx]
    label = self.labels[idx]
    category = self.categories[label]
    img = Image.open(os.path.join(self.image_dir, category, img_file)).convert('RGB')
    return img, label

class TransformedDataset(Dataset):
  def __init__(self, subset, transform=None):
    self.subset = subset
    if not transform:
      self.transform = transforms.ToTensor()
    else:
      self.transform = transform

  def __len__(self):
    return len(self.subset)

  def __getitem__(self, idx):
    img, label = self.subset[idx]
    return self.transform(img), label

Writing dataset_loading.py


#### Train model:

ResNet50:

In [2]:
%%writefile model.py
import torch
import torch.nn as nn
import torchvision

class FashionModel(nn.Module):
  def __init__(self, num_classes):
    super().__init__()
    self.num_classes = num_classes
    self.backbone = torchvision.models.resnet50(torchvision.models.ResNet50_Weights.DEFAULT)

    last_num_features = self.backbone.fc.in_features
    self.backbone.fc = nn.Identity()
    self.fc = nn.Linear(in_features=last_num_features, out_features=num_classes) # replace last fc layer

  def forward(self, x):
    x = self.backbone(x)
    return self.fc(x)

Writing model.py


Training:

In [3]:
%%writefile train.py
import torch
from accelerate import Accelerator, notebook_launcher
import torch.multiprocessing as mp
from tqdm import tqdm
from torch.utils.data import DataLoader, random_split
import torchvision.transforms as transforms
from dataset_loading import FashionDataset, TransformedDataset
from model import FashionModel
import matplotlib.pyplot as plt
import seaborn as sns

def data_prepare():
    # create dataset
    ## data transformations
    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    valtest_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    img_dir = '/kaggle/input/fashion/filtered_cropped_images'
    dataset = FashionDataset(image_dir=img_dir)
    train_set, val_set, test_set = random_split(
        dataset,
        lengths=[0.6, 0.2, 0.2],
        generator=torch.Generator().manual_seed(42)
    )
    ## apply transformation
    train_set = TransformedDataset(train_set, transform=train_transform)
    val_set = TransformedDataset(val_set, transform=valtest_transform)
    test_set = TransformedDataset(test_set, transform=valtest_transform)
    
    # create data loader
    train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=32, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

    return train_loader, val_loader, test_loader, dataset.num_classes

# train loop
def train_model(train_loader, val_loader, test_loader, model, optimizer, scheduler):        
    accelerator = Accelerator()
    train_loader, val_loader, model, optimizer, scheduler = accelerator.prepare(
        train_loader, val_loader, model, optimizer, scheduler
    )
    
    num_epochs = 100
    best_acc = 0.0
    patience = 10
    patience_counter = 0
    train_losses = [] # save train losses for visualization
    val_losses = [] # save validation loss for visualization
    train_accuracies = [] # save train accuracy for visualization
    val_accuracies = [] # save validation accuracy for visualization
    for epoch in range(num_epochs):
      model.train()
      train_loss, correct, total = 0, 0, 0
      loop = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}')
    
      for images, labels in loop:
        images = images.to(device)
        labels = labels.to(device)
    
        # training phase
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        accelerator.backward(loss)
        optimizer.step()

        all_gpu_loss = accelerator.gather(loss).mean().item()
        train_loss += all_gpu_loss
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
    
        # update visualization
        loop.set_postfix(loss=all_gpu_loss, acc=correct / total)
    
      scheduler.step()  
      train_acc = correct / total
      train_accuracies.append(train_acc)
      train_losses.append(train_loss / len(train_loader))
    
      # evaluation phase
      model.eval()
      val_loss, correct, total = 0, 0, 0
      with torch.no_grad():
        for images, labels in val_loader:
          images, labels = images.to(device), labels.to(device)
          outputs = model(images)
          loss = criterion(outputs, labels)
          val_loss += loss.item()
          _, predicted = torch.max(outputs, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum()
    
      val_losses.append(val_loss / len(val_loader))
      val_acc = correct / total
      val_accuracies.append(val_acc)
      print(f"Epoch {epoch+1}: Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")
    
      # save checkpoint
      if val_acc > best_acc:
        best_acc = val_acc
        
        unwrapped_model = accelerator.unwrap_model(model)
        # Lưu lại state_dict
        torch.save(unwrapped_model.state_dict(), "/kaggle/working/best.pth")
        patience_counter = 0
      else:
        patience_counter += 1
        if patience_counter >= patience:
          print(f'Early stopping at epoch {epoch+1}.')
          break
    else:
      print(f'Finished {num_epochs} epochs.')
      unwrapped_model = accelerator.unwrap_model(model)
      # Lưu lại state_dict
      torch.save(unwrapped_model.state_dict(), "/kaggle/working/last.pth")

    return train_losses, val_losses, train_accuracies, val_accuracies

def visualize(train_losses, val_losses, train_accuracies, val_accuracies):
    # train/val loss visualize
    plt.figure(figsize=(20, 6))
    sns.set_style("whitegrid")  # Thêm nền grid nhẹ
    
    # Vẽ đường train loss
    sns.lineplot(x=range(1, len(train_losses)+1), y=train_losses, marker='o', linestyle='-', color='red', label="Train Loss")
    
    # Vẽ đường val loss
    sns.lineplot(x=range(1, len(val_losses)+1), y=val_losses, marker='s', linestyle='--', color='blue', label="Validation Loss")
    
    # Tùy chỉnh biểu đồ
    plt.title("Train vs Validation Loss", fontsize=14, fontweight="bold")
    plt.xlabel("Epoch", fontsize=12)
    plt.ylabel("Loss", fontsize=12)
    plt.xticks(range(1, 41))
    plt.legend(fontsize=12)
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.savefig('/kaggle/working/train_val_losses.png')

    
    # train/val accuracies visualize
    plt.figure(figsize=(20, 6))
    sns.set_style("whitegrid")  # Thêm nền grid nhẹ
    
    # Vẽ đường train loss
    sns.lineplot(x=range(1, len(train_accuracies)+1), y=train_accuracies, marker='o', linestyle='-', color='red', label="Train accuracy")
    
    # Vẽ đường val loss
    sns.lineplot(x=range(1, len(val_accuracies)+1), y=val_accuracies, marker='s', linestyle='--', color='blue', label="Validation accuracy")
    
    # Tùy chỉnh biểu đồ
    plt.title("Train vs Validation Accuracy", fontsize=14, fontweight="bold")
    plt.xlabel("Epoch", fontsize=12)
    plt.ylabel("Accuracy", fontsize=12)
    plt.xticks(range(1, 41))
    plt.legend(fontsize=12)
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.savefig('/kaggle/working/train_val_accuracies.png')

if __name__ == '__main__':
    mp.set_start_method("spawn", force=True)

    # prepare data loader
    train_loader, val_loader, test_loader, num_classes = data_prepare()

    # create model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = FashionModel(num_classes=num_classes).to(device)
    
    # loss function and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50)
    
    train_losses, val_losses, train_accuracies, val_accuracies = train_model(train_loader, val_loader, test_loader, model, optimizer, scheduler)
    
    visualize(train_losses, val_losses, train_accuracies, val_accuracies)

Writing train.py


In [4]:
!ls /kaggle/working

dataset_loading.py  model.py  __notebook__.ipynb  train.py


## 2. Run this to start train model:

In [5]:
!accelerate launch train.py

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████████████████████████████████| 97.8M/97.8M [00:01<00:00, 88.3MB/s]
100%|██████████████████████████████████████| 97.8M/97.8M [00:01<00:00, 86.4MB/s]
Epoch 1/100: 100%|█| 206/206 [02:04<00:00,  1.65it/s, acc=tensor(0.5349, device=
Epoch 1/100: 100%|█| 206/206 [02:04<00:00,  1.65it/s, acc=tensor(0.5616, device=
Epoch 1: Train Acc: 0.53% | Val Acc: 0.67%
Epoch 2/100:   0%|                                      | 0/206 [00:00<?, ?it/s]Epoch 1: Train Acc: 0.56% | Val Acc: 0.71%
Epoch 2/100: 100%|█| 206/206 [01:39<00:00,  2.08it/s, acc=tensor(0.7676, device=
Epoch 2/100: 100%|█| 206/206 [01:39<00:00,  2.07it/s, acc=tensor(0.7570, device=
Epoch 2: Train Acc: 0.76% | Val Acc: 0.73%
Epoch 3/100:   0%|          

## 3. Evaluate:

In [6]:
from train import data_prepare
from model import FashionModel
import torch

train_loader, val_loader, test_loader, num_classes = data_prepare()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FashionModel(num_classes)
model = model.to(device)
model.load_state_dict(
    torch.load('/kaggle/working/best.pth', map_location=device)
)

with torch.no_grad():
    correct, total = 0, 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
    print(f'Test accuracy: {correct / total}')

  torch.load('/kaggle/working/best.pth', map_location=device)


Test accuracy: 0.7807498574256897
