In [5]:
# =========================================================
# GTSRB Traffic Sign Classification - Full Notebook
# =========================================================

# Import the required libraries
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# =========================================================
# 1. Config + Menu
# =========================================================
class Config:
    """Static settings read by the training, resume and prediction paths."""

    # --- Paths ------------------------------------------------------------
    # Folder containing train.csv / test.csv; the submission file is written
    # into the same folder.
    DATA_DIR = r"C:\Users\luuph\Downloads\Nh·∫≠p m√¥n h·ªçc m√°y\Nhom_14-CNTT1708"
    # Relative path, so it resolves against the current working directory.
    CHECKPOINT = "checkpoint.pth"
    SUBMISSION_FILE = "submission.csv"

    # --- Optimisation hyper-parameters -------------------------------------
    EPOCHS = 10
    BATCH_SIZE = 64
    LR = 1e-3

    # --- Hardware ------------------------------------------------------------
    # Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def show_menu():
    """Print the three-option menu framed by 50-character '=' rules."""
    rule = "=" * 50
    menu_lines = (
        rule,
        "üö¶",
        rule,
        "1. Hu·∫•n luy·ªán l·∫°i m√¥ h√¨nh",
        "2. Ti·∫øp t·ª•c hu·∫•n luy·ªán t·ª´ checkpoint",
        "3. D·ª± ƒëo√°n v√† t·∫°o file submission",
        rule,
    )
    # A single print of the joined lines emits the same text as one print per line.
    print("\n".join(menu_lines))

# =========================================================
# 2. Dataset
# =========================================================
class GTSRBDataset(Dataset):
    """Image dataset described by a CSV file.

    The first CSV column holds each image's path relative to ``root_dir``.
    When the CSV has a ``label`` column, items are ``(image, label)`` pairs;
    otherwise items are ``(image, first_column_value)`` so that predictions
    can be matched back to their rows.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        """Read ``csv_file`` and remember where the images live.

        Args:
            csv_file: Path of the CSV file listing the images.
            root_dir: Directory the first-column paths are relative to.
            transform: Optional callable applied to every loaded PIL image.
        """
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.has_labels = "label" in self.data.columns

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load row ``idx`` as an RGB image plus its label (or row identifier)."""
        key = self.data.iloc[idx, 0]
        # The context manager closes the underlying file handle promptly;
        # convert() returns an independent image that outlives the block.
        with Image.open(os.path.join(self.root_dir, key)) as raw:
            image = raw.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        if self.has_labels:
            # Read the label by column name, matching how has_labels is
            # detected, instead of assuming it is the second column.
            label = int(self.data["label"].iloc[idx])
            return image, label
        return image, key

# Preprocessing pipeline passed to the datasets built in this file.
_preprocess_steps = [
    # 32x32 inputs are what CNNModel's 64 * 8 * 8 flatten size expects
    # (two 2x2 poolings: 32 -> 16 -> 8).
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    # Single-element mean/std, exactly as before.
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
]
transform = transforms.Compose(_preprocess_steps)

# =========================================================
# 3. CNN model
# =========================================================
class CNNModel(nn.Module):
    """Two-block convolutional classifier for 3-channel 32x32 images.

    Each block is a 3x3 convolution (padding 1), ReLU and 2x2 max-pooling, so
    a 32x32 input reaches the classifier head as a 64 x 8 x 8 feature map.
    ``forward`` returns raw class scores (no softmax is applied).

    Args:
        num_classes: Number of output scores per sample.
    """

    def __init__(self, num_classes=43):
        super().__init__()
        # Attribute names are kept as-is so existing checkpoints still load.
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class scores for ``x``.

        Raises:
            ValueError: If the pooled feature map is not 64 x 8 x 8, i.e. the
                input images were not 32x32.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # view(-1, 4096) alone would silently fold extra spatial positions
        # into the batch dimension for any other input size, producing more
        # output rows than input samples. Fail loudly instead.
        if tuple(x.shape[-3:]) != (64, 8, 8):
            raise ValueError(
                "CNNModel expects 32x32 inputs; got a feature map of shape "
                f"{tuple(x.shape)} after pooling"
            )
        x = x.view(-1, 64 * 8 * 8)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# =========================================================
# 4. Training function + checkpoint saving
# =========================================================
def train_model(model, train_loader, val_loader, optimizer, criterion, epochs, device, resume=False):
    """Train ``model`` and checkpoint it whenever validation accuracy improves.

    Args:
        model: Network to optimise (already moved to ``device`` by the caller).
        train_loader: Iterable of ``(images, labels)`` training batches; must
            support ``len()``.
        val_loader: Iterable of ``(images, labels)`` batches passed to
            ``evaluate_model`` after every epoch.
        optimizer: Optimiser bound to ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, labels)``.
        epochs: Total number of epochs (exclusive upper bound of the loop).
        device: Device the batches are moved to.
        resume: When true, restore model/optimiser state, the epoch counter
            and the best accuracy from ``Config.CHECKPOINT`` if it exists.
    """
    best_acc = 0
    start_epoch = 0

    # When resuming, load the checkpoint.
    if resume:
        if os.path.exists(Config.CHECKPOINT):
            checkpoint = torch.load(Config.CHECKPOINT, map_location=device)
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optim_state"])
            # Checkpoints are only written on improvement, so "epoch" is the
            # epoch of the best model so far; training continues after it.
            start_epoch = checkpoint["epoch"] + 1
            best_acc = checkpoint["best_acc"]
            print(f"üîÑ Resume t·ª´ epoch {start_epoch} (best_acc={best_acc:.4f})")
        else:
            # Previously this case fell through silently.
            print(f"Checkpoint '{Config.CHECKPOINT}' not found - training from scratch.")

    if start_epoch >= epochs:
        # The loop below would not run at all; say so instead of returning silently.
        print(f"Nothing to train: already at epoch {start_epoch} of {epochs}.")
        return

    num_batches = len(train_loader)
    for epoch in range(start_epoch, epochs):
        model.train()  # evaluate_model() switches the model to eval mode
        total_loss, correct, total = 0, 0, 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            correct += predicted.eq(labels).sum().item()
            total += labels.size(0)

        # Guard both ratios so an empty loader reports zeros instead of
        # raising ZeroDivisionError.
        train_acc = correct / total if total else 0.0
        avg_loss = total_loss / num_batches if num_batches else 0.0
        val_acc = evaluate_model(model, val_loader, device)
        print(f"üìä Epoch {epoch+1}: Loss={avg_loss:.4f}, Train_Acc={train_acc:.4f}, Val_Acc={val_acc:.4f}")

        # Save a checkpoint only when validation accuracy improves.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save({
                "epoch": epoch,
                "model_state": model.state_dict(),
                "optim_state": optimizer.state_dict(),
                "best_acc": best_acc
            }, Config.CHECKPOINT)
            print(f"üíæ Checkpoint saved (Best Acc={best_acc:.4f})")

def evaluate_model(model, val_loader, device):
    """Return the fraction of samples in ``val_loader`` classified correctly.

    The model is switched to eval mode and is not switched back; gradients
    are disabled while the batches are scored.

    Args:
        model: Network producing one score per class for each sample.
        val_loader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Accuracy in ``[0, 1]``, or ``0.0`` when the loader yields no samples
        (previously a ZeroDivisionError).
    """
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            correct += predicted.eq(labels).sum().item()
            total += labels.size(0)
    return correct / total if total else 0.0

# =========================================================
# 5. Prediction function + submission
# =========================================================
def predict_and_submit(model, test_loader, device):
    """Predict a class for every test sample and write the submission CSV.

    Args:
        model: Network producing one score per class for each sample.
        test_loader: Iterable of ``(images, ids)`` batches.
        device: Device the images are moved to before the forward pass.

    The CSV has the columns ``id`` and ``label`` and is written to
    ``Config.SUBMISSION_FILE`` inside ``Config.DATA_DIR``.
    """
    model.eval()
    rows = []
    with torch.no_grad():
        for batch_images, batch_ids in tqdm(test_loader, desc="Predicting"):
            scores = model(batch_images.to(device))
            _, batch_preds = scores.max(1)
            rows.extend(
                [sample_id, pred]
                for sample_id, pred in zip(batch_ids, batch_preds.cpu().numpy())
            )
    submission = pd.DataFrame(rows, columns=["id", "label"])

    # Always save into the DATA_DIR folder.
    output_path = os.path.join(Config.DATA_DIR, Config.SUBMISSION_FILE)
    submission.to_csv(output_path, index=False)
    print(f"‚úÖ Submission saved: {output_path}")


# =========================================================
# 6. Run the main menu
# =========================================================
def main():
    """Show the menu, then train, resume training or predict as chosen.

    Option 1 trains from scratch, option 2 resumes from the checkpoint and
    option 3 loads the checkpoint and writes the submission file. Any other
    input is rejected before data is loaded.
    """
    show_menu()
    choice = input("üëâ Ch·ªçn ch·ª©c nƒÉng (1/2/3): ").strip()

    # Validate first so an invalid choice does not trigger any data loading.
    if choice not in ("1", "2", "3"):
        print("‚ùå L·ª±a ch·ªçn kh√¥ng h·ª£p l·ªá!")
        return

    model = CNNModel().to(Config.DEVICE)

    if choice == "3":
        if not os.path.exists(Config.CHECKPOINT):
            # Predicting with freshly initialised weights would only produce
            # a meaningless submission, so stop here instead.
            print(f"Checkpoint '{Config.CHECKPOINT}' not found - train the model first.")
            return
        checkpoint = torch.load(Config.CHECKPOINT, map_location=Config.DEVICE)
        model.load_state_dict(checkpoint["model_state"])
        print("‚úÖ Loaded checkpoint for prediction.")

        test_csv = os.path.join(Config.DATA_DIR, "test.csv")
        test_dataset = GTSRBDataset(test_csv, Config.DATA_DIR, transform)
        # Keep the file order so predictions line up with the CSV rows.
        test_loader = DataLoader(test_dataset, batch_size=Config.BATCH_SIZE, shuffle=False)
        predict_and_submit(model, test_loader, Config.DEVICE)
        return

    # Load the data: stratified 80/20 train/validation split of train.csv.
    train_csv = os.path.join(Config.DATA_DIR, "train.csv")
    train_data = pd.read_csv(train_csv)
    train_split, val_split = train_test_split(
        train_data, test_size=0.2, stratify=train_data['label'], random_state=42
    )

    # Write the splits to temporary CSVs because GTSRBDataset reads from a file path.
    train_split_csv = os.path.join(Config.DATA_DIR, "train_split.csv")
    val_split_csv = os.path.join(Config.DATA_DIR, "val_split.csv")
    train_split.to_csv(train_split_csv, index=False)
    val_split.to_csv(val_split_csv, index=False)

    train_dataset = GTSRBDataset(train_split_csv, Config.DATA_DIR, transform)
    val_dataset = GTSRBDataset(val_split_csv, Config.DATA_DIR, transform)

    # These loaders were referenced but never created in the original cell.
    train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=Config.BATCH_SIZE, shuffle=False)

    optimizer = optim.Adam(model.parameters(), lr=Config.LR)
    criterion = nn.CrossEntropyLoss()

    train_model(
        model, train_loader, val_loader, optimizer, criterion,
        Config.EPOCHS, Config.DEVICE, resume=(choice == "2"),
    )


if __name__ == "__main__":
    main()


üö¶
1. Hu·∫•n luy·ªán l·∫°i m√¥ h√¨nh
2. Ti·∫øp t·ª•c hu·∫•n luy·ªán t·ª´ checkpoint
3. D·ª± ƒëo√°n v√† t·∫°o file submission


üëâ Ch·ªçn ch·ª©c nƒÉng (1/2/3):  3


  checkpoint = torch.load(Config.CHECKPOINT, map_location=Config.DEVICE)


‚úÖ Loaded checkpoint for prediction.


Predicting: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 123/123 [00:08<00:00, 14.53it/s]

‚úÖ Submission saved: C:\Users\luuph\Downloads\Nh·∫≠p m√¥n h·ªçc m√°y\Nhom_14-CNTT1708\submission.csv



