# Phan Khánh Toàn - HE170983

In [1]:
import os
import copy
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, models

In [2]:
class ButterflyDataset(Dataset):
    def __init__(self, csv_file, root_dir, transform=None, label_encoder=None):
        """
        csv_file: đường dẫn file CSV (Training_set.csv)
        root_dir: thư mục chứa ảnh train (butterfl/train/train)
        transform: phép biến đổi (transforms) cho ảnh
        label_encoder: dict ánh xạ label_string -> label_index
        """
        self.df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.label_encoder = label_encoder
        
        # Nếu chưa có label_encoder, ta sẽ tự tạo (áp dụng cho train dataset)
        if self.label_encoder is None:
            unique_labels = self.df['label'].unique().tolist()
            self.label_encoder = {lab: i for i, lab in enumerate(unique_labels)}

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_name = row['filename']
        label_str = row['label']
        
        # Đường dẫn đầy đủ tới ảnh
        img_path = os.path.join(self.root_dir, img_name)
        
        # Mở ảnh
        image = Image.open(img_path).convert("RGB")
        
        # Gán nhãn (dạng index)
        label = self.label_encoder[label_str]
        
        # Áp dụng transform
        if self.transform:
            image = self.transform(image)
        
        return image, label


In [3]:
csv_path = "/kaggle/input/new-dataset-butterflies/Training_set.csv"
train_img_dir = "/kaggle/input/new-dataset-butterflies/train"

# Bước 1: Tạo dataset tạm để lấy label_encoder
temp_dataset = ButterflyDataset(csv_file=csv_path, root_dir=train_img_dir)

# Lấy label_encoder ra, để dùng nhất quán cho train/val
label_encoder = temp_dataset.label_encoder
num_classes = len(label_encoder)
print("Số lớp (num_classes):", num_classes)

# Bước 2: Tạo dataset chính thức (có label_encoder)
# Ta dùng chung dataset, sau đó split ra train_dataset và val_dataset
full_dataset = ButterflyDataset(csv_file=csv_path, 
                               root_dir=train_img_dir,
                               transform=None, 
                               label_encoder=label_encoder)

# -----------------------------------------
#  CHIA TRAIN/VAL THEO TỶ LỆ (VD: 80% / 20%)
# -----------------------------------------
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

print(f"Tổng số mẫu: {len(full_dataset)}")
print(f"Train size: {len(train_dataset)} | Val size: {len(val_dataset)}")

Số lớp (num_classes): 75
Tổng số mẫu: 6499
Train size: 5199 | Val size: 1300


In [5]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std =[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std =[0.229, 0.224, 0.225])
])

# Gán transform tương ứng
train_dataset.dataset.transform = train_transform  # dataset gốc nằm trong .dataset
val_dataset.dataset.transform   = val_transform

# -----------------------------------------
#  DATALOADER
# -----------------------------------------
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,  num_workers=2)
val_loader   = DataLoader(val_dataset,   batch_size=32, shuffle=False, num_workers=2)


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Device: cuda


In [8]:
# -----------------------------------------
#  KHỞI TẠO RESNET18 PRETRAINED
# -----------------------------------------
model = models.resnet18(pretrained=True)
# Số đầu vào của FC cuối
in_features = model.fc.in_features
# Thay FC cuối bằng lớp Linear có num_classes đầu ra
model.fc = nn.Linear(in_features, num_classes)

model = model.to(device)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 175MB/s]


In [9]:
# Loss và Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# -----------------------------------------
#  TRAIN VỚI EARLY STOPPING
# -----------------------------------------
best_acc = 0.0
best_model_wts = copy.deepcopy(model.state_dict())

patience = 5
trigger_times = 0

num_epochs = 50
for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    print("-" * 30)

    for phase in ['train', 'val']:
        if phase == 'train':
            model.train()
            dataloader = train_loader
        else:
            model.eval()
            dataloader = val_loader

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                _, preds = torch.max(outputs, 1)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(dataloader.dataset)
        epoch_acc  = running_corrects.double() / len(dataloader.dataset)

        print(f"{phase} Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}")

        # Check early stopping trên val
        if phase == 'val':
            if epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                trigger_times = 0
            else:
                trigger_times += 1
                print(f"-> EarlyStopping counter: {trigger_times} / {patience}")
                if trigger_times >= patience:
                    print("-> Early stopping!")
                    model.load_state_dict(best_model_wts)
                    break

    if trigger_times >= patience:
        break

print(f"\nHoàn tất training. Best val Acc: {best_acc:.4f}")
model.load_state_dict(best_model_wts)


Epoch 1/50
------------------------------
train Loss: 1.9847 | Acc: 0.6284
val Loss: 0.7170 | Acc: 0.8769

Epoch 2/50
------------------------------
train Loss: 0.4301 | Acc: 0.9331
val Loss: 0.4046 | Acc: 0.9223

Epoch 3/50
------------------------------
train Loss: 0.1623 | Acc: 0.9823
val Loss: 0.3730 | Acc: 0.9115
-> EarlyStopping counter: 1 / 5

Epoch 4/50
------------------------------
train Loss: 0.0730 | Acc: 0.9931
val Loss: 0.3202 | Acc: 0.9277

Epoch 5/50
------------------------------
train Loss: 0.0314 | Acc: 0.9990
val Loss: 0.3039 | Acc: 0.9285

Epoch 6/50
------------------------------
train Loss: 0.0195 | Acc: 0.9996
val Loss: 0.3070 | Acc: 0.9277
-> EarlyStopping counter: 1 / 5

Epoch 7/50
------------------------------
train Loss: 0.0165 | Acc: 0.9990
val Loss: 0.2930 | Acc: 0.9338

Epoch 8/50
------------------------------
train Loss: 0.0117 | Acc: 0.9996
val Loss: 0.2808 | Acc: 0.9362

Epoch 9/50
------------------------------
train Loss: 0.0086 | Acc: 0.9996
val 

<All keys matched successfully>

In [10]:
#  LƯU MODEL (NẾU MUỐN)
# -----------------------------------------
torch.save(model.state_dict(), "best_resnet18.pth")
print("Model đã lưu vào best_resnet18.pth")

Model đã lưu vào best_resnet18.pth


In [11]:
#  PREDICT TRÊN TẬP TEST & TẠO SUBMISSION
# -----------------------------------------
# 1) Tạo Dataset cho test (không có nhãn)
#    Ta chỉ cần cột 'filename' và đường dẫn ảnh.
class ButterflyTestDataset(Dataset):
    def __init__(self, test_dir, transform=None):
        """
        test_dir: thư mục 'butterfl/test/test' chứa các ảnh test
        transform: phép biến đổi ảnh
        """
        self.test_dir = test_dir
        self.filenames = os.listdir(test_dir)  # danh sách file ảnh
        self.transform = transform

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        img_name = self.filenames[idx]
        img_path = os.path.join(self.test_dir, img_name)

        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        return image, img_name  # trả về cả filename để ghi kết quả

test_dir = "/kaggle/input/butterfl/test/test"
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

test_dataset = ButterflyTestDataset(test_dir=test_dir, transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# 2) Dự đoán
model.eval()
predictions = []
filenames_list = []

# Đảo ngược label_encoder: index -> label_str
inv_label_encoder = {v: k for k, v in label_encoder.items()}

with torch.no_grad():
    for inputs, fnames in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)

        # Chuyển index -> tên class
        for i in range(len(preds)):
            pred_label_idx = preds[i].item()
            pred_label_str = inv_label_encoder[pred_label_idx]

            filename = fnames[i]
            filenames_list.append(filename)
            predictions.append(pred_label_str)

# 3) Tạo file submission
submission_df = pd.DataFrame({
    'ID': filenames_list,
    'label': predictions
})

submission_df.to_csv("submission.csv", index=False)
print("Đã tạo file submission.csv")


Đã tạo file submission.csv
