In [60]:
from IPython.display import display
import os

# 上傳檔案（會跳出檔案選擇器）
from ipywidgets import FileUpload

upload = FileUpload()
display(upload)

FileUpload(value={}, description='Upload')

In [61]:
import os
from pathlib import Path

# 假設你只上傳了一個檔案
# Check if any file is uploaded and get the filename
if upload.value:
    # Get the first (and likely only) filename from the dictionary keys
    filename = list(upload.value.keys())[0]
    fileinfo = upload.value[filename] # Access the file info dictionary using the filename as key
else:
    print("No file uploaded.")
    # Handle the case where no file is uploaded, perhaps by exiting or prompting the user.
    # For this example, we'll assume a file was uploaded as per the traceback context.
    raise FileNotFoundError("No file was uploaded.")


# 顯示內容結構（除錯用）
print(fileinfo)

# 儲存 kaggle.json
# filename is already obtained above
content = fileinfo['content']

kaggle_dir = Path.home() / ".kaggle"
kaggle_dir.mkdir(exist_ok=True)

kaggle_json_path = kaggle_dir / "kaggle.json"
with open(kaggle_json_path, "wb") as f:
    f.write(content)

# 設定權限（Linux/macOS 建議）
os.chmod(kaggle_json_path, 0o600)

print(f"{filename} 已成功儲存至 {kaggle_json_path}")

{'metadata': {'name': 'kaggle.json', 'type': 'application/json', 'size': 64, 'lastModified': 1745215961986}, 'content': b'{"username":"suchiwen","key":"01a925cee9e9e9d232008524b0434fb9"}'}
kaggle.json 已成功儲存至 /root/.kaggle/kaggle.json


In [62]:
!kaggle datasets list -s cifar


ref                                                     title                                                  size  lastUpdated                 downloadCount  voteCount  usabilityRating  
------------------------------------------------------  ----------------------------------------------  -----------  --------------------------  -------------  ---------  ---------------  
fedesoriano/cifar100                                    CIFAR-100 Python                                  168517809  2020-12-26 08:37:10.143000          12302        176  1.0              
pankrzysiu/cifar10-python                               CIFAR-10 Python                                   340613496  2018-01-27 13:42:40.967000          14829        252  0.75             
petitbonney/cifar10-image-recognition                   CIFAR-10                                         1007971063  2019-10-01 12:50:23.227000           2890         27  0.8235294        
valentynsichkar/cifar10-preprocessed                   

In [63]:
!pip install -U kaggle
!pip install --upgrade pandas
import os
import zipfile

# 建立 Kaggle 資料夾
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# 下載 Dog Breed Identification 資料集
!kaggle competitions download -c dog-breed-identification --force
!unzip -oq dog-breed-identification.zip -d dog-breed-identification


import pandas as pd
import numpy as np

labels = pd.read_csv('dog-breed-identification/labels.csv')


cp: cannot stat 'kaggle.json': No such file or directory
Downloading dog-breed-identification.zip to /content
 97% 667M/691M [00:11<00:00, 101MB/s] 
100% 691M/691M [00:11<00:00, 64.7MB/s]


In [77]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split, WeightedRandomSampler
from torchvision import transforms, models
from torchvision.datasets import ImageFolder
from torch.cuda.amp import GradScaler, autocast
from collections import Counter
import numpy as np

# ---------- 參數 ----------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 32
num_epochs = 25
lr = 1e-4
num_workers = 2
val_ratio = 0.2
gamma = 2.0  # Focal Loss gamma

# ---------- 資料增強 ----------
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(0.1, 0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])


In [78]:
# ---------- Dataset 與切分 ----------
dataset = ImageFolder("dog_images/train", transform=train_transform)
val_dataset = ImageFolder("dog_images/train", transform=val_transform)

val_size = int(len(dataset) * val_ratio)
train_size = len(dataset) - val_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
val_dataset.dataset.transform = val_transform  # 替換 val transform


In [68]:
# 計算訓練集標籤權重
train_indices = train_dataset.indices
train_labels = [dataset.targets[i] for i in train_indices]
label_counts = Counter(train_labels)

In [79]:
# ---------- 建立 Weighted Sampler ----------
train_indices = train_dataset.indices
train_labels = [dataset.targets[i] for i in train_indices]
label_counts = Counter(train_labels)
class_sample_counts = [label_counts[i] for i in range(len(label_counts))]
sample_weights = [1.0 / class_sample_counts[label] for label in train_labels]
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)


In [80]:
# ---------- Dataloader ----------
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler, num_workers=num_workers)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)


In [81]:
# ---------- Focal Loss ----------
class FocalLoss(nn.Module):
    def __init__(self, gamma=2.0):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.ce = nn.CrossEntropyLoss()

    def forward(self, input, target):
        logpt = -self.ce(input, target)
        pt = torch.exp(logpt)
        focal_loss = -((1 - pt) ** self.gamma) * logpt
        return focal_loss.mean()

criterion = FocalLoss(gamma=gamma)

In [82]:
# ---------- 建立模型 ----------
model = models.vgg16(pretrained=True)
for param in model.features.parameters():
    param.requires_grad = False
model.classifier[6] = nn.Linear(model.classifier[6].in_features, len(label_counts))
model = model.to(device)

optimizer = optim.AdamW(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
scaler = GradScaler()

  scaler = GradScaler()


In [83]:
# ---------- 訓練 ----------
for epoch in range(num_epochs):
    model.train()
    running_loss = 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        with autocast():
            outputs = model(imgs)
            loss = criterion(outputs, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        running_loss += loss.item()

    avg_train_loss = running_loss / len(train_loader)

    # ---------- 驗證 ----------
    model.eval()
    val_loss, correct, total = 0, 0, 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    avg_val_loss = val_loss / len(val_loader)
    val_acc = correct / total
    scheduler.step()

    print(f"[Epoch {epoch+1}] Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f}")


  with autocast():


[Epoch 1] Train Loss: 1.1280 | Val Loss: 0.3360 | Val Acc: 0.7211
[Epoch 2] Train Loss: 0.1316 | Val Loss: 0.2580 | Val Acc: 0.7613
[Epoch 3] Train Loss: 0.0407 | Val Loss: 0.2363 | Val Acc: 0.7725
[Epoch 4] Train Loss: 0.0162 | Val Loss: 0.2146 | Val Acc: 0.7823
[Epoch 5] Train Loss: 0.0071 | Val Loss: 0.2164 | Val Acc: 0.7789
[Epoch 6] Train Loss: 0.0043 | Val Loss: 0.2100 | Val Acc: 0.7916
[Epoch 7] Train Loss: 0.0017 | Val Loss: 0.2113 | Val Acc: 0.7882
[Epoch 8] Train Loss: 0.0014 | Val Loss: 0.2056 | Val Acc: 0.7896
[Epoch 9] Train Loss: 0.0009 | Val Loss: 0.2074 | Val Acc: 0.7862
[Epoch 10] Train Loss: 0.0008 | Val Loss: 0.2050 | Val Acc: 0.7896
[Epoch 11] Train Loss: 0.0005 | Val Loss: 0.2093 | Val Acc: 0.7872
[Epoch 12] Train Loss: 0.0003 | Val Loss: 0.2074 | Val Acc: 0.7886
[Epoch 13] Train Loss: 0.0005 | Val Loss: 0.2056 | Val Acc: 0.7867
[Epoch 14] Train Loss: 0.0003 | Val Loss: 0.2060 | Val Acc: 0.7882
[Epoch 15] Train Loss: 0.0003 | Val Loss: 0.2076 | Val Acc: 0.7896
[Epo

In [76]:
# 查看類別分布
val_labels = [dataset.targets[i] for i in val_dataset.indices]
print("Train label distribution:", Counter(train_labels))
print("Val label distribution:", Counter(val_labels))


Train label distribution: Counter({100: 99, 97: 99, 42: 97, 1: 95, 73: 92, 94: 92, 7: 92, 26: 89, 87: 89, 11: 89, 117: 86, 75: 85, 52: 84, 88: 83, 68: 82, 6: 80, 13: 80, 10: 80, 3: 80, 9: 80, 69: 80, 101: 79, 78: 79, 109: 79, 61: 78, 82: 77, 86: 77, 59: 77, 30: 77, 70: 75, 80: 75, 55: 75, 84: 75, 40: 73, 93: 73, 17: 72, 56: 72, 114: 72, 60: 72, 19: 72, 35: 72, 92: 71, 98: 71, 63: 71, 39: 71, 15: 71, 119: 69, 2: 69, 14: 68, 71: 68, 33: 68, 8: 68, 90: 67, 96: 67, 74: 66, 102: 66, 110: 66, 105: 66, 28: 65, 116: 65, 64: 65, 18: 65, 76: 64, 58: 64, 67: 64, 79: 64, 62: 64, 107: 64, 118: 64, 57: 63, 5: 63, 37: 63, 106: 63, 20: 63, 115: 63, 31: 63, 50: 62, 0: 62, 85: 62, 38: 61, 77: 61, 104: 61, 111: 61, 95: 61, 12: 61, 41: 61, 21: 61, 32: 61, 54: 60, 4: 60, 36: 60, 53: 60, 47: 60, 108: 59, 51: 59, 49: 58, 34: 58, 81: 58, 44: 58, 25: 57, 65: 57, 16: 57, 29: 57, 113: 57, 103: 57, 22: 56, 99: 55, 66: 55, 89: 55, 91: 55, 24: 55, 72: 54, 48: 54, 83: 54, 27: 54, 112: 53, 45: 53, 46: 52, 23: 50, 43:

In [89]:
import os
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import pandas as pd # Import pandas

# ---------- Data Transformation for Test Set ----------
# Using the same transformation as the validation set
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std), # Assuming mean and std are defined
])

# ---------- Create Test Dataset and DataLoader ----------
# Assuming your test images are in 'dog-breed-identification/test'
# We need a custom dataset to get the image filenames
class TestImageFolder(Dataset):
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.image_files = sorted([os.path.join(root, f) for f in os.listdir(root) if f.endswith('.jpg')])

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        # Return the filename without the directory path and the image tensor
        return os.path.basename(img_path), image

test_dataset = TestImageFolder("dog-breed-identification/test", transform=test_transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) # Assuming batch_size and num_workers are defined

# ---------- 預測 ----------
model.eval()
predictions = []

with torch.no_grad():
    # The test_loader now yields image names and image tensors
    for image_names, images in test_loader:
        images = images.to(device)
        outputs = model(images)
        preds = outputs.argmax(dim=1).cpu().numpy()
        # image_names is a list of filenames (strings)
        for name, pred in zip(image_names, preds):
            predictions.append((name, pred))

# ---------- 產生 submission.csv ----------
# 類別 index 對應類別名稱（以 train dataset 的 class_to_idx 為準）
# Make sure 'dataset' is accessible and has the class_to_idx attribute
idx_to_class = {v: k for k, v in dataset.class_to_idx.items()}  # 反轉
submission = pd.DataFrame(predictions, columns=["id", "label"])
submission["label"] = submission["label"].map(idx_to_class)
submission["id"] = submission["id"].str.replace(".jpg", "", regex=False)
submission.to_csv("submission.csv", index=False)
print("✅ 已產出 submission.csv！")

✅ 已產出 submission.csv！
