# 這一段可以選擇是否執行

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file below the Kaggle input mount.
# os.walk silently yields nothing when the directory does not exist.
for input_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(input_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Print the NVIDIA driver / GPU status report for the current session.
!nvidia-smi

In [None]:

# Download the Food-11 archive as food11.zip. The URL is quoted so the shell
# never treats the `?` of the query string as a glob character.
!wget -O food11.zip "https://www.dropbox.com/s/up5q1gthsz3v0dq/food-11.zip?dl=0"

In [None]:
# Extract quietly (-q) and overwrite existing files without asking (-o), so
# re-running this cell neither floods the output with one line per image nor
# blocks on unzip's interactive "replace?" prompt.
!unzip -q -o food11.zip

In [None]:
# Experiment tag.
# NOTE(review): not referenced by any active code visible in this notebook.
_exp_name = "sample"

# import package

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image, UnidentifiedImageError
import torchvision.models as models
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import time
import os

In [None]:
# Reproducibility: one shared seed for NumPy and PyTorch.
myseed = 6666

# Seed the PyTorch and NumPy global generators.
torch.manual_seed(myseed)
np.random.seed(myseed)
if torch.cuda.is_available():
    # Also seed the generator of every CUDA device.
    torch.cuda.manual_seed_all(myseed)

# Ask cuDNN for deterministic kernels and turn its autotuner off.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# transforms

In [None]:
# Values shared by the evaluation and training pipelines.
_INPUT_SIZE = (224, 224)
_NORM_MEAN = [0.485, 0.456, 0.406]  # per-channel mean (the usual ImageNet statistics)
_NORM_STD = [0.229, 0.224, 0.225]   # per-channel std (the usual ImageNet statistics)

# Evaluation pipeline: resize, convert to tensor, normalize. No randomness.
_eval_steps = [
    transforms.Resize(_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
test_tfm = transforms.Compose(_eval_steps)

# Training pipeline: the same resize / tensor / normalize steps with random
# geometric augmentation and TrivialAugmentWide in between.
# RandomRotation(15) and ColorJitter(0.1, 0.1, 0.1) are currently disabled.
_train_steps = [
    transforms.Resize(_INPUT_SIZE),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.8, 1.2)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),
    transforms.TrivialAugmentWide(),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
]
train_tfm = transforms.Compose(_train_steps)

# model

In [None]:
class Classifier(nn.Module):
    """ResNet-50 image classifier with a custom two-layer head.

    A torchvision ResNet-50 is created with ``pretrained=True`` and its final
    ``fc`` layer is replaced by::

        Dropout(0.4) -> BatchNorm1d -> Linear(in_features, 512) -> ReLU
        -> BatchNorm1d(512) -> Dropout(0.3) -> Linear(512, num_classes)

    Args:
        num_classes: Number of output logits.
        freeze_backbone: When True, every backbone parameter gets
            ``requires_grad = False`` so that only the new head is trained.
            The default (False) leaves the whole network trainable, which is
            what the previous implementation effectively did.
    """

    def __init__(self, num_classes=11, freeze_backbone=False):
        super().__init__()

        # NOTE(review): `pretrained=True` is deprecated in recent torchvision
        # releases in favour of `weights=...`; kept here so the code still
        # runs on older installations.
        self.resnet = models.resnet50(pretrained=True)

        # The earlier code set requires_grad=True on the last six parameter
        # tensors. Parameters are trainable by default, so that loop changed
        # nothing; freezing is now an explicit opt-in instead.
        if freeze_backbone:
            for param in self.resnet.parameters():
                param.requires_grad = False

        # Read the feature width before the original fc layer is replaced.
        in_features = self.resnet.fc.in_features

        # The replacement head is created after the optional freeze, so its
        # parameters are always trainable.
        self.resnet.fc = nn.Sequential(
            nn.Dropout(0.4),               # dropout on the pooled backbone features
            nn.BatchNorm1d(in_features),   # normalize the backbone features
            nn.Linear(in_features, 512),   # hidden projection
            nn.ReLU(),
            nn.BatchNorm1d(512),           # normalize the hidden activations
            nn.Dropout(0.3),               # dropout on the hidden layer
            nn.Linear(512, num_classes),   # output logits
        )

    def forward(self, x):
        """Run ``x`` through the ResNet (including the custom head) and return its output."""
        return self.resnet(x)


# configuration

In [None]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Build the 11-class model and move it to the selected device.
model = Classifier(num_classes=11).to(device)

# Samples per mini-batch.
batch_size = 64

# Upper bound on training epochs and the early-stopping patience.
n_epochs = 36
patience = 7

# Learning rate, lowered step by step while tuning:
# 1e-3 -> 5e-4 -> 1e-4 -> 5e-5 -> 3e-5 (used together with AdamW).
learning_rate = 3e-5

# Weight-decay coefficient for AdamW; values tried while tuning: 1e-3, 3e-3, 5e-3.
weight_decay = 3e-3

# Hand only the parameters that still require gradients to the optimizer.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.AdamW(trainable_params, lr=learning_rate, weight_decay=weight_decay)

# Cosine annealing over the full epoch budget, bottoming out at 1e-6.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=1e-6)

# Cross-entropy with label smoothing of 0.1.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

# 自動略過壞圖 dataloader

In [None]:
# for kaggle
class FoodDataset(Dataset):
    """Image dataset whose integer label is encoded in each file name.

    The label is the integer before the first underscore of the file name
    (``3_12.jpg`` -> 3). When that prefix is not an integer, e.g. for
    unlabeled test images, the label is -1.

    Args:
        path: Directory scanned for ``.jpg`` / ``.jpeg`` / ``.png`` files
            (case-insensitive). It is not listed when ``files`` is given.
        tfm: Optional callable applied to every RGB PIL image.
        files: Optional explicit collection of image paths to use instead of
            scanning ``path``.

    Files that fail ``PIL.Image.verify()`` are reported and dropped at
    construction time.
    """

    _IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

    def __init__(self, path, tfm=None, files=None):
        super().__init__()
        self.path = path
        self.transform = tfm

        # Only touch the directory when no explicit file list was supplied, so
        # a caller passing `files` does not need `path` to exist.
        if files is not None:
            candidates = list(files)
        else:
            candidates = sorted(
                os.path.join(path, name)
                for name in os.listdir(path)
                if name.lower().endswith(self._IMAGE_EXTENSIONS)
            )

        # Keep only the images that can be opened and verified.
        self.files = []
        for f in candidates:
            try:
                with Image.open(f) as img:
                    img.verify()
                self.files.append(f)
            except Exception as e:
                print(f"⚠️ 略過壞圖: {f} ({e})")

    def __len__(self):
        """Return the number of usable images."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the image at position ``idx``.

        If the image cannot be read, a black 224x224 RGB image is used in its
        place so that one transient I/O error does not abort the whole run.
        """
        fname = self.files[idx]

        try:
            # convert() returns an independent, fully loaded image, so the
            # underlying file can be closed as soon as the block exits.
            with Image.open(fname) as img:
                im = img.convert("RGB")
        except Exception as e:
            print(f"⚠️ 無法開啟圖像 {fname}: {e}，使用黑圖代替")
            im = Image.new("RGB", (224, 224), color=(0, 0, 0))

        if self.transform is not None:
            im = self.transform(im)

        # Take the base name (accepting both "/" and "\\" separators) and
        # parse the label from the part before the first underscore.
        filename = str(fname).replace("\\", "/").rsplit("/", 1)[-1]
        try:
            label = int(filename.split("_")[0])
        except ValueError:
            label = -1  # unlabeled file, e.g. test images

        return im, label




def _build_split(split_dir, tfm, shuffle):
    """Create the FoodDataset for ``split_dir`` and a DataLoader over it."""
    dataset = FoodDataset(split_dir, tfm=tfm)
    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=0,
        pin_memory=True,
    )
    return dataset, loader


# Training data is augmented and shuffled; validation and test data use the
# evaluation transform and keep their sorted order.
train_set, train_loader = _build_split("./train", train_tfm, shuffle=True)
valid_set, valid_loader = _build_split("./valid", test_tfm, shuffle=False)
test_set, test_loader = _build_split("./test", test_tfm, shuffle=False)


# start training

In [None]:
# Optional warm start (disabled): to continue from an earlier run, load its
# weights here before the loop, e.g.
#   model.load_state_dict(torch.load("/kaggle/input/r50ckpt/best_model.ckpt"))

# Per-epoch history of the averaged metrics.
train_losses = []
train_accs = []
valid_losses = []
valid_accs = []

# Mixup: each batch is blended with a shuffled copy of itself using a
# coefficient drawn from Beta(mixup_alpha, mixup_alpha).
mixup_alpha = 0.4

stale = 0      # consecutive epochs without a new best validation accuracy
best_acc = 0   # best validation accuracy seen so far
for epoch in range(n_epochs):
    # ---- training ----
    model.train()
    train_loss = []
    train_accs_epoch = []
    for imgs, labels in tqdm(train_loader):
        imgs = imgs.to(device)
        labels = labels.to(device)

        # Draw the mixing coefficient and a random pairing inside the batch.
        lam = float(np.random.beta(mixup_alpha, mixup_alpha))
        index = torch.randperm(imgs.size(0)).to(device)

        mixed_imgs = lam * imgs + (1 - lam) * imgs[index]
        labels_a, labels_b = labels, labels[index]

        # The loss is the same convex combination applied to both label sets.
        logits = model(mixed_imgs)
        loss = lam * criterion(logits, labels_a) + (1 - lam) * criterion(logits, labels_b)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Score the prediction against both label sets with the Mixup weights;
        # comparing with `labels` alone ignores the blended-in partner image.
        preds = logits.argmax(dim=-1)
        acc = lam * (preds == labels_a).float().mean() + (1 - lam) * (preds == labels_b).float().mean()
        train_loss.append(loss.item())
        train_accs_epoch.append(acc.item())

    train_loss_avg = sum(train_loss) / len(train_loss)
    train_acc_avg = sum(train_accs_epoch) / len(train_accs_epoch)

    # ---- validation (no Mixup, no gradients) ----
    model.eval()
    valid_loss = []
    valid_accs_epoch = []
    with torch.no_grad():
        for imgs, labels in tqdm(valid_loader):
            imgs = imgs.to(device)
            labels = labels.to(device)

            logits = model(imgs)
            loss = criterion(logits, labels)
            acc = (logits.argmax(dim=-1) == labels).float().mean()

            valid_loss.append(loss.item())
            valid_accs_epoch.append(acc.item())

    valid_loss_avg = sum(valid_loss) / len(valid_loss)
    valid_acc_avg = sum(valid_accs_epoch) / len(valid_accs_epoch)

    # Read the learning rate that was used during this epoch, then advance
    # the scheduler once per epoch.
    current_lr = optimizer.param_groups[0]['lr']
    scheduler.step()

    # Record this epoch's metrics.
    train_losses.append(train_loss_avg)
    train_accs.append(train_acc_avg)
    valid_losses.append(valid_loss_avg)
    valid_accs.append(valid_acc_avg)

    print(f"Epoch {epoch+1}/{n_epochs}, Train Acc: {train_acc_avg:.4f}, Valid Acc: {valid_acc_avg:.4f}, Train Loss: {train_loss_avg:.4f}, Valid Loss: {valid_loss_avg:.4f}, LR: {current_lr:.6f}")

    # Checkpoint on a new best validation accuracy; otherwise count towards
    # early stopping.
    if valid_acc_avg > best_acc:
        best_acc = valid_acc_avg
        torch.save(model.state_dict(), "best_model.ckpt")
        print("  <-- Best model updated!")
        stale = 0
    else:
        stale += 1
        # Stop once more than `patience` consecutive epochs bring no improvement.
        if stale > patience:
            print(f"No improvement for {stale} epochs, early stopping")
            break

# 繪製loss acc

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# 绘制训练过程图
# Plot the per-epoch loss and accuracy curves recorded during training.
fig, ax = plt.subplots(figsize=(10, 6))
epochs = np.arange(1, len(train_losses) + 1)

ax.plot(epochs, train_losses, 'r-', label='train_loss')
ax.plot(epochs, valid_losses, 'b-', label='val_loss')
ax.plot(epochs, train_accs, 'g-', label='train_acc')
ax.plot(epochs, valid_accs, 'k-', label='val_acc')

# The previous title said "W/OUT Learning Rate Decay", but training runs with
# a cosine-annealing schedule, so the title no longer makes that claim.
ax.set_title('Training Loss and Accuracy')
ax.set_xlabel('Epoch #')
ax.set_ylabel('Loss/Accuracy')
ax.grid(True)
ax.legend()

# Fixed y-range so that losses and accuracies share one readable scale.
ax.set_ylim(0, 1.8)

# Save the figure to disk, then display it.
fig.savefig('training_history.png', dpi=300, bbox_inches='tight')
plt.show()

# Testing and generate prediction CSV

In [None]:
# Rebuild the network and load the best checkpoint written during training.
# The path is relative; prefix it with /kaggle/working/ when running on Kaggle.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
model_best = Classifier().to(device)
model_best.load_state_dict(torch.load("best_model.ckpt", map_location=device))
model_best.eval()

# Predicted class index for every test image, in loader order.
prediction = []
with torch.no_grad():
    for data, _ in tqdm(test_loader):
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().numpy(), axis=1)
        # No squeeze(): for a final batch of one image it would collapse the
        # array to a scalar, and `list += int` raises TypeError.
        prediction += test_label.tolist()

100%|██████████| 94/94 [00:25<00:00,  3.75it/s]


In [None]:
import pandas as pd  # Import the pandas library
# create test csv
def pad4(i):
    """Return ``str(i)`` left-padded with "0" to a width of at least four characters."""
    return str(i).rjust(4, "0")
# Build the submission table in one step: a zero-padded positional Id per test
# sample next to its predicted class, then write it without the index column.
# NOTE(review): Ids are positions 0..N-1, not parsed from the file names. This
# presumably relies on the sorted test files matching that numbering. Confirm.
df = pd.DataFrame({
    "Id": [pad4(i) for i in range(len(test_set))],
    "Category": prediction,
})
df.to_csv("submission.csv", index=False)