<a href="https://colab.research.google.com/github/nick125015/1213132123/blob/main/HW1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive under /content/drive; the dataset directory and the
# checkpoint/CSV paths used later in this notebook all live below that mount.
# force_remount=True is passed so the mount is redone even if one is present.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms

from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import DatasetFolder
from tqdm.auto import tqdm

In [None]:
# Seed every random number generator the notebook relies on, in the order
# torch (CPU) -> numpy -> torch (all CUDA devices, only when CUDA is present).
myseed = 9527
for _seed_fn in (torch.manual_seed, np.random.seed):
    _seed_fn(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

# Both pipelines resize to a fixed square and convert the PIL image to a
# tensor. They are kept as two separate objects so that one of them can be
# changed later without touching the other.
_img_size = (128, 128)
train_tfm = transforms.Compose([transforms.Resize(_img_size), transforms.ToTensor()])
test_tfm = transforms.Compose([transforms.Resize(_img_size), transforms.ToTensor()])

# Root folder on the mounted Drive; "train" and "test" subfolders are joined
# onto it when the datasets are built.
_dataset_dir = "/content/drive/My Drive/HomeworkData"

In [None]:
class FoodDataset(Dataset):
    """Image dataset backed by the ``.jpg`` files of a single directory.

    The label of a sample is parsed from its file name: the text before the
    first underscore is read as an integer (``"3_17.jpg"`` -> ``3``). When
    that prefix is not an integer the label falls back to ``0``.

    Args:
        path: Directory that directly contains the ``.jpg`` files.
        tfm: Callable applied to each PIL image; defaults to ``test_tfm``.
    """

    def __init__(self, path, tfm=test_tfm):
        super().__init__()
        self.path = path
        # Sorted so that sample order (and therefore prediction order) is
        # stable across runs and file systems.
        self.files = sorted(
            os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")
        )
        if self.files:
            print(f"Reading a sample from {path}", self.files[0])
        else:
            # Avoid an opaque IndexError on an empty/wrong directory.
            print(f"Warning: no .jpg files found in {path}")
        self.transform = tfm

    def __len__(self):
        """Return the number of image files found in ``path``."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the file at ``idx``."""
        fname = self.files[idx]
        # The context manager closes the underlying file handle once the
        # pixels have been read. convert("RGB") guarantees three channels so
        # grayscale / RGBA / palette files match the 3-channel network input.
        with Image.open(fname) as raw:
            im = self.transform(raw.convert("RGB"))
        try:
            label = int(os.path.basename(fname).split("_")[0])
        except ValueError:
            # File name has no integer prefix.
            label = 0
        return im, label

class Classifier(nn.Module):
    """Convolutional classifier: five conv stages plus a three-layer MLP head.

    Each stage is Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU.
    The first four stages end with a 2x2 max-pool, the last one with an
    adaptive average pool to 1x1. ``forward`` takes a batch of 3-channel
    images and returns one row of 11 logits per image.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels) for every convolution stage, in order.
        stage_channels = [(3, 32), (32, 64), (64, 128), (128, 256), (256, 512)]
        final_stage = len(stage_channels) - 1

        conv_layers = []
        for stage, (c_in, c_out) in enumerate(stage_channels):
            conv_layers.append(nn.Conv2d(c_in, c_out, 3, 1, 1))
            conv_layers.append(nn.BatchNorm2d(c_out))
            conv_layers.append(nn.ReLU())
            if stage == final_stage:
                conv_layers.append(nn.AdaptiveAvgPool2d((1, 1)))
            else:
                conv_layers.append(nn.MaxPool2d(2, 2, 0))

        # One flat Sequential: each layer's state_dict key is "cnn.<index>",
        # where <index> is its position in conv_layers.
        self.cnn = nn.Sequential(*conv_layers)

        self.fc = nn.Sequential(
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        """Run the conv stack, flatten per sample, and apply the MLP head."""
        features = self.cnn(x)
        batch = features.size(0)
        flat = features.view(batch, -1)
        return self.fc(flat)

In [None]:
# ---- Data loading ---------------------------------------------------------
batch_size = 32

# Options shared by both loaders; only the shuffle flag differs between them.
_loader_opts = {"batch_size": batch_size, "num_workers": 2, "pin_memory": True}

train_set = FoodDataset(os.path.join(_dataset_dir, "train"), tfm=train_tfm)
train_loader = DataLoader(train_set, shuffle=True, **_loader_opts)

test_set = FoodDataset(os.path.join(_dataset_dir, "test"), tfm=test_tfm)
test_loader = DataLoader(test_set, shuffle=False, **_loader_opts)

# ---- Device ---------------------------------------------------------------
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# ---- Training configuration -----------------------------------------------
n_epochs = 60
# Number of consecutive non-improving epochs tolerated before stopping.
# NOTE(review): this is larger than n_epochs, so with these values the
# early-stop branch cannot fire — confirm that is intended.
patience = 150

model = Classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.003, weight_decay=1e-5)

# Early-stopping bookkeeping: epochs since the last improvement, best accuracy.
stale, best_acc = 0, 0

In [None]:
# Mount Google Drive; the checkpoint and the per-epoch CSV files are stored there.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

ckpt_path = "/content/drive/My Drive/foodmodel.ckpt"

# Build a fresh network and resume from the checkpoint when one exists.
model = Classifier().to(device)
if os.path.isfile(ckpt_path):
    # map_location lets a checkpoint saved on GPU load on a CPU-only runtime.
    model.load_state_dict(torch.load(ckpt_path, map_location=device))
else:
    print(f"No checkpoint found at {ckpt_path}, training from scratch")

# The optimizer has to be created AFTER the model is re-instantiated: an
# optimizer that still holds the previous instance's parameters would step
# those tensors and leave this model's weights untouched.
optimizer = torch.optim.Adam(model.parameters(), lr=0.003, weight_decay=1e-5)

for epoch in range(n_epochs):
    # ---------- Training ----------
    model.train()
    train_loss = []
    train_accs = []

    try:
        for batch_idx, (imgs, labels) in enumerate(tqdm(train_loader)):
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            logits = model(imgs)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()

            # .item() stores plain Python floats rather than device tensors.
            loss_value = loss.item()
            acc = (logits.argmax(dim=-1) == labels).float().mean().item()

            train_loss.append(loss_value)
            train_accs.append(acc)

            if batch_idx % 10 == 0:
                print(f"Epoch [{epoch + 1}/{n_epochs}], Batch [{batch_idx}/{len(train_loader)}], Loss: {loss_value:.4f}, Accuracy: {acc:.4f}")

    except Exception as e:
        # Best-effort: report the failure and stop training instead of crashing.
        print(f"An error occurred during training: {e}")
        break

    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    print(f"[ Training | {epoch + 1:03d}/{n_epochs:03d} ] Loss = {train_loss:.5f}, Accuracy = {train_acc:.5f}")

    # ---------- Evaluation ----------
    # A single pass over test_loader yields both the metrics and the
    # per-sample predictions exported below (the loader is not shuffled, so
    # predictions stay in dataset order).
    model.eval()
    valid_loss = []
    valid_accs = []
    prediction = []

    with torch.no_grad():
        for imgs, labels in tqdm(test_loader):
            imgs, labels = imgs.to(device), labels.to(device)
            logits = model(imgs)
            preds = logits.argmax(dim=-1)

            valid_loss.append(criterion(logits, labels).item())
            valid_accs.append((preds == labels).float().mean().item())
            prediction += preds.cpu().tolist()

    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    print(f"[ Testing | {epoch + 1:03d}/{n_epochs:03d} ] Loss = {valid_loss:.5f}, Accuracy = {valid_acc:.5f}")

    # ---------- Checkpointing / early stopping ----------
    if valid_acc > best_acc:
        # Reported 1-based, consistent with the other progress messages.
        print(f"Found better model in epoch {epoch + 1}, saving the model")
        torch.save(model.state_dict(), ckpt_path)  # Save to Google Drive
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvement for {patience} epochs, stopping early")
            break

    # Save this epoch's predictions to a CSV file (one file per epoch).
    df = pd.DataFrame()
    df["Id"] = [str(i).zfill(4) for i in range(1, len(test_set) + 1)]
    df["Category"] = prediction
    df.to_csv(f"/content/drive/My Drive/foodsubmission{epoch + 1}.csv", index=False)