# [IAPR][iapr]: Final project - Chocolate Recognition


**Moodle group ID:** *xx*  
**Kaggle challenge:** *xx* (either `Classic` or `Deep learning`)  
**Kaggle team name (exact):** "*xx*"  

**Author 1 (SCIPER):** *Student Name 1 (xxxxx)*  
**Author 2 (SCIPER):** *Student Name 2 (xxxxx)*  
**Author 3 (SCIPER):** *Student Name 3 (xxxxx)*  

**Due date:** 21.05.2025 (11:59 pm)


## Key Submission Guidelines:
- **Before submitting your notebook, <span style="color:red;">rerun</span> it from scratch!** Go to: `Kernel` > `Restart & Run All`
- **Only groups of three will be accepted**, except in exceptional circumstances.


[iapr]: https://github.com/LTS5/iapr2025

---

In [1]:
# ==============================
# src/classification_dataset.py
# ==============================

import pandas as pd
from torchvision import transforms
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
import torchvision.transforms as T

class ClassificationDataset(Dataset):
    # def __init__(self, img_dir, csv_path, transform=None):
    #     self.img_dir = img_dir
    #     self.data = pd.read_csv(csv_path)
    #     self.transform = transform

    # def __len__(self):
    #     return len(self.data)

    # def __getitem__(self, idx):
    #     row = self.data.iloc[idx]
    #     img_path = os.path.join(self.img_dir, row['filename'])
    #     image = Image.open(img_path).convert("RGB")
    #     labels = torch.tensor(row[1:].values.astype(float), dtype=torch.float32)

    #     if self.transform:
    #         image = self.transform(image)

    #     return image, labels
        def __init__(self, csv_file, img_dir, transform=None):
            self.labels_df = pd.read_csv(csv_file)
            self.img_dir = img_dir
            self.transform = transform or T.Compose([
                T.Resize((224, 224)),
                T.ToTensor()
                ])
            self.image_ids = self.labels_df['id'].astype(str)
            self.labels = self.labels_df.drop(columns=['id']).values.astype(float)

        def __len__(self):
            return len(self.image_ids)

        def __getitem__(self, idx):
            img_id = self.image_ids.iloc[idx]
            img_path = os.path.join(self.img_dir, f"L{img_id}.JPG")
            image = Image.open(img_path).convert("RGB")
            image = self.transform(image)
            label = torch.tensor(self.labels[idx], dtype=torch.float32)
            return image, label

In [2]:
# ==============================
# src/classification_model.py
# ==============================
from torchvision import models
import torch.nn as nn

def get_classification_model(num_labels):
    model = models.resnet18(weights=None)
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, num_labels),
        nn.Sigmoid()
    )
    return model

In [3]:
# ==============================
# src/detection_dataset.py
# ==============================
import os
import torch
from torch.utils.data import Dataset
from PIL import Image

class DetectionDataset(Dataset):
    def __init__(self, img_dir, label_dir, transforms=None):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transforms = transforms
        self.images = [f for f in os.listdir(img_dir) if f.endswith(('.JPG', '.png'))]

    def __getitem__(self, idx):
        img_name = self.images[idx]
        img_path = os.path.join(self.img_dir, img_name)
        label_path = os.path.join(self.label_dir, img_name.replace('.JPG', '.txt').replace('.png', '.txt'))

        image = Image.open(img_path).convert("RGB")
        w, h = image.size
        boxes, labels = [], []

        with open(label_path) as f:
            for line in f:
                class_id, x, y, w_ratio, h_ratio = map(float, line.strip().split())
                x1 = (x - w_ratio/2) * w
                y1 = (y - h_ratio/2) * h
                x2 = (x + w_ratio/2) * w
                y2 = (y + h_ratio/2) * h
                boxes.append([x1, y1, x2, y2])
                labels.append(int(class_id))

        target = {
            "boxes": torch.tensor(boxes, dtype=torch.float32),
            "labels": torch.tensor(labels, dtype=torch.int64),
            "image_id": torch.tensor([idx])
        }

        if self.transforms:
            image = self.transforms(image)

        return image, target

    def __len__(self):
        return len(self.images)


In [4]:
# ==============================
# src/detection_model.py
# ==============================
import torchvision

def get_detection_model(num_classes):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
    return model

In [5]:
# ==============================
# src/train_classification.py
# ==============================

from torchvision import transforms
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

def train_classification():
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])
    dataset = ClassificationDataset("dataset_project_iapr2025/train", "dataset_project_iapr2025/train.csv", transform=transform)
    dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

    model = get_classification_model(num_labels=13)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.BCELoss()

    for epoch in range(10):
        model.train()
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f"Epoch {epoch+1} done")

    torch.save(model.state_dict(), "weights/classification_model.pth")

In [6]:
from torchvision.transforms import ToTensor

def train_detection():
    dataset = DetectionDataset("D:/exchange/EE-451/Choco/EE-451-project/project/dataset_project_iapr2025/train", "D:/exchange/EE-451/Choco/EE-451-project/project/choco_annotation/obj_train_data", transforms=ToTensor())
    dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

    model = get_detection_model(num_classes=13)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        print("gpu running")
    else:
        print("No gpu")
    model.to(device)

    optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)

    for epoch in range(10):
        model.train()
        for images, targets in dataloader:
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print(f"Epoch {epoch+1} done")

    torch.save(model.state_dict(), "weights/detection_model.pth")

In [7]:
def run_pipeline():
    model_det = get_detection_model(num_classes=5)
    model_det.load_state_dict(torch.load("weights/detection_model.pth"))
    model_det.eval().to("cpu")

    model_cls = get_classification_model(num_labels=5)
    model_cls.load_state_dict(torch.load("weights/classification_model.pth"))
    model_cls.eval().to("cpu")

    cls_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])

    results = []
    for img_name in os.listdir("dataset_project_iapr2025/test"):
        if not img_name.endswith(('.jpg', '.png')): continue
        image = Image.open(os.path.join("data/test_images", img_name)).convert("RGB")
        image_tensor = F.to_tensor(image).unsqueeze(0)
        outputs = model_det(image_tensor)
        boxes = outputs[0]['boxes']

        for i, box in enumerate(boxes):
            x1, y1, x2, y2 = box.int().tolist()
            crop = image.crop((x1, y1, x2, y2))
            crop = cls_transform(crop).unsqueeze(0)
            preds = model_cls(crop).squeeze()

            result = {"filename": img_name, "box_id": i}
            for j in range(len(preds)):
                result[f"class_{j}"] = preds[j].item()
            results.append(result)

    df = pd.DataFrame(results)
    df.to_csv("output.csv", index=False)

In [9]:
import torch
print(torch.__version__)            # 打印 PyTorch 版本
print(torch.version.cuda)           # 打印 CUDA 版本（None = 没有CUDA）
print(torch.cuda.is_available())

2.6.0+cpu
None
False


In [12]:
train_detection()
train_classification()
run_pipeline()

No gpu
Epoch 1 done
Epoch 2 done
Epoch 3 done
Epoch 4 done
Epoch 5 done
Epoch 6 done
Epoch 7 done
Epoch 8 done
Epoch 9 done
Epoch 10 done


RuntimeError: Parent directory weights does not exist.