> Your comments  
> ...

In [2]:
## YOUR CODE
...
import os
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm
from PIL import Image
import torch.nn.functional as F
import unicodedata  #这个可以用吗？

In [3]:
# --------------------
# 1. 配置文件与路径
# --------------------
# --------------------
# 1. 配置文件与路径
# --------------------
class Config:
    data_path = r"D:\Desktop\EPFL\EE-451\my_group_repo\EE-451-project\dataset_project_iapr2025"
    ref_dir = os.path.join(data_path, "references")
    train_dir = os.path.join(data_path, "train")
    test_dir = os.path.join(data_path, "test")
    train_csv = os.path.join(data_path, "train.csv")
    sample_csv = os.path.join(data_path, "sample_submission.csv")
    output_dir = os.path.join(data_path, "train_augmented")
    out_csv = os.path.join(data_path, "train_augmented.csv")
    num_classes = 13
    img_size = 128
    batch_size = 16
    lr = 3e-4
    epochs = 150       # 增加总训练轮次
    num_aug = 20
    mixup_alpha = 0.4  # MixUp参数


In [None]:
# --------------------
# 1.2 离线数据增强（优化版）
# --------------------
class AdvancedAugmentor:
    def __init__(self):
        self.transform = transforms.Compose([
            transforms.Pad(50, padding_mode='reflect'),
            transforms.RandomPerspective(distortion_scale=0.3, p=0.5),
            transforms.RandomRotation(15),
            transforms.RandomResizedCrop(128, scale=(0.7, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
        ])
        
    def __call__(self, img):
        return self.transform(img)

def generate_augmented_data():
    os.makedirs(Config.output_dir, exist_ok=True)
    label_df = pd.read_csv(Config.train_csv)
    augmentor = AdvancedAugmentor()
    
    augmented_rows = []
    for _, row in tqdm(label_df.iterrows(), total=len(label_df), desc="🔁 Augmenting"):
        img_id = row["id"]
        img_path = os.path.join(Config.train_dir, f"L{img_id}.JPG")
        
        try:
            img = Image.open(img_path).convert("RGB")
        except Exception as e:
            print(f"⚠️ 跳过 {img_path}: {e}")
            continue

        for i in range(Config.num_aug):
            aug_img = augmentor(img)
            new_id = f"{img_id}_aug_{i}"
            new_filename = f"L{new_id}.JPG"
            save_path = os.path.join(Config.output_dir, new_filename)
            aug_img.save(save_path, format="JPEG", quality=95)

            new_row = row.copy()
            new_row["id"] = new_id
            augmented_rows.append(new_row)

    aug_df = pd.DataFrame(augmented_rows)
    aug_df.to_csv(Config.out_csv, index=False)
    print(f"\n✅ 完成增强，共生成 {len(aug_df)} 张图像")


🔍 原始 CSV 列名： ['id', 'Jelly White', 'Jelly Milk', 'Jelly Black', 'Amandina', 'Crème brulée', 'Triangolo', 'Tentation noir', 'Comtesse', 'Noblesse', 'Noir authentique', 'Passion au lait', 'Arabia', 'Stracciatella']


  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id
  new_row["id"] = new_id



✅ 完成增强，共生成 1800 张图像
📁 增强图像目录: D:\Desktop\EPFL\EE-451\my_group_repo\EE-451-project\dataset_project_iapr2025\train_augmented
📄 增强标签文件: D:\Desktop\EPFL\EE-451\my_group_repo\EE-451-project\dataset_project_iapr2025\train_augmented.csv


In [18]:
# --------------------
# 2. 参考特征处理（基于OpenCV）
# --------------------
def extract_handcrafted_features(img_path):
    """手工特征提取函数"""
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    # 颜色特征（HSV空间直方图）
    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
    color_feat = np.concatenate([
        cv2.calcHist([hsv], [0], None, [8], [0, 180]).flatten(),
        cv2.calcHist([hsv], [1], None, [8], [0, 256]).flatten()
    ])
    
    # 形状特征（基于轮廓）
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    shape_feat = np.zeros(7+2)  # Hu矩(7)+面积(1)+长宽比(1)
    if len(contours) > 0:
        cnt = max(contours, key=cv2.contourArea)
        hu = cv2.HuMoments(cv2.moments(cnt)).flatten()
        x,y,w,h = cv2.boundingRect(cnt)
        area = cv2.contourArea(cnt)
        aspect = w / h if h !=0 else 0
        shape_feat = np.concatenate([hu, [area, aspect]])
    
    return np.concatenate([color_feat, shape_feat])

def normalize_name(name):
    """统一名称格式：去除重音、转为小写、替换空格"""
    # 去除重音符号
    name = unicodedata.normalize('NFKD', name).encode('ascii', 'ignore').decode()
    # 统一为小写并用空格分隔
    return name.lower().replace('_', ' ').strip()

def load_reference_features():
    """加载并标准化参考特征名称"""
    ref_features = {}
    for fname in os.listdir(Config.ref_dir):
        if not fname.lower().endswith(('.png','.jpg','.jpeg')):
            continue
        
        # 标准化名称处理
        raw_name = os.path.splitext(fname)[0]
        norm_name = normalize_name(raw_name)
        
        path = os.path.join(Config.ref_dir, fname)
        feat = extract_handcrafted_features(path)
        ref_features[norm_name] = feat
    return ref_features

ref_features = load_reference_features()
feat_dim = len(next(iter(ref_features.values())))  # 自动获取特征维度

In [19]:
# --------------------
# 3. 数据集类(只使用图像)
# --------------------
class ChocolateDataset(Dataset):
    def __init__(self, df, img_dir, ref_dict, transform=None):
        self.df = df
        self.img_dir = img_dir
        self.ref_dict = ref_dict
        self.transform = transform

        # 关键修改：存储所有图像路径
        self.image_paths = [
            os.path.join(self.img_dir, f"L{row['id']}.JPG")
            for _, row in self.df.iterrows()
        ]
        
        # 保存原始列名（用于数据访问）
        self.original_class_names = df.columns[1:].tolist()
        
        # 生成标准化后的名称（用于特征匹配）
        self.norm_class_names = [
            normalize_name(cls) 
            for cls in self.original_class_names
        ]

        # 验证参考特征
        missing = [cls for cls in self.norm_class_names if cls not in ref_dict]
        if missing:
            available = list(ref_dict.keys())
            raise KeyError(
                f"特征匹配失败！\n"
                f"- 缺失的类别: {missing}\n"
                f"- 可用的参考特征: {available}"
            )
    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_id = row['id']
        #img_path = os.path.join(self.img_dir, f"L{img_id}.JPG")
        img_path = self.image_paths[idx]

        # 加载图像
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        if self.transform:
            img = self.transform(img)
        else:
            img = transforms.ToTensor()(img)
        
        
        # 使用原始列名获取目标值
        targets = row[self.original_class_names].values.astype(np.float32)

        return img, torch.FloatTensor(targets), img_id


In [20]:
# --------------------
# 2. 改进模型架构
# --------------------
class ChannelAttention(nn.Module):
    def __init__(self, channel, reduction=8):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction),
            nn.ReLU(),
            nn.Linear(channel // reduction, channel),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)

class EnhancedConvBlock(nn.Module):
    def __init__(self, in_ch, out_ch, use_attn=True):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, out_ch, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_ch),
            nn.LeakyReLU(0.1),  # 改为SiLU激活函数
            ChannelAttention(out_ch) if use_attn else nn.Identity()
        )
        
    def forward(self, x):
        return self.conv(x)

class YOLOCountNet_v3(nn.Module):
    def __init__(self, num_classes=13):
        super().__init__()
        # 特征提取
        self.backbone = nn.Sequential(
            EnhancedConvBlock(3, 32),
            EnhancedConvBlock(32, 64),
            nn.MaxPool2d(2),  # 64x64
            
            EnhancedConvBlock(64, 128),
            EnhancedConvBlock(128, 256),
            nn.MaxPool2d(2),  # 32x32
            
            EnhancedConvBlock(256, 512),
            EnhancedConvBlock(512, 512),
            nn.AdaptiveAvgPool2d((1,1))
        )
        # 分类头
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512, 1024),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.6),
            nn.LayerNorm(1024),
            nn.Linear(1024, 512),
            nn.LeakyReLU(0.1),
            nn.Dropout(0.4),
            nn.Linear(512, num_classes),
            nn.Sigmoid()
        )
        self._init_weights()

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='leaky_relu')
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                nn.init.constant_(m.bias, 0.01)

    def forward(self, x):
        features = self.backbone(x)
        return self.head(features) * 5.0


In [21]:
def mixup_data(x, y, alpha=0.4):
    '''对一批数据执行 MixUp 操作'''
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size(0)
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

In [22]:
# --------------------
# 4. 其他优化组件
# --------------------
def compute_stats():
    """安全计算数据集的均值和标准差"""
    # 使用基础转换（无数据增强）
    base_transform = transforms.Compose([
        lambda x: cv2.resize(x, (Config.img_size, Config.img_size)),
        transforms.ToTensor()
    ])
    
    # 创建临时数据集
    temp_df = pd.read_csv(Config.train_csv)
    temp_dataset = ChocolateDataset(temp_df, Config.train_dir, ref_features, transform=base_transform)
    
    # 初始化统计量
    pixel_sum = torch.zeros(3)
    pixel_sq_sum = torch.zeros(3)
    
    # 遍历所有图像路径
    for img_path in tqdm(temp_dataset.image_paths, desc="计算统计量"):
        img = cv2.imread(img_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = base_transform(img)  # 应用基础转换
        
        pixel_sum += img.sum(dim=[1,2])
        pixel_sq_sum += (img ** 2).sum(dim=[1,2])
    
    # 计算最终统计量
    total_pixels = len(temp_dataset) * Config.img_size * Config.img_size
    mean = pixel_sum / total_pixels
    std = torch.sqrt(pixel_sq_sum / total_pixels - mean ** 2)
    
    return mean.tolist(), std.tolist()

def get_train_transform():
    return transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomPerspective(distortion_scale=0.3, p=0.5),
        transforms.RandomResizedCrop(Config.img_size, scale=(0.7, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
        transforms.ToTensor(),
        transforms.Normalize(*compute_stats())
    ])

def adjust_learning_rate(optimizer, epoch):
    """自定义学习率调整规则"""
    if epoch < Config.epochs//3:
        lr = Config.lr
    elif epoch < 2*Config.epochs//3:
        lr = Config.lr * 0.1
    else:
        lr = Config.lr * 0.01
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


In [23]:
# --------------------
# 5. 优化训练流程
# --------------------
class RegressionLabelSmoothing(nn.Module):
    def __init__(self, smoothing=0.1, min_val=0, max_val=5):
        super().__init__()
        self.smoothing = smoothing
        self.min = min_val
        self.max = max_val
    
    def forward(self, pred, target):
        normalized_target = (target - self.min) / (self.max - self.min)
        smoothed_target = normalized_target * (1 - self.smoothing) + 0.5 * self.smoothing
        return F.mse_loss(pred, smoothed_target)

def train():
    # 初始化
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = YOLOCountNet_v3(Config.num_classes).to(device)
    
    # 优化器配置
    optimizer = optim.AdamW(model.parameters(), lr=Config.lr, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=Config.epochs)
    
    # 损失函数
    criterion = RegressionLabelSmoothing(smoothing=0.1)
    
    # 数据加载
    train_df = pd.read_csv(Config.out_csv if os.path.exists(Config.out_csv) else Config.train_csv)
    dataset = ChocolateDataset(train_df, 
                              Config.output_dir if os.path.exists(Config.out_csv) else Config.train_dir,
                              ref_features,
                              transform=get_train_transform())
    loader = DataLoader(dataset, batch_size=Config.batch_size, shuffle=True, pin_memory=True)
    
    # 训练循环
    model.train()
    for epoch in range(Config.epochs):
        adjust_learning_rate(optimizer, epoch)  # 自定义学习率调整
        total_loss = 0
        
        for img, targets, _ in tqdm(loader, desc=f"Epoch {epoch+1}/{Config.epochs}"):
            img, targets = img.to(device), targets.to(device)
            
            # MixUp增强
            img, targets_a, targets_b, lam = mixup_data(img, targets, alpha=0.4)
            
            # 前向传播
            optimizer.zero_grad()
            outputs = model(img)
            
            # 损失计算
            loss = lam * criterion(outputs, targets_a) + (1 - lam) * criterion(outputs, targets_b)
            
            # 反向传播
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # 梯度裁剪
            optimizer.step()
            
            total_loss += loss.item()
        
        scheduler.step()
        print(f"Epoch {epoch+1} | Loss: {total_loss/len(loader):.4f} | LR: {scheduler.get_last_lr()[0]:.2e}")
    
    return model

In [24]:
# --------------------
# 6. 预测与提交
# --------------------

def predict(model, sample_csv, mean, std, class_order):
    test_df = pd.read_csv(sample_csv)
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((Config.img_size + 16, Config.img_size + 16)),
        transforms.CenterCrop(Config.img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])
    
    preds = []
    model.eval()
    device = next(model.parameters()).device
    # class_names = test_df.columns[1:].tolist()  # 确保顺序一致
    
    # 使用标准化后的类别顺序
    norm_class_names = [normalize_name(cls) for cls in class_order]
    
    # 关键修改2：验证参考特征
    missing = [cls for cls in norm_class_names if cls not in ref_features]
    if missing:
        raise KeyError(f"缺失参考特征: {missing}\n可用特征: {list(ref_features.keys())}")

    with torch.no_grad():
        for _, row in tqdm(test_df.iterrows(), total=len(test_df)):
            img_id = row['id']
            img_path = os.path.join(Config.test_dir, f"L{img_id}.JPG")
            
            if not os.path.exists(img_path):
                print(f"Warning: {img_path} not found")
                preds.append([0]*Config.num_classes)
                continue
            
            # 处理图像
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = transform(img).unsqueeze(0).to(device)
            
            # 关键修改3：使用标准化后的名称构建参考矩阵
            ref_matrix = np.stack([ref_features[cls] for cls in norm_class_names])
            ref_matrix = torch.FloatTensor(ref_matrix).unsqueeze(0).to(device)
            
            # 预测
            output = model(img)
            pred = output.squeeze().cpu().numpy()

            # 裁剪预测值并四舍五入，确保整数输出
            pred = np.clip(pred, 0, 20)
            pred = np.round(pred).astype(int)
            print(f"🔍 Prediction for {img_id}: {pred}")

            preds.append(pred.tolist())
    
    # 生成提交文件
    submission = test_df.copy()
    submission.iloc[:, 1:] = preds
    submission.to_csv("submission.csv", index=False)
    print("Submission saved to submission.csv")


In [None]:
# --------------------
# 主程序
# --------------------
if __name__ == "__main__":
    # 生成增强数据（如果不存在）
    if not os.path.exists(Config.out_csv):
        generate_augmented_data()
    
    # 训练模型
    model = train()
    
    # 预测与保存
    your_mean, your_std = compute_stats()
    class_order = pd.read_csv(Config.train_csv).columns[1:].tolist()
    predict(model, Config.sample_csv, your_mean, your_std, class_order)

计算统计量: 100%|██████████| 90/90 [00:18<00:00,  4.86it/s]
Epoch 1/150: 100%|██████████| 113/113 [37:43<00:00, 20.03s/it]


Epoch 1 | Loss: 0.1506 | LR: 3.00e-04


Epoch 2/150: 100%|██████████| 113/113 [38:59<00:00, 20.71s/it]


Epoch 2 | Loss: 0.0417 | LR: 3.00e-04


Epoch 3/150: 100%|██████████| 113/113 [54:40<00:00, 29.03s/it]


Epoch 3 | Loss: 0.0405 | LR: 3.00e-04


Epoch 4/150: 100%|██████████| 113/113 [54:40<00:00, 29.03s/it] 


Epoch 4 | Loss: 0.0397 | LR: 3.00e-04


Epoch 5/150: 100%|██████████| 113/113 [40:50<00:00, 21.68s/it]


Epoch 5 | Loss: 0.0377 | LR: 3.00e-04


Epoch 6/150: 100%|██████████| 113/113 [42:21<00:00, 22.49s/it]


Epoch 6 | Loss: 0.0349 | LR: 3.00e-04


Epoch 7/150: 100%|██████████| 113/113 [42:22<00:00, 22.50s/it]


Epoch 7 | Loss: 0.0326 | LR: 3.00e-04


Epoch 8/150: 100%|██████████| 113/113 [42:30<00:00, 22.57s/it]


Epoch 8 | Loss: 0.0303 | LR: 3.00e-04


Epoch 9/150: 100%|██████████| 113/113 [39:26<00:00, 20.94s/it]


Epoch 9 | Loss: 0.0286 | LR: 2.99e-04


Epoch 10/150: 100%|██████████| 113/113 [1:33:04<00:00, 49.42s/it]  


Epoch 10 | Loss: 0.0280 | LR: 2.99e-04


Epoch 11/150: 100%|██████████| 113/113 [39:11<00:00, 20.81s/it]


Epoch 11 | Loss: 0.0266 | LR: 2.99e-04


Epoch 12/150: 100%|██████████| 113/113 [40:07<00:00, 21.31s/it]


Epoch 12 | Loss: 0.0266 | LR: 2.99e-04


Epoch 13/150: 100%|██████████| 113/113 [24:53<00:00, 13.22s/it]


Epoch 13 | Loss: 0.0261 | LR: 2.99e-04


Epoch 14/150: 100%|██████████| 113/113 [20:09<00:00, 10.71s/it]


Epoch 14 | Loss: 0.0258 | LR: 2.99e-04


Epoch 15/150: 100%|██████████| 113/113 [20:00<00:00, 10.62s/it]


Epoch 15 | Loss: 0.0254 | LR: 2.99e-04


Epoch 16/150: 100%|██████████| 113/113 [20:03<00:00, 10.65s/it]


Epoch 16 | Loss: 0.0249 | LR: 2.99e-04


Epoch 17/150: 100%|██████████| 113/113 [27:38<00:00, 14.68s/it]


Epoch 17 | Loss: 0.0248 | LR: 2.99e-04


Epoch 18/150: 100%|██████████| 113/113 [27:51<00:00, 14.79s/it]


Epoch 18 | Loss: 0.0243 | LR: 2.99e-04


Epoch 19/150: 100%|██████████| 113/113 [20:01<00:00, 10.64s/it]


Epoch 19 | Loss: 0.0242 | LR: 2.99e-04


Epoch 20/150: 100%|██████████| 113/113 [10:43:34<00:00, 341.72s/it]   


Epoch 20 | Loss: 0.0238 | LR: 2.99e-04


Epoch 21/150: 100%|██████████| 113/113 [29:59<00:00, 15.93s/it]


Epoch 21 | Loss: 0.0236 | LR: 2.99e-04


Epoch 22/150: 100%|██████████| 113/113 [24:52<00:00, 13.20s/it]


Epoch 22 | Loss: 0.0230 | LR: 2.99e-04


Epoch 23/150: 100%|██████████| 113/113 [38:14<00:00, 20.30s/it]


Epoch 23 | Loss: 0.0229 | LR: 2.98e-04


Epoch 24/150: 100%|██████████| 113/113 [24:18<00:00, 12.91s/it]


Epoch 24 | Loss: 0.0226 | LR: 2.98e-04


Epoch 25/150:  38%|███▊      | 43/113 [10:48<15:44, 13.50s/it]