In [1]:
import os
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.preprocessing import MultiLabelBinarizer
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torchvision import transforms, models
from torchvision.models import efficientnet_b4, EfficientNet_B4_Weights
import matplotlib.pyplot as plt

# Hyperparameters
learning_rate = 0.01
batch_size = 32
num_epochs = 10
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

base_path = '/kaggle/input/'
train_csv_path = os.path.join(base_path, 'project/train.csv')
test_csv_path = os.path.join(base_path, 'project/test.csv')
local_train_dir = base_path + 'images/train_data/train_data'
local_test_dir = base_path + 'images/test_data/test_data'

# Load and filter training CSV
train_df = pd.read_csv(train_csv_path)
train_df['labels'] = train_df['labels'].apply(lambda x: x.split(', '))

def file_exists_in_local_dir(filename, folder):
    return os.path.exists(os.path.join(folder, filename))

train_df = train_df[train_df['filename'].apply(lambda x: file_exists_in_local_dir(x, local_train_dir))]
train_df = train_df.reset_index(drop=True)

mlb = MultiLabelBinarizer()
train_labels = mlb.fit_transform(train_df['labels'])
num_classes = len(mlb.classes_)

test_df = pd.read_csv(test_csv_path)

class MultiLabelDataset(Dataset):
    def __init__(self, df, img_folder, labels=None, transform=None):
        self.df = df
        self.img_folder = img_folder
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_name = self.df.iloc[idx]['filename']
        img_path = os.path.join(self.img_folder, img_name)
        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        if self.labels is not None:
            label_vec = self.labels[idx]
            return image, torch.tensor(label_vec, dtype=torch.float32)
        else:
            return image, img_name

# Basic data augmentation
transform = transforms.Compose([
    transforms.RandomResizedCrop(256, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

from torch.utils.data import random_split

train_dataset = MultiLabelDataset(train_df, local_train_dir, labels=train_labels, transform=transform)
validation_size = 1000
train_size = len(train_dataset) - validation_size
train_dataset, validation_dataset = random_split(train_dataset, [train_size, validation_size])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)

# 測試集保持不變
test_dataset = MultiLabelDataset(test_df, local_test_dir, labels=None, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Training samples: {train_size}, Validation samples: {validation_size}")


# Load EfficientNet-B0 model with pretrained weights
weights = EfficientNet_B4_Weights.IMAGENET1K_V1
model = efficientnet_b4(weights=weights)

in_features = model.classifier[1].in_features
model.classifier[1] = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(in_features, num_classes)
)
model = model.to(device)

# Focal loss for imbalanced datasets
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(self, inputs, targets):
        BCE_loss = nn.BCEWithLogitsLoss(reduction='none')(inputs, targets)
        pt = torch.exp(-BCE_loss)
        focal_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss
        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss

criterion = FocalLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=learning_rate, epochs=num_epochs, steps_per_epoch=len(train_loader))

cuda
Training samples: 36866, Validation samples: 1000


Downloading: "https://download.pytorch.org/models/efficientnet_b4_rwightman-23ab8bcd.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b4_rwightman-23ab8bcd.pth
100%|██████████| 74.5M/74.5M [00:00<00:00, 205MB/s]


In [3]:
from sklearn.metrics import average_precision_score
import numpy as np
import torch

# Function to evaluate mAP
def evaluate_mAP(model, data_loader, device, num_classes):
    model.eval()
    all_targets = []
    all_predictions = []

    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predictions = torch.sigmoid(outputs).cpu().numpy()
            all_predictions.append(predictions)
            all_targets.append(labels.cpu().numpy())

    all_predictions = np.vstack(all_predictions)
    all_targets = np.vstack(all_targets)

    # Calculate average precision for each class
    ap_per_class = []
    for i in range(num_classes):
        if np.sum(all_targets[:, i]) == 0:
            continue  # Skip classes with no positive samples
        ap = average_precision_score(all_targets[:, i], all_predictions[:, i])
        ap_per_class.append(ap)

    # Compute mAP
    mAP = np.mean(ap_per_class)
    return mAP

# Early stopping parameters
patience = 1  # Number of epochs to wait for improvement
best_mAP = 0.0
early_stop_counter = 0

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    progress_bar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"Epoch {epoch+1}")

    for batch_idx, (images, labels) in progress_bar:
        images, labels = images.to(device), labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)  # Normal criterion without Mixup
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        progress_bar.set_postfix({"Batch Loss": loss.item()})

    scheduler.step()
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

    # Evaluate mAP on test data
    mAP = evaluate_mAP(model, validation_loader, device, num_classes)
    print(f"Epoch [{epoch+1}/{num_epochs}], mAP: {mAP:.4f}")

    # Early stopping logic
    if mAP > best_mAP:
        best_mAP = mAP
        early_stop_counter = 0
        # Save the best model
        best_model_path = f'/kaggle/working/best_model.pth'
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': avg_loss,
            'mAP': mAP,
        }, best_model_path)
        print(f"New best model saved to {best_model_path}")
    else:
        early_stop_counter += 1
        print(f"No improvement in mAP. Early stop counter: {early_stop_counter}/{patience}")

    # Check if early stopping condition is met
    if early_stop_counter >= patience:
        print(f"Early stopping triggered. Best mAP: {best_mAP:.4f}")
        break


Epoch 1: 100%|██████████| 1153/1153 [20:10<00:00,  1.05s/it, Batch Loss=0.0269] 


Epoch [1/10], Loss: 0.0132
Epoch [1/10], mAP: 0.6133
New best model saved to /kaggle/working/best_model.pth


Epoch 2: 100%|██████████| 1153/1153 [16:52<00:00,  1.14it/s, Batch Loss=0.0229] 


Epoch [2/10], Loss: 0.0114
Epoch [2/10], mAP: 0.6176
New best model saved to /kaggle/working/best_model.pth


Epoch 3: 100%|██████████| 1153/1153 [16:51<00:00,  1.14it/s, Batch Loss=0.0192] 


Epoch [3/10], Loss: 0.0099
Epoch [3/10], mAP: 0.6590
New best model saved to /kaggle/working/best_model.pth


Epoch 4: 100%|██████████| 1153/1153 [16:52<00:00,  1.14it/s, Batch Loss=0.016]  


Epoch [4/10], Loss: 0.0087
Epoch [4/10], mAP: 0.6743
New best model saved to /kaggle/working/best_model.pth


Epoch 5: 100%|██████████| 1153/1153 [16:49<00:00,  1.14it/s, Batch Loss=0.0439] 


Epoch [5/10], Loss: 0.0077
Epoch [5/10], mAP: 0.6777
New best model saved to /kaggle/working/best_model.pth


Epoch 6: 100%|██████████| 1153/1153 [16:56<00:00,  1.13it/s, Batch Loss=0.016]  


Epoch [6/10], Loss: 0.0069
Epoch [6/10], mAP: 0.7000
New best model saved to /kaggle/working/best_model.pth


Epoch 7: 100%|██████████| 1153/1153 [17:02<00:00,  1.13it/s, Batch Loss=0.00849]


Epoch [7/10], Loss: 0.0062
Epoch [7/10], mAP: 0.7028
New best model saved to /kaggle/working/best_model.pth


Epoch 8: 100%|██████████| 1153/1153 [16:47<00:00,  1.14it/s, Batch Loss=0.0167] 


Epoch [8/10], Loss: 0.0055
Epoch [8/10], mAP: 0.6920
No improvement in mAP. Early stop counter: 1/1
Early stopping triggered. Best mAP: 0.7028


In [4]:
results = []

with torch.no_grad():
    for images, filenames in test_loader:
        images = images.to(device)
        outputs = torch.sigmoid(model(images))
        outputs = outputs.cpu().numpy()

        for i, filename in enumerate(filenames):
            probs = outputs[i, :num_classes]
            results.append([filename] + probs.tolist())


In [5]:
columns = ['filename'] + [f'class_{i}_prob' for i in range(num_classes)]

# Create a DataFrame
columns_to_keep = ['filename'] + [f'class_{i}_prob' for i in range(79)]
submission_df = pd.DataFrame(results, columns=columns)

# Define the output path (use /kaggle/working if in a Kaggle environment)
submission_file = os.path.join('/kaggle/working', 'submission.csv')

# Save DataFrame to CSV
submission_df.to_csv(submission_file, index=False)

print(f"Submission file saved to: {submission_file}")

Submission file saved to: /kaggle/working/submission.csv


# 以下是要讀取已經有紀錄起來的 model 改路徑即可使用 

In [2]:
# 設定模型文件的路徑
model_path = '/kaggle/input/model-for-sldl/pytorch/default/1/best_model.pth'  # 根據實際文件名稱設置

# 檢查該路徑是否存在
if os.path.exists(model_path):
    print(f"從 {model_path} 載入最佳模型")
    
    # 載入模型檔案
    checkpoint = torch.load(model_path)
    
    # 載入模型和優化器的 state_dict
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch = checkpoint['epoch']
    best_mAP = checkpoint['mAP']  # 恢復最佳 mAP
    
    print(f"從 epoch {epoch+1} 繼續訓練，最佳 mAP: {best_mAP:.4f}")
else:
    print(f"沒有找到模型檔案，從頭開始訓練。")
    epoch = 0  # 從第一個 epoch 開始
    best_mAP = 0.0  # 尚未有最佳模型


從 /kaggle/input/model-for-sldl/pytorch/default/1/best_model.pth 載入最佳模型


  checkpoint = torch.load(model_path)


從 epoch 3 繼續訓練，最佳 mAP: 0.5171
