In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torchvision.transforms import RandomErasing
from tqdm import tqdm

In [9]:
class CatDogDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.images = []
        self.labels = []
        
        for class_idx, class_name in enumerate(['Cat', 'Dog']):
            class_path = os.path.join(data_dir, class_name)
            for img_name in os.listdir(class_path):
                self.images.append(os.path.join(class_path, img_name))
                self.labels.append(class_idx)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]
        
        if self.transform:
            image = self.transform(image)
            
        return image, label

def plot_training_history(history, model_type):
    """繪製訓練歷史圖表"""
    plt.figure(figsize=(12, 4))
    
    # 繪製損失曲線
    plt.subplot(1, 2, 1)
    plt.plot(history['train_loss'], label='Train Loss')
    plt.plot(history['val_loss'], label='Val Loss')
    plt.title(f'{model_type} Model - Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    
    # 繪製準確率曲線
    plt.subplot(1, 2, 2)
    plt.plot(history['train_acc'], label='Train Acc')
    plt.plot(history['val_acc'], label='Val Acc')
    plt.title(f'{model_type} Model - Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    
    plt.tight_layout()
    plt.savefig(f'model/{model_type.lower()}_training_history.png')


    try:
        plot_path = f'model/{model_type.lower()}_training_history.png'
        plt.savefig(plot_path)
        print(f"\nSuccess: Plot saved to {plot_path}")
        if os.path.exists(plot_path):
            print(f"Verified: Plot file exists at {plot_path}")
            print(f"File size: {os.path.getsize(plot_path) / 1024:.2f} KB")
        else:
            print(f"Warning: Plot file not found at {plot_path}")
    except Exception as e:
        print(f"\nError saving plot: {str(e)}")
    plt.close()

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device, model_type="Normal"):
    print(f"\n開始訓練 {model_type} 模型...")
    print("=" * 60)
    
    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': []
    }
    
    best_val_acc = 0.0
    
    for epoch in range(num_epochs):
        print(f"\nEpoch [{epoch+1}/{num_epochs}]")
        
        # 訓練階段
        model.train()
        running_loss = 0.0
        running_corrects = 0
        total = 0
        
        train_pbar = tqdm(train_loader, desc='Training')
        for inputs, labels in train_pbar:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            total += labels.size(0)
            
            train_pbar.set_postfix({
                'loss': f'{loss.item():.4f}',
                'acc': f'{(torch.sum(preds == labels.data).item() / labels.size(0)):.4f}'
            })
        
        epoch_loss = running_loss / total
        epoch_acc = running_corrects.double() / total
        
        history['train_loss'].append(epoch_loss)
        history['train_acc'].append(epoch_acc.item())
        
        # 驗證階段
        model.eval()
        running_loss = 0.0
        running_corrects = 0
        total = 0
        
        val_pbar = tqdm(val_loader, desc='Validation')
        with torch.no_grad():
            for inputs, labels in val_pbar:
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                total += labels.size(0)
                
                val_pbar.set_postfix({
                    'loss': f'{loss.item():.4f}',
                    'acc': f'{(torch.sum(preds == labels.data).item() / labels.size(0)):.4f}'
                })
        
        val_loss = running_loss / total
        val_acc = running_corrects.double() / total
        
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc.item())
        
        print(f"Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc*100:.4f}%")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc*100:.4f}%")
        
        # 保存最佳模型
        # if val_acc > best_val_acc:
        #     best_val_acc = val_acc
        #     model_path = f'model/resnet50_{model_type.lower()}_best.pth'
        #     torch.save({
        #         'epoch': epoch + 1,
        #         'model_state_dict': model.state_dict(),
        #         'optimizer_state_dict': optimizer.state_dict(),
        #         'val_acc': val_acc,
        #         'val_loss': val_loss,
        #     }, model_path)
        #     print(f"已保存最佳模型! 驗證準確率: {val_acc*100:.4f}%")


        # 保存最佳模型時添加調試信息
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            model_path = f'model/resnet50_{model_type.lower()}_best.pth'
            try:
                torch.save({
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'val_acc': val_acc,
                    'val_loss': val_loss,
                }, model_path)
                print(f"\nSuccess: Model saved to {model_path}")
                # 驗證文件是否存在
                if os.path.exists(model_path):
                    print(f"Verified: File exists at {model_path}")
                    print(f"File size: {os.path.getsize(model_path) / 1024 / 1024:.2f} MB")
                else:
                    print(f"Warning: File not found at {model_path}")
            except Exception as e:
                print(f"\nError saving model: {str(e)}")    
        
        print("-" * 60)
    
    # 繪製並保存訓練歷史圖表
    plot_training_history(history, model_type)

    
    
    return history

def main():
    # 設定訓練參數
    batch_size = 32
    num_epochs = 10  # 設定為10個epochs
    learning_rate = 0.001
    
    # 檢查是否可用 GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"使用設備: {device}")
    
    # 創建保存模型的目錄
    os.makedirs('model', exist_ok=True)
    
    # 定義數據轉換
    transform_normal = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    
    transform_with_erasing = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        RandomErasing(p=0.5)
    ])
    
    print("\n1. 訓練不使用 Random Erasing 的模型")
    train_dataset = CatDogDataset('/kaggle/input/cv-hw2-dataset/Dataset_OpenCvDl_Hw2_Q5/dataset/training_dataset', transform=transform_normal)
    val_dataset = CatDogDataset('/kaggle/input/cv-hw2-dataset/Dataset_OpenCvDl_Hw2_Q5/dataset/validation_dataset', transform=transform_normal)
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    
    model_normal = models.resnet50(weights=True)
    model_normal.fc = nn.Sequential(
        nn.Linear(model_normal.fc.in_features, 2),
        nn.Softmax(dim=1)
    )
    model_normal = model_normal.to(device)
    
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model_normal.parameters(), lr=learning_rate)
    
    history_normal = train_model(model_normal, train_loader, val_loader, 
                                criterion, optimizer, num_epochs, device, "Normal")
    
    print("\n2. 訓練使用 Random Erasing 的模型")
    train_dataset = CatDogDataset('/kaggle/input/cv-hw2-dataset/Dataset_OpenCvDl_Hw2_Q5/dataset/training_dataset', transform=transform_with_erasing)
    val_dataset = CatDogDataset('/kaggle/input/cv-hw2-dataset/Dataset_OpenCvDl_Hw2_Q5/dataset/validation_dataset', transform=transform_with_erasing)
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    
    model_erasing = models.resnet50(weights=True)
    model_erasing.fc = nn.Sequential(
        nn.Linear(model_erasing.fc.in_features, 2),
        nn.Softmax(dim=1)
    )
    model_erasing = model_erasing.to(device)
    
    optimizer = optim.Adam(model_erasing.parameters(), lr=learning_rate)
    
    history_erasing = train_model(model_erasing, train_loader, val_loader, 
                                 criterion, optimizer, num_epochs, device, "RandomErasing")
    
     # 保存比較結果時添加調試信息
    comparison = {
        'normal': history_normal,
        'erasing': history_erasing
    }
    comparison_path = 'model/training_comparison.npy'
    try:
        np.save(comparison_path, comparison)
        print(f"\nSuccess: Comparison data saved to {comparison_path}")
        if os.path.exists(comparison_path):
            print(f"Verified: Comparison file exists at {comparison_path}")
            print(f"File size: {os.path.getsize(comparison_path) / 1024:.2f} KB")
        else:
            print(f"Warning: Comparison file not found at {comparison_path}")
    except Exception as e:
        print(f"\nError saving comparison data: {str(e)}")
    
    # 計算並顯示最終比較結果
    print("\n最終比較結果:")
    print("-" * 60)
    print("普通模型:")
    print(f"最佳驗證準確率: {max(history_normal['val_acc'])*100:.2f}%")
    print("\nRandom Erasing 模型:")
    print(f"最佳驗證準確率: {max(history_erasing['val_acc'])*100:.2f}%")
    
    print("\n訓練完成！所有模型和圖表已保存。")

    

if __name__ == '__main__':
    main()

使用設備: cuda

1. 訓練不使用 Random Erasing 的模型


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 209MB/s]



開始訓練 Normal 模型...

Epoch [1/10]


Training: 100%|██████████| 507/507 [05:06<00:00,  1.65it/s, loss=0.7801, acc=0.3750]
Validation: 100%|██████████| 57/57 [00:19<00:00,  2.98it/s, loss=0.3574, acc=1.0000]


Train Loss: 0.6142, Train Acc: 67.4753%
Val Loss: 0.6178, Val Acc: 67.2778%

Success: Model saved to model/resnet50_normal_best.pth
Verified: File exists at model/resnet50_normal_best.pth
File size: 269.52 MB
------------------------------------------------------------

Epoch [2/10]


Training: 100%|██████████| 507/507 [04:13<00:00,  2.00it/s, loss=0.4106, acc=0.8750]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.32it/s, loss=0.3981, acc=0.8750]


Train Loss: 0.5737, Train Acc: 71.7840%
Val Loss: 0.5619, Val Acc: 74.0000%

Success: Model saved to model/resnet50_normal_best.pth
Verified: File exists at model/resnet50_normal_best.pth
File size: 269.52 MB
------------------------------------------------------------

Epoch [3/10]


Training: 100%|██████████| 507/507 [04:12<00:00,  2.01it/s, loss=0.4326, acc=0.8750]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.16it/s, loss=0.3463, acc=1.0000]


Train Loss: 0.5217, Train Acc: 77.7346%
Val Loss: 0.4998, Val Acc: 80.1111%

Success: Model saved to model/resnet50_normal_best.pth
Verified: File exists at model/resnet50_normal_best.pth
File size: 269.52 MB
------------------------------------------------------------

Epoch [4/10]


Training: 100%|██████████| 507/507 [04:13<00:00,  2.00it/s, loss=0.5422, acc=0.7500]
Validation: 100%|██████████| 57/57 [00:12<00:00,  4.46it/s, loss=0.7279, acc=0.5000]


Train Loss: 0.4849, Train Acc: 81.8889%
Val Loss: 0.5238, Val Acc: 77.3889%
------------------------------------------------------------

Epoch [5/10]


Training: 100%|██████████| 507/507 [04:11<00:00,  2.01it/s, loss=0.5628, acc=0.7500]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.33it/s, loss=0.3165, acc=1.0000]


Train Loss: 0.4590, Train Acc: 84.5309%
Val Loss: 0.5534, Val Acc: 75.4444%
------------------------------------------------------------

Epoch [6/10]


Training: 100%|██████████| 507/507 [04:10<00:00,  2.02it/s, loss=0.3932, acc=0.8750]
Validation: 100%|██████████| 57/57 [00:12<00:00,  4.39it/s, loss=0.3138, acc=1.0000]


Train Loss: 0.4419, Train Acc: 86.4198%
Val Loss: 0.4320, Val Acc: 87.5556%

Success: Model saved to model/resnet50_normal_best.pth
Verified: File exists at model/resnet50_normal_best.pth
File size: 269.52 MB
------------------------------------------------------------

Epoch [7/10]


Training: 100%|██████████| 507/507 [04:11<00:00,  2.02it/s, loss=0.5998, acc=0.6250]
Validation: 100%|██████████| 57/57 [00:12<00:00,  4.41it/s, loss=0.4255, acc=0.8750]


Train Loss: 0.4314, Train Acc: 87.5185%
Val Loss: 0.4218, Val Acc: 88.8889%

Success: Model saved to model/resnet50_normal_best.pth
Verified: File exists at model/resnet50_normal_best.pth
File size: 269.52 MB
------------------------------------------------------------

Epoch [8/10]


Training: 100%|██████████| 507/507 [04:11<00:00,  2.02it/s, loss=0.5599, acc=0.7500]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.30it/s, loss=0.3165, acc=1.0000]


Train Loss: 0.4203, Train Acc: 88.6111%
Val Loss: 0.4008, Val Acc: 91.1111%

Success: Model saved to model/resnet50_normal_best.pth
Verified: File exists at model/resnet50_normal_best.pth
File size: 269.52 MB
------------------------------------------------------------

Epoch [9/10]


Training: 100%|██████████| 507/507 [04:12<00:00,  2.01it/s, loss=0.3465, acc=1.0000]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.26it/s, loss=0.4800, acc=0.8750]


Train Loss: 0.4069, Train Acc: 90.0679%
Val Loss: 0.4820, Val Acc: 82.1111%
------------------------------------------------------------

Epoch [10/10]


Training: 100%|██████████| 507/507 [04:12<00:00,  2.01it/s, loss=0.4950, acc=0.7500]
Validation: 100%|██████████| 57/57 [00:12<00:00,  4.43it/s, loss=0.4920, acc=0.7500]


Train Loss: 0.4014, Train Acc: 90.7531%
Val Loss: 0.4814, Val Acc: 81.2222%
------------------------------------------------------------

Success: Plot saved to model/normal_training_history.png
Verified: Plot file exists at model/normal_training_history.png
File size: 54.62 KB

2. 訓練使用 Random Erasing 的模型

開始訓練 RandomErasing 模型...

Epoch [1/10]


Training: 100%|██████████| 507/507 [04:16<00:00,  1.98it/s, loss=0.4371, acc=0.8750]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.30it/s, loss=0.4371, acc=0.8750]


Train Loss: 0.5140, Train Acc: 78.7284%
Val Loss: 0.4673, Val Acc: 83.6111%

Success: Model saved to model/resnet50_randomerasing_best.pth
Verified: File exists at model/resnet50_randomerasing_best.pth
File size: 269.52 MB
------------------------------------------------------------

Epoch [2/10]


Training: 100%|██████████| 507/507 [04:15<00:00,  1.98it/s, loss=0.4399, acc=0.8750]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.30it/s, loss=0.4084, acc=0.8750]


Train Loss: 0.4571, Train Acc: 84.5309%
Val Loss: 0.5235, Val Acc: 78.2222%
------------------------------------------------------------

Epoch [3/10]


Training: 100%|██████████| 507/507 [04:14<00:00,  1.99it/s, loss=0.5206, acc=0.7500]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.26it/s, loss=0.5319, acc=0.7500]


Train Loss: 0.4625, Train Acc: 84.3765%
Val Loss: 0.5218, Val Acc: 78.2778%
------------------------------------------------------------

Epoch [4/10]


Training: 100%|██████████| 507/507 [04:14<00:00,  2.00it/s, loss=0.5983, acc=0.6250]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.34it/s, loss=0.4286, acc=0.8750]


Train Loss: 0.4396, Train Acc: 86.5617%
Val Loss: 0.4224, Val Acc: 88.2778%

Success: Model saved to model/resnet50_randomerasing_best.pth
Verified: File exists at model/resnet50_randomerasing_best.pth
File size: 269.52 MB
------------------------------------------------------------

Epoch [5/10]


Training: 100%|██████████| 507/507 [04:14<00:00,  1.99it/s, loss=0.4695, acc=0.8750]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.25it/s, loss=0.3777, acc=0.8750]


Train Loss: 0.4270, Train Acc: 87.9321%
Val Loss: 0.4559, Val Acc: 84.2222%
------------------------------------------------------------

Epoch [6/10]


Training: 100%|██████████| 507/507 [04:14<00:00,  1.99it/s, loss=0.3746, acc=1.0000]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.23it/s, loss=0.3134, acc=1.0000]


Train Loss: 0.4194, Train Acc: 88.6358%
Val Loss: 0.4106, Val Acc: 89.9444%

Success: Model saved to model/resnet50_randomerasing_best.pth
Verified: File exists at model/resnet50_randomerasing_best.pth
File size: 269.52 MB
------------------------------------------------------------

Epoch [7/10]


Training: 100%|██████████| 507/507 [04:13<00:00,  2.00it/s, loss=0.3244, acc=1.0000]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.22it/s, loss=0.4860, acc=0.8750]


Train Loss: 0.4174, Train Acc: 89.1173%
Val Loss: 0.4367, Val Acc: 86.8333%
------------------------------------------------------------

Epoch [8/10]


Training: 100%|██████████| 507/507 [04:14<00:00,  1.99it/s, loss=0.3652, acc=1.0000]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.12it/s, loss=0.3133, acc=1.0000]


Train Loss: 0.4112, Train Acc: 89.8333%
Val Loss: 0.4219, Val Acc: 88.1111%
------------------------------------------------------------

Epoch [9/10]


Training: 100%|██████████| 507/507 [04:16<00:00,  1.98it/s, loss=0.4451, acc=0.8750]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.10it/s, loss=0.5362, acc=0.7500]


Train Loss: 0.4062, Train Acc: 90.3025%
Val Loss: 0.4110, Val Acc: 89.4444%
------------------------------------------------------------

Epoch [10/10]


Training: 100%|██████████| 507/507 [04:15<00:00,  1.99it/s, loss=0.3795, acc=0.8750]
Validation: 100%|██████████| 57/57 [00:13<00:00,  4.31it/s, loss=0.3153, acc=1.0000]


Train Loss: 0.4012, Train Acc: 90.6728%
Val Loss: 0.3888, Val Acc: 91.9444%

Success: Model saved to model/resnet50_randomerasing_best.pth
Verified: File exists at model/resnet50_randomerasing_best.pth
File size: 269.52 MB
------------------------------------------------------------

Success: Plot saved to model/randomerasing_training_history.png
Verified: Plot file exists at model/randomerasing_training_history.png
File size: 61.77 KB

Success: Comparison data saved to model/training_comparison.npy
Verified: Comparison file exists at model/training_comparison.npy
File size: 1.12 KB

最終比較結果:
------------------------------------------------------------
普通模型:
最佳驗證準確率: 91.11%

Random Erasing 模型:
最佳驗證準確率: 91.94%

訓練完成！所有模型和圖表已保存。


In [13]:
import os
print("model 資料夾內容：", os.listdir('model') if os.path.exists('model') else "model 資料夾不存在")


model 資料夾內容： ['training_comparison.npy', 'randomerasing_training_history.png', 'resnet50_randomerasing_best.pth', 'normal_training_history.png', 'resnet50_normal_best.pth']
