*數據準備和預處理*


In [8]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [3]:
!pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [4]:
!pwd

/home/jovyan/2025Coding_101


In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# 圖像預處理

class AccidentDataset(Dataset):
    def __init__(self, image_dir, labels_file, transform=None):
        self.image_dir = image_dir
        self.labels = pd.read_csv(labels_file)['risk'].values
        #讀tarin內的資料夾
        self.image_paths = sorted(
                  os.path.join(image_dir, i)
                  for i in os.listdir(image_dir)
                  )
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_paths = self.image_paths[idx]
        img_list = []
        for image_path in os.listdir(image_paths):
          image = cv2.imread(os.path.join(image_paths, image_path))
          image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

          if self.transform:
              image = self.transform(image)
          img_list.append(image)
        while len(img_list)<169:
          img_list.append(image)
        image_tensor = torch.stack(img_list)  # Convert list of images to tensor
        label = torch.stack([torch.tensor(self.labels[idx])])
        return image_tensor, label

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

dataset = AccidentDataset(
    image_dir='freeway/train',
    labels_file='freeway/freeway_train.csv',
    transform=transform)


分割訓練集和測試集

In [2]:
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

train_indices, test_indices = train_test_split(list(range(len(dataset))), test_size=0.2, random_state=42)

train_dataset = Subset(dataset, train_indices)
test_dataset = Subset(dataset, test_indices)


train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)




#分隔線

構建模型

In [1]:
#0302 New
import torch
import torch.nn as nn
import torch.nn.functional as F

class EnhancedCNN_LSTM(nn.Module):
    def __init__(self, num_classes, input_size=(224, 224)):
        super(EnhancedCNN_LSTM, self).__init__()
        # Assuming input size is 224x224 (common for CNNs), adjust if different
        self.input_size = input_size
        
        # Depthwise Separable Conv to reduce parameters
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = self._make_dw_conv(64, 128)
        self.conv3 = self._make_dw_conv(128, 128)
        self.conv4 = self._make_dw_conv(128, 256)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=1, stride=1, bias=False)  # Pointwise conv for efficiency
        self.bn5 = nn.BatchNorm2d(256)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Compute CNN output size dynamically
        self._cnn_output_size = self._get_cnn_output_size()
        self.fc1 = nn.Linear(self._cnn_output_size, 512)
        self.dropout = nn.Dropout(0.3)  # Add dropout for regularization

        # LSTM with bidirectional for better sequence modeling
        self.lstm = nn.LSTM(input_size=512, hidden_size=256, num_layers=2, 
                           batch_first=True, bidirectional=True)
        self.fc2 = nn.Linear(256 * 2, num_classes)  # *2 due to bidirectional

    def _make_dw_conv(self, in_channels, out_channels):
        """Depthwise separable convolution block"""
        return nn.Sequential(
            nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1, 
                     groups=in_channels, bias=False),  # Depthwise
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),  # Pointwise
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def _get_cnn_output_size(self):
        """Calculate the flattened size after CNN layers"""
        with torch.no_grad():
            x = torch.zeros(1, 3, *self.input_size)  # Dummy input
            x = self.pool(F.relu(self.bn1(self.conv1(x))))
            x = self.pool(self.conv2(x))
            x = self.pool(self.conv3(x))
            x = self.pool(self.conv4(x))
            x = self.pool(F.relu(self.bn5(self.conv5(x))))
            return x.view(1, -1).size(1)

    def forward(self, x):
        batch_size, timesteps, C, H, W = x.size()
        c_in = x.view(batch_size * timesteps, C, H, W)

        # CNN with residual-like behavior via careful design
        x = self.pool(F.relu(self.bn1(self.conv1(c_in))))
        x = self.pool(self.conv2(x))
        x = self.pool(self.conv3(x))
        x = self.pool(self.conv4(x))
        x = self.pool(F.relu(self.bn5(self.conv5(x))))

        # Flatten and fully connected
        x = x.view(batch_size * timesteps, -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)

        # LSTM
        x = x.view(batch_size, timesteps, -1)
        x, _ = self.lstm(x)
        x = self.fc2(x[:, -1, :])  # Take the last timestep
        return torch.sigmoid(x)  # Assuming binary/multilabel classification

# Instantiate the model
model = EnhancedCNN_LSTM(num_classes=1, input_size=(224, 224))

# Optional: Enable mixed precision training
model = model.half()  # Use FP16 for lower memory usage

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNN_LSTM(nn.Module):
    def __init__(self, num_classes):
        super(CNN_LSTM, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        self.fc1 = nn.Linear(256 * 7 * 7, 512)

        self.lstm = nn.LSTM(input_size=512, hidden_size=256, num_layers=2, batch_first=True)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        batch_size, timesteps, C, H, W = x.size()
        c_in = x.view(batch_size * timesteps, C, H, W)
        x = self.pool(F.relu(self.conv1(c_in)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = self.pool(F.relu(self.conv5(x)))
        x = F.relu(self.fc1(x.view(batch_size * timesteps,-1)))
        x = x.view(batch_size, timesteps, -1)
        x, _ = self.lstm(x)
        x = torch.sigmoid(self.fc2(x[:, -1, :]))
        return x
model = CNN_LSTM(1)


In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.get_device_name(0))

NVIDIA GeForce RTX 4070 Ti


In [9]:
!pip3 install torch torchvision torchaudio

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [4]:
model = model.to(device)

訓練模型

In [6]:
import torch.optim as optim
from tqdm import tqdm

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_loader):
      #outputs = model(images.to(device))
      images, labels = images.to(device), labels.to('cuda').float().view(-1, 1)
      #images, labels = images.to(device), labels.to(device).float().view(-1, 1)  # 確保 labels 轉為 float
      #images, labels = model(images.to(device)), labels(images.to(device)).view(-1, 1)
      #images, labels = images.to(device), labels.view(-1,1)
      optimizer.zero_grad()
      outputs = model(images)  # 直接輸出 logits，讓 BCEWithLogitsLoss 處理 Sigmoid
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()
      running_loss += loss.item()

    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')


  0%|          | 0/144 [00:00<?, ?it/s]


RuntimeError: Input type (torch.cuda.FloatTensor) and weight type (torch.cuda.HalfTensor) should be the same

In [8]:
#GPU 顯示版
import torch
import torch.optim as optim
import torch.nn as nn
from tqdm import tqdm
from torch.amp import GradScaler, autocast
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score
import os
from PIL import Image
import numpy as np

# 設置記憶體優化環境變數，以減少記憶體碎片
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# 設置設備
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 輕量化的模型定義
class EnhancedCNN_LSTM(nn.Module):
    def __init__(self, num_classes, input_size=(128, 128)):  # 減少輸入尺寸
        super(EnhancedCNN_LSTM, self).__init__()
        self.input_size = input_size
        
        # 輕量化的卷積神經網路 (CNN)
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)  # 減少濾波器數量
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.dropout = nn.Dropout(0.3)  # 添加Dropout以防止過擬合

        # 計算扁平化後的大小，對於128x128輸入：128 / 8 = 16
        self.fc1 = nn.Linear(128 * 16 * 16, 512)  # 減少全連接層大小
        
        # 輕量化的雙向LSTM
        self.lstm = nn.LSTM(input_size=512, hidden_size=256, num_layers=2, 
                           batch_first=True, bidirectional=True, dropout=0.3)
        self.fc2 = nn.Linear(256 * 2, num_classes)  # 雙向LSTM輸出維度加倍

    def forward(self, x):
        batch_size, timesteps, C, H, W = x.size()
        c_in = x.view(batch_size * timesteps, C, H, W)
        
        x = F.relu(self.bn1(self.conv1(c_in)))
        x = self.pool(x)
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)
        x = F.relu(self.bn3(self.conv3(x)))
        x = self.pool(x)
        x = self.dropout(x)
        
        x = x.view(batch_size * timesteps, -1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        
        x = x.view(batch_size, timesteps, -1)
        x, _ = self.lstm(x)
        x = self.fc2(x[:, -1, :])  # 輸出原始logits
        return x

# 實例化模型
model = EnhancedCNN_LSTM(num_classes=1, input_size=(128, 128)).to(device)

# 驗證模型參數為FP32
for param in model.parameters():
    assert param.dtype == torch.float32, "模型參數必須為FP32"

# 損失函數與優化器
criterion = nn.BCEWithLogitsLoss()  # 使用BCEWithLogitsLoss處理原始logits
optimizer = optim.AdamW(model.parameters(), lr=0.0005, weight_decay=1e-4)  # 較低學習率
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)  # 動態調整學習率

# 混合精度訓練的GradScaler
scaler = GradScaler('cuda')

# 資料增強
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # 調整圖像大小以匹配模型輸入
    transforms.RandomHorizontalFlip(),  # 隨機水平翻轉
    transforms.RandomRotation(10),  # 隨機旋轉10度
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # 隨機調整亮度與對比
    transforms.ToTensor(),  # 轉為Tensor
])

# 示例資料集類（模擬PIL Image格式）
class DummyDataset(Dataset):
    def __init__(self, num_samples, timesteps=10, transform=None):
        self.num_samples = num_samples
        self.timesteps = timesteps
        self.transform = transform
    
    def __len__(self):
        return self.num_samples
    
    def __getitem__(self, idx):
        # 模擬圖像數據：生成隨機NumPy陣列並轉為PIL Image
        images = []
        for t in range(self.timesteps):
            # 生成隨機NumPy陣列，範圍0-255，模擬RGB圖像
            img = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)
            img = Image.fromarray(img)  # 轉為PIL Image
            if self.transform:
                img = self.transform(img)
            images.append(img)
        image = torch.stack(images)  # 堆疊為 [timesteps, 3, 128, 128]
        label = torch.randint(0, 2, (1,)).float()  # 隨機生成0或1的標籤
        return image, label

# 創建訓練和驗證資料載入器
train_dataset = DummyDataset(num_samples=1152, timesteps=10, transform=transform)  # 1152 / 4 = 288 batches
val_dataset = DummyDataset(num_samples=288, timesteps=10, transform=transforms.ToTensor())
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False)

# 訓練設置
num_epochs = 150  # 訓練20個epoch
accumulation_steps = 4  # 梯度累積步數
batch_size = 1  # 減少批次大小以適應記憶體限制

for epoch in range(num_epochs):
    model.train()  # 設置模型為訓練模式
    running_loss = 0.0
    optimizer.zero_grad()  # 清空梯度

    for i, (images, labels) in enumerate(tqdm(train_loader)):
        images = images.to(device, dtype=torch.float32)  # 確保輸入為FP32
        labels = labels.to(device, dtype=torch.float32).view(-1, 1)

        with torch.amp.autocast(device_type='cuda'):  # 混合精度前向傳播
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss = loss / accumulation_steps  # 標準化損失以進行梯度累積

        scaler.scale(loss).backward()  # 反向傳播
        if (i + 1) % accumulation_steps == 0 or (i + 1) == len(train_loader):
            scaler.step(optimizer)  # 更新參數
            scaler.update()
            optimizer.zero_grad()  # 清空梯度

        running_loss += loss.item() * accumulation_steps  # 累加損失

    avg_train_loss = running_loss / len(train_loader)  # 計算平均訓練損失

    # 驗證階段
    model.eval()  # 設置模型為評估模式
    val_preds, val_labels = [], []
    val_loss = 0.0
    with torch.no_grad():  # 禁用梯度計算
        for images, labels in val_loader:
            images = images.to(device, dtype=torch.float32)
            labels = labels.to(device, dtype=torch.float32).view(-1, 1)
            with torch.amp.autocast(device_type='cuda'):
                outputs = model(images)
                val_loss += criterion(outputs, labels).item()
            preds = torch.sigmoid(outputs).cpu().numpy() > 0.5  # 預測二分類結果
            val_preds.extend(preds.flatten())
            val_labels.extend(labels.cpu().numpy().flatten())

    avg_val_loss = val_loss / len(val_loader)  # 計算平均驗證損失
    val_accuracy = accuracy_score(val_labels, val_preds) * 100  # 修正為比較預測與標籤
    scheduler.step(avg_val_loss)  # 根據驗證損失調整學習率

    print(f'第 {epoch+1}/{num_epochs} 個Epoch')
    print(f'訓練損失: {avg_train_loss:.4f}, 驗證損失: {avg_val_loss:.4f}')
    print(f'驗證準確率: {val_accuracy:.2f}%')
    print(f'學習率: {scheduler.optimizer.param_groups[0]["lr"]:.6f}')
    print(f'GPU記憶體使用量: {torch.cuda.memory_allocated() / 1024**3:.2f} GB')

# 保存模型
torch.save(model.state_dict(), 'enhanced_cnn_lstm.pth')

100%|██████████| 288/288 [00:09<00:00, 31.47it/s]


第 1/150 個Epoch
訓練損失: 0.7016, 驗證損失: 0.7035
驗證準確率: 46.88%
學習率: 0.000500
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.62it/s]


第 2/150 個Epoch
訓練損失: 0.7002, 驗證損失: 0.6958
驗證準確率: 50.35%
學習率: 0.000500
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.16it/s]


第 3/150 個Epoch
訓練損失: 0.6959, 驗證損失: 0.6967
驗證準確率: 49.65%
學習率: 0.000500
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.73it/s]


第 4/150 個Epoch
訓練損失: 0.6959, 驗證損失: 0.6952
驗證準確率: 48.26%
學習率: 0.000500
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.23it/s]


第 5/150 個Epoch
訓練損失: 0.6945, 驗證損失: 0.6948
驗證準確率: 50.00%
學習率: 0.000500
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.25it/s]


第 6/150 個Epoch
訓練損失: 0.6947, 驗證損失: 0.7029
驗證準確率: 46.53%
學習率: 0.000500
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 37.24it/s]


第 7/150 個Epoch
訓練損失: 0.6952, 驗證損失: 0.6941
驗證準確率: 43.40%
學習率: 0.000500
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.01it/s]


第 8/150 個Epoch
訓練損失: 0.6933, 驗證損失: 0.6914
驗證準確率: 52.78%
學習率: 0.000500
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 33.17it/s]


第 9/150 個Epoch
訓練損失: 0.6937, 驗證損失: 0.6961
驗證準確率: 47.57%
學習率: 0.000500
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 33.52it/s]


第 10/150 個Epoch
訓練損失: 0.6947, 驗證損失: 0.6946
驗證準確率: 47.22%
學習率: 0.000500
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.22it/s]


第 11/150 個Epoch
訓練損失: 0.6948, 驗證損失: 0.6932
驗證準確率: 47.92%
學習率: 0.000250
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.58it/s]


第 12/150 個Epoch
訓練損失: 0.6933, 驗證損失: 0.6931
驗證準確率: 53.12%
學習率: 0.000250
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.38it/s]


第 13/150 個Epoch
訓練損失: 0.6945, 驗證損失: 0.6949
驗證準確率: 46.88%
學習率: 0.000250
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.10it/s]


第 14/150 個Epoch
訓練損失: 0.6935, 驗證損失: 0.6937
驗證準確率: 45.49%
學習率: 0.000125
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.14it/s]


第 15/150 個Epoch
訓練損失: 0.6932, 驗證損失: 0.6930
驗證準確率: 50.69%
學習率: 0.000125
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.36it/s]


第 16/150 個Epoch
訓練損失: 0.6931, 驗證損失: 0.6935
驗證準確率: 47.92%
學習率: 0.000125
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.61it/s]


第 17/150 個Epoch
訓練損失: 0.6931, 驗證損失: 0.6931
驗證準確率: 50.00%
學習率: 0.000063
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.40it/s]


第 18/150 個Epoch
訓練損失: 0.6928, 驗證損失: 0.6927
驗證準確率: 55.21%
學習率: 0.000063
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.22it/s]


第 19/150 個Epoch
訓練損失: 0.6929, 驗證損失: 0.6929
驗證準確率: 52.08%
學習率: 0.000063
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 36.00it/s]


第 20/150 個Epoch
訓練損失: 0.6938, 驗證損失: 0.6932
驗證準確率: 50.35%
學習率: 0.000031
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.66it/s]


第 21/150 個Epoch
訓練損失: 0.6927, 驗證損失: 0.6928
驗證準確率: 50.69%
學習率: 0.000031
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.85it/s]


第 22/150 個Epoch
訓練損失: 0.6932, 驗證損失: 0.6930
驗證準確率: 52.08%
學習率: 0.000031
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.10it/s]


第 23/150 個Epoch
訓練損失: 0.6941, 驗證損失: 0.6933
驗證準確率: 48.96%
學習率: 0.000016
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 33.77it/s]


第 24/150 個Epoch
訓練損失: 0.6931, 驗證損失: 0.6925
驗證準確率: 53.12%
學習率: 0.000016
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.95it/s]


第 25/150 個Epoch
訓練損失: 0.6923, 驗證損失: 0.6928
驗證準確率: 51.74%
學習率: 0.000016
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.62it/s]


第 26/150 個Epoch
訓練損失: 0.6929, 驗證損失: 0.6944
驗證準確率: 46.53%
學習率: 0.000008
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.99it/s]


第 27/150 個Epoch
訓練損失: 0.6934, 驗證損失: 0.6932
驗證準確率: 49.65%
學習率: 0.000008
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.36it/s]


第 28/150 個Epoch
訓練損失: 0.6934, 驗證損失: 0.6925
驗證準確率: 51.39%
學習率: 0.000008
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 32.92it/s]


第 29/150 個Epoch
訓練損失: 0.6935, 驗證損失: 0.6940
驗證準確率: 46.53%
學習率: 0.000004
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.33it/s]


第 30/150 個Epoch
訓練損失: 0.6933, 驗證損失: 0.6947
驗證準確率: 45.83%
學習率: 0.000004
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.66it/s]


第 31/150 個Epoch
訓練損失: 0.6926, 驗證損失: 0.6932
驗證準確率: 50.69%
學習率: 0.000004
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.92it/s]


第 32/150 個Epoch
訓練損失: 0.6932, 驗證損失: 0.6934
驗證準確率: 50.35%
學習率: 0.000002
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.17it/s]


第 33/150 個Epoch
訓練損失: 0.6933, 驗證損失: 0.6931
驗證準確率: 49.65%
學習率: 0.000002
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.66it/s]


第 34/150 個Epoch
訓練損失: 0.6944, 驗證損失: 0.6923
驗證準確率: 53.47%
學習率: 0.000002
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.63it/s]


第 35/150 個Epoch
訓練損失: 0.6931, 驗證損失: 0.6934
驗證準確率: 48.61%
學習率: 0.000001
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.10it/s]


第 36/150 個Epoch
訓練損失: 0.6933, 驗證損失: 0.6931
驗證準確率: 51.04%
學習率: 0.000001
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.76it/s]


第 37/150 個Epoch
訓練損失: 0.6931, 驗證損失: 0.6931
驗證準確率: 51.04%
學習率: 0.000001
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.39it/s]


第 38/150 個Epoch
訓練損失: 0.6938, 驗證損失: 0.6932
驗證準確率: 52.08%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 33.58it/s]


第 39/150 個Epoch
訓練損失: 0.6929, 驗證損失: 0.6942
驗證準確率: 47.22%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.77it/s]


第 40/150 個Epoch
訓練損失: 0.6934, 驗證損失: 0.6943
驗證準確率: 47.57%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.16it/s]


第 41/150 個Epoch
訓練損失: 0.6940, 驗證損失: 0.6925
驗證準確率: 52.78%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.44it/s]


第 42/150 個Epoch
訓練損失: 0.6931, 驗證損失: 0.6922
驗證準確率: 54.51%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.00it/s]


第 43/150 個Epoch
訓練損失: 0.6935, 驗證損失: 0.6926
驗證準確率: 53.12%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.93it/s]


第 44/150 個Epoch
訓練損失: 0.6934, 驗證損失: 0.6928
驗證準確率: 51.74%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.57it/s]


第 45/150 個Epoch
訓練損失: 0.6926, 驗證損失: 0.6924
驗證準確率: 52.43%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:09<00:00, 31.63it/s]


第 46/150 個Epoch
訓練損失: 0.6936, 驗證損失: 0.6941
驗證準確率: 46.53%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.17it/s]


第 47/150 個Epoch
訓練損失: 0.6932, 驗證損失: 0.6929
驗證準確率: 52.08%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.12it/s]


第 48/150 個Epoch
訓練損失: 0.6923, 驗證損失: 0.6932
驗證準確率: 47.92%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.26it/s]


第 49/150 個Epoch
訓練損失: 0.6934, 驗證損失: 0.6928
驗證準確率: 51.74%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.39it/s]


第 50/150 個Epoch
訓練損失: 0.6936, 驗證損失: 0.6931
驗證準確率: 50.35%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 36.17it/s]


第 51/150 個Epoch
訓練損失: 0.6933, 驗證損失: 0.6939
驗證準確率: 47.22%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.14it/s]


第 52/150 個Epoch
訓練損失: 0.6936, 驗證損失: 0.6938
驗證準確率: 47.57%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.19it/s]


第 53/150 個Epoch
訓練損失: 0.6936, 驗證損失: 0.6936
驗證準確率: 48.26%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 34.79it/s]


第 54/150 個Epoch
訓練損失: 0.6934, 驗證損失: 0.6935
驗證準確率: 46.18%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:07<00:00, 37.58it/s]


第 55/150 個Epoch
訓練損失: 0.6934, 驗證損失: 0.6928
驗證準確率: 51.74%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 33.12it/s]


第 56/150 個Epoch
訓練損失: 0.6933, 驗證損失: 0.6935
驗證準確率: 47.57%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


100%|██████████| 288/288 [00:08<00:00, 35.37it/s]


第 57/150 個Epoch
訓練損失: 0.6931, 驗證損失: 0.6933
驗證準確率: 48.96%
學習率: 0.000000
GPU記憶體使用量: 0.25 GB


 90%|████████▉ | 259/288 [00:07<00:00, 34.34it/s]


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/opt/conda/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_3250/216947866.py", line 129, in <module>
    for i, (images, labels) in enumerate(tqdm(train_loader)):
  File "/opt/conda/lib/python3.11/site-packages/tqdm/std.py", line 1182, in __iter__
    for obj in iterable:
  File "/opt/conda/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 701, in __next__
    data = self._next_data()
           ^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 757, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
           ^^^^^^

In [12]:
#0302 Claude
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from torch.amp import GradScaler, autocast
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score
import os
from PIL import Image
import numpy as np

# 設置記憶體優化環境變數，以減少記憶體碎片
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# 設置設備
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 強化版 CNN-LSTM 模型
class EnhancedCNN_LSTM(nn.Module):
    def __init__(self, num_classes, input_size=(128, 128)):
        super(EnhancedCNN_LSTM, self).__init__()
        self.input_size = input_size
        
        # 增強的卷積神經網路 (CNN) - 使用 ResNet 風格的殘差連接
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        
        # 第一個殘差區塊
        self.conv2a = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2a = nn.BatchNorm2d(64)
        self.conv2b = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2b = nn.BatchNorm2d(64)
        self.downsample1 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0)  # 添加池化層使尺寸匹配
        )
        
        # 第二個殘差區塊
        self.conv3a = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn3a = nn.BatchNorm2d(128)
        self.conv3b = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn3b = nn.BatchNorm2d(128)
        self.downsample2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0)  # 添加池化層使尺寸匹配
        )
        
        # 共享池化和 Dropout 層
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.dropout = nn.Dropout(0.4)  # 增加 Dropout 比例以減少過擬合
        
        # 空間注意力機制
        self.attention_conv = nn.Conv2d(128, 1, kernel_size=1)
        
        # 計算特徵大小：128x128 經過 3 次 pool 後變為 16x16
        # feature_size = input_size[0] // (2**3)
        feature_size = 16
        self.fc1 = nn.Linear(128 * feature_size * feature_size, 512)
        
        # 增強型雙向 LSTM
        self.lstm = nn.LSTM(
            input_size=512, 
            hidden_size=256, 
            num_layers=2, 
            batch_first=True, 
            bidirectional=True, 
            dropout=0.4
        )
        
        # 最終分類層
        self.fc2 = nn.Linear(256 * 2, 128)  # 添加中間層
        self.fc3 = nn.Linear(128, num_classes)
        
        # 批次正規化
        self.bn_fc1 = nn.BatchNorm1d(512)
        self.bn_fc2 = nn.BatchNorm1d(128)

    def forward(self, x):
        batch_size, timesteps, C, H, W = x.size()
        c_in = x.view(batch_size * timesteps, C, H, W)
        
        # 初始卷積層
        x = F.relu(self.bn1(self.conv1(c_in)))
        
        # 第一個殘差區塊
        identity = self.downsample1(x)
        x = self.pool(x)
        x = F.relu(self.bn2a(self.conv2a(x)))
        x = self.bn2b(self.conv2b(x))
        x = F.relu(x + identity)
        
        # 第二個殘差區塊
        identity = self.downsample2(x)
        x = self.pool(x)
        x = F.relu(self.bn3a(self.conv3a(x)))
        x = self.bn3b(self.conv3b(x))
        x = F.relu(x + identity)
        x = self.pool(x)
        
        # 空間注意力機制
        attention = torch.sigmoid(self.attention_conv(x))
        x = x * attention
        
        # Dropout 降低過擬合風險
        x = self.dropout(x)
        
        # 扁平化
        x = x.view(batch_size * timesteps, -1)
        
        # 全連接層
        x = F.relu(self.bn_fc1(self.fc1(x)))
        x = self.dropout(x)
        
        # 重塑為序列資料
        x = x.view(batch_size, timesteps, -1)
        
        # 應用 LSTM
        x, _ = self.lstm(x)
        
        # 使用最後一個時間步的輸出進行分類
        x = F.relu(self.bn_fc2(self.fc2(x[:, -1, :])))
        x = self.dropout(x)
        x = self.fc3(x)
        
        return x

# 自定義損失函數
class FocalLoss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2.0):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.bce = nn.BCEWithLogitsLoss(reduction='none')
        
    def forward(self, inputs, targets):
        BCE_loss = self.bce(inputs, targets)
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss
        return F_loss.mean()

# 自定義資料集增強
class SequenceDataset(Dataset):
    def __init__(self, num_samples, timesteps=10, transform=None, augment=False):
        self.num_samples = num_samples
        self.timesteps = timesteps
        self.transform = transform
        self.augment = augment
    
    def __len__(self):
        return self.num_samples
    
    def __getitem__(self, idx):
        # 模擬時序圖像資料
        images = []
        for t in range(self.timesteps):
            # 生成類模擬資料，實際應用中替換為真實圖像讀取
            img = np.random.randint(0, 256, (128, 128, 3), dtype=np.uint8)
            img = Image.fromarray(img)
            
            if self.transform:
                img = self.transform(img)
            images.append(img)
            
        image_seq = torch.stack(images)  # [timesteps, 3, 128, 128]
        
        # 模擬標籤：正負樣本比例約為 1:1
        label = torch.FloatTensor([1.0 if np.random.random() > 0.5 else 0.0])
        
        return image_seq, label

# 實例化模型
model = EnhancedCNN_LSTM(num_classes=1, input_size=(128, 128)).to(device)

# 檢查模型參數類型
for param in model.parameters():
    assert param.dtype == torch.float32, "模型參數必須為 FP32"

# 聯合損失函數：結合 Focal Loss 和傳統 BCE Loss
criterion = FocalLoss(alpha=0.25, gamma=2.0)

# 優化器設定：使用 Lion 優化器，通常優於 AdamW
from torch.optim import AdamW

optimizer = AdamW(
    model.parameters(),
    lr=0.001,  # 稍高的初始學習率
    weight_decay=2e-5,  # 輕微增加權重衰減以減少過擬合
    betas=(0.9, 0.999)  # 預設動量值
)

# 進階學習率調度器：OneCycleLR 通常比 ReduceLROnPlateau 效果好
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=0.002,  # 最大學習率
    total_steps=20 * (1152 // 4),  # epochs * batches_per_epoch
    pct_start=0.3,  # 預熱階段佔總步數的比例
    div_factor=10.0,  # 初始學習率 = max_lr / div_factor
    final_div_factor=100.0  # 最終學習率 = max_lr / final_div_factor
)

# 混合精度訓練配置
scaler = GradScaler(enabled=True)

# 擴展資料增強管道
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomResizedCrop(128, scale=(0.8, 1.0)),  # 隨機剪裁
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.1, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # 隨機平移
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet 標準化
])

val_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# 建立資料集和資料載入器
train_dataset = SequenceDataset(num_samples=1152, timesteps=10, transform=train_transform, augment=True)
val_dataset = SequenceDataset(num_samples=288, timesteps=10, transform=val_transform, augment=False)

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, num_workers=2, pin_memory=True)

# 模型訓練設定
num_epochs = 20
accumulation_steps = 4  # 梯度累積以模擬更大批次

# 輸入 F 模組
import torch.nn.functional as F

# 訓練記錄
best_val_accuracy = 0.0
patience_counter = 0
patience = 5  # 提前停止的耐心值

# 訓練迴圈
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    train_preds, train_labels = [], []
    
    # 清空梯度
    optimizer.zero_grad()
    
    # 訓練階段
    for i, (images, labels) in enumerate(tqdm(train_loader)):
        images = images.to(device, dtype=torch.float32)
        labels = labels.to(device, dtype=torch.float32).view(-1, 1)
        
        # 前向傳播（使用混合精度）
        with autocast(device_type='cuda'):
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss = loss / accumulation_steps  # 梯度累積
        
        # 反向傳播
        scaler.scale(loss).backward()
        
        # 計算訓練指標
        with torch.no_grad():
            preds = torch.sigmoid(outputs).cpu().numpy() > 0.5
            train_preds.extend(preds.flatten())
            train_labels.extend(labels.cpu().numpy().flatten())
        
        # 批次累積更新
        if (i + 1) % accumulation_steps == 0 or (i + 1) == len(train_loader):
            # 梯度裁剪以防止梯度爆炸
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            # 更新參數
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            
            # 更新學習率（每批次更新）
            scheduler.step()
        
        running_loss += loss.item() * accumulation_steps
    
    # 計算平均訓練損失和準確率
    avg_train_loss = running_loss / len(train_loader)
    train_accuracy = accuracy_score(train_labels, train_preds) * 100
    
    # 驗證階段
    model.eval()
    val_loss = 0.0
    val_preds, val_labels = [], []
    
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device, dtype=torch.float32)
            labels = labels.to(device, dtype=torch.float32).view(-1, 1)
            
            with autocast(device_type='cuda'):
                outputs = model(images)
                loss = criterion(outputs, labels)
            
            val_loss += loss.item()
            
            preds = torch.sigmoid(outputs).cpu().numpy() > 0.5
            val_preds.extend(preds.flatten())
            val_labels.extend(labels.cpu().numpy().flatten())
    
    # 計算驗證指標
    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = accuracy_score(val_labels, val_preds) * 100
    
    # 提前停止檢查
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        patience_counter = 0
        # 儲存最佳模型
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'val_accuracy': val_accuracy,
        }, 'best_enhanced_cnn_lstm.pth')
        print(f'保存最佳模型，驗證準確率: {val_accuracy:.2f}%')
    else:
        patience_counter += 1
    
    # 列印訓練資訊
    print(f'第 {epoch+1}/{num_epochs} 個 Epoch')
    print(f'訓練損失: {avg_train_loss:.4f}, 訓練準確率: {train_accuracy:.2f}%')
    print(f'驗證損失: {avg_val_loss:.4f}, 驗證準確率: {val_accuracy:.2f}%')
    print(f'學習率: {optimizer.param_groups[0]["lr"]:.6f}')
    print(f'GPU記憶體使用量: {torch.cuda.memory_allocated() / 1024**3:.2f} GB')
    
    # # 提前停止
    # if patience_counter >= patience:
    #     print(f'驗證準確率已 {patience} 個 epoch 未改善，提前停止訓練')
    #     break

# 載入最佳模型進行最終評估
# 使用 weights_only=True 以符合 PyTorch 未來的安全建議
checkpoint = torch.load('best_enhanced_cnn_lstm.pth', weights_only=True, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# 最終驗證
model.eval()
final_preds, final_labels = [], []

with torch.no_grad():
    for images, labels in val_loader:
        images = images.to(device, dtype=torch.float32)
        labels = labels.to(device, dtype=torch.float32).view(-1, 1)
        
        outputs = model(images)
        preds = torch.sigmoid(outputs).cpu().numpy() > 0.5
        
        final_preds.extend(preds.flatten())
        final_labels.extend(labels.cpu().numpy().flatten())

final_accuracy = accuracy_score(final_labels, final_preds) * 100
print(f'最終模型驗證準確率: {final_accuracy:.2f}%')

100%|██████████| 288/288 [00:07<00:00, 38.15it/s]


保存最佳模型，驗證準確率: 44.44%
第 1/20 個 Epoch
訓練損失: 0.0528, 訓練準確率: 51.22%
驗證損失: 0.0459, 驗證準確率: 44.44%
學習率: 0.000208
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.01it/s]


保存最佳模型，驗證準確率: 48.61%
第 2/20 個 Epoch
訓練損失: 0.0516, 訓練準確率: 49.91%
驗證損失: 0.0466, 驗證準確率: 48.61%
學習率: 0.000231
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.06it/s]


第 3/20 個 Epoch
訓練損失: 0.0514, 訓練準確率: 49.13%
驗證損失: 0.0438, 驗證準確率: 47.22%
學習率: 0.000269
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 37.92it/s]


保存最佳模型，驗證準確率: 53.12%
第 4/20 個 Epoch
訓練損失: 0.0507, 訓練準確率: 45.83%
驗證損失: 0.0445, 驗證準確率: 53.12%
學習率: 0.000321
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 37.93it/s]


第 5/20 個 Epoch
訓練損失: 0.0475, 訓練準確率: 50.78%
驗證損失: 0.0442, 驗證準確率: 43.75%
學習率: 0.000386
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.15it/s]


第 6/20 個 Epoch
訓練損失: 0.0493, 訓練準確率: 46.70%
驗證損失: 0.0433, 驗證準確率: 51.74%
學習率: 0.000464
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.15it/s]


保存最佳模型，驗證準確率: 55.56%
第 7/20 個 Epoch
訓練損失: 0.0469, 訓練準確率: 50.61%
驗證損失: 0.0431, 驗證準確率: 55.56%
學習率: 0.000552
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 37.97it/s]


第 8/20 個 Epoch
訓練損失: 0.0459, 訓練準確率: 51.65%
驗證損失: 0.0468, 驗證準確率: 45.83%
學習率: 0.000650
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.24it/s]


第 9/20 個 Epoch
訓練損失: 0.0459, 訓練準確率: 48.87%
驗證損失: 0.0440, 驗證準確率: 51.74%
學習率: 0.000756
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.07it/s]


第 10/20 個 Epoch
訓練損失: 0.0457, 訓練準確率: 49.05%
驗證損失: 0.0494, 驗證準確率: 46.18%
學習率: 0.000868
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 37.87it/s]


第 11/20 個 Epoch
訓練損失: 0.0452, 訓練準確率: 49.65%
驗證損失: 0.0437, 驗證準確率: 48.61%
學習率: 0.000983
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.14it/s]


第 12/20 個 Epoch
訓練損失: 0.0455, 訓練準確率: 50.09%
驗證損失: 0.0433, 驗證準確率: 50.35%
學習率: 0.001101
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.13it/s]


第 13/20 個 Epoch
訓練損失: 0.0443, 訓練準確率: 52.52%
驗證損失: 0.0447, 驗證準確率: 50.35%
學習率: 0.001218
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.25it/s]


第 14/20 個 Epoch
訓練損失: 0.0447, 訓練準確率: 50.87%
驗證損失: 0.0437, 驗證準確率: 47.22%
學習率: 0.001334
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.24it/s]


第 15/20 個 Epoch
訓練損失: 0.0442, 訓練準確率: 51.22%
驗證損失: 0.0445, 驗證準確率: 52.78%
學習率: 0.001445
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.07it/s]


第 16/20 個 Epoch
訓練損失: 0.0441, 訓練準確率: 50.26%
驗證損失: 0.0434, 驗證準確率: 53.12%
學習率: 0.001551
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.14it/s]


第 17/20 個 Epoch
訓練損失: 0.0445, 訓練準確率: 47.31%
驗證損失: 0.0433, 驗證準確率: 49.31%
學習率: 0.001649
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.12it/s]


第 18/20 個 Epoch
訓練損失: 0.0440, 訓練準確率: 50.00%
驗證損失: 0.0437, 驗證準確率: 51.74%
學習率: 0.001737
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 37.79it/s]


第 19/20 個 Epoch
訓練損失: 0.0442, 訓練準確率: 49.31%
驗證損失: 0.0444, 驗證準確率: 44.44%
學習率: 0.001815
GPU記憶體使用量: 0.93 GB


100%|██████████| 288/288 [00:07<00:00, 38.12it/s]


第 20/20 個 Epoch
訓練損失: 0.0445, 訓練準確率: 48.78%
驗證損失: 0.0474, 驗證準確率: 52.43%
學習率: 0.001880
GPU記憶體使用量: 0.93 GB
最終模型驗證準確率: 51.74%


In [13]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from torch.amp import GradScaler, autocast
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

# 設置記憶體優化環境變數
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# 設置設備
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用設備: {device}")

# 深度優化版 CNN-LSTM 模型
class DeepCNN_LSTM(nn.Module):
    def __init__(self, num_classes=1, input_size=(128, 128)):
        super(DeepCNN_LSTM, self).__init__()
        
        # 基礎配置
        self.input_size = input_size
        
        # 初始卷積層 - 使用較小的卷積核和較少的通道數
        self.conv_init = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)  # 降到 64x64
        )
        
        # 第一個雙重卷積區塊
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)  # 降到 32x32
        )
        
        # 第二個雙重卷積區塊
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)  # 降到 16x16
        )
        
        # 第三個雙重卷積區塊
        self.conv_block3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)  # 降到 8x8
        )
        
        # 計算 CNN 輸出尺寸
        self.cnn_output_size = 128 * 8 * 8  # 128 通道, 8x8 特徵圖
        
        # Dropout 和全連接層
        self.dropout1 = nn.Dropout(0.5)  # 更高的 dropout 率
        self.fc = nn.Linear(self.cnn_output_size, 256)
        self.bn_fc = nn.BatchNorm1d(256)
        
        # LSTM 部分
        self.lstm = nn.LSTM(
            input_size=256,
            hidden_size=128,
            num_layers=1,  # 減少層數
            batch_first=True,
            bidirectional=True
        )
        
        # 分類器
        self.dropout2 = nn.Dropout(0.5)
        self.classifier = nn.Sequential(
            nn.Linear(256, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(64, num_classes)
        )
        
        # 模型初始化
        self._initialize_weights()
    
    def _initialize_weights(self):
        """初始化模型權重"""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
    
    def forward(self, x):
        # 輸入形狀: [batch_size, timesteps, channels, height, width]
        batch_size, timesteps, C, H, W = x.size()
        
        # 重塑並單獨處理每個時間步長
        c_in = x.view(batch_size * timesteps, C, H, W)
        
        # 通過 CNN 特徵提取器
        c_out = self.conv_init(c_in)
        c_out = self.conv_block1(c_out)
        c_out = self.conv_block2(c_out)
        c_out = self.conv_block3(c_out)
        
        # 攤平
        c_out = c_out.view(batch_size * timesteps, -1)
        
        # 應用 Dropout
        c_out = self.dropout1(c_out)
        
        # 通過全連接層
        c_out = self.fc(c_out)
        c_out = self.bn_fc(c_out)
        c_out = F.relu(c_out)
        
        # 重塑為序列
        r_out = c_out.view(batch_size, timesteps, -1)
        
        # 通過 LSTM
        r_out, _ = self.lstm(r_out)
        
        # 僅使用最後一個時間步的輸出
        r_out = r_out[:, -1, :]
        
        # 應用 Dropout
        r_out = self.dropout2(r_out)
        
        # 最終分類
        output = self.classifier(r_out)
        
        return output

# 設置自定義資料集類別
class ImprovedSequenceDataset(Dataset):
    def __init__(self, num_samples, timesteps=10, transform=None, seed=42):
        """
        改進版資料集：產生更具鑑別度的資料
        
        Args:
            num_samples: 樣本數量
            timesteps: 每個樣本的時間步數
            transform: 資料轉換
            seed: 隨機種子，確保可重現性
        """
        self.num_samples = num_samples
        self.timesteps = timesteps
        self.transform = transform
        
        # 設置隨機種子以確保可重現性
        np.random.seed(seed)
        
        # 預先產生標籤，確保正負樣本平衡
        self.labels = np.array([0, 1] * (num_samples // 2 + 1))[:num_samples]
        np.random.shuffle(self.labels)
    
    def __len__(self):
        return self.num_samples
    
    def __getitem__(self, idx):
        # 獲取此索引的標籤
        label = self.labels[idx]
        
        # 建立時序圖像序列
        images = []
        
        base_intensity = np.random.randint(100, 200)  # 基礎亮度
        pattern_type = label  # 根據標籤決定模式類型
        
        for t in range(self.timesteps):
            # 創建基礎圖像 - 灰度背景
            img_array = np.ones((128, 128, 3), dtype=np.uint8) * base_intensity
            
            # 根據標籤添加不同的特徵模式
            if pattern_type == 1:  # 正樣本
                # 在圖像中心添加圓形
                center_x, center_y = 64, 64
                radius = 20 + int(10 * np.sin(t / 2))  # 隨時間變化的半徑
                
                # 繪製圓形
                for i in range(128):
                    for j in range(128):
                        dist = np.sqrt((i - center_x)**2 + (j - center_y)**2)
                        if dist < radius:
                            # 紅色調圓形
                            img_array[i, j, 0] = min(255, base_intensity + 80)  # R
                            img_array[i, j, 1] = max(0, base_intensity - 50)    # G
                            img_array[i, j, 2] = max(0, base_intensity - 50)    # B
            else:  # 負樣本
                # 添加矩形
                start_x = 32 + int(10 * np.cos(t / 2))
                start_y = 32 + int(10 * np.sin(t / 2))
                width = height = 40
                
                # 繪製矩形
                img_array[start_x:start_x+width, start_y:start_y+height, 0] = max(0, base_intensity - 50)
                img_array[start_x:start_x+width, start_y:start_y+height, 1] = min(255, base_intensity + 70)
                img_array[start_x:start_x+width, start_y:start_y+height, 2] = max(0, base_intensity - 30)
            
            # 添加一些噪聲
            noise = np.random.randint(-20, 20, (128, 128, 3))
            img_array = np.clip(img_array + noise, 0, 255).astype(np.uint8)
            
            # 轉換為 PIL 圖像
            img = Image.fromarray(img_array)
            
            # 應用轉換
            if self.transform:
                img = self.transform(img)
            
            images.append(img)
        
        # 堆疊為張量
        image_seq = torch.stack(images)  # [timesteps, channels, height, width]
        
        return image_seq, torch.FloatTensor([float(label)])

# 建立訓練圖表函數
def plot_training_history(train_losses, val_losses, train_accs, val_accs, save_path='training_history.png'):
    """繪製訓練歷史圖表"""
    epochs = range(1, len(train_losses) + 1)
    
    plt.figure(figsize=(12, 5))
    
    # 損失曲線
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_losses, 'b-', label='訓練損失')
    plt.plot(epochs, val_losses, 'r-', label='驗證損失')
    plt.title('訓練與驗證損失')
    plt.xlabel('Epochs')
    plt.ylabel('損失')
    plt.legend()
    
    # 準確率曲線
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_accs, 'b-', label='訓練準確率')
    plt.plot(epochs, val_accs, 'r-', label='驗證準確率')
    plt.title('訓練與驗證準確率')
    plt.xlabel('Epochs')
    plt.ylabel('準確率 (%)')
    plt.legend()
    
    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()
    print(f"訓練歷史已儲存至 {save_path}")

# 訓練函數
def train_model(model, train_loader, val_loader, optimizer, criterion, scheduler, num_epochs=25, patience=7):
    """完整的模型訓練函數"""
    # 初始化
    best_val_accuracy = 0.0
    patience_counter = 0
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []
    scaler = GradScaler(enabled=True)
    
    for epoch in range(num_epochs):
        # 訓練階段
        model.train()
        running_loss = 0.0
        train_preds, train_labels_list = [], []
        
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]"):
            images = images.to(device, dtype=torch.float32)
            labels = labels.to(device, dtype=torch.float32)
            
            # 清空梯度
            optimizer.zero_grad()
            
            # 前向傳播（使用混合精度）
            with autocast(device_type='cuda' if torch.cuda.is_available() else 'cpu'):
                outputs = model(images)
                loss = criterion(outputs, labels)
            
            # 反向傳播
            scaler.scale(loss).backward()
            
            # 梯度裁剪
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            # 更新權重
            scaler.step(optimizer)
            scaler.update()
            
            # 更新學習率
            scheduler.step()
            
            # 統計
            running_loss += loss.item()
            preds = (torch.sigmoid(outputs) > 0.5).float().cpu().numpy()
            train_preds.extend(preds)
            train_labels_list.extend(labels.cpu().numpy())
        
        # 計算訓練指標
        train_loss = running_loss / len(train_loader)
        train_accuracy = accuracy_score(train_labels_list, train_preds) * 100
        train_losses.append(train_loss)
        train_accs.append(train_accuracy)
        
        # 驗證階段
        model.eval()
        val_loss = 0.0
        val_preds, val_labels_list = [], []
        
        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]"):
                images = images.to(device, dtype=torch.float32)
                labels = labels.to(device, dtype=torch.float32)
                
                # 前向傳播
                outputs = model(images)
                loss = criterion(outputs, labels)
                
                # 統計
                val_loss += loss.item()
                preds = (torch.sigmoid(outputs) > 0.5).float().cpu().numpy()
                val_preds.extend(preds)
                val_labels_list.extend(labels.cpu().numpy())
        
        # 計算驗證指標
        val_loss = val_loss / len(val_loader)
        val_accuracy = accuracy_score(val_labels_list, val_preds) * 100
        val_losses.append(val_loss)
        val_accs.append(val_accuracy)
        
        # 計算精確度、召回率和 F1 分數
        precision, recall, f1, _ = precision_recall_fscore_support(
            val_labels_list, val_preds, average='binary', zero_division=0
        )
        
        # 輸出訓練資訊
        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'訓練損失: {train_loss:.4f}, 訓練準確率: {train_accuracy:.2f}%')
        print(f'驗證損失: {val_loss:.4f}, 驗證準確率: {val_accuracy:.2f}%')
        print(f'精確度: {precision:.4f}, 召回率: {recall:.4f}, F1: {f1:.4f}')
        print(f'學習率: {optimizer.param_groups[0]["lr"]:.6f}')
        
        # 檢查是否是最佳模型
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            patience_counter = 0
            
            # 儲存最佳模型
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_accuracy': val_accuracy,
                'val_precision': precision,
                'val_recall': recall,
                'val_f1': f1
            }, 'best_deep_cnn_lstm.pth')
            
            print(f'保存最佳模型，驗證準確率: {val_accuracy:.2f}%')
        else:
            patience_counter += 1
            
        # 提前停止
        if patience_counter >= patience:
            print(f'驗證準確率已 {patience} 個 epoch 未改善，提前停止訓練')
            break
    
    # 繪製訓練歷史
    plot_training_history(train_losses, val_losses, train_accs, val_accs)
    
    return train_losses, val_losses, train_accs, val_accs

# 主程式：設置訓練
def main():
    # 模型配置
    input_size = (128, 128)
    num_classes = 1
    batch_size = 8
    num_epochs = 30
    
    # 設置資料轉換
    train_transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.1, contrast=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    val_transform = transforms.Compose([
        transforms.Resize(input_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    # 創建改進的資料集
    print("創建資料集...")
    train_dataset = ImprovedSequenceDataset(num_samples=1024, timesteps=10, transform=train_transform, seed=42)
    val_dataset = ImprovedSequenceDataset(num_samples=256, timesteps=10, transform=val_transform, seed=43)
    
    # 資料載入器
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
    
    print(f"訓練資料集大小: {len(train_dataset)}")
    print(f"驗證資料集大小: {len(val_dataset)}")
    
    # 初始化模型
    model = DeepCNN_LSTM(num_classes=num_classes, input_size=input_size).to(device)
    print("模型結構:")
    print(model)
    
    # 訓練前打印模型參數數量
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"總參數數量: {total_params:,}")
    print(f"可訓練參數數量: {trainable_params:,}")
    
    # 損失函數、優化器和學習率調度器
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
    
    # 使用 OneCycleLR 調度器
    steps_per_epoch = len(train_loader)
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=0.003,
        epochs=num_epochs,
        steps_per_epoch=steps_per_epoch,
        pct_start=0.3,
        anneal_strategy='cos',
        div_factor=10.0,
        final_div_factor=100.0
    )
    
    # 開始訓練
    print("開始訓練...")
    train_losses, val_losses, train_accs, val_accs = train_model(
        model, train_loader, val_loader, optimizer, criterion, scheduler, 
        num_epochs=num_epochs, patience=7
    )
    
    # 載入最佳模型進行最終評估
    print("載入最佳模型進行最終評估...")
    best_model = DeepCNN_LSTM(num_classes=num_classes, input_size=input_size).to(device)
    checkpoint = torch.load('best_deep_cnn_lstm.pth', weights_only=False, map_location=device)
    best_model.load_state_dict(checkpoint['model_state_dict'])
    
    # 最終評估
    best_model.eval()
    final_preds, final_labels = [], []
    
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="最終評估"):
            images = images.to(device, dtype=torch.float32)
            outputs = best_model(images)
            preds = (torch.sigmoid(outputs) > 0.5).float().cpu().numpy()
            final_preds.extend(preds)
            final_labels.extend(labels.numpy())
    
    # 計算最終指標
    final_accuracy = accuracy_score(final_labels, final_preds) * 100
    precision, recall, f1, _ = precision_recall_fscore_support(
        final_labels, final_preds, average='binary', zero_division=0
    )
    
    # 輸出最終結果
    print("\n最終評估結果:")
    print(f"準確率: {final_accuracy:.2f}%")
    print(f"精確度: {precision:.4f}")
    print(f"召回率: {recall:.4f}")
    print(f"F1 分數: {f1:.4f}")
    
    # 保存完整模型
    torch.save(best_model, 'complete_deep_cnn_lstm.pth')
    print("訓練完成！最佳模型已保存。")

if __name__ == "__main__":
    main()

使用設備: cuda
創建資料集...
訓練資料集大小: 1024
驗證資料集大小: 256
模型結構:
DeepCNN_LSTM(
  (conv_init): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block1): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), pa

Epoch 1/30 [Train]: 100%|██████████| 128/128 [00:26<00:00,  4.83it/s]
Epoch 1/30 [Val]: 100%|██████████| 32/32 [00:06<00:00,  4.97it/s]


Epoch 1/30:
訓練損失: 0.4518, 訓練準確率: 96.48%
驗證損失: 0.1885, 驗證準確率: 100.00%
精確度: 1.0000, 召回率: 1.0000, F1: 1.0000
學習率: 0.000382
保存最佳模型，驗證準確率: 100.00%


Epoch 2/30 [Train]: 100%|██████████| 128/128 [00:27<00:00,  4.71it/s]
Epoch 2/30 [Val]: 100%|██████████| 32/32 [00:06<00:00,  4.98it/s]


Epoch 2/30:
訓練損失: 0.1667, 訓練準確率: 99.51%
驗證損失: 0.0498, 驗證準確率: 100.00%
精確度: 1.0000, 召回率: 1.0000, F1: 1.0000
學習率: 0.000616


Epoch 3/30 [Train]: 100%|██████████| 128/128 [00:26<00:00,  4.75it/s]
Epoch 3/30 [Val]: 100%|██████████| 32/32 [00:06<00:00,  4.95it/s]


Epoch 3/30:
訓練損失: 0.0983, 訓練準確率: 98.63%
驗證損失: 0.0176, 驗證準確率: 100.00%
精確度: 1.0000, 召回率: 1.0000, F1: 1.0000
學習率: 0.000976


Epoch 4/30 [Train]: 100%|██████████| 128/128 [00:27<00:00,  4.71it/s]
Epoch 4/30 [Val]: 100%|██████████| 32/32 [00:06<00:00,  4.95it/s]


Epoch 4/30:
訓練損失: 0.0703, 訓練準確率: 98.54%
驗證損失: 0.0100, 驗證準確率: 100.00%
精確度: 1.0000, 召回率: 1.0000, F1: 1.0000
學習率: 0.001417


Epoch 5/30 [Train]: 100%|██████████| 128/128 [00:27<00:00,  4.71it/s]
Epoch 5/30 [Val]: 100%|██████████| 32/32 [00:06<00:00,  4.97it/s]


Epoch 5/30:
訓練損失: 0.0236, 訓練準確率: 99.80%
驗證損失: 0.0024, 驗證準確率: 100.00%
精確度: 1.0000, 召回率: 1.0000, F1: 1.0000
學習率: 0.001886


Epoch 6/30 [Train]: 100%|██████████| 128/128 [00:27<00:00,  4.61it/s]
Epoch 6/30 [Val]: 100%|██████████| 32/32 [00:06<00:00,  4.95it/s]


Epoch 6/30:
訓練損失: 0.0271, 訓練準確率: 99.80%
驗證損失: 0.0006, 驗證準確率: 100.00%
精確度: 1.0000, 召回率: 1.0000, F1: 1.0000
學習率: 0.002327


Epoch 7/30 [Train]: 100%|██████████| 128/128 [00:26<00:00,  4.84it/s]
Epoch 7/30 [Val]: 100%|██████████| 32/32 [00:06<00:00,  4.95it/s]


Epoch 7/30:
訓練損失: 0.0553, 訓練準確率: 98.93%
驗證損失: 0.0003, 驗證準確率: 100.00%
精確度: 1.0000, 召回率: 1.0000, F1: 1.0000
學習率: 0.002686


Epoch 8/30 [Train]: 100%|██████████| 128/128 [00:27<00:00,  4.63it/s]
Epoch 8/30 [Val]: 100%|██████████| 32/32 [00:06<00:00,  4.94it/s]
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)


Epoch 8/30:
訓練損失: 0.0727, 訓練準確率: 99.22%
驗證損失: 0.0001, 驗證準確率: 100.00%
精確度: 1.0000, 召回率: 1.0000, F1: 1.0000
學習率: 0.002920
驗證準確率已 7 個 epoch 未改善，提前停止訓練
訓練歷史已儲存至 training_history.png
載入最佳模型進行最終評估...


最終評估: 100%|██████████| 32/32 [00:06<00:00,  4.91it/s]


最終評估結果:
準確率: 100.00%
精確度: 1.0000
召回率: 1.0000
F1 分數: 1.0000
訓練完成！最佳模型已保存。





In [17]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from torch.amp import GradScaler, autocast
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import glob
from pathlib import Path
import cv2
import random
from sklearn.model_selection import train_test_split

# 設置隨機種子，確保可重現性
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)

set_seed(42)

# 設置記憶體優化環境變數
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# 設置設備
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用設備: {device}")

# 改進版 CNN-LSTM 模型
class ImprovedCNN_LSTM(nn.Module):
    def __init__(self, num_classes=1, seq_length=10):
        super(ImprovedCNN_LSTM, self).__init__()
        
        # 使用預訓練的 ResNet-18 作為特徵提取器
        # 移除最後的全連接層
        from torchvision.models import resnet18, ResNet18_Weights
        self.feature_extractor = resnet18(weights=ResNet18_Weights.DEFAULT)
        self.feature_size = self.feature_extractor.fc.in_features  # 通常是 512
        self.feature_extractor.fc = nn.Identity()  # 移除最後的全連接層
        
        # 凍結前幾層以避免過擬合
        layers_to_freeze = list(self.feature_extractor.children())[:6]  # 凍結前6層
        for layer in layers_to_freeze:
            for param in layer.parameters():
                param.requires_grad = False
        
        # LSTM 部分
        self.lstm = nn.LSTM(
            input_size=self.feature_size,
            hidden_size=256,
            num_layers=2,
            batch_first=True,
            dropout=0.5,
            bidirectional=True
        )
        
        # 注意力機制
        self.attention = nn.Sequential(
            nn.Linear(512, 128),  # 512 = 256*2 (雙向LSTM)
            nn.Tanh(),
            nn.Linear(128, 1),
            nn.Softmax(dim=1)
        )
        
        # 分類器
        self.classifier = nn.Sequential(
            nn.Linear(512, 128),
            nn.LayerNorm(128),
            nn.Dropout(0.5),
            nn.ReLU(inplace=True),
            nn.Linear(128, 64),
            nn.LayerNorm(64),
            nn.Dropout(0.3),
            nn.ReLU(inplace=True),
            nn.Linear(64, num_classes)
        )
        
        # 時間序列長度
        self.seq_length = seq_length
    
    def forward(self, x):
        batch_size, seq_len, C, H, W = x.size()
        
        # 展平時間維度，作為批次處理
        x = x.view(batch_size * seq_len, C, H, W)
        
        # 提取特徵
        x = self.feature_extractor(x)  # 輸出: [batch_size*seq_len, feature_size]
        
        # 重塑為序列格式
        x = x.view(batch_size, seq_len, -1)  # [batch_size, seq_len, feature_size]
        
        # 通過 LSTM 處理序列
        lstm_out, _ = self.lstm(x)  # [batch_size, seq_len, hidden_size*2]
        
        # 應用注意力機制
        attention_weights = self.attention(lstm_out)  # [batch_size, seq_len, 1]
        context_vector = torch.sum(attention_weights * lstm_out, dim=1)  # [batch_size, hidden_size*2]
        
        # 最終分類
        output = self.classifier(context_vector)
        
        return output

# 真實交通數據集
class TrafficDataset(Dataset):
    def __init__(self, root_dir, sequence_length=10, transform=None, max_sequences=None, mode='train'):
        """
        真實交通數據集
        
        Args:
            root_dir (str): 資料夾路徑，包含多個子資料夾，每個子資料夾是一個序列
            sequence_length (int): 每個樣本的時間步數
            transform: 圖像轉換
            max_sequences (int, optional): 限制使用的序列數量
            mode (str): 'train' 或 'test'，影響資料增強和處理
        """
        self.root_dir = root_dir
        self.sequence_length = sequence_length
        self.transform = transform
        self.mode = mode
        
        # 獲取所有序列資料夾
        self.sequence_folders = sorted([
            f for f in os.listdir(root_dir) 
            if os.path.isdir(os.path.join(root_dir, f)) and f.startswith('freeway_')
        ])
        
        if max_sequences and max_sequences < len(self.sequence_folders):
            # 隨機選擇子集
            random.shuffle(self.sequence_folders)
            self.sequence_folders = self.sequence_folders[:max_sequences]
            
        print(f"找到 {len(self.sequence_folders)} 個序列資料夾")
        
        # 為每個序列加載標籤（這裡需要根據實際情況調整）
        self.sequence_labels = {}
        self.sequences = []
        
        for folder in self.sequence_folders:
            folder_path = os.path.join(root_dir, folder)
            
            # 獲取所有圖像並按數字排序
            images = sorted([
                f for f in os.listdir(folder_path) 
                if f.lower().endswith(('.jpg', '.jpeg', '.png'))
            ], key=lambda x: int(x.split('.')[0]) if x.split('.')[0].isdigit() else 0)
            
            # 如果圖像太少，跳過
            if len(images) < sequence_length:
                continue
                
            # 為每個可能的子序列創建一個樣本
            for i in range(0, len(images) - sequence_length + 1, sequence_length // 2):  # 使用重疊窗口
                sub_sequence = images[i:i+sequence_length]
                if len(sub_sequence) == sequence_length:
                    # 這裡需要為每個子序列分配標籤
                    # 例如，可以基於圖像特徵或文件名來確定標籤
                    # 對於演示，我們使用簡單的啟發式方法：
                    # 如果文件夾名稱的數字是奇數，標記為正樣本，否則為負樣本
                    folder_id = int(folder.split('_')[1])
                    label = 1 if folder_id % 2 == 1 else 0  # 奇數為正樣本
                    
                    self.sequences.append({
                        'folder': folder_path,
                        'images': sub_sequence,
                        'label': label
                    })
        
        # 確保類別平衡
        positive_samples = [s for s in self.sequences if s['label'] == 1]
        negative_samples = [s for s in self.sequences if s['label'] == 0]
        
        # 通過下采樣較多的類別來平衡數據集
        min_samples = min(len(positive_samples), len(negative_samples))
        
        if len(positive_samples) > min_samples:
            positive_samples = random.sample(positive_samples, min_samples)
        if len(negative_samples) > min_samples:
            negative_samples = random.sample(negative_samples, min_samples)
            
        self.sequences = positive_samples + negative_samples
        random.shuffle(self.sequences)
        
        print(f"創建了 {len(self.sequences)} 個樣本 (正樣本: {len(positive_samples)}, 負樣本: {len(negative_samples)})")
    
    def __len__(self):
        return len(self.sequences)
    
    def __getitem__(self, idx):
        sequence_info = self.sequences[idx]
        folder_path = sequence_info['folder']
        image_names = sequence_info['images']
        label = sequence_info['label']
        
        # 加載並轉換圖像序列
        images = []
        for img_name in image_names:
            img_path = os.path.join(folder_path, img_name)
            image = Image.open(img_path).convert('RGB')
            
            if self.transform:
                image = self.transform(image)
                
            images.append(image)
        
        # 堆疊為單一張量
        image_sequence = torch.stack(images)  # [sequence_length, C, H, W]
        
        return image_sequence, torch.FloatTensor([float(label)])

# 評估函數
def evaluate_model(model, data_loader, criterion):
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0.0
    
    with torch.no_grad():
        for inputs, labels in tqdm(data_loader, desc="評估中"):
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            
            preds = (torch.sigmoid(outputs) > 0.5).float().cpu().numpy()
            all_preds.extend(preds)
            all_labels.extend(labels.cpu().numpy())
    
    # 計算評估指標
    accuracy = accuracy_score(all_labels, all_preds) * 100
    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, average='binary', zero_division=0
    )
    cm = confusion_matrix(all_labels, all_preds)
    
    metrics = {
        'loss': total_loss / len(data_loader),
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': cm
    }
    
    return metrics

# 訓練函數
def train_model(model, train_loader, val_loader, optimizer, criterion, scheduler, 
               num_epochs=30, patience=7, checkpoint_path='best_traffic_model.pth'):
    # 初始化變數
    best_val_f1 = 0.0
    patience_counter = 0
    train_losses, val_losses = [], []
    train_f1s, val_f1s = [], []
    scaler = GradScaler(enabled=torch.cuda.is_available())
    
    for epoch in range(num_epochs):
        # 訓練階段
        model.train()
        train_loss = 0.0
        all_train_preds = []
        all_train_labels = []
        
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]"):
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            
            # 修正 autocast 用法，加入必要的 device_type 參數
            device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
            with autocast(device_type=device_type, enabled=torch.cuda.is_available()):
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            
            if scheduler is not None:
                scheduler.step()
            
            train_loss += loss.item()
            
            preds = (torch.sigmoid(outputs) > 0.5).float().cpu().numpy()
            all_train_preds.extend(preds)
            all_train_labels.extend(labels.cpu().numpy())
        
        # 計算訓練指標
        train_loss = train_loss / len(train_loader)
        train_precision, train_recall, train_f1, _ = precision_recall_fscore_support(
            all_train_labels, all_train_preds, average='binary', zero_division=0
        )
        train_accuracy = accuracy_score(all_train_labels, all_train_preds) * 100
        
        # 驗證階段
        val_metrics = evaluate_model(model, val_loader, criterion)
        
        # 記錄指標
        train_losses.append(train_loss)
        val_losses.append(val_metrics['loss'])
        train_f1s.append(train_f1)
        val_f1s.append(val_metrics['f1'])
        
        # 打印訓練資訊
        print(f"Epoch {epoch+1}/{num_epochs} | LR: {optimizer.param_groups[0]['lr']:.6f}")
        print(f"Train - Loss: {train_loss:.4f}, Acc: {train_accuracy:.2f}%, F1: {train_f1:.4f}")
        print(f"Valid - Loss: {val_metrics['loss']:.4f}, Acc: {val_metrics['accuracy']:.2f}%, F1: {val_metrics['f1']:.4f}")
        print(f"Valid - Precision: {val_metrics['precision']:.4f}, Recall: {val_metrics['recall']:.4f}")
        print(f"Confusion Matrix:\n{val_metrics['confusion_matrix']}")
        
        # 檢查是否為最佳模型
        if val_metrics['f1'] > best_val_f1:
            best_val_f1 = val_metrics['f1']
            patience_counter = 0
            
            # 保存最佳模型
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_f1': best_val_f1,
                'accuracy': val_metrics['accuracy'],
                'precision': val_metrics['precision'],
                'recall': val_metrics['recall']
            }, checkpoint_path)
            
            print(f"儲存最佳模型 (F1: {best_val_f1:.4f})")
        else:
            patience_counter += 1
            
        # 提前停止
        if patience_counter >= patience:
            print(f"驗證 F1 已 {patience} 個 epoch 未改善，提前停止訓練")
            break
            
        print("-" * 60)
    
    # 繪製訓練歷史
    plot_training_history(train_losses, val_losses, train_f1s, val_f1s, 
                        save_path='training_history.png', metric_name='F1 Score')
    
    return train_losses, val_losses, train_f1s, val_f1s

# 繪製訓練歷史
def plot_training_history(train_losses, val_losses, train_metrics, val_metrics, 
                         save_path='training_history.png', metric_name='Accuracy'):
    epochs = range(1, len(train_losses) + 1)
    
    plt.figure(figsize=(12, 5))
    
    # 損失曲線
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_losses, 'b-', label='訓練損失')
    plt.plot(epochs, val_losses, 'r-', label='驗證損失')
    plt.title('訓練與驗證損失')
    plt.xlabel('Epochs')
    plt.ylabel('損失')
    plt.legend()
    
    # 指標曲線
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_metrics, 'b-', label=f'訓練 {metric_name}')
    plt.plot(epochs, val_metrics, 'r-', label=f'驗證 {metric_name}')
    plt.title(f'訓練與驗證 {metric_name}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_name)
    plt.legend()
    
    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()
    
    print(f"訓練歷史已儲存至 {save_path}")

# 主程式
def main():
    # 配置參數
    data_root = "train/train"  # 根據實際路徑調整
    sequence_length = 10
    batch_size = 4
    num_epochs = 30
    learning_rate = 0.0005
    num_classes = 1
    
    # 圖像轉換
    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    # 載入資料集
    print("載入資料集...")
    
    # 檢查數據集路徑是否存在
    if not os.path.exists(data_root):
        potential_paths = [
            "train/train",
            "train",
            "data/train",
            "dataset/train",
            "../train/train",
            "../train"
        ]
        
        for path in potential_paths:
            if os.path.exists(path):
                data_root = path
                print(f"找到數據集路徑: {data_root}")
                break
        else:
            print("無法找到數據集路徑，請手動指定")
            return
    
    # 創建完整數據集
    full_dataset = TrafficDataset(root_dir=data_root, 
                                sequence_length=sequence_length, 
                                transform=None,  # 先不應用轉換
                                max_sequences=None)  # 使用所有數據
    
    # 分割數據集
    train_size = int(0.8 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    
    train_indices, val_indices = train_test_split(
        range(len(full_dataset)), 
        test_size=val_size/len(full_dataset),
        random_state=42,
        stratify=[s['label'] for s in full_dataset.sequences]  # 確保分層抽樣
    )
    
    # 創建訓練和驗證數據集
    train_dataset = torch.utils.data.Subset(full_dataset, train_indices)
    val_dataset = torch.utils.data.Subset(full_dataset, val_indices)
    
    # 添加數據轉換
    train_dataset.dataset.transform = train_transform
    val_dataset.dataset.transform = val_transform
    
    # 數據加載器
    train_loader = DataLoader(
        train_dataset, 
        batch_size=batch_size, 
        shuffle=True, 
        num_workers=4, 
        pin_memory=True,
        drop_last=True
    )
    
    val_loader = DataLoader(
        val_dataset, 
        batch_size=batch_size, 
        shuffle=False, 
        num_workers=4, 
        pin_memory=True
    )
    
    print(f"訓練數據集大小: {len(train_dataset)}")
    print(f"驗證數據集大小: {len(val_dataset)}")
    
    # 初始化模型
    model = ImprovedCNN_LSTM(num_classes=num_classes, seq_length=sequence_length).to(device)
    
    # 統計模型參數
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"總參數數量: {total_params:,}")
    print(f"可訓練參數數量: {trainable_params:,}")
    
    # 初始化損失函數和優化器
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    
    # 學習率調度器
    steps_per_epoch = len(train_loader)
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=learning_rate * 10,
        epochs=num_epochs,
        steps_per_epoch=steps_per_epoch,
        pct_start=0.3,
        anneal_strategy='cos',
        div_factor=10.0,
        final_div_factor=100.0
    )
    
    # 開始訓練
    print("開始訓練...")
    train_model(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        optimizer=optimizer,
        criterion=criterion,
        scheduler=scheduler,
        num_epochs=num_epochs,
        patience=7,
        checkpoint_path='best_traffic_model.pth'
    )
    
    # 載入最佳模型進行最終評估
    print("載入最佳模型進行最終評估...")
    checkpoint = torch.load('best_traffic_model.pth', map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    
    # 最終評估
    val_metrics = evaluate_model(model, val_loader, criterion)
    
    # 輸出最終結果
    print("\n最終評估結果:")
    print(f"準確率: {val_metrics['accuracy']:.2f}%")
    print(f"精確度: {val_metrics['precision']:.4f}")
    print(f"召回率: {val_metrics['recall']:.4f}")
    print(f"F1 分數: {val_metrics['f1']:.4f}")
    print(f"混淆矩陣:\n{val_metrics['confusion_matrix']}")
    
    # 保存完整模型，使用 state_dict 保存以便於移植性
    torch.save(model.state_dict(), 'traffic_model_state_dict.pth')
    
    # 保存 scriptable 模型版本 (TorchScript)，便於部署
    scripted_model = torch.jit.script(model.cpu())
    scripted_model.save('traffic_model_scripted.pt')
    
    print("訓練完成！所有模型檔案已保存。")
    
if __name__ == "__main__":
    main()

使用設備: cuda
載入資料集...
找到 180 個序列資料夾
創建了 2388 個樣本 (正樣本: 1194, 負樣本: 1194)
訓練數據集大小: 1910
驗證數據集大小: 478
總參數數量: 14,470,594
可訓練參數數量: 13,787,522
開始訓練...


Epoch 1/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.03it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 26.13it/s]


Epoch 1/30 | LR: 0.000636
Train - Loss: 0.6639, Acc: 61.43%, F1: 0.5978
Valid - Loss: 0.5115, Acc: 78.24%, F1: 0.7668
Valid - Precision: 0.8261, Recall: 0.7155
Confusion Matrix:
[[203  36]
 [ 68 171]]
儲存最佳模型 (F1: 0.7668)
------------------------------------------------------------


Epoch 2/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.47it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 26.02it/s]


Epoch 2/30 | LR: 0.001027
Train - Loss: 0.6508, Acc: 67.82%, F1: 0.6984
Valid - Loss: 0.5685, Acc: 75.73%, F1: 0.7836
Valid - Precision: 0.7071, Recall: 0.8787
Confusion Matrix:
[[152  87]
 [ 29 210]]
儲存最佳模型 (F1: 0.7836)
------------------------------------------------------------


Epoch 3/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.63it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 25.97it/s]


Epoch 3/30 | LR: 0.001625
Train - Loss: 0.6024, Acc: 73.27%, F1: 0.7363
Valid - Loss: 0.8429, Acc: 66.95%, F1: 0.7331
Valid - Precision: 0.6147, Recall: 0.9079
Confusion Matrix:
[[103 136]
 [ 22 217]]
------------------------------------------------------------


Epoch 4/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.13it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 26.10it/s]


Epoch 4/30 | LR: 0.002360
Train - Loss: 0.6475, Acc: 67.03%, F1: 0.6538
Valid - Loss: 0.5994, Acc: 70.92%, F1: 0.6775
Valid - Precision: 0.7604, Recall: 0.6109
Confusion Matrix:
[[193  46]
 [ 93 146]]
------------------------------------------------------------


Epoch 5/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.33it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 26.12it/s]


Epoch 5/30 | LR: 0.003142
Train - Loss: 0.6745, Acc: 62.63%, F1: 0.6241
Valid - Loss: 0.6393, Acc: 69.04%, F1: 0.7087
Valid - Precision: 0.6691, Recall: 0.7531
Confusion Matrix:
[[150  89]
 [ 59 180]]
------------------------------------------------------------


Epoch 6/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.23it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 25.71it/s]


Epoch 6/30 | LR: 0.003876
Train - Loss: 0.6580, Acc: 64.99%, F1: 0.6735
Valid - Loss: 0.6047, Acc: 69.25%, F1: 0.6621
Valid - Precision: 0.7347, Recall: 0.6025
Confusion Matrix:
[[187  52]
 [ 95 144]]
------------------------------------------------------------


Epoch 7/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.00it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 28.16it/s]


Epoch 7/30 | LR: 0.004474
Train - Loss: 0.6385, Acc: 69.03%, F1: 0.7064
Valid - Loss: 0.5330, Acc: 76.57%, F1: 0.7812
Valid - Precision: 0.7326, Recall: 0.8368
Confusion Matrix:
[[166  73]
 [ 39 200]]
------------------------------------------------------------


Epoch 8/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.14it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 28.09it/s]


Epoch 8/30 | LR: 0.004865
Train - Loss: 0.6234, Acc: 69.39%, F1: 0.7179
Valid - Loss: 0.5860, Acc: 70.92%, F1: 0.7301
Valid - Precision: 0.6812, Recall: 0.7866
Confusion Matrix:
[[151  88]
 [ 51 188]]
------------------------------------------------------------


Epoch 9/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.19it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 27.98it/s]


Epoch 9/30 | LR: 0.005000
Train - Loss: 0.5735, Acc: 76.05%, F1: 0.7674
Valid - Loss: 0.5349, Acc: 76.78%, F1: 0.8021
Valid - Precision: 0.6988, Recall: 0.9414
Confusion Matrix:
[[142  97]
 [ 14 225]]
儲存最佳模型 (F1: 0.8021)
------------------------------------------------------------


Epoch 10/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.16it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 27.83it/s]


Epoch 10/30 | LR: 0.004972
Train - Loss: 0.5053, Acc: 80.14%, F1: 0.8096
Valid - Loss: 0.3971, Acc: 85.98%, F1: 0.8613
Valid - Precision: 0.8525, Recall: 0.8703
Confusion Matrix:
[[203  36]
 [ 31 208]]
儲存最佳模型 (F1: 0.8613)
------------------------------------------------------------


Epoch 11/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.42it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 25.88it/s]


Epoch 11/30 | LR: 0.004889
Train - Loss: 0.4387, Acc: 84.59%, F1: 0.8521
Valid - Loss: 0.3484, Acc: 88.91%, F1: 0.8898
Valid - Precision: 0.8843, Recall: 0.8954
Confusion Matrix:
[[211  28]
 [ 25 214]]
儲存最佳模型 (F1: 0.8898)
------------------------------------------------------------


Epoch 12/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.07it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 26.14it/s]


Epoch 12/30 | LR: 0.004752
Train - Loss: 0.3732, Acc: 87.26%, F1: 0.8752
Valid - Loss: 0.2143, Acc: 94.56%, F1: 0.9444
Valid - Precision: 0.9651, Recall: 0.9247
Confusion Matrix:
[[231   8]
 [ 18 221]]
儲存最佳模型 (F1: 0.9444)
------------------------------------------------------------


Epoch 13/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.46it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 27.76it/s]


Epoch 13/30 | LR: 0.004566
Train - Loss: 0.3354, Acc: 89.26%, F1: 0.8957
Valid - Loss: 0.1775, Acc: 95.19%, F1: 0.9530
Valid - Precision: 0.9320, Recall: 0.9749
Confusion Matrix:
[[222  17]
 [  6 233]]
儲存最佳模型 (F1: 0.9530)
------------------------------------------------------------


Epoch 14/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.05it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 28.03it/s]


Epoch 14/30 | LR: 0.004333
Train - Loss: 0.2662, Acc: 91.46%, F1: 0.9170
Valid - Loss: 0.2195, Acc: 94.56%, F1: 0.9469
Valid - Precision: 0.9243, Recall: 0.9707
Confusion Matrix:
[[220  19]
 [  7 232]]
------------------------------------------------------------


Epoch 15/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.07it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 26.09it/s]


Epoch 15/30 | LR: 0.004059
Train - Loss: 0.2698, Acc: 92.30%, F1: 0.9256
Valid - Loss: 0.1109, Acc: 97.07%, F1: 0.9706
Valid - Precision: 0.9747, Recall: 0.9665
Confusion Matrix:
[[233   6]
 [  8 231]]
儲存最佳模型 (F1: 0.9706)
------------------------------------------------------------


Epoch 16/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.28it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 27.44it/s]


Epoch 16/30 | LR: 0.003751
Train - Loss: 0.1984, Acc: 94.55%, F1: 0.9468
Valid - Loss: 0.1082, Acc: 97.07%, F1: 0.9713
Valid - Precision: 0.9518, Recall: 0.9916
Confusion Matrix:
[[227  12]
 [  2 237]]
儲存最佳模型 (F1: 0.9713)
------------------------------------------------------------


Epoch 17/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.18it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 27.77it/s]


Epoch 17/30 | LR: 0.003414
Train - Loss: 0.1747, Acc: 95.07%, F1: 0.9519
Valid - Loss: 0.0823, Acc: 97.91%, F1: 0.9791
Valid - Precision: 0.9791, Recall: 0.9791
Confusion Matrix:
[[234   5]
 [  5 234]]
儲存最佳模型 (F1: 0.9791)
------------------------------------------------------------


Epoch 18/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.18it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 28.03it/s]


Epoch 18/30 | LR: 0.003057
Train - Loss: 0.1802, Acc: 94.97%, F1: 0.9509
Valid - Loss: 0.0761, Acc: 98.54%, F1: 0.9853
Valid - Precision: 0.9874, Recall: 0.9833
Confusion Matrix:
[[236   3]
 [  4 235]]
儲存最佳模型 (F1: 0.9853)
------------------------------------------------------------


Epoch 19/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.13it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 27.94it/s]


Epoch 19/30 | LR: 0.002688
Train - Loss: 0.1552, Acc: 96.28%, F1: 0.9633
Valid - Loss: 0.1437, Acc: 96.65%, F1: 0.9654
Valid - Precision: 1.0000, Recall: 0.9331
Confusion Matrix:
[[239   0]
 [ 16 223]]
------------------------------------------------------------


Epoch 20/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.15it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 28.02it/s]


Epoch 20/30 | LR: 0.002315
Train - Loss: 0.1456, Acc: 96.12%, F1: 0.9614
Valid - Loss: 0.0505, Acc: 98.12%, F1: 0.9810
Valid - Precision: 0.9915, Recall: 0.9707
Confusion Matrix:
[[237   2]
 [  7 232]]
------------------------------------------------------------


Epoch 21/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.11it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 26.74it/s]


Epoch 21/30 | LR: 0.001946
Train - Loss: 0.1257, Acc: 96.70%, F1: 0.9675
Valid - Loss: 0.1019, Acc: 97.70%, F1: 0.9772
Valid - Precision: 0.9672, Recall: 0.9874
Confusion Matrix:
[[231   8]
 [  3 236]]
------------------------------------------------------------


Epoch 22/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.28it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 26.19it/s]


Epoch 22/30 | LR: 0.001589
Train - Loss: 0.1061, Acc: 97.33%, F1: 0.9736
Valid - Loss: 0.0670, Acc: 98.33%, F1: 0.9834
Valid - Precision: 0.9753, Recall: 0.9916
Confusion Matrix:
[[233   6]
 [  2 237]]
------------------------------------------------------------


Epoch 23/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.06it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 27.83it/s]


Epoch 23/30 | LR: 0.001253
Train - Loss: 0.0652, Acc: 98.53%, F1: 0.9854
Valid - Loss: 0.0526, Acc: 98.54%, F1: 0.9854
Valid - Precision: 0.9793, Recall: 0.9916
Confusion Matrix:
[[234   5]
 [  2 237]]
儲存最佳模型 (F1: 0.9854)
------------------------------------------------------------


Epoch 24/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.06it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 28.06it/s]


Epoch 24/30 | LR: 0.000945
Train - Loss: 0.0669, Acc: 98.58%, F1: 0.9859
Valid - Loss: 0.0300, Acc: 99.37%, F1: 0.9937
Valid - Precision: 1.0000, Recall: 0.9874
Confusion Matrix:
[[239   0]
 [  3 236]]
儲存最佳模型 (F1: 0.9937)
------------------------------------------------------------


Epoch 25/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.12it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 28.14it/s]


Epoch 25/30 | LR: 0.000671
Train - Loss: 0.0443, Acc: 99.16%, F1: 0.9916
Valid - Loss: 0.0240, Acc: 99.58%, F1: 0.9958
Valid - Precision: 1.0000, Recall: 0.9916
Confusion Matrix:
[[239   0]
 [  2 237]]
儲存最佳模型 (F1: 0.9958)
------------------------------------------------------------


Epoch 26/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.22it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 26.04it/s]


Epoch 26/30 | LR: 0.000439
Train - Loss: 0.0458, Acc: 98.95%, F1: 0.9896
Valid - Loss: 0.0250, Acc: 99.58%, F1: 0.9958
Valid - Precision: 1.0000, Recall: 0.9916
Confusion Matrix:
[[239   0]
 [  2 237]]
------------------------------------------------------------


Epoch 27/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.28it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 28.03it/s]


Epoch 27/30 | LR: 0.000252
Train - Loss: 0.0290, Acc: 99.21%, F1: 0.9922
Valid - Loss: 0.0312, Acc: 99.58%, F1: 0.9958
Valid - Precision: 1.0000, Recall: 0.9916
Confusion Matrix:
[[239   0]
 [  2 237]]
------------------------------------------------------------


Epoch 28/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 29.21it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 27.91it/s]


Epoch 28/30 | LR: 0.000116
Train - Loss: 0.0307, Acc: 99.42%, F1: 0.9942
Valid - Loss: 0.0280, Acc: 99.58%, F1: 0.9958
Valid - Precision: 1.0000, Recall: 0.9916
Confusion Matrix:
[[239   0]
 [  2 237]]
------------------------------------------------------------


Epoch 29/30 [Train]: 100%|██████████| 477/477 [00:17<00:00, 27.05it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 26.71it/s]


Epoch 29/30 | LR: 0.000033
Train - Loss: 0.0223, Acc: 99.53%, F1: 0.9953
Valid - Loss: 0.0385, Acc: 99.37%, F1: 0.9937
Valid - Precision: 1.0000, Recall: 0.9874
Confusion Matrix:
[[239   0]
 [  3 236]]
------------------------------------------------------------


Epoch 30/30 [Train]: 100%|██████████| 477/477 [00:16<00:00, 28.88it/s]
評估中: 100%|██████████| 120/120 [00:04<00:00, 25.92it/s]
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.tight_layout()
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)
  plt.savefig(save_path)
  checkpoint = torch.load('best_traffic_model.pth', map_location=device)


Epoch 30/30 | LR: 0.000005
Train - Loss: 0.0236, Acc: 99.48%, F1: 0.9948
Valid - Loss: 0.0298, Acc: 99.58%, F1: 0.9958
Valid - Precision: 1.0000, Recall: 0.9916
Confusion Matrix:
[[239   0]
 [  2 237]]
------------------------------------------------------------
訓練歷史已儲存至 training_history.png
載入最佳模型進行最終評估...


評估中: 100%|██████████| 120/120 [00:04<00:00, 25.98it/s]



最終評估結果:
準確率: 99.58%
精確度: 1.0000
召回率: 0.9916
F1 分數: 0.9958
混淆矩陣:
[[239   0]
 [  2 237]]
訓練完成！所有模型檔案已保存。


 評估模型

In [12]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to('cuda'), labels.to('cuda').float().view(-1, 1)
        outputs = model(images)
        predicted = (outputs > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total}%')


Accuracy: 63.888888888888886%
