

## Directory Structure

```plaintext
.
├── pretrain/
│   ├── img/
│   │   ├── captcha1.jpg
│   │   ├── captcha2.png
│   │   └── ...
│   ├── model/
│   │   ├── crnn_model-resnet18-gru_pretrain.pt
│   └── output/
│       ├── train_val_results_pretrain.csv
│       └── ...
├── new/
│   ├── img-1/
│   │   ├── img/
│   │   │   ├── captcha1.jpg
│   │   │   ├── captcha2.png
│   │   │   └── ...
│   │   ├── model/
│   │   │   ├── crnn_model-resnet18-gru_img-1.pt
│   │   └── output/
│   │       ├── train_val_results_img-1.csv
│   │       └── ...
│   ├── img-2/
│   │   ├── img/
│   │   │   ├── captcha3.jpg
│   │   │   ├── captcha4.png
│   │   │   └── ...
│   │   ├── model/
│   │   │   ├── crnn_model-resnet18-gru_img-2.pt
│   │   └── output/
│   │       ├── train_val_results_img-2.csv
│   │       └── ...
│   ├── classifier/
│   │   ├── model/
│   │   │   ├── classifier_model.pt
│   │   └── output/
│   │       ├── [Classification Model Output Files]
│   └── ... [Other img-N Folders]
├── train-evaluate/
│   ├── img/
│   │   ├── captcha5.jpg
│   │   ├── captcha6.png
│   │   └── ...
│   ├── model/
│   │   ├── crnn_model-resnet18-gru_train-evaluate.pt
│   └── output/
│       ├── train_val_results_train-evaluate.csv
│       └── ...
├── predict/
│   ├── new_captcha1.jpg
│   ├── new_captcha2.png
│   └── predictions.csv
└── main.py


In [6]:
import os

def create_folder_structure(base_dir='./', num_new_imgs=5):
    """
    建立預定的資料夾結構，包括 pretrain、new/img-1 至 new/img-5、以及 train-evaluate 資料夾。
    
    :param base_dir: 基礎目錄，預設為當前目錄。
    :param num_new_imgs: 在 'new/' 資料夾下要建立多少個 'img-數字' 子資料夾。
    """
    # 定義 pretrain 資料夾結構
    pretrain_structure = [
        os.path.join(base_dir, "pretrain", "img"),
        os.path.join(base_dir, "pretrain", "model"),
        os.path.join(base_dir, "pretrain", "output"),
    ]
    
    # 定義 new 資料夾結構
    new_base = os.path.join(base_dir, "new")
    new_structures = []
    for i in range(1, num_new_imgs + 1):
        img_folder = os.path.join(new_base, f"img-{i}", "img")
        model_folder = os.path.join(new_base, f"img-{i}", "model")
        output_folder = os.path.join(new_base, f"img-{i}", "output")
        new_structures.extend([img_folder, model_folder, output_folder])
    
    # 定義 train-evaluate 資料夾結構
    train_evaluate_structure = [
        os.path.join(base_dir, "train-evaluate", "img"),
        os.path.join(base_dir, "train-evaluate", "model"),
        os.path.join(base_dir, "train-evaluate", "output"),
    ]
    
    # 合併所有資料夾結構
    all_folders = pretrain_structure + new_structures + train_evaluate_structure
    
    # 創建資料夾
    for folder in all_folders:
        try:
            os.makedirs(folder, exist_ok=True)
            print(f"已創建或已存在資料夾: {folder}")
        except Exception as e:
            print(f"創建資料夾失敗: {folder}\n錯誤訊息: {e}")

if __name__ == "__main__":
    create_folder_structure()


已創建或已存在資料夾: ./pretrain\img
已創建或已存在資料夾: ./pretrain\model
已創建或已存在資料夾: ./pretrain\output
已創建或已存在資料夾: ./new\img-1\img
已創建或已存在資料夾: ./new\img-1\model
已創建或已存在資料夾: ./new\img-1\output
已創建或已存在資料夾: ./new\img-2\img
已創建或已存在資料夾: ./new\img-2\model
已創建或已存在資料夾: ./new\img-2\output
已創建或已存在資料夾: ./new\img-3\img
已創建或已存在資料夾: ./new\img-3\model
已創建或已存在資料夾: ./new\img-3\output
已創建或已存在資料夾: ./new\img-4\img
已創建或已存在資料夾: ./new\img-4\model
已創建或已存在資料夾: ./new\img-4\output
已創建或已存在資料夾: ./new\img-5\img
已創建或已存在資料夾: ./new\img-5\model
已創建或已存在資料夾: ./new\img-5\output
已創建或已存在資料夾: ./train-evaluate\img
已創建或已存在資料夾: ./train-evaluate\model
已創建或已存在資料夾: ./train-evaluate\output


In [8]:
import os
import sys
import torch
import random
import numpy as np
import csv
from PIL import Image
from torch import nn
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import torchvision.models as models  # 引入 ResNet18、MobileNetV3 和 EfficientNet
import torch.nn.functional as F

# 設定隨機種子以確保結果可重現
random.seed(0)
torch.manual_seed(0)
np.random.seed(0)

# 定義圖片的目標大小，所有圖片將縮放到此大小
IMAGE_SIZE = (128, 32)  # 根據驗證碼圖片調整大小

# 定義圖片所在的資料夾
BASE_DIR = "./"  # 基礎目錄
PRETRAIN_DIR = os.path.join(BASE_DIR, "pretrain")
PRETRAIN_IMG_DIR = os.path.join(PRETRAIN_DIR, "img")
NEW_DIR = os.path.join(BASE_DIR, "new")
TRAIN_EVALUATE_DIR = os.path.join(BASE_DIR, "train-evaluate")
PREDICT_DIR = os.path.join(BASE_DIR, "predict")
os.makedirs(PREDICT_DIR, exist_ok=True)

# 定義資料保存的資料夾
# pretrain, new 和 train-evaluate 下的 model 和 output 資料夾
PRETRAIN_MODEL_DIR = os.path.join(PRETRAIN_DIR, "model")
PRETRAIN_OUTPUT_DIR = os.path.join(PRETRAIN_DIR, "output")
TRAIN_EVALUATE_MODEL_DIR = os.path.join(TRAIN_EVALUATE_DIR, "model")
TRAIN_EVALUATE_OUTPUT_DIR = os.path.join(TRAIN_EVALUATE_DIR, "output")

# 新增：分類模型保存的資料夾
CLASSIFIER_DIR = os.path.join(NEW_DIR, "classifier")
CLASSIFIER_MODEL_DIR = os.path.join(CLASSIFIER_DIR, "model")
CLASSIFIER_OUTPUT_DIR = os.path.join(CLASSIFIER_DIR, "output")
os.makedirs(CLASSIFIER_MODEL_DIR, exist_ok=True)
os.makedirs(CLASSIFIER_OUTPUT_DIR, exist_ok=True)

# 定義字母數字列表
ALPHA_NUMS = "abcdefghijklmnopqrstuvwxyz0123456789"
NUM_CLASSES = len(ALPHA_NUMS) + 1  # 加1是為了CTC的blank字符

# 定義驗證碼的位數範圍
MIN_DIGITS = 4
MAX_DIGITS = 6

# 檢查設備是否有GPU可用
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"使用設備: {device}")

# 建立字符與索引的映射
char_to_idx = {c: i + 1 for i, c in enumerate(ALPHA_NUMS)}  # 從1開始，0留給blank
idx_to_char = {i + 1: c for i, c in enumerate(ALPHA_NUMS)}
idx_to_char[0] = ''  # blank字符

def image_to_tensor(img):
    """
    將圖片轉換為Tensor
    :param img: PIL Image物件
    :return: 經過處理的Tensor
    """
    in_img = img.resize(IMAGE_SIZE)
    in_img = in_img.convert("L")  # 轉換為灰階圖
    arr = np.array(in_img)
    t = torch.from_numpy(arr).float()
    t = t.unsqueeze(0)  # 增加通道維度 [1, H, W]
    t = t / 255.0  # 進行歸一化
    return t

def code_to_indices(code):
    """
    將驗證碼字串轉換為索引列表
    :param code: 驗證碼字串
    :return: 索引列表
    """
    return [char_to_idx[c] for c in code.lower() if c in char_to_idx]

def prepare_data(image_dir, save_prefix, parent_dir, max_num=None):
    """
    準備訓練資料
    :param image_dir: 圖片所在的資料夾
    :param save_prefix: 保存資料的前綴（例如 'pretrain' 或 'new_img-1'）
    :param parent_dir: 'pretrain', 'new/img-N' 或 'train-evaluate'
    :param max_num: 要選擇的最大圖片數量（可選）
    :return: (train_path, valid_path, test_path) 或 None
    """
    if parent_dir == "pretrain":
        model_dir = PRETRAIN_MODEL_DIR
        output_dir = PRETRAIN_OUTPUT_DIR
    elif parent_dir.startswith("new/img-"):
        img_num = parent_dir.split('/')[-1]  # 例如 'img-3'
        model_dir = os.path.join(NEW_DIR, img_num, "model")
        output_dir = os.path.join(NEW_DIR, img_num, "output")
    elif parent_dir == "train-evaluate":
        model_dir = TRAIN_EVALUATE_MODEL_DIR
        output_dir = TRAIN_EVALUATE_OUTPUT_DIR
    else:
        print(f"未知的 parent_dir: {parent_dir}")
        return None

    # 創建保存資料的資料夾（自動創建缺失的資料夾）
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(output_dir, exist_ok=True)
    print(f"資料將保存到: 模型->{model_dir}, 輸出->{output_dir}")

    print(f"掃描圖片資料夾: {image_dir}")
    image_paths = []
    labels = []
    for root, dirs, files in os.walk(image_dir):
        for filename in files:
            if not (filename.lower().endswith(".jpg") or filename.lower().endswith(".png")):
                continue
            path = os.path.join(root, filename)
            code = os.path.splitext(filename)[0]
            if MIN_DIGITS <= len(code) <= MAX_DIGITS:
                if all(c in ALPHA_NUMS for c in code.lower()):
                    image_paths.append(path)
                    labels.append(code)
                else:
                    print(f"跳過包含無效字符的檔案: {filename}")

    print(f"找到 {len(image_paths)} 張符合條件的圖片。")

    # 檢查是否有資料
    if len(image_paths) == 0:
        print("未找到符合條件的圖片。請檢查圖片資料夾和命名格式。")
        return None

    # 如果指定了 max_num，則隨機選擇 max_num 張圖片
    if max_num is not None and max_num < len(image_paths):
        combined = list(zip(image_paths, labels))
        random.shuffle(combined)
        combined = combined[:max_num]
        image_paths, labels = zip(*combined)
        image_paths = list(image_paths)
        labels = list(labels)
        print(f"隨機選擇了 {max_num} 張圖片進行訓練。")
    else:
        print("使用所有符合條件的圖片進行訓練。")

    # 打亂資料
    data = list(zip(image_paths, labels))
    random.shuffle(data)

    # 划分資料集
    total_samples = len(data)
    train_size = int(0.8 * total_samples)
    valid_size = int(0.1 * total_samples)
    test_size = total_samples - train_size - valid_size

    train_data = data[:train_size]
    valid_data = data[train_size:train_size + valid_size]
    test_data = data[train_size + valid_size:]

    print(f"資料集划分: 訓練集 {len(train_data)}，驗證集 {len(valid_data)}，測試集 {len(test_data)}")

    # 保存資料到相應的資料夾
    train_path = os.path.join(model_dir, f"train_data_{save_prefix}.pt")
    valid_path = os.path.join(model_dir, f"valid_data_{save_prefix}.pt")
    test_path = os.path.join(model_dir, f"test_data_{save_prefix}.pt")

    torch.save(train_data, train_path)
    torch.save(valid_data, valid_path)
    torch.save(test_data, test_path)

    print(f"資料已保存為: {train_path}, {valid_path}, {test_path}")
    return train_path, valid_path, test_path

class CaptchaDataset(torch.utils.data.Dataset):
    """
    驗證碼資料集
    """
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        path, code = self.data[idx]
        try:
            with Image.open(path) as img:
                image = image_to_tensor(img)
        except Exception as e:
            print(f"無法打開圖片 {path}: {e}")
            # 跳過這個樣本，重新隨機選擇一個樣本
            return self.__getitem__(random.randint(0, len(self.data) - 1))
        label = code_to_indices(code)
        return image, torch.tensor(label, dtype=torch.long)

def collate_fn(batch):
    """
    用於DataLoader的collate_fn，處理不同長度的序列
    :param batch: 一批資料
    :return: 圖片Tensor，標籤Tensor，標籤長度Tensor
    """
    images, labels = zip(*batch)
    images = torch.stack(images)

    label_lengths = torch.tensor([len(label) for label in labels], dtype=torch.long)
    labels = torch.cat(labels)

    return images, labels, label_lengths

# ------------------- ResNet18、MobileNetV3 Small 和 EfficientNet 的 CRNN 模型 -------------------

class BasicBlock(nn.Module):
    """ResNet 使用的基礎塊"""
    expansion = 1 
    def __init__(self, channels_in, channels_out, stride):
        super().__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(channels_in, channels_out, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(channels_out))
        
        self.conv2 = nn.Sequential(
            nn.Conv2d(channels_out, channels_out, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(channels_out))
    
        self.identity = nn.Sequential()
        if stride != 1 or  channels_in != channels_out * self.expansion:
            self.identity = nn.Sequential(
                nn.Conv2d(channels_in, channels_out * self.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(channels_out * self.expansion))
    
    def forward(self, x):
        tmp = self.conv1(x)
        tmp = nn.functional.relu(tmp)
        tmp = self.conv2(tmp)
        tmp += self.identity(x)
        y = nn.functional.relu(tmp)
        return y

class CustomCNN(nn.Module):
    """自定義的 CNN 模組，基於 BasicBlock 和 ResNet18 的 CNN 部分"""
    def __init__(self, block_type=BasicBlock):
        super(CustomCNN, self).__init__()
        self.previous_channels_out = 64
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, self.previous_channels_out, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(self.previous_channels_out))
        
        self.layer1 = self._make_layer(block_type, channels_out=64, num_blocks=2, stride=1)
        self.layer2 = self._make_layer(block_type, channels_out=128, num_blocks=2, stride=2)
        self.layer3 = self._make_layer(block_type, channels_out=256, num_blocks=2, stride=2)
        self.layer4 = self._make_layer(block_type, channels_out=512, num_blocks=2, stride=2)
        
    def _make_layer(self, block_type, channels_out, num_blocks, stride):
        blocks = []
        
        blocks.append(block_type(self.previous_channels_out, channels_out, stride))
        self.previous_channels_out = channels_out * block_type.expansion
        for _ in range(num_blocks-1):
            blocks.append(block_type(self.previous_channels_out, self.previous_channels_out, 1))
            # 注意這裡不需要再乘以 expansion，因為 channels_out 已經考慮了 expansion
        return nn.Sequential(*blocks)
    
    def forward(self, x):
        x = self.conv1(x)
        x = nn.functional.relu(x)
      
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        return x

class MobileNetV3_CRNN(nn.Module):
    """
    基於 MobileNetV3 Small 的 CRNN 模型
    """
    def __init__(self, num_classes, hidden_size=256, num_gru_layers=2, bidirectional=True):
        super(MobileNetV3_CRNN, self).__init__()
        
        # 載入預訓練的 MobileNetV3 Small
        mobilenet = models.mobilenet_v3_small(pretrained=0)
        
        # 修改第一個卷積層以接受單通道輸入
        self.encoder = mobilenet.features  # MobileNetV3 Small 的特徵提取部分
        self.encoder[0][0] = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1, bias=False)
        # 重新初始化修改後的第一層
        nn.init.kaiming_normal_(self.encoder[0][0].weight, mode='fan_out', nonlinearity='relu')
        
        # 調整下採樣因子：將 stride=2 的層改為 stride=1，減少下採樣
        for idx, layer in enumerate(self.encoder):
            if isinstance(layer, nn.Conv2d):
                if layer.stride == (2, 2):
                    self.encoder[idx].stride = (1, 1)
            elif isinstance(layer, nn.Sequential):
                for sub_idx, sub_layer in enumerate(layer):
                    if isinstance(sub_layer, nn.Conv2d) and sub_layer.stride == (2, 2):
                        layer[sub_idx].stride = (1, 1)
        
        # 定義 GRU 層，input_size 根據 MobileNetV3 Small 的最後一層特徵維度
        self.gru = nn.GRU(
            input_size=576,  # MobileNetV3 Small 的最後一層特徵維度
            hidden_size=hidden_size,
            num_layers=num_gru_layers,
            bidirectional=bidirectional,
            batch_first=True
        )
        
        # 定義全連接層
        self.fc = nn.Linear(hidden_size * 2 if bidirectional else hidden_size, num_classes)
    
    def forward(self, x):
        # 編碼器部分
        conv = self.encoder(x)  # [batch, 576, H/8, W/8]
        batch, channels, height, width = conv.size()
        
        # 使用自適應平均池化將高度縮減為1
        if height != 1:
            conv = F.adaptive_avg_pool2d(conv, (1, width))  # [batch, 576, 1, W/8]
        
        conv = conv.squeeze(2)  # [batch, 576, W/8]
        conv = conv.permute(0, 2, 1)  # [batch, W/8, 576]
        
        # GRU 層
        recurrent, _ = self.gru(conv)  # [batch, W/8, hidden_size * num_directions]
        
        # 全連接層
        output = self.fc(recurrent)    # [batch, W/8, num_classes]
        output = output.permute(1, 0, 2)  # [W/8, batch, num_classes]
        return output

class EfficientNet_CRNN(nn.Module):
    """
    基於 EfficientNet-B0 的 CRNN 模型
    """
    def __init__(self, num_classes, hidden_size=256, num_gru_layers=2, bidirectional=True):
        super(EfficientNet_CRNN, self).__init__()
        
        # 載入預訓練的 EfficientNet-B0
        efficientnet = models.efficientnet_b0(pretrained=0)
        
        # 修改第一個卷積層以接受單通道輸入
        self.encoder = efficientnet.features  # EfficientNet-B0 的特徵提取部分
        self.encoder[0][0] = nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1, bias=False)
        # 重新初始化修改後的第一層
        nn.init.kaiming_normal_(self.encoder[0][0].weight, mode='fan_out', nonlinearity='relu')
        
        # 調整下採樣因子：將 stride=2 的層改為 stride=1，減少下採樣
        # EfficientNet-B0 的總下採樣因子為 32，目標為 8，因此需要減少多個下採樣層
        # 通常，EfficientNet 的 Block 層可以透過修改 stride 來調整
        # 以下是一個簡化的示例，實際應根據模型結構進行調整
        total_downsampling = 1
        for idx, layer in enumerate(self.encoder):
            if isinstance(layer, nn.Conv2d):
                if layer.stride == (2, 2):
                    self.encoder[idx].stride = (1, 1)
                    total_downsampling /= 2
            elif isinstance(layer, nn.Sequential):
                for sub_idx, sub_layer in enumerate(layer):
                    if isinstance(sub_layer, nn.Conv2d) and sub_layer.stride == (2, 2):
                        layer[sub_idx].stride = (1, 1)
                        total_downsampling /= 2
        
        # 確保總下採樣因子為8
        # EfficientNet-B0 的初始下採樣因子為 32，經過修改後需要為 8
        # 需要仔細檢查和調整各層的 stride，這裡假設已經達成目標
        
        # 定義 GRU 層，input_size 根據 EfficientNet-B0 的最後一層特徵維度
        self.gru = nn.GRU(
            input_size=1280,  # EfficientNet-B0 的最後一層特徵維度
            hidden_size=hidden_size,
            num_layers=num_gru_layers,
            bidirectional=bidirectional,
            batch_first=True
        )
        
        # 定義全連接層
        self.fc = nn.Linear(hidden_size * 2 if bidirectional else hidden_size, num_classes)
    
    def forward(self, x):
        # 編碼器部分
        conv = self.encoder(x)  # [batch, 1280, H/?, W/?]
        batch, channels, height, width = conv.size()
        
        # 假設下採樣因子為8
        # 使用自適應平均池化將高度縮減為1
        if height != 1:
            conv = F.adaptive_avg_pool2d(conv, (1, width))  # [batch, 1280, 1, W/8]
        
        conv = conv.squeeze(2)  # [batch, 1280, W/8]
        conv = conv.permute(0, 2, 1)  # [batch, W/8, 1280]
        
        # GRU 層
        recurrent, _ = self.gru(conv)  # [batch, W/8, hidden_size * num_directions]
        
        # 全連接層
        output = self.fc(recurrent)    # [batch, W/8, num_classes]
        output = output.permute(1, 0, 2)  # [W/8, batch, num_classes]
        return output

# ------------------- CRNN_GRU 和 CRNN_GRU_MobileNetV3 和 CRNN_GRU_EfficientNet -------------------

class CRNN_GRU(nn.Module):
    def __init__(self, num_classes):
        super(CRNN_GRU, self).__init__()
        self.cnn = CustomCNN()
        self.gru = nn.GRU(
            input_size=512,
            hidden_size=256,
            num_layers=2,
            bidirectional=True,
            batch_first=True
        )
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        conv = self.cnn(x)  # [batch, 512, H, W]
        _, _, H, W = conv.size()
        pool = F.adaptive_avg_pool2d(conv, (1, W))  # 將高度縮減到 1，寬度保持不變
        conv = pool.squeeze(2)  # [batch, 512, W]
        conv = conv.permute(0, 2, 1)  # [batch, W, 512]

        recurrent, _ = self.gru(conv)  # [batch, W, 512]
        output = self.fc(recurrent)    # [batch, W, num_classes]
        output = output.permute(1, 0, 2)  # [W, batch, num_classes]
        return output

class CRNN_GRU_MobileNetV3(nn.Module):
    def __init__(self, num_classes):
        super(CRNN_GRU_MobileNetV3, self).__init__()
        self.cnn = MobileNetV3_CRNN(num_classes=num_classes)
        # 不再定義 GRU 和 FC 層，因為它們已在 MobileNetV3_CRNN 中定義
    
    def forward(self, x):
        conv = self.cnn(x)  # [W/8, batch, num_classes]
        return conv

class CRNN_GRU_EfficientNet(nn.Module):
    def __init__(self, num_classes):
        super(CRNN_GRU_EfficientNet, self).__init__()
        self.cnn = EfficientNet_CRNN(num_classes=num_classes)
        # 不再定義 GRU 和 FC 層，因為它們已在 EfficientNet_CRNN 中定義
    
    def forward(self, x):
        conv = self.cnn(x)  # [W/8, batch, num_classes]
        return conv

# ------------------- 其餘函數保持不變 -------------------

def decode_predictions(preds):
    """
    將模型預測的輸出轉換為字串
    :param preds: 模型預測結果
    :return: 預測的字串列表
    """
    preds = preds.permute(1, 0, 2)  # [batch, seq_len, num_classes]
    preds = torch.argmax(preds, dim=2)  # [batch, seq_len]
    preds = preds.cpu().numpy()

    decoded_strings = []
    for pred in preds:
        chars = []
        prev_char_idx = None
        for idx in pred:
            if idx != prev_char_idx and idx != 0:
                chars.append(idx_to_char.get(idx, ''))
            prev_char_idx = idx
        decoded_strings.append(''.join(chars))
    return decoded_strings

def calculate_accuracy(model, data_loader):
    """
    計算模型在資料集上的準確率
    :param model: 訓練好的模型
    :param data_loader: 資料加載器
    :return: 準確率
    """
    model.eval()
    total_count = 0
    correct_count = 0

    with torch.no_grad():
        for images, labels, label_lengths in data_loader:
            images = images.to(device)
            outputs = model(images)  # [seq_len, batch, num_classes]
            outputs = outputs.log_softmax(2)
            preds = outputs.detach().cpu()
            pred_strings = decode_predictions(preds)

            labels = labels.cpu().numpy()
            label_lengths = label_lengths.cpu().numpy()

            batch_size = images.size(0)
            total_count += batch_size

            label_strings = []
            index = 0
            for length in label_lengths:
                label = labels[index:index + length]
                label_str = ''.join([idx_to_char.get(idx, '') for idx in label])
                label_strings.append(label_str)
                index += length

            for pred_str, label_str in zip(pred_strings, label_strings):
                if pred_str == label_str:
                    correct_count += 1

    accuracy = correct_count / total_count
    return accuracy

# ------------------- 分類模型的定義保持不變 -------------------

class ClassifierCNN(nn.Module):
    def __init__(self, num_classes):
        super(ClassifierCNN, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),  # [batch, 32, H, W]
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # [batch, 32, H/2, W/2]
            
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # [batch, 64, H/2, W/2]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # [batch, 64, H/4, W/4]
            
            nn.Conv2d(64, 128, kernel_size=3, padding=1),  # [batch, 128, H/4, W/4]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)  # [batch, 128, H/8, W/8]
        )
        self.fc = nn.Sequential(
            nn.Linear(128 * (IMAGE_SIZE[0] // 8) * (IMAGE_SIZE[1] // 8), 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )
    
    def forward(self, x):
        x = self.cnn(x)  # [batch, 128, H/8, W/8]
        x = x.view(x.size(0), -1)  # [batch, 128 * H/8 * W/8]
        x = self.fc(x)  # [batch, num_classes]
        return x

def train_classifier():
    """
    訓練分類模型的函數
    """
    # 收集所有 img-N 資料夾
    img_folders = [f for f in os.listdir(NEW_DIR) if os.path.isdir(os.path.join(NEW_DIR, f)) and f.startswith("img-")]
    if not img_folders:
        print("沒有找到任何 img-N 資料夾。請先準備資料。")
        return

    # 準備訓練資料
    train_data = []
    labels = []
    for idx, folder in enumerate(img_folders):
        img_dir = os.path.join(NEW_DIR, folder, "img")
        # 選取部分圖片
        total_images = 0
        for root, dirs, files in os.walk(img_dir):
            for filename in files:
                if filename.lower().endswith(".jpg") or filename.lower().endswith(".png"):
                    code = os.path.splitext(filename)[0]
                    if MIN_DIGITS <= len(code) <= MAX_DIGITS and all(c in ALPHA_NUMS for c in code.lower()):
                        total_images += 1
        print(f"資料夾 '{img_dir}' 中共有 {total_images} 張符合條件的圖片。")
        try:
            user_input = input(f"請輸入要用於訓練分類模型的圖片數量（最大 {total_images}）： ").strip()
            desired_num = int(user_input)
            if desired_num <= 0 or desired_num > total_images:
                print("無效的數量。將跳過此資料夾。")
                continue
            else:
                print(f"將從 '{img_dir}' 中隨機選取 {desired_num} 張圖片進行訓練。")
        except ValueError:
            print("無效的輸入。將跳過此資料夾。")
            continue
        
        selected_images = []
        for root, dirs, files in os.walk(img_dir):
            for filename in files:
                if filename.lower().endswith(".jpg") or filename.lower().endswith(".png"):
                    code = os.path.splitext(filename)[0]
                    if MIN_DIGITS <= len(code) <= MAX_DIGITS and all(c in ALPHA_NUMS for c in code.lower()):
                        selected_images.append(os.path.join(root, filename))
        if len(selected_images) < desired_num:
            print(f"資料夾 '{img_dir}' 中符合條件的圖片少於 {desired_num} 張。將使用所有圖片。")
            desired_num = len(selected_images)
        random.shuffle(selected_images)
        selected_images = selected_images[:desired_num]
        for img_path in selected_images:
            train_data.append((img_path, idx))  # 標籤為資料夾的索引
            labels.append(idx)
    
    if not train_data:
        print("沒有足夠的資料來訓練分類模型。")
        return

    # 建立資料集和資料加載器
    class ClassifierDataset(torch.utils.data.Dataset):
        def __init__(self, data):
            self.data = data

        def __len__(self):
            return len(self.data)

        def __getitem__(self, idx):
            path, label = self.data[idx]
            try:
                with Image.open(path) as img:
                    image = image_to_tensor(img)
            except Exception as e:
                print(f"無法打開圖片 {path}: {e}")
                return self.__getitem__(random.randint(0, len(self.data) - 1))
            return image, torch.tensor(label, dtype=torch.long)

    classifier_dataset = ClassifierDataset(train_data)
    classifier_loader = torch.utils.data.DataLoader(
        classifier_dataset, batch_size=32, shuffle=True, collate_fn=lambda batch: (torch.stack([b[0] for b in batch]), torch.stack([b[1] for b in batch]))
    )

    # 定義分類模型
    num_classes = len(img_folders)
    classifier_model = ClassifierCNN(num_classes=num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(classifier_model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    # 訓練分類模型
    num_epochs = 20
    best_accuracy = 0.0
    for epoch in range(1, num_epochs + 1):
        classifier_model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in classifier_loader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = classifier_model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
        epoch_loss = running_loss / total
        epoch_acc = correct / total
        print(f"Epoch {epoch}/{num_epochs} - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc * 100:.2f}%")

        # 儲存最佳模型
        if epoch_acc > best_accuracy:
            best_accuracy = epoch_acc
            torch.save(classifier_model.state_dict(), os.path.join(CLASSIFIER_MODEL_DIR, "classifier_model.pt"))
            print("最佳模型已保存。")
        
        scheduler.step()

    print(f"分類模型訓練完成，最佳準確率: {best_accuracy * 100:.2f}%")

# ------------------- 通用訓練函數 -------------------

def train_model(train_loader, valid_loader, save_model_path, output_dir, backbone='resnet18', num_epochs=50, pretrained_model_path=None):
    """
    通用訓練函數
    :param train_loader: 訓練資料加載器
    :param valid_loader: 驗證資料加載器
    :param save_model_path: 模型保存路徑
    :param output_dir: 輸出資料夾路徑
    :param backbone: 使用的 Backbone ('resnet18'、'mobilenetv3' 或 'efficientnet')
    :param num_epochs: 訓練輪數
    :param pretrained_model_path: 預訓練模型路徑（可選）
    :return: train_losses, valid_losses, train_accuracies, valid_accuracies
    """
    # 根據 Backbone 選擇模型
    if backbone == 'resnet18':
        model = CRNN_GRU(num_classes=NUM_CLASSES).to(device)
    elif backbone == 'mobilenetv3':
        model = CRNN_GRU_MobileNetV3(num_classes=NUM_CLASSES).to(device)
    elif backbone == 'efficientnet':
        model = CRNN_GRU_EfficientNet(num_classes=NUM_CLASSES).to(device)
    else:
        print(f"未知的 backbone: {backbone}")
        return

    # 如果提供了預訓練模型路徑，且檔案存在，則載入模型權重
    if pretrained_model_path and os.path.exists(pretrained_model_path):
        model.load_state_dict(torch.load(pretrained_model_path, map_location=device))
        print(f"載入預訓練模型: {pretrained_model_path}")
    else:
        print("從頭開始訓練模型。")

    # 全面微調：確保所有層參與訓練
    for param in model.parameters():
        param.requires_grad = True

    # 使用較小的學習率
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    
    # 使用學習率調度器
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, min_lr=1e-6)
    
    criterion = nn.CTCLoss(blank=0, reduction='mean', zero_infinity=True)

    best_valid_accuracy = 0.0  # 初始最佳驗證準確率
    patience = 10  # Early Stopping的耐心值，即容忍多少個epoch沒有提升
    patience_counter = 0  # 記錄驗證準確率沒有改善的epoch次數

    # 記錄損失和準確率
    train_losses = []
    valid_losses = []
    train_accuracies = []
    valid_accuracies = []

    # 確保保存目錄存在
    os.makedirs(os.path.dirname(save_model_path), exist_ok=True)

    # 打開CSV檔案以寫入模式
    csv_filename = os.path.join(
        output_dir,  # 使用相應的 output 資料夾
        f'train_val_results_{os.path.splitext(os.path.basename(save_model_path))[0]}.csv'
    )
    os.makedirs(os.path.dirname(csv_filename), exist_ok=True)
    with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)
        # 寫入表頭
        csvwriter.writerow(['Epoch', 'train loss', 'val loss', 'train accu', 'val accu'])

        for epoch in range(1, num_epochs + 1):
            model.train()
            total_loss = 0.0
            for images, labels, label_lengths in train_loader:
                images = images.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                outputs = model(images)  # [seq_len, batch, num_classes]

                input_lengths = torch.full(
                    size=(images.size(0),),
                    fill_value=outputs.size(0),
                    dtype=torch.long
                ).to(device)
                loss = criterion(outputs, labels, input_lengths, label_lengths)

                loss.backward()
                
                # 梯度裁剪以防止梯度爆炸
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)

                optimizer.step()

                total_loss += loss.item()

            avg_train_loss = total_loss / len(train_loader)

            # 計算訓練集準確率
            train_accuracy = calculate_accuracy(model, train_loader)
            # 計算驗證集損失和準確率
            model.eval()
            total_loss = 0.0
            with torch.no_grad():
                for images, labels, label_lengths in valid_loader:
                    images = images.to(device)
                    labels = labels.to(device)

                    outputs = model(images)
                    input_lengths = torch.full(
                        size=(images.size(0),),
                        fill_value=outputs.size(0),
                        dtype=torch.long
                    ).to(device)
                    loss = criterion(outputs, labels, input_lengths, label_lengths)

                    total_loss += loss.item()

            avg_valid_loss = total_loss / len(valid_loader)
            valid_accuracy = calculate_accuracy(model, valid_loader)

            # 記錄每個epoch的損失和準確率
            train_losses.append(avg_train_loss)
            valid_losses.append(avg_valid_loss)
            train_accuracies.append(train_accuracy)
            valid_accuracies.append(valid_accuracy)

            # 將結果寫入CSV檔案
            csvwriter.writerow([epoch, avg_train_loss, avg_valid_loss, train_accuracy, valid_accuracy])

            print(f"Epoch [{epoch}/{num_epochs}]")
            print(f"train loss: {avg_train_loss:.4f}, train accu: {train_accuracy * 100:.2f}%")
            print(f"val loss: {avg_valid_loss:.4f}, val accu: {valid_accuracy * 100:.2f}%")
            scheduler.step(avg_valid_loss)

            # Early Stopping檢查（基於驗證集準確率）
            if valid_accuracy > best_valid_accuracy:
                best_valid_accuracy = valid_accuracy
                patience_counter = 0  # 重置耐心計數器
                torch.save(model.state_dict(), save_model_path)
                print("驗證準確率提升，模型已保存。")
            else:
                patience_counter += 1
                print(f"驗證準確率未提升，耐心計數器: {patience_counter}/{patience}")

            if patience_counter >= patience:
                print("因為驗證準確率長期未提升，提前停止訓練。")
                break  # 提前停止訓練

    return train_losses, valid_losses, train_accuracies, valid_accuracies


def plot_training_curves(train_losses, valid_losses, train_accuracies, valid_accuracies):
    """
    繪製訓練和驗證的損失及準確率曲線
    """
    epochs = range(1, len(train_losses) + 1)

    plt.figure(figsize=(12, 5))

    # 損失曲線
    plt.subplot(1, 2, 1)
    plt.plot(epochs, train_losses, 'b-', label='Training Loss')  
    plt.plot(epochs, valid_losses, 'orange', label='Validation Loss')  
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    # 準確率曲線
    plt.subplot(1, 2, 2)
    plt.plot(epochs, train_accuracies, 'g-', label='Train Accuracy')  
    plt.plot(epochs, valid_accuracies, 'r-', label='Validation Accuracy')  
    plt.title('Train and Validation Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout()
    plt.show()


def plot_from_csv(csv_path):
    """
    從CSV文件中讀取訓練和驗證的損失及準確率，並繪製曲線
    """
    epochs = []
    train_losses = []
    valid_losses = []
    train_accuracies = []
    valid_accuracies = []

    with open(csv_path, 'r', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            epochs.append(int(row['Epoch']))
            train_losses.append(float(row['train loss']))
            valid_losses.append(float(row['val loss']))
            train_accuracies.append(float(row['train accu']))
            valid_accuracies.append(float(row['val accu']))

    plot_training_curves(train_losses, valid_losses, train_accuracies, valid_accuracies)

def evaluate_model(save_model_path, test_data_path, backbone='resnet18'):
    """
    評估模型並生成混淆矩陣
    :param save_model_path: 模型檔案路徑
    :param test_data_path: 測試資料集路徑
    :param backbone: 使用的 Backbone ('resnet18'、'mobilenetv3' 或 'efficientnet')
    """
    # 載入測試資料
    if not os.path.exists(test_data_path):
        print(f"測試資料檔案 '{test_data_path}' 不存在。請先準備資料。")
        return

    test_data = torch.load(test_data_path)
    test_dataset = CaptchaDataset(test_data)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn
    )

    # 根據 Backbone 選擇模型
    if backbone == 'resnet18':
        model = CRNN_GRU(num_classes=NUM_CLASSES).to(device)
    elif backbone == 'mobilenetv3':
        model = CRNN_GRU_MobileNetV3(num_classes=NUM_CLASSES).to(device)
    elif backbone == 'efficientnet':
        model = CRNN_GRU_EfficientNet(num_classes=NUM_CLASSES).to(device)
    else:
        print(f"未知的 backbone: {backbone}")
        return

    # 載入模型
    try:
        model.load_state_dict(torch.load(save_model_path, map_location=device))
        print(f"已載入模型: {save_model_path}")
    except FileNotFoundError:
        print(f"模型檔案 '{save_model_path}' 未找到。請先訓練模型。")
        return
    model.eval()

    total_sequences = 0  # 總序列數
    correct_sequences = 0  # 完全正確的序列數

    total_chars = 0  # 總字元數
    correct_chars = 0  # 正確字元數

    all_true_chars = []
    all_pred_chars = []

    with torch.no_grad():
        for images, labels, label_lengths in test_loader:
            images = images.to(device)
            outputs = model(images)  # [seq_len, batch, num_classes]
            outputs = outputs.log_softmax(2)
            preds = outputs.detach().cpu()
            pred_strings = decode_predictions(preds)

            labels = labels.cpu().numpy()
            label_lengths = label_lengths.cpu().numpy()

            batch_size = images.size(0)
            total_sequences += batch_size  # 增加序列計數

            label_strings = []
            index = 0
            for length in label_lengths:
                label = labels[index:index + length]
                label_str = ''.join([idx_to_char.get(idx, '') for idx in label])
                label_strings.append(label_str)
                total_chars += length  # 增加字符計數
                index += length

            for pred_str, label_str in zip(pred_strings, label_strings):
                if pred_str == label_str:
                    correct_sequences += 1  # 完全正確的序列計數

                # 逐字符比較，計算字符準確率
                min_len = min(len(pred_str), len(label_str))
                for i in range(min_len):
                    if pred_str[i] == label_str[i]:
                        correct_chars += 1  # 增加正確字符計數
                    all_true_chars.append(label_str[i])
                    all_pred_chars.append(pred_str[i])

    # 計算準確率
    char_accuracy = correct_chars / total_chars if total_chars > 0 else 0
    seq_accuracy = correct_sequences / total_sequences if total_sequences > 0 else 0

    print(f"字符級準確率: {char_accuracy * 100:.2f}% (正確: {correct_chars}/{total_chars})")
    print(f"序列級準確率: {seq_accuracy * 100:.2f}% (正確: {correct_sequences}/{total_sequences})")

    # 生成混淆矩陣並轉換為百分比
    cm = confusion_matrix(all_true_chars, all_pred_chars, labels=list(ALPHA_NUMS))
    cm_percentage = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] * 100  # 將每行轉換為百分比

    # 繪製混淆矩陣
    plt.figure(figsize=(20, 14))  # 圖形大小
    sns.heatmap(
        cm_percentage,
        annot=True,
        fmt='.2f',
        cmap='Blues',
        xticklabels=list(ALPHA_NUMS),
        yticklabels=list(ALPHA_NUMS),
        annot_kws={"size": 6},  # 註釋字體大小
        cbar_kws={"shrink": 0.8}  # 顏色條
    )
    plt.xlabel('Predicted Characters ', fontsize=16)  # 字體
    plt.ylabel('True Characters', fontsize=16)
    plt.title('Confusion Matrix (Percentage)', fontsize=18)
    plt.xticks(fontsize=14, rotation=0)  # 旋轉x軸標籤 字體大小
    plt.yticks(fontsize=14, rotation=0)   # 旋轉y軸標籤 字體大小
    plt.tight_layout()
    plt.show()  # 顯示混淆矩陣圖

    print("混淆矩陣已顯示。")

def main():
    """
    主函數，處理不同的操作模式
    """
    operation = input("請輸入模式 (prepare|train|train_classifier|evaluate|predict|plot): ").strip().lower()

    if operation == "prepare":
        # 收集所有可準備的資料夾選項
        prepare_options = []

        # 選項 1: pretrain/img
        prepare_options.append({
            "path": PRETRAIN_IMG_DIR,
            "save_prefix": "pretrain",
            "parent_dir": "pretrain"
        })

        # 選項 2-N: new/img-N/img
        img_folders = [f for f in os.listdir(NEW_DIR) if os.path.isdir(os.path.join(NEW_DIR, f)) and f.startswith("img-")]
        if not img_folders:
            print("尚未建立任何 img-N 資料夾。")
        else:
            for folder in img_folders:
                prepare_options.append({
                    "path": os.path.join(NEW_DIR, folder, "img"),
                    "save_prefix": f"new_{folder}",
                    "parent_dir": f"new/{folder}"
                })

        # 選項 N+1: train-evaluate/img
        prepare_options.append({
            "path": os.path.join(TRAIN_EVALUATE_DIR, "img"),
            "save_prefix": "train-evaluate",
            "parent_dir": "train-evaluate"
        })

        # 顯示所有選項
        print("可準備的資料夾:")
        for idx, option in enumerate(prepare_options, 1):
            print(f"{idx}. {option['path']}")

        # 輸入選擇
        try:
            source_choice = int(input(f"請選擇要 prepare 的資料夾編號（1-{len(prepare_options)}）： ").strip()) - 1
            if source_choice < 0 or source_choice >= len(prepare_options):
                raise ValueError
            selected_option = prepare_options[source_choice]
            selected_dir = selected_option["path"]
            save_prefix = selected_option["save_prefix"]
            parent_dir = selected_option["parent_dir"]
            print(f"選擇的資料來源: {selected_dir}")
        except (ValueError, IndexError):
            print("無效的選擇。")
            return

        # 準備資料
        prepare_result = prepare_data(selected_dir, save_prefix, parent_dir)
        if prepare_result:
            print("資料準備完成。")

    elif operation == "train":
        # 選擇是否使用預訓練模型
        use_pretrained = input("是否使用預訓練模型進行訓練？ (y/n)： ").strip().lower()
        if use_pretrained == 'y':
            # 從 pretrain 資料夾中選擇預訓練模型
            pretrained_models = [f for f in os.listdir(PRETRAIN_MODEL_DIR) if f.startswith("crnn_model") and f.endswith(".pt")]
            if not pretrained_models:
                print("在 pretrain/model 資料夾中未找到任何預訓練模型。請先訓練或準備預訓練模型。")
                return
            print("可用的預訓練模型:")
            for idx, file in enumerate(pretrained_models, 1):
                path = os.path.join(PRETRAIN_MODEL_DIR, file)
                print(f"{idx}. {path}")
            try:
                model_choice = int(input(f"請選擇要載入的預訓練模型編號（1-{len(pretrained_models)}）： ").strip()) - 1
                if model_choice < 0 or model_choice >= len(pretrained_models):
                    raise ValueError
                selected_pretrained_model = os.path.join(PRETRAIN_MODEL_DIR, pretrained_models[model_choice])
            except (ValueError, IndexError):
                print("無效的選擇。")
                return
        else:
            selected_pretrained_model = None

        # 選擇使用的 Backbone
        print("請選擇使用的 Backbone 模型：")
        print("1. ResNet-18")
        print("2. MobileNetV3 Small")
        print("3. EfficientNet-B0")
        try:
            backbone_choice = int(input("請輸入選項編號（1、2 或 3）： ").strip())
            if backbone_choice == 1:
                backbone = 'resnet18'
            elif backbone_choice == 2:
                backbone = 'mobilenetv3'
            elif backbone_choice == 3:
                backbone = 'efficientnet'
            else:
                raise ValueError
            print(f"選擇的 Backbone：{backbone}")
        except ValueError:
            print("無效的選擇。")
            return

        # 列出可訓練的資料夾
        train_options = []

        # 選項 1: pretrain/img
        train_options.append({
            "path": PRETRAIN_IMG_DIR,
            "save_prefix": "pretrain",
            "parent_dir": "pretrain"
        })

        # 選項 2-N: new/img-N/img
        img_folders = [f for f in os.listdir(NEW_DIR) if os.path.isdir(os.path.join(NEW_DIR, f)) and f.startswith("img-")]
        if not img_folders:
            print("沒有找到任何 img-N 資料夾。請先準備資料。")
            return
        else:
            for folder in img_folders:
                train_options.append({
                    "path": os.path.join(NEW_DIR, folder, "img"),
                    "save_prefix": f"new_{folder}",
                    "parent_dir": f"new/{folder}"
                })

        # 選項 N+1: train-evaluate/img
        train_options.append({
            "path": os.path.join(TRAIN_EVALUATE_DIR, "img"),
            "save_prefix": "train-evaluate",
            "parent_dir": "train-evaluate"
        })

        # 顯示所有選項
        print("可訓練的資料夾:")
        for idx, option in enumerate(train_options, 1):
            print(f"{idx}. {option['path']}")

        # 輸入選擇
        try:
            train_choice = int(input(f"請選擇要訓練的資料夾編號（1-{len(train_options)}）： ").strip()) - 1
            if train_choice < 0 or train_choice >= len(train_options):
                raise ValueError
            selected_train_option = train_options[train_choice]
            selected_dir = selected_train_option["path"]
            save_prefix = selected_train_option["save_prefix"]
            parent_dir = selected_train_option["parent_dir"]
            print(f"選擇的資料來源: {selected_dir}")
        except (ValueError, IndexError):
            print("無效的選擇。")
            return

        # 如果選擇的是 'new/img-N'，則詢問要訓練的圖片數量
        max_num = None
        if parent_dir.startswith("new/img-"):
            img_num = parent_dir.split('/')[-1]  # 例如 'img-3'
            total_images = 0
            for root, dirs, files in os.walk(selected_dir):
                for filename in files:
                    if filename.lower().endswith(".jpg") or filename.lower().endswith(".png"):
                        code = os.path.splitext(filename)[0]
                        if MIN_DIGITS <= len(code) <= MAX_DIGITS and all(c in ALPHA_NUMS for c in code.lower()):
                            total_images += 1
            print(f"資料夾 '{selected_dir}' 中共有 {total_images} 張符合條件的圖片。")
            try:
                user_input = input(f"請輸入要用於訓練的圖片數量（最大 {total_images}，留空則使用所有）： ").strip()
                if user_input:
                    desired_num = int(user_input)
                    if desired_num <= 0 or desired_num > total_images:
                        print("無效的數量。將使用所有圖片。")
                    else:
                        max_num = desired_num
                        print(f"將隨機選擇 {max_num} 張圖片進行訓練。")
            except ValueError:
                print("無效的輸入。將使用所有圖片。")

        # 準備資料
        prepare_result = prepare_data(selected_dir, save_prefix, parent_dir, max_num=max_num)
        if prepare_result is None:
            print("資料準備失敗，訓練終止。")
            return

        train_path, valid_path, _ = prepare_result  # 測試集路徑在訓練中不需要

        # 創建資料集和資料加載器
        train_data = torch.load(train_path)
        valid_data = torch.load(valid_path)

        train_dataset = CaptchaDataset(train_data)
        valid_dataset = CaptchaDataset(valid_data)

        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn
        )
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn
        )

        # 定義模型保存路徑
        if parent_dir == "pretrain":
            model_save_filename = f"crnn_model-{backbone}-gru_pretrain.pt"
            model_save_path = os.path.join(PRETRAIN_MODEL_DIR, model_save_filename)
            output_dir = PRETRAIN_OUTPUT_DIR
        elif parent_dir == "train-evaluate":
            model_save_filename = f"crnn_model-{backbone}-gru_train-evaluate.pt"
            model_save_path = os.path.join(TRAIN_EVALUATE_MODEL_DIR, model_save_filename)
            output_dir = TRAIN_EVALUATE_OUTPUT_DIR
        else:
            # 將模型保存到 'new/img-N/model' 資料夾
            img_num = parent_dir.split('/')[-1]  # 例如 'img-3'
            model_save_filename = f"crnn_model-{backbone}-gru_{img_num}.pt"
            model_save_path = os.path.join(NEW_DIR, img_num, "model", model_save_filename)
            output_dir = os.path.join(NEW_DIR, img_num, "output")

            # 確保模型保存資料夾和輸出資料夾存在
            os.makedirs(os.path.join(NEW_DIR, img_num, "model"), exist_ok=True)
            os.makedirs(os.path.join(NEW_DIR, img_num, "output"), exist_ok=True)

        # 訓練模型並獲取損失和準確率數據
        train_losses, valid_losses, train_accuracies, valid_accuracies = train_model(
            train_loader,
            valid_loader,
            save_model_path=model_save_path,
            output_dir=output_dir,
            backbone=backbone,
            num_epochs=120,
            pretrained_model_path=selected_pretrained_model
        )
        print(f"模型已保存為 '{model_save_path}'")

        # 繪製訓練和驗證的損失及準確率曲線
        plot_training_curves(train_losses, valid_losses, train_accuracies, valid_accuracies)

    elif operation == "train_classifier":
        """
        新增的分類模型訓練模式
        """
        train_classifier()

    elif operation == "evaluate":
        # 收集所有可評估的資料夾選項
        evaluate_options = []

        # 選項 1: pretrain/img
        evaluate_options.append({
            "data_dir": PRETRAIN_IMG_DIR,
            "model_dir": PRETRAIN_MODEL_DIR,
            "save_prefix": "pretrain"
        })

        # 選項 2-N: new/img-N/img
        img_folders = [f for f in os.listdir(NEW_DIR) if os.path.isdir(os.path.join(NEW_DIR, f)) and f.startswith("img-")]
        if not img_folders:
            print("尚未建立任何 img-N 資料夾。")
        else:
            for folder in img_folders:
                evaluate_options.append({
                    "data_dir": os.path.join(NEW_DIR, folder, "img"),
                    "model_dir": os.path.join(NEW_DIR, folder, "model"),
                    "save_prefix": f"new_{folder}"
                })

        # 選項 N+1: train-evaluate/img
        evaluate_options.append({
            "data_dir": os.path.join(TRAIN_EVALUATE_DIR, "img"),
            "model_dir": TRAIN_EVALUATE_MODEL_DIR,
            "save_prefix": "train-evaluate"
        })

        # 檢查是否有可評估的選項
        if not evaluate_options:
            print("沒有可評估的資料夾。請先準備資料。")
            return

        # 顯示所有選項
        print("可評估的資料夾:")
        for idx, option in enumerate(evaluate_options, 1):
            print(f"{idx}. {option['data_dir']}")

        # 輸入選擇
        try:
            evaluate_choice = int(input(f"請選擇要評估的資料夾編號（1-{len(evaluate_options)}）： ").strip()) - 1
            if evaluate_choice < 0 or evaluate_choice >= len(evaluate_options):
                raise ValueError
            selected_evaluate_option = evaluate_options[evaluate_choice]
            data_dir = selected_evaluate_option["data_dir"]
            model_dir = selected_evaluate_option["model_dir"]
            save_prefix = selected_evaluate_option["save_prefix"]
            print(f"選擇的資料來源: {data_dir}")
        except (ValueError, IndexError):
            print("無效的選擇。")
            return

        # 檢查模型資料夾是否存在
        if not os.path.exists(model_dir):
            print(f"模型資料夾 '{model_dir}' 不存在。請先訓練模型。")
            return

        # 列出所有可用的模型文件
        model_files = [f for f in os.listdir(model_dir) if f.startswith("crnn_model") and f.endswith(".pt")]
        if not model_files:
            print(f"在 '{model_dir}' 資料夾中未找到任何模型文件。請先訓練模型。")
            return

        # 顯示可用的模型文件
        print("可用的模型:")
        for idx, file in enumerate(model_files, 1):
            path = os.path.join(model_dir, file)
            print(f"{idx}. {path}")
        try:
            model_choice = int(input(f"請選擇要使用的模型編號（1-{len(model_files)}）： ").strip()) - 1
            if model_choice < 0 or model_choice >= len(model_files):
                raise ValueError
            selected_model_file = model_files[model_choice]
            selected_model_path = os.path.join(model_dir, selected_model_file)
        except (ValueError, IndexError):
            print("無效的選擇。")
            return

        # 根據模型文件名稱判斷 Backbone
        if 'resnet18' in selected_model_file:
            backbone = 'resnet18'
        elif 'mobilenetv3' in selected_model_file:
            backbone = 'mobilenetv3'
        elif 'efficientnet' in selected_model_file:
            backbone = 'efficientnet'
        else:
            print("無法識別模型的 Backbone 類型。請確認模型命名規則。")
            return

        # 準備測試資料路徑
        test_data_path = os.path.join(model_dir, f"test_data_{save_prefix}.pt")
        if not os.path.exists(test_data_path):
            print(f"測試資料檔案 '{test_data_path}' 不存在。請先準備資料。")
            return

        # 評估模型，傳遞 Backbone 參數
        evaluate_model(selected_model_path, test_data_path, backbone=backbone)

    elif operation == "predict":
        """
        修改後的預測模式，批次預測 'predict' 資料夾中的所有圖片，先分類再預測
        """
        # 檢查並獲取預測資料夾
        if not os.path.exists(PREDICT_DIR):
            print(f"預測資料夾 '{PREDICT_DIR}' 不存在。")
            return

        # 列出預測資料夾中的所有圖片
        image_files = [f for f in os.listdir(PREDICT_DIR) if f.lower().endswith(('.jpg', '.png'))]
        if not image_files:
            print(f"預測資料夾 '{PREDICT_DIR}' 中沒有任何圖片。")
            return

        # 載入分類模型
        classifier_model_path = os.path.join(CLASSIFIER_MODEL_DIR, "classifier_model.pt")
        if not os.path.exists(classifier_model_path):
            print(f"分類模型 '{classifier_model_path}' 不存在。請先訓練分類模型。")
            return

        # 獲取 img-N 資料夾列表
        img_folders = [f for f in os.listdir(NEW_DIR) if os.path.isdir(os.path.join(NEW_DIR, f)) and f.startswith("img-")]
        if not img_folders:
            print("沒有找到任何 img-N 資料夾。")
            return
        num_classes_classifier = len(img_folders)

        classifier_model = ClassifierCNN(num_classes=num_classes_classifier).to(device)
        classifier_model.load_state_dict(torch.load(classifier_model_path, map_location=device))
        classifier_model.eval()

        # 開啟一個CSV文件來保存預測結果
        predictions_csv = os.path.join(PREDICT_DIR, "predictions.csv")
        with open(predictions_csv, 'w', newline='', encoding='utf-8') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(['Image', 'Source Folder', 'Predicted CAPTCHA'])

            # 批次預測所有圖片
            for image_file in image_files:
                image_path = os.path.join(PREDICT_DIR, image_file)
                try:
                    with Image.open(image_path) as img:
                        image_tensor = image_to_tensor(img).unsqueeze(0).to(device)  # [1, 1, H, W]
                except Exception as e:
                    print(f"無法打開圖片 {image_path}: {e}")
                    continue

                # 使用分類模型預測圖片來源
                with torch.no_grad():
                    outputs = classifier_model(image_tensor)  # [1, num_classes]
                    _, predicted = torch.max(outputs, 1)
                    predicted_class = predicted.item()

                # 獲取分類結果對應的 img-N 資料夾
                if predicted_class >= len(img_folders):
                    print(f"分類結果超出範圍 for image {image_file}.")
                    continue
                source_folder = img_folders[predicted_class]
                print(f"圖片 {image_file} 分類結果：來自 '{source_folder}' 資料夾。")

                # 載入對應的 OCR 模型
                # 根據 Backbone 的不同選擇不同的模型類別
                # 假設模型命名為 crnn_model-{backbone}-gru_{img_num}.pt
                ocr_model_files = [f for f in os.listdir(os.path.join(NEW_DIR, source_folder, "model")) if f.startswith("crnn_model") and f.endswith(".pt")]
                if not ocr_model_files:
                    print(f"OCR 模型資料夾 '{os.path.join(NEW_DIR, source_folder, 'model')}' 中未找到任何模型文件。請先訓練 OCR 模型。")
                    continue
                # 假設只有一個模型文件
                ocr_model_filename = ocr_model_files[0]
                ocr_model_path = os.path.join(NEW_DIR, source_folder, "model", ocr_model_filename)
                if not os.path.exists(ocr_model_path):
                    print(f"OCR 模型 '{ocr_model_path}' 不存在。請先訓練 OCR 模型。")
                    continue

                # 判斷 OCR 模型的 Backbone
                if 'resnet18' in ocr_model_filename:
                    ocr_backbone = 'resnet18'
                    ocr_model = CRNN_GRU(num_classes=NUM_CLASSES).to(device)
                elif 'mobilenetv3' in ocr_model_filename:
                    ocr_backbone = 'mobilenetv3'
                    ocr_model = CRNN_GRU_MobileNetV3(num_classes=NUM_CLASSES).to(device)
                elif 'efficientnet' in ocr_model_filename:
                    ocr_backbone = 'efficientnet'
                    ocr_model = CRNN_GRU_EfficientNet(num_classes=NUM_CLASSES).to(device)
                else:
                    print(f"OCR 模型 '{ocr_model_filename}' 的 Backbone 未知。請確認模型命名規則。")
                    continue

                ocr_model.load_state_dict(torch.load(ocr_model_path, map_location=device))
                ocr_model.eval()

                # 預測驗證碼內容
                with torch.no_grad():
                    ocr_output = ocr_model(image_tensor)  # [seq_len, batch, num_classes]
                    ocr_output = ocr_output.log_softmax(2)
                    preds = ocr_output.detach().cpu()
                    pred_strings = decode_predictions(preds)

                # 輸出並保存預測結果
                predicted_captcha = pred_strings[0]
                print(f"圖片 {image_file} 預測的驗證碼內容：{predicted_captcha}")
                csvwriter.writerow([image_file, source_folder, predicted_captcha])

        print(f"所有預測結果已保存至 '{predictions_csv}'")

    elif operation == "plot":
        # 列出所有 CSV 結果文件
        # 包含 pretrain/output、new/img-N/output 和 train-evaluate/output 資料夾
        evaluate_options = []

        # 選項 1: pretrain/output
        evaluate_options.append(os.path.join(PRETRAIN_OUTPUT_DIR))

        # 選項 2-N: new/img-N/output
        img_folders = [f for f in os.listdir(NEW_DIR) if os.path.isdir(os.path.join(NEW_DIR, f)) and f.startswith("img-")]
        for folder in img_folders:
            evaluate_options.append(os.path.join(NEW_DIR, folder, "output"))

        # 選項 N+1: train-evaluate/output
        evaluate_options.append(os.path.join(TRAIN_EVALUATE_OUTPUT_DIR))

        # 收集所有 CSV 文件
        available_csvs = []
        for output_dir in evaluate_options:
            if os.path.exists(output_dir):
                available_csvs += [
                    os.path.join(output_dir, f) 
                    for f in os.listdir(output_dir) 
                    if f.startswith("train_val_results") and f.endswith(".csv")
                ]

        if not available_csvs:
            print("沒有找到任何訓練結果的 CSV 文件。請先訓練模型。")
            return

        # 顯示可用的 CSV 文件
        print("可用的訓練結果 CSV 文件:")
        for idx, file in enumerate(available_csvs, 1):
            print(f"{idx}. {file}")
        try:
            csv_choice = int(input(f"請選擇要繪製的 CSV 文件編號（1-{len(available_csvs)}）： ").strip()) - 1
            if csv_choice < 0 or csv_choice >= len(available_csvs):
                raise ValueError
            selected_csv_file = available_csvs[csv_choice]
            selected_csv_path = selected_csv_file
        except (ValueError, IndexError):
            print("無效的選擇。")
            return

        # 繪製曲線
        plot_from_csv(selected_csv_path)

    else:
        print(f"不支援的操作: {operation}")
        sys.exit(1)   

if __name__ == "__main__":
    main()


使用設備: cuda
請選擇使用的 Backbone 模型：
1. ResNet-18
2. MobileNetV3 Small
3. EfficientNet-B0
選擇的 Backbone：efficientnet
可訓練的資料夾:
1. ./pretrain\img
2. ./new\img-1\img
3. ./new\img-2\img
4. ./new\img-3\img
5. ./new\img-4\img
6. ./new\img-5\img
7. ./train-evaluate\img
選擇的資料來源: ./train-evaluate\img
資料將保存到: 模型->./train-evaluate\model, 輸出->./train-evaluate\output
掃描圖片資料夾: ./train-evaluate\img
找到 19909 張符合條件的圖片。
使用所有符合條件的圖片進行訓練。
資料集划分: 訓練集 15927，驗證集 1990，測試集 1992
資料已保存為: ./train-evaluate\model\train_data_train-evaluate.pt, ./train-evaluate\model\valid_data_train-evaluate.pt, ./train-evaluate\model\test_data_train-evaluate.pt


  train_data = torch.load(train_path)
  valid_data = torch.load(valid_path)


從頭開始訓練模型。
Epoch [1/120]
train loss: 3.5292, train accu: 0.00%
val loss: 3.7019, val accu: 0.00%
驗證準確率未提升，耐心計數器: 1/10
Epoch [2/120]
train loss: 3.6885, train accu: 0.00%
val loss: 3.6716, val accu: 0.00%
驗證準確率未提升，耐心計數器: 2/10
Epoch [3/120]
train loss: 3.6590, train accu: 0.00%
val loss: 3.6332, val accu: 0.00%
驗證準確率未提升，耐心計數器: 3/10
Epoch [4/120]
train loss: 3.5303, train accu: 0.00%
val loss: 3.4159, val accu: 0.00%
驗證準確率未提升，耐心計數器: 4/10
Epoch [5/120]
train loss: 3.3295, train accu: 0.00%
val loss: 3.2510, val accu: 0.00%
驗證準確率未提升，耐心計數器: 5/10
Epoch [6/120]
train loss: 3.1833, train accu: 0.01%
val loss: 3.1138, val accu: 0.00%
驗證準確率未提升，耐心計數器: 6/10
Epoch [7/120]
train loss: 3.0103, train accu: 0.09%
val loss: 2.9557, val accu: 0.05%
驗證準確率提升，模型已保存。
Epoch [8/120]
train loss: 2.7972, train accu: 0.35%
val loss: 2.7721, val accu: 0.05%
驗證準確率未提升，耐心計數器: 1/10
Epoch [9/120]
train loss: 2.5863, train accu: 0.71%
val loss: 2.5925, val accu: 0.25%
驗證準確率提升，模型已保存。
Epoch [10/120]
train loss: 2.3940, trai

KeyboardInterrupt: 