## Directory Structure

```plaintext

.
├── pretrain/  fanbyprinciple/captcha-images 
│   ├── img/
│   │   ├── captcha1.jpg
│   │   ├── captcha2.png
│   │   └── ...
│   ├── model/
│   │   ├── crnn_model-resnet18-gru_pretrain.pt
│   └── output/
│       ├── train_val_results_pretrain.csv
│       └── ...
├── new/
│   ├── img-1/  parsasam/captcha-dataset
│   │   ├── img/
│   │   │   ├── captcha1.jpg
│   │   │   ├── captcha2.png
│   │   │   └── ...
│   │   ├── model/
│   │   │   ├── crnn_model-resnet18-gru_img-1.pt
│   │   └── output/
│   │       ├── train_val_results_img-1.csv
│   │       └── ...
│   ├── img-2/  greysky/captcha-dataset
│   │   ├── img/
│   │   │   ├── captcha3.jpg
│   │   │   ├── captcha4.png
│   │   │   └── ...
│   │   ├── model/
│   │   │   ├── crnn_model-resnet18-gru_img-2.pt
│   │   └── output/
│   │       ├── train_val_results_img-2.csv
│   │       └── ...
│   ├── img-3/  fournierp/captcha-version-2-images
│   │   ├── img/
│   │   │   ├── captcha7.jpg
│   │   │   ├── captcha8.png
│   │   │   └── ...
│   │   ├── model/
│   │   │   ├── crnn_model-resnet18-gru_img-3.pt
│   │   └── output/
│   │       ├── train_val_results_img-3.csv
│   │       └── ...
│   ├── img-4/  utkarshdoshi/captcha-dataset
│   │   ├── img/
│   │   │   ├── captcha9.jpg
│   │   │   ├── captcha10.png
│   │   │   └── ...
│   │   ├── model/
│   │   │   ├── crnn_model-resnet18-gru_img-4.pt
│   │   └── output/
│   │       ├── train_val_results_img-4.csv
│   │       └── ...
│   ├── img-5/  brunoasnascimento/captcha
│   │   ├── img/
│   │   │   ├── captcha11.jpg
│   │   │   ├── captcha12.png
│   │   │   └── ...
│   │   ├── model/
│   │   │   ├── crnn_model-resnet18-gru_img-5.pt
│   │   └── output/
│   │       ├── train_val_results_img-5.csv
│   │       └── ...
│   ├── classifier/   from img-1 and generate-captcha
│   │   ├── model/
│   │   │   ├── classifier_model.pt
│   │   └── output/
│   │       
│   └── ... [other img-N folders]
├── train-evaluate/
│   ├── img/
│   │   ├── captcha5.jpg
│   │   ├── captcha6.png
│   │   └── ...
│   ├── model/
│   │   ├── crnn_model-resnet18-gru_train-evaluate.pt
│   └── output/
│       ├── train_val_results_train-evaluate.csv
│       └── ...
├── predict/
│   ├── new_captcha1.jpg
│   ├── new_captcha2.png
│   └── predictions.csv
└── main.py
```

In [2]:
import os
from pathlib import Path
import kagglehub
import shutil  # 用於跨磁碟分區移動檔案

def create_folder_structure(num_new_imgs=5):
    """
    Create the project's working folders under the current working directory.

    Three parent groups are created, each with "img", "model" and "output"
    subfolders: "pretrain", "new/img-1" .. "new/img-<num_new_imgs>", and
    "train-evaluate". Folders that already exist are left untouched; a folder
    that cannot be created is reported on stdout and the remaining folders
    are still attempted.

    :param num_new_imgs: Number of "img-<n>" folders to create under "new/".
    """
    root = Path.cwd()
    leaf_names = ("img", "model", "output")

    # Parent folders in creation order: pretrain, new/img-1..N, train-evaluate.
    parents = [root / "pretrain"]
    parents += [root / "new" / f"img-{n}" for n in range(1, num_new_imgs + 1)]
    parents.append(root / "train-evaluate")

    for parent in parents:
        for leaf in leaf_names:
            target = parent / leaf
            try:
                target.mkdir(parents=True, exist_ok=True)
            except Exception as err:
                print(f"創建資料夾失敗: {target}\n錯誤訊息: {err}")

def move_across_disks(source, destination):
    """
    Move a file or directory with shutil.move, which also works across disk partitions.

    The destination's parent directory is created first when it is missing,
    so a file can be moved to a path whose folders do not exist yet. Failures
    are printed rather than raised, and reported through the return value.

    :param source: Path of the file or directory to move.
    :param destination: Path the item should be moved to.
    :return: True when the move succeeded, False when it failed.
    """
    try:
        # shutil.move does not create missing parent folders when moving a file.
        Path(destination).parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(source), str(destination))
    except OSError as e:
        # shutil.Error is a subclass of OSError, so it is handled here as well.
        print(f"移動失敗: {source} 到 {destination}\n錯誤訊息: {e}")
        return False
    return True

def download_dataset_with_kagglehub(dataset, target_path):
    """
    Download a Kaggle dataset with kagglehub and place its contents in a target directory.

    Every top-level item of the downloaded dataset directory is copied into
    ``target_path``. The items are copied rather than moved for two reasons:

    * The path returned by kagglehub lives inside its cache directory.
      NOTE(review): kagglehub appears to reuse that cache on later calls, so
      emptying it would leave a later run with nothing to transfer -- confirm
      against the installed kagglehub version.
    * shutil.move places the source *inside* a destination directory that
      already exists, so re-running after an interrupted run would nest the
      data one level too deep. Copying with ``dirs_exist_ok=True`` merges into
      the existing folder instead, which makes re-runs safe.

    Any failure while downloading or copying is printed, not raised.

    :param dataset: Kaggle dataset handle, e.g. "owner/dataset-name".
    :param target_path: Directory that receives the dataset contents; created if missing.
    """
    try:
        print(f"正在下載資料集: {dataset}")
        downloaded_path = kagglehub.dataset_download(dataset)
        print(f"成功下載資料集: {dataset} 到緩存路徑: {downloaded_path}")

        target_path = Path(target_path)
        # exist_ok=True already covers the "directory is present" case.
        target_path.mkdir(parents=True, exist_ok=True)
        for item in Path(downloaded_path).iterdir():
            destination = target_path / item.name
            if item.is_dir():
                # Merge into an existing folder instead of nesting or failing.
                shutil.copytree(item, destination, dirs_exist_ok=True)
            else:
                shutil.copy2(item, destination)
    except Exception as e:
        print(f"下載或移動資料集失敗: {dataset}\n錯誤訊息: {e}")

def main():
    """
    Build the folder layout, then fetch every dataset into its image folder.

    The pretraining dataset goes to "pretrain/img"; the five additional
    datasets go to "new/img-1/img" .. "new/img-5/img". Datasets are processed
    in the order listed below.
    """
    new_folder_count = 5

    # Make sure all destination folders exist before downloading anything.
    create_folder_structure(num_new_imgs=new_folder_count)

    root = Path.cwd()
    new_root = root / "new"

    # (Kaggle dataset handle, destination image folder), in download order.
    download_plan = [
        ("fanbyprinciple/captcha-images", root / "pretrain" / "img"),
        ("parsasam/captcha-dataset", new_root / "img-1" / "img"),
        ("greysky/captcha-dataset", new_root / "img-2" / "img"),
        ("fournierp/captcha-version-2-images", new_root / "img-3" / "img"),
        ("utkarshdoshi/captcha-dataset", new_root / "img-4" / "img"),
        ("brunoasnascimento/captcha", new_root / "img-5" / "img"),
    ]

    for dataset_handle, image_dir in download_plan:
        download_dataset_with_kagglehub(dataset_handle, image_dir)

# Run the folder setup and dataset downloads only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()




正在下載資料集: fanbyprinciple/captcha-images
Downloading from https://www.kaggle.com/api/v1/datasets/download/fanbyprinciple/captcha-images?dataset_version_number=1...


100%|██████████| 12.4M/12.4M [00:05<00:00, 2.39MB/s]

Extracting files...





成功下載資料集: fanbyprinciple/captcha-images 到緩存路徑: C:\Users\ray03\.cache\kagglehub\datasets\fanbyprinciple\captcha-images\versions\1
正在下載資料集: parsasam/captcha-dataset
Downloading from https://www.kaggle.com/api/v1/datasets/download/parsasam/captcha-dataset?dataset_version_number=1...


100%|██████████| 356M/356M [00:37<00:00, 9.87MB/s] 

Extracting files...





成功下載資料集: parsasam/captcha-dataset 到緩存路徑: C:\Users\ray03\.cache\kagglehub\datasets\parsasam\captcha-dataset\versions\1


KeyboardInterrupt: 

In [1]:
# #下載dataset 
# import kagglehub

# # pretrain
# path = kagglehub.dataset_download("fanbyprinciple/captcha-images")

# #image 
# path = kagglehub.dataset_download("parsasam/captcha-dataset") #img-1
# path = kagglehub.dataset_download("greysky/captcha-dataset") #img-2
# path = kagglehub.dataset_download("fournierp/captcha-version-2-images") #img-3
# path = kagglehub.dataset_download("utkarshdoshi/captcha-dataset") #img-4
# path = kagglehub.dataset_download("brunoasnascimento/captcha")   #img-5

# #train-evaluate
# #from img-1 and generate-captcha