In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive; the dataset
# paths used by the cells below all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
import pandas as pd
from pathlib import Path
import shutil

# Locations of the VisA data on Google Drive:
#   visa_root_dir  - root folder of the VisA_20220922 download
#   split_csv_path - the 2cls_fewshot.csv split file under <visa_root_dir>/split_csv
#   output_root    - destination root for the re-organised per-class folders
visa_root_dir = "/content/drive/MyDrive/Colab Notebooks/CLIP_Anomaly_ZSAD/data/VisA_20220922"
split_csv_path = os.path.join(visa_root_dir, "split_csv", "2cls_fewshot.csv")
output_root = "/content/drive/MyDrive/Colab Notebooks/CLIP_Anomaly_ZSAD/data/VisA_pytorch"

# Load the few-shot split CSV and report how many rows (samples) it lists.
split_df = pd.read_csv(split_csv_path)
print("📄 Split CSV 讀取完成，共有樣本數量：", len(split_df))


📄 Split CSV 讀取完成，共有樣本數量： 10821


In [None]:
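# NOTE: earlier draft of the split helper, kept commented out for reference only.
# It is superseded by split_visa_by_csv_strict below; unlike that version, this
# draft also copied masks for train/anomaly rows (masks=100 vs. 80 in the
# recorded outputs).
# import os, shutil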
# import pandas as pd
# from tqdm import tqdm
# from pathlib import Path

# def split_visa_by_csv(csv_path, root_path, save_root, target_class="candle"):
#     df = pd.read_csv(csv_path)
#     df = df[df['object'] == target_class]

#     train_dir = os.path.join(save_root, target_class, "train")
#     test_dir  = os.path.join(save_root, target_class, "test")
#     gt_dir    = os.path.join(save_root, target_class, "ground_truth")

#     os.makedirs(train_dir, exist_ok=True)
#     os.makedirs(test_dir, exist_ok=True)
#     os.makedirs(gt_dir, exist_ok=True)

#     cnt = {"train": 0, "test": 0, "mask": 0}

#     for _, row in tqdm(df.iterrows(), total=len(df), desc=f"Copying {target_class}"):
#         split, label = row["split"], row["label"]
#         img_rel = row["image"]
#         mask_rel= row["mask"] if not pd.isna(row["mask"]) else None

#         src_img = os.path.join(root_path, img_rel)
#         dst_img = os.path.join(train_dir if split=="train" else test_dir, Path(src_img).name)
#         shutil.copy(src_img, dst_img)
#         cnt[split] += 1

#         # If the row has an anomaly mask, also copy it to ground_truth
#         if label == "anomaly" and mask_rel:
#             src_mask = os.path.join(root_path, mask_rel)
#             dst_mask = os.path.join(gt_dir, Path(src_mask).name)
#             if os.path.exists(src_mask):
#                 shutil.copy(src_mask, dst_mask)
#                 cnt["mask"] += 1

#     print(f"✅ 分類完成：train={cnt['train']}, test={cnt['test']}, masks={cnt['mask']}")


In [None]:
# split_visa_by_csv(split_csv_path, visa_root_dir, output_root, target_class="candle")

Copying candle: 100%|██████████| 1100/1100 [10:29<00:00,  1.75it/s]

✅ 分類完成：train=220, test=880, masks=100





In [None]:
import os, shutil
import pandas as pd
from tqdm import tqdm
from pathlib import Path

def split_visa_by_csv_strict(csv_path, root_path, save_root, target_class="candle"):
    """Copy one VisA class into flat ``train`` / ``test`` / ``ground_truth`` folders.

    Rows of the split CSV whose ``object`` column equals ``target_class`` are
    selected. Each row's image is copied into
    ``<save_root>/<target_class>/train`` or ``.../test`` according to the
    row's ``split`` value. A mask is copied into ``.../ground_truth`` only for
    rows with ``split == "test"`` and ``label == "anomaly"``; a listed mask
    that does not exist on disk is reported and skipped.

    Files are stored under their base name only, so two selected rows whose
    paths end in the same file name would overwrite each other in the
    destination folder.

    Args:
        csv_path: Path to the split CSV. The columns ``object``, ``split``,
            ``label``, ``image`` and ``mask`` are read.
        root_path: Directory that the ``image`` / ``mask`` columns are
            relative to.
        save_root: Directory under which ``<target_class>/`` is created.
        target_class: Value of the ``object`` column to process.

    Returns:
        None. A summary of the copied file counts is printed.

    Raises:
        ValueError: If a selected row has a ``split`` value other than
            ``"train"`` or ``"test"``. Raised before any directory is created
            or any file is copied.
        FileNotFoundError: Propagated from ``shutil.copy`` when a listed
            source image is missing.
    """
    df = pd.read_csv(csv_path)
    df = df[df['object'] == target_class]

    # A class name that matches no rows is most likely a typo: do not create
    # empty output folders or report a successful split for it.
    if df.empty:
        print(f"⚠️ CSV 中找不到類別「{target_class}」的樣本，未複製任何檔案")
        return

    valid_splits = ("train", "test")

    # Validate up front. Otherwise an unexpected split value would first be
    # copied into the test folder and only then fail while counting, leaving a
    # partially written and mislabelled output tree behind.
    unknown_splits = [s for s in df["split"].unique() if s not in valid_splits]
    if unknown_splits:
        listed = ", ".join(repr(s) for s in unknown_splits)
        raise ValueError(
            f"Unexpected split value(s) for class {target_class!r}: {listed}; "
            f"expected one of {valid_splits}"
        )

    train_dir = os.path.join(save_root, target_class, "train")
    test_dir  = os.path.join(save_root, target_class, "test")
    gt_dir    = os.path.join(save_root, target_class, "ground_truth")

    for folder in (train_dir, test_dir, gt_dir):
        os.makedirs(folder, exist_ok=True)

    dst_dirs = {"train": train_dir, "test": test_dir}
    cnt = {"train": 0, "test": 0, "mask": 0}

    # Iterate the needed columns directly instead of building a Series per row.
    rows = zip(df["split"], df["label"], df["image"], df["mask"])
    for split, label, img_rel, mask_rel in tqdm(rows, total=len(df), desc=f"Copying {target_class}"):
        # Rows without a mask hold NaN in the mask column.
        if pd.isna(mask_rel):
            mask_rel = None

        src_img = os.path.join(root_path, img_rel)
        dst_img = os.path.join(dst_dirs[split], Path(src_img).name)

        shutil.copy(src_img, dst_img)
        cnt[split] += 1

        # Keep masks for test/anomaly rows only.
        if split == "test" and label == "anomaly" and mask_rel:
            src_mask = os.path.join(root_path, mask_rel)
            dst_mask = os.path.join(gt_dir, Path(src_mask).name)
            if os.path.exists(src_mask):
                shutil.copy(src_mask, dst_mask)
                cnt["mask"] += 1
            else:
                print(f"⚠️ 遺失遮罩：{src_mask}")

    print(f"\n✅ 切分完成：train={cnt['train']}, test={cnt['test']}, masks(test/anomaly only)={cnt['mask']}")


In [None]:
# Run the strict split for the "candle" class, reading from visa_root_dir and
# writing under output_root (both defined in the configuration cell).
split_visa_by_csv_strict(split_csv_path, visa_root_dir, output_root, target_class="candle")

Copying candle: 100%|██████████| 1100/1100 [00:29<00:00, 36.80it/s]


✅ 切分完成：train=220, test=880, masks(test/anomaly only)=80





In [None]:
import os
import pandas as pd
from pathlib import Path

def analyze_existing_split(csv_path, data_root, target_class="candle"):
    """Summarise the contents of an already-split class folder.

    For ``<data_root>/<target_class>/{train,test,ground_truth}`` this counts
    the directory entries. Entries in ``train`` and ``test`` are looked up by
    file name in the split CSV (rows whose ``object`` equals ``target_class``)
    and tallied as ``normal`` or ``anomaly``; names with any other or no label
    are reported on stdout. A folder that does not exist is reported and left
    out of the result.

    Args:
        csv_path: Path to the split CSV (``object``, ``image`` and ``label``
            columns are read).
        data_root: Root directory holding the per-class output folders.
        target_class: Class whose folders are inspected.

    Returns:
        dict keyed by folder name. ``train`` / ``test`` map to
        ``{"total", "normal", "anomaly"}`` counts, ``ground_truth`` maps to
        ``{"total_masks"}``.
    """
    split_table = pd.read_csv(csv_path)
    class_rows = split_table[split_table["object"] == target_class]

    # Image base name -> label, used to classify files found on disk.
    label_by_name = {
        Path(image_rel).name: image_label
        for image_rel, image_label in zip(class_rows["image"], class_rows["label"])
    }

    summary = {}
    for subdir in ("train", "test", "ground_truth"):
        subdir_path = os.path.join(data_root, target_class, subdir)
        if not os.path.exists(subdir_path):
            print(f"❌ 路徑不存在：{subdir_path}")
            continue

        entries = os.listdir(subdir_path)

        # Masks are only counted, not matched against the CSV.
        if subdir == "ground_truth":
            summary[subdir] = {"total_masks": len(entries)}
            continue

        tally = {"normal": 0, "anomaly": 0}
        for entry in entries:
            entry_label = label_by_name.get(entry, "unknown")
            if entry_label in tally:
                tally[entry_label] += 1
            else:
                print(f"⚠️ 無法辨識：{entry}")

        summary[subdir] = {
            "total": len(entries),
            "normal": tally["normal"],
            "anomaly": tally["anomaly"],
        }

    print(f"\n📊 資料夾分析結果 ({target_class}):")
    for folder_name, stats in summary.items():
        print(f"📁 {folder_name}:")
        for stat_name, stat_value in stats.items():
            print(f"  - {stat_name}: {stat_value}")
    return summary


In [None]:
# Same split CSV and output root as configured at the top of the notebook,
# repeated here as literals so this cell does not rely on those variables.
csv_path = "/content/drive/MyDrive/Colab Notebooks/CLIP_Anomaly_ZSAD/data/VisA_20220922/split_csv/2cls_fewshot.csv"
data_root = "/content/drive/MyDrive/Colab Notebooks/CLIP_Anomaly_ZSAD/data/VisA_pytorch"

# Count what is on disk for the "candle" class; the returned dict is shown as the cell output.
analyze_existing_split(csv_path, data_root, target_class="candle")



📊 資料夾分析結果 (candle):
📁 train:
  - total: 220
  - normal: 200
  - anomaly: 20
📁 test:
  - total: 880
  - normal: 800
  - anomaly: 80
📁 ground_truth:
  - total_masks: 80


{'train': {'total': 220, 'normal': 200, 'anomaly': 20},
 'test': {'total': 880, 'normal': 800, 'anomaly': 80},
 'ground_truth': {'total_masks': 80}}