In [6]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
run_exp3_noise_9x.py  ―  실험군 3
  • Train : 70개 원본 × 9배 증강
             (Flip + ColorJitter + GaussianNoise σ)  = 630 이미지
  • Test  : split42_70-30.json 의 나머지 30개 원본
  • σ : 0.05
출력 : result_exp3_noise<σ>.json / pred_exp3_noise<σ>.csv
"""

# ────────────────────────────────────────────────────────────────
# 0. 기본 import
import os, json, random, math, warnings, argparse
import numpy as np, pandas as pd
from PIL import Image, ImageFile
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms
import timm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score

ImageFile.LOAD_TRUNCATED_IMAGES = True
warnings.filterwarnings("ignore", category=UserWarning)

# ────────────────────────────────────────────────────────────────
# 1. CLI 인자  
parser = argparse.ArgumentParser()
parser.add_argument("--sigma", type=float, default=0.05,
                    help="Gaussian noise std (e.g. 0.05)")
args, _ = parser.parse_known_args()          
SIGMA = args.sigma
tag   = f"noise{SIGMA:.2f}".replace('.', '')  # e.g. noise005

# ────────────────────────────────────────────────────────────────
# 2. 실험 고정 파라미터
SEED   = 42
CSV    = r"C:\Users\ast\Documents\project\train.csv"
IMG    = r"C:\Users\ast\Documents\project\train_images"
BATCH  = 16
EPOCHS = 10
LR     = 1e-4
WD     = 3e-4
SMOOTH = 0.05

SPLIT     = "split42_70-30.json"                # 네 케이스 동일
OUT_MET   = f"result_exp3_{tag}.json"
OUT_PRED  = f"pred_exp3_{tag}.csv"

# ────────────────────────────────────────────────────────────────
# 3. 시드 & 멀티프로세싱 환경 고정
def seed_all(seed: int):
    random.seed(seed); np.random.seed(seed); os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark     = False
seed_all(SEED)

import torch.multiprocessing as mp
if mp.get_start_method(allow_none=True) != "spawn":
    mp.set_start_method("spawn", force=True)

# ────────────────────────────────────────────────────────────────
# 4. 데이터셋 정의
class ScrapDataset(Dataset):
    def __init__(self, dataframe, transform, label_enc):
        self.df   = dataframe.reset_index(drop=True).copy()
        self.dir  = IMG
        self.tf   = transform
        self.df["cls"] = label_enc.transform(self.df["weight_class"])
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = Image.open(os.path.join(self.dir, row.filename)).convert("RGB")
        return self.tf(img), torch.tensor(row.cls), row.filename

# ── 노이즈 변환
class AddGaussianNoise:
    def __init__(self, std: float): self.std = std
    def __call__(self, tensor):     return tensor + torch.randn_like(tensor) * self.std
    def __repr__(self):             return f"AddGaussianNoise(std={self.std})"

# ────────────────────────────────────────────────────────────────
# 5. Transform 설정
train_tf = transforms.Compose([
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomVerticalFlip(0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    AddGaussianNoise(SIGMA),                    # ★ 노이즈 추가
    transforms.Normalize([0.5]*3, [0.5]*3)
])
test_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# ────────────────────────────────────────────────────────────────
# 6. split 70 / 30 고정 로드
df_all = pd.read_csv(CSV)
if os.path.exists(SPLIT):
    idx = json.load(open(SPLIT, "r")); train_idx, test_idx = idx["train"], idx["test"]
else:                                   # 최초 실행 시만 생성
    train_idx, test_idx = train_test_split(
        range(len(df_all)), train_size=70, test_size=30,
        stratify=df_all["weight_class"], random_state=SEED)
    json.dump({"train":train_idx, "test":test_idx}, open(SPLIT, "w"))

train_df, test_df = df_all.iloc[train_idx], df_all.iloc[test_idx]
le = LabelEncoder().fit(train_df["weight_class"])

# ────────────────────────────────────────────────────────────────
# 7. DataLoader (9× 증강)
base_train_ds = ScrapDataset(train_df, train_tf, le)
train_ds      = ConcatDataset([base_train_ds]*9)         # 70 × 9 = 630
test_ds       = ScrapDataset(test_df,  test_tf,  le)

train_loader = DataLoader(train_ds, batch_size=BATCH, shuffle=True , num_workers=0)
test_loader  = DataLoader(test_ds , batch_size=BATCH, shuffle=False, num_workers=0)

# ────────────────────────────────────────────────────────────────
# 8. 모델, Optimizer, Scheduler
class CoaTMedium(nn.Module):
    def __init__(self, n_cls):
        super().__init__()
        self.net = timm.create_model('coat_lite_medium',
                                     pretrained=True, num_classes=n_cls)
    def forward(self, x): return self.net(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model  = CoaTMedium(len(le.classes_)).to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD)

total_steps  = len(train_loader) * EPOCHS
warmup_steps = len(train_loader)              # 1 epoch warm-up

def lr_lambda(step):
    if step < warmup_steps:
        return (step + 1) / warmup_steps
    progress = (step - warmup_steps) / (total_steps - warmup_steps)
    return 0.5 * (1 + math.cos(math.pi * progress))

scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
criterion  = nn.CrossEntropyLoss(label_smoothing=SMOOTH)

# ────────────────────────────────────────────────────────────────
# 9. 학습 루프
for epoch in range(1, EPOCHS + 1):
    model.train(); running_loss = 0.0
    for xb, yb, _ in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward(); optimizer.step(); scheduler.step()
        running_loss += loss.item() * xb.size(0)
    print(f"[{epoch}/{EPOCHS}] Loss={running_loss/len(train_loader.dataset):.4f}")

# ────────────────────────────────────────────────────────────────
# 10. 평가
model.eval(); y_true, y_pred, rows = [], [], []
with torch.no_grad():
    for xb, yb, fn in test_loader:
        preds = model(xb.to(device)).argmax(1).cpu()
        y_true += yb.tolist(); y_pred += preds.tolist()
        rows   += list(zip(fn, le.inverse_transform(preds.numpy())))
acc = accuracy_score(y_true, y_pred)
macro_f1 = f1_score(y_true, y_pred, average="macro")
print(f"✅ Exp3-{tag}  Acc={acc:.4f}  Macro-F1={macro_f1:.4f}")

# ────────────────────────────────────────────────────────────────
# 11. 결과 저장
json.dump({"experiment":f"exp3_{tag}",
           "sigma": SIGMA,
           "accuracy":acc,
           "macro_f1":macro_f1},
          open(OUT_MET, "w"), indent=2)

pd.DataFrame(rows, columns=["filename","predicted_label"])\
  .to_csv(OUT_PRED, index=False)

print(f"📄 Metrics → {OUT_MET}\n📄 Preds   → {OUT_PRED}")


[1/10] Loss=0.6366
[2/10] Loss=0.1840
[3/10] Loss=0.1714
[4/10] Loss=0.1701
[5/10] Loss=0.1701
[6/10] Loss=0.1698
[7/10] Loss=0.1698
[8/10] Loss=0.1697
[9/10] Loss=0.1697
[10/10] Loss=0.1697
✅ Exp3-noise005  Acc=0.5000  Macro-F1=0.5042
📄 Metrics → result_exp3_noise005.json
📄 Preds   → pred_exp3_noise005.csv


In [10]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
run_exp3_noise_9x.py ― 실험군 3
  · Train : 70 원본 × 9 배 (Flip + ColorJitter + GaussianNoise σ=0.03) = 630
  · Test  : split42_70-30.json 의 30 원본
  · Aug PNG 저장: 앞 2 배치 / 클래스당 2 장 / 총 120 장 한도
출력 : result_exp3_noise003.json , pred_exp3_noise003.csv
"""

# ────────────────────────── 0. import
import os, json, random, math, argparse, warnings, numpy as np, pandas as pd
from PIL import Image, ImageFile
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms
from torchvision.utils import save_image
import timm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score

ImageFile.LOAD_TRUNCATED_IMAGES = True
warnings.filterwarnings("ignore", category=UserWarning)

# ────────────────────────── 1. 인자 & 고정값
parser = argparse.ArgumentParser()
parser.add_argument("--sigma", type=float, default=0.03,
                    help="Gaussian noise std (default 0.03)")
args, _ = parser.parse_known_args()          # Jupyter -f 옵션 무시
SIGMA = args.sigma                           # 0.03
TAG   = f"noise{SIGMA:.2f}".replace('.', '') # noise003

SEED = 42
CSV  = r"C:\Users\ast\Documents\project\train.csv"
IMG  = r"C:\Users\ast\Documents\project\train_images"
SPLIT = "split42_70-30.json"

BATCH  = 16
EPOCHS = 10
LR     = 1e-4
WD     = 3e-4
SMOOTH = 0.05

OUT_MET  = f"result_exp3_{TAG}.json"
OUT_PRED = f"pred_exp3_{TAG}.csv"

# --- 증강본 저장 옵션 ---
SAVE_AUG            = True
SAVE_DIR            = f"./aug_vis_{TAG}"
SAVE_FIRST_BATCHES  = 2     # (a) 처음 k 배치
SAVE_PER_CLASS      = 2     # (b) 클래스별 최대 c 장
SAVE_TOTAL_LIMIT    = 120   # (c) 전체 n 장
os.makedirs(SAVE_DIR, exist_ok=True)

# ────────────────────────── 2. 시드 고정
def seed_all(s: int):
    random.seed(s); np.random.seed(s); os.environ["PYTHONHASHSEED"] = str(s)
    torch.manual_seed(s); torch.cuda.manual_seed_all(s)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
seed_all(SEED)

# ────────────────────────── 3. Dataset 및 변환
class AddGaussianNoise:
    def __init__(self, std: float):
        self.std = std
    def __call__(self, t: torch.Tensor):
        return t + torch.randn_like(t) * self.std
    def __repr__(self):
        return f"AddGaussianNoise(std={self.std})"

train_tf = transforms.Compose([
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomVerticalFlip(0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    AddGaussianNoise(SIGMA),
    transforms.Normalize([0.5] * 3, [0.5] * 3),
])
test_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5] * 3, [0.5] * 3),
])

class ScrapDataset(Dataset):
    def __init__(self, df: pd.DataFrame, tf, le, save_aug=False):
        self.df = df.reset_index(drop=True).copy()
        self.dir = IMG
        self.tf = tf
        self.cls = le.transform(self.df["weight_class"])
        self.save_aug = save_aug
        self.saved_total = 0
        self.class_cnt = {i: 0 for i in range(len(le.classes_))}

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_pil = Image.open(os.path.join(self.dir, row.filename)).convert("RGB")
        tensor = self.tf(img_pil)

        # ---- 증강본 저장 ----
        if (self.save_aug
            and self.saved_total < SAVE_TOTAL_LIMIT
            and self.saved_total < SAVE_FIRST_BATCHES * BATCH
            and self.class_cnt[self.cls[idx]] < SAVE_PER_CLASS):
            fname = f"{self.saved_total:04d}_{row.filename}"
            save_path = os.path.join(SAVE_DIR, fname)
            save_image((tensor * 0.5 + 0.5).clamp(0, 1), save_path)
            self.saved_total += 1
            self.class_cnt[self.cls[idx]] += 1
        # ---------------------

        return tensor, torch.tensor(self.cls[idx]), row.filename

# ────────────────────────── 4. 데이터 분할 로드
df = pd.read_csv(CSV)
if os.path.exists(SPLIT):
    idx = json.load(open(SPLIT))
    train_idx, test_idx = idx["train"], idx["test"]
else:
    train_idx, test_idx = train_test_split(
        range(len(df)), train_size=70, test_size=30,
        stratify=df["weight_class"], random_state=SEED)
    json.dump({"train": train_idx, "test": test_idx}, open(SPLIT, "w"))

train_df, test_df = df.iloc[train_idx], df.iloc[test_idx]
le = LabelEncoder().fit(train_df["weight_class"])

# ────────────────────────── 5. DataLoader (9×)
base_ds = ScrapDataset(train_df, train_tf, le, save_aug=SAVE_AUG)
train_ds = ConcatDataset([base_ds] * 9)  # 630
test_ds = ScrapDataset(test_df, test_tf, le, save_aug=False)

train_ld = DataLoader(train_ds, batch_size=BATCH, shuffle=True, num_workers=0)
test_ld = DataLoader(test_ds, batch_size=BATCH, shuffle=False, num_workers=0)

# ────────────────────────── 6. 모델 · Optim · Scheduler
class CoaTMedium(nn.Module):
    def __init__(self, n_cls: int):
        super().__init__()
        self.net = timm.create_model(
            'coat_lite_medium', pretrained=True, num_classes=n_cls
        )

    def forward(self, x):
        return self.net(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CoaTMedium(len(le.classes_)).to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD)

total_steps = len(train_ld) * EPOCHS
warmup_steps = len(train_ld)

def lr_lambda(step):
    if step < warmup_steps:
        return (step + 1) / warmup_steps
    prog = (step - warmup_steps) / (total_steps - warmup_steps)
    return 0.5 * (1 + math.cos(math.pi * prog))

scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
criterion = nn.CrossEntropyLoss(label_smoothing=SMOOTH)

# ────────────────────────── 7. 학습
for ep in range(1, EPOCHS + 1):
    model.train()
    epoch_loss = 0.0
    for xb, yb, _ in train_ld:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward()
        optimizer.step()
        scheduler.step()
        epoch_loss += loss.item() * xb.size(0)
    print(f"[{ep}/{EPOCHS}] Loss={epoch_loss/len(train_ld.dataset):.4f}")

# ────────────────────────── 8. 평가
model.eval()
yt, yp, rows = [], [], []
with torch.no_grad():
    for xb, yb, fn in test_ld:
        preds = model(xb.to(device)).argmax(1).cpu()
        yt += yb.tolist(); yp += preds.tolist()
        rows += list(zip(fn, le.inverse_transform(preds.numpy())))
acc = accuracy_score(yt, yp)
f1  = f1_score(yt, yp, average="macro")
print(f"✅ Exp3-{TAG}  Acc={acc:.4f}  Macro-F1={f1:.4f}")

# ────────────────────────── 9. 결과 저장
json.dump({"experiment": f"exp3_{TAG}", "sigma": SIGMA,
           "accuracy": acc, "macro_f1": f1},
          open(OUT_MET, "w"), indent=2)
pd.DataFrame(rows, columns=["filename", "predicted_label"]).to_csv(OUT_PRED, index=False)
print(f"📄 Metrics → {OUT_MET}\n📄 Preds   → {OUT_PRED}")

if SAVE_AUG:
    print(f"🖼️  Saved {base_ds.saved_total} augmented images → {SAVE_DIR}")


[1/10] Loss=0.6295
[2/10] Loss=0.1813
[3/10] Loss=0.1718
[4/10] Loss=0.1702
[5/10] Loss=0.1700
[6/10] Loss=0.1698
[7/10] Loss=0.1697
[8/10] Loss=0.1697
[9/10] Loss=0.1696
[10/10] Loss=0.1696
✅ Exp3-noise003  Acc=0.4667  Macro-F1=0.4776
📄 Metrics → result_exp3_noise003.json
📄 Preds   → pred_exp3_noise003.csv
🖼️  Saved 6 augmented images → ./aug_vis_noise003


In [12]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
run_exp3_noise_9x.py ― 실험군 3
  · Train : 70 원본 × 9 배 (Flip + ColorJitter + GaussianNoise σ=0.1) = 630
  · Test  : split42_70-30.json 의 30 원본
  · Aug PNG 저장: 앞 2 배치 / 클래스당 2 장 / 총 120 장 한도
출력 : result_exp3_noise003.json , pred_exp3_noise003.csv
"""

# ────────────────────────── 0. import
import os, json, random, math, argparse, warnings, numpy as np, pandas as pd
from PIL import Image, ImageFile
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms
from torchvision.utils import save_image
import timm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score

ImageFile.LOAD_TRUNCATED_IMAGES = True
warnings.filterwarnings("ignore", category=UserWarning)

# ────────────────────────── 1. 인자 & 고정값
parser = argparse.ArgumentParser()
parser.add_argument("--sigma", type=float, default=0.1,
                    help="Gaussian noise std (default 0.1)")
args, _ = parser.parse_known_args()          # Jupyter -f 옵션 무시
SIGMA = args.sigma                           
TAG   = f"noise{SIGMA:.2f}".replace('.', '') 

SEED = 42
CSV  = r"C:\Users\ast\Documents\project\train.csv"
IMG  = r"C:\Users\ast\Documents\project\train_images"
SPLIT = "split42_70-30.json"

BATCH  = 16
EPOCHS = 10
LR     = 1e-4
WD     = 3e-4
SMOOTH = 0.05

OUT_MET  = f"result_exp3_{TAG}.json"
OUT_PRED = f"pred_exp3_{TAG}.csv"

# --- 증강본 저장 옵션 ---
SAVE_AUG            = True
SAVE_DIR            = f"./aug_vis_{TAG}"
SAVE_FIRST_BATCHES  = 2     # (a) 처음 k 배치
SAVE_PER_CLASS      = 2     # (b) 클래스별 최대 c 장
SAVE_TOTAL_LIMIT    = 120   # (c) 전체 n 장
os.makedirs(SAVE_DIR, exist_ok=True)

# ────────────────────────── 2. 시드 고정
def seed_all(s: int):
    random.seed(s); np.random.seed(s); os.environ["PYTHONHASHSEED"] = str(s)
    torch.manual_seed(s); torch.cuda.manual_seed_all(s)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
seed_all(SEED)

# ────────────────────────── 3. Dataset 및 변환
class AddGaussianNoise:
    def __init__(self, std: float):
        self.std = std
    def __call__(self, t: torch.Tensor):
        return t + torch.randn_like(t) * self.std
    def __repr__(self):
        return f"AddGaussianNoise(std={self.std})"

train_tf = transforms.Compose([
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomVerticalFlip(0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    AddGaussianNoise(SIGMA),
    transforms.Normalize([0.5] * 3, [0.5] * 3),
])
test_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5] * 3, [0.5] * 3),
])

class ScrapDataset(Dataset):
    def __init__(self, df: pd.DataFrame, tf, le, save_aug=False):
        self.df = df.reset_index(drop=True).copy()
        self.dir = IMG
        self.tf = tf
        self.cls = le.transform(self.df["weight_class"])
        self.save_aug = save_aug
        self.saved_total = 0
        self.class_cnt = {i: 0 for i in range(len(le.classes_))}

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_pil = Image.open(os.path.join(self.dir, row.filename)).convert("RGB")
        tensor = self.tf(img_pil)

        # ---- 증강본 저장 ----
        if (self.save_aug
            and self.saved_total < SAVE_TOTAL_LIMIT
            and self.saved_total < SAVE_FIRST_BATCHES * BATCH
            and self.class_cnt[self.cls[idx]] < SAVE_PER_CLASS):
            fname = f"{self.saved_total:04d}_{row.filename}"
            save_path = os.path.join(SAVE_DIR, fname)
            save_image((tensor * 0.5 + 0.5).clamp(0, 1), save_path)
            self.saved_total += 1
            self.class_cnt[self.cls[idx]] += 1
        # ---------------------

        return tensor, torch.tensor(self.cls[idx]), row.filename

# ────────────────────────── 4. 데이터 분할 로드
df = pd.read_csv(CSV)
if os.path.exists(SPLIT):
    idx = json.load(open(SPLIT))
    train_idx, test_idx = idx["train"], idx["test"]
else:
    train_idx, test_idx = train_test_split(
        range(len(df)), train_size=70, test_size=30,
        stratify=df["weight_class"], random_state=SEED)
    json.dump({"train": train_idx, "test": test_idx}, open(SPLIT, "w"))

train_df, test_df = df.iloc[train_idx], df.iloc[test_idx]
le = LabelEncoder().fit(train_df["weight_class"])

# ────────────────────────── 5. DataLoader (9×)
base_ds = ScrapDataset(train_df, train_tf, le, save_aug=SAVE_AUG)
train_ds = ConcatDataset([base_ds] * 9)  # 630
test_ds = ScrapDataset(test_df, test_tf, le, save_aug=False)

train_ld = DataLoader(train_ds, batch_size=BATCH, shuffle=True, num_workers=0)
test_ld = DataLoader(test_ds, batch_size=BATCH, shuffle=False, num_workers=0)

# ────────────────────────── 6. 모델 · Optim · Scheduler
class CoaTMedium(nn.Module):
    def __init__(self, n_cls: int):
        super().__init__()
        self.net = timm.create_model(
            'coat_lite_medium', pretrained=True, num_classes=n_cls
        )

    def forward(self, x):
        return self.net(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CoaTMedium(len(le.classes_)).to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD)

total_steps = len(train_ld) * EPOCHS
warmup_steps = len(train_ld)

def lr_lambda(step):
    if step < warmup_steps:
        return (step + 1) / warmup_steps
    prog = (step - warmup_steps) / (total_steps - warmup_steps)
    return 0.5 * (1 + math.cos(math.pi * prog))

scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
criterion = nn.CrossEntropyLoss(label_smoothing=SMOOTH)

# ────────────────────────── 7. 학습
for ep in range(1, EPOCHS + 1):
    model.train()
    epoch_loss = 0.0
    for xb, yb, _ in train_ld:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward()
        optimizer.step()
        scheduler.step()
        epoch_loss += loss.item() * xb.size(0)
    print(f"[{ep}/{EPOCHS}] Loss={epoch_loss/len(train_ld.dataset):.4f}")

# ────────────────────────── 8. 평가
model.eval()
yt, yp, rows = [], [], []
with torch.no_grad():
    for xb, yb, fn in test_ld:
        preds = model(xb.to(device)).argmax(1).cpu()
        yt += yb.tolist(); yp += preds.tolist()
        rows += list(zip(fn, le.inverse_transform(preds.numpy())))
acc = accuracy_score(yt, yp)
f1  = f1_score(yt, yp, average="macro")
print(f"✅ Exp3-{TAG}  Acc={acc:.4f}  Macro-F1={f1:.4f}")

# ────────────────────────── 9. 결과 저장
json.dump({"experiment": f"exp3_{TAG}", "sigma": SIGMA,
           "accuracy": acc, "macro_f1": f1},
          open(OUT_MET, "w"), indent=2)
pd.DataFrame(rows, columns=["filename", "predicted_label"]).to_csv(OUT_PRED, index=False)
print(f"📄 Metrics → {OUT_MET}\n📄 Preds   → {OUT_PRED}")

if SAVE_AUG:
    print(f"🖼️  Saved {base_ds.saved_total} augmented images → {SAVE_DIR}")


[1/10] Loss=0.6491
[2/10] Loss=0.1828
[3/10] Loss=0.1720
[4/10] Loss=0.1707
[5/10] Loss=0.1703
[6/10] Loss=0.1701
[7/10] Loss=0.1700
[8/10] Loss=0.1700
[9/10] Loss=0.1699
[10/10] Loss=0.1699
✅ Exp3-noise010  Acc=0.5000  Macro-F1=0.5068
📄 Metrics → result_exp3_noise010.json
📄 Preds   → pred_exp3_noise010.csv
🖼️  Saved 6 augmented images → ./aug_vis_noise010


In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
run_exp3_poisson_9x.py ― 실험군-3 (Poisson Shot Noise, 9×)
  · Train : 70 원본 × 9배 (Flip + CJ + PoissonNoise) = 630
  · Test  : split42_70-30.json 의 30 원본
  · λ-scale : 127  (픽셀 0-1 범위를 0-λ 로 매핑)
  · 증강 PNG 저장 : 앞 2 배치 × 클래스당 2장 × 총 120장 한도
출력 : result_exp3_poissonλ127.json / pred_exp3_poissonλ127.csv
"""

# ────────────────────────── 0. import
import os, json, random, math, argparse, warnings, numpy as np, pandas as pd
from PIL import Image, ImageFile
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms
from torchvision.utils import save_image
import timm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score
ImageFile.LOAD_TRUNCATED_IMAGES = True
warnings.filterwarnings("ignore", category=UserWarning)

# ────────────────────────── 1. CLI
parser = argparse.ArgumentParser()
parser.add_argument("--lambda_scale", type=float, default=127,
                    help="Poisson λ scaling factor (default 127)")
args, _ = parser.parse_known_args()
LAM = args.lambda_scale               # shot-noise λ
TAG = f"poisson{int(LAM)}"            # e.g. poisson127

# ────────────────────────── 2. 고정 파라미터
SEED=42
CSV = r"C:\Users\ast\Documents\project\train.csv"
IMG = r"C:\Users\ast\Documents\project\train_images"
SPLIT="split42_70-30.json"

BATCH=16; EPOCHS=10; LR=1e-4; WD=3e-4; SMOOTH=0.05
OUT_MET = f"result_exp3_{TAG}.json"
OUT_PRED= f"pred_exp3_{TAG}.csv"

# 증강본 저장 옵션
SAVE_AUG=True
SAVE_DIR=f"./aug_vis_{TAG}"
SAVE_FIRST_BATCHES=2; SAVE_PER_CLASS=2; SAVE_TOTAL_LIMIT=120
os.makedirs(SAVE_DIR, exist_ok=True)

# ────────────────────────── 3. 시드
def seed_all(s:int):
    random.seed(s); np.random.seed(s); os.environ["PYTHONHASHSEED"]=str(s)
    torch.manual_seed(s); torch.cuda.manual_seed_all(s)
    torch.backends.cudnn.deterministic=True; torch.backends.cudnn.benchmark=False
seed_all(SEED)

# ────────────────────────── 4. 변환
class AddPoissonNoise:
    """tensor ∈ [-1,1] → Poisson 노이즈 후 다시 [-1,1]"""
    def __init__(self, lam_scale: float = 127):
        self.lam = lam_scale
    def __call__(self, t: torch.Tensor):
        t01 = (t * 0.5 + 0.5).clamp(0,1)            # [0,1]
        noisy = torch.poisson(t01 * self.lam) / self.lam
        return (noisy * 2 - 1).clamp(-1,1)          # back to [-1,1]
    def __repr__(self): return f"AddPoissonNoise(lam={self.lam})"

train_tf = transforms.Compose([
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomVerticalFlip(0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    AddPoissonNoise(LAM),
    transforms.Normalize([0.5]*3, [0.5]*3),
])
test_tf = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),
])

# ────────────────────────── 5. Dataset
class ScrapDataset(Dataset):
    def __init__(self, df, tf, le, save_aug=False):
        self.df=df.reset_index(drop=True).copy()
        self.dir=IMG; self.tf=tf; self.le=le
        self.cls = le.transform(self.df["weight_class"])
        self.save_aug=save_aug; self.saved_tot=0
        self.class_cnt={i:0 for i in range(len(le.classes_))}
    def __len__(self): return len(self.df)
    def __getitem__(self, idx):
        row=self.df.iloc[idx]
        pil=Image.open(os.path.join(self.dir,row.filename)).convert("RGB")
        tensor=self.tf(pil)
        if (self.save_aug and
            self.saved_tot < SAVE_TOTAL_LIMIT and
            self.saved_tot < SAVE_FIRST_BATCHES*BATCH and
            self.class_cnt[self.cls[idx]] < SAVE_PER_CLASS):
            fname=f"{self.saved_tot:04d}_{row.filename}"
            save_image((tensor*0.5+0.5).clamp(0,1), os.path.join(SAVE_DIR,fname))
            self.class_cnt[self.cls[idx]]+=1; self.saved_tot+=1
        return tensor, torch.tensor(self.cls[idx]), row.filename

# ────────────────────────── 6. split
df=pd.read_csv(CSV)
if os.path.exists(SPLIT):
    idx=json.load(open(SPLIT)); tr_idx, te_idx=idx["train"], idx["test"]
else:
    tr_idx, te_idx=train_test_split(range(len(df)),train_size=70,test_size=30,
        stratify=df["weight_class"], random_state=SEED)
    json.dump({"train":tr_idx,"test":te_idx}, open(SPLIT,"w"))
train_df, test_df = df.iloc[tr_idx], df.iloc[te_idx]
le = LabelEncoder().fit(train_df["weight_class"])

# ────────────────────────── 7. DataLoader (9×)
base_ds=ScrapDataset(train_df, train_tf, le, save_aug=SAVE_AUG)
train_ds=ConcatDataset([base_ds]*9)
test_ds =ScrapDataset(test_df, test_tf, le)
train_ld=DataLoader(train_ds,batch_size=BATCH,shuffle=True ,num_workers=0)
test_ld =DataLoader(test_ds ,batch_size=BATCH,shuffle=False,num_workers=0)

# ────────────────────────── 8. 모델·Optim·Scheduler
class CoaTMedium(nn.Module):
    def __init__(self, n_cls: int):
        super().__init__()
        # timm backbone
        self.net = timm.create_model(
            'coat_lite_medium',
            pretrained=True,
            num_classes=n_cls
        )

    def forward(self, x):
        return self.net(x)

device=torch.device("cuda" if torch.cuda.is_available() else "cpu")
model=CoaTMedium(len(le.classes_)).to(device)
optimizer=torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD)

total_steps=len(train_ld)*EPOCHS; warmup=len(train_ld)
scheduler=torch.optim.lr_scheduler.LambdaLR(
    optimizer,
    lr_lambda=lambda s:(s+1)/warmup if s<warmup
             else 0.5*(1+math.cos(math.pi*(s-warmup)/(total_steps-warmup))))
criterion=nn.CrossEntropyLoss(label_smoothing=SMOOTH)

# ────────────────────────── 9. 학습
for ep in range(1, EPOCHS+1):
    model.train(); loss_sum=0
    for xb,yb,_ in train_ld:
        xb,yb = xb.to(device), yb.to(device)
        optimizer.zero_grad(); loss=criterion(model(xb), yb)
        loss.backward(); optimizer.step(); scheduler.step()
        loss_sum += loss.item()*xb.size(0)
    print(f"[{ep}/{EPOCHS}] Loss={loss_sum/len(train_ld.dataset):.4f}")

# ────────────────────────── 10. 평가
model.eval(); yt,yp,rows=[],[],[]
with torch.no_grad():
    for xb,yb,f in test_ld:
        p=model(xb.to(device)).argmax(1).cpu()
        yt+= yb.tolist(); yp+= p.tolist()
        rows+=list(zip(f, le.inverse_transform(p.numpy())))
acc=accuracy_score(yt,yp); f1=f1_score(yt,yp,average="macro")
print(f"✅ Exp3-{TAG}  Acc={acc:.4f}  Macro-F1={f1:.4f}")

# ────────────────────────── 11. 저장
json.dump({"experiment":f"exp3_{TAG}","lambda_scale":LAM,
           "accuracy":acc,"macro_f1":f1},
          open(OUT_MET,"w"), indent=2)
pd.DataFrame(rows,columns=["filename","predicted_label"]).to_csv(OUT_PRED,index=False)
print(f"📄 Metrics → {OUT_MET}\n📄 Preds   → {OUT_PRED}")
if SAVE_AUG:
    print(f"🖼️  Saved {base_ds.saved_tot} augmented images → {SAVE_DIR}")


[1/10] Loss=0.6726
[2/10] Loss=0.1984
[3/10] Loss=0.1742
[4/10] Loss=0.1714
[5/10] Loss=0.1709
[6/10] Loss=0.1705
[7/10] Loss=0.1703
[8/10] Loss=0.1701
[9/10] Loss=0.1700
[10/10] Loss=0.1701
✅ Exp3-poisson127  Acc=0.5333  Macro-F1=0.5345
📄 Metrics → result_exp3_poisson127.json
📄 Preds   → pred_exp3_poisson127.csv
🖼️  Saved 6 augmented images → ./aug_vis_poisson127


In [7]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
run_exp3_speckle_9x.py  ―  실험군-3 (Speckle Noise σ=0.08, 9×)
  • Train : 70 원본 × 9배 (Flip + ColorJitter + Speckle σ) = 630
  • Test  : split42_70-30.json의 30 원본
  • PNG 저장 : 앞 2배치 / 클래스당 2장 / 총 120장 한도
출력 : result_exp3_speckle008.json / pred_exp3_speckle008.csv
"""

# ───────────────────────────── 0. import
import os, json, random, math, argparse, warnings, numpy as np, pandas as pd
from PIL import Image, ImageFile
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms
from torchvision.utils import save_image
import timm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score

ImageFile.LOAD_TRUNCATED_IMAGES = True
warnings.filterwarnings("ignore", category=UserWarning)

# ───────────────────────────── 1. CLI
parser = argparse.ArgumentParser()
parser.add_argument("--sigma", type=float, default=0.08,
                    help="Speckle noise σ (default 0.08)")
args, _ = parser.parse_known_args()
SIGMA = args.sigma                               # 0.08
TAG = f"speckle{int(SIGMA*1000):03d}"            # speckle008

# ───────────────────────────── 2. 고정 파라미터
SEED = 42
CSV  = r"C:\Users\ast\Documents\project\train.csv"
IMG  = r"C:\Users\ast\Documents\project\train_images"
SPLIT = "split42_70-30.json"

BATCH  = 16
EPOCHS = 10
LR     = 1e-4
WD     = 3e-4
SMOOTH = 0.05

OUT_MET  = f"result_exp3_{TAG}.json"
OUT_PRED = f"pred_exp3_{TAG}.csv"

SAVE_AUG           = True
SAVE_FIRST_BATCHES = 2
SAVE_PER_CLASS     = 2
SAVE_TOTAL_LIMIT   = 120
SAVE_DIR           = f"./aug_vis_{TAG}"
os.makedirs(SAVE_DIR, exist_ok=True)

# ───────────────────────────── 3. 시드 고정
def seed_all(s: int):
    random.seed(s)
    np.random.seed(s)
    os.environ["PYTHONHASHSEED"] = str(s)
    torch.manual_seed(s)
    torch.cuda.manual_seed_all(s)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
seed_all(SEED)

# ───────────────────────────── 4. 변환
class AddSpeckleNoise:
    """I_noisy = I + I * N(0, σ²)"""
    def __init__(self, std: float):
        self.std = std
    def __call__(self, t: torch.Tensor):
        return (t + t * torch.randn_like(t) * self.std).clamp(-1, 1)
    def __repr__(self):
        return f"AddSpeckleNoise(std={self.std})"

train_tf = transforms.Compose([
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomVerticalFlip(0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    AddSpeckleNoise(SIGMA),
    transforms.Normalize([0.5] * 3, [0.5] * 3),
])

test_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5] * 3, [0.5] * 3),
])

# ───────────────────────────── 5. Dataset
class ScrapDS(Dataset):
    def __init__(self, df: pd.DataFrame, tf, le, save_aug=False):
        self.df = df.reset_index(drop=True).copy()
        self.dir = IMG
        self.tf = tf
        self.labels = le.transform(self.df["weight_class"])
        self.save_aug = save_aug
        self.saved_total = 0
        self.class_cnt = {i: 0 for i in range(len(le.classes_))}

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = Image.open(os.path.join(self.dir, row.filename)).convert("RGB")
        tensor = self.tf(img)

        # 저장 로직
        if (
            self.save_aug
            and self.saved_total < SAVE_TOTAL_LIMIT
            and self.saved_total < SAVE_FIRST_BATCHES * BATCH
            and self.class_cnt[self.labels[idx]] < SAVE_PER_CLASS
        ):
            fname = f"{self.saved_total:04d}_{row.filename}"
            save_image((tensor * 0.5 + 0.5).clamp(0, 1), os.path.join(SAVE_DIR, fname))
            self.saved_total += 1
            self.class_cnt[self.labels[idx]] += 1

        return tensor, torch.tensor(self.labels[idx]), row.filename

# ───────────────────────────── 6. split
df = pd.read_csv(CSV)
if os.path.exists(SPLIT):
    idx = json.load(open(SPLIT))
    train_idx, test_idx = idx["train"], idx["test"]
else:
    train_idx, test_idx = train_test_split(
        range(len(df)), train_size=70, test_size=30,
        stratify=df["weight_class"], random_state=SEED
    )
    json.dump({"train": train_idx, "test": test_idx}, open(SPLIT, "w"))

train_df, test_df = df.iloc[train_idx], df.iloc[test_idx]
le = LabelEncoder().fit(train_df["weight_class"])

# ───────────────────────────── 7. DataLoader (9×)
base_ds = ScrapDS(train_df, train_tf, le, save_aug=SAVE_AUG)
train_ds = ConcatDataset([base_ds] * 9)          # 630
test_ds  = ScrapDS(test_df, test_tf, le)

train_ld = DataLoader(train_ds, batch_size=BATCH, shuffle=True,  num_workers=0)
test_ld  = DataLoader(test_ds,  batch_size=BATCH, shuffle=False, num_workers=0)

# ───────────────────────────── 8. 모델 · Optim · Scheduler
class CoaTMedium(nn.Module):
    def __init__(self, n_cls: int):
        super().__init__()
        self.net = timm.create_model(
            'coat_lite_medium', pretrained=True, num_classes=n_cls
        )

    def forward(self, x):
        return self.net(x)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CoaTMedium(len(le.classes_)).to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WD)

total_steps = len(train_ld) * EPOCHS
warmup_steps = len(train_ld)

def lr_lambda(step):
    if step < warmup_steps:
        return (step + 1) / warmup_steps
    prog = (step - warmup_steps) / (total_steps - warmup_steps)
    return 0.5 * (1 + math.cos(math.pi * prog))

scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
criterion = nn.CrossEntropyLoss(label_smoothing=SMOOTH)

# ───────────────────────────── 9. 학습
for ep in range(1, EPOCHS + 1):
    model.train()
    epoch_loss = 0.0
    for xb, yb, _ in train_ld:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward()
        optimizer.step()
        scheduler.step()
        epoch_loss += loss.item() * xb.size(0)
    print(f"[{ep}/{EPOCHS}] Loss={epoch_loss/len(train_ld.dataset):.4f}")

# ───────────────────────────── 10. 평가
model.eval()
yt, yp, rows = [], [], []
with torch.no_grad():
    for xb, yb, fn in test_ld:
        preds = model(xb.to(device)).argmax(1).cpu()
        yt += yb.tolist()
        yp += preds.tolist()
        rows += list(zip(fn, le.inverse_transform(preds.numpy())))

acc = accuracy_score(yt, yp)
f1  = f1_score(yt, yp, average="macro")
print(f"✅ Exp3-{TAG}  Acc={acc:.4f}  Macro-F1={f1:.4f}")

# ───────────────────────────── 11. 저장
json.dump(
    {
        "experiment": f"exp3_{TAG}",
        "sigma": SIGMA,
        "accuracy": acc,
        "macro_f1": f1,
    },
    open(OUT_MET, "w"),
    indent=2,
)
pd.DataFrame(rows, columns=["filename", "predicted_label"]).to_csv(OUT_PRED, index=False)

print(f"📄 Metrics → {OUT_MET}\n📄 Preds   → {OUT_PRED}")
if SAVE_AUG:
    print(f"🖼️  Saved {base_ds.saved_total} augmented images → {SAVE_DIR}")


[1/10] Loss=0.6028
[2/10] Loss=0.1845
[3/10] Loss=0.1718
[4/10] Loss=0.1705
[5/10] Loss=0.1699
[6/10] Loss=0.1698
[7/10] Loss=0.1698
[8/10] Loss=0.1697
[9/10] Loss=0.1696
[10/10] Loss=0.1696
✅ Exp3-speckle080  Acc=0.4667  Macro-F1=0.4522
📄 Metrics → result_exp3_speckle080.json
📄 Preds   → pred_exp3_speckle080.csv
🖼️  Saved 6 augmented images → ./aug_vis_speckle080
