In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Change into the project working directory on the mounted Google Drive
# (MyDrive/김시은/CanSim).
%cd /content/drive/MyDrive/김시은/CanSim

Mounted at /content/drive
/content/drive/MyDrive/김시은/CanSim


In [None]:
# @title
# Install the required libraries (the imports follow below).
!pip install torch torchvision pandas matplotlib pillow fpdf2 scikit-learn --quiet

import os
import glob
import urllib.request
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler, Subset
from PIL import Image, ImageDraw
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, classification_report
import matplotlib.pyplot as plt
from fpdf import FPDF

# Shared constants: the six supported cancer types, coded 0-5 everywhere.
CANCER_TYPE_MAP = dict(lung=0, colon=1, stomach=2, prostate=3, breast=4, glioma=5)
# Reverse lookup: integer code -> cancer type name.
CANCER_TYPE_INV = {code: name for name, code in CANCER_TYPE_MAP.items()}

# File-name aliases (for flexibility): "brain" is treated as "glioma".
CANCER_ALIAS = dict(brain="glioma")

def parse_filename(fname: str):
    """Extract the ``(modality, cancer_type)`` codes encoded in an image file name.

    The base name must follow ``modality_cancerType_patientID.ext``
    (e.g. ``ct_lung_patient0012.png``). Matching is case-insensitive, and
    cancer-type aliases listed in ``CANCER_ALIAS`` are resolved first.

    Args:
        fname: File name or path; only the base name is inspected.

    Returns:
        Tuple ``(modality, cancer_type)`` where modality is 0 for ``ct`` and
        1 for ``mri``, and cancer_type is the code from ``CANCER_TYPE_MAP``.

    Raises:
        ValueError: If the name has fewer than three ``_``-separated parts,
            the modality is not ct/mri, or the cancer type is unknown.
    """
    base = os.path.basename(fname)
    tokens = base.split("_")
    if len(tokens) < 3:
        raise ValueError(f"[파일명 오류] {base} (예: ct_lung_patient001.png)")

    modality_token, cancer_token = (token.lower() for token in tokens[:2])

    modality_codes = {"ct": 0, "mri": 1}
    if modality_token not in modality_codes:
        raise ValueError(f"[modality 오류] {base} (ct/mri만 허용)")

    # Resolve aliases (e.g. brain -> glioma) before validating the cancer type.
    cancer_token = CANCER_ALIAS.get(cancer_token, cancer_token)
    if cancer_token not in CANCER_TYPE_MAP:
        raise ValueError(f"[cancer_type 오류] {base} (허용: {list(CANCER_TYPE_MAP.keys())})")

    return modality_codes[modality_token], CANCER_TYPE_MAP[cancer_token]

In [None]:
# [Speed] Copy Google Drive -> local disk (strongly recommended: removes the I/O bottleneck)
import os
import shutil

SRC_DIR = "/content/drive/MyDrive/김시은/CanSim/Images"
DST_DIR = "/content/Images_local"

# Start from a clean destination: copytree is called without dirs_exist_ok,
# so any previous copy is removed first.
destination_present = os.path.exists(DST_DIR)
if destination_present:
    shutil.rmtree(DST_DIR)
shutil.copytree(SRC_DIR, DST_DIR)
print("[✔] Copied Images →", DST_DIR)

[✔] Copied Images → /content/Images_local


In [None]:
# (Sanity check) Load the drug-effect CSV, print its columns/head, and guard the labels.
df = pd.read_csv("cancer_drug_effect.csv", encoding='UTF-8')
print("[DEBUG] CSV 컬럼명:", list(df.columns))
print(df.head())

# CSV label guard: drug_type must be one of {0, 1, 2, 3}.
valid_mask = df["drug_type"].isin([0,1,2,3])
if not valid_mask.all():
    bad = df[~valid_mask]
    print("[WARN] drug_type이 0~3 범위를 벗어납니다. 다음 행을 수정하세요:")
    print(bad)

[DEBUG] CSV 컬럼명: ['cancer_type', 'cancer_name', 'drug_type', 'drug_name', 'effectiveness', 'effect', 'side_effect']
   cancer_type cancer_name  drug_type      drug_name  effectiveness  \
0            0          폐암          0          시스플라틴           0.72   
1            0          폐암          1          카보플라틴           0.70   
2            0          폐암          2           젬시타빈           0.75   
3            0          폐암          3        면역관문억제제           0.73   
4            1         대장암          0  플루오로유라실(5-FU)           0.68   

              effect                     side_effect  
0          DNA 복제 억제                     신독성, 발열, 감기  
1  DNA 복제 억제 (백금 기반)  빈혈, 혈소판 감소, 메스꺼움, 전해질 문제, 알레르기  
2        분열 중인 세포 사멸      호흡곤란, 혈구감소증, 구역질, 골수 기능 억제  
3   면역세포 활성화, 암세포 공격      피로, 발열, 피부발진, 자가면역질환 악화 가능  
4     DNA와 RNA 합성 억제            골수억제, 구토, 설사, 피로, 탈모  


In [None]:
# tabular_data.csv generator (uses the shared map/parser + label guard)
def generate_tabular_csv(image_folder, drug_csv, output_csv):
    """Build the per-image, per-drug training table and write it as CSV.

    For every ``.png``/``.jpg``/``.jpeg`` file in ``image_folder`` whose name
    parses with ``parse_filename``, one row is emitted for each of the (at
    most four, lowest-numbered) drug types listed for that cancer type in
    ``drug_csv``. The effectiveness is taken from the first matching CSV row.

    NOTE(review): all rows generated for one image share the same image,
    modality and cancer_type and differ only in ``drug_type`` and
    ``effectiveness`` — confirm this is the intended learning target.

    Args:
        image_folder: Directory containing the image files.
        drug_csv: CSV with ``cancer_type``, ``drug_type`` and
            ``effectiveness`` columns (read as UTF-8).
        output_csv: Destination path of the generated table.

    Returns:
        None. Writes ``output_csv`` and prints a summary line.
    """
    columns = ["image_name", "modality", "cancer_type", "drug_type", "effectiveness"]
    eff_df = pd.read_csv(drug_csv, encoding='utf-8')

    # Label guard: warn (do not abort) about drug_type values outside 0-3.
    bad = eff_df[~eff_df["drug_type"].isin([0,1,2,3])]
    if len(bad) > 0:
        print("[WARN] drug_type이 0~3 범위를 벗어납니다. 다음 행을 수정하세요:\n", bad)

    # The drug list depends only on the cancer type, so compute it once per
    # cancer type instead of re-filtering the DataFrame for every image.
    drugs_by_cancer = {}

    def drugs_for(cancer_type):
        """Return [(drug_type, effectiveness), ...] for one cancer type."""
        if cancer_type not in drugs_by_cancer:
            subset = eff_df[eff_df["cancer_type"] == cancer_type]
            pairs = []
            for d in sorted(subset["drug_type"].unique().tolist())[:4]:
                match = subset[subset["drug_type"] == d]
                if match.empty:
                    continue
                pairs.append((int(d), float(match["effectiveness"].values[0])))
            drugs_by_cancer[cancer_type] = pairs
        return drugs_by_cancer[cancer_type]

    rows = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and therefore any seeded split over row indices) reproducible.
    for img_file in sorted(os.listdir(image_folder)):
        if not img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        try:
            modality, cancer_type = parse_filename(img_file)
        except ValueError as e:
            # Badly named files are reported and skipped, not fatal.
            print(e)
            continue

        for drug_type, effectiveness in drugs_for(cancer_type):
            rows.append({
                "image_name": img_file,
                "modality": modality,
                "cancer_type": cancer_type,
                "drug_type": drug_type,
                "effectiveness": effectiveness,
            })

    # Passing the columns explicitly keeps the header even when no rows were
    # produced, so the file can still be read back with pd.read_csv.
    pd.DataFrame(rows, columns=columns).to_csv(output_csv, index=False)
    print(f"[✔] Tabular CSV 생성 완료 → {output_csv} (총 {len(rows)}행)")

# Run: generate tabular_data.csv from the local image folder
generate_tabular_csv("/content/Images_local", "cancer_drug_effect.csv", output_csv="tabular_data.csv")

[✔] Tabular CSV 생성 완료 → tabular_data.csv (총 37292행)


In [None]:
# Dataset
class CanSimDataset(Dataset):
    """Dataset pairing a grayscale image with tabular features and a drug label.

    Each row of ``tabular_csv`` supplies ``image_name``, ``modality``,
    ``cancer_type``, ``effectiveness`` and ``drug_type``.
    """

    def __init__(self, image_folder, tabular_csv, transform=None):
        """Load the table and remember where the images live.

        Args:
            image_folder: Directory that contains the files named in ``image_name``.
            tabular_csv: Path of the table to read with pandas.
            transform: Optional callable applied to the loaded PIL image.
        """
        self.image_folder = image_folder
        self.transform = transform
        self.df = pd.read_csv(tabular_csv)

        # The number of drug classes is fixed at 4.
        self.num_drugs = 4

        # Guard: warn (without failing) when a drug_type falls outside 0-3.
        uniq = sorted(self.df["drug_type"].unique().tolist())
        if any(int(value) not in (0, 1, 2, 3) for value in uniq):
            print("[WARN] drug_type 값이 0~3 범위를 벗어납니다. CSV를 확인하세요:", uniq)

    def __len__(self):
        """Return the number of table rows."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, tabular_features, label)`` for row ``idx``.

        The image is opened in single-channel ("L") mode and passed through
        ``transform`` when one was given; the features are a float32 tensor
        ``[modality, cancer_type, effectiveness]``; the label is the row's
        ``drug_type`` as an int.
        """
        record = self.df.iloc[idx]
        image = Image.open(os.path.join(self.image_folder, record["image_name"])).convert("L")
        if self.transform:
            image = self.transform(image)

        feature_columns = ("modality", "cancer_type", "effectiveness")
        features = torch.tensor([record[name] for name in feature_columns], dtype=torch.float32)

        return image, features, int(record["drug_type"])  # label is 0-3

In [None]:
# Model definition (pretrained ResNet18 kept with its 3-channel input)
class CanSimModel(nn.Module):
    """Two-branch classifier: ResNet18 image features fused with tabular features.

    Args:
        tabular_dim: Number of tabular input features.
        hidden_dim: Width of the tabular embedding.
        num_drugs: Number of output classes (logits).
    """

    def __init__(self, tabular_dim=3, hidden_dim=64, num_drugs=4):
        super().__init__()
        # ImageNet-pretrained backbone; conv1 is intentionally left untouched
        # (not converted to 1 channel) and the classification head is replaced
        # by an identity so the backbone returns its pooled features.
        resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        resnet.fc = nn.Identity()
        self.backbone = resnet

        self.tabular = nn.Sequential(nn.Linear(tabular_dim, hidden_dim), nn.ReLU())

        # The fusion head consumes 512 image features plus the tabular embedding.
        fused_dim = 512 + hidden_dim
        self.fusion = nn.Sequential(
            nn.Linear(fused_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_drugs),
        )

    def forward(self, img, tabular):
        """Return class logits for a batch of images and tabular vectors."""
        branch_features = (self.backbone(img), self.tabular(tabular))
        return self.fusion(torch.cat(branch_features, dim=1))  # logits

In [None]:
# Training setup: device / transforms / data loaders / loss / optimizer /
# scheduler (with throughput tuning), followed by the training loop.
import torch.backends.cudnn as cudnn
from torch import amp  # current AMP API; kept at module level because later cells reference `amp`

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cudnn.benchmark = True  # let cuDNN search for the fastest kernels

# Training transform: grayscale replicated to 3 channels + light augmentation
# + ImageNet normalisation.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Validation transform: same preprocessing WITHOUT the random augmentation, so
# validation metrics are deterministic and comparable across epochs.
eval_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

IMG_DIR = "/content/Images_local"  # using the local copy is recommended

dataset = CanSimDataset(IMG_DIR, "tabular_data.csv", transform=transform)
# Second view over the same table (identical row indices) for validation.
eval_dataset = CanSimDataset(IMG_DIR, "tabular_data.csv", transform=eval_transform)
indices = list(range(len(dataset)))

# Stratify the split on the (cancer_type, drug_type) pair.
pair_all = dataset.df["cancer_type"].astype(str) + "_" + dataset.df["drug_type"].astype(str)
train_idx, val_idx = train_test_split(indices, test_size=0.2, stratify=pair_all, random_state=42)

train_subset = Subset(dataset, train_idx)
val_subset   = Subset(eval_dataset, val_idx)

# Sampler: weight each training row by the inverse frequency of its
# (cancer_type, drug_type) pair.
train_df = dataset.df.iloc[train_idx].reset_index(drop=True)
pair_train = train_df["cancer_type"].astype(str) + "_" + train_df["drug_type"].astype(str)
pair_counts = pair_train.value_counts().to_dict()
weights = pair_train.map(lambda x: 1.0 / pair_counts[x]).astype(np.float32).values
sampler = WeightedRandomSampler(torch.from_numpy(weights), len(weights), replacement=True)

# DataLoader tuning + an extra warm-up loader
NUM_WORKERS = 4
BATCH_SIZE = 64    # 64-128 is feasible on an A100

# Warm-up loader (sampler OFF): used only for the first few epochs.
train_loader_warmup = DataLoader(train_subset, batch_size=BATCH_SIZE,
                                 shuffle=True, num_workers=NUM_WORKERS,
                                 pin_memory=True, persistent_workers=True)

# Main training loader (sampler ON).
train_loader = DataLoader(train_subset, batch_size=BATCH_SIZE, sampler=sampler,
                          num_workers=NUM_WORKERS, pin_memory=True, persistent_workers=True)

val_loader   = DataLoader(val_subset,   batch_size=BATCH_SIZE, shuffle=False,
                          num_workers=NUM_WORKERS, pin_memory=True, persistent_workers=True)

criterion = nn.CrossEntropyLoss()

model = CanSimModel(num_drugs=dataset.num_drugs).to(device)
model = model.to(memory_format=torch.channels_last)
# Keep a handle on the uncompiled module: it shares parameters with the
# compiled wrapper, and its state_dict keys carry no `_orig_mod.` prefix, so
# checkpoints load into a freshly constructed CanSimModel.
raw_model = model

# Tuned lr / weight_decay
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

# (Optional) torch.compile; training continues uncompiled if it is unavailable.
try:
    model = torch.compile(model)
    print("[✔] torch.compile 활성화")
except Exception as e:
    print("[i] compile 생략:", e)

amp_device = 'cuda' if device.type=='cuda' else 'cpu'
scaler = amp.GradScaler(amp_device)

# Training loop (warm-up + AMP + early stopping)
# NOTE(review): the recorded run of this cell stayed at chance level
# (val loss ≈ 1.386, acc ≈ 0.25 for 4 classes) — verify the label/feature
# design of tabular_data.csv before tuning hyper-parameters further.
epochs = 100
WARMUP_EPOCHS = 3  # use the sampler-free loader for the first few epochs

best_val_loss = float("inf")
best_f1 = 0.0
patience = 15
counter = 0  # consecutive epochs without a new best val loss / F1

for epoch in range(epochs):
    model.train()
    loader = train_loader_warmup if epoch < WARMUP_EPOCHS else train_loader

    total_train_loss = 0.0
    for img, tabular, label in loader:
        img = img.to(device, non_blocking=True).to(memory_format=torch.channels_last)
        tabular = tabular.to(device, non_blocking=True)
        label = label.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        with amp.autocast(amp_device, dtype=torch.float16):
            logits = model(img, tabular)
            loss = criterion(logits, label)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        total_train_loss += float(loss)

    avg_train_loss = total_train_loss / max(1, len(loader))

    # ---- Validation ----
    model.eval()
    total_val_loss = 0.0
    all_preds, all_labels = [], []
    with torch.no_grad():
        for img, tabular, label in val_loader:
            img = img.to(device, non_blocking=True).to(memory_format=torch.channels_last)
            tabular = tabular.to(device, non_blocking=True)
            label = label.to(device, non_blocking=True)

            with amp.autocast(amp_device, dtype=torch.float16):
                logits = model(img, tabular)
                loss = criterion(logits, label)

            total_val_loss += float(loss)
            preds = logits.argmax(dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(label.cpu().numpy())

    avg_val_loss = total_val_loss / max(1, len(val_loader))
    scheduler.step(avg_val_loss)

    val_acc = np.mean(np.array(all_preds) == np.array(all_labels))
    val_f1 = f1_score(all_labels, all_preds, average='weighted')

    # An epoch counts as an improvement only when it sets a new record
    # (lowest val loss or highest F1). Comparing against the previous epoch
    # instead would overwrite "best_model.pth" with non-best weights and keep
    # resetting the patience counter on noisy metrics.
    best_record = False
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        best_record = True
    if val_f1 > best_f1:
        best_f1 = val_f1
        best_record = True

    if best_record:
        counter = 0
        torch.save(raw_model.state_dict(), "best_model.pth")
        print(f"[Epoch {epoch+1}] [V] Val Loss={avg_val_loss:.4f}, Acc={val_acc:.4f}, F1={val_f1:.4f} (개선)")
    else:
        counter += 1
        print(f"[Epoch {epoch+1}] [X] Val Loss={avg_val_loss:.4f}, Acc={val_acc:.4f}, F1={val_f1:.4f} (악화)")
        if counter >= patience:
            print(f"[Early Stopping] {epoch+1} 에포크에서 종료")
            break

[✔] torch.compile 활성화
[Epoch 1] [V] Val Loss=1.3868, Acc=0.2508, F1=0.1199 (개선)
[Epoch 2] [X] Val Loss=1.3871, Acc=0.2500, F1=0.1000 (악화)
[Epoch 3] [V] Val Loss=1.3865, Acc=0.2500, F1=0.1000 (개선)
[Epoch 4] [V] Val Loss=1.3862, Acc=0.2508, F1=0.1036 (개선)
[Epoch 5] [X] Val Loss=1.3865, Acc=0.2499, F1=0.0999 (악화)
[Epoch 6] [X] Val Loss=1.3865, Acc=0.2500, F1=0.1000 (악화)
[Epoch 7] [V] Val Loss=1.3865, Acc=0.2508, F1=0.1061 (개선)
[Epoch 8] [V] Val Loss=1.3870, Acc=0.2461, F1=0.1494 (개선)
[Epoch 9] [V] Val Loss=1.3863, Acc=0.2506, F1=0.1019 (개선)
[Epoch 10] [V] Val Loss=1.3870, Acc=0.2486, F1=0.1216 (개선)
[Epoch 11] [V] Val Loss=1.3891, Acc=0.2519, F1=0.1826 (개선)
[Epoch 12] [V] Val Loss=1.3864, Acc=0.2487, F1=0.1626 (개선)
[Epoch 13] [X] Val Loss=1.3864, Acc=0.2496, F1=0.1230 (악화)
[Epoch 14] [V] Val Loss=1.3865, Acc=0.2492, F1=0.1486 (개선)
[Epoch 15] [V] Val Loss=1.3865, Acc=0.2499, F1=0.1643 (개선)
[Epoch 16] [X] Val Loss=1.3865, Acc=0.2500, F1=0.1612 (악화)
[Epoch 17] [V] Val Loss=1.3862, Acc=0.2504,

In [None]:
# (Inference/report) PDF utilities and the optimised inference loop (one forward pass per week)
# Font download (skipped when the font file already exists)
import shutil

font_path = "/content/NanumGothic.ttf"
if not os.path.exists(font_path):
    candidates = [
        # Google Fonts official repository
        "https://raw.githubusercontent.com/google/fonts/main/ofl/nanumgothic/NanumGothic-Regular.ttf",
        # Backup: Bold variant (used when Regular fails)
        "https://raw.githubusercontent.com/google/fonts/main/ofl/nanumgothic/NanumGothic-Bold.ttf",
    ]
    # Download to a side file and rename only on success, so an interrupted
    # download never leaves a truncated font at `font_path` (which the
    # existence check above would otherwise accept on the next run).
    partial_path = font_path + ".part"
    ok = False
    for url in candidates:
        try:
            urllib.request.urlretrieve(url, partial_path)
            os.replace(partial_path, font_path)
            ok = True
            print(f"[✔] Font downloaded from {url}")
            break
        except Exception as e:
            # Best effort: report and try the next candidate.
            print(f"[WARN] Font download failed: {url} -> {e}")
            if os.path.exists(partial_path):
                os.remove(partial_path)
    # Last-resort fallback: a system font (usually present on Colab).
    # Nanum has better Hangul coverage.
    if not ok:
        fallback = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
        if os.path.exists(fallback):
            shutil.copy(fallback, font_path)
            print("[i] Using fallback font DejaVuSans (한글 일부 미지원 가능)")
        else:
            raise FileNotFoundError("폰트 다운로드 실패. 인터넷/URL 확인 필요.")

# PDF report class
class PDF(FPDF):
    """FPDF subclass that renders the simulation report with the "Nanum" font.

    The font file is read from the module-level ``font_path``.
    """

    def __init__(self):
        super().__init__()
        # fpdf2 deprecated the `uni` argument of add_font, so it is omitted.
        self.add_font("Nanum", '', font_path)
        self.set_font("Nanum", '', 12)

    def _write_report_line(self, text, align="L"):
        """Write one full-width line of height 10 and move to the next line.

        Uses ``new_x``/``new_y`` — the replacement for the deprecated
        ``ln=True`` argument of ``cell``.
        """
        self.cell(0, 10, text, align=align, new_x="LMARGIN", new_y="NEXT")

    def add_summary_page(self, patient_info, summary_data):
        """Add the first page: patient details and per-drug final scores.

        Args:
            patient_info: Mapping with ``name``, ``age`` and ``gender`` keys.
            summary_data: Iterable of mappings with ``drug``, ``final_score``
                (printed as a percentage) and ``is_best`` keys.
        """
        self.add_page()
        self.set_font("Nanum", '', 16)
        self._write_report_line("환자 시뮬레이션 결과 보고서", align="C")
        self.ln(10)

        self.set_font("Nanum", '', 12)
        self._write_report_line(f"이름: {patient_info['name']}")
        self._write_report_line(f"나이: {patient_info['age']}세")
        self._write_report_line(f"성별: {patient_info['gender']}")
        self.ln(10)
        self._write_report_line("예측 치료 확률 (약물별):")

        for drug in summary_data:
            mark = "(추천)" if drug["is_best"] else ""
            self._write_report_line(f"- {drug['drug']}: {drug['final_score']*100:.1f}% {mark}")

    def add_drug_pages(self, image_data):
        """Add image-grid pages for every drug (up to 12 images per page, 3 per row).

        Args:
            image_data: Iterable of mappings with ``drug``, ``is_best`` and
                ``weeks``; each week entry has ``week`` and ``image_path``.
                Image files that do not exist are skipped, leaving only the
                week caption in that grid slot.
        """
        images_per_page = 12
        images_per_row = 3
        img_w = 55
        img_h = 45
        margin_x = 10
        margin_y = 10

        for drug_data in image_data:
            drug_name = f"{drug_data['drug']} (추천)" if drug_data['is_best'] else drug_data['drug']
            weeks = drug_data["weeks"]

            for i in range(0, len(weeks), images_per_page):
                self.add_page()
                self.set_font("Nanum", '', 14)
                self._write_report_line(drug_name)
                self.ln(5)

                batch = weeks[i:i+images_per_page]
                # Walk the batch one grid row at a time; the row count follows
                # from the batch size instead of a hard-coded 4.
                for row_start in range(0, len(batch), images_per_row):
                    y = self.get_y()
                    for col, week in enumerate(batch[row_start:row_start + images_per_row]):
                        x = margin_x + col * (img_w + margin_x)

                        # Caption on top, image directly below it.
                        self.set_xy(x, y)
                        self.set_font("Nanum", '', 10)
                        self.cell(img_w, 6, f"{week['week']}주차", align='C')

                        if os.path.exists(week['image_path']):
                            self.image(week['image_path'], x=x, y=y + 6, w=img_w, h=img_h)

                    # Advance past the row just drawn.
                    self.ln(img_h + margin_y)
                self.ln(5)

def generate_pdf(patient_info, summary_data, image_data, output_path="report.pdf"):
    """Render the summary page and the per-drug image pages, then save the PDF.

    Args:
        patient_info: Patient details forwarded to ``PDF.add_summary_page``.
        summary_data: Per-drug summary forwarded to ``PDF.add_summary_page``.
        image_data: Per-drug weekly image data forwarded to ``PDF.add_drug_pages``.
        output_path: Where the PDF file is written.
    """
    report = PDF()
    report.add_summary_page(patient_info, summary_data)
    report.add_drug_pages(image_data)
    report.output(output_path)
    print(f"[✔] PDF 저장 완료 → {output_path}")

[✔] Font downloaded from https://raw.githubusercontent.com/google/fonts/main/ofl/nanumgothic/NanumGothic-Regular.ttf


In [None]:
# Simulation utilities: one forward pass per week, its 4-class probabilities shared by all drugs
drug_df = pd.read_csv("cancer_drug_effect.csv", encoding='utf-8')

# Warn (without failing) when the CSV carries drug_type labels outside 0-3.
uniq_drugs = sorted(drug_df["drug_type"].unique().tolist())
if not all(int(x) in (0, 1, 2, 3) for x in uniq_drugs):
    print("[WARN] CSV drug_type 라벨을 0~3으로 정규화하세요:", uniq_drugs)

# (cancer_type, drug_type) -> drug name; later CSV rows win on duplicate keys.
drug_map = {
    (int(cancer_code), int(drug_code)): str(drug_label)
    for cancer_code, drug_code, drug_label in zip(
        drug_df["cancer_type"], drug_df["drug_type"], drug_df["drug_name"]
    )
}

def available_drugs_for(cancer_type: int):
    """Return up to four drug_type codes listed for ``cancer_type`` in ``drug_df``.

    The codes are the lowest-sorted distinct values, returned as ints. A
    warning is printed (nothing is raised) when a code lies outside 0-3.
    """
    listed = drug_df.loc[drug_df["cancer_type"] == cancer_type, "drug_type"]
    cand = sorted(listed.unique().tolist())[:4]
    if not all(int(x) in (0, 1, 2, 3) for x in cand):
        print(f"[WARN] drug_type 값이 0~3 범위를 벗어납니다: {cand}. CSV를 정규화하세요.")
    return list(map(int, cand))

def simulate_all_drugs_fast(model, base_image, tabular_tensor, device, cancer_type, drug_map, num_weeks=12):
    """Simulate weekly images once and score every available drug from them.

    A synthetic image is built per week by dimming a centred disc whose
    radius shrinks week by week and outlining it. Each weekly image goes
    through the model once; the resulting class probabilities are shared by
    all drugs, each drug reading the probability at its own class index.

    Args:
        model: Network called as ``model(image_batch, tabular_batch)`` and
            returning logits.
        base_image: PIL image used as the starting frame.
        tabular_tensor: 1-D tabular feature tensor for the patient.
        device: ``torch.device`` to run inference on.
        cancer_type: Cancer type code used to look up the available drugs.
        drug_map: Mapping ``(cancer_type, drug_type) -> drug name``.
        num_weeks: Number of weekly frames to simulate (must be >= 1).

    Returns:
        Tuple ``(results, best_drug)``. ``results`` has one dict per drug with
        ``drug``, ``drug_idx``, ``images`` (the shared list of weekly frames),
        ``scores`` and ``final_score`` (the last week's score); ``best_drug``
        is the entry with the highest ``final_score``.

    Raises:
        ValueError: If ``num_weeks`` is below 1 or no drug is listed for
            ``cancer_type``.
    """
    if num_weeks < 1:
        raise ValueError("num_weeks는 1 이상이어야 합니다.")

    valid_drugs = available_drugs_for(int(cancer_type))
    if not valid_drugs:
        # Fail with a clear message instead of max() on an empty list below.
        raise ValueError(f"cancer_type={cancer_type}에 사용할 수 있는 약물이 없습니다. CSV를 확인하세요.")

    # Inference transform: same as training minus augmentation
    # (grayscale -> 3 channels + ImageNet normalisation).
    tform = transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    # Week-independent geometry is computed once, outside the loop.
    base_np = np.array(base_image).astype(np.float32)
    height, width = base_np.shape[0], base_np.shape[1]
    yy, xx = np.ogrid[:height, :width]
    cy, cx = height // 2, width // 2
    dist = (yy - cy) ** 2 + (xx - cx) ** 2

    # Shared weekly frames (for visualisation and scoring).
    week_images = []
    for w in range(num_weeks):
        # Radius starts at 40 and shrinks towards (without reaching) 10.
        tumor_radius = 40 - int((w / num_weeks) * 30)
        mask = np.ones_like(base_np)
        mask[dist < tumor_radius ** 2] = 0.7  # dim the disc to 70 %
        frame = Image.fromarray(np.clip(base_np * mask, 0, 255).astype(np.uint8))
        ImageDraw.Draw(frame).ellipse(
            [(cx - tumor_radius, cy - tumor_radius),
             (cx + tumor_radius, cy + tumor_radius)],
            outline="red", width=2,
        )
        week_images.append(frame)

    # One forward pass per week. torch.autocast is used directly so this
    # function does not depend on an `amp` name imported by another cell.
    autocast_device = 'cuda' if device.type == 'cuda' else 'cpu'
    tab = tabular_tensor.unsqueeze(0).to(device)
    week_probs = []
    model.eval()
    with torch.no_grad():
        for wimg in week_images:
            x = tform(wimg).unsqueeze(0).to(device)
            with torch.autocast(autocast_device, dtype=torch.float16):
                logits = model(x, tab).squeeze(0)
                probs = torch.softmax(logits, dim=0).cpu().numpy()
            week_probs.append(probs)

    results = []
    for d in valid_drugs:
        dname = drug_map.get((int(cancer_type), int(d)), f"Drug_{d}")
        scores = [float(p[d]) for p in week_probs]
        results.append({
            "drug": dname,
            "drug_idx": int(d),
            "images": week_images,
            "scores": scores,
            "final_score": scores[-1],
        })
    best_drug = max(results, key=lambda x: x["final_score"])
    return results, best_drug

In [None]:
# get_simulation_data 함수 복구
import os

def get_simulation_data(results, best_drug, detailed=False):
    """Save the selected weekly frames to ``temp_images/`` and describe them.

    Args:
        results: Per-drug simulation results, each with ``drug``, ``images``
            and ``scores``.
        best_drug: The recommended result; its ``drug`` name marks ``is_best``.
        detailed: When True every week is exported, otherwise every fourth
            week starting with the first.

    Returns:
        One dict per drug with ``drug``, ``is_best`` and ``weeks``; each week
        entry holds the 1-based ``week``, its ``score`` and the ``image_path``
        of the saved PNG.

    Raises:
        ValueError: If ``results`` is empty.
    """
    if not results:
        raise ValueError("results가 비어 있습니다. 시뮬레이션 결과를 확인하세요.")

    step = 1 if detailed else 4
    # The number of frames is taken from the first drug's image list.
    week_indices = range(0, len(results[0]["images"]), step)

    os.makedirs("temp_images", exist_ok=True)

    def export_week(result, w):
        """Save frame ``w`` of one drug and return its report entry."""
        image_path = f"temp_images/{result['drug'].replace(' ', '_')}_week{w+1}.png"
        result["images"][w].save(image_path)
        return {
            "week": w+1,
            "score": float(result["scores"][w]),
            "image_path": image_path,
        }

    return [
        {
            "drug": result["drug"],
            "is_best": result["drug"] == best_drug["drug"],
            "weeks": [export_week(result, w) for w in week_indices],
        }
        for result in results
    ]

In [None]:
# Run inference: pick the first image automatically -> simulate -> build the PDF
# (Right after training, the in-memory `model` is used as-is. To use the saved
#  checkpoint instead, enable the two lines below.)
# model = CanSimModel(num_drugs=4).to(device)
# model.load_state_dict(torch.load("best_model.pth", map_location=device))

# Pick one image from the local copy. The extension filter matches the one
# used when building the tabular CSV (.png/.jpg/.jpeg, case-insensitive), and
# sorting makes the choice deterministic (glob order is unspecified).
image_paths = sorted(
    path for path in glob.glob("/content/Images_local/*")
    if path.lower().endswith(('.png', '.jpg', '.jpeg'))
)
if not image_paths:
    raise FileNotFoundError("Images_local 폴더에 이미지가 없습니다.")
img_path = image_paths[0]
with Image.open(img_path) as source_image:
    img = source_image.convert("L")

# Extract modality and cancer_type from the file name (shared parser).
modality, cancer_type = parse_filename(img_path)

# Tabular input (example: the effectiveness value 0.7 is a placeholder to be
# set according to the CSV / application logic).
tabular_tensor = torch.tensor([float(modality), float(cancer_type), 0.7], dtype=torch.float32)

# Simulation (optimised version)
results, best_drug = simulate_all_drugs_fast(model, img, tabular_tensor, device, int(cancer_type), drug_map)

# Confidence metrics, computed on the final scores renormalised over the
# drugs that were actually evaluated.
final_probs = np.array([r["final_score"] for r in results], dtype=np.float64)
final_probs = final_probs / (final_probs.sum() + 1e-12)
sorted_probs = np.sort(final_probs)[::-1]
confidence = float(sorted_probs[0])
margin = float(sorted_probs[0] - sorted_probs[1]) if len(sorted_probs) > 1 else float(sorted_probs[0])
entropy = float(-(final_probs * np.log(final_probs + 1e-12)).sum())

best_name = best_drug["drug"]
print(f"[추천] {best_name} 약물이 {confidence*100:.1f}% 확률로 가장 높은 치료 효과.")
print(f"[신뢰도] confidence={confidence:.4f}, margin(top1-top2)={margin:.4f}, entropy={entropy:.4f}")

# Build the PDF
data = get_simulation_data(results, best_drug, detailed=True)
pdf_data = {
    "patient_info": {"name": "홍길동", "age": 58, "gender": "남"},
    "summary": [{"drug": r["drug"], "final_score": r["final_score"], "is_best": r["drug"] == best_name} for r in results],
    "images": data
}
generate_pdf(pdf_data["patient_info"], pdf_data["summary"], pdf_data["images"], "report.pdf")

[추천] 도세탁셀 약물이 25.1% 확률로 가장 높은 치료 효과.
[신뢰도] confidence=0.2505, margin(top1-top2)=0.0003, entropy=1.3863


  self.add_font("Nanum", '', font_path, uni=True)
  self.cell(0, 10, "환자 시뮬레이션 결과 보고서", ln=True, align="C")
  self.cell(0, 10, f"이름: {patient_info['name']}", ln=True)
  self.cell(0, 10, f"나이: {patient_info['age']}세", ln=True)
  self.cell(0, 10, f"성별: {patient_info['gender']}", ln=True)
  self.cell(0, 10, "예측 치료 확률 (약물별):", ln=True)
  self.cell(0, 10, f"- {drug['drug']}: {drug['final_score']*100:.1f}% {mark}", ln=True)
  self.cell(0, 10, drug_name, ln=True)
  self.cell(img_w, 6, f"{week['week']}주차", ln=False, align='C')


[✔] PDF 저장 완료 → report.pdf
