# UFoE Phase 6-A: Metal Binding Scaffold — ColabFold 검증 v2
## Zn²⁺ 결합 사이트 (His₂Cys₂) 기하학 검증

### v2 개선사항:
- ESMFold API를 1차 구조예측 엔진으로 사용 (설치 불필요, 즉시 실행)
- ColabFold는 선택적 2차 검증으로 제공
- 모든 셀에 에러 핸들링 추가
- 결과 시각화 강화

### 목표:
1. **Scaffold 안정성** — pLDDT ≥ 80, 4-helix bundle 유지
2. **Zn 배위 기하학** — His₂Cys₂ 사면체 배위 확인
3. **Phase 5b-2 분석 함수 재사용** — salt bridge, core packing, crossing angle, SASA

### 성공 조건:
- Scaffold pLDDT ≥ 80
- Cys-Cys 거리: 3-5Å
- His-His 거리: 3-5Å
- 4잔기 pocket: 이상적 사면체 대비 RMSD < 2.0Å (참고: Step 4 코드는 배위 각도 RMSD를 ° 단위로 평가 — < 20° 우수, < 30° 허용)
- Helix crossing angle: 20-35°

In [None]:
# Step 1: 설치 + 환경 확인
!pip install -q biopython numpy scipy matplotlib requests
!pip install -q py3Dmol

import subprocess, sys, os

# Detect an NVIDIA GPU by probing `nvidia-smi`. Without one, the notebook
# reports that it runs in ESMFold API mode.
gpu_available = False
try:
    result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
except OSError:
    # Raised (e.g. FileNotFoundError) when the nvidia-smi binary is missing or
    # cannot be executed. Catching only OSError keeps KeyboardInterrupt and
    # genuine programming errors visible instead of silently swallowing them.
    result = None

if result is not None and result.returncode == 0:
    gpu_available = True
    print("✅ GPU 사용 가능")
    # Echo only the nvidia-smi lines that mention memory or a known GPU model
    gpu_markers = ('MiB', 'Tesla', 'A100', 'V100', 'T4')
    for line in result.stdout.splitlines():
        if any(marker in line for marker in gpu_markers):
            print(f"  {line.strip()}")
else:
    print("⚠️ GPU 없음 — ESMFold API 모드로 실행")

print(f"\n환경: Python {sys.version.split()[0]}")
print("설치 완료 ✅")

In [None]:
# Step 2: 서열 정의 + 곤감리건 검증
import os, json, glob, time, warnings
import numpy as np
from pathlib import Path
warnings.filterwarnings('ignore')

# Phase 6-A metal-binding scaffold (101 aa; pLDDT = 80.0 on ESMFold).
# Zn-site residues as 0-based string indices (1-based in parentheses):
#   His: 49, 53 (50, 54) | Cys: 25, 28 (26, 29)
PHASE6A_SEQ = 'DEKRKDLAALLLKAILLALVALLGTCKQCELLAIKVEALLAVLKAILGSHSNQHSLAILVAKELLALVKAILGLAALLLKAILLALVALLGLLAVEKDKRD'

# Phase 5b-2 reference: wild-type GCN4 (pLDDT = 95.1 on ColabFold)
WT_GCN4_SEQ = 'RMKQLEDKVEELLSKNYHLENEVARLKKLVGER'

SEQUENCES = {
    'Phase6A_Metal': PHASE6A_SEQ,
    'GCN4_WT_ref': WT_GCN4_SEQ,
}

# Residue classes used throughout the notebook:
#   gon = hydrophobic, gam = special, ri = polar, geon = charged
_GGRL_GROUPS = (
    ('gon', 'ALVIFMW'),
    ('gam', 'GCP'),
    ('ri', 'STNQHY'),
    ('geon', 'EDKR'),
)
# One-letter amino-acid code -> class label
GGRL_MAP = {aa: label for label, members in _GGRL_GROUPS for aa in members}

# Sequence check: residue-class composition plus His/Cys locations
banner = "=" * 60
print(banner)
print("서열 검증 + 곤감리건 분석")
print(banner)
for name, seq in SEQUENCES.items():
    # Map every residue to its class; a letter missing from GGRL_MAP raises KeyError
    ggrl = [GGRL_MAP[aa] for aa in seq]
    n = len(ggrl)
    ratio = {}
    for t in ('gon', 'gam', 'ri', 'geon'):
        ratio[t] = round(ggrl.count(t) / n * 100, 1)
    print(f'\n{name} ({len(seq)}aa):')
    print(f'  곤(소수성)={ratio["gon"]}% 감(특수)={ratio["gam"]}% 리(극성)={ratio["ri"]}% 건(전하)={ratio["geon"]}%')

    # Collect the His / Cys indices (0-based) in a single pass over the sequence
    his_pos, cys_pos = [], []
    for i, aa in enumerate(seq):
        if aa == 'H':
            his_pos.append(i)
        elif aa == 'C':
            cys_pos.append(i)
    if his_pos:
        print(f'  His positions: {his_pos} ({len(his_pos)}개)')
    if cys_pos:
        print(f'  Cys positions: {cys_pos} ({len(cys_pos)}개)')

print("\n서열 검증 완료 ✅")

In [None]:
# Step 3: ESMFold API로 구조 예측 (설치 불필요, 즉시 실행)
import requests

# Output directory for predicted structures
os.makedirs('phase6a_structures', exist_ok=True)

# Filled by the prediction loop: name -> saved PDB path, name -> raw PDB text
pdb_files, pdb_strings = {}, {}

def predict_with_esmfold(name, sequence):
    """Predict a structure by POSTing ``sequence`` to the ESMFold API.

    On an HTTP 200 response the returned PDB text is written to
    ``phase6a_structures/{name}_esmfold.pdb`` and the mean of the CA-atom
    B-factor column (read here as pLDDT) is printed.

    Args:
        name: Label used in the log output and in the output file name.
        sequence: Amino-acid sequence, sent as the plain-text request body.

    Returns:
        ``(outpath, pdb_str)`` on success; ``(None, None)`` on a non-200
        response, a timeout, or any other error. Failures are printed,
        never raised, so one bad sequence does not stop the notebook.
    """
    print(f'\n{"="*50}')
    print(f'  {name} ({len(sequence)}aa) — ESMFold API')
    print(f'{"="*50}')

    url = "https://api.esmatlas.com/foldSequence/v1/pdb/"

    try:
        print(f'  요청 전송 중...')
        start = time.time()
        response = requests.post(url, data=sequence,
                                 headers={'Content-Type': 'text/plain'},
                                 timeout=120)
        elapsed = time.time() - start

        if response.status_code != 200:
            print(f'  ❌ API 오류: {response.status_code}')
            print(f'  {response.text[:200]}')
            return None, None

        pdb_str = response.text

        # Save the PDB text; explicit encoding keeps the file independent of
        # the platform's default locale.
        outpath = f'phase6a_structures/{name}_esmfold.pdb'
        with open(outpath, 'w', encoding='utf-8') as f:
            f.write(pdb_str)

        # Extract pLDDT from the B-factor column (PDB columns 61-66) of CA atoms
        plddts = []
        for line in pdb_str.splitlines():
            if line.startswith('ATOM') and line[12:16].strip() == 'CA':
                try:
                    plddts.append(float(line[60:66].strip()))
                except ValueError:
                    # Short or malformed ATOM record: skip just this line
                    continue

        mean_plddt = np.mean(plddts) if plddts else 0
        print(f'  ✅ 성공 ({elapsed:.1f}초)')
        print(f'  Mean pLDDT: {mean_plddt:.1f}')
        print(f'  저장: {outpath}')

        return outpath, pdb_str

    except requests.exceptions.Timeout:
        print(f'  ❌ 타임아웃 (120초 초과)')
        return None, None
    except Exception as e:
        # Deliberate best-effort boundary: report and let the caller continue
        print(f'  ❌ 오류: {e}')
        return None, None

# Predict every sequence; only successful predictions are recorded
for name, seq in SEQUENCES.items():
    outpath, pdb_str = predict_with_esmfold(name, seq)
    if not outpath:
        continue
    pdb_files[name] = outpath
    pdb_strings[name] = pdb_str

rule = "=" * 50
print(f'\n{rule}')
print(f'구조 예측 완료: {len(pdb_files)}/{len(SEQUENCES)} 성공')
print(f'{rule}')

---
## Phase 6-A 핵심 분석: Zn 배위 기하학

In [None]:
# Step 4: Zn²⁺ 배위 기하학 분석
from Bio.PDB import PDBParser

def analyze_zn_geometry(pdb_file):
    """Measure the geometry of a candidate His2Cys2 Zn-binding site.

    Donor atoms come from the first two CYS (SG) and the first two HIS
    (NE2, or ND1 when NE2 is absent) found in model 0. The centroid of the
    four donors stands in for the Zn position; no metal atom is read from
    the file. Every measurement is printed as it is computed.

    Args:
        pdb_file: Path of the PDB file to analyse.

    Returns:
        A dict with keys 'coord_atoms', 'zn_center', 'distances',
        'zn_distances', 'angles', 'cys_cys_dist', 'his_his_dist' and
        'tetrahedral_rmsd', or None when fewer than two His / two Cys
        residues or fewer than four donor atoms are available.
    """
    model = PDBParser(QUIET=True).get_structure('s', pdb_file)[0]

    # Bucket HIS and CYS residues from every chain, preserving file order
    his_residues, cys_residues = [], []
    buckets = {'HIS': his_residues, 'CYS': cys_residues}
    for chain in model:
        for res in chain:
            bucket = buckets.get(res.get_resname())
            if bucket is not None:
                bucket.append(res)

    print(f'  Found: {len(his_residues)} His, {len(cys_residues)} Cys')

    if len(his_residues) < 2 or len(cys_residues) < 2:
        print('  ❌ Insufficient His/Cys for Zn site')
        return None

    # Donor atoms as (label, xyz): the two Cys SG first, then one N per His
    donors = []
    for res in cys_residues[:2]:
        if 'SG' in res:
            donors.append((f'Cys{res.get_id()[1]}:SG',
                           res['SG'].get_vector().get_array()))
    for res in his_residues[:2]:
        # NE2 is tried first; ND1 is the fallback
        atom_name = next((cand for cand in ('NE2', 'ND1') if cand in res), None)
        if atom_name is not None:
            donors.append((f'His{res.get_id()[1]}:{atom_name}',
                           res[atom_name].get_vector().get_array()))

    if len(donors) < 4:
        print(f'  ❌ Only {len(donors)} coordination atoms found')
        return None

    coord_labels = [label for label, _ in donors]
    coords = np.array([xyz for _, xyz in donors])
    zn_center = coords.mean(axis=0)

    print(f'\n  === Zn²⁺ 배위 기하학 ===')
    print(f'  가상 Zn 위치: ({zn_center[0]:.1f}, {zn_center[1]:.1f}, {zn_center[2]:.1f})')

    # Distance from the virtual Zn to each donor atom
    zn_distances = {}
    print('\n  Zn-배위원자 거리 (이상: 2.0-2.3Å):')
    for label, xyz in zip(coord_labels, coords):
        d = np.linalg.norm(xyz - zn_center)
        status = "✅" if 1.5 <= d <= 3.5 else "⚠️"
        print(f'    Zn — {label}: {d:.2f}Å {status}')
        zn_distances[label] = round(d, 2)

    # The six unique donor pairs, in (0,1), (0,2), ... (2,3) order
    pair_indices = [(i, j) for i in range(4) for j in range(i + 1, 4)]

    # Donor-donor separations
    distances = {}
    print('\n  배위원자 간 거리:')
    for i, j in pair_indices:
        d = np.linalg.norm(coords[i] - coords[j])
        pair = f'{coord_labels[i]} — {coord_labels[j]}'
        mark = "✅" if 2.5 <= d <= 8.0 else "⚠️"
        print(f'    {pair}: {d:.2f}Å {mark}')
        distances[pair] = round(d, 2)

    # Rows 0-1 are the Cys donors and rows 2-3 the His donors
    cys_d = np.linalg.norm(coords[0] - coords[1])
    his_d = np.linalg.norm(coords[2] - coords[3])
    print(f'\n  Cys-Cys 거리: {cys_d:.2f}Å (목표: 3-5Å) {"✅" if 3<=cys_d<=5 else "⚠️"}')
    print(f'  His-His 거리: {his_d:.2f}Å (목표: 3-5Å) {"✅" if 3<=his_d<=5 else "⚠️"}')

    # Donor-Zn-donor angles (ideal tetrahedron: 109.5 degrees)
    angles = {}
    print('\n  배위 각도 (이상 사면체: 109.5°):')
    for i, j in pair_indices:
        v1 = coords[i] - zn_center
        v2 = coords[j] - zn_center
        cos_a = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
        # Clamp against rounding error before arccos
        angle = np.degrees(np.arccos(np.clip(cos_a, -1, 1)))
        pair = f'{coord_labels[i]}-Zn-{coord_labels[j]}'
        mark = "✅" if 80 <= angle <= 140 else "⚠️"
        print(f'    {pair}: {angle:.1f}° {mark}')
        angles[pair] = round(angle, 1)

    # RMS deviation of the stored (rounded) angles from the tetrahedral ideal
    ideal_angle = 109.5
    squared_dev = [(a - ideal_angle)**2 for a in list(angles.values())]
    rmsd_angle = np.sqrt(np.mean(squared_dev))
    print(f'\n  사면체 RMSD: {rmsd_angle:.1f}° (< 20° = 우수, < 30° = 허용)')

    return {
        'coord_atoms': coord_labels,
        'zn_center': zn_center.tolist(),
        'distances': distances,
        'zn_distances': zn_distances,
        'angles': angles,
        'cys_cys_dist': round(cys_d, 2),
        'his_his_dist': round(his_d, 2),
        'tetrahedral_rmsd': round(rmsd_angle, 1),
    }

# Run the Zn-site analysis when the Phase 6-A structure was predicted
zn_result = None
if 'Phase6A_Metal' not in pdb_files:
    print('❌ Phase6A PDB 없음 — Step 3 확인 필요')
else:
    print('=== Phase 6-A Metal Binding Scaffold ===')
    zn_result = analyze_zn_geometry(pdb_files['Phase6A_Metal'])

In [None]:
# Step 5: 4-Helix Bundle 구조 분석

def analyze_helix_bundle(pdb_file, sequence, name):
    """Report packing-quality metrics for a predicted helix bundle.

    Only the first chain of model 0 is analysed. Prints and returns the mean
    pLDDT (B-factor of each residue's first atom), the CA radius of gyration,
    the CA contact density and the outcome of the "naitae" burial-order
    check (mean relative SASA: gon < ri < geon).

    Args:
        pdb_file: Path of the PDB file to analyse.
        sequence: One-letter sequence; residue i of the chain is paired with
            sequence[i] when classifying SASA (assumes the chain has no gaps
            relative to the sequence — TODO confirm for non-ESMFold input).
        name: Sequence label; 'Phase6A_Metal' enables the per-region pLDDT table.

    Returns:
        dict with 'mean_plddt', 'rg', 'contact_density' (each rounded to one
        decimal) and 'naitae' (plain bool, or None if the SASA step failed).
    """
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('s', pdb_file)
    model = structure[0]
    chain = list(model.get_chains())[0]
    residues = list(chain.get_residues())

    # CA coordinates, one row per residue that has a CA atom
    ca_coords = np.array([res['CA'].get_vector().get_array()
                          for res in residues if 'CA' in res])

    print(f'  Residues: {len(residues)}, CA coords: {len(ca_coords)}')

    # pLDDT: B-factor of the first atom of each residue
    plddts = []
    for res in residues:
        first_atom = next(iter(res), None)
        if first_atom is not None:
            plddts.append(first_atom.get_bfactor())
    mean_plddt = np.mean(plddts)
    print(f'  Mean pLDDT: {mean_plddt:.1f}')

    # Per-region pLDDT. Slices are 0-based list indices; a region is left
    # empty (and not printed) when the chain is too short to contain it.
    n = len(plddts)
    if name == 'Phase6A_Metal':
        regions = {
            'N-term (1-15)': plddts[:15] if n > 15 else plddts,
            'Helix1 (15-35)': plddts[15:35] if n > 35 else [],
            'CysLoop (25-30)': plddts[25:30] if n > 30 else [],
            'Helix2 (35-45)': plddts[35:45] if n > 45 else [],
            'HisLoop (48-55)': plddts[48:55] if n > 55 else [],
            'Helix3 (55-75)': plddts[55:75] if n > 75 else [],
            'Helix4 (75-90)': plddts[75:90] if n > 90 else [],
            'C-term (90+)': plddts[90:] if n > 90 else []
        }
    else:
        regions = {'전체': plddts}

    print('\n  영역별 pLDDT:')
    for rname, vals in regions.items():
        if vals:
            print(f'    {rname}: {np.mean(vals):.1f} (min={min(vals):.1f})')

    # Relative SASA grouped by residue class
    try:
        from Bio.PDB.SASA import ShrakeRupley
        sr = ShrakeRupley()
        sr.compute(model, level='R')

        # Per-residue reference SASA used as the normaliser; residues not
        # listed fall back to 200 below.
        max_sasa = {'A':129,'R':274,'N':195,'D':193,'C':167,'E':223,'Q':225,
                    'G':104,'H':224,'I':197,'L':201,'K':236,'M':224,'F':240,
                    'P':159,'S':155,'T':172,'W':285,'Y':263,'V':174}

        by_type = {'gon':[],'gam':[],'ri':[],'geon':[]}
        # zip stops at the shorter of the two, matching residue i to sequence[i]
        for res, aa in zip(residues, sequence):
            t = GGRL_MAP.get(aa)
            ms = max_sasa.get(aa, 200)
            if t and hasattr(res, 'sasa'):
                by_type[t].append(res.sasa / ms)

        print('\n  상대 SASA (곤감리건):')
        for t in ['gon','gam','ri','geon']:
            if by_type[t]:
                print(f'    {t}: {np.mean(by_type[t]):.3f} (n={len(by_type[t])})')

        # Sentinels (999 / 0) make the check fail when a class is empty
        gon_m = np.mean(by_type['gon']) if by_type['gon'] else 999
        ri_m = np.mean(by_type['ri']) if by_type['ri'] else 999
        geon_m = np.mean(by_type['geon']) if by_type['geon'] else 0
        # Comparing numpy floats yields numpy.bool_, which json cannot encode
        # and the results writer's default=str would store as the string
        # "True"/"False"; cast to a plain bool so it is saved as true/false.
        naitae = bool(gon_m < ri_m < geon_m)
        print(f'    나이테 (곤<리<건): {"✅" if naitae else "❌"} ({gon_m:.3f} < {ri_m:.3f} < {geon_m:.3f})')
    except Exception as e:
        # SASA is optional: report the failure and carry on with naitae unknown
        print(f'\n  SASA 계산 실패: {e}')
        naitae = None

    # Radius of gyration over CA atoms
    center = ca_coords.mean(axis=0)
    rg = np.sqrt(np.mean(np.sum((ca_coords - center)**2, axis=1)))
    print(f'\n  Radius of gyration: {rg:.1f}Å')

    # Contact density: CA-CA pairs between 4 and 8 Å, per residue. The
    # distance matrix is symmetric, so each pair is counted twice — hence / 2.
    from scipy.spatial.distance import cdist
    dm = cdist(ca_coords, ca_coords)
    n_contacts = int(np.sum((dm > 4) & (dm < 8)) / 2)
    contact_density = n_contacts / len(ca_coords)
    print(f'  Contact density (4-8Å): {contact_density:.1f} per residue')

    return {
        'mean_plddt': round(mean_plddt, 1),
        'rg': round(rg, 1),
        'contact_density': round(contact_density, 1),
        'naitae': naitae
    }

# Analyse every predicted structure and keep the metrics by name
bundle_results = {}
rule = "=" * 60
for name in pdb_files:
    print(f'\n{rule}')
    print(f'=== {name} ===')
    print(f'{rule}')
    bundle_results[name] = analyze_helix_bundle(
        pdb_files[name], SEQUENCES[name], name
    )

In [None]:
# Step 6: 3D visualization (py3Dmol, with a matplotlib CA-trace fallback)
try:
    import py3Dmol

    for name in pdb_files:
        # Reload the PDB text from disk if it is not cached in memory
        if pdb_strings.get(name) is None:
            with open(pdb_files[name]) as f:
                pdb_strings[name] = f.read()

        pdb_str = pdb_strings[name]
        view = py3Dmol.view(width=700, height=450)
        view.addModel(pdb_str, 'pdb')

        # Base representation: cartoon coloured by spectrum
        view.setStyle({'cartoon': {'color': 'spectrum'}})

        # Highlight His (red sticks), then Cys (yellow sticks)
        for resn, stick_color in (('HIS', 'red'), ('CYS', 'yellow')):
            view.addStyle({'resn': resn},
                          {'stick': {'color': stick_color, 'radius': 0.2}})

        # Virtual Zn position from the geometry analysis, with a label above it
        if name == 'Phase6A_Metal' and zn_result:
            zc = zn_result['zn_center']
            sphere_center = {'x': zc[0], 'y': zc[1], 'z': zc[2]}
            label_position = {'x': zc[0], 'y': zc[1]+2, 'z': zc[2]}
            view.addSphere({
                'center': sphere_center,
                'radius': 1.2, 'color': 'gray', 'opacity': 0.7
            })
            view.addLabel('Zn²⁺', {
                'position': label_position,
                'fontSize': 14, 'fontColor': 'white',
                'backgroundColor': 'gray', 'backgroundOpacity': 0.8
            })

        view.zoomTo()
        print(f'\n=== {name} ===')
        view.show()

except ImportError:
    print('py3Dmol not available — matplotlib fallback')
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D

    for name in pdb_files:
        parser = PDBParser(QUIET=True)
        structure = parser.get_structure('s', pdb_files[name])
        # CA trace across every chain of model 0
        ca_coords = [
            res['CA'].get_vector().get_array()
            for chain in structure[0]
            for res in chain
            if 'CA' in res
        ]
        ca = np.array(ca_coords)
        xs, ys, zs = ca[:,0], ca[:,1], ca[:,2]

        fig = plt.figure(figsize=(8, 6))
        ax = fig.add_subplot(111, projection='3d')
        ax.plot(xs, ys, zs, 'b-', linewidth=2, alpha=0.7)
        ax.scatter(xs, ys, zs, c=range(len(ca)), cmap='rainbow', s=20)
        ax.set_title(f'{name} — CA backbone')
        plt.show()

In [None]:
# Step 7: pLDDT 히트맵 시각화
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

# Per-residue pLDDT bar chart, one panel per predicted structure
if not pdb_files:
    # plt.subplots() rejects zero rows, so skip plotting when no prediction
    # succeeded instead of failing with a ValueError.
    print('❌ PDB 없음 — Step 3 확인 필요')
else:
    # squeeze=False always returns a 2-D axes array, so the single-structure
    # case needs no special handling.
    fig, axes = plt.subplots(len(pdb_files), 1,
                             figsize=(14, 3*len(pdb_files)), squeeze=False)

    for ax, (name, pdb_path) in zip(axes[:, 0], pdb_files.items()):
        parser = PDBParser(QUIET=True)
        structure = parser.get_structure('s', pdb_path)

        # pLDDT is read from the B-factor of the first atom of each residue
        plddts = []
        for chain in structure[0]:
            for res in chain:
                for atom in res:
                    plddts.append(atom.get_bfactor())
                    break

        seq = SEQUENCES[name]
        positions = list(range(1, len(plddts)+1))

        # Bar colour by pLDDT band
        colors = []
        for p in plddts:
            if p >= 90:
                colors.append('#0053D6')    # very high (blue)
            elif p >= 70:
                colors.append('#65CBF3')    # high (sky blue)
            elif p >= 50:
                colors.append('#FFDB13')    # medium (yellow)
            else:
                colors.append('#FF7D45')    # low (orange)

        ax.bar(positions, plddts, color=colors, width=1.0, edgecolor='none')
        ax.axhline(y=80, color='red', linestyle='--', alpha=0.5, label='목표 (80)')
        ax.axhline(y=70, color='orange', linestyle='--', alpha=0.3)
        ax.set_ylabel('pLDDT')
        ax.set_xlabel('Residue position')
        ax.set_title(f'{name} — pLDDT per residue (Mean: {np.mean(plddts):.1f})')
        ax.set_ylim(0, 100)
        ax.legend()

        # Mark His / Cys residues (1-based positions) just above their bars
        if name == 'Phase6A_Metal':
            his_pos = [i+1 for i, aa in enumerate(seq) if aa == 'H']
            cys_pos = [i+1 for i, aa in enumerate(seq) if aa == 'C']
            for hp in his_pos:
                if hp <= len(plddts):
                    ax.annotate('H', (hp, plddts[hp-1]+2), fontsize=8, ha='center', color='red')
            for cp in cys_pos:
                if cp <= len(plddts):
                    ax.annotate('C', (cp, plddts[cp-1]+2), fontsize=8, ha='center', color='gold')

    plt.tight_layout()
    plt.show()

In [None]:
# Step 8: GCN4 WT reference analysis (for comparison with Phase 5b-2)

if 'GCN4_WT_ref' not in pdb_files:
    print('❌ GCN4 WT PDB 없음')
else:
    print('=== GCN4 WT — Phase 5b-2 참조 ===')
    pdb = pdb_files['GCN4_WT_ref']
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure('wt', pdb)
    chains = list(structure[0].get_chains())

    print(f'  체인 수: {len(chains)}')

    if len(chains) < 2:
        # Single chain only: the dimer metrics below cannot be computed
        print('  ⚠️ ESMFold는 monomer만 예측 — dimer 분석 불가')
        print('  단일 체인 분석:')
        chain = chains[0]
        # Mean B-factor over all atoms of the chain
        plddts = [atom.get_bfactor() for res in chain for atom in res]
        print(f'  Mean pLDDT: {np.mean(plddts):.1f}')

        ca = np.array([r['CA'].get_vector().get_array() for r in chain if 'CA' in r])
        centered = ca - ca.mean(axis=0)
        rg = np.sqrt(np.mean(np.sum(centered**2, axis=1)))
        print(f'  Rg: {rg:.1f}Å')
    else:
        seq = WT_GCN4_SEQ
        # Heptad register assigned by index modulo 7, starting at 'd'
        heptad = ['d','e','f','g','a','b','c']
        reg = {i: heptad[i % 7] for i in range(len(seq))}
        a_pos = [i for i in range(len(seq)) if reg[i] == 'a']

        ra = list(chains[0].get_residues())
        rb = list(chains[1].get_residues())

        def get_cb(res):
            """Return the CB coordinates, else CA, else None."""
            for atom_name in ('CB', 'CA'):
                if atom_name in res:
                    return res[atom_name].get_vector().get_array()
            return None

        # Distance between equivalent 'a' positions of the two chains
        print('\n  a↔a\' distances (core):')
        shared_len = min(len(ra), len(rb))
        for p in a_pos:
            if p >= shared_len:
                continue
            ca, cb = get_cb(ra[p]), get_cb(rb[p])
            if ca is None or cb is None:
                continue
            d = np.linalg.norm(ca-cb)
            print(f'    pos{p+1}({seq[p]}): {d:.2f}Å')

        def get_axis(chain):
            """Return the principal axis of the chain's CA atoms."""
            ca = np.array([r['CA'].get_vector().get_array() for r in chain if 'CA' in r])
            centered = ca - ca.mean(axis=0)
            eigvals, eigvecs = np.linalg.eigh(np.cov(centered.T))
            # Eigenvector belonging to the largest eigenvalue
            return eigvecs[:, np.argmax(eigvals)]

        # Crossing angle between the two axes; abs() ignores axis direction
        ax_a = get_axis(chains[0])
        ax_b = get_axis(chains[1])
        dot = np.dot(ax_a, ax_b)
        angle = np.degrees(np.arccos(min(1.0, abs(dot))))
        print(f'\n  Crossing angle: {angle:.1f}° (목표: 20-35°)')

        # Mean B-factor over all atoms of the first chain
        plddts = [atom.get_bfactor() for res in chains[0] for atom in res]
        print(f'  Mean pLDDT: {np.mean(plddts):.1f}')

In [None]:
# Step 9: overall verdict

rule = '=' * 60
print(rule)
print('  Phase 6-A: Metal Binding Scaffold — 종합 판정')
print(rule)

# Tally of check outcomes
verdict = {'pass': 0, 'warn': 0, 'fail': 0}

# [1] Scaffold stability: mean pLDDT against the 80 target (70-80 = warn)
print('\n[1] Scaffold 안정성')
if 'Phase6A_Metal' not in bundle_results:
    print('  → ❌ 데이터 없음')
    verdict['fail'] += 1
else:
    mp = bundle_results['Phase6A_Metal']['mean_plddt']
    print(f'  pLDDT: {mp:.1f} (목표 ≥80)')
    if mp >= 80:
        outcome, message = 'pass', '  → ✅ PASS'
    elif mp >= 70:
        outcome, message = 'warn', '  → ⚠️ 근접 (70-80)'
    else:
        outcome, message = 'fail', '  → ❌ 미달'
    print(message)
    verdict[outcome] += 1

# [2] Zn coordination geometry: tetrahedral angle RMSD (<20 pass, <30 warn)
print('\n[2] Zn²⁺ 배위 기하학')
if not zn_result:
    print('  → ❌ Zn 분석 실패')
    verdict['fail'] += 1
else:
    rmsd = zn_result['tetrahedral_rmsd']
    print(f'  사면체 RMSD: {rmsd:.1f}°')
    if rmsd < 20:
        outcome, message = 'pass', '  → ✅ PASS (우수)'
    elif rmsd < 30:
        outcome, message = 'warn', '  → ⚠️ 허용 범위'
    else:
        outcome, message = 'fail', '  → ❌ FAIL'
    print(message)
    verdict[outcome] += 1

    # Donor distances are reported here but do not change the tally
    ccd = zn_result.get('cys_cys_dist', 0)
    hhd = zn_result.get('his_his_dist', 0)
    print(f'\n  Cys-Cys: {ccd:.2f}Å (목표 3-5Å) {"✅" if 3<=ccd<=5 else "⚠️"}')
    print(f'  His-His: {hhd:.2f}Å (목표 3-5Å) {"✅" if 3<=hhd<=5 else "⚠️"}')

    zn_d = list(zn_result['zn_distances'].values())
    mean_zn = np.mean(zn_d)
    print(f'  Zn-배위 평균거리: {mean_zn:.2f}Å')

# [3] Naitae principle: relative SASA ordering gon < ri < geon
print('\n[3] 나이테 원리 (곤<리<건)')
if 'Phase6A_Metal' in bundle_results and bundle_results['Phase6A_Metal'].get('naitae') is not None:
    nt = bundle_results['Phase6A_Metal']['naitae']
    if nt:
        print('  → ✅ PASS')
        verdict['pass'] += 1
    else:
        print('  → ❌ FAIL')
        verdict['fail'] += 1
else:
    print('  → ❌ 데이터 없음')
    verdict['fail'] += 1

# [4] Where this phase sits in the overall progression (informational only)
print('\n[4] Phase 5b-2 → 6-A 진행')
print('  Phase 5b-2: 접힘 검증 (WT pLDDT 95.1) ✅')
print('  Phase 6-A:  기능 검증 (Metal binding)')
print('  전환: 구조 → 기능')

# Summary line and final recommendation
print(f'\n{rule}')
total = sum(verdict.values())
print(f'  종합: ✅ {verdict["pass"]}/{total} PASS  |  ⚠️ {verdict["warn"]}/{total} WARN  |  ❌ {verdict["fail"]}/{total} FAIL')
if verdict['fail'] == 0:
    print('  → Phase 6-A 검증 PASS — Phase 6-B 진행 가능')
elif verdict['fail'] == 1 and verdict['warn'] <= 1:
    print('  → 부분 통과 — 서열 최적화 후 재검증 권장')
else:
    print('  → 재설계 필요')
print(f'{rule}')

In [None]:
# Step 10: 결과 저장
from datetime import datetime

# Everything produced by the notebook, gathered into one JSON document
output = {
    'experiment': 'UFoE Phase 6-A: Metal Binding Scaffold (v2)',
    'date': datetime.now().strftime('%Y-%m-%d %H:%M'),
    'method': 'ESMFold API',
    'sequences': SEQUENCES,
    'zn_geometry': zn_result if zn_result else {},
    'bundle_analysis': dict(bundle_results) if bundle_results else {},
    'pdb_files': pdb_files,
}

results_path = 'phase6a_results_v2.json'
# UTF-8 with ensure_ascii=False keeps non-ASCII text (e.g. the dash in the
# donor-pair keys) readable in the file. default=str is a last-resort
# fallback for values json cannot encode natively.
with open(results_path, 'w', encoding='utf-8') as f:
    json.dump(output, f, indent=2, ensure_ascii=False, default=str)
print(f'✅ Results saved to {results_path}')

# List the generated files
print(f'\n생성된 파일:')
for path in glob.glob('phase6a_structures/*'):
    size = os.path.getsize(path)
    print(f'  {path} ({size//1024}KB)')
print(f'  {results_path}')

# Optional: copy the results to Google Drive (only available inside Colab)
try:
    from google.colab import drive
except ImportError:
    # Not running in Colab: Drive is simply unavailable
    print('\nGoogle Drive 미연결 (선택사항)')
else:
    try:
        drive.mount('/content/drive')
        import shutil
        save_dir = '/content/drive/MyDrive/UFoE_Phase6A/'
        os.makedirs(save_dir, exist_ok=True)
        shutil.copy(results_path, save_dir)
        for name, pdb in pdb_files.items():
            shutil.copy(pdb, save_dir)
        print(f'\n✅ Google Drive에 저장: {save_dir}')
    except Exception as e:
        # Still best-effort, but report why the copy failed instead of
        # presenting a real error as "Drive not connected".
        print(f'\nGoogle Drive 저장 실패 (선택사항): {e}')

---
## (선택) ColabFold로 추가 검증

ColabFold는 GPU 런타임이 필요합니다.  
런타임 → 런타임 유형 변경 → GPU 선택 후 아래 셀을 실행하세요.

In [None]:
# (Optional) Step 11: additional validation with ColabFold
# ⚠️ Requires a GPU runtime! Runtime → Change runtime type → GPU
# NOTE: the lines starting with `!` are IPython shell escapes, so this cell
# only runs inside a notebook, not as a plain Python script.

ENABLE_COLABFOLD = False  # set to True to run ColabFold

if ENABLE_COLABFOLD:
    print("ColabFold 설치 중... (5-10분 소요)")
    
    # Latest stable installation method (2025-2026)
    # NOTE(review): subprocess is imported but not used in this cell.
    import subprocess
    
    # 1. Install ColabFold from its GitHub repository, then CUDA 12 jax
    !pip install -q "colabfold[alphafold2] @ git+https://github.com/sokrypton/ColabFold" 2>&1 | tail -5
    !pip install -q "jax[cuda12]" 2>&1 | tail -3
    
    try:
        import colabfold
        # NOTE(review): assumes the installed package exposes __version__; an
        # AttributeError here would not be caught by the ImportError handler
        # below — confirm.
        print(f"✅ ColabFold {colabfold.__version__} 설치 완료")
        
        os.makedirs('phase6a_colabfold', exist_ok=True)
        
        # Write the FASTA input for the Phase 6-A sequence
        with open('phase6a_colabfold/Phase6A_Metal.fasta', 'w') as f:
            f.write(f'>Phase6A_Metal\n{PHASE6A_SEQ}\n')
        
        # Run ColabFold (monomer)
        !colabfold_batch phase6a_colabfold/Phase6A_Metal.fasta phase6a_colabfold/output_Phase6A \
            --num-models 5 --num-recycles 3 --model-type alphafold2_ptm
        
        # Check the result: look for the rank_001 PDB in the output folder
        cf_pdbs = glob.glob('phase6a_colabfold/output_Phase6A/*rank_001*.pdb')
        if cf_pdbs:
            print(f"\n✅ ColabFold 결과: {cf_pdbs[0]}")
            print("위 분석 셀을 ColabFold PDB로 다시 실행할 수 있습니다.")
        else:
            print("❌ ColabFold 실행 실패")
            
    except ImportError:
        # The pip install above did not yield an importable colabfold package
        print("❌ ColabFold 설치 실패 — ESMFold 결과를 사용하세요")
else:
    print("ColabFold 비활성 — ESMFold 결과를 사용합니다.")
    print("ColabFold를 사용하려면 ENABLE_COLABFOLD = True로 변경하세요.")