In [3]:
# --- 1. 환경 설정 셀 (수정된 버전) ---
import os
import sys

# 작업 디렉토리를 올바른 위치로 변경
workspace_path = '/workspace'
os.chdir(workspace_path)

# RFdiffusion 경로를 파이썬이 인식하도록 추가
rfdiffusion_path = os.path.join(workspace_path, 'RFdiffusion')
if rfdiffusion_path not in sys.path:
    sys.path.append(rfdiffusion_path)
import time
import signal
import sys
import random
import string
import re
import json
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML
import ipywidgets as widgets
import py3Dmol

# Colab이 아닌 로컬 환경이므로, RFdiffusion 경로를 직접 추가
if 'RFdiffusion' not in sys.path:
    home_dir = os.path.expanduser("~")
    rfdiffusion_path = os.path.join(home_dir, 'RFdiffusion')
    sys.path.append(rfdiffusion_path)

os.environ["DGLBACKEND"] = "pytorch"

from inference.utils import parse_pdb
from colabdesign.rf.utils import get_ca
from colabdesign.rf.utils import fix_contigs, fix_partial_contigs, fix_pdb, sym_it
from colabdesign.shared.protein import pdb_to_string
from colabdesign.shared.plot import plot_pseudo_3D

# --- Colab의 files.upload()를 대체하는 로컬 파일 처리 함수 ---
def get_pdb(pdb_code=None, use_upload=False):
    if use_upload:
        upload_widget = widgets.FileUpload(
            accept='.pdb', description='PDB 파일 업로드', button_style='info'
        )
        display(upload_widget)
        
        def wait_for_upload(widget):
            while len(widget.value) == 0: time.sleep(0.1)
            uploaded_filename = list(widget.value.keys())[0]
            pdb_content = widget.value[uploaded_filename]['content']
            pdb_filename = "tmp.pdb"
            with open(pdb_filename, "wb") as out: out.write(pdb_content)
            print(f"'{uploaded_filename}'이(가) 'tmp.pdb'로 저장되었습니다.")
            return pdb_filename
            
        return wait_for_upload(upload_widget)
    elif pdb_code is None or pdb_code == "":
        print("PDB 코드를 입력하거나 use_upload=True로 설정하세요.")
        return None
    elif os.path.isfile(pdb_code): return pdb_code
    elif len(pdb_code) == 4:
        pdb_filename = f"{pdb_code}.pdb1"
        if not os.path.isfile(pdb_filename):
            os.system(f"wget -qnc https://files.rcsb.org/download/{pdb_code}.pdb1.gz && gunzip -f {pdb_code}.pdb1.gz")
        return pdb_filename
    else:
        pdb_filename = f"AF-{pdb_code}-F1-model_v4.pdb"
        if not os.path.isfile(pdb_filename):
            os.system(f"wget -qnc https://alphafold.ebi.ac.uk/files/{pdb_filename}")
        return pdb_filename

def run_ananas(pdb_str, path, sym=None):
    pdb_filename = f"outputs/{path}/ananas_input.pdb"
    out_filename = f"outputs/{path}/ananas.json"
    os.makedirs(f"outputs/{path}", exist_ok=True)
    with open(pdb_filename,"w") as handle: handle.write(pdb_str)
    cmd = f"./ananas {pdb_filename} -u -j {out_filename}"
    if sym is None: os.system(cmd)
    else: os.system(f"{cmd} {sym}")
    try:
        with open(out_filename,"r") as f: out = json.load(f)
        results, AU = out[0], out[-1]["AU"]
        group, chains, rmsd = AU["group"], AU["chain names"], results["Average_RMSD"]
        print(f"AnAnaS detected {group} symmetry at RMSD:{rmsd:.3}")
        C = np.array(results['transforms'][0]['CENTER'])
        A = [np.array(t["AXIS"]) for t in results['transforms']]
        new_lines = []
        for line in pdb_str.split("\n"):
            if line.startswith("ATOM"):
                chain = line[21:22]
                if chain in chains:
                    x = np.array([float(line[i:(i+8)]) for i in [30,38,46]])
                    if group[0] == "c": x = sym_it(x,C,A[0])
                    if group[0] == "d": x = sym_it(x,C,A[1],A[0])
                    coord_str = "".join([f"{a:8.3f}" for a in x])
                    new_lines.append(line[:30]+coord_str+line[54:])
            else: new_lines.append(line)
        return results, "\n".join(new_lines)
    except Exception as e:
        print(f"AnAnaS 결과 처리 중 오류 발생: {e}")
        return None, pdb_str

def run(command, steps, num_designs=1, visual="none"):
    def run_command_and_get_pid(command):
        pid_file = '/dev/shm/pid'
        os.system(f'nohup {command} > /dev/null & echo $! > {pid_file}')
        with open(pid_file, 'r') as f: pid = int(f.read().strip())
        os.remove(pid_file)
        return pid
    def is_process_running(pid):
        try: os.kill(pid, 0)
        except OSError: return False
        else: return True
    run_output = widgets.Output()
    progress = widgets.FloatProgress(min=0, max=1, description='running', bar_style='info')
    display(widgets.VBox([progress, run_output]))
    for n in range(steps):
        if os.path.isfile(f"/dev/shm/{n}.pdb"): os.remove(f"/dev/shm/{n}.pdb")
    pid = run_command_and_get_pid(command)
    try:
        fail = False
        for _ in range(num_designs):
            for n in range(steps):
                wait = True
                while wait and not fail:
                    time.sleep(0.1)
                    if os.path.isfile(f"/dev/shm/{n}.pdb"):
                        with open(f"/dev/shm/{n}.pdb", "r") as f: pdb_str = f.read()
                        if "TER" in pdb_str or "ENDMDL" in pdb_str: wait = False
                        elif not is_process_running(pid): fail = True
                    elif not is_process_running(pid): fail = True
                if fail:
                    progress.bar_style = 'danger'; progress.description = "failed"
                    break
                else:
                    progress.value = (n+1) / steps
                    if visual != "none":
                        with run_output:
                            run_output.clear_output(wait=True)
                            if visual == "image":
                                xyz, bfact = get_ca(f"/dev/shm/{n}.pdb", get_bfact=True)
                                fig = plt.figure(); fig.set_dpi(100);fig.set_figwidth(6);fig.set_figheight(6)
                                ax1 = fig.add_subplot(111);ax1.set_xticks([]);ax1.set_yticks([])
                                plot_pseudo_3D(xyz, c=bfact, cmin=0.5, cmax=0.9, ax=ax1); plt.show()
                            if visual == "interactive":
                                view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')
                                view.addModel(pdb_str,'pdb'); view.setStyle({'cartoon': {'colorscheme': {'prop':'b','gradient': 'roygb','min':0.5,'max':0.9}}})
                                view.zoomTo(); view.show()
                if os.path.exists(f"/dev/shm/{n}.pdb"): os.remove(f"/dev/shm/{n}.pdb")
            if fail:
                progress.bar_style = 'danger'; progress.description = "failed"
                break
        while is_process_running(pid): time.sleep(0.1)
    except KeyboardInterrupt:
        os.kill(pid, signal.SIGTERM); progress.bar_style = 'danger'; progress.description = "stopped"
    
def run_diffusion(contigs, path, pdb=None, iterations=50, symmetry="none", order=1, hotspot=None,
                  chains=None, add_potential=False, num_designs=1, visual="none"):
    full_path = f"outputs/{path}"; os.makedirs(full_path, exist_ok=True)
    opts = [f"inference.output_prefix={full_path}", f"inference.num_designs={num_designs}"]
        # --- 👇 여기 이 한 줄을 추가하세요! ---
    opts.append(f"+inference.schedule_directory_path={full_path}/schedules")
    # ---
    if chains == "": chains = None
    if symmetry in ["auto","cyclic","dihedral"]:
        if symmetry == "auto": sym, copies = None, 1
        else: sym, copies = {"cyclic":(f"c{order}",order), "dihedral":(f"d{order}",order*2)}[symmetry]
    else: symmetry = None; sym, copies = None, 1
    contigs_list = contigs.replace(","," ").replace(":"," ").split()
    is_fixed, is_free = False, False; fixed_chains = []
    for contig in contigs_list:
        for x in contig.split("/"):
            a = x.split("-")[0]
            if a and a[0].isalpha():
                is_fixed = True
                if a[0] not in fixed_chains: fixed_chains.append(a[0])
            if a.isnumeric(): is_free = True
    if len(contigs_list) == 0 or not is_free: mode = "partial"
    elif is_fixed: mode = "fixed"
    else: mode = "free"
    if mode in ["partial","fixed"]:
        pdb_str = pdb_to_string(pdb, chains=chains)
        if symmetry == "auto":
            a, pdb_str = run_ananas(pdb_str, path)
            if a is None: print(f'ERROR: no symmetry detected'); symmetry = None; sym, copies = None, 1
            else:
                if a["group"][0] == "c": symmetry = "cyclic"; sym, copies = a["group"], int(a["group"][1:])
                elif a["group"][0] == "d": symmetry = "dihedral"; sym, copies = a["group"], 2 * int(a["group"][1:])
                else: print(f'ERROR: detected symm ({a["group"]}) not supported'); symmetry = None; sym, copies = None, 1
        elif mode == "fixed": pdb_str = pdb_to_string(pdb_str, chains=fixed_chains)
        pdb_filename = f"{full_path}/input.pdb"
        with open(pdb_filename, "w") as handle: handle.write(pdb_str)
        parsed_pdb = parse_pdb(pdb_filename)
        opts.append(f"inference.input_pdb={pdb_filename}")
        if mode in ["partial"]:
            iterations = int(80 * (iterations / 200)); opts.append(f"diffuser.partial_T={iterations}")
            contigs_list = fix_partial_contigs(contigs_list, parsed_pdb)
        else: opts.append(f"diffuser.T={iterations}"); contigs_list = fix_contigs(contigs_list, parsed_pdb)
    else:
        opts.append(f"diffuser.T={iterations}"); parsed_pdb = None
        contigs_list = fix_contigs(contigs_list, parsed_pdb)
    if hotspot is not None and hotspot != "": opts.append(f"ppi.hotspot_res=[{hotspot}]")
    if sym is not None:
        sym_opts = ["--config-name symmetry", f"inference.symmetry={sym}"]
        if add_potential: sym_opts += ["'potentials.guiding_potentials=[\"type:olig_contacts,weight_intra:1,weight_inter:0.1\"]'", "potentials.olig_intra_all=True","potentials.olig_inter_all=True", "potentials.guide_scale=2","potentials.guide_decay=quadratic"]
        opts = sym_opts + opts; contigs_list = sum([contigs_list] * copies,[])
    opts.append(f"'contigmap.contigs=[{' '.join(contigs_list)}]'")
    opts += ["inference.dump_pdb=True","inference.dump_pdb_path='/dev/shm'"]
    print("mode:", mode); print("output:", full_path); print("contigs:", contigs_list)
    run_script_path = "/workspace/RFdiffusion/run_inference.py"
    opts_str = " ".join(opts)
    cmd = f"python {run_script_path} {opts_str}"
    print(cmd)
    run(cmd, iterations, num_designs, visual=visual)
    for n in range(num_designs):
        pdbs = [f"outputs/traj/{path}_{n}_pX0_traj.pdb", f"outputs/traj/{path}_{n}_Xt-1_traj.pdb", f"{full_path}_{n}.pdb"]
        for pdb_file in pdbs:
            if os.path.exists(pdb_file):
                with open(pdb_file,"r") as handle: pdb_str = handle.read()
                with open(pdb_file,"w") as handle: handle.write(fix_pdb(pdb_str, contigs_list))
    return contigs_list, copies

print("✅ Setup cell is ready.")

✅ Setup cell is ready.


In [4]:
# --- 바인더 디자인 실행 셀 (수정된 버전) ---

# 1. 파라미터 설정
name = "0902-50-test"
contigs = "C311-391/0 100-100"
hotspot = "C331,C360,C378,C381,C323"
iterations = 50
num_designs = 5
visual = "image"

# --- 2. 업로드한 PDB 파일 경로 직접 지정 ---
# 여기에 1단계에서 업로드한 PDB 파일의 정확한 이름을 입력하세요.
pdb_filename = "4k9e_c.pdb" 
pdb = os.path.join("/workspace", pdb_filename)

# 3. RFdiffusion 실행
# --- 이 아랫부분이 수정되었습니다 ---

# 기본 실행 이름(폴더명으로 사용될)을 설정합니다.
base_name = name
output_dir = f"outputs/{base_name}"

# 만약 동일한 이름의 폴더가 이미 존재하면, 중복을 피하기 위해 랜덤 문자열을 뒤에 붙입니다.
while os.path.exists(output_dir):
    random_suffix = ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))
    base_name = f"{name}_{random_suffix}"
    output_dir = f"outputs/{base_name}"

# run_diffusion 함수는 이 path를 파일 이름의 접두사로 사용합니다.
# 예: path가 "8051/8051" 이면, 결과는 outputs/8051/8051_0.pdb 로 저장됩니다.
path = os.path.join(base_name, base_name)

print(f"✅ 결과가 저장될 폴더: {output_dir}")
# --- 수정된 부분 끝 ---


flags = {
    "contigs": contigs, "pdb": pdb, "iterations": int(iterations),
    "hotspot": hotspot, "path": path, "num_designs": int(num_designs),
    "visual": visual, "symmetry": "none", "order": 1, "chains": "", "add_potential": True
}

for k, v in flags.items():
    if isinstance(v, str):
        flags[k] = v.replace("'", "").replace('"', '')

contigs, copies = run_diffusion(**flags)

# --- 마지막 출력 메시지도 수정되었습니다 ---
print(f"✅ RFdiffusion 실행 완료! 결과는 '{output_dir}' 폴더에 저장되었습니다.")

✅ 결과가 저장될 폴더: outputs/0902-50-test
mode: fixed
output: outputs/0902-50-test/0902-50-test
contigs: ['C311-391', '100-100']
python /workspace/RFdiffusion/run_inference.py inference.output_prefix=outputs/0902-50-test/0902-50-test inference.num_designs=5 +inference.schedule_directory_path=outputs/0902-50-test/0902-50-test/schedules inference.input_pdb=outputs/0902-50-test/0902-50-test/input.pdb diffuser.T=50 ppi.hotspot_res=[C331,C360,C378,C381,C323] 'contigmap.contigs=[C311-391 100-100]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Please either pass the dim explicitly or simply use torch.linalg.cross.
The default value of dim will change to agree with that of linalg.cross in a future release. (Triggered internally at /opt/pytorch/pytorch/aten/src/ATen/native/Cross.cpp:62.)
  CBrotaxis1 = (CBr-CAr).cross(NCr-CAr)


✅ RFdiffusion 실행 완료! 결과는 'outputs/0902-50-test' 폴더에 저장되었습니다.


In [None]:
%%time
import os
import sys
import pandas as pd
import glob

# @markdown ### **👇 여기에 이전 실행 정보를 입력하세요.**
# @markdown ---

# <<< (1) RFdiffusion 실행 시 사용했던 `name` 변수 값을 입력하세요.
# 결과가 저장된 폴더의 이름입니다. (예: "0811-200-200")
run_name = "0829-150-600sc"  # @param {type:"string"}

# <<< (2) 위 백본들을 생성할 때 사용했던 '정확한' Contig를 입력하세요.
# 이 정보는 코드가 스스로 알 수 없으므로, 반드시 올바르게 입력해야 합니다.
contigs_for_run = "C311-391/0 100-100"  # @param {type:"string"}

# @markdown ---
# @markdown ### **MPNN 및 AlphaFold 설정**
num_seqs = 8      #@param {type:"raw"}
num_recycles = 3  #@param {type:"raw"}
initial_guess = True
use_multimer = True
rm_aa = "C"
mpnn_sampling_temp = 0.1

# --- 아래는 수정할 필요 없이 그대로 실행하면 됩니다. ---

# (A) 이전 실행 정보를 바탕으로 변수들을 다시 만듭니다.
path = os.path.join(run_name, run_name)
contigs_str = ":".join(contigs_for_run.replace("'", "").replace('"', '').split())
copies = 1

# (B) PDB 파일 개수를 자동으로 셉니다.
pdb_path_pattern = f"outputs/{path}_*.pdb"
pdb_files = glob.glob(pdb_path_pattern)
num_designs = len(pdb_files)

if num_designs == 0:
    print(f"🚨 에러: '{os.path.dirname(pdb_path_pattern)}' 폴더에서 PDB 파일을 찾을 수 없습니다. 'run_name'을 확인해주세요.")
else:
    print(f"✅ 총 {num_designs}개의 PDB 파일을 찾았습니다. 검증 작업을 시작합니다.")
    
    # (C) designability_test 스크립트 실행
    opts = [f"--pdb=outputs/{path}_0.pdb",
            f"--loc=outputs/{path}",
            f"--contig={contigs_str}",
            f"--copies={copies}",
            f"--num_seqs={num_seqs}",
            f"--num_recycles={num_recycles}",
            f"--rm_aa={rm_aa}",
            f"--mpnn_sampling_temp={mpnn_sampling_temp}",
            f"--num_designs={num_designs}"]
    if initial_guess: opts.append("--initial_guess")
    if use_multimer: opts.append("--use_multimer")
    opts_str = ' '.join(opts)
    
    get_ipython().system(f"python -m colabdesign.rf.designability_test {opts_str}")

    # (D) 결과 요약 출력
    best_pdb_path = f"outputs/{path}/best.pdb"
    all_results_path = f"outputs/{path}/all_results.csv"

    print("\n----------------------------------------------------")
    try:
        if os.path.exists(all_results_path):
            print("📊 전체 디자인 결과 요약:")
            df = pd.read_csv(all_results_path)
            print(df[['design', 'n', 'plddt', 'rmsd']].to_string(index=False))
            print("----------------------------------------------------")

        with open(best_pdb_path, "r") as f:
            info = f.readline().strip().split()
            design_num = info[3]
            seq_num = info[5]
            rmsd_val = info[7]

        print(f"🏆 Best 모델이 선정되었습니다! 🏆")
        print(f"   - 모델 번호: design {design_num} / sequence {seq_num}")
        print(f"   - RMSD 값: {rmsd_val}")
        print("----------------------------------------------------")

    except FileNotFoundError:
        print("⚠️ 'best.pdb' 또는 'all_results.csv' 파일을 찾을 수 없습니다. 스크립트 실행에 문제가 있었을 수 있습니다.")

✅ 총 600개의 PDB 파일을 찾았습니다. 검증 작업을 시작합니다.
{'pdb':'outputs/0829-150-600sc/0829-150-600sc_0.pdb','loc':'outputs/0829-150-600sc/0829-150-600sc','contigs':'C311-391/0:100-100','copies':1,'num_seqs':8,'initial_guess':True,'use_multimer':True,'use_soluble':False,'num_recycles':3,'rm_aa':'C','num_designs':600,'mpnn_sampling_temp':0.1}
protocol=binder
2025-09-02 07:13:45.677852: W external/xla/xla/service/gpu/nvptx_compiler.cc:765] The NVIDIA driver's CUDA version is 12.8 which is older than the ptxas CUDA version (12.9.86). Because the driver is older than the ptxas version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.
running proteinMPNN...
running AlphaFold...
design:0 n:0 mpnn:1.092 plddt:0.321 i_ptm:0.050 i_pae:28.068 rmsd:27.739 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/AWRAVAPGGPTAAAVAAGAGAARVVGFAAAPPPTEVG

design:3 n:5 mpnn:1.139 plddt:0.442 i_ptm:0.053 i_pae:28.455 rmsd:45.338 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SSVVWTPVTPRDAAVAAGAAAAIVLAFTASPPPTSIAWIRDGAPYPGPFAFAPAAPSAANTAYVAVLALAGVTPADRGAYT
design:3 n:6 mpnn:1.116 plddt:0.392 i_ptm:0.048 i_pae:28.767 rmsd:48.805 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SSVVVTPVTPTDAAVAPGTDAARVVAFDASPPLTEVRWIFKGAPYPGPFAFAPASPSAANTAYVAVLPLTGVTPETRGEYR
design:3 n:7 mpnn:1.079 plddt:0.401 i_ptm:0.049 i_pae:28.628 rmsd:44.034 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYVRVTPVTPVDAAVAPGTDAALVVAFEAHPPLTDLRWLFRGAPYPGPFAFAPAAPGPGNTAYVAVLPLTAVTPASRGEYR
design:4 n:0 mpnn:1.104 plddt:0.311 i_ptm:0.050 i_pae:28.640 rmsd:44.243 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYLVVEPATPTTLTVAAGSGAARVVVFTAHPPLTEIGWIYRGRPYPGGFGYGPAAPGA

design:7 n:6 mpnn:1.079 plddt:0.385 i_ptm:0.053 i_pae:28.318 rmsd:46.560 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/AGVAVGPVTGAGAGVAAGTDAALVVGLAAAPPLTEVGWTFRGAPYPGPFAFAPAAPGAGATGYVAVLPITAVTPATRGAYA
design:7 n:7 mpnn:1.087 plddt:0.317 i_ptm:0.050 i_pae:28.383 rmsd:57.190 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/GFLVVGPVGGVGAGVGAGTDAALTVGFAAAPPPTALGWVFRGAPYPGPFALAPAAPGAGNTGYVAVLPLTGVTPATRGAYL
design:8 n:0 mpnn:1.082 plddt:0.416 i_ptm:0.053 i_pae:28.386 rmsd:37.041 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/AFVEVTPLTPTDLTVPAGAGAAIVVAFRAHPPLTEVRWIFKGAPYPGPFAFAPLSPGADNTWYRAVLGLGGVTPATRGAYE
design:8 n:1 mpnn:1.105 plddt:0.437 i_ptm:0.054 i_pae:28.101 rmsd:27.785 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SFLEVRPLTPTDLAVAPGAGAVLAVEFRASPPPTELGWRFRGAPYPGPYAYGPLSPGA

design:11 n:6 mpnn:1.088 plddt:0.355 i_ptm:0.048 i_pae:28.667 rmsd:25.846 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYVRWTPLTPTDAAVAAGAGAALAVRFEASPPLTEWGWYFRGAPYPGPFAFAPVSPGPDNTEYVASLALAGVTPATRGAYE
design:11 n:7 mpnn:1.147 plddt:0.377 i_ptm:0.053 i_pae:28.417 rmsd:19.378 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYLRVTPLTPTTATVPAGAGAQLTLRFEAHPPLTEWRWVFKGQPYPGPFTFRPVSPSASNTEYVSTLTITGVTPATRGAYT
design:12 n:0 mpnn:1.172 plddt:0.345 i_ptm:0.042 i_pae:29.850 rmsd:60.520 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYLLVFPVTSTTLTVPAGAGALIVRLFQAYPPPTEIGWVYRGVPYPGPFLYFPVSAGADNTWYVSVLPLSGITPASRGAYE
design:12 n:1 mpnn:1.097 plddt:0.329 i_ptm:0.044 i_pae:29.804 rmsd:52.621 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYLIVYPLTPTDAAVAPGTGALRVVLFEAYPPPTELGWTFRGAPYPGPFLFAPV

design:15 n:6 mpnn:1.109 plddt:0.454 i_ptm:0.058 i_pae:27.811 rmsd:33.570 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYVRWTPVTPTDLTVAPGTDAAIVVEFEADPPLTEVQWIYKGKPYPGPFAFGPVAGGAGNTRYVSVLTLRGVTPATRGEYK
design:15 n:7 mpnn:1.115 plddt:0.432 i_ptm:0.049 i_pae:29.186 rmsd:36.384 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/AYLRLTPVTPTDAAVAAGTDAVLALLFEAYPPPTEIGWVRDGKPYPGPFAFGPVAGGAGNTRYVAVLPIRGVTAATRGAYT
design:16 n:0 mpnn:1.183 plddt:0.416 i_ptm:0.077 i_pae:27.050 rmsd:23.248 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYTKWFPLTPTNAAVGGGGDYTKELLYESSPPLTEIQWIFKGKPYPGPFEYRKLSPSSSNTKYLSILKITGVGGANRGEIK
design:16 n:1 mpnn:1.096 plddt:0.437 i_ptm:0.085 i_pae:26.536 rmsd:63.603 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYLEIEYVTPKDAAVGGGGDYELRVRFKAYPPLTEIKWIFKGKPYPGPFEYRPV

design:19 n:6 mpnn:1.128 plddt:0.425 i_ptm:0.050 i_pae:29.215 rmsd:65.129 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYLRVHPVTPEDADVAPGTDALIVREFEAHPPPTRFEWIFRGRPYPGPFAFAPAAPGPGNTRYVAVLPVTGVTPATRGEYR
design:19 n:7 mpnn:1.134 plddt:0.341 i_ptm:0.043 i_pae:28.939 rmsd:51.461 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYLRVHPLTPTDATVPPGANALRVLRYEADPPPTEHRWVFKGAPYPGPYAHAPLAPGADNTAYLSVLPITAVTPATRGAYE
design:20 n:0 mpnn:1.175 plddt:0.465 i_ptm:0.046 i_pae:29.461 rmsd:29.752 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYVEVFPLVPTNLDVAPGTDAFLVVLFRAHPPLTSWGWTFKGQPYPGPFLYRPLSPSADNTWYLSVLPLTGVTPETRGEYT
design:20 n:1 mpnn:1.169 plddt:0.335 i_ptm:0.052 i_pae:28.498 rmsd:34.788 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYLVVFPLTPTDLAVAPGADAFIVVAYVAYPPPTEIGWTFKGAPYPGPFAYAPA

design:23 n:6 mpnn:1.098 plddt:0.388 i_ptm:0.057 i_pae:28.445 rmsd:32.139 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/ASLTVAPLTPTDAAVAPGTDAAIVVAFTAAPAPTEFGWTFKGAPYPGPFAFAPAAGGAAETAYVAVLLLTGVTPATRGEYK
design:23 n:7 mpnn:1.102 plddt:0.345 i_ptm:0.051 i_pae:28.375 rmsd:35.812 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SSLVVTPVTPTTATVAPGTGAAIVVAFDAAPPPTSIGWVFKGAPYPGPFAFAPAAGGAGNTAYVAVLTLTGVTPADRGVYE
design:24 n:0 mpnn:1.091 plddt:0.399 i_ptm:0.042 i_pae:29.919 rmsd:64.612 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SWLRVTPVTPAAAAVAPGTDALRVLVFSASPPPTEFGWVRDGAPYPGPFAFGPAAGGAGETAYVAVLPLRAVTPATRGAYE
design:24 n:1 mpnn:1.108 plddt:0.374 i_ptm:0.049 i_pae:28.895 rmsd:53.208 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYLRVTPVTPTAAAVAEGTDYLIVVAFTASPPPTRWEWRFRGAPYPGPFAFGPA

design:27 n:6 mpnn:1.159 plddt:0.558 i_ptm:0.046 i_pae:29.844 rmsd:50.185 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYLNIEYLTPKEANVPLGSDYDLIVRFTADPPLTEFGWIFKGEPYPGPFEFRPVDPSEDNTEYIAILHITGVTAANRGEYK
design:27 n:7 mpnn:1.172 plddt:0.519 i_ptm:0.050 i_pae:29.128 rmsd:53.958 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SFLEVEPVTPTDATVPLGTDYTLEVRFTASPPLTEIQWIFQGKPYPGPFEFRPVDDSEDNTEYVAVLTIRGVTAATRGEYT
design:28 n:0 mpnn:1.124 plddt:0.465 i_ptm:0.051 i_pae:28.812 rmsd:39.575 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYLRVTPVTPTDAAVPAGAGAALVVRFEAHPPPTELQWVRGGAPYPGPFLLRPVSPGSDNTEWVAVLPIAGVGPASRGAYE
design:28 n:1 mpnn:1.139 plddt:0.463 i_ptm:0.051 i_pae:28.059 rmsd:28.104 GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG/SYLVVTPVTPTTLTVAAGTGANIVLRFEAHPPPTEFQWIRNGAPYPGPFTYRPV

In [1]:
# --- [수정된 버전] Top 디자인 선별 및 PDB 파일 복사 ---

import pandas as pd
import os
import shutil

# ==============================================================================
# ✅ 1. 설정: 여기만 수정하시면 됩니다.
# ==============================================================================

# <<< (1) 분석을 완료한 RFdiffusion 실행의 'run_name'을 입력하세요.
# 예: "0828-150-600sc"
run_name = "0828-150-600sc_validation"

# <<< (2) 선별 기준과 개수를 지정하세요.
i_pae_threshold = 15      # i_pae 점수가 이 값보다 '낮은' 디자인만 선별
top_n_to_select = 100     # 최종적으로 선별할 상위 디자인 개수

# ==============================================================================
# 🚀 2. 스크립트 실행: 이 아래는 수정하지 마세요.
# ==============================================================================

# --- 경로 설정 ---
# 검증 결과가 저장된 폴더 (예: outputs/run_name_validation)
validation_folder = f"outputs/{run_name}_validation"
# 종합 결과 CSV 파일 경로
results_csv_path = os.path.join(validation_folder, "all_results_summary.csv")
# 최종적으로 순위별 PDB를 저장할 폴더
ranked_pdb_folder = os.path.join(validation_folder, f"ranked_top_{top_n_to_select}")

print(f"'{results_csv_path}' 파일에서 결과 분석을 시작합니다.")

# --- CSV 파일 읽기 및 처리 ---
if not os.path.exists(results_csv_path):
    print(f"🚨 에러: 결과 파일({results_csv_path})을 찾을 수 없습니다. 이전 분석 스크립트가 성공적으로 실행되었는지, run_name이 올바른지 확인하세요.")
else:
    df_all = pd.read_csv(results_csv_path)
    
    # --- 필터링 및 정렬 ---
    if 'i_pae' in df_all.columns:
        df_filtered = df_all[df_all['i_pae'] < i_pae_threshold].copy()
        print(f"i_pae < {i_pae_threshold} 조건을 만족하는 디자인 {len(df_filtered)}개를 찾았습니다.")
    else:
        df_filtered = df_all.copy()

    df_sorted = df_filtered.sort_values(by='rmsd', ascending=True)
    top_designs = df_sorted.head(top_n_to_select)
    print(f"RMSD가 낮은 순서대로 상위 {len(top_designs)}개를 선별했습니다.")
    
    # --- PDB 파일 복사 ---
    os.makedirs(ranked_pdb_folder, exist_ok=True)
    print(f"선별된 PDB 파일은 '{ranked_pdb_folder}' 폴더에 저장됩니다.")

    copied_count = 0
    for rank, (index, row) in enumerate(top_designs.iterrows(), 1):
        # ✅ *** 여기가 수정된 핵심 부분입니다 ***
        # 올바른 원본 PDB 파일 경로를 생성합니다.
        # 예: outputs/run_name_validation/design_553/n0.pdb
        design_num = int(row['design_num'])
        seq_num = int(row['n'])
        source_pdb_file = os.path.join(validation_folder, f"design_{design_num}", f"n{seq_num}.pdb")
        
        # 순위를 포함한 새로운 파일명 지정
        rmsd_val = row['rmsd']
        dest_pdb_file = os.path.join(ranked_pdb_folder, f"rank_{rank:03d}_design{design_num}_n{seq_num}_rmsd_{rmsd_val:.2f}.pdb")

        if os.path.exists(source_pdb_file):
            shutil.copy(source_pdb_file, dest_pdb_file)
            copied_count += 1
        else:
            # 이제 올바른 경로를 찾으므로, 이 경고는 거의 나타나지 않을 것입니다.
            print(f"  - 경고: 원본 파일 '{source_pdb_file}'을 찾을 수 없습니다.")

    print(f"\n✅ 작업 완료: 총 {copied_count}개의 PDB 파일을 순위별로 저장했습니다.")
    
    # --- 최종 목록 출력 ---
    print("\n📊 선별된 Top 디자인 목록:")
    print(top_designs[['design_num', 'backbone_name', 'n', 'plddt', 'i_pae', 'rmsd']].to_string(index=False))

'outputs/0828-150-600sc_validation_validation/all_results_summary.csv' 파일에서 결과 분석을 시작합니다.
🚨 에러: 결과 파일(outputs/0828-150-600sc_validation_validation/all_results_summary.csv)을 찾을 수 없습니다. 이전 분석 스크립트가 성공적으로 실행되었는지, run_name이 올바른지 확인하세요.
