# DiffDock Inference App (Local UI + Runpod trigger)
- Local에서 파라미터를 설정하고, **Runpod Serverless Endpoint**로 작업을 보냅니다.
- 컨테이너 측에서는 `handler.py`가 이벤트(JSON)를 받아 DiffDock 추론을 실행합니다.


### 1. input file setting

### Quick setup guide:
- place your protein PDB files in the `inputs/` folder.
- place your ligand SDF files in the `inputs/` folder (if needed).
- fill up the `input_protein_ligand_info.csv` with your data.

  - `complex_name`: unique name for each protein-ligand complex. (job name; this will be used for output dir&file names)
  - `protein_path`: path to the protein PDB file.
  - `ligand_description`: either path to the ligand SDF file or SMILES string.
  - `protein_sequence`: (optional) protein sequence in single-letter code.
- instead of SDF, you can also use SMILES strings in the `ligand_description` column.

- hit the "Run All" button (or run each cell sequentially) to trigger the Runpod Endpoint job.
- outputs will be saved downloaded to the `downloads/` folder.


In [15]:

protein_ligand_csv_input_file = 'input_protein_ligand_info.csv'

### 2. Runpod Endpoint setting

In [16]:
!pip install requests urllib3 idna certifi charset-normalizer --upgrade



In [17]:
# ---- Runpod 인증/엔드포인트 설정 ----
RUNPOD_API_KEY = "RUNPOD_API_KEY_HERE"  
RUNPOD_ENDPOINT_ID = "ahbtveirspw7r4"  

assert RUNPOD_API_KEY, 'RUNPOD_API_KEY 를 입력하세요.'
assert RUNPOD_ENDPOINT_ID, 'RUNPOD_ENDPOINT_ID 를 입력하세요.'


### 3. input file encoding

In [18]:
import json, os, time, requests, base64
from pathlib import Path

# ---- 로컬 입력 파일 경로 설정 ----
# 노트북이 실행되는 머신에서의 경로
local_protein_ligand_csv = Path(protein_ligand_csv_input_file) ## input csv 파일 경로
local_inputs_dir = Path('inputs')

# 컨테이너 내부에서 사용할 폴더 이름
remote_data_dir = 'data'
remote_inputs_dir = 'inputs'

# DiffDock 실행 파라미터
config = 'default_inference_args.yaml'
out_dir = 'results/'   # 컨테이너 내부 경로 (DiffDock --out_dir)
extra_args = ''                  # 예: '--samples_per_complex 20'
cuda_visible_devices = '0'       # 필요한 경우 지정

# ---- 파일을 base64 로 인코딩하는 함수 ----
def encode_file(path: Path) -> dict:
    return {
        'filename': path.name,
        'data_b64': base64.b64encode(path.read_bytes()).decode('ascii'),
    }

# CSV 파일 인코딩
protein_ligand_obj = encode_file(local_protein_ligand_csv)

# inputs 폴더의 pdb 파일 인코딩
pdb_file_objs = []
if local_inputs_dir.is_dir():
    for p in sorted(local_inputs_dir.glob('*.pdb')):
        pdb_file_objs.append(encode_file(p))

# inputs 폴더의 sdf 파일 인코딩
sdf_file_objs = []
if local_inputs_dir.is_dir():
    for p in sorted(local_inputs_dir.glob('*.sdf')):
        sdf_file_objs.append(encode_file(p))


# 컨테이너 안에서 사용할 CSV 경로
remote_csv_path = f"{remote_data_dir}/{protein_ligand_obj['filename']}"

# ---- 실행 명령어 (Run with explicit args 스타일) ----
# 예: python -m inference --config default_inference_args.yaml --protein_ligand_csv data/... --out_dir results/...
cmd = (
    f"python3 -m inference "
    f"--config {config} "
    f"--protein_ligand_csv {remote_csv_path} "
    f"--out_dir {out_dir}"
)
if extra_args:
    cmd = f"{cmd} {extra_args}"

# RunPod 로 보낼 event (JSON)
event = {
    'cmd': cmd,
    'protein_ligand_csv': protein_ligand_obj,
    'pdb_files': pdb_file_objs,
    'sdf_files': sdf_file_objs,
    'data_dir': remote_data_dir,
    'inputs_dir': remote_inputs_dir,
    'out_dir': out_dir,
    'config': config,
    'extra_args': extra_args,
    'cuda_visible_devices': cuda_visible_devices,
}

print(json.dumps(event, ensure_ascii=False, indent=2))


{
  "cmd": "python3 -m inference --config default_inference_args.yaml --protein_ligand_csv data/input_protein_ligand_info.csv --out_dir results/",
  "protein_ligand_csv": {
    "filename": "input_protein_ligand_info.csv",
    "data_b64": "Y29tcGxleF9uYW1lLHByb3RlaW5fcGF0aCxsaWdhbmRfZGVzY3JpcHRpb24scHJvdGVpbl9zZXF1ZW5jZQ0KZm9sZF8xXzAxMDY3LGlucHV0cy9mb2xkXzFfMDEwNjdfbW9kZWxfMC5wZGIsaW5wdXRzL2lucHV0X0M2LnNkZiwNCmZvbGRfMl8wMjk1NSxpbnB1dHMvZm9sZF8yXzAyOTU1X21vZGVsXzAucGRiLGlucHV0cy9pbnB1dF9DNi5zZGYsDQo="
  },
  "pdb_files": [
    {
      "filename": "1a0q_protein_processed.pdb",
      "data_b64": "UkVNQVJLIFNlbGVjdGlvbiAnY2hhaW4gTCBvciBjaGFpbiBIJwpBVE9NICAgICAgMSAgTiAgIElMRSBMICAgMiAgICAgIDI3LjIzNCAgMTIuOTU1ICA1OS41NzMgIDEuMDAgIDAuMDAgICAgICAgICAgIE4gIApBVE9NICAgICAgMiAgQ0EgIElMRSBMICAgMiAgICAgIDI2LjI1OSAgMTEuOTkzICA1OS4wNjIgIDEuMDAgIDAuMDAgICAgICAgICAgIEMgIApBVE9NICAgICAgMyAgQyAgIElMRSBMICAgMiAgICAgIDI2LjA2MCAgMTIuMDA1ICA1Ny41NDQgIDEuMDAgIDAuMDAgICAgICAgICAgIEMgIApBVE9NICAgICAgNCAgTyAgIElM

### 4. request job to Runpod Endpoint

In [19]:
import json, os, time, requests
from pathlib import Path
import base64
import urllib3

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
session = requests.Session()
headers = { 'Authorization': f'Bearer {RUNPOD_API_KEY}', 'Content-Type': 'application/json' }
base = f'https://api.runpod.ai/v2/{RUNPOD_ENDPOINT_ID}'
session.verify = False

# 1) run 요청
resp = session.post(f'{base}/run', headers=headers, json={'input': event})
resp.raise_for_status()
run_id = resp.json().get('id') or resp.json().get('jobId')
print('Run submitted, id:', run_id)
assert run_id, resp.text

# 2) 상태 폴링
status = None
output = None
while True:
    time.sleep(2)
    r = session.get(f'{base}/status/{run_id}', headers=headers)
    if r.status_code == 404:
        print('Status 404, breaking')
        break
    r.raise_for_status()
    js = r.json()
    status = js.get('status') or js.get('state')
    print('Status:', status)

    if status in ('COMPLETED', 'FAILED', 'ERROR', 'FINISHED'):
        # 전체 응답 먼저 출력
        print(json.dumps(js, ensure_ascii=False, indent=2))
        # 다양한 필드명에 대응
        output = js.get('output') or js.get('result') or js.get('outputData')
        break

# 3) out_dir 를 zip 으로 받은 경우 로컬에 저장

print("Received output keys:", list(output.keys()) if output else "NO OUTPUT")

# 다운로드 폴더 생성
results_dir = Path("downloads")
results_dir.mkdir(exist_ok=True)

# out_dir_zip_b64 존재하면 ZIP 저장
if output and output.get("out_dir_zip_b64"):
    zip_name = output.get("out_dir_zip_name", "diffdock_results.zip")
    zip_bytes = base64.b64decode(output["out_dir_zip_b64"])
    
    zip_path = results_dir / zip_name
    zip_path.write_bytes(zip_bytes)
    
    print(f"📁 ZIP saved to: {zip_path.resolve()}")
    print(f"👉 다운로드 완료! Notebook File Browser에서 바로 다운로드하면 됩니다.")
else:
    print("❌ No zip file found in output.")
    print("🔍 Raw output:")
    print(json.dumps(output, indent=2, ensure_ascii=False))

status

print("STATUS:", resp.status_code)
print("TEXT:", resp.text)
resp.raise_for_status()

Run submitted, id: e8614c66-d5e6-433d-b558-6e2d0a6d3df2-e2
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: IN_PROGRESS
Status: COMPLETED
{
  "delayTime": 1004,
  "executionTime": 70934,
  "id": "e8614c66-d5e6-433d-b558-6e2d0a6d3df2-e2",
  "output": {
    "cmd": "python3 -m inference --config default_inference_args.yaml --protein_ligand_csv data/input_protein_ligand_info.csv --out_dir results/",
    "cwd": "/workspace/DiffDock",
    "out

### 오류확인용

In [20]:
# ============================================
# 📂 RunPod DiffDock out_dir 파일 목록 보기 (디버깅)
# ============================================

import json
from pathlib import Path
import os

# output 변수는 위 run/status 셀에서 생성됨
# output 안에 다음 필드가 있을 수 있음:
#   - out_dir                : "results/xxx"
#   - out_dir_zip_b64        : base64 zip (있을 때만)
#   - out_dir_zip_name       : zip 파일 이름
#   - out_dir_listing        : handler가 보내줄 수도 있는 경우(선택)

print("📌 Output Keys:", list(output.keys()) if output else "NO OUTPUT")


# -------------------------------
# 1) Handler가 out_dir 경로를 제공?
# -------------------------------
out_dir = None
if output:
    out_dir = output.get("out_dir")

print("\n📁 out_dir:", out_dir)

# -------------------------------
# 2) Handler가 직접 listing 제공한 경우?
# -------------------------------
if output and "out_dir_listing" in output:
    print("\n📝 out_dir_listing (from handler):")
    print(json.dumps(output["out_dir_listing"], indent=2, ensure_ascii=False))
else:
    print("🔍 handler가 listing 제공하지 않음.")


# -------------------------------
# 3) 로컬에 받은 ZIP 안의 내용 보기
# -------------------------------
zip_path = None
if output and output.get("out_dir_zip_name"):
    zip_path = Path("downloads") / output["out_dir_zip_name"]

if zip_path and zip_path.exists():
    import zipfile
    print("\n🗂 ZIP 내용물:")
    with zipfile.ZipFile(zip_path, "r") as z:
        for name in z.namelist():
            print("  -", name)
else:
    print("\n❌ ZIP 파일이 없거나 비었습니다.")


📌 Output Keys: ['cmd', 'cwd', 'out_dir', 'out_dir_zip_b64', 'out_dir_zip_name', 'returncode', 'stderr', 'stdout']

📁 out_dir: results/
🔍 handler가 listing 제공하지 않음.

🗂 ZIP 내용물:
  - CEW/fold_1_01067/rank1.sdf
  - CEW/fold_1_01067/rank1_confidence-0.71.sdf
  - CEW/fold_1_01067/rank2_confidence-0.81.sdf
  - CEW/fold_1_01067/rank3_confidence-1.03.sdf
  - CEW/fold_1_01067/rank4_confidence-1.05.sdf
  - CEW/fold_1_01067/rank5_confidence-1.66.sdf
  - CEW/fold_1_01067/rank6_confidence-1.75.sdf
  - CEW/fold_1_01067/rank7_confidence-1.84.sdf
  - CEW/fold_1_01067/rank8_confidence-2.30.sdf
  - CEW/fold_1_01067/rank9_confidence-2.88.sdf
  - CEW/fold_1_01067/rank10_confidence-3.84.sdf
  - CEW/fold_2_02955/rank1.sdf
  - CEW/fold_2_02955/rank1_confidence-0.50.sdf
  - CEW/fold_2_02955/rank2_confidence-0.66.sdf
  - CEW/fold_2_02955/rank3_confidence-0.70.sdf
  - CEW/fold_2_02955/rank4_confidence-1.35.sdf
  - CEW/fold_2_02955/rank5_confidence-1.87.sdf
  - CEW/fold_2_02955/rank6_confidence-3.52.sdf
  - CEW/fo

In [21]:
# ===========================
# 🔍 DiffDock 로그 확인 셀
# ===========================
import textwrap
import json

if not output:
    print("❌ output 변수가 비어 있습니다. run/status 셀을 먼저 실행했는지 확인하세요.")
else:
    print("🔹 Return code:", output.get("returncode"))
    print("\n🔹 실행된 cmd:")
    print(output.get("cmd"))

    stdout = output.get("stdout") or ""
    stderr = output.get("stderr") or ""

    print("\n================ STDOUT ================\n")
    if stdout.strip():
        print(stdout)
    else:
        print("(STDOUT 비어 있음)")

    print("\n================ STDERR ================\n")
    if stderr.strip():
        print(stderr)
    else:
        print("(STDERR 비어 있음)")


🔹 Return code: 0

🔹 실행된 cmd:
python3 -m inference --config default_inference_args.yaml --protein_ligand_csv data/input_protein_ligand_info.csv --out_dir results/


Generating ESM language model embeddings
Processing 1 of 1 batches (2 sequences)



  import pkg_resources

  Y = indices.astype(int)

1it [00:27, 27.01s/it]
2it [00:44, 21.14s/it]
2it [00:44, 22.02s/it]

