# 🧬 ProtGPT2 + AlphaFold-Multimer 기반 펩타이드-단백질 결합 예측 파이프라인

## 📁 1. Google Drive 마운트

In [None]:
from google.colab import drive
import os

# Mount Google Drive, then create the pipeline folder inside it and make that
# folder the current directory; the following cells read and write files
# using relative paths.
drive_root = '/content/drive'
drive.mount(drive_root)

work_dir = os.path.join(drive_root, "MyDrive", "peptide_docking_pipeline")
os.makedirs(work_dir, exist_ok=True)
os.chdir(work_dir)
print("Working directory: " + work_dir)

## 🧠 2. ProtGPT2 모델로 펩타이드 서열 생성

In [None]:
!pip install transformers sentencepiece

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

tokenizer = AutoTokenizer.from_pretrained("nferruz/ProtGPT2")
model = AutoModelForCausalLM.from_pretrained("nferruz/ProtGPT2")

input_ids = tokenizer("generate:", return_tensors="pt").input_ids.to("cuda" if torch.cuda.is_available() else "cpu")
output = model.generate(input_ids, max_length=30, num_return_sequences=1, do_sample=True, top_k=950, top_p=0.96)
sequence = tokenizer.decode(output[0], skip_special_tokens=True).replace("generate:", "").strip()

print("🔹 생성된 펩타이드 서열:", sequence)
peptide_sequence = sequence

# fasta 저장
with open("peptide.fasta", "w") as f:
    f.write(">peptide\n" + peptide_sequence)


## 🔮 3. AlphaFold-Multimer (ColabFold) 구조 예측

In [None]:
# ColabFold 설치 (AlphaFold-Multimer 경량 실행)
!pip install -q colabfold batchfold
!colabfold_download

# 예시용 단백질 서열 수동 입력 (실제 단백질 fasta 업로드 필요)
protein_sequence = "MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWLD..."

with open("protein.fasta", "w") as f:
    f.write(">protein\n" + protein_sequence)

# 입력 파일 병합 (peptide + protein)
with open("complex.fasta", "w") as out, open("peptide.fasta") as pep, open("protein.fasta") as pro:
    out.writelines(pro.readlines())
    out.writelines(pep.readlines())

# 구조 예측 실행
!colabfold_batch complex.fasta complex_prediction


## 📂 4. 예측된 구조 파일 선택

In [None]:
import glob
import shutil

# Pick the top-ranked predicted model and copy it to complex.pdb.
# ColabFold writes ranked models with "rank_001" (older releases: "rank_1")
# in the file name; the legacy name complex_0.pdb is kept as a fallback.
candidates = sorted(
    glob.glob("complex_prediction/*_rank_001_*.pdb")
    + glob.glob("complex_prediction/*_rank_1_*.pdb")
)
if not candidates:
    candidates = glob.glob("complex_prediction/complex_0.pdb")
if not candidates:
    raise FileNotFoundError(
        "No ranked model found in complex_prediction/; "
        "check that the structure prediction finished."
    )
best_model = candidates[0]
shutil.copyfile(best_model, "complex.pdb")
print(f"Selected model: {best_model}")

# Split the complex into receptor.pdb (protein) and ligand.pdb (peptide),
# which the conversion and docking cells expect. Atom records are grouped by
# the chain identifier in column 22 of the PDB format.
chains = {}
with open("complex.pdb") as pdb_file:
    for record in pdb_file:
        if record.startswith(("ATOM", "HETATM")) and len(record) > 21:
            chains.setdefault(record[21], []).append(record)

if len(chains) != 2:
    raise ValueError(
        f"Expected exactly 2 chains in complex.pdb, found {sorted(chains)}."
    )

# The chain with fewer atoms is treated as the peptide, the other one as the
# receptor, so the result does not depend on how the chains were labelled.
ligand_chain, receptor_chain = sorted(chains, key=lambda cid: len(chains[cid]))
for out_path, chain_id in (
    ("receptor.pdb", receptor_chain),
    ("ligand.pdb", ligand_chain),
):
    with open(out_path, "w") as out:
        out.writelines(chains[chain_id])
        out.write("END\n")
print(f"receptor.pdb <- chain {receptor_chain}, ligand.pdb <- chain {ligand_chain}")


## 🔄 5. 구조 변환 (pdb → pdbqt)

In [None]:
!apt-get install -y openbabel
!obabel receptor.pdb -O receptor.pdbqt
!obabel ligand.pdb -O ligand.pdbqt

## 🚀 6. AutoDock Vina 도킹 실행

In [None]:
!wget -q https://github.com/ccsb-scripps/AutoDock-Vina/releases/download/v1.2.3/vina_1.2.3_linux_x86_64.zip
!unzip -q vina_1.2.3_linux_x86_64.zip
!chmod +x vina_1.2.3_linux_x86_64/vina

!vina_1.2.3_linux_x86_64/vina \
    --receptor receptor.pdbqt \
    --ligand ligand.pdbqt \
    --center_x 0 --center_y 0 --center_z 0 \
    --size_x 20 --size_y 20 --size_z 20 \
    --out output.pdbqt --log log.txt

with open("log.txt") as f:
    lines = f.readlines()
vina_score = None
for line in lines:
    if "REMARK VINA RESULT" in line:
        vina_score = float(line.strip().split()[3])
        break
print(f"AutoDock Vina ΔG: {vina_score} kcal/mol")

## 🔬 7. PLIP 상호작용 분석

In [None]:
!pip install -q plip
from plip.structure.preparation import PDBComplex
structure = PDBComplex()
structure.load_pdb("output.pdbqt")
structure.analyze()

interaction_count = 0
for ligand in structure.ligands:
    inter = structure.interaction_sets[ligand]
    interaction_count += len(inter.hbonds) + len(inter.hydrophobic_contacts) + len(inter.saltbridge_ligands)
print(f"총 상호작용 수: {interaction_count}")

## 🧠 8. Pafnucy 예측

In [None]:
!git clone https://github.com/oddt/pafnucy.git
%cd pafnucy
!pip install -q -r requirements.txt
%cd ..

!obabel output.pdbqt -O complex_final.pdb

!python pafnucy/predict.py --pdb complex_final.pdb --out affinity.csv

import pandas as pd
pafnucy_df = pd.read_csv("affinity.csv")
pafnucy_affinity = float(pafnucy_df["predicted_affinity"].iloc[0])
print(f"Pafnucy 예측 ΔG: {pafnucy_affinity}")

## ✅ 9. 최종 종합 점수 계산

In [None]:
# Combine the three metrics computed in the previous cells into one score.
# A missing value is reported by name instead of surfacing as a TypeError
# from the arithmetic below.
score_inputs = {
    "vina_score": vina_score,
    "pafnucy_affinity": pafnucy_affinity,
    "interaction_count": interaction_count,
}
missing_inputs = [name for name, value in score_inputs.items() if value is None]
if missing_inputs:
    raise ValueError(f"Cannot compute final score; missing: {', '.join(missing_inputs)}")

# Both affinity terms are negated (more negative input -> higher score) and
# every counted interaction adds 0.5.
# NOTE(review): the negation of pafnucy_affinity is only right if that value
# is an energy; if it is a -log(K)-style affinity (higher = stronger binding)
# the sign should be flipped - confirm.
final_score = (-1 * vina_score) + (-1 * pafnucy_affinity) + (0.5 * interaction_count)
print(f"\n✅ 최종 종합 점수: {final_score:.2f}")