<a href="https://colab.research.google.com/github/kangmg/compchem_with_colab/blob/main/autodock_minor_fix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Dataset: download the training data archive and unpack it into the working directory.
!wget "https://drive.usercontent.google.com/download?id=1d_UxhRHJWjpxIkWa4kKNjMrdRx4_nh_1&export=download&authuser=1&confirm=t" -O data.tar.gz -q
!tar -xzf data.tar.gz
# Move the extracted contents of data/ up into the current directory.
!mv data/* ./
# Clean up the downloaded archive and the leftover data/ directory.
!rm -rf data.tar.gz data

In [None]:
#@title install `openbabel`
#@markdown runtime ~ `3 min`
# Colab form toggle: set to False to skip the Open Babel source build below.
install_openbable = True # @param ["True","False"] {"type":"raw","allow-input":true}
if install_openbable:
    # Install the build dependency (SWIG).
    # NOTE(review): no `-y` flag is passed and all output is discarded — confirm apt never waits for confirmation here.
    !apt install swig > /dev/null 2>&1 # version : 4.0.2

    # Download the Open Babel 3.1.1 source archive, unpack it, and delete the archive.
    !wget -q https://github.com/openbabel/openbabel/archive/refs/tags/openbabel-3-1-1.tar.gz -O openbable.tar.gz
    !tar xzf ./openbable.tar.gz
    !rm ./openbable.tar.gz

    # Configure, build, and install from source with the Python bindings enabled.
    # NOTE(review): cmake/make output is redirected to /dev/null, so a failed build is silent.
    !mkdir -p ./build
    !cmake -B ./build ./openbabel-openbabel-3-1-1 -DPYTHON_BINDINGS=ON -DRUN_SWIG=ON > /dev/null 2>&1
    !make -C ./build -j2 > /dev/null 2>&1
    !make -C ./build install > /dev/null 2>&1

In [None]:
# `uv pip` is faster than `pip`.
# NOTE(review): package versions are not pinned — consider pinning for reproducibility.
!uv pip install rdkit
!uv pip install vina

[2mUsing Python 3.11.13 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 99ms[0m[0m
[2mUsing Python 3.11.13 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 107ms[0m[0m


# SMILES to PDBQT

In [None]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import MolToPDBBlock
import os
import traceback
from tqdm import tqdm

# Convert every SMILES string in the training CSV into a 3D structure file.
#
# For each row a molecule is parsed from the "Canonical_Smiles" column, hydrogens
# are added, 3D coordinates are embedded, and the result is written to
# <output_folder>/<ID>.pdbqt. Failures are written to error_log.txt (with a
# traceback) and the row is skipped. Finally the CSV at `csv_path` is rewritten
# so that it only contains the rows that were converted successfully.
csv_path = "/content/train.csv"
output_folder = "/content/output_folder"

# Fixed seed so the embedded 3D coordinates are reproducible between runs.
EMBED_SEED = 42

df = pd.read_csv(csv_path)

os.makedirs(output_folder, exist_ok=True)

log_file = os.path.join(output_folder, "error_log.txt")

# Index labels of the rows that were converted successfully. Collecting labels
# and selecting once at the end replaces the per-row `DataFrame.append`, which
# was removed in pandas 2.0 (there it raised AttributeError for every row, so
# the CSV was overwritten with an empty frame).
success_index = []

with open(log_file, "w") as log:
    for index, row in tqdm(df.iterrows(), total=df.shape[0]):
        smiles = row["Canonical_Smiles"]
        compound_name = row["ID"]

        try:
            # Parse once; MolFromSmiles returns None for an unparsable string.
            mol = None if pd.isna(smiles) else Chem.MolFromSmiles(smiles)
            if mol is None:
                # +2 maps the 0-based row index to the CSV line number (header + 1-based).
                raise ValueError(f"Invalid SMILES in row {index + 2}")

            mol = Chem.AddHs(mol)

            # EmbedMolecule returns -1 when no conformer could be generated.
            if AllChem.EmbedMolecule(mol, randomSeed=EMBED_SEED) < 0:
                raise ValueError(f"3D embedding failed in row {index + 2}")

            # Build the text before opening the file so that a failure here
            # does not leave an empty file behind.
            # NOTE(review): this is a PDB block saved under a .pdbqt name, not a
            # true PDBQT file (no partial charges / AutoDock atom types) —
            # confirm the docking step accepts it or convert it first.
            pdb_block = MolToPDBBlock(mol)

            pdbqt_filename = os.path.join(output_folder, f"{compound_name}.pdbqt")
            with open(pdbqt_filename, "w") as pdbqt_file:
                pdbqt_file.write(pdb_block)

            success_index.append(index)

        except Exception as e:
            log.write(f"Error processing row {index + 2}: {str(e)}\n")
            traceback.print_exc(file=log)

# Keep only the successfully converted rows, renumbered from 0.
success_df = df.loc[success_index].reset_index(drop=True)

# NOTE: this overwrites the input CSV in place with the filtered rows.
success_df.to_csv(csv_path, index=False)

print("Conversion complete. Check the error log for details.")


100%|██████████| 1681/1681 [01:32<00:00, 18.08it/s]

Conversion complete. Check the error log for details.





In [None]:
# !pip install scripts.prepare_receptor

[31mERROR: Could not find a version that satisfies the requirement scripts.prepare_receptor (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for scripts.prepare_receptor[0m[31m
[0m

In [None]:
from vina import Vina

def run_vina(receptor, ligand_path, out_pdbqt, log_file, center_x, center_y, center_z, size_x, size_y, size_z, exhaustiveness):
    """Dock one ligand against a receptor with AutoDock Vina and save the results.

    Args:
        receptor: Path to the receptor PDBQT file.
        ligand_path: Path to the ligand PDBQT file.
        out_pdbqt: Path the docked poses are written to (overwritten if present).
        log_file: Path of a plain-text score table written after docking.
        center_x, center_y, center_z: Centre of the docking box.
        size_x, size_y, size_z: Edge lengths of the docking box.
        exhaustiveness: Search exhaustiveness passed to ``Vina.dock``.

    Returns:
        None. The poses and the score table are written to disk.

    The score table has one row per pose: the 1-based mode number followed by
    the first value of each row returned by ``Vina.energies``. No header line
    starts with a digit, so a reader can locate the best pose by looking for the
    row whose first field is ``1``.
    """
    # Number of poses to write and report; adjust as needed.
    n_poses = 10

    # Create the Vina object.
    v = Vina(sf_name='vina')

    # Set the receptor.
    v.set_receptor(receptor)

    # Set the ligand.
    v.set_ligand_from_file(ligand_path)

    # Define the docking box (centre and size).
    v.compute_vina_maps(center=[center_x, center_y, center_z], box_size=[size_x, size_y, size_z])

    # Run the docking.
    v.dock(exhaustiveness=exhaustiveness)

    # Save the poses. The Vina Python class has no `write_pdbqt` / `write_log`;
    # poses are written with `write_poses` and scores are read with `energies`.
    v.write_poses(out_pdbqt, n_poses=n_poses, overwrite=True)

    # Write the per-pose scores as a small table.
    energies = v.energies(n_poses=n_poses)
    with open(log_file, "w") as log:
        log.write("mode |   affinity\n")
        log.write("     | (kcal/mol)\n")
        log.write("-----+------------\n")
        for mode, pose_energies in enumerate(energies, start=1):
            log.write(f"{mode:4d} {float(pose_energies[0]):12.3f}\n")

    print(f"Vina 도킹 완료! 출력: {out_pdbqt}, 로그: {log_file}")

# AutoDock Vina

In [None]:
import os, glob, subprocess
import pandas as pd
from tqdm import tqdm

# Dock every ligand file in `ligand_dir` against the receptor and collect the
# best-pose affinity of each ligand into one CSV in `output_dir`.

# --- Settings ---
receptor     = "/content/receptor.pdbqt"
ligand_dir   = "/content/output_folder"
output_dir   = "/content/out_path"
os.makedirs(output_dir, exist_ok=True)

# Fail fast with a clear message instead of erroring inside the first docking call.
if not os.path.isfile(receptor):
    raise FileNotFoundError(f"Receptor file not found: {receptor}")

# Grid box settings (example values)
center_x, center_y, center_z = 22.57, 22.77, 11.81
size_x,   size_y,   size_z   = 5.58,  13.38,  10.50
exhaustiveness = 8

# --- Run docking for each ligand ---
results = []
# Sorted so the processing order (and the CSV row order) is deterministic.
ligand_paths = sorted(glob.glob(os.path.join(ligand_dir, "*.pdbqt")))
for ligand_path in tqdm(ligand_paths):
    ligand_name = os.path.basename(ligand_path)
    # splitext only strips the trailing extension, unlike str.replace which
    # would rewrite every ".pdbqt" occurrence in the name.
    ligand_stem = os.path.splitext(ligand_name)[0]
    out_pdbqt   = os.path.join(output_dir, f"{ligand_stem}_out.pdbqt")
    log_file    = os.path.join(output_dir, f"{ligand_stem}.log")

    # Stays None when docking fails or no pose row is found in the log.
    affinity = None
    try:
        run_vina(
            receptor=receptor,
            ligand_path=ligand_path,
            out_pdbqt=out_pdbqt,
            log_file=log_file,
            center_x=center_x,
            center_y=center_y,
            center_z=center_z,
            size_x=size_x,
            size_y=size_y,
            size_z=size_z,
            exhaustiveness=exhaustiveness
            )

        # Extract the affinity of the first pose: the row whose first field is
        # exactly "1" (so "10", "1.5", ... are not mistaken for it).
        with open(log_file) as f:
            for line in f:
                fields = line.split()
                if len(fields) >= 2 and fields[0] == "1":
                    affinity = float(fields[1])
                    break
    except Exception as e:
        # One bad ligand must not discard the results of the whole batch;
        # report it and record the ligand with a missing affinity.
        tqdm.write(f"Docking failed for {ligand_name}: {e}")

    results.append({"ligand": ligand_name, "affinity_kcal_per_mol": affinity})

# --- Save the results as CSV ---
df = pd.DataFrame(results)
csv_path = os.path.join(output_dir, "docking_results.csv")
df.to_csv(csv_path, index=False)

print("✅ 도킹 완료! 결과는", csv_path, "에 저장되었습니다.")

  0%|          | 0/1681 [00:00<?, ?it/s]


RuntimeError: Error: file /content/receptor.pdbqt does not exist.