# Prepare Docking Files for LDHA Inhibitors (Enhanced Version)

This notebook:
- Loads `LDHA_inhibitors_deduplicated.csv`
- Generates 3D ligand structures (`.mol`, `.pdb`)
- Prepares a placeholder docking results file
- Provides instructions for `.pdb` ➔ `.pdbqt` conversion


In [None]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem, rdmolfiles
import os

## 1. Load Dataset

In [30]:
# Read the deduplicated inhibitor table from the working directory and
# preview its first rows as the cell's rich output.
df = pd.read_csv(filepath_or_buffer='LDHA_inhibitors_deduplicated.csv')
df.head(n=5)

Unnamed: 0,Ligand SMILES,IC50 (nM),class
0,C#CCCCC(=O)Nc1ccc(C(=O)c2ccc(NCc3ccc(-c4ccc(O)...,500.0,1
1,C#CCN(Cc1ccc(-c2ccc(O)c(C(=O)O)c2)o1)Cc1ccc(-c...,1600.0,0
2,C#CCNCc1ccc(-c2ccc(O)c(C(=O)O)c2)o1.N,2000.0,0
3,C#Cc1cc(-c2nn(-c3nc(C(=O)O)cs3)c(CC3CC3)c2Cc2c...,456.0,1
4,C/C=C/c1cc(-c2nn(-c3nc(C(=O)O)cs3)c(CC3CC3)c2C...,215.8,1


In [None]:
# Load the inhibitor table and drop rows that lack a SMILES string or a class label.
# Only the file read is guarded, so unrelated errors are not mislabelled as a missing file.
try:
    df = pd.read_csv('LDHA_inhibitors_deduplicated.csv')
except FileNotFoundError as err:
    # The path above is relative, so the file must sit in the working directory.
    raise FileNotFoundError(
        "CSV file not found. Please place 'LDHA_inhibitors_deduplicated.csv' "
        "in the notebook's working directory."
    ) from err

df = df.dropna(subset=['Ligand SMILES', 'class'])
print(f"Loaded {len(df)} compounds.")


## 2. Prepare Output Folder

In [22]:
# Folder that receives the generated ligand structure files.
# exist_ok=True makes this cell safe to re-run when the folder already exists.
output_dir = 'docking/ligands'
os.makedirs(name=output_dir, exist_ok=True)
print('Ligand files will be saved to: ' + output_dir)

Ligand files will be saved to: docking/ligands


## 3. Generate 3D Structures & Save Files

In [None]:
# Build one 3D conformer per ligand and write it to `output_dir` as .mol and .pdb.
#
# File names come from a per-ligand compound ID: the 'Compound_ID' column when the
# table has one, otherwise a sequential ID in row order (CMPD_001, CMPD_002, ...).
# The 'class' column must not be used for this: the previewed rows show it holding
# 0/1 labels, so naming files after it makes ligands overwrite each other.
embed_params = AllChem.ETKDG()
embed_params.randomSeed = 42  # fixed seed so a fresh re-run yields the same coordinates
has_compound_id = 'Compound_ID' in df.columns

for position, (_idx, row) in enumerate(df.iterrows(), start=1):
    cmpd_id = row['Compound_ID'] if has_compound_id else f'CMPD_{position:03d}'
    smiles = row['Ligand SMILES']
    try:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            print(f'SKIP: Invalid SMILES for {cmpd_id}')
            continue
        mol = Chem.AddHs(mol)

        # EmbedMolecule returns the new conformer ID, or -1 when embedding fails.
        # Without a conformer there is nothing to optimize or save, so skip the ligand.
        if AllChem.EmbedMolecule(mol, embed_params) != 0:
            print(f'SKIP: 3D embedding failed for {cmpd_id}')
            continue

        # A non-zero return means the force-field optimization did not converge
        # (or could not be set up); the geometry is still saved, but flagged.
        if AllChem.UFFOptimizeMolecule(mol) != 0:
            print(f'WARNING: UFF optimization did not converge for {cmpd_id}')

        # Save MOL file
        mol_path = os.path.join(output_dir, f'{cmpd_id}.mol')
        rdmolfiles.MolToMolFile(mol, mol_path)

        # Save PDB file
        pdb_path = os.path.join(output_dir, f'{cmpd_id}.pdb')
        rdmolfiles.MolToPDBFile(mol, pdb_path)

        print(f'Successfully saved: {cmpd_id}.mol and {cmpd_id}.pdb')
    except Exception as e:
        # Best-effort batch: report the failure and continue with the next ligand.
        print(f'Error processing {cmpd_id}: {e}')

## 4. Prepare Docking Results Placeholder

In [None]:
# Write an empty results table with one row per ligand, to be filled in after docking.
# The loaded table may not carry a 'Compound_ID' column (the previewed columns are
# 'Unnamed: 0', 'Ligand SMILES', 'IC50 (nM)', 'class'), so fall back to sequential IDs
# in row order (CMPD_001, CMPD_002, ...) instead of failing with a KeyError.
if 'Compound_ID' in df.columns:
    placeholder = df[['Compound_ID']].copy()
else:
    placeholder = pd.DataFrame(
        {'Compound_ID': [f'CMPD_{position:03d}' for position in range(1, len(df) + 1)]}
    )
placeholder['Docking_Score'] = None  # written as an empty field in the CSV
placeholder_path = 'docking_results_placeholder.csv'
placeholder.to_csv(placeholder_path, index=False)
print(f'Docking results placeholder saved to: {placeholder_path}')

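## Note:
- To convert a `.pdb` file to `.pdbqt` format, use Open Babel from the command line, substituting the name of each generated ligand file:
```bash
obabel CMPD_001.pdb -O CMPD_001.pdbqt
```
- Repeat for every ligand once the `.pdb` files have been generated, or convert them all at once from inside the ligand folder with `obabel *.pdb -opdbqt -m`.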

**Once the `.pdb` files have been converted to `.pdbqt`, the ligands are ready for docking.**