In [2]:
import pandas as pd
import os

In [14]:
# Import Docking output file
#
# Marker lines recognised in the merged log:
#   [STARTRUN] ... <dir>  -> a run begins; the last space-separated token is the ligand directory
#   [RESULT]              -> the lines that follow belong to the result table
#   [REMARK]              -> skipped while a table is being captured
#   [ENDRESULT]           -> the result table is complete
#   [ENDRUN]              -> the run is complete and is appended to `data`
data = []
with open("./output/docking_output.txt") as merged_logfile:
    captured_data = {}
    result_table = ""
    capture_table = False  # True only between [RESULT] and [ENDRESULT]
    for line in merged_logfile:
        if line.startswith("[STARTRUN]"):
            lig_dir_path = line.split(" ")[-1]
            captured_data["lig_id"] = os.path.basename(lig_dir_path).strip()
            captured_data["lig_dir_path"] = lig_dir_path.strip()
        elif line.startswith("[ENDRUN]"):
            captured_data["table"] = result_table
            data.append(captured_data)
            # Start the next run from a clean state. Resetting the flag here keeps a
            # run that never emitted [ENDRESULT] from leaking lines into the next table.
            captured_data = {}
            result_table = ""
            capture_table = False
        elif line.startswith("[ENDRESULT]"):
            capture_table = False
        elif line.startswith("[RESULT]"):
            capture_table = True
        elif capture_table and not line.startswith("[REMARK]"):
            # Only non-marker lines ever reach the table text.
            result_table += line

In [15]:
# Inspect the parsed runs: each entry holds 'lig_id', 'lig_dir_path' and the raw 'table' text
data

[{'lig_id': '0',
  'lig_dir_path': '/mnt/d/computers-in-biology/docking/runs2/7BQY/0',
  'table': 'mode | affinity | rmsd_lb | rmsd_ub\n-----|----------|---------|--------\n   1 |     -7.7 |    0.000 |     0.000\n   2 |     -7.7 |    0.479 |     2.068\n   3 |     -7.6 |    1.953 |     7.983\n'},
 {'lig_id': '1',
  'lig_dir_path': '/mnt/d/computers-in-biology/docking/runs2/7BQY/1',
  'table': 'mode | affinity | rmsd_lb | rmsd_ub\n-----|----------|---------|--------\n   1 |     -8.3 |    0.000 |     0.000\n   2 |     -7.9 |    1.871 |     6.870\n   3 |     -7.8 |    2.261 |     6.602\n'},
 {'lig_id': '2',
  'lig_dir_path': '/mnt/d/computers-in-biology/docking/runs2/7BQY/2',
  'table': 'mode | affinity | rmsd_lb | rmsd_ub\n-----|----------|---------|--------\n   1 |     -8.3 |    0.000 |     0.000\n   2 |     -8.2 |    3.115 |     7.459\n   3 |     -8.0 |    3.116 |     7.339\n'},
 {'lig_id': '3',
  'lig_dir_path': '/mnt/d/computers-in-biology/docking/runs2/7BQY/3',
  'table': 'mode | aff

### A function to convert a markdown table into a list of row dicts

In [11]:
def covert_table_to_dict(table):
    """
    Convert a pipe-delimited (markdown-style) table into a list of row dicts.

    The first line of ``table`` is the header. Header names are stripped, have
    '.' removed and spaces replaced by '_'; any name starting with 'affinity'
    is shortened to 'affinity'. Separator lines (made only of '-', '|', ':' and
    whitespace) and lines without a '|' are skipped. Every remaining cell is
    converted with ``float``.

    The function name keeps its original spelling so existing callers work.

    Parameters
    ----------
    table : str
        Table text: a header line, a line break, then the remaining lines.

    Returns
    -------
    list of dict or None
        One ``{column_name: float}`` dict per data row, or ``None`` (after
        printing a message) when ``table`` contains no line break.

    Raises
    ------
    ValueError
        If a data cell cannot be parsed as a float.
    IndexError
        If a data row has more cells than the header has columns.
    """
    table_splitted = table.split('\n', 1)

    if len(table_splitted) != 2:
        print("Invalid table format detected.")
        return None

    def rename_columns(c):
        c = c.strip().replace('.', "").replace(" ", "_")
        if c.startswith('affinity'):
            c = "affinity"
        return c

    # Get First Header Line
    columns = [rename_columns(c) for c in table_splitted[0].split('|')]

    separator_chars = set('-|: \t')
    data = []
    for line in table_splitted[1].split('\n'):
        stripped = line.strip()
        # A separator row such as "-----|------" contains nothing but rule
        # characters. Testing only the first character would also discard
        # data rows whose first cell is a negative number.
        if not stripped or set(stripped) <= separator_chars:
            continue
        cells = line.split('|')
        if len(cells) > 1:
            data.append({columns[i]: float(cell.strip()) for i, cell in enumerate(cells)})
    return data
     

## Create the result dataframe

In [16]:
# Build one long table with a row per (ligand, binding mode).
# Rows are collected in a plain list and converted to a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame on every iteration.
records = []
for e in data:
    # covert_table_to_dict returns None for a malformed table -> it contributes no rows
    for row in covert_table_to_dict(e['table']) or []:
        records.append({**row, 'lig_id': e['lig_id'], 'lig_dir_path': e['lig_dir_path']})
result_df = pd.DataFrame(records)

result_df.head()

Unnamed: 0,mode,affinity,rmsd_lb,rmsd_ub,lig_id,lig_dir_path
0,1.0,-7.7,0.0,0.0,0,/mnt/d/computers-in-biology/docking/runs2/7BQY/0
1,2.0,-7.7,0.479,2.068,0,/mnt/d/computers-in-biology/docking/runs2/7BQY/0
2,3.0,-7.6,1.953,7.983,0,/mnt/d/computers-in-biology/docking/runs2/7BQY/0
3,1.0,-8.3,0.0,0.0,1,/mnt/d/computers-in-biology/docking/runs2/7BQY/1
4,2.0,-7.9,1.871,6.87,1,/mnt/d/computers-in-biology/docking/runs2/7BQY/1


In [17]:
# Convert the listed columns to numeric dtypes, one column at a time
numeric_columns = ["mode", "affinity", "lig_id"]
for column_name in numeric_columns:
    # errors='coerce' turns values that cannot be parsed into NaN instead of raising
    result_df[column_name] = pd.to_numeric(result_df[column_name], errors='coerce')
result_df.dtypes

mode            float64
affinity        float64
rmsd_lb         float64
rmsd_ub         float64
lig_id            int64
lig_dir_path     object
dtype: object

In [19]:
# Keep only the rows of mode 1 and order them by ascending affinity
is_first_mode = result_df["mode"].eq(1)
result_df_m1 = result_df.loc[is_first_mode].sort_values("affinity")
result_df_m1

Unnamed: 0,mode,affinity,rmsd_lb,rmsd_ub,lig_id,lig_dir_path
270,1.0,-9.9,0.0,0.0,90,/mnt/d/computers-in-biology/docking/runs2/7BQY/90
171,1.0,-9.7,0.0,0.0,57,/mnt/d/computers-in-biology/docking/runs2/7BQY/57
267,1.0,-9.5,0.0,0.0,89,/mnt/d/computers-in-biology/docking/runs2/7BQY/89
45,1.0,-9.4,0.0,0.0,15,/mnt/d/computers-in-biology/docking/runs2/7BQY/15
708,1.0,-9.3,0.0,0.0,238,/mnt/d/computers-in-biology/docking/runs2/7BQY...
...,...,...,...,...,...,...
408,1.0,-6.4,0.0,0.0,136,/mnt/d/computers-in-biology/docking/runs2/7BQY...
810,1.0,-6.3,0.0,0.0,278,/mnt/d/computers-in-biology/docking/runs2/7BQY...
189,1.0,-1.0,0.0,0.0,63,/mnt/d/computers-in-biology/docking/runs2/7BQY/63
135,1.0,-1.0,0.0,0.0,45,/mnt/d/computers-in-biology/docking/runs2/7BQY/45


## Join the Ligand Metadata

In [22]:
# Load the ligand metadata, then rename its LIG_ID column to "lig_id"
lig_metadata = pd.read_csv("./output/ligand_dataset.csv")
lig_metadata = lig_metadata.rename(columns={"LIG_ID": "lig_id"})
lig_metadata.head()

Unnamed: 0,lig_id,smiles,name,molecular_weight,formula,logP,molecular_wt_calculated,havy_atom_count,hydrogen_acceptors,hydrogen_donors,rotatable_bonds,amide_bonds,ring_count
0,0,COC1=CC(=CC(OC)=C1O)C1=[O+]C2=C(C=C1OC1OC(COC(...,Malvidin 3-O-(6''-p-coumaroyl-glucoside),639.58,C32H31O14,2.6635,639.586,46,13,7,18,0,5
1,1,CC(=O)OCC1OC(OC2=CC3=C(O)C=C(O)C=C3[O+]=C2C2=C...,Delphinidin 3-O-(6''-acetyl-galactoside),507.421,C23H23O13,0.6584,507.424,36,12,8,14,0,4
2,2,[H][C@]1(COC(C)=O)O[C@@]([H])(OC2=CC3=C(O)C=C(...,Cyanidin 3-O-(6''-acetyl-galactoside),491.422,C23H23O12,0.9528,491.425,35,11,7,13,0,4
3,3,OC[C@H]1O[C@@H](OC2=CC3=C(O)C=C(O)C=C3[O+]=C2C...,Cyanidin 3-O-galactoside,449.385,C21H21O11,0.382,449.388,32,10,8,12,0,4
4,4,OC[C@H]1O[C@@H](OC2=CC3=C(C=C(O)C=C3O)[O+]=C2C...,Cyanidin 3-O-glucoside,449.385,C21H21O11,0.382,449.388,32,10,8,12,0,4


In [31]:
# Left join: keep every row of result_df_m1 and attach the ligand metadata by lig_id
df_merged = result_df_m1.merge(lig_metadata, on="lig_id", how="left")
df_merged

Unnamed: 0,mode,affinity,rmsd_lb,rmsd_ub,lig_id,lig_dir_path,smiles,name,molecular_weight,formula,logP,molecular_wt_calculated,havy_atom_count,hydrogen_acceptors,hydrogen_donors,rotatable_bonds,amide_bonds,ring_count
0,1.0,-9.9,0.0,0.0,90,/mnt/d/computers-in-biology/docking/runs2/7BQY/90,[H][C@@]1(OC2=CC(OC(=O)C3=CC(O)=C(O)C(O)=C3)=C...,"Theaflavin 3,3'-O-digallate",868.702,C43H32O20,3.4742,868.709,63,20,13,19,0,8
1,1.0,-9.7,0.0,0.0,57,/mnt/d/computers-in-biology/docking/runs2/7BQY/57,OC1C(COC(=O)\C=C\C2=CC=C(O)C=C2)OC(OC2=CC3=C(O...,Cyanidin 3-O-(6''-p-coumaroyl-glucoside),595.528,C30H27O13,2.3519,595.533,43,12,8,15,0,5
2,1.0,-9.5,0.0,0.0,89,/mnt/d/computers-in-biology/docking/runs2/7BQY/89,O[C@@H]1CC2=C(O)C=C(O)C=C2O[C@@H]1C1=CC2=C(C(O...,Theaflavin 3'-O-gallate,716.598,C36H28O16,3.1949,716.604,52,16,11,15,0,7
3,1.0,-9.4,0.0,0.0,15,/mnt/d/computers-in-biology/docking/runs2/7BQY/15,COC1=CC(=CC(OC)=C1O)C1=C(O[C@@H]2O[C@H](CO)[C@...,"Malvidin 3,5-O-diglucoside",655.578,C29H35O17,-1.8333,655.582,46,16,10,21,0,5
4,1.0,-9.3,0.0,0.0,238,/mnt/d/computers-in-biology/docking/runs2/7BQY...,[H][C@@]1(C)O[C@@]([H])(O[C@]2([H])[C@@]([H])(...,Kaempferol 3-O-xylosyl-rutinoside,740.659,C33H40O19,-2.5409,740.664,52,19,11,21,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266,1.0,-6.4,0.0,0.0,136,/mnt/d/computers-in-biology/docking/runs2/7BQY...,COC1=C(OC)C=C(C=C1)C1=CC(=O)C2=C(O1)C(OC)=C(OC...,Nobiletin,402.395,C21H22O8,3.5116,402.399,29,8,0,13,0,3
267,1.0,-6.3,0.0,0.0,278,/mnt/d/computers-in-biology/docking/runs2/7BQY...,CN(C)CCCSC1=CC=CC=C1NC(=O)C=CC2=CC=CC=C2,Cinanserine,340.500,C20H24N2OS,4.3823,340.492,24,3,1,10,1,2
268,1.0,-1.0,0.0,0.0,63,/mnt/d/computers-in-biology/docking/runs2/7BQY/63,[Cl-].OC[C@H]1O[C@@H](OC2=CC(O)=CC3=[O+]C(=C(O...,"Pelargonidin 3,5-O-diglucoside",630.979,C27H31ClO15,-4.8465,630.983,43,14,10,17,0,5
269,1.0,-1.0,0.0,0.0,45,/mnt/d/computers-in-biology/docking/runs2/7BQY/45,[Cl-].COC1=CC(=CC(OC)=C1O)C1=[O+]C2=CC(O)=CC(O...,Malvidin 3-O-galactoside,528.890,C23H25ClO12,-2.3024,528.894,36,11,7,15,0,4


In [24]:
# (rows, columns) of the merged table
df_merged.shape

(271, 18)

## Split Poses and Convert to PDB

In [28]:
import shlex, subprocess 
from datetime import datetime
import os

In [37]:
def split_docked_complex(destination_dir, docked_file = "results.pdbqt"):
    """
    Run ``vina_split`` on a docking result file.

    The command is executed with ``<destination_dir>/poses`` as its working
    directory (created if missing) and with ``--ligand pose_ --flex chain_``.
    NOTE(review): presumably vina_split writes its pose_*/chain_* files into
    the working directory -- confirm against the vina_split documentation.

    Parameters
    ----------
    destination_dir : str
        Directory that contains ``docked_file``. May be relative; it is
        resolved to an absolute path before the command is started.
    docked_file : str, optional
        Name of the docking result file inside ``destination_dir``.

    Returns
    -------
    None
        The tool's stdout is printed on success. If the tool writes to stderr
        or exits with a non-zero status, ``"Error: "`` followed by its stderr
        (or stdout when stderr is empty) is printed instead.
    """
    # Resolve to an absolute path first: the tool runs with cwd=result_dir, so a
    # relative --input path would otherwise be looked up inside "poses".
    destination_dir = os.path.abspath(destination_dir)
    result_dir = os.path.join(destination_dir, "poses")
    os.makedirs(result_dir, exist_ok=True)

    # Pass argv as a list so a path containing spaces or quotes reaches
    # vina_split as one argument instead of being re-tokenised.
    args = [
        "vina_split",
        "--input", os.path.join(destination_dir, docked_file),
        "--ligand", "pose_",
        "--flex", "chain_",
    ]
    completed = subprocess.run(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        cwd=result_dir # Set current working directory
    )
    output = completed.stdout.decode("utf-8")
    error = completed.stderr.decode("utf-8")
    # A non-zero exit status is a failure even when nothing was written to stderr.
    if error or completed.returncode != 0:
        print("Error: ", error or output)
    else:
        print(output)

In [1]:
# Ligand ids (as strings) whose docked poses are split and converted below
selected_ligands = ["90", "57", "89"]

In [38]:
# Print the notebook's current working directory
!pwd

/mnt/d/computers-in-biology/notebooks


In [40]:
# Build the result directory of every selected ligand, then split each one
ligand_result_dirs = [
    f"/mnt/d/computers-in-biology/notebooks/data/docking_results/{ligand_id}"
    for ligand_id in selected_ligands
]
for ligand_result_dir in ligand_result_dirs:
    split_docked_complex(ligand_result_dir)






### Create complex on Pose 1 and Convert to PDB

In [2]:
from pymol import cmd

# For every selected ligand: load the receptor and the first pose into an
# emptied PyMOL session and save everything that is loaded to one PDB file.
for ligand_id in selected_ligands:
    receptor_path = f"/mnt/d/computers-in-biology/notebooks/data/docking_results/{ligand_id}/7BQY_final.pdbqt"
    pose_path = f"/mnt/d/computers-in-biology/notebooks/data/docking_results/{ligand_id}/poses/pose_1.pdbqt"
    complex_path = f"/mnt/d/computers-in-biology/notebooks/data/docking_results/{ligand_id}/poses/pose_1_complex.pdb"

    # Remove the objects loaded for the previous ligand
    cmd.delete('all')
    cmd.load(receptor_path, "receptor")
    cmd.load(pose_path, "ligand")
    # NOTE(review): presumably makes PyMOL write CONECT records for all bonds -- confirm in the PyMOL settings docs
    cmd.set('pdb_conect_all', 'on')
    cmd.save(complex_path)

 PyMOL not running, entering library mode (experimental)
