In [1]:
import pandas as pd
from tqdm.auto import tqdm
from utils import prepare_ligand, prepare_protien, get_docking_score
import firebase_admin
from firebase_admin import firestore
from pathlib import Path
from multiprocessing import Pool
from functools import partial
from rdkit.rdBase import BlockLogs

Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading some PyTorch models, missing a dependency. No module named 'torch'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. No module named 'torch'
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'torch'
Skipped loading some Jax models, missing a dependency. No module named 'jax'


In [2]:
def dock_ligands(ligand_chunk, protien_pdb_path):
    """Dock every ligand in ``ligand_chunk`` against a single protein.

    Args:
        ligand_chunk: Iterable of ligand SMILES strings.
        protien_pdb_path: Location of the protein PDB file; handed unchanged
            to ``prepare_protien``.

    Returns:
        A list with one entry per ligand, in input order: the value returned
        by ``get_docking_score``, or ``None`` when preparing or docking that
        ligand raised an exception.
    """
    # RDKit log output is blocked while the chunk is being processed.
    with BlockLogs():
        protien_mol = prepare_protien(protien_pdb_path)

        scores = []
        for ligand_smiles in ligand_chunk:
            try:
                # Preparation is inside the try on purpose: a ligand that
                # cannot be prepared gets a None score instead of aborting
                # the whole chunk (and the pool run that consumes it).
                ligand_mol = prepare_ligand(ligand_smiles)
                scores.append(get_docking_score(protien_mol, ligand_mol))
            except Exception:
                scores.append(None)

        return scores


def chunks(lst, n):
    """Lazily split ``lst`` into consecutive slices of at most ``n`` items.

    Every slice except possibly the last has exactly ``n`` items; the last
    one holds whatever remains.
    """
    offsets = range(0, len(lst), n)
    yield from (lst[start : start + n] for start in offsets)

In [3]:
# Reuse the default Firebase app when this cell is re-run in the same kernel:
# calling initialize_app() a second time raises ValueError because the default
# app already exists.
try:
    app = firebase_admin.get_app()
except ValueError:
    app = firebase_admin.initialize_app()
fdb = firestore.client()

In [24]:
ligands_path = Path("./KasA_smm_val.csv")
protien_pdb_path = Path("./KasA_3285_no_min.pdb")
CHUNK_SIZE = 4  # number of ligands handed to a worker per task

df = pd.read_csv(ligands_path)
smiles = df["smiles"]

# Results are stored in one collection per (ligand file, protein file) pair.
collection = fdb.collection(f"{ligands_path.name}-{protien_pdb_path.name}")
# stream() fetches the stored documents with a single query. The previous
# list_documents() + get() pattern cost one round trip per document and could
# return references to missing documents, whose to_dict() is None.
processed_idxs = [snapshot.to_dict()["idx"] for snapshot in collection.stream()]
# Boolean mask over the CSV rows that have no stored result yet, so an
# interrupted run resumes where it left off.
unprocessed_idxs = ~smiles.index.isin(processed_idxs)
unprocessed_smiles = smiles[unprocessed_idxs]

unprocessed_smiles_chunked = list(chunks(unprocessed_smiles, CHUNK_SIZE))

In [6]:
def parallel_inner(ligand_chunk, protien_pdb_path):
    """Dock one chunk of ligands and pair each score with its index label.

    Args:
        ligand_chunk: Object exposing ``.index`` whose items are the ligands
            to dock; it is forwarded as-is to ``dock_ligands``.
        protien_pdb_path: Path-like object with an ``absolute()`` method; it
            is converted to an absolute path string before docking.

    Returns:
        A list of ``(index_label, score)`` tuples in chunk order.
    """
    absolute_pdb = str(protien_pdb_path.absolute())
    chunk_scores = dock_ligands(ligand_chunk, absolute_pdb)
    return [(label, score) for label, score in zip(ligand_chunk.index, chunk_scores)]

# The context manager tears the worker pool down even when docking or a
# Firestore write raises part-way through; previously the workers were only
# released if the loop ran to completion.
with Pool(2) as pool:
    iterable = pool.imap(
        partial(parallel_inner, protien_pdb_path=protien_pdb_path),
        unprocessed_smiles_chunked,
    )

    # Loop-invariant: every result goes to the same collection.
    results_collection = fdb.collection(
        f"{ligands_path.name}-{protien_pdb_path.name}"
    )

    for result_chunk in tqdm(iterable, total=len(unprocessed_smiles_chunked)):
        # One batched write per chunk; each document is keyed by the row index.
        batch = fdb.batch()

        for idx, score in result_chunk:
            batch.set(
                results_collection.document(str(idx)),
                {"idx": idx, "score": score},
            )

        print("writing", result_chunk)
        batch.commit()

  0%|          | 0/2 [00:00<?, ?it/s]

Mol [H]OC1=C([H])C([H])=C(C([H])([H])[C@@]([H])(C(=O)N([H])C([H])([H])C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])C([H])([H])C(=O)N([H])[C@]([H])(C(=O)N([H])C([H])([H])C(=O)N([H])C([H])([H])C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])C2([H])([H])OC2=O)C([H])([H])C2=C([H])C([H])=C([H])C([H])=C2[H])C([H])([H])[H])C([H])([H])C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H])C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])C(=O)N([H])[H])C([H])([H])C2=C([H])NC([H])=[N+]2[H])C([H])([H])C2=C([H])C([H])=C([H])C([H])=C2[H])C([H])([H])C2=C([H])C([H])=C([H])C([H])=C2[H])C([H])([H])O[H])C([H])([H])C(=O)N([H])[H])C([H])([H])C(=O)N([H])[H])C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H])C([H])([H])C2=C([H])C

Computing Vina grid ... Computing Vina grid ... done.
done.
Performing docking (random seed: -2004067187) ... 
0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
**



***Performing docking (random seed: 1975231902) ... 
0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
*



******************************************************************

mode |   affinity | dist from best mode
     | (kcal/mol) | rmsd l.b.| rmsd u.b.
-----+------------+----------+----------
   1       -6.402          0          0
   2       -6.201      12.92      14.55
   3        -5.86      13.29      14.77
   4        -5.75      13.27      14.97
   5       -5.695       7.69       9.89
   6       -5.591      3.532      6.797
   7        -5.36      13.37      15.62
   8        -5.31      12.09      13.91
   9       -5.189      25.62      28.12
**************

Mol [H]OC1=C([H])C([H])=C(C([H])([H])[C@@]([H])(C(=O)N([H])C([H])([H])C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])C([H])([H])C(=O)N([H])[C@]([H])(C(=O)N([H])C([H])([H])C(=O)N([H])C([H])([H])C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])C2([H])([H])OC2=O)C([H])([H])C2=C([H])C([H])=C([H])C([H])=C2[H])C([H])([H])[H])C([H])([H])C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H])C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])C(=O)N([H])[H])C([H])([H])C2=C([H])NC([H])=[N+]2[H])C([H])([H])C2=C([H])C([H])=C([H])C([H])=C2[H])C([H])([H])C2=C([H])C([H])=C([H])C([H])=C2[H])C([H])([H])O[H])C([H])([H])C(=O)N([H])[H])C([H])([H])C(=O)N([H])[H])C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H])C([H])([H])C2=C([H])C

***writing [(0, -6.402), (1, None)]
*************

mode |   affinity | dist from best mode
     | (kcal/mol) | rmsd l.b.| rmsd u.b.
-----+------------+----------+----------
   1       -6.654          0          0
   2       -6.122      5.177      8.641
   3       -6.037      2.743      3.675
   4       -5.914      6.391      8.559
   5       -5.857       5.35      8.921
   6       -5.834      13.88      16.25
   7       -5.441      9.273      13.12
   8       -5.364      12.75      15.55
   9       -5.353      14.12      15.76


Mol [HH].[H]OC1=C([H])C([H])=C(C([H])([H])[C@@]([H])(C(=O)N([H])C([H])([H])C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])C([H])([H])C(=O)N([H])[C@]([H])(C(=O)N([H])C([H])([H])C(=O)N([H])C([H])([H])C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])[C@]([H])(C(=O)N([H])C2([H])([H])OC2=O)C([H])([H])C2=C([H])C([H])=C([H])C([H])=C2[H])C([H])([H])[H])C([H])([H])C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H])C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])C(=O)N([H])[H])C([H])([H])C2=C([H])NC([H])=[N+]2[H])C([H])([H])C2=C([H])C([H])=C([H])C([H])=C2[H])C([H])([H])C2=C([H])C([H])=C([H])C([H])=C2[H])C([H])([H])O[H])C([H])([H])C(=O)N([H])[H])C([H])([H])C(=O)N([H])[H])C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H])C([H])([H])C2=C(

Computing Vina grid ... done.




Performing docking (random seed: 1102715549) ... 
0%   10   20   30   40   50   60   70   80   90   100%
|----|----|----|----|----|----|----|----|----|----|
***************************************************

mode |   affinity | dist from best mode
     | (kcal/mol) | rmsd l.b.| rmsd u.b.
-----+------------+----------+----------
   1       -6.785          0          0
   2       -6.782      2.041      2.799
   3       -6.729      15.16       18.2
   4       -6.625      2.412      3.532
   5       -6.562      2.615       3.63
   6       -6.205      2.199      2.948
   7       -6.152      1.885      3.193
   8       -6.076      2.241      3.465
   9       -5.911      16.87      19.64
writing [(2, -6.654), (3, -6.785)]


In [7]:
from pathlib import Path

In [8]:
# Scratch cell: build a Path from a relative PDB file name to inspect it.
asd = Path("./KasA_3285_no_min.pdb")

In [9]:
# Scratch check: display the final component (file name) of the path.
asd.name

'KasA_3285_no_min.pdb'