In [1]:
import sqlite3
import pandas as pd

from contextlib import closing
from rdkit import Chem
from rdkit.Chem import Draw, QED, Descriptors, Lipinski

In [2]:
# Alternative database locations used in other environments:
#   db_file = '/workspace/common.sqlite3'
#   db_file = '/clara/virtual_screening/dataset.db'
db_file = '/content/dataset.db'

# Open the SQLite database shared by the cells below. uri=True enables
# SQLite URI filename handling for db_file.
conn = sqlite3.connect(database=db_file, uri=True)

In [8]:
# Load the SMILES strings, their stored embeddings, and the descriptor
# columns from the `smiles` table into a DataFrame.
smiles_query = '''
                 SELECT smiles, embedding, embedding_dim,
                        logp, wt, hdonors, hacceptors, rbonds, qed
                 FROM smiles
                 '''
df = pd.read_sql(smiles_query, con=conn)

In [9]:
# Persist the queried frame to an HDF5 file under the key 'data'.
# pandas needs the optional PyTables package (`tables`) for this call; without
# it the call raises "ImportError: Missing optional dependency 'tables'".
# `key` is passed by keyword because newer pandas releases deprecate passing
# it positionally.
df.to_hdf('/content/emb_data.h5', key='data')

ImportError: Missing optional dependency 'tables'.  Use pip or conda to install tables.

In [15]:
# Recompute the RDKit descriptors for every SMILES string in `df` and write
# them to the matching row of `generated_smiles`.
#
# NOTE(review): `df` is read from the `smiles` table, but the UPDATE targets
# `generated_smiles` -- confirm that writing to a different table is intended.
update_sql = '''
    UPDATE generated_smiles
    SET logp = ?,
        wt = ?,
        hdonors = ?,
        hacceptors = ?,
        rbonds = ?,
        qed = ?
    WHERE smiles = ?'''

# `with conn` commits the transaction when the loop finishes and rolls it back
# if an exception escapes; without a commit the UPDATEs would be discarded when
# the connection is closed. A single cursor is reused for all rows instead of
# opening and closing one per molecule.
with conn, closing(conn.cursor()) as cur:
    for smi in df.smiles:
        m = Chem.MolFromSmiles(smi)
        if m is None:
            # RDKit could not parse this string; report it and move on.
            print(f'invalid smiles {smi}')
            continue

        logp = Descriptors.MolLogP(m)
        wt = Descriptors.MolWt(m)
        hdonors = Lipinski.NumHDonors(m)
        hacceptors = Lipinski.NumHAcceptors(m)
        rbonds = Lipinski.NumRotatableBonds(m)
        qed = QED.qed(m)

        # Parameter order must match the placeholders in update_sql.
        cur.execute(
            update_sql,
            [logp, wt, hdonors, hacceptors, rbonds, qed, smi])

2.34018 411.3470000000001 2 8 8 0.4892389919649251
1.4968799999999995 530.4340000000003 3 13 9 0.2659785352219255
3.3636800000000027 527.4700000000003 2 10 9 0.313770778798207
3.8018800000000037 526.4620000000003 1 11 9 0.3230780453594245
-0.7191000000000003 456.49100000000016 8 8 4 0.2797112905825898
2.801080000000001 493.4790000000002 1 10 6 0.31888891826733795
2.7049000000000003 585.532 5 12 13 0.09867484188308541
2.4245000000000005 523.5050000000002 2 11 7 0.27460808932406083
4.638400000000004 484.5080000000003 1 8 5 0.3259558247550338
1.6030999999999993 505.42400000000026 3 12 9 0.2836044712186509
1.6064999999999994 529.4460000000004 3 12 9 0.27037340922006814
1.6064999999999994 529.4460000000004 3 12 9 0.27037340922006814
1.1479999999999988 531.4180000000003 3 14 9 0.20327462222051543
1.1720999999999993 477.37000000000023 3 12 9 0.29701704066341345
0.2615 573.3480000000003 5 14 12 0.14415641631780646
1.1720999999999993 477.37000000000023 3 12 9 0.29701704066341345
0.2615 573.3480