### Add entries to the building block database


In [26]:
import pickle as pkl
import sqlite3 as sql
from pathlib import Path
import pandas as pd
from rdkit.Chem import Draw
from rdkit.Chem.rdMolDescriptors import CalcMolFormula, CalcExactMolWt
from rdkit import Chem
from PIL import Image
from util_functions import *
from notebook_config import *

In [27]:
# Path of the pickled DataFrame (`library_constituents_dataframe.pkl`) that the
# next cell loads with pd.read_pickle. OUTPUT_DIR is not defined in this
# notebook; presumably it comes from the `notebook_config` star import
# (TODO confirm).
INFO_PATH = OUTPUT_DIR / 'library_constituents_dataframe.pkl'

In [28]:
"""Import data and open DB connection"""
# Load the pickled DataFrame of library constituents.
# NOTE(review): unpickling executes arbitrary code embedded in the file; only
# load pickles produced by this project.
with open(INFO_PATH, 'rb') as file:
    df = pd.read_pickle(file)

# Build the short-name -> long-name mapping from a whitespace-separated text
# file. The first token of each line is the key (short name) and the second
# token is the value (long name); any further tokens are ignored.
compound_mapping = {}
with open(COMPOUND_MAPPING_PATH, 'r') as file:
    for line_number, line in enumerate(file, start=1):
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no mapping and must not abort the import.
            continue
        if len(fields) < 2:
            raise ValueError(
                f'{COMPOUND_MAPPING_PATH}, line {line_number}: '
                f'expected "<short> <long>", got {line!r}'
            )
        compound_mapping[fields[0]] = fields[1]

# Connection and cursor are shared by the following cells and closed in the
# last cell of the notebook.
con = sql.connect(DB_PATH)
cur = con.cursor()

In [10]:
"""Add entries to DB. Save molecule images to the static directory and add the path to DB"""
# Insert one row per compound into main.buildingblocks and store a rendered
# structure image under <DB_STATIC_DIR>/image/<short>.png.
#
# NOTE(review): boc, cbz, tbu and tms are not assigned anywhere in this cell.
# They must already exist in the notebook namespace when it runs (e.g. via a
# star import, or left over from the loop variables of the LCMS cell below, in
# which case every inserted row would get the values of the last row processed
# there). Confirm where the per-compound protecting-group counts should come
# from before re-running this cell.

# Make sure the target directory exists so image.save() does not fail on a
# fresh checkout.
image_dir = DB_STATIC_DIR / 'image'
image_dir.mkdir(parents=True, exist_ok=True)

for short, long in compound_mapping.items():
    # Select the DataFrame row(s) for this compound once; .item() below raises
    # ValueError unless exactly one row matches the long name.
    row = df.loc[df['Compound Name'] == long]
    mol = row['mol'].item()
    category = row['Category'].item()
    smiles = Chem.MolToSmiles(mol)  # we regenerate SMILES instead of using the value from df to have a canonical representation
    image = Draw.MolToImage(mol)  # TODO change this into the better drawing method
    # save image to static dir and keep imagepath to add to db
    image_path = image_dir / f'{short}.png'
    image.save(image_path)
    # write this to the db (the absolute image path is stored)
    cur.execute(
        'INSERT INTO main.buildingblocks(long, short, SMILES, image, category, boc, cbz, tbu, tms) VALUES(?,?,?,?,?,?,?,?,?);',
        (long, short, smiles, str(image_path.resolve()), category, boc, cbz, tbu, tms),
    )
# A single commit after the loop: all inserts are persisted together.
con.commit()

In [29]:
"""For existing entries, use SMILES and number of protecting groups to calculate probable formulae in LCMS"""
# For every row of buildingblocks, compute the formula and exact mass of the
# desalted molecule (lcms_formula_1 / lcms_mass_1) and, if the row lists any
# protecting groups, comma-separated alternative formulae and masses
# (lcms_formula_alt / lcms_mass_alt) with one protecting group subtracted.
#
# desalt_building_block, string_formula_substraction and pg_dict are not
# defined in this notebook; presumably they come from the `util_functions` /
# `notebook_config` star imports (TODO confirm). pg_dict[name][0] is used as
# the formula to subtract and pg_dict[name][1] as the mass to subtract.

# fetchall() materialises the result first, so the same cursor can safely be
# reused for the UPDATE statements inside the loop.
rows = cur.execute('SELECT id, SMILES, boc, cbz, tbu, tms FROM buildingblocks;').fetchall()
for row_id, smiles, boc, cbz, tbu, tms in rows:  # row_id: avoid shadowing the builtin id()
    mol = Chem.MolFromSmiles(smiles)
    mol_desalted = desalt_building_block(mol)
    lcms_formula_1 = CalcMolFormula(mol_desalted)
    lcms_mass_1 = CalcExactMolWt(mol_desalted)
    cur.execute('UPDATE buildingblocks SET lcms_formula_1 = ?, lcms_mass_1 = ? WHERE id = ?', (lcms_formula_1, lcms_mass_1, row_id))

    additional_formulae = []
    additional_masses = []
    for pgname, count in zip(['boc', 'cbz', 'tbu', 'tms'], [boc, cbz, tbu, tms]):
        # A count of 0 contributes nothing.
        # NOTE(review): every iteration appends the same single-loss
        # formula/mass, so a count of 2 yields two identical entries rather
        # than the one-fold and two-fold loss. Confirm this is intended.
        for _ in range(count):
            additional_formulae.append(string_formula_substraction(lcms_formula_1, pg_dict[pgname][0]))
            # Masses are stored as text with four decimal places.
            additional_masses.append(f'{lcms_mass_1 - pg_dict[pgname][1]:.4f}')

    if additional_formulae:
        cur.execute('UPDATE buildingblocks SET lcms_formula_alt = ?, lcms_mass_alt = ? WHERE id = ?', (','.join(additional_formulae), ','.join(additional_masses), row_id))
    # One commit per row, after both updates, so a row is never left with only
    # its primary formula written if the alternative-formula step fails.
    con.commit()

In [30]:
# Close the SQLite connection opened with sql.connect(DB_PATH) earlier in the
# notebook. The preceding cells call con.commit() themselves; this cell does
# not commit anything before closing.
con.close()