
#### This notebook has been implemented as a Python script in `inventorytosdf.py`

### What does the Schrödinger workflow do?
- Read in sdfs
- MM
- ionize at pH 7
- desalt
- generate tautomers
- retain chirality as specified in input
- reaction-based enumeration
### What is the RDKit equivalent of this?
- take the DataFrame of molecules
- desalt
- chirality: watch out for weird consequences of the sometimes unassigned spiro center
- reaction enumeration

### What does Yu's second script (SDFtoFormula) do?
- Read in SDFs
- Calculate molecular formula
- write to file

In [None]:
from rdkit import Chem
from rdkit.Chem import Draw, SaltRemover, AllChem, FunctionalGroups, rdFMCS, DetectChemistryProblems
from rdkit.Chem.SimpleEnum.Enumerator import EnumerateReaction
from pathlib import Path
import pandas as pd

In [None]:
# Absolute path of the 'data' directory that sits next to the current working directory.
DATA_DIR = (Path('..') / 'data').resolve()
# Generated files are read from and written to the 'outputs' subfolder.
OUTPUT_DIR = DATA_DIR.joinpath('outputs')

In [None]:
# Load the compound inventory table from the outputs folder.
inventory_csv = OUTPUT_DIR / 'inventory_compounds.csv'
compounds = pd.read_csv(inventory_csv)
compounds

In [None]:
# Parse every SMILES string into an RDKit molecule.
parsed_mols = compounds['SMILES'].apply(Chem.MolFromSmiles)
# MolFromSmiles returns None (instead of raising) for SMILES it cannot parse.
# Fail here and name the offending rows, rather than letting a None reach a
# later cell where the error no longer says which compound is broken.
unparsed = compounds.loc[parsed_mols.isna(), 'SMILES']
if not unparsed.empty:
    raise ValueError(
        'RDKit could not parse {} SMILES (row index -> SMILES): {}'.format(
            len(unparsed), unparsed.to_dict()
        )
    )
compounds['mol'] = parsed_mols
compounds

In [None]:
# Render each molecule with Draw.MolToImage and keep the result in an 'img' column.
compounds['img'] = compounds['mol'].map(Draw.MolToImage)
compounds

In [None]:
def _weigh_in_mg(molecular_weight):
    """Return the molecular weight scaled by 1e-4, 0.05 and 1000, rounded to 2 decimals.

    NOTE(review): presumably 1e-4 is 100 µL expressed in litres, 0.05 the target
    concentration in mol/L and 1000 the g -> mg conversion — confirm.
    """
    # Keep the multiplication order so the floating-point result is unchanged.
    return round(molecular_weight * 1e-4 * 0.05 * 1000, 2)


# Derive the weigh-in column from the molecular-weight column, row by row.
compounds['weigh-in [mg] / 100 µL'] = compounds['MW [g/mol]'].apply(_weigh_in_mg)
compounds

### Generate outputs

In [None]:
# Write the inventory table to Excel. The molecule and image columns are left
# out of the export only; `compounds` itself keeps them (drop returns a copy).
excel_table = compounds.drop(columns=['mol', 'img'])
excel_table.to_excel(OUTPUT_DIR / 'inventory_compounds_extended.xlsx')

In [None]:
# Save every rendered molecule as '<Compound Name>.png' in the images folder.
image_dir = DATA_DIR / 'images'
for compound_name, image in zip(compounds['Compound Name'], compounds['img']):
    png_path = image_dir / (compound_name + '.png')
    with open(png_path, 'wb') as png_file:
        image.save(png_file)

In [None]:
# Write the molecules to one SDF file per category:
# 'I' -> initiators.sdf, 'M' -> monomers.sdf, 'T' -> terminators.sdf.
# Rows with any other category are not written to any file.
sdf_dir = OUTPUT_DIR / 'sdf'
with open(sdf_dir / 'initiators.sdf', 'w') as file_i,\
    open(sdf_dir / 'monomers.sdf', 'w') as file_m,\
    open(sdf_dir / 'terminators.sdf', 'w') as file_t:
    writer_i = Chem.SDWriter(file_i)
    writer_m = Chem.SDWriter(file_m)
    writer_t = Chem.SDWriter(file_t)
    # Dispatch table: category code -> writer of the matching file.
    writers_by_category = {'I': writer_i, 'M': writer_m, 'T': writer_t}
    try:
        for category, mol in zip(compounds['Category'], compounds['mol']):
            writer = writers_by_category.get(category)
            if writer is not None:
                writer.write(mol)
    finally:
        # The writers buffer their output. They must be closed while the
        # underlying files are still open, otherwise buffered records are lost
        # once the `with` block closes the file handles.
        for writer in writers_by_category.values():
            writer.close()

In [None]:
# Pickle the complete DataFrame (including the 'mol' and 'img' columns) for reuse.
compounds.to_pickle(path=OUTPUT_DIR / 'library_constituents_dataframe.pkl')
