# CHEMICAL SPACE EXAMPLE

In [2]:
# Import the helper first so that `suppress_warnings` exists before it is called;
# calling it ahead of its import raises NameError on a fresh kernel.
from mlchem.helper import identify_df_duplicates, add_inchi_to_dataframe, suppress_warnings

# Called before the remaining imports, as in the original cell order.
# NOTE(review): presumably this silences library warnings for the session -- confirm
# against the mlchem.helper implementation.
suppress_warnings()

import numpy as np
import pandas as pd

from mlchem.chem.calculator import descriptors
from mlchem.chem.manipulation import MolCleaner
from mlchem.chem.visualise.space import ChemicalSpace
from mlchem.ml.preprocessing.dimensional_reduction import Compressor

In [2]:
# Directory that contains the input file `data.csv`.
path = './'

# The code below reads the columns SMILES, NAME and CLASS from this file.
data = pd.read_csv(path+'data.csv')

# Run the MolCleaner pipeline on every SMILES, using the row index of `data` as the id.
cleaner = MolCleaner(input_smiles_list=list(data.SMILES),id_list=list(data.index))
cleaner.full_clean()

# Keep only the rows whose id appears in `cleaner.df_accepted`. `.copy()` makes this an
# independent frame, so the SMILES assignment below does not write into a slice of
# `data` (SettingWithCopyWarning / chained-assignment pitfall).
df_data_cleaned = data[data.index.isin(list(cleaner.df_accepted.id))].copy()

# Write the cleaned SMILES back by id rather than by position, so the rows stay
# correctly labelled even if `df_accepted` is not in the same order as `data`.
cleaned_smiles_by_id = dict(zip(cleaner.df_accepted.id, cleaner.df_accepted.SMILES))
df_data_cleaned['SMILES'] = df_data_cleaned.index.map(cleaned_smiles_by_id)

# NOTE(review): presumably adds the 'INCHI' column that the next call keys on; the
# meaning of the positional argument `1` is not visible here -- confirm in mlchem.helper.
df_data_cleaned = add_inchi_to_dataframe(df_data_cleaned,1,'SMILES')
# Returns two frames; judging by the names, the retained rows and the duplicates.
df_added,df_duplicates = identify_df_duplicates(df_data_cleaned,'INCHI')

# Columns used by the rest of the notebook.
df_final = df_added[['SMILES','NAME','CLASS']]
df_final


Initialising SMILES
Removing carbon ions
Removing inorganics
Removing organometallics
Desalting SMILES
Neutralising SMILES
Performing quality check
DONE


Unnamed: 0,SMILES,NAME,CLASS
0,OOC1=CC=CC=C1,mol1,CLASS_1
1,CCCC1=CC=CC=C1,mol2,CLASS_1
2,CCC=CCCC1=CC=CC=C1,mol3,CLASS_1
3,COCCCCC1=CC=CC=C1,mol4,CLASS_1
4,C1=CC=CC=C1,mol5,CLASS_1
...,...,...,...
368,O=C(O)CC1=CC=CC=C1CC1C=C1,mol369,CLASS_3
369,CCCOC1=CC=CC=C1CC(=O)O,mol370,CLASS_3
370,C=NCCCCCCC1=CC=CC=C1CC(=O)O,mol371,CLASS_3
371,CCCC#SCC1=CC=CC=C1CC(=O)O,mol372,CLASS_3


### Calculate descriptors

In [3]:
# Every descriptor / fingerprint table below is computed from the same SMILES column.

# RDKit descriptor table.
df_desc_rdkit = descriptors.get_rdkitDesc(df_final['SMILES'])

# The next two calls should not be used if the molecules have not been cleaned.
df_desc_mordred = descriptors.get_mordredDesc(df_final['SMILES'])
df_desc_both = descriptors.get_allDesc(df_final['SMILES'])

# Fingerprint tables at radius 2 and radius 4 (fp_type 'm'; presumably Morgan, per the names).
df_fp_morgan_2 = descriptors.get_fingerprint_df(
    df_final['SMILES'],
    radius=2,
    fp_type='m',
)
df_fp_morgan_4 = descriptors.get_fingerprint_df(
    df_final['SMILES'],
    radius=4,
    fp_type='m',
)

# Chemotype table.
df_chemotypes = descriptors.get_chemotypes(df_final['SMILES'])

##### Force numeric types on descriptors, drop inf and NaN

In [4]:
# Choose the descriptor table to use. Options computed in the previous cell:
# df_desc_rdkit, df_desc_mordred, df_desc_both, df_fp_morgan_2, df_fp_morgan_4, df_chemotypes
df_desc_selected = df_desc_both

# Convert each column to a numeric dtype and KEEP the converted values; columns that
# cannot be converted are dropped. (Discarding the result of `pd.to_numeric` would leave
# convertible object-dtype columns un-converted.)
numeric_columns = {}
for column_name in df_desc_selected.columns:
    try:
        numeric_columns[column_name] = pd.to_numeric(df_desc_selected[column_name])
    except (ValueError, TypeError):
        # Non-numeric content (or a duplicated column label): skip this column.
        continue

# Build a new frame instead of mutating in place, so re-running the cell is idempotent
# and no write ever targets a slice of the source table.
df_desc_selected = (
    pd.DataFrame(numeric_columns, index=df_desc_selected.index)
    .replace([np.inf, -np.inf], np.nan)
    .dropna(axis=0)  # axis 0: rows, axis 1: columns
)

### Use ChemicalSpace and Compressor classes

In [14]:
# Pair the molecule table with the selected descriptor table.
cs = ChemicalSpace(
    data=df_final,
    df_descriptors=df_desc_selected,
)

# Apply the diversity and collinearity filters and standardise the descriptors;
# the call prints the table shape after each filtering stage.
cs.process(
    standardise=True,
    diversity_filter=0.2,
    collinearity_filter=0.9,
)

Before filtering: (363, 1349)
After diversity filter: (363, 980)
After collinearity filter: (363, 444)


In [15]:
# Reduce the processed descriptor table to a low-dimensional embedding for plotting.
# NOTE: `c` was also the loop variable in the numeric-filtering cell; it is rebound here.
c = Compressor(cs.df_processed)
# PCA is run first, then t-SNE.
# NOTE(review): presumably t-SNE operates on the PCA output and `dataframe_compressed`
# holds the final embedding -- confirm in Compressor. t-SNE is stochastic, so results may
# differ between runs unless Compressor fixes a seed internally.
c.compress_PCA()
c.compress_TSNE()
# Hand the compressed coordinates to the ChemicalSpace object before plotting.
cs.prepare(df_compressed=c.dataframe_compressed)

In [16]:
# Styling options for the plot, grouped by the figure element they affect.
cs.bokeh_dictionary = dict(
    # Title
    title_location='above',
    title_fontsize='25px',
    title_align='center',
    title_background_fill_colour='white',
    title_text_colour='black',
    # Legend
    legend_location='top_left',
    legend_title='Class',
    legend_label_text_font='times',
    legend_label_text_font_style='italic',
    legend_label_text_colour='navy',
    legend_border_line_width=3,
    legend_border_line_colour='navy',
    legend_border_line_alpha=0.5,
    legend_background_fill_colour='grey',
    legend_background_fill_alpha=0.2,
    # X axis
    xaxis_label='DIM_1',
    xaxis_line_width=3,
    xaxis_line_colour='grey',
    xaxis_major_label_text_colour='navy',
    xaxis_major_label_orientation='horizontal',
    # Y axis
    yaxis_label='DIM_2',
    yaxis_line_width=3,
    yaxis_line_colour='grey',
    yaxis_major_label_text_colour='navy',
    yaxis_major_label_orientation='vertical',
    # Minor ticks (both axes)
    axis_minor_tick_in=-3,
    axis_minor_tick_out=6,
)

# Draw the plot with one colour per entry in `colour_list`; `save_html=True` requests an
# HTML export under the given filename.
cs.plot(
    filename='demo',
    title='demo',
    save_html=True,
    colour_list=['blue', 'magenta', 'green'],
)