In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

from rdkit import Chem
from rdkit.Chem import Draw
%matplotlib inline

homedir = os.path.expanduser("~/")
homedir = homedir+"MSE299/silane/data/"
df = pd.read_csv(homedir+"FreeSolv.csv", sep=',')

In [22]:
#df = df.drop('Unnamed: 0', 1)
#df

In [23]:
# Add unique alphanumeric identifier
df['id'] = range(1, len(df.index)+1)
df['id'] = 'molid' + df['id'].astype(str)
print(df.shape)
df.columns

(642, 5)


Index(['iupac', 'smiles', 'expt', 'calc', 'id'], dtype='object')

In [24]:
# Remove extraneous SMILES entry only for tox21
#df = df.join(df['smiles'].str.split(' ', 1, expand=True).rename(columns={0:'pre_smiles', 1:'Extraneous_SMILES'}))
#df

# Check For Invalid Smiles

In [25]:
# Check for invalid SMILES
mol_list = [Chem.MolFromSmiles(x) for x in df['smiles']]
invalid = len([x for x in mol_list if x is None])
print("No. of invalid entries: "+str(invalid))

No. of invalid entries: 0


In [26]:
#mol_list = []
#for x in df['pre_smiles']:
#    if Chem.MolFromSmiles(x) == None:
#        print(x)
#        df = df[df.pre_smiles != x]
#    else:
#        mol_list.append(Chem.MolFromSmiles(x))

KeyError: 'pre_smiles'

# Deal With Duplicate Entries

In [27]:
mask = df.duplicated('smiles', keep=False)

In [28]:
#Separate out unique and duplicate entries
df_uni = df[~mask]
df_dup = df[mask]
print(df.shape, df_uni.shape, df_dup.shape)

(642, 5) (642, 5) (0, 5)


In [29]:
# Compute mean of duplicate entries
avg_df = df_dup.groupby('smiles', as_index=False).mean()
avg_df.head(25)

Unnamed: 0,smiles,expt,calc


In [30]:
# Match up average predictions to SMILES and drop duplicate entries
print(df_dup.shape)
df_dup = df_dup.drop(['expt', 'calc'], axis=1)
df_dup = pd.merge(df_dup, avg_df, how="right", on=["smiles"])
print(df_dup.shape)
df_dup = df_dup.drop_duplicates(subset=['smiles'], keep="first")
print(df_dup.shape)

(0, 5)
(0, 5)
(0, 5)


In [31]:
# Add reliable averaged de-duplicated entries back to unique entries
df2 = pd.concat([df_dup, df_uni], axis=0)
print(df2.shape)
print(df2.smiles.unique().shape)
print(df.smiles.unique().shape)

(642, 5)
(642,)
(642,)


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


In [32]:
# Reset index of df
df2 = df2.reset_index(drop=True)
df2.columns
#df2 = df2.drop('Unnamed: 0',1)
#df2 = df2.drop('Unnamed: 0_x',1)
#df2 = df2.drop('Unnamed: 0_y',1)
df2.head(5)

Unnamed: 0,calc,expt,id,iupac,smiles
0,-9.625,-11.01,molid1,"4-methoxy-N,N-dimethyl-benzamide",CN(C)C(=O)c1ccc(cc1)OC
1,-6.219,-4.87,molid2,methanesulfonyl chloride,CS(=O)(=O)Cl
2,2.452,1.83,molid3,3-methylbut-1-ene,CC(C)C=C
3,-5.809,-5.45,molid4,2-ethylpyrazine,CCc1cnccn1
4,-2.917,-4.21,molid5,heptan-1-ol,CCCCCCCO


In [33]:
print(df2.shape)
df2.to_csv(homedir+"freesolv_all.csv", index=False)

(642, 5)


# Internal Set

In [34]:
df = pd.read_csv(homedir+"freesolv_all.csv")

In [35]:
#construct internal test set
size = 0.10
seed = 6
np.random.seed(seed)

In [36]:
msk = np.random.rand(len(df)) < 0.1
df_tv = df[~msk]
df_int = df[msk]
print(df.shape, df_tv.shape, df_int.shape)

(642, 5) (581, 5) (61, 5)


In [37]:
df_tv.to_csv(homedir+'freesolv_all_trainval.csv', index=False)
df_int.to_csv(homedir+'freesolv_all_int.csv', index=False)

# split data by task

In [39]:
# currently one measurement, flash point
# Check for missing labels
dfInt = pd.read_csv(homedir+"freesolv_all_int.csv")
dfInt['calc'].isnull().sum()
df.shape


(642, 5)

In [41]:
#drop data if this is anything greater than 1
df1Int = dfInt[['id','iupac','smiles','expt']]
df1Int.to_csv(homedir+"freesolv_int_expt.csv", index=False)
df2Int = dfInt[['id','iupac','smiles','calc']]
df2Int.to_csv(homedir+"freesolv_int_calc.csv", index=False)
#df1Int.groupby('flashPoint').count()
#df1Int.shape

In [45]:
dfTrainval = pd.read_csv(homedir+"freesolv_all_trainval.csv")
#print(dfTrainval.head(5))
dfTrainval = dfTrainval[['id','iupac','smiles','expt']]
dfTrainval.to_csv(homedir+"freesolv_tv_expt.csv", index=False)
#dfTrainval.groupby('flashPoint').count()
#dfTrainval.shape
dfTrainval2 = pd.read_csv(homedir+"freesolv_all_trainval.csv")
dfTrainval2 = dfTrainval2[['id','iupac','smiles','calc']]
dfTrainval2.to_csv(homedir+"freesolv_tv_calc.csv", index=False)

    calc   expt      id                             iupac  \
0 -9.625 -11.01  molid1  4-methoxy-N,N-dimethyl-benzamide   
1 -6.219  -4.87  molid2          methanesulfonyl chloride   
2  2.452   1.83  molid3                 3-methylbut-1-ene   
3 -2.917  -4.21  molid5                       heptan-1-ol   
4 -5.444  -6.27  molid6                3,5-dimethylphenol   

                   smiles  
0  CN(C)C(=O)c1ccc(cc1)OC  
1            CS(=O)(=O)Cl  
2                CC(C)C=C  
3                CCCCCCCO  
4         Cc1cc(cc(c1)O)C  


# Prep 2D images

In [46]:
homedir = os.path.expanduser("~/")
archdir = homedir+"MSE299/silane/archive/"
homedir = homedir+"MSE299/silane/data/"

In [47]:
from chem_scripts import cs_compute_features, cs_set_resolution, cs_coords_to_grid, cs_check_grid_boundary
from chem_scripts import cs_channel_mapping, cs_map_atom_to_grid, cs_map_bond_to_grid, cs_grid_to_image

  '{0}.{1}.{2}'.format(*version.hdf5_built_version_tuple)
Using TensorFlow backend.


In [48]:
def gen_image():
    
    exclusion_list = []
    full_array_list = []

    for i in range(0,df.shape[0]):

        # Extract SMILES string
        smiles_string = df["smiles"][i]
        #print(i, smiles_string)

        # Extract ID of molecule
        id_string = df["id"][i]

        # Read SMILES string
        mol = Chem.MolFromSmiles(smiles_string)
        
        # Compute properties
        print(smiles_string)
        mol, df_atom, df_bond, nancheckflag = cs_compute_features(mol)
        
        # Intialize grid
        myarray = cs_set_resolution(gridsize, representation=rep)

        # Map coordinates to grid
        df_atom, atomcheckflag = cs_coords_to_grid(df_atom, dim, res)
        
        # Check if outside grid
        sizecheckflag = cs_check_grid_boundary(df_atom, gridsize)

        if sizecheckflag == True or atomcheckflag == True or nancheckflag == True:

            exclusion_list.append(id_string)
            print("EXCLUSION for "+str(id_string))
            #print('exlusion')

        else:
            # Initialize channels
            channel = cs_channel_mapping()

            # Map atom to grid
            myarray = cs_map_atom_to_grid(myarray, channel, df_atom, representation=rep)

            # Map bond to grid
            myarray = cs_map_bond_to_grid(myarray, channel, df_atom, df_bond, representation=rep)

            # Visualize status every 1000 steps
            #if (i+1)%nskip==0:
               # print("*** PROCESSING "+str(i+1)+": "+str(id_string)+" "+str(smiles_string))
               # cs_grid_to_image(myarray, mol)

            # Generate combined array of raw input
            curr_array = myarray.flatten()
            curr_array_list = curr_array.tolist()
            full_array_list.append(curr_array_list)

    full_array = np.asarray(full_array_list)
    print(full_array.shape)
    print(exclusion_list)

    return(full_array, exclusion_list)

In [49]:
dim = 40       # Size of the box in Angstroms, not radius!
res = 0.5      # Resolution of each pixel
rep = "engA"    # Image representation used
nskip = 500    # How many steps till next visualization

gridsize = int(dim/res)

In [50]:
# Specify dataset name
jobname = "freesolv_int"
taskname = ["calc", "expt"]

for task in taskname:

    print("PROCESSING TASK: "+str(jobname)+" "+str(task))
    
    # Specify input and output csv
    filein  = homedir+jobname+"_"+task+".csv"
    print(filein)
    fileout = homedir+jobname+"_"+task+"_image.csv"
    print(fileout)
    # Specify out npy files
    fileimage = archdir+jobname+"_"+task+"_img_"+rep+".npy" 
    print(fileimage)
    filelabel = archdir+jobname+"_"+task+"_img_label.npy" 
    print(filelabel)
    # Generate image
    df = pd.read_csv(filein)
    print(df.columns)
    full_array, exclusion_list = gen_image()
    
    # Dataset statistics before and after image generation
    print("*** Database Specs:")
    print(df.shape[0], len(exclusion_list), int(df.shape[0])-int(len(exclusion_list)))
    
    # Create csv of final data (after exclusion)
    print("*** Separating Database:")
    mod_df = df[~df["id"].isin(exclusion_list)]
    mod_df.to_csv(fileout, index=False)

    # Save generated images as npy
    np.save(fileimage, full_array)
    print(full_array.shape)
    
    # Save labels as npy
    label_array = mod_df[task].as_matrix().astype("float32")
    np.save(filelabel, label_array)
    print(label_array.shape)

PROCESSING TASK: freesolv_int calc
C:\Users\Nathaniel/MSE299/silane/data/freesolv_int_calc.csv
C:\Users\Nathaniel/MSE299/silane/data/freesolv_int_calc_image.csv
C:\Users\Nathaniel/MSE299/silane/archive/freesolv_int_calc_img_engA.npy
C:\Users\Nathaniel/MSE299/silane/archive/freesolv_int_calc_img_label.npy
Index(['id', 'iupac', 'smiles', 'calc'], dtype='object')
CCc1cnccn1
CC(=O)OCCOC(=O)C
C1CNC1
C(CO[N+](=O)[O-])O
CCCCCCCCBr
c1c(c(=O)[nH]c(=O)[nH]1)Br
C[N+](=O)[O-]
COC(c1ccccc1)(OC)OC
CCCOC
c1cc(cc(c1)O)C#N
CNC
c1ccc2c(c1)ccc3c2cccc3
C(=C/Cl)\Cl
CC(C)Cc1cnccn1
Cc1ccc(cc1)C(C)C
CCOC(=O)CCC(=O)OCC
CCCC=O
c1(=O)[nH]c(=O)[nH]c(=O)[nH]1
c12c(c(c(c(c1Cl)Cl)Cl)Cl)Oc3c(c(c(c(c3Cl)Cl)Cl)Cl)O2
CNc1ccccc1
CC(C)OC(=O)C
CCOCC
CCc1ccc(cc1)O
CCCC(=O)O
C(F)(F)(F)Br
CCCCN
CCCC(=O)OCC
C1CCCCC1
CC#C
C
CC=C(C)C
C(CCl)Cl
CC
COc1ccccc1OC
CBr
c1ccc(c(c1)N)[N+](=O)[O-]
C([C@@H]1[C@H]([C@@H]([C@H]([C@@H](O1)O)O)O)O)O
CCCCBr
CCc1ccc(cc1)C
c1ccc2c(c1)Oc3cccc(c3O2)Cl
c1cc(cc(c1)C(F)(F)F)C(F)(F)F
c1cc(ccc1O)F
Cc1cc



CC(=O)OCCOC(=O)C
C1CNC1
C(CO[N+](=O)[O-])O
CCCCCCCCBr
c1c(c(=O)[nH]c(=O)[nH]1)Br
C[N+](=O)[O-]
COC(c1ccccc1)(OC)OC
CCCOC
c1cc(cc(c1)O)C#N
CNC
c1ccc2c(c1)ccc3c2cccc3
C(=C/Cl)\Cl
CC(C)Cc1cnccn1
Cc1ccc(cc1)C(C)C
CCOC(=O)CCC(=O)OCC
CCCC=O
c1(=O)[nH]c(=O)[nH]c(=O)[nH]1
c12c(c(c(c(c1Cl)Cl)Cl)Cl)Oc3c(c(c(c(c3Cl)Cl)Cl)Cl)O2
CNc1ccccc1
CC(C)OC(=O)C
CCOCC
CCc1ccc(cc1)O
CCCC(=O)O
C(F)(F)(F)Br
CCCCN
CCCC(=O)OCC
C1CCCCC1
CC#C
C
CC=C(C)C
C(CCl)Cl
CC
COc1ccccc1OC
CBr
c1ccc(c(c1)N)[N+](=O)[O-]
C([C@@H]1[C@H]([C@@H]([C@H]([C@@H](O1)O)O)O)O)O
CCCCBr
CCc1ccc(cc1)C
c1ccc2c(c1)Oc3cccc(c3O2)Cl
c1cc(cc(c1)C(F)(F)F)C(F)(F)F
c1cc(ccc1O)F
Cc1ccc(c(c1)C)C
C[C@@H]1CC[C@H](CC1=O)C(=C)C
COP(=S)(OC)Oc1ccc(cc1)[N+](=O)[O-]
CCOP(=S)(OCC)Oc1ccc(cc1)[N+](=O)[O-]
CCC(C)CC
Cc1cnccn1
CN(C)C(=O)Nc1ccccc1
CCCCCC
c1ccc2c(c1)C(=O)c3cccc(c3C2=O)N
CCOP(=S)(OCC)SCSCC
CCCCC[N+](=O)[O-]
c1ccc(cc1)CCCO
CS
C(C(Cl)(Cl)Cl)Cl
C1CCCC1
Cc1c[nH]cn1
CCCCCC(=O)OC
Cc1ccccc1[N+](=O)[O-]
C(CO[N+](=O)[O-])O[N+](=O)[O-]
(61, 25600)
[]
*** Databas

In [51]:
# Specify dataset name
jobname = "freesolv_tv"
taskname = ["expt", "calc"]

for task in taskname:

    print("PROCESSING TASK: "+str(jobname)+" "+str(task))
    
    # Specify input and output csv
    filein  = homedir+jobname+"_"+task+".csv"
    fileout = homedir+jobname+"_"+task+"_image.csv"
    
    # Specify out npy files
    fileimage = archdir+jobname+"_"+task+"_img_"+rep+".npy" 
    filelabel = archdir+jobname+"_"+task+"_img_label.npy" 
    
    # Generate image
    df = pd.read_csv(filein)
    full_array, exclusion_list = gen_image()
    
    # Dataset statistics before and after image generation
    print("*** Database Specs:")
    print(df.shape[0], len(exclusion_list), int(df.shape[0])-int(len(exclusion_list)))
    
    # Create csv of final data (after exclusion)
    print("*** Separating Database:")
    mod_df = df[~df["id"].isin(exclusion_list)]
    mod_df.to_csv(fileout, index=False)

    # Save generated images as npy
    np.save(fileimage, full_array)
    print(full_array.shape)
    
    # Save labels as npy
    label_array = mod_df[task].as_matrix().astype("float32")
    np.save(filelabel, label_array)
    print(label_array.shape)

PROCESSING TASK: freesolv_tv expt
CN(C)C(=O)c1ccc(cc1)OC
CS(=O)(=O)Cl
CC(C)C=C
CCCCCCCO
Cc1cc(cc(c1)O)C
CC(C)C(C)C
CCCC(C)(C)O
C[C@@H]1CCCC[C@@H]1C
CC[C@H](C)O
C(Br)Br
CC[C@H](C(C)C)O
CCc1ccccn1
CCCCC(=O)OCC
c1ccc(cc1)S
CC(=CCC/C(=C\CO)/C)C
c1ccc2c(c1)CCC2
CCOc1ccccc1
c1cc(ccc1O)Br
CCCC(C)(C)C
CCOP(=S)(OCC)SCSP(=S)(OCC)OCC
C1CCCC(CC1)O
COC(=O)C1CC1
c1ccc(cc1)C#N
CCCCC#N
CC(C)(C)O
CC(C)C(=O)C(C)C
CCC=O
CN(C)C=O
Cc1ccc(cc1)C
C=CCC=C
Cc1cccc(c1C)Nc2ccccc2C(=O)O
CN(C)C(=O)c1ccccc1
CCNCC
CC(C)(C)c1ccc(cc1)O
CC(C)CCOC=O
CCCCCCCCCCO
CCC(=O)OCC
CCCCCCCCC
CC(=O)NC
CCCCCCCC=C
c1ccc2cc(ccc2c1)O
c1cc(c(cc1Cl)Cl)Cl
C([C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O)O
CCCC(=O)OC
c1ccc(c(c1)C=O)O
CCCNCCC
c1ccc(cc1)N
C(F)(F)(F)F
CC[C@@H](C)CO
c1ccc(c(c1)O)I
COc1cccc(c1O)OC
CCC#C
c1ccc(cc1)C(F)(F)F
NN
Cc1ccccn1
CCNc1nc(nc(n1)Cl)NCC
c1ccc2c(c1)Oc3cc(c(cc3O2)Cl)Cl
CCCCCCCCN
N
c1ccc(c(c1)C(F)(F)F)C(F)(F)F
COC(=O)c1ccc(cc1)O
CCCCCc1ccccc1
CC(F)F
c1ccc(cc1)n2c(=O)c(c(cn2)N)Cl
C=CC=C
CN(C)C
CCCCCC(=O)N
CC(C)CO[N+](=O)[O

COCCO
COC=O
c1ccc2cc(ccc2c1)N
Cc1cccc(c1)[N+](=O)[O-]
C(CCCl)CCl
CC(=O)CO[N+](=O)[O-]
CC(C)(C)c1ccccc1
C[C@@H](C(F)(F)F)O
CCCCCBr
CCCCCCC=C
CC1=CC(=O)[C@@H](CC1)C(C)C
CC(C)O
CCCCCCN
C(CO[N+](=O)[O-])CO[N+](=O)[O-]
Cc1ccc(c(c1)C)O
CCCCCO
CCC[C@@H](C)O
CCCC[C@@H](C)CC
C[C@@H](c1ccc(cc1)CC(C)C)C(=O)O
CCOC(=O)C[C@H](C(=O)OCC)SP(=S)(OC)OC
Cc1ccc(cc1C)O
Cc1cc(ccc1Cl)O
CCCC/C=C/C
CCCOCCC
C[C@@H]1CC[C@H]([C@@H](C1)O)C(C)C
CCNc1nc(nc(n1)SC)NC(C)(C)C
CC(C)CC(C)(C)C
CCCCC(=O)CCCC
CCCCN(CC)C(=O)SCCC
CCCCCC=C
CC(C)OC=O
CC(OC(=O)C)OC(=O)C
c1c(c(=O)[nH]c(=O)[nH]1)Cl
CC(=C)c1ccccc1
CCC(C)C
CCCCO[N+](=O)[O-]
c1ccc(cc1)Br
CC(Cl)(Cl)Cl
CC(=C)[C@H]1CCC(=CC1)C=O
CCCCCCCI
c1cc2ccc3cccc4c3c2c(c1)cc4
CCCCCCl
CC(C)COC(=O)C
CCC(C)(C)C
c1cc(ccc1N)N(=O)=O
COC(=O)CC#N
COc1ccc(cc1)N
CC(C)Cc1ccccc1
c1ccc(cc1)c2c(cc(cc2Cl)Cl)Cl
CN
c1ccc(c(c1)O)Cl
c1ccc2c(c1)C(=O)c3ccc(cc3C2=O)N
C(=C\Cl)\Cl
CCCCC(=O)C
c1ccc(c(c1)O)F
Cc1c(nc(nc1OC(=O)N(C)C)N(C)C)C
C=Cc1ccccc1
CCOP(=O)(OCC)OCC
C(C(F)(F)F)O
CCCCOC[C@H](C)O
CCCO
Cc1ccccc1



CCCCCCCO
Cc1cc(cc(c1)O)C
CC(C)C(C)C
CCCC(C)(C)O
C[C@@H]1CCCC[C@@H]1C
CC[C@H](C)O
C(Br)Br
CC[C@H](C(C)C)O
CCc1ccccn1
CCCCC(=O)OCC
c1ccc(cc1)S
CC(=CCC/C(=C\CO)/C)C
c1ccc2c(c1)CCC2
CCOc1ccccc1
c1cc(ccc1O)Br
CCCC(C)(C)C
CCOP(=S)(OCC)SCSP(=S)(OCC)OCC
C1CCCC(CC1)O
COC(=O)C1CC1
c1ccc(cc1)C#N
CCCCC#N
CC(C)(C)O
CC(C)C(=O)C(C)C
CCC=O
CN(C)C=O
Cc1ccc(cc1)C
C=CCC=C
Cc1cccc(c1C)Nc2ccccc2C(=O)O
CN(C)C(=O)c1ccccc1
CCNCC
CC(C)(C)c1ccc(cc1)O
CC(C)CCOC=O
CCCCCCCCCCO
CCC(=O)OCC
CCCCCCCCC
CC(=O)NC
CCCCCCCC=C
c1ccc2cc(ccc2c1)O
c1cc(c(cc1Cl)Cl)Cl
C([C@H]([C@H]([C@@H]([C@@H](CO)O)O)O)O)O
CCCC(=O)OC
c1ccc(c(c1)C=O)O
CCCNCCC
c1ccc(cc1)N
C(F)(F)(F)F
CC[C@@H](C)CO
c1ccc(c(c1)O)I
COc1cccc(c1O)OC
CCC#C
c1ccc(cc1)C(F)(F)F
NN
Cc1ccccn1
CCNc1nc(nc(n1)Cl)NCC
c1ccc2c(c1)Oc3cc(c(cc3O2)Cl)Cl
CCCCCCCCN
N
c1ccc(c(c1)C(F)(F)F)C(F)(F)F
COC(=O)c1ccc(cc1)O
CCCCCc1ccccc1
CC(F)F
c1ccc(cc1)n2c(=O)c(c(cn2)N)Cl
C=CC=C
CN(C)C
CCCCCC(=O)N
CC(C)CO[N+](=O)[O-]
c1ccc2c(c1)C(=O)c3cccc(c3C2=O)NCCO
CCCCCCC(=O)C
CN1CCNCC1
CCN
C1C=CC=CC=C1
c

CC(=O)CO[N+](=O)[O-]
CC(C)(C)c1ccccc1
C[C@@H](C(F)(F)F)O
CCCCCBr
CCCCCCC=C
CC1=CC(=O)[C@@H](CC1)C(C)C
CC(C)O
CCCCCCN
C(CO[N+](=O)[O-])CO[N+](=O)[O-]
Cc1ccc(c(c1)C)O
CCCCCO
CCC[C@@H](C)O
CCCC[C@@H](C)CC
C[C@@H](c1ccc(cc1)CC(C)C)C(=O)O
CCOC(=O)C[C@H](C(=O)OCC)SP(=S)(OC)OC
Cc1ccc(cc1C)O
Cc1cc(ccc1Cl)O
CCCC/C=C/C
CCCOCCC
C[C@@H]1CC[C@H]([C@@H](C1)O)C(C)C
CCNc1nc(nc(n1)SC)NC(C)(C)C
CC(C)CC(C)(C)C
CCCCC(=O)CCCC
CCCCN(CC)C(=O)SCCC
CCCCCC=C
CC(C)OC=O
CC(OC(=O)C)OC(=O)C
c1c(c(=O)[nH]c(=O)[nH]1)Cl
CC(=C)c1ccccc1
CCC(C)C
CCCCO[N+](=O)[O-]
c1ccc(cc1)Br
CC(Cl)(Cl)Cl
CC(=C)[C@H]1CCC(=CC1)C=O
CCCCCCCI
c1cc2ccc3cccc4c3c2c(c1)cc4
CCCCCCl
CC(C)COC(=O)C
CCC(C)(C)C
c1cc(ccc1N)N(=O)=O
COC(=O)CC#N
COc1ccc(cc1)N
CC(C)Cc1ccccc1
c1ccc(cc1)c2c(cc(cc2Cl)Cl)Cl
CN
c1ccc(c(c1)O)Cl
c1ccc2c(c1)C(=O)c3ccc(cc3C2=O)N
C(=C\Cl)\Cl
CCCCC(=O)C
c1ccc(c(c1)O)F
Cc1c(nc(nc1OC(=O)N(C)C)N(C)C)C
C=Cc1ccccc1
CCOP(=O)(OCC)OCC
C(C(F)(F)F)O
CCCCOC[C@H](C)O
CCCO
Cc1ccccc1C
CC(C)(C)C
CCCC#C
c1ccc2c(c1)C(=O)NC2=O
CCCCI
Cc1ccc(cc1)O
CC(C)

In [83]:
#m = Chem.MolFromSmiles('F[Si-2](F)(F)(F)(F)F.[NH4+].[NH4+]')
#Draw.MolToImage(m)

ValueError: Null molecule provided