### Reading Data and getting a list of all the chemicals

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the boiling-point spreadsheet; its first column is used as the row index.
boiling_data = pd.read_excel(
    "BoilingPointData.xlsx",
    index_col=0,
)
boiling_data.head()

Unnamed: 0,name,molweight,critical temperature (K),acentric factor,boiling point (K)
1,(+)-a-pinene,136.23704,647.0,0.341,428.65
2,(+)-camphene,136.23704,638.0,0.296,432.65
3,(-)-a-pinene,136.23704,647.0,0.341,429.35
4,(-)-b-citronellol,156.2682,656.59,0.612,498.65
5,(-)-camphene,136.23704,638.0,0.296,439.95


### Trying different methods of converting from chemical name to SMILES
#### From research, it was found that the main RDKit package cannot convert IUPAC names to SMILES

#### Method 1: Using a web-scraping method (CIR: Chemical Identifier Resolver)
Link: https://cactus.nci.nih.gov/chemical/structure

In [3]:
from urllib.request import urlopen
from urllib.parse import quote

In [4]:
# A copied-and-pasted helper function

def CIRconvert(ids):
    """Resolve a chemical identifier to a SMILES string via the CIR web service.

    Parameters
    ----------
    ids : str
        Chemical name or identifier. It is URL-quoted before being placed in
        the request path.

    Returns
    -------
    str
        The decoded response body (the SMILES string) on success, or the
        sentinel ``'Did not work'`` if building or performing the request
        fails.
    """
    try:
        url = 'http://cactus.nci.nih.gov/chemical/structure/' + quote(ids) + '/smiles'
        # The context manager closes the HTTP response even if read/decode fails.
        with urlopen(url) as response:
            return response.read().decode('utf8')
    except Exception:
        # Best-effort lookup: any ordinary failure maps to the sentinel.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so a long batch can still be interrupted.
        return 'Did not work'

# A handful of names used to spot-check the resolver before a full run.
identifiers = [
    '(S)-(+)-1-amino-2-propanol',
    '(<+->)-2-chlorooctane',
    '(R)-2-amino-6-methylheptane',
    '(E)-2,2,5,5-tetramethylhex-3-ene',
    'Diethyl sulfate',
    "1,1'-[methylenebis(thio)]bisethane",
    '(1-ethyloctadecyl)benzene',
]

# Print the resolver's answer (or its failure sentinel) for each name in turn.
for ids in identifiers:
    print(CIRconvert(ids))

C[C@H](O)CN
Did not work
CC(C)CCC[C@@H](C)N
CC(C)(C)\C=C\C(C)(C)C
CCO[S](=O)(=O)OCC
CCSCSCC
CCCCCCCCCCCCCCCCCC(CC)c1ccccc1


In [5]:
# Adapted to my needs: record every chemical that fails to convert, and measure the run time.
def CIRconvert_adapted(df, col='name'):
    """Add a SMILES column to a chemical table using the online CIR API.

    Each value in ``df[col]`` is URL-quoted and sent to the CIR
    ``/chemical/structure/<name>/smiles`` endpoint. One progress line is
    printed per chemical.

    Parameters
    ----------
    df : pandas.DataFrame
        Chemical data. It is not modified; the result is built on a copy.
    col : str, default 'name'
        Name of the column holding the chemical names to resolve.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with an added ``'SMILES'`` column. Rows whose lookup
        failed hold the placeholder ``'-'``.
    """
    # Work on a copy so the caller's frame is left untouched.
    data = df.copy()

    # Collect results positionally and assign them in one go. This keeps every
    # SMILES aligned with its row whatever the index looks like (it need not
    # be 1..n, and it may contain duplicates).
    smiles_column = []
    for name in df[str(col)]:
        try:
            url = 'http://cactus.nci.nih.gov/chemical/structure/' + quote(name) + '/smiles'
            # The context manager closes the HTTP response even on a failed read.
            with urlopen(url) as response:
                ans = response.read().decode('utf8')
        except Exception:
            # Best-effort: record the failure and carry on with the next name.
            # KeyboardInterrupt is deliberately not caught, so the long batch
            # run can be stopped by the user.
            smiles_column.append('-')
            print(f"{name} did not work")
        else:
            smiles_column.append(str(ans))
            print(f"{name} : {ans}")

    data['SMILES'] = smiles_column
    return data

Now that we see this works, let's run the whole chemical database through it, with timing.

In [6]:
import time

# Time the full-database conversion: take a timestamp before and after the call.
start = time.time()
data_SMILES = CIRconvert_adapted(boiling_data)
end = time.time()

# Elapsed seconds for the whole run (the first run took about 12000 seconds).
total_time = end - start
print(f"\n{total_time}")

(+)-a-pinene did not work
(+)-camphene : CC1(C)C2CCC(C2)C1=C
(-)-a-pinene did not work
(-)-b-citronellol did not work
(-)-camphene : CC1(C)C2CCC(C2)C1=C
(1,1-dimethylbutyl)benzene : CCCC(C)(C)c1ccccc1
(1-butylhexadecyl)benzene : CCCCCCCCCCCCCCCC(CCCC)c1ccccc1
(1-ethyl-1-methylpropyl)benzene : CCC(C)(CC)c1ccccc1
(1-ethylbutyl)benzene : CCCC(CC)c1ccccc1
(1-ethyloctadecyl)benzene : CCCCCCCCCCCCCCCCCC(CC)c1ccccc1
(1-hexylheptyl)benzene : CCCCCCC(CCCCCC)c1ccccc1
(1-methylenepropyl)benzene : CCC(=C)c1ccccc1
(1-methylheptyl)benzene : CCCCCCC(C)c1ccccc1
(1-methylnonadecyl)benzene : CCCCCCCCCCCCCCCCCCC(C)c1ccccc1
(1-methylnonyl)benzene : CCCCCCCCC(C)c1ccccc1
(1-methylpentyl)benzene : CCCCC(C)c1ccccc1
(1-octyldodecyl)benzene : CCCCCCCCCCCC(CCCCCCCC)c1ccccc1
(1-propylheptadecyl)benzene : CCCCCCCCCCCCCCCCC(CCC)c1ccccc1
(1-thiapropyl)-benzene : CCSc1ccccc1
(1R)-(-)-menthyl chloride : CC(C)C1CC[C@@H](C)CC1Cl
(1R,2S,5R)-(-)-menthol : CC(C)[C@@H]1CC[C@@H](C)C[C@H]1O
(1S)-(-)-b-pinene did not work
(2,4

1,1,2,2-tetrachloroethane : ClC(Cl)C(Cl)Cl
1,1,2,2-tetrachloropropane : CC(Cl)(Cl)C(Cl)Cl
1,1,2,2-tetrafluoroethane : FC(F)C(F)F
1,1,2,2-tetrafluoropropane : CC(F)(F)C(F)F
1,1,2,2-tetraiodoethane : IC(I)C(I)I
1,1,2,2-tetraiodopropane : CC(I)(I)C(I)I
1,1,2,2-tetramethylcyclopentane : CC1(C)CCCC1(C)C
1,1,2,2-tetraphenylethane : c1ccc(cc1)C(C(c2ccccc2)c3ccccc3)c4ccccc4
1,1,2,3,3,4,4,4-octafluoro-1-butene : FC(F)=C(F)C(F)(F)C(F)(F)F
1,1,2,3,3-pentafluoropropane : FC(F)C(F)C(F)F
1,1,2,3-tetrabromopropane : BrCC(Br)C(Br)Br
1,1,2,3-tetrachloropropane : ClCC(Cl)C(Cl)Cl
1,1,2,3-tetrafluoropropane : FCC(F)C(F)F
1,1,2,3-tetraiodopropane : ICC(I)C(I)I
1,1,2-tribromo-2-methylpropane : CC(C)(Br)C(Br)Br
1,1,2-tribromobutane : CCC(Br)C(Br)Br
1,1,2-tribromoethane : BrCC(Br)Br
1,1,2-tribromopropane : CC(Br)C(Br)Br
1,1,2-trichloro-1,2,2-trifluoroethane : FC(F)(Cl)C(F)(Cl)Cl
1,1,2-trichloro-1,2-difluoroethane : FC(Cl)C(F)(Cl)Cl
1,1,2-trichloro-1-fluoroethane : FC(Cl)(Cl)CCl
1,1,2-trichloro-2-fluoroethane 

1,1-diiodohexane : CCCCCC(I)I
1,1-diiodononane : CCCCCCCCC(I)I
1,1-diiodooctadecane : CCCCCCCCCCCCCCCCCC(I)I
1,1-diiodooctane : CCCCCCCC(I)I
1,1-diiodopentadecane : CCCCCCCCCCCCCCC(I)I
1,1-diiodopentane : CCCCC(I)I
1,1-diiodopropane : CCC(I)I
1,1-diiodotetradecane : CCCCCCCCCCCCCC(I)I
1,1-diiodotridecane : CCCCCCCCCCCCC(I)I
1,1-diiodoundecane : CCCCCCCCCCC(I)I
1,1-dimethyl-2-ethylcyclopentane : CCC1CCCC1(C)C
1,1-dimethyl-3-ethylcyclopentane : CCC1CCC(C)(C)C1
1,1-dimethylbutyl acetate : CCCC(C)(C)OC(C)=O
1,1-dimethylcyclobutane : CC1(C)CCC1
1,1-dimethylcyclohexane : CC1(C)CCCCC1
1,1-dimethylcyclopentane : CC1(C)CCCC1
1,1-dimethylcyclopropane : CC1(C)CC1
1,1-dimethylpropyl 3-methylbutanoate : CCC(C)(C)OC(=O)CC(C)C
1,1-dimethylpropyl butanoate : CCCC(=O)OC(C)(C)CC
1,1-dimethylpropyl formate : CCC(C)(C)OC=O
1,1-dimethylpropyl propanoate : CCC(=O)OC(C)(C)CC
1,1-dimethylpropylamine : CCC(C)(C)N
1,1-diphenyl-1-butene : CCC=C(c1ccccc1)c2ccccc2
1,1-diphenyl-1-decene : CCCCCCCCC=C(c1ccccc1)c2ccc

1,2-bis(ethylthio)ethane : CCSCCSCC
1,2-bis(methylthio)ethane : CSCCSC
1,2-butadiene : C[CH]=[C]=[CH2]
1,2-butanediol : CCC(O)CO
1,2-dibromo-2,3-dimethylbutane : CC(C)C(C)(Br)CBr
1,2-dibromo-2-methylbutane : CCC(C)(Br)CBr
1,2-dibromo-2-methylpropane : CC(C)(Br)CBr
1,2-dibromo-3,3-dimethylbutane : CC(C)(C)C(Br)CBr
1,2-dibromo-3-methylbutane : CC(C)C(Br)CBr
1,2-dibromobutane : CCC(Br)CBr
1,2-dibromoethane : BrCCBr
1,2-dibromoheptane : CCCCCC(Br)CBr
1,2-dibromohexane : CCCCC(Br)CBr
1,2-dibromononane : CCCCCCCC(Br)CBr
1,2-dibromooctane : CCCCCCC(Br)CBr
1,2-dibromopentane : CCCC(Br)CBr
1,2-dibromopropane : CC(Br)CBr
1,2-dibromotetrafluoroethane : FC(F)(Br)C(F)(F)Br
1,2-dichloro-1,1,2,2-tetrafluoroethane : FC(F)(Cl)C(F)(F)Cl
1,2-dichloro-1,1,2-trifluoroethane : FC(Cl)C(F)(F)Cl
1,2-dichloro-1,1-difluoroethane : FC(F)(Cl)CCl
1,2-dichloro-1,2,3,3,4,4-hexafluorocyclobutane : FC1(F)C(F)(F)C(F)(Cl)C1(F)Cl
1,2-dichloro-1,2-difluoroethane : FC(Cl)C(F)Cl
1,2-dichloro-1-fluoroethane : FC(Cl)CCl
1,2-di

1,3-diphenyl-1-propene : C(C=Cc1ccccc1)c2ccccc2
1,3-diphenyl-2-methylpropane : CC(Cc1ccccc1)Cc2ccccc2
1,3-diphenylbutane, (±) did not work
1,3-diphenylpentane : CCC(CCc1ccccc1)c2ccccc2
1,3-diphenylpropane : C(Cc1ccccc1)Cc2ccccc2
1,3-diphenyltriazene : N(N=Nc1ccccc1)c2ccccc2
1,3-eicosanediol : CCCCCCCCCCCCCCCCCC(O)CCO
1,3-hexadiene, cis and trans did not work
1,3-nonadecanediol : CCCCCCCCCCCCCCCCC(O)CCO
1,3-pentadiene; (cis+trans) did not work
1,3-propanediamine : NCCCN
1,3-propanediol : OCCCO
1,3-propylene oxide : C1COC1
1,4,5,8-tetramethylnaphthalene : Cc1ccc(C)c2c(C)ccc(C)c12
1,4,5-trimethylnaphthalene : Cc1ccc(C)c2c(C)cccc12
1,4,6,7-tetramethylnaphthalene : Cc1ccc(C)c2cc(C)c(C)cc12
1,4,6-trimethylnaphthalene : Cc1ccc2c(C)ccc(C)c2c1
1,4-benzenedicarboxaldehyde : O=Cc1ccc(C=O)cc1
1,4-butanediamine : NCCCCN
1,4-cyclohexadiene : C1C=CCC=C1
1,4-cyclohexanedicarboxylic acid : OC(=O)C1CCC(CC1)C(O)=O
1,4-cyclooctadiene : C1CC=CCC=CC1
1,4-di-tert-butylbenzene : CC(C)(C)c1ccc(cc1)C(C)(C)C
1,4

1-bromo-4-methylpentane : CC(C)CCCBr
1-bromo-cis-2-butene : C\C=C/CBr
1-bromo-cis-2-pentene : CC\C=C/CBr
1-bromo-trans-2-butene : C/C=C/CBr
1-bromo-trans-2-pentene : CC/C=C/CBr
1-bromobutane : CCCCBr
1-bromodecane : CCCCCCCCCCBr
1-bromodocosane : CCCCCCCCCCCCCCCCCCCCCCBr
1-bromododecane : CCCCCCCCCCCCBr
1-bromoeicosane : CCCCCCCCCCCCCCCCCCCCBr
1-bromoheneicosane : CCCCCCCCCCCCCCCCCCCCCBr
1-bromoheptadecane : CCCCCCCCCCCCCCCCCBr
1-bromoheptane : CCCCCCCBr
1-bromohexadecane : CCCCCCCCCCCCCCCCBr
1-bromohexane : CCCCCCBr
1-bromonaphthalene : Brc1cccc2ccccc12
1-bromononadecane : CCCCCCCCCCCCCCCCCCCBr
1-bromononane : CCCCCCCCCBr
1-bromooctadecane : CCCCCCCCCCCCCCCCCCBr
1-bromooctane : CCCCCCCCBr
1-bromopentacosane : CCCCCCCCCCCCCCCCCCCCCCCCCBr
1-bromopentadecane : CCCCCCCCCCCCCCCBr
1-bromopentane : CCCCCBr
1-bromopropane : CCCBr
1-bromotetracosane : CCCCCCCCCCCCCCCCCCCCCCCCBr
1-bromotetradecane : CCCCCCCCCCCCCCBr
1-bromotricosane : CCCCCCCCCCCCCCCCCCCCCCCBr
1-bromotridecane : CCCCCCCCCCCCCBr

1-iodoheptane : CCCCCCCI
1-iodohexadecane : CCCCCCCCCCCCCCCCI
1-iodohexane : CCCCCCI
1-iodononadecane : CCCCCCCCCCCCCCCCCCCI
1-iodononane : CCCCCCCCCI
1-iodooctadecane : CCCCCCCCCCCCCCCCCCI
1-iodooctane : CCCCCCCCI
1-iodopentacosane : CCCCCCCCCCCCCCCCCCCCCCCCCI
1-iodopentadecane : CCCCCCCCCCCCCCCI
1-iodopropane : CCCI
1-iodotetracosane : CCCCCCCCCCCCCCCCCCCCCCCCI
1-iodotetradecane : CCCCCCCCCCCCCCI
1-iodotricosane : CCCCCCCCCCCCCCCCCCCCCCCI
1-iodotridecane : CCCCCCCCCCCCCI
1-iodoundecane : CCCCCCCCCCCI
1-isobutylnaphthalene : CC(C)Cc1cccc2ccccc12
1-isopropyl-2,4,7-trimethylnaphthalene : CC(C)c1c(C)cc(C)c2ccc(C)cc12
1-isopropylnaphthalene : CC(C)c1cccc2ccccc12
1-methoxy-2-butanol : CCC(O)COC
1-methoxydecane : CCCCCCCCCCOC
1-methoxyheptane : CCCCCCCOC
1-methyl-1-ethylcyclohexane : CCC1(C)CCCCC1
1-methyl-1-ethylcyclopentane : CCC1(C)CCCC1
1-methyl-1-ethylcyclopropane : CCC1(C)CC1
1-methyl-1-isopropylcyclopentane : CC(C)C1(C)CCCC1
1-methyl-1-propylcyclohexane : CCCC1(C)CCCCC1
1-methyl-1-pr

1-propoxy-2-propanol : CCCOCC(C)O
1-propyl-[1,2,3,4-tetrahydronaphthalene] : CCCC1CCCc2ccccc12
1-propylcyclopentene : CCCC1=CCCC1
1-propylnaphthalene : CCCc1cccc2ccccc12
1-sec-butylnaphthalene : CCC(C)c1cccc2ccccc12
1-tert-butoxy-2-propanol : CC(O)COC(C)(C)C
1-tert-butyl-3,5-dimethylbenzene : Cc1cc(C)cc(c1)C(C)(C)C
1-tert-butylnaphthalene : CC(C)(C)c1cccc2ccccc12
1-tetracosene : CCCCCCCCCCCCCCCCCCCCCCC=C
1-tetracosyne : CCCCCCCCCCCCCCCCCCCCCCC#C
1-tetradecanethiol : CCCCCCCCCCCCCCS
1-tetradecanol : CCCCCCCCCCCCCCO
1-tetradecene : CCCCCCCCCCCCC=C
1-tetradecylcyclopentene : CCCCCCCCCCCCCCC1=CCCC1
1-tetradecyne : CCCCCCCCCCCCC#C
1-tricosene : CCCCCCCCCCCCCCCCCCCCCC=C
1-tricosyne : CCCCCCCCCCCCCCCCCCCCCC#C
1-tridecanal : CCCCCCCCCCCCC=O
1-tridecanethiol : CCCCCCCCCCCCCS
1-tridecanol : CCCCCCCCCCCCCO
1-tridecene : CCCCCCCCCCCC=C
1-tridecylcyclopentene : CCCCCCCCCCCCCC1=CCCC1
1-tridecyne : CCCCCCCCCCCC#C
1-undecanethiol : CCCCCCCCCCCS
1-undecanol : CCCCCCCCCCCO
1-undecene : CCCCCCCCCC=C
1-un

2,2,5,6-tetramethylheptane : CC(C)C(C)CCC(C)(C)C
2,2,5,6-tetramethyloctane : CCC(C)C(C)CCC(C)(C)C
2,2,5,7-tetramethyloctane : CC(C)CC(C)CCC(C)(C)C
2,2,5-trimethyl-3-ethylheptane : CCC(C)CC(CC)C(C)(C)C
2,2,5-trimethyl-3-ethylhexane : CCC(CC(C)C)C(C)(C)C
2,2,5-trimethyl-3-hexanol : CC(C)CC(O)C(C)(C)C
2,2,5-trimethyl-3-isopropylhexane : CC(C)CC(C(C)C)C(C)(C)C
2,2,5-trimethyl-4-ethylheptane : CCC(C)C(CC)CC(C)(C)C
2,2,5-trimethyl-4-ethylhexane : CCC(CC(C)(C)C)C(C)C
2,2,5-trimethyl-4-heptanol : CCC(C)C(O)CC(C)(C)C
2,2,5-trimethyl-4-isopropylhexane : CC(C)C(CC(C)(C)C)C(C)C
2,2,5-trimethyl-5-ethylheptane : CCC(C)(CC)CCC(C)(C)C
2,2,5-trimethylheptane : CCC(C)CCC(C)(C)C
2,2,5-trimethylhexane : CC(C)CCC(C)(C)C
2,2,5-trimethylnonane : CCCCC(C)CCC(C)(C)C
2,2,5-trimethyloctane : CCCC(C)CCC(C)(C)C
2,2,6,6-tetramethylheptane : CC(C)(C)CCCC(C)(C)C
2,2,6,6-tetramethyloctane : CCC(C)(C)CCCC(C)(C)C
2,2,6,7-tetramethyloctane : CC(C)C(C)CCCC(C)(C)C
2,2,6-trimethyl-3-ethylheptane : CCC(CCC(C)C)C(C)(C)C
2,2,6

2,3,4,5-tetramethylheptane : CCC(C)C(C)C(C)C(C)C
2,3,4,5-tetramethylhexane : CC(C)C(C)C(C)C(C)C
2,3,4,5-tetramethyloctane : CCCC(C)C(C)C(C)C(C)C
2,3,4,5-tetramethylphenol : Cc1cc(O)c(C)c(C)c1C
2,3,4,6-tetramethylaniline : Cc1cc(C)c(N)c(C)c1C
2,3,4,6-tetramethylheptane : CC(C)CC(C)C(C)C(C)C
2,3,4,6-tetramethyloctane : CCC(C)CC(C)C(C)C(C)C
2,3,4,6-tetramethylphenol : Cc1cc(C)c(O)c(C)c1C
2,3,4,7-tetramethyloctane : CC(C)CCC(C)C(C)C(C)C
2,3,4-trimethyl-1-pentanol : CC(C)C(C)C(C)CO
2,3,4-trimethyl-1-pentene : CC(C)C(C)C(C)=C
2,3,4-trimethyl-2-hexanol : CCC(C)C(C)C(C)(C)O
2,3,4-trimethyl-2-pentanol : CC(C)C(C)C(C)(C)O
2,3,4-trimethyl-2-pentene : CC(C)C(C)=C(C)C
2,3,4-trimethyl-3-ethylheptane : CCCC(C)C(C)(CC)C(C)C
2,3,4-trimethyl-3-ethylhexane : CCC(C)C(C)(CC)C(C)C
2,3,4-trimethyl-3-hexanol : CCC(C)C(C)(O)C(C)C
2,3,4-trimethyl-3-isopropylhexane : CCC(C)C(C)(C(C)C)C(C)C
2,3,4-trimethyl-3-isopropylpentane : CC(C)C(C)(C(C)C)C(C)C
2,3,4-trimethyl-3-pentanol : CC(C)C(C)(O)C(C)C
2,3,4-trimethyl-4-

2,4,4,6-tetramethyloctane : CCC(C)CC(C)(C)CC(C)C
2,4,4,7-tetramethyloctane : CC(C)CCC(C)(C)CC(C)C
2,4,4-trimethyl-1-pentanol : CC(CO)CC(C)(C)C
2,4,4-trimethyl-1-pentene : CC(=C)CC(C)(C)C
2,4,4-trimethyl-2-hexanol : CCC(C)(C)CC(C)(C)O
2,4,4-trimethyl-2-pentanol : CC(C)(C)CC(C)(C)O
2,4,4-trimethyl-2-pentene : CC(C)=CC(C)(C)C
2,4,4-trimethyl-3-ethylheptane : CCCC(C)(C)C(CC)C(C)C
2,4,4-trimethyl-3-ethylhexane : CCC(C(C)C)C(C)(C)CC
2,4,4-trimethyl-3-hexanol : CCC(C)(C)C(O)C(C)C
2,4,4-trimethyl-3-isopropylhexane : CCC(C)(C)C(C(C)C)C(C)C
2,4,4-trimethyl-5-ethylheptane : CCC(CC)C(C)(C)CC(C)C
2,4,4-trimethylheptane : CCCC(C)(C)CC(C)C
2,4,4-trimethylhexane : CCC(C)(C)CC(C)C
2,4,4-trimethylnonane : CCCCCC(C)(C)CC(C)C
2,4,4-trimethyloctane : CCCCC(C)(C)CC(C)C
2,4,4-trimethylpentene : CC(=C)CC(C)(C)C
2,4,5,5-tetramethylheptane : CCC(C)(C)C(C)CC(C)C
2,4,5,5-tetramethyloctane : CCCC(C)(C)C(C)CC(C)C
2,4,5,6-tetramethyloctane : CCC(C)C(C)C(C)CC(C)C
2,4,5,7-tetramethyloctane : CC(C)CC(C)C(C)CC(C)C
2,4,5

2,5-dimethyl-4-ethylphenol : CCc1cc(C)c(O)cc1C
2,5-dimethyl-4-isopropylheptane : CCC(C)C(CC(C)C)C(C)C
2,5-dimethyl-4-octanol : CCCC(C)C(O)CC(C)C
2,5-dimethyl-4-propylheptane : CCCC(CC(C)C)C(C)CC
2,5-dimethyl-5-ethylheptane : CCC(C)(CC)CCC(C)C
2,5-dimethyl-5-ethyloctane : CCCC(C)(CC)CCC(C)C
2,5-dimethyl-6-ethyloctane : CCC(CC)C(C)CCC(C)C
2,5-dimethyl-6-ethylphenol : CCc1c(C)ccc(C)c1O
2,5-dimethyl-cis-3-hexene : CC(C)\C=C/C(C)C
2,5-dimethyl-trans-3-hexene : CC(C)\C=C\C(C)C
2,5-dimethylaniline : Cc1ccc(C)c(N)c1
2,5-dimethylbiphenyl : Cc1ccc(C)c(c1)c2ccccc2
2,5-dimethyldecane : CCCCCC(C)CCC(C)C
2,5-dimethylfuran : Cc1oc(C)cc1
2,5-dimethylheptane : CCC(C)CCC(C)C
2,5-dimethylhexane : CC(C)CCC(C)C
2,5-dimethylnonane : CCCCC(C)CCC(C)C
2,5-dimethyloctane : CCCC(C)CCC(C)C
2,5-dimethylpyridine : Cc1ccc(C)nc1
2,5-dimethylquinoline : Cc1ccc2c(C)cccc2n1
2,5-dimethyltetrahydrofuran : CC1CCC(C)O1
2,5-dimethylthiophene : Cc1sc(C)cc1
2,5-dimethylthiophenol : Cc1ccc(C)c(S)c1
2,5-dinitrotoluene : Cc1cc(cc

2-chloro-1-butene : CCC(Cl)=C
2-chloro-1-heptene : CCCCCC(Cl)=C
2-chloro-1-hexene : CCCCC(Cl)=C
2-chloro-1-octene : CCCCCCC(Cl)=C
2-chloro-1-pentene : CCCC(Cl)=C
2-chloro-1-propene : CC(Cl)=C
2-chloro-2,3,3-trimethylbutane : CC(C)(C)C(C)(C)Cl
2-chloro-2,3-dimethylbutane : CC(C)C(C)(C)Cl
2-chloro-2,3-dimethylpentane : CCC(C)C(C)(C)Cl
2-chloro-2,4,4-trimethylpentane : CC(C)(C)CC(C)(C)Cl
2-chloro-2,4-dimethylpentane : CC(C)CC(C)(C)Cl
2-chloro-2,5-dimethylhexane : CC(C)CCC(C)(C)Cl
2-chloro-2-butene : CC=C(C)Cl
2-chloro-2-methylheptane : CCCCCC(C)(C)Cl
2-chloro-2-methylhexane : CCCCC(C)(C)Cl
2-chloro-2-methylpentane : CCCC(C)(C)Cl
2-chloro-2-methylpropane : CC(C)(C)Cl
2-chloro-2-octene : CCCCCC=C(C)Cl
2-chloro-2-pentene : CCC=C(C)Cl
2-chloro-3,3-dimethylbutane : CC(Cl)C(C)(C)C
2-chloro-3-methyl-1-butene : CC(C)C(Cl)=C
2-chloro-3-methyl-2-butene : CC(C)=C(C)Cl
2-chloro-3-methylpentane : CCC(C)C(C)Cl
2-chloro-4-methylpentane : CC(C)CC(C)Cl
2-chloro-5-methylhexane : CC(C)CCC(C)Cl
2-chloro-6-me

2-methyl-1-hexanol : CCCCC(C)CO
2-methyl-1-hexanol, (±) did not work
2-methyl-1-hexene : CCCCC(C)=C
2-methyl-1-isopropylnaphthalene : CC(C)c1c(C)ccc2ccccc12
2-methyl-1-nonadecene : CCCCCCCCCCCCCCCCCC(C)=C
2-methyl-1-nonanol : CCCCCCCC(C)CO
2-methyl-1-nonene : CCCCCCCC(C)=C
2-methyl-1-octadecene : CCCCCCCCCCCCCCCCC(C)=C
2-methyl-1-octanol : CCCCCCC(C)CO
2-methyl-1-octene : CCCCCCC(C)=C
2-methyl-1-pentadecanol : CCCCCCCCCCCCCC(C)CO
2-methyl-1-pentadecene : CCCCCCCCCCCCCC(C)=C
2-methyl-1-pentanol : CCCC(C)CO
2-methyl-1-pentene : CCCC(C)=C
2-methyl-1-pentyl acetate : CCCC(C)COC(C)=O
2-methyl-1-propene, tetramer did not work
2-methyl-1-propoxypropane : CCCOCC(C)C
2-methyl-1-propylnaphthalene : CCCc1c(C)ccc2ccccc12
2-methyl-1-tetradecene : CCCCCCCCCCCCC(C)=C
2-methyl-1-tridecene : CCCCCCCCCCCC(C)=C
2-methyl-1-undecanol : CCCCCCCCCC(C)CO
2-methyl-1-undecene : CCCCCCCCCC(C)=C
2-methyl-2,3-butanediol : CC(O)C(C)(C)O
2-methyl-2,3-pentadiene : C[CH]=[C]=[C](C)C
2-methyl-2,3-pentanediol : CCC(O)C(

2-nitro-2-methylpentane : CCCC(C)(C)[N+]([O-])=O
2-nitro-2-methylpropane : CC(C)(C)[N+]([O-])=O
2-nitro-3,3-dimethylbutane : CC([N+]([O-])=O)C(C)(C)C
2-nitro-3-methylpentane : CCC(C)C(C)[N+]([O-])=O
2-nitro-4-methylpentane : CC(C)CC(C)[N+]([O-])=O
2-nitro-N-phenylaniline : [O-][N+](=O)c1ccccc1Nc2ccccc2
2-nitrobutane : CCC(C)[N+]([O-])=O
2-nitrobutane, (±) did not work
2-nitrohexane : CCCCC(C)[N+]([O-])=O
2-nitrooctane, (±) did not work
2-nitropropane : CC(C)[N+]([O-])=O
2-nonadecanethiol : CCCCCCCCCCCCCCCCCC(C)S
2-nonadecanol : CCCCCCCCCCCCCCCCCC(C)O
2-nonadecanone : CCCCCCCCCCCCCCCCCC(C)=O
2-nonadecyne : CCCCCCCCCCCCCCCCC#CC
2-nonanethiol : CCCCCCCC(C)S
2-nonanol : CCCCCCCC(C)O
2-nonanol, (±) did not work
2-nonanone : CCCCCCCC(C)=O
2-nonyl-[ 1,2,3,4-tetrahydronaphthalene] : CCCCCCCCCC1CCc2ccccc2C1
2-nonylnaphthalene : CCCCCCCCCc1ccc2ccccc2c1
2-nonyne : CCCCCCC#CC
2-norbornene : C1CC2CC1C=C2
2-octadecanethiol : CCCCCCCCCCCCCCCCC(C)S
2-octadecanol : CCCCCCCCCCCCCCCCC(C)O
2-octadecanone 

3,4,4,5-tetramethylheptane : CCC(C)C(C)(C)C(C)CC
3,4,4,5-tetramethyloctane : CCCC(C)C(C)(C)C(C)CC
3,4,4,6-tetramethyloctane : CCC(C)CC(C)(C)C(C)CC
3,4,4-trimethyl-1-hexanol : CCC(C)(C)C(C)CCO
3,4,4-trimethyl-1-pentanol : CC(CCO)C(C)(C)C
3,4,4-trimethyl-1-pentene : CC(C=C)C(C)(C)C
3,4,4-trimethyl-2-pentanol : CC(O)C(C)C(C)(C)C
3,4,4-trimethyl-2-pentene : CC=C(C)C(C)(C)C
3,4,4-trimethyl-3-ethylheptane : CCCC(C)(C)C(C)(CC)CC
3,4,4-trimethyl-3-hexanol : CCC(C)(C)C(C)(O)CC
3,4,4-trimethyl-5-ethylheptane : CCC(C)C(C)(C)C(CC)CC
3,4,4-trimethyl-cis-2-pentene did not work
3,4,4-trimethyl-trans-2-pentene did not work
3,4,4-trimethylheptane : CCCC(C)(C)C(C)CC
3,4,4-trimethylnonane : CCCCCC(C)(C)C(C)CC
3,4,4-trimethyloctane : CCCCC(C)(C)C(C)CC
3,4,5,5-tetramethyl-3-hexanol : CCC(C)(O)C(C)C(C)(C)C
3,4,5,5-tetramethyloctane : CCCC(C)(C)C(C)C(C)CC
3,4,5,6-tetramethyloctane : CCC(C)C(C)C(C)C(C)CC
3,4,5-trimethyl-3-ethylheptane : CCC(C)C(C)C(C)(CC)CC
3,4,5-trimethyl-4-ethylheptane : CCC(C)C(C)(CC)C(C)C

3-amino-1-propanol : NCCCO
3-amino-2-butanol : CC(N)C(C)O
3-amino-2-methylpentane : CCC(N)C(C)C
3-amino-3-methylpentane : CCC(C)(N)CC
3-aminoheptane : CCCCC(N)CC
3-aminohexane : CCCC(N)CC
3-bromo-1-butene : CC(Br)C=C
3-bromo-1-pentene : CCC(Br)C=C
3-bromo-1-propene : BrCC=C
3-bromo-2-ethyl-1-propene : CCC(=C)CBr
3-bromo-2-methyl-1-butene : CC(Br)C(C)=C
3-bromo-2-methyl-1-propene : CC(=C)CBr
3-bromo-2-methylpentane : CCC(Br)C(C)C
3-bromo-2-pentene : CCC(Br)=CC
3-bromo-3-methyl-1-butene : CC(C)(Br)C=C
3-bromo-3-methylpentane : CCC(C)(Br)CC
3-bromo-cis-2-pentene did not work
3-bromo-trans-2-pentene did not work
3-bromohexane : CCCC(Br)CC
3-bromopentane : CCC(Br)CC
3-butenylbenzene : CCC=Cc1ccccc1
3-butylphenol : CCCCc1cccc(O)c1
3-chloro-1-butene : CC(Cl)C=C
3-chloro-1-pentene : CCC(Cl)C=C
3-chloro-1-propene : ClCC=C
3-chloro-2,2,3-trimethylhexane : CCCC(C)(Cl)C(C)(C)C
3-chloro-2,3-dimethylhexane : CCCC(C)(Cl)C(C)C
3-chloro-2,3-dimethylpentane : CCC(C)(Cl)C(C)C
3-chloro-2-(chloromethyl)-1-

3-methyl-2-hexanol : CCCC(C)C(C)O
3-methyl-2-hexanone : CCCC(C)C(C)=O
3-methyl-2-isopropyl-1-butanol : CC(C)C(CO)C(C)C
3-methyl-2-isopropyl-1-butene : CC(C)C(=C)C(C)C
3-methyl-2-isopropylnaphthalene : CC(C)c1cc2ccccc2cc1C
3-methyl-2-isopropylphenol : CC(C)c1c(C)cccc1O
3-methyl-2-nonanol : CCCCCCC(C)C(C)O
3-methyl-2-octanol : CCCCCC(C)C(C)O
3-methyl-2-octanone : CCCCCC(C)C(C)=O
3-methyl-2-octene : CCCCCC(C)=CC
3-methyl-2-pentanol : CCC(C)C(C)O
3-methyl-2-pentanone : CCC(C)C(C)=O
3-methyl-2-pentanone, (±) did not work
3-methyl-2-pentene, cis and trans did not work
3-methyl-2-propylnaphthalene : CCCc1cc2ccccc2cc1C
3-methyl-2-propylphenol : CCCc1c(C)cccc1O
3-methyl-2-thiahexane : CCCC(C)SC
3-methyl-2-thiapentane : CCC(C)SC
3-methyl-3,4-diethylheptane : CCCC(CC)C(C)(CC)CC
3-methyl-3,4-diethylhexane : CCC(CC)C(C)(CC)CC
3-methyl-3,5-diethylheptane : CCC(CC)CC(C)(CC)CC
3-methyl-3-butenoic acid : CC(=C)CC(O)=O
3-methyl-3-ethyl-1-pentanol : CCC(C)(CC)CCO
3-methyl-3-ethyl-1-pentene : CCC(C)(CC)C=

4,5-dibromooctane : CCCC(Br)C(Br)CCC
4,5-diethyloctane : CCCC(CC)C(CC)CCC
4,5-dimethyl-1-hexanol : CC(C)C(C)CCCO
4,5-dimethyl-1-hexene : CC(C)C(C)CC=C
4,5-dimethyl-1-octanol : CCCC(C)C(C)CCCO
4,5-dimethyl-2-hexanol : CC(C)C(C)CC(C)O
4,5-dimethyl-2-hexene : CC=CC(C)C(C)C
4,5-dimethyl-3-ethyloctane : CCCC(C)C(C)C(CC)CC
4,5-dimethyl-3-hexanol : CCC(O)C(C)C(C)C
4,5-dimethyl-4-ethyloctane : CCCC(C)C(C)(CC)CCC
4,5-dimethyl-cis-2-hexene : C\C=C/C(C)C(C)C
4,5-dimethyl-trans-2-hexene : C/C=C/C(C)C(C)C
4,5-dimethyldecane : CCCCCC(C)C(C)CCC
4,5-dimethylnonane : CCCCC(C)C(C)CCC
4,5-dimethyloctane : CCCC(C)C(C)CCC
4,5-dimethylquinoline : Cc1cccc2nccc(C)c12
4,6,6-trimethyl-2-heptanol : CC(O)CC(C)CC(C)(C)C
4,6-dimethyl-1-ethylnaphthalene : CCc1ccc(C)c2cc(C)ccc12
4,6-dimethyl-1-heptanol : CC(C)CC(C)CCCO
4,6-dimethyl-1-octanol : CCC(C)CC(C)CCCO
4,6-dimethyl-2-heptanol : CC(C)CC(C)CC(C)O
4,6-dimethyl-4-octanol : CCCC(C)(O)CC(C)CC
4,6-dimethyldecane : CCCCC(C)CC(C)CCC
4,6-dimethylnonane : CCCC(C)CC(C)CCC

4-methyl-6-ethyloctane : CCCC(C)CC(CC)CC
4-methyl-7-ethylnonane : CCCC(C)CCC(CC)CC
4-methyl-N,N-dimethylaniline : CN(C)c1ccc(C)cc1
4-methyl-N-isopropylaniline : CC(C)Nc1ccc(C)cc1
4-methyl-N-propylaniline : CCCNc1ccc(C)cc1
4-methyl-cis-2-heptene : CCCC(C)\C=C/C
4-methyl-cis-2-hexene : CCC(C)\C=C/C
4-methyl-cis-2-pentene : C\C=C/C(C)C
4-methyl-cis-3-heptene did not work
4-methyl-gamma-butyrolactone : CC1CCC(=O)O1
4-methyl-trans-2-heptene : CCCC(C)\C=C\C
4-methyl-trans-2-hexene : CCC(C)\C=C\C
4-methyl-trans-2-pentene : C/C=C/C(C)C
4-methyl-trans-3-heptene did not work
4-methylbenzenemethanol : Cc1ccc(CO)cc1
4-methylbiphenyl : Cc1ccc(cc1)c2ccccc2
4-methylcyclohexanol; (cis+trans) did not work
4-methylcyclohexene : CC1CCC=CC1
4-methylcyclohexylamine : CC1CCC(N)CC1
4-methylcyclopentene : CC1CC=CC1
4-methyldecane : CCCCCCC(C)CCC
4-methyldiphenylmethane : Cc1ccc(Cc2ccccc2)cc1
4-methylheptane : CCCC(C)CCC
4-methylhexanoic acid, (±) did not work
4-methylisoquinoline : Cc1cncc2ccccc12
4-methylnon

6-methyl-3-octanol : CCC(C)CCC(O)CC
6-methyl-4-octanol : CCCC(O)CC(C)CC
6-methyl-6-pentadecanol : CCCCCCCCCC(C)(O)CCCCC
6-methyl-N-isopentyl-2-heptanamine : CC(C)CCCC(C)NCCC(C)C
6-methyl-cis-2-heptene : C\C=C/CCC(C)C
6-methyl-cis-3-heptene : CC\C=C/CC(C)C
6-methyl-trans-2-heptene : C/C=C/CCC(C)C
6-methyl-trans-3-heptene : CC/C=C/CC(C)C
6-methyldecanoic acid : CCCCC(C)CCCCC(O)=O
6-methylisoquinoline : Cc1ccc2cnccc2c1
6-methylquinoline : Cc1ccc2ncccc2c1
6-methylundecane : CCCCCC(C)CCCCC
6-n-amyl-m-cresol : CCCCCc1ccc(C)cc1O
6-undecanol : CCCCCC(O)CCCCC
6-undecanone : CCCCCC(=O)CCCCC
7,7-dimethyl-1-octanol : CC(C)(C)CCCCCCO
7-ethyl-2-methyl-4-undecanone : CCCCC(CC)CCC(=O)CC(C)C
7-hexyltridecane : CCCCCCC(CCCCCC)CCCCCC
7-methyl-1-isopropylnaphthalene : CC(C)c1cccc2ccc(C)cc12
7-methyl-1-nonanol : CCC(C)CCCCCCO
7-methyl-1-octanol : CC(C)CCCCCCO
7-methyl-1-octene : CC(C)CCCCC=C
7-methyl-1H-indene : Cc1cccc2C=CCc12
7-methyl-2-nonanol : CCC(C)CCCCC(C)O
7-methyl-2-octanol : CC(C)CCCCC(C)O
7-meth

benzyl benzoate : O=C(OCc1ccccc1)c2ccccc2
benzyl chloride : ClCc1ccccc1
benzyl dichloride : ClC(Cl)c1ccccc1
benzyl ethyl ether : CCOCc1ccccc1
benzyl formate : O=COCc1ccccc1
benzylamine : NCc1ccccc1
beryllium bromide : [Be++].[Br-].[Br-]
beryllium chloride : [Be++].[Cl-].[Cl-]
beryllium iodide : [Be++].[I-].[I-]
beta-phellandrene : CC(C)C1CCC(=C)C=C1
beta-pinene : CC1(C)C2CCC(=C)C1C2
beta-pinene, (1R) did not work
beta-propiolactone : O=C1CCO1
biacetylene : C#CC#C
bicyclohexyl : C1CCC(CC1)C2CCCCC2
bis(2-chloroethyl) ether : ClCCOCCCl
bis(2-ethylhexyl) adipate : CCCCC(CC)COC(=O)CCCCC(=O)OCC(CC)CCCC
bis(2-ethylhexyl) ether : CCCCC(CC)COCC(CC)CCCC
bis(2-ethylhexyl) terephthalate : CCCCC(CC)COC(=O)c1ccc(cc1)C(=O)OCC(CC)CCCC
bis(2-ethylhexyl)amine : CCCCC(CC)CNCC(CC)CCCC
bis(2-methylbutyl) sulfide : CCC(C)CSCC(C)CC
bis(chloromethyl) ether : ClCOCCl
bis(cyanoethyl) ether : N#CCCOCCC#N
bis(isopropyl)naphthalene : CC(C)c1ccc2ccccc2c1C(C)C
bis(methylthio)methane : CSCSC
bismuth pentafluoride : F

cis-1-iodo-1-hexene : CCCC\C=C/I
cis-1-iodo-1-nonadecene : CCCCCCCCCCCCCCCCC\C=C/I
cis-1-iodo-1-nonene : CCCCCCC\C=C/I
cis-1-iodo-1-octadecene : CCCCCCCCCCCCCCCC\C=C/I
cis-1-iodo-1-octene : CCCCCC\C=C/I
cis-1-iodo-1-pentadecene : CCCCCCCCCCCCC\C=C/I
cis-1-iodo-1-pentene : CCC\C=C/I
cis-1-iodo-1-propene : C\C=C/I
cis-1-iodo-1-tetradecene : CCCCCCCCCCCC\C=C/I
cis-1-iodo-1-tridecene : CCCCCCCCCCC\C=C/I
cis-1-iodo-1-undecene : CCCCCCCCC\C=C/I
cis-1-iodo-2-methyl-1-butene did not work
cis-1-iodo-3-methyl-1-butene : CC(C)\C=C/I
cis-1-methyl-2-(1-propenyl)benzene : C\C=C/c1ccccc1C
cis-1-methyl-3-(1-propenyl)benzene : C\C=C/c1cccc(C)c1
cis-1-methyl-4-(1-propenyl)benzene : C\C=C/c1ccc(C)cc1
cis-1-methyl-4-isopropylcyclohexane : C[C@@H]1CC[C@@H](CC1)C(C)C
cis-1-propenylbenzene : C\C=C/c1ccccc1
cis-2,3-dichloro-2-butene did not work
cis-2,3-diphenyl-2-butene did not work
cis-2,cis-4-hexadiene did not work
cis-2,trans-4-hexadiene did not work
cis-2-butene : C\C=C/C
cis-2-butenoic acid : C\C=C/C(O)

diethylene glycol ethyl ether acetate : CCOCCOCCOC(C)=O
diethylene glycol monobutyl ether : CCCCOCCOCCO
diethylene glycol monobutyl ether acetate : CCCCOCCOCCOC(C)=O
diethylene glycol monopropyl ether : CCCOCCOCCO
diethylene triamine : NCCNCCN
diethylheneicosylamine : CCCCCCCCCCCCCCCCCCCCCN(CC)CC
diethylheptadecylamine : CCCCCCCCCCCCCCCCCN(CC)CC
diethylheptylamine : CCCCCCCN(CC)CC
diethylhexadecylamine : CCCCCCCCCCCCCCCCN(CC)CC
diethylhexylamine : CCCCCCN(CC)CC
diethylisopropylamine : CCN(CC)C(C)C
diethylnonadecylamine : CCCCCCCCCCCCCCCCCCCN(CC)CC
diethylnonylamine : CCCCCCCCCN(CC)CC
diethyloctadecylamine : CCCCCCCCCCCCCCCCCCN(CC)CC
diethyloctylamine : CCCCCCCCN(CC)CC
diethylpentadecylamine : CCCCCCCCCCCCCCCN(CC)CC
diethylpentylamine : CCCCCN(CC)CC
diethylpropylamine : CCCN(CC)CC
diethyltetradecylamine : CCCCCCCCCCCCCCN(CC)CC
diethyltridecylamine : CCCCCCCCCCCCCN(CC)CC
diethylundecylamine : CCCCCCCCCCCN(CC)CC
difluoromethane : FCF
digermane : [GeH3][GeH3]
diglycolic acid : OC(=O)COCC(O

ethyl mercaptan : CCS
ethyl methacrylate : CCOC(=O)C(C)=C
ethyl methyl sulfate : CCO[S](=O)(=O)OC
ethyl nonanoate : CCCCCCCCC(=O)OCC
ethyl nonyl sulfide : CCCCCCCCCSCC
ethyl octadecyl sulfide : CCCCCCCCCCCCCCCCCCSCC
ethyl octanoate : CCCCCCCC(=O)OCC
ethyl octyl ether : CCCCCCCCOCC
ethyl octyl sulfide : CCCCCCCCSCC
ethyl pentadecyl sulfide : CCCCCCCCCCCCCCCSCC
ethyl pentanoate : CCCCC(=O)OCC
ethyl pentyl ether : CCCCCOCC
ethyl pentyl sulfide : CCCCCSCC
ethyl propanoate : CCOC(=O)CC
ethyl propenyl ether : CCOC=CC
ethyl propyl ether : CCCOCC
ethyl propyl sulfide : CCCSCC
ethyl sec-butyl ether : CCOC(C)CC
ethyl sulfate : CCO[S]([O-])(=O)=O
ethyl tert-butyl ether : CCOC(C)(C)C
ethyl tetradecyl sulfide : CCCCCCCCCCCCCCSCC
ethyl trans-2-butenoate : CCOC(=O)\C=C\C
ethyl trans-2-methyl-2-butenoate : CCOC(=O)C(/C)=C/C
ethyl tridecyl sulfide : CCCCCCCCCCCCCSCC
ethyl undecanoate : CCCCCCCCCCC(=O)OCC
ethyl undecyl sulfide : CCCCCCCCCCCSCC
ethyl vanillin : CCOc1cc(C=O)ccc1O
ethyl vinyl ether : CCOC=

isobutyl isobutyrate : CC(C)COC(=O)C(C)C
isobutyl isocyanate : CC(C)CN=C=O
isobutyl isocyanide : CC(C)C[N+]#[C-]
isobutyl mercaptan : CC(C)CS
isobutyl methacrylate : CC(C)COC(=O)C(C)=C
isobutyl pentanoate : CCCCC(=O)OCC(C)C
isobutyl propanoate : CCC(=O)OCC(C)C
isobutyl vinyl ether : CC(C)COC=C
isobutylamine : CC(C)CN
isobutylbenzene : CC(C)Cc1ccccc1
isobutylcyclohexane : CC(C)CC1CCCCC1
isobutylcyclopentane : CC(C)CC1CCCC1
isobutyraldehyde : CC(C)C=O
isobutyric acid : CC(C)C(O)=O
isobutyronitrile : CC(C)C#N
isodecanal : CC(C)CCCCCCC=O
isodecanol : CC(C)CCCCCCCO
isooctanol : CC(C)CCCCCO
isopentyl 2-methylpropanoate : CC(C)CCOC(=O)C(C)C
isopentyl acetate : CC(C)CCOC(C)=O
isopentyl formate : CC(C)CCOC=O
isopentyl hexanoate : CCCCCC(=O)OCCC(C)C
isopentyl isovalerate : CC(C)CCOC(=O)CC(C)C
isopentyl pentanoate : CCCCC(=O)OCCC(C)C
isopentyl propanoate : CCC(=O)OCCC(C)C
isopentylcyclohexane : CC(C)CCC1CCCCC1
isophorone : CC1=CC(=O)CC(C)(C)C1
isophorone diisocyanate : CC1(C)CC(CC(C)(CN=C=O)C1)N=

methylcyclopentane : CC1CCCC1
methylcyclopropane : CC1CC1
methyldecylamine : CCCCCCCCCCNC
methyldiethylamine : CCN(C)CC
methyldiisopropylamine : CC(C)N(C)C(C)C
methyldioctylamine : CCCCCCCCN(C)CCCCCCCC
methyldipropylamine : CCCN(C)CCC
methyldocosylamine : CCCCCCCCCCCCCCCCCCCCCCNC
methyldodecylamine : CCCCCCCCCCCCNC
methyleicosylamine : CCCCCCCCCCCCCCCCCCCCNC
methylethanolamine : CC(O)CN
methylethyl-sec-butylamine : CCC(C)N(C)CC
methylethyl-tert-butylamine : CCN(C)C(C)(C)C
methylethylamine : CCNC
methylethylbutylamine : CCCCN(C)CC
methylethylisobutylamine : CCN(C)CC(C)C
methylethylisopropylamine : CCN(C)C(C)C
methylethylpropylamine : CCCN(C)CC
methylglutaronitrile : CC(CCC#N)C#N
methylheneicosylamine : CCCCCCCCCCCCCCCCCCCCCNC
methylheptadecylamine : CCCCCCCCCCCCCCCCCNC
methylheptylamine : CCCCCCCNC
methylhexadecylamine : CCCCCCCCCCCCCCCCNC
methylhexylamine : CCCCCCNC
methylisobutylamine : CNCC(C)C
methylisopropylamine : CNC(C)C
methylnaphthalene : Cc1cccc2ccccc12
methylnonadecylamine : 

pentadecylamine : CCCCCCCCCCCCCCCN
pentadecylbenzene : CCCCCCCCCCCCCCCc1ccccc1
pentadecylcyclohexane : CCCCCCCCCCCCCCCC1CCCCC1
pentadecylcyclopentane : CCCCCCCCCCCCCCCC1CCCC1
pentaethylbenzene : CCc1cc(CC)c(CC)c(CC)c1CC
pentafluoroethane : FC(F)C(F)(F)F
pentafluoroethyl trifluorovinyl ether : FC(F)=C(F)OC(F)(F)C(F)(F)F
pentamethylbenzene : Cc1cc(C)c(C)c(C)c1C
pentamethylphenol : Cc1c(C)c(C)c(O)c(C)c1C
pentanal : CCCCC=O
pentanedial : O=CCCCC=O
pentanoic acid : CCCCC(O)=O
pentanol : CCCCCO
pentyl acetate : CCCCCOC(C)=O
pentyl butanoate : CCCCCOC(=O)CCC
pentyl formate : CCCCCOC=O
pentyl heptanoate : CCCCCCC(=O)OCCCCC
pentyl hexanoate : CCCCCOC(=O)CCCCC
pentyl mercaptan : CCCCCS
pentyl octanoate : CCCCCCCC(=O)OCCCCC
pentyl pentanoate : CCCCCOC(=O)CCCC
pentyl propanoate : CCCCCOC(=O)CC
pentylamine : CCCCCN
pentylbenzene : CCCCCc1ccccc1
pentylcyclohexane : CCCCCC1CCCCC1
pentylcyclopentane : CCCCCC1CCCC1
perchloric acid : O[Cl](=O)(=O)=O
perchloryl fluoride : F[Cl](=O)(=O)=O
perfluoroethyl m

tetracosylamine : CCCCCCCCCCCCCCCCCCCCCCCCN
tetradecamethylhexasiloxane : C[Si](C)(C)O[Si](C)(C)O[Si](C)(C)O[Si](C)(C)O[Si](C)(C)O[Si](C)(C)C
tetradecanal : CCCCCCCCCCCCCC=O
tetradecane : CCCCCCCCCCCCCC
tetradecanenitrile : CCCCCCCCCCCCCC#N
tetradecanoic acid : CCCCCCCCCCCCCC(O)=O
tetradecanol; mixed isomers did not work
tetradecylamine : CCCCCCCCCCCCCCN
tetradecylcyclohexane : CCCCCCCCCCCCCCC1CCCCC1
tetradecylcyclopentane : CCCCCCCCCCCCCCC1CCCC1
tetraethyl lead : CC[Pb](CC)(CC)CC
tetraethylene glycol : OCCOCCOCCOCCO
tetraethylene glycol dimethyl ether : COCCOCCOCCOCCOC
tetraethylsilane : CC[Si](CC)(CC)CC
tetrafluorodimethyl ether did not work
tetrafluoroethylene : FC(F)=C(F)F
tetrafluorohydrazine : FN(F)N(F)F
tetrahydrofuran : C1CCOC1
tetrahydrofurfuryl alcohol : OCC1CCCO1
tetrahydroneral did not work
tetrahydrothiophene : C1CCSC1
tetrakis(trimethylsilyloxy)silane : C[Si](C)(C)O[Si](O[Si](C)(C)C)(O[Si](C)(C)C)O[Si](C)(C)C
tetramethylsilane : C[Si](C)(C)C
tetranitromethane : [O-][N+](=

trans-5-undecene : CCCCC/C=C/CCCC
trans-7-tetradecene : CCCCCC\C=C\CCCCCC
trans-cinnamic acid : OC(=O)\C=C\c1ccccc1
trans-crotonaldehyde : C/C=C/C=O
trans-crotonitrile did not work
trans-decahydronaphthalene : C1CC[C@@H]2CCCC[C@H]2C1
trans-dicyano-1-butene did not work
trans-stilbene : c1ccc(cc1)\C=C\c2ccccc2
tributyl borate : CCCCOB(OCCCC)OCCCC
tributylamine : CCCCN(CCCC)CCCC
trichloroacetaldehyde : ClC(Cl)(Cl)C=O
trichloroacetic acid : OC(=O)C(Cl)(Cl)Cl
trichloroacetyl chloride : ClC(=O)C(Cl)(Cl)Cl
trichlorobenzene : Clc1cccc(Cl)c1Cl
trichloroethylene : ClC=C(Cl)Cl
trichloroethylsilane : [SiH3]CC(Cl)(Cl)Cl
trichlorofluoromethane : FC(Cl)(Cl)Cl
trichlorophenylsilane : Cl[Si](Cl)(Cl)c1ccccc1
trichlorovinylsilane : [SiH3]C(Cl)=C(Cl)Cl
tricosane : CCCCCCCCCCCCCCCCCCCCCCC
tricosylamine : CCCCCCCCCCCCCCCCCCCCCCCN
tricyclo[3.3.1.13,7]decane : C1C2CC3CC1CC(C2)C3
tridecane : CCCCCCCCCCCCC
tridecanenitrile : CCCCCCCCCCCCC#N
tridecanoic acid : CCCCCCCCCCCCC(O)=O
tridecyl formate : CCCCCCCCCCCCC

In [8]:
# Save the table with its SMILES column to disk; the row index is not written.
data_SMILES.to_csv("boiling_data_smiles.csv", index=False)

In [10]:
# Count the rows still holding the '-' placeholder, i.e. failed lookups
# (386 failed chemicals in the recorded run).
len(data_SMILES.loc[data_SMILES['SMILES'] == '-'])

386

#### Method 2: Using PubChemPy

In [14]:
import pubchempy as pcp

Playing around with the package:

In [15]:
# Look up two compounds by name through PubChemPy to see what get_compounds returns.
# '(+)-a-pinene' is the first name in the boiling-point table.
results_aspirin = pcp.get_compounds('Aspirin','name')
results_pinene = pcp.get_compounds('(+)-a-pinene','name')

In [16]:
# Show the raw result list for aspirin and the isomeric SMILES of its first entry.
print(results_aspirin)
print(results_aspirin[0].isomeric_smiles)
# Note: when get_compounds returns an empty list, no results were found for that name.
# Next step: run all ~6000 chemicals through this and find every chemical with no result.

[Compound(2244)]
CC(=O)OC1=CC=CC=C1C(=O)O


In [None]:
#If pubchempy method is to be used for whole data set, this block will be used.
# Each name is classified by how many compounds PubChem returns for it:
#   exactly one -> its isomeric SMILES is recorded in SMILES_pubchempy,
#   none        -> the name is recorded in failed_pubchempy,
#   several     -> the name is recorded in more_than_1 (no SMILES is kept).
failed_pubchempy = []
SMILES_pubchempy = []
more_than_1 = []

for chemical in boiling_data['name']:
    try:
        res = pcp.get_compounds(chemical, 'name')
    except Exception as err:
        # One request per name is sent; a single failed request must not abort
        # the loop and discard everything collected so far. Count the name as
        # failed, show why, and carry on with the next one.
        failed_pubchempy.append(chemical)
        print(f'{chemical} failed! ({err})')
        continue

    if len(res) == 1:
        SMILES_pubchempy.append([chemical, res[0].isomeric_smiles])
        print(f'{chemical} : {res[0].isomeric_smiles}')
    elif len(res) == 0:
        failed_pubchempy.append(chemical)
        print(f'{chemical} failed!')
    else:
        more_than_1.append(chemical)
        print(f'{chemical}: Found more than 1 result')

# Summary: number of ambiguous names, then number of failed names.
print(len(more_than_1))
print(len(failed_pubchempy))

In [23]:
def get_SMILES_pubchempy(chemical):
    """Look up a chemical name with PubChemPy and return its isomeric SMILES.

    Parameters
    ----------
    chemical : str
        Name passed to ``pcp.get_compounds`` using the ``'name'`` namespace.

    Returns
    -------
    str
        The ``isomeric_smiles`` of the first compound returned, or ``'-'``
        when the lookup raises, returns no compounds, or the first compound
        has no SMILES value.

    The outcome is also printed, one line per call.
    """
    try:
        # The request is inside the try block so that an error raised by the
        # lookup itself yields '-' instead of propagating to the caller.
        res = pcp.get_compounds(chemical, 'name')
        smiles = res[0].isomeric_smiles if res else None
    except Exception:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
        # can still stop a long run of lookups.
        smiles = None

    if smiles is None:
        # Covers errors, empty results and a missing SMILES attribute value;
        # without this check str(None) would be returned as the text 'None'.
        print(f'{chemical} failed!')
        return '-'

    print(f'{chemical} : {smiles}')
    return str(smiles)

#### Now: Combine the 2 methods together
I took the chemicals for which CIR failed and passed them through PubChemPy, to maximise the number of chemicals I can work with.

In [18]:
# Reload the table written earlier under this filename; rows with SMILES == '-' are picked out next.
data_SMILES = pd.read_csv('boiling_data_smiles.csv')

In [20]:
# Index labels of every row whose SMILES is the '-' placeholder, as a plain list.
failed_indices = data_SMILES[data_SMILES['SMILES'].eq('-')].index.tolist()

In [24]:
# How many rows have '-' as their SMILES.
len(failed_indices)

386

In [29]:
# failed_indices holds index *labels* (it was taken from data_SMILES.index), so the
# rows are selected with label-based .loc. Positional .iloc would only agree with
# the labels while the frame keeps the default 0..n-1 index.
failed_chemicals = data_SMILES.loc[failed_indices, 'name']

In [30]:
# Display the names whose SMILES is '-' (pandas abbreviates the long Series).
failed_chemicals

0             (+)-a-pinene
2             (-)-a-pinene
3        (-)-b-citronellol
21       (1S)-(-)-b-pinene
28        (<+->)-2-butanol
               ...        
5958    triethylene glycol
6011              vanillin
6027               xylenes
6028     zirconium bromide
6030      zirconium iodide
Name: name, Length: 386, dtype: object

In [33]:
# Retry every CIR failure through PubChemPy and write the answer back into its own row.
# failed_indices and failed_chemicals are in the same order, so they can be walked in step.
for fail_index, chemical in zip(failed_indices, failed_chemicals):
    data_SMILES.at[fail_index, 'SMILES'] = get_SMILES_pubchempy(chemical)

(+)-a-pinene failed!
(-)-a-pinene : CC1=CC[C@H]2C[C@@H]1C2(C)C
(-)-b-citronellol failed!
(1S)-(-)-b-pinene failed!
(<+->)-2-butanol failed!
(<+->)-2-chlorooctane failed!
(R)-(+)-b-citronellol : C[C@H](CCC=C(C)C)CCO
(±)-a-pinene failed!
(±)-b-citronellol failed!
(±)-glycidol failed!
1,1',1''-(1-ethanyl-2-ylidene)trisbenzene failed!
1,1,3-trimethylcyclopentane : CC1CCC(C1)(C)C
1,1,4-tribromobutane : C(CC(Br)Br)CBr
1,1,4-trichlorobutane : C(CC(Cl)Cl)CCl
1,1,4-trifluorobutane : C(CC(F)F)CF
1,1,4-triiodobutane : C(CC(I)I)CI
1,1,4-trimethylcyclohexane : CC1CCC(CC1)(C)C
1,1-diphenylundecane : CCCCCCCCCCC(C1=CC=CC=C1)C2=CC=CC=C2
1,11-dibromoundecane : C(CCCCCBr)CCCCCBr
1,12-dibromododecane : C(CCCCCCBr)CCCCCBr
1,2,3-benzenetriol : C1=CC(=C(C(=C1)O)O)O
1,2,4-trichloro-5-methylbenzene : CC1=CC(=C(C=C1Cl)Cl)Cl
1,2-dichloro-1,2,3,3,4,4-hexafluorocyclobutane : C1(C(C(C1(F)Cl)(F)Cl)(F)F)(F)F
1,2-dichloro-3-methylbutane : CC(C)C(CCl)Cl
1,2-diiodobutane : CCC(CI)I
1,2-dimethylpropylamine : CC(C)C(C)N


3,4-dimethyl-4-octanol failed!
3,5-diethylheptane : CCC(CC)CC(CC)CC
3,5-dimethyl-3-hexanol : CCC(C)(CC(C)C)O
3,5-dimethyl-3-hexanol, (±) failed!
3,6-diethyloctane : CCC(CC)CCC(CC)CC
3,6-dimethyl-4-ethyloctane failed!
3,7-dimethyl-1-octanol, (±) failed!
3,7-dimethyl-1-octene : CC(C)CCCC(C)C=C
3,7-dimethyl-2-octanol : CC(C)CCCC(C)C(C)O
3,7-dimethyl-3-octanol : CCC(C)(CCCC(C)C)O
3,7-dimethyl-3-octanol, (±) failed!
3,7-dimethyldecane : CCCC(C)CCCC(C)CC
3,7-dimethylnonane : CCC(C)CCCC(C)CC
3,7-dimethyloctanyl acetate : CC(C)CCCC(C)CCOC(=O)C
3,7-dimethyloctanyl butyrate : CCCC(=O)OCCC(C)CCCC(C)C
3,7-dimethylquinoline : CC1=CC2=C(C=C1)C=C(C=N2)C
3,8-dimethyldecane : CCC(C)CCCCC(C)CC
3,8-dimethylquinoline : CC1=C2C(=CC=C1)C=C(C=N2)C
3,cis-4-dimethylcyclohexene failed!
3,cis-4-dimethylcyclopentene failed!
3,cis-4-dimethylthiacyclohexane failed!
3,cis-4-dimethylthiacyclopentane failed!
3-aminoheptane : CCCCC(CC)N
3-aminohexane : CCCC(CC)N
3-chloro-2-methyl-1-pentene : CCC(C(=C)C)Cl
3-chloro-2-me

In [34]:
# Number of rows whose SMILES entry is still the '-' placeholder.
len(data_SMILES.loc[data_SMILES['SMILES'].eq('-')])

123

Now only 123 did not return a SMILES value!

In [35]:
# Write the table, now holding the PubChemPy results as well, back to the same CSV file.
data_SMILES.to_csv('boiling_data_smiles.csv',index=False)