In [6]:
import requests
from rdkit import Chem

In [7]:
def get_smiles_from_name(chemical_name, timeout=10):
    """Look up a SMILES string for a chemical name via the PubChem PUG REST API.

    Parameters
    ----------
    chemical_name : str
        Name or synonym to resolve (e.g. "isopropanol"). It is percent-encoded
        before being placed in the URL path, so names containing spaces or
        reserved characters cannot alter the request path.
    timeout : float, optional
        Seconds to wait for PubChem before giving up (default 10). Without a
        timeout a stalled connection would block the kernel indefinitely.

    Returns
    -------
    str or None
        The SMILES string of the first matching record, or None when the name
        is empty, the request fails, PubChem answers with a non-200 status,
        the body is not valid JSON, or the response holds no usable record.
    """
    from urllib.parse import quote  # local import keeps this cell self-contained

    if not isinstance(chemical_name, str) or not chemical_name.strip():
        return None

    base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
    # safe="" also escapes "/", which would otherwise split the URL path.
    encoded_name = quote(chemical_name.strip(), safe="")
    input_url = f"{base_url}/compound/name/{encoded_name}/property/CanonicalSMILES/JSON"

    try:
        response = requests.get(input_url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported like any other failed lookup.
        return None
    if response.status_code != 200:
        return None

    try:
        data = response.json()
    except ValueError:
        # A 200 response whose body is not JSON is treated as "not found".
        return None
    if not isinstance(data, dict):
        return None

    table = data.get('PropertyTable')
    properties = table.get('Properties') if isinstance(table, dict) else None
    # Guard against an empty or malformed list instead of raising on [0].
    if not isinstance(properties, list) or not properties:
        return None

    record = properties[0]
    if not isinstance(record, dict):
        return None
    # NOTE(review): newer PubChem responses appear to label this property
    # "ConnectivitySMILES"; accept either key rather than raising KeyError.
    return record.get('CanonicalSMILES') or record.get('ConnectivitySMILES')

def canonicalize_smiles(smiles):
    """Return the canonical SMILES RDKit writes for ``smiles``.

    The input is parsed with ``Chem.MolFromSmiles``; when that yields None
    the function returns None, otherwise it returns the result of
    ``Chem.MolToSmiles`` with ``canonical=True``.
    """
    parsed = Chem.MolFromSmiles(smiles)
    # A None molecule means the SMILES could not be turned into a Mol object.
    return None if parsed is None else Chem.MolToSmiles(parsed, canonical=True)

def get_canonical_smiles_from_name(chemical_name):
    """Resolve a chemical name to a canonical SMILES string.

    Chains ``get_smiles_from_name`` and ``canonicalize_smiles``. On success
    the canonical SMILES is returned; if either step produces a falsy result,
    a string starting with "Error:" that names the failing step is returned
    instead (no exception is raised by this function itself).
    """
    looked_up = get_smiles_from_name(chemical_name)
    if looked_up:
        canonical = canonicalize_smiles(looked_up)
        if canonical:
            return canonical
        # Lookup worked, but the returned SMILES could not be canonicalized.
        return f"Error: Unable to canonicalize SMILES for {chemical_name}"
    # Nothing usable came back from the name lookup.
    return f"Error: Unable to retrieve SMILES for {chemical_name}"

In [8]:
# Three spellings that are expected to resolve to the same compound.
chemical_names = ["2-propanol", "isopropanol", "n-propan-2-ol"]

# Look each spelling up once, keyed by the spelling that was queried.
canonical_smiles = {}
for name in chemical_names:
    canonical_smiles[name] = get_canonical_smiles_from_name(name)

# Print one line per spelling, in the order the names were listed.
for name, smiles in canonical_smiles.items():
    print(f"The canonical SMILES for {name} is: {smiles}")

The canonical SMILES for 2-propanol is: CC(C)O
The canonical SMILES for isopropanol is: CC(C)O
The canonical SMILES for n-propan-2-ol is: CC(C)O


In [12]:
# Synonym table used to troubleshoot and verify the lookup functions above:
# each key is a common name, each value lists the names that are queried for it.
chemical_names = {
    "Ethanol": ["Ethanol", "Ethyl alcohol", "Grain alcohol"],
    "Acetaminophen": ["Acetaminophen", "Paracetamol", "APAP", "Tylenol"],
    "Sodium bicarbonate": ["Sodium bicarbonate", "Baking soda", "Bicarbonate of soda"],
    "Aspirin": ["Aspirin", "Acetylsalicylic acid", "ASA"],
    "Methanol": ["Methanol", "Methyl alcohol", "Wood alcohol"],
    "Isopropanol": ["Isopropanol", "Isopropyl alcohol", "2-propanol", "Rubbing alcohol"],
    "Glucose": ["Glucose", "Dextrose", "Grape sugar"],
    "Sodium chloride": ["Sodium chloride", "Table salt", "Halite", "NaCl"],
    "Acetic acid": ["Acetic acid", "Ethanoic acid", "Vinegar"]
}

# Lookup phase: resolve every synonym, keyed by the synonym itself.
canonical_smiles = {
    name: get_canonical_smiles_from_name(name)
    for names in chemical_names.values()
    for name in names
}

# Reporting phase: one group per common name, followed by a blank line.
for common_name, names in chemical_names.items():
    print(f"Common name: {common_name}")
    for name in names:
        smiles = canonical_smiles.get(name, "Error: SMILES not found")
        print(f"  {name}: {smiles}")
    print()


Common name: Ethanol
  Ethanol: CCO
  Ethyl alcohol: CCO
  Grain alcohol: CCO

Common name: Acetaminophen
  Acetaminophen: CC(=O)Nc1ccc(O)cc1
  Paracetamol: CC(=O)Nc1ccc(O)cc1
  APAP: CC(=O)Nc1ccc(O)cc1
  Tylenol: CC(=O)Nc1ccc(O)cc1

Common name: Sodium bicarbonate
  Sodium bicarbonate: O=C([O-])O.[Na+]
  Baking soda: O=C([O-])O.[Na+]
  Bicarbonate of soda: O=C([O-])O.[Na+]

Common name: Aspirin
  Aspirin: CC(=O)Oc1ccccc1C(=O)O
  Acetylsalicylic acid: CC(=O)Oc1ccccc1C(=O)O
  ASA: CC(=O)Oc1ccccc1C(=O)O

Common name: Methanol
  Methanol: CO
  Methyl alcohol: CO
  Wood alcohol: CO

Common name: Isopropanol
  Isopropanol: CC(C)O
  Isopropyl alcohol: CC(C)O
  2-propanol: CC(C)O
  Rubbing alcohol: CC(C)O

Common name: Glucose
  Glucose: OCC1OC(O)C(O)C(O)C1O
  Dextrose: OCC1OC(O)C(O)C(O)C1O
  Grape sugar: OCC1OC(O)C(O)C(O)C1O

Common name: Sodium chloride
  Sodium chloride: [Cl-].[Na+]
  Table salt: [Cl-].[Na+]
  Halite: [Cl-].[Na+]
  NaCl: [Cl-].[Na+]

Common name: Acetic acid
  Acetic acid:

## Known limitations of this code: The name-to-SMILES code above cannot handle chemical formulas (e.g., CH3CH2OH or NaHCO₃); such inputs produce an "Error: Unable to retrieve SMILES" message instead of a SMILES string.

In [16]:
# Demonstration of the known limitation: the last entry of each list is a
# formula rather than a name.
chemical_names = {
    "Sodium bicarbonate": ["Sodium bicarbonate", "Baking soda", "Bicarbonate of soda", "NaHCO₃"],
    "Acetic acid": ["Acetic acid", "Ethanoic acid", "Vinegar", "CH₃COOH"],
}

# Lookup phase: resolve every entry, keyed by the text that was queried.
canonical_smiles = {}
for names in chemical_names.values():
    canonical_smiles.update(
        (name, get_canonical_smiles_from_name(name)) for name in names
    )

# Reporting phase: one group per common name, followed by a blank line.
for common_name, names in chemical_names.items():
    print(f"Common name: {common_name}")
    for name in names:
        smiles = canonical_smiles.get(name, "Error: SMILES not found")
        print(f"  {name}: {smiles}")
    print()

Common name: Sodium bicarbonate
  Sodium bicarbonate: O=C([O-])O.[Na+]
  Baking soda: O=C([O-])O.[Na+]
  Bicarbonate of soda: O=C([O-])O.[Na+]
  NaHCO₃: Error: Unable to retrieve SMILES for NaHCO₃

Common name: Acetic acid
  Acetic acid: CC(=O)O
  Ethanoic acid: CC(=O)O
  Vinegar: CC(=O)O
  CH₃COOH: Error: Unable to retrieve SMILES for CH₃COOH

