<a href="https://colab.research.google.com/github/ryan-saloma/teaching-python/blob/main/MoleculeDimensionEstimation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install rdkit-pypi &> /dev/null

# Make necessary imports
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
import re

# Estimate the dimensions of a molecule from its SMILES string
def estimateDimensionsOfMolecule(smilesString):
  """Estimate the bounding-box dimensions of a molecule from its SMILES string.

  One 3D conformer is embedded with a fixed random seed (42) and relaxed
  with the UFF force field. The result is the extent of the conformer's
  atom coordinates along each Cartesian axis. This function never calls
  AddHs, so only the atoms of the parsed molecule contribute to the box.

  Args:
    smilesString: SMILES string of the molecule. Spaces and double quotes
      anywhere in the string are removed before parsing. The string must
      not contain the character 'H'.

  Returns:
    A numpy array of three floats: the x, y and z extents of the
    axis-aligned bounding box around the conformer's atom positions.

  Raises:
    TypeError: If the input is not a string, contains 'H', is empty after
      cleaning, or cannot be parsed as SMILES. TypeError is used for all
      input problems so that callers catching TypeError keep working.
    ValueError: If RDKit cannot generate a 3D conformer for the molecule.
  """

  # Check that input is a string
  if not isinstance(smilesString, str):
    raise TypeError("estimateDimensionsOfMolecule expects a string.")

  # Check that string doesn't contain hydrogen.
  # NOTE: this is a plain substring test, so it rejects every string that
  # contains an uppercase 'H' anywhere (e.g. "[nH]" or "[Hg]").
  if 'H' in smilesString:
    raise TypeError("\nestimateDimensionsOfMolecule expects a string without hydrogen atoms")

  # Remove every space and double quote (e.g. from a pasted, quoted string)
  smilesString = re.sub(r'[\" ]', '', smilesString)

  # Nothing left to parse: fail early with a clear message
  if not smilesString:
    raise TypeError("estimateDimensionsOfMolecule expects a non-empty SMILES string.")

  # Create the RDKit molecule object; MolFromSmiles returns None (instead of
  # raising) when the SMILES string cannot be parsed
  molecule = Chem.MolFromSmiles(smilesString)
  if molecule is None:
    raise TypeError(
        f"estimateDimensionsOfMolecule could not parse SMILES string {smilesString!r}")

  # Generate a 3D conformer; EmbedMolecule returns -1 when embedding fails.
  # The fixed seed keeps the estimate reproducible between runs.
  if AllChem.EmbedMolecule(molecule, randomSeed=42) == -1:
    raise ValueError(
        f"estimateDimensionsOfMolecule could not generate a 3D conformer for {smilesString!r}")
  AllChem.UFFOptimizeMolecule(molecule)

  # Get the 3D coordinates of every atom as an (atoms x 3) numpy array
  coordinates = np.asarray(molecule.GetConformer().GetPositions())

  # Calculate the bounding box dimensions along each axis
  min_coords = np.min(coordinates, axis=0)
  max_coords = np.max(coordinates, axis=0)

  return max_coords - min_coords

# Example SMILES string, used when the user enters nothing
DEFAULT_SMILES = "OCC1OC(O)C(O)C(O)C1O" # glucose

# Get SMILES string (blank input falls back to the glucose example)
smiles = input("Enter a SMILES string:").strip() or DEFAULT_SMILES

# Estimate dimensions of molecule. Only the estimation call sits inside the
# try block, so errors in the printing code below are never mistaken for
# input problems.
try:
  dimensions = estimateDimensionsOfMolecule(smiles)

except (TypeError, ValueError) as e:
  # TypeError: rejected input; ValueError: RDKit could not build a conformer
  print("An error occurred:", e)

else:
  length, width, height = dimensions

  # Print estimated values
  print("\nEstimated dimensions of the molecule (in Angstroms)")
  print(f"Length: {length:.2f}")
  print(f"Width: {width:.2f}")
  print(f"Height: {height:.2f}")



