# Quantum Chemistry Demo

In [1]:
# Install PySCF and rdkit
!pip install pyscf rdkit-pypi pandas



In [2]:
# Load Libraries
import os

import numpy as np
import pandas as pd
from pyscf import gto, scf
from rdkit import Chem
from rdkit.Chem import AllChem

**PySCF** (Python-based Simulations of Chemistry Framework) is an ab initio computational chemistry program implemented natively in the Python programming language.

**RDKit** is an open-source cheminformatics toolkit implemented in Python and C++. It provides robust tools for molecular representation, structure manipulation, and property calculation, enabling efficient handling of chemical information and integration into Python-based workflows.

**NumPy** is a fundamental package for scientific computing in Python. It provides efficient support for large, multi-dimensional arrays and matrices, along with a comprehensive collection of mathematical functions to perform element-wise operations, linear algebra, Fourier analysis, and more, forming the core of many numerical workflows in computational science.

**pandas** is a powerful, open-source data analysis and manipulation library for Python. It provides high-performance data structures like DataFrame and Series, enabling intuitive handling, transformation, and analysis of structured data within scientific and statistical workflows.

In [None]:
# Molecules to study: each key is a label, each value is the SMILES string
# that is later handed to RDKit to build the structure.
molecules = dict(
    water="O",
    ammonia="N",
    hydrogen_fluoride="[H]F",
    carbon_monoxide="[C-]#[O+]",  # CO written with formal charges on C and O
    methane="C",
)

In [9]:
# For every molecule: build a 3D geometry with RDKit, run a restricted
# Hartree-Fock calculation with PySCF, and collect the energy and dipole moment.
results = []

# RDKit's conformer embedding is stochastic; a fixed seed makes the geometry
# (and therefore the energies and dipoles below) reproducible across runs.
EMBED_SEED = 42

for name, smiles in molecules.items():
    # Generate 3D geometry
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise ValueError(f"Could not parse SMILES {smiles!r} for molecule {name!r}")
    mol_rdkit = Chem.AddHs(parsed)

    embed_params = AllChem.ETKDG()
    embed_params.randomSeed = EMBED_SEED
    if AllChem.EmbedMolecule(mol_rdkit, embed_params) < 0:
        # EmbedMolecule returns -1 when no conformer could be generated.
        raise ValueError(f"Could not embed a 3D conformer for molecule {name!r}")
    AllChem.UFFOptimizeMolecule(mol_rdkit)
    conf = mol_rdkit.GetConformer()

    # Convert to PySCF format: one "symbol x y z" line per atom.
    # RDKit coordinates are in Angstrom, which is also PySCF's default unit.
    atoms = []
    for atom in mol_rdkit.GetAtoms():
        pos = conf.GetAtomPosition(atom.GetIdx())
        atoms.append(f"{atom.GetSymbol()} {pos.x:.6f} {pos.y:.6f} {pos.z:.6f}")

    # PySCF Molecule
    mol = gto.Mole()
    mol.atom = "\n".join(atoms)
    mol.basis = "def2svp"
    # Take the net charge from the structure instead of hard-coding 0, so a
    # charged species added to `molecules` is not silently treated as neutral.
    mol.charge = Chem.GetFormalCharge(mol_rdkit)
    mol.spin = 0  # closed-shell singlet, as required by RHF below
    mol.verbose = 0
    mol.build()

    # Hartree–Fock calculation
    mf = scf.RHF(mol)
    energy = mf.kernel()

    # Dipole moment
    dip = mf.dip_moment()  # returns (dx, dy, dz)
    dip_magnitude = np.linalg.norm(dip)

    # Store results; the convergence flag is recorded so that an unconverged
    # SCF run is visible in the table rather than passing as a valid energy.
    results.append({
        "Molecule": name,
        "HF Energy (Hartree)": energy,
        "Dipole Moment (D)": f"({dip[0]:.3f}, {dip[1]:.3f}, {dip[2]:.3f})",
        "Dipole |μ| (D)": dip_magnitude,
        "SCF Converged": bool(mf.converged),
    })

# Display as DataFrame
df = pd.DataFrame(results)
df

Running ORCA for glycine...
Finished glycine, files stored in 'outputs/glycine_files'.
Running ORCA for alanine...
Finished alanine, files stored in 'outputs/alanine_files'.
Running ORCA for valine...
Finished valine, files stored in 'outputs/valine_files'.
Running ORCA for leucine...
Finished leucine, files stored in 'outputs/leucine_files'.
Running ORCA for isoleucine...
Finished isoleucine, files stored in 'outputs/isoleucine_files'.
Running ORCA for serine...
Finished serine, files stored in 'outputs/serine_files'.
Running ORCA for threonine...
Finished threonine, files stored in 'outputs/threonine_files'.
Running ORCA for cysteine...
Finished cysteine, files stored in 'outputs/cysteine_files'.
Running ORCA for methionine...
Finished methionine, files stored in 'outputs/methionine_files'.
Running ORCA for phenylalanine...
Finished phenylalanine, files stored in 'outputs/phenylalanine_files'.
Running ORCA for tyrosine...
Finished tyrosine, files stored in 'outputs/tyrosine_files'.
R

In [10]:
# Extract dipole moments from the .out files stored under `base_dir`.
base_dir = "outputs"
dipole_results = {}


def parse_dipole_debye(lines):
    """Return the dipole magnitude from the first "Magnitude (Debye)" line.

    Parameters
    ----------
    lines : iterable of str
        Lines of an output file (an open text file works).

    Returns
    -------
    float or str
        The value after the first ":" on the first matching line as a float,
        "Parse error" if that line has no ":" or the text after it is not a
        number, or "Not found" if no line contains "Magnitude (Debye)".
    """
    for line in lines:
        if "Magnitude (Debye)" in line:
            try:
                return float(line.split(":")[1].strip())
            except (IndexError, ValueError):
                # Only the failures the expression above can produce are
                # caught; anything else should surface instead of being hidden.
                return "Parse error"
    return "Not found"


# Sort the listing so the summary order is the same on every run and machine.
if os.path.isdir(base_dir):
    subfolders = sorted(os.listdir(base_dir))
else:
    print(f"Directory '{base_dir}' not found; no dipole moments extracted.")
    subfolders = []

# Loop through each subdirectory (expected layout: <molecule>_files/<molecule>.out)
for subfolder in subfolders:
    subfolder_path = os.path.join(base_dir, subfolder)
    if not os.path.isdir(subfolder_path):
        continue

    # Strip only a trailing "_files"; a name that merely contains that text
    # elsewhere must be left intact.
    if subfolder.endswith("_files"):
        molecule_name = subfolder[: -len("_files")]
    else:
        molecule_name = subfolder
    out_file = os.path.join(subfolder_path, f"{molecule_name}.out")

    if not os.path.exists(out_file):
        dipole_results[molecule_name] = "Output file not found"
        continue

    with open(out_file, "r", encoding="utf-8", errors="ignore") as f:
        dipole_results[molecule_name] = parse_dipole_debye(f)

# Print summary
print("Dipole Moments (Debye):")
for molecule_name, value in dipole_results.items():
    print(f"{molecule_name:<20}: {value}")

Dipole Moments (Debye):
valine              : 3.277208766
cysteine            : 4.084389069
threonine           : 1.096854022
isoleucine          : 1.685428413
tyrosine            : 3.382793772
methionine          : 3.195025048
leucine             : 1.850149658
glycine             : 1.967008951
tryptophan          : 4.12220321
aspartic_acid       : 4.005712205
phenylalanine       : 2.718485942
alanine             : 2.099251329
arginine            : 12.977062
histidine           : 3.599770427
lysine              : 10.261972048
asparagine          : 6.239866949
serine              : 1.979255567
proline             : 3.44367101
glutamic_acid       : 6.719069211
glutamine           : 7.098986848
