# Molecular Properties Analysis with NovoMD

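This notebook explores the 32+ molecular properties calculated by NovoMD from 3D coordinates. Running it requires a NovoMD API server reachable at the `BASE_URL` set in the first code cell, together with a valid API key.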

## Property Categories

| Category | Count | Properties |
|----------|-------|------------|
| Geometry | 7 | radius_of_gyration, asphericity, eccentricity, inertia_shape_factor, span_r, pmi1, pmi2 |
| Energy | 6 | conformer_energy, vdw_energy, electrostatic_energy, torsion_strain, angle_strain, optimization_delta |
| Electrostatics | 6 | dipole_moment, total_charge, max_partial_charge, min_partial_charge, charge_span, electrostatic_potential |
| Surface/Volume | 4 | sasa, molecular_volume, globularity, surface_to_volume_ratio |
| Atom Counts | 2 | num_atoms_with_h, num_heavy_atoms |
| Visualization | 5+ | coords_x, coords_y, coords_z, atom_types, bonds |

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

# Configuration
# Both values can be overridden through environment variables so that a real
# API key never has to be written into (and committed with) the notebook.
# The fallbacks are the notebook's original defaults.
BASE_URL = os.environ.get("NOVOMD_BASE_URL", "http://localhost:8010")
API_KEY = os.environ.get("NOVOMD_API_KEY", "your-api-key")
# Headers sent with every API request: JSON body plus the API key.
headers = {"Content-Type": "application/json", "X-API-Key": API_KEY}

def get_properties(smiles, force_field="AMBER", timeout=120):
    """Fetch molecular properties for one molecule from the NovoMD API.

    Args:
        smiles: SMILES string of the molecule, sent in the request body.
        force_field: Force-field name forwarded to the API in the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on an unresponsive server.

    Returns:
        The ``metadata`` entry of the JSON response when the response reports
        ``success``; ``None`` otherwise (including when either key is absent).

    Raises:
        requests.exceptions.RequestException: On connection problems, on a
            timeout, or when the server answers with an HTTP error status and
            a body that is not JSON.
        ValueError: When a response with a non-error status has a body that
            is not valid JSON.
    """
    response = requests.post(
        f"{BASE_URL}/smiles-to-omd",
        headers=headers,
        json={"smiles": smiles, "force_field": force_field},
        timeout=timeout,
    )
    try:
        result = response.json()
    except ValueError:
        # The body is not JSON (for example an HTML error page). Surface the
        # HTTP status error when there is one; otherwise re-raise the decode
        # error unchanged.
        response.raise_for_status()
        raise
    # Use .get so a malformed payload is reported as "no result" rather than
    # an unrelated KeyError.
    if not isinstance(result, dict) or not result.get('success'):
        return None
    return result.get('metadata')

## 1. Compare Properties Across Molecules

Let's analyze a series of molecules and compare their properties:

In [None]:
# Dataset of common molecules (display name -> SMILES)
molecules = {
    "Methane": "C",
    "Ethanol": "CCO",
    "Benzene": "c1ccccc1",
    "Aspirin": "CC(=O)OC1=CC=CC=C1C(=O)O",
    "Caffeine": "CN1C=NC2=C1C(=O)N(C(=O)N2C)C",
    "Ibuprofen": "CC(C)CC1=CC=C(C=C1)C(C)C(=O)O",
    "Glucose": "OC[C@H]1OC(O)[C@H](O)[C@@H](O)[C@@H]1O",
    "Cholesterol": "CC(C)CCCC(C)C1CCC2C1(CCC3C2CC=C4C3(CCC(C4)O)C)C"
}

# Collect properties, keeping track of molecules for which nothing came back
data = []
failed = []
for name, smiles in molecules.items():
    props = get_properties(smiles)
    if not props:
        failed.append(name)
        continue
    # Build a new record instead of mutating the dict returned by the helper
    data.append({**props, 'name': name, 'smiles': smiles})

# Report skipped molecules so that missing rows downstream are not a surprise
if failed:
    print(f"No properties returned for: {', '.join(failed)}")

# An empty frame has no columns, so every later cell would fail with an
# unrelated KeyError; stop here with an actionable message instead.
if not data:
    raise RuntimeError(
        "No molecular properties were collected; check that the NovoMD API is "
        "reachable at BASE_URL and that API_KEY is valid."
    )

df = pd.DataFrame(data)
print(f"Collected properties for {len(df)} molecules")
df[['name', 'molecular_weight', 'num_atoms_with_h', 'num_heavy_atoms']]

## 2. Geometry Properties Analysis

In [None]:
# Shape and size descriptors, displayed to three decimal places
geometry_cols = [
    'name',
    'radius_of_gyration',
    'asphericity',
    'eccentricity',
    'span_r',
    'globularity',
]
df.loc[:, geometry_cols].round(3)

In [None]:
# Three side-by-side panels: size vs. weight, asphericity, globularity
fig, axes = plt.subplots(1, 3, figsize=(14, 4))
size_ax, asphericity_ax, globularity_ax = axes

# Panel 1: radius of gyration against molecular weight, one labeled point per molecule
size_ax.scatter(df['molecular_weight'], df['radius_of_gyration'], s=100, alpha=0.7)
for label, weight, gyration in zip(df['name'], df['molecular_weight'], df['radius_of_gyration']):
    size_ax.annotate(label, (weight, gyration), fontsize=8)
size_ax.set(
    xlabel='Molecular Weight (Da)',
    ylabel='Radius of Gyration (Å)',
    title='Size vs Weight',
)

# Panel 2: asphericity per molecule
asphericity_ax.barh(df['name'], df['asphericity'], color='steelblue')
asphericity_ax.set(xlabel='Asphericity', title='Molecular Shape (Asphericity)')

# Panel 3: globularity per molecule
globularity_ax.barh(df['name'], df['globularity'], color='coral')
globularity_ax.set(xlabel='Globularity', title='Molecular Globularity')

plt.tight_layout()
plt.show()

## 3. Surface and Volume Properties

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# SASA vs Volume, with each point colored by molecular weight
points = axes[0].scatter(df['molecular_volume'], df['sasa'], s=100, c=df['molecular_weight'], cmap='viridis', alpha=0.7)
for name, volume, area in zip(df['name'], df['molecular_volume'], df['sasa']):
    axes[0].annotate(name, (volume, area), fontsize=8)
axes[0].set_xlabel('Molecular Volume (Å³)')
axes[0].set_ylabel('SASA (Å²)')
axes[0].set_title('Surface Area vs Volume')
# The color encoding cannot be read without a scale, so attach a labeled colorbar
fig.colorbar(points, ax=axes[0], label='Molecular Weight (Da)')

# Surface to Volume Ratio
axes[1].barh(df['name'], df['surface_to_volume_ratio'], color='green', alpha=0.7)
axes[1].set_xlabel('Surface/Volume Ratio')
axes[1].set_title('Surface to Volume Ratio')

plt.tight_layout()
plt.show()

## 4. Electrostatic Properties

In [None]:
# Charge-related descriptors, displayed to four decimal places
electro_cols = [
    'name',
    'dipole_moment',
    'total_charge',
    'max_partial_charge',
    'min_partial_charge',
    'charge_span',
]
df.loc[:, electro_cols].round(4)

In [None]:
# Grouped bar chart: largest and smallest partial charge for every molecule
fig, ax = plt.subplots(figsize=(10, 6))

# One slot per molecule; the two bars of a pair sit half a bar width to either side
x = np.arange(len(df))
width = 0.35
half_width = width / 2

bars1 = ax.bar(
    x - half_width, df['max_partial_charge'], width,
    label='Max Partial Charge', color='red', alpha=0.7,
)
bars2 = ax.bar(
    x + half_width, df['min_partial_charge'], width,
    label='Min Partial Charge', color='blue', alpha=0.7,
)

ax.set(
    xlabel='Molecule',
    ylabel='Partial Charge',
    title='Charge Distribution by Molecule',
)
ax.set_xticks(x)
ax.set_xticklabels(df['name'], rotation=45, ha='right')
ax.legend()
# Thin zero line separates positive from negative charges
ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)

plt.tight_layout()
plt.show()

## 5. Energy Properties

In [None]:
# Energy terms, displayed to two decimal places
energy_cols = [
    'name',
    'conformer_energy',
    'vdw_energy',
    'electrostatic_energy',
    'torsion_strain',
    'angle_strain',
]
df.loc[:, energy_cols].round(2)

## 6. Property Correlation Matrix

In [None]:
# Numeric properties that enter the pairwise correlation
numeric_cols = [
    'molecular_weight', 'num_atoms_with_h', 'radius_of_gyration',
    'asphericity', 'globularity', 'sasa', 'molecular_volume',
    'dipole_moment', 'charge_span',
]

correlation = df.loc[:, numeric_cols].corr()

# Heatmap on a fixed [-1, 1] scale
fig, ax = plt.subplots(figsize=(10, 8))
im = ax.imshow(correlation, cmap='RdBu_r', vmin=-1, vmax=1)

# One tick per property on both axes
tick_positions = np.arange(len(numeric_cols))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(numeric_cols, rotation=45, ha='right')
ax.set_yticklabels(numeric_cols)

# Write each coefficient into its cell (matrix row -> y position, column -> x position)
for (row_idx, col_idx), coefficient in np.ndenumerate(correlation.to_numpy()):
    ax.text(col_idx, row_idx, f'{coefficient:.2f}',
            ha='center', va='center', fontsize=8)

plt.colorbar(im)
plt.title('Property Correlation Matrix')
plt.tight_layout()
plt.show()

## 7. Export Results

In [None]:
# Write a fixed subset of columns to a CSV file in the working directory
export_cols = [
    'name', 'smiles',
    'molecular_weight', 'num_atoms_with_h', 'num_heavy_atoms',
    'radius_of_gyration', 'asphericity', 'globularity',
    'sasa', 'molecular_volume',
    'dipole_moment', 'total_charge', 'charge_span',
]

# index=False leaves the DataFrame's row index out of the file
df.to_csv('molecular_properties.csv', columns=export_cols, index=False)
print("Results exported to molecular_properties.csv")