# Molecular Descriptors with Mordred

This notebook computes **molecular descriptors** with Mordred and RDKit for the SMILES strings in an uploaded CSV file, and saves the results as a new CSV file.


In [None]:
!pip install mordred rdkit-pypi

In [None]:
from rdkit import Chem
from mordred import Calculator, descriptors
import pandas as pd
from google.colab import files

In [None]:
def load_smiles_file(filepath, smiles_col='SMILES'):
    """Read a CSV file and parse its SMILES column into RDKit molecules.

    Cells that are missing or not strings (an empty CSV cell is read by
    pandas as a float NaN) are reported and skipped without being handed to
    RDKit; strings that RDKit cannot parse are reported and skipped as well.

    Args:
        filepath: Path of the CSV file, passed to ``pd.read_csv``.
        smiles_col: Name of the column that holds the SMILES strings.

    Returns:
        A tuple ``(df, mols, valid_idx)``: the full DataFrame as read, the
        list of successfully parsed molecules, and the positional row index
        in ``df`` of each of those molecules (same order as ``mols``).

    Raises:
        ValueError: If ``smiles_col`` is not a column of the file.
    """
    df = pd.read_csv(filepath)
    if smiles_col not in df.columns:
        raise ValueError(f"Column '{smiles_col}' not found in file.")
    mols, valid_idx = [], []
    for i, s in enumerate(df[smiles_col]):
        # Guard before calling RDKit: MolFromSmiles only accepts strings, so
        # a NaN from an empty cell would otherwise abort the whole load.
        if not isinstance(s, str):
            print(f"Warning: missing or non-string SMILES at row {i}: {s!r}")
            continue
        m = Chem.MolFromSmiles(s)
        if m is None:
            print(f"Warning: invalid SMILES at row {i}: {s}")
        else:
            mols.append(m)
            valid_idx.append(i)
    return df, mols, valid_idx

def compute_descriptors(mols, ignore_3D=True):
    """Run a Mordred ``Calculator`` over ``mols``.

    Args:
        mols: Molecules to hand to ``Calculator.map``.
        ignore_3D: Forwarded unchanged to the ``Calculator`` constructor.

    Returns:
        Whatever ``Calculator.map`` returns for ``mols`` (presumably one
        result per molecule -- confirm against the Mordred documentation).
    """
    return Calculator(descriptors, ignore_3D=ignore_3D).map(mols)


In [None]:
# Driver cell: upload a CSV, compute descriptors for every parsable SMILES,
# and download the input rows joined with their descriptor values.
print('Upload a CSV file with a column named SMILES.')
uploaded = files.upload()
# Fail with a clear message instead of an IndexError when nothing came back
# from the upload dialog.
if not uploaded:
    raise RuntimeError('No file was uploaded.')
fname = next(iter(uploaded))  # first uploaded file name
print(f'Loaded file: {fname}')
df_input, mols, valid_idx = load_smiles_file(fname)
if not mols:
    raise RuntimeError('No valid molecules loaded — check your SMILES column.')
print('Computing descriptors...')
# Build rows from Result.asdict() so columns carry the descriptor names;
# passing the Result objects straight to DataFrame yields columns 0..N-1.
df_desc = pd.DataFrame(
    [result.asdict() for result in compute_descriptors(mols, ignore_3D=True)]
)
# Keep only the input rows that produced a molecule, renumbered from 0 so
# they line up positionally with the descriptor rows.
df_valid = df_input.iloc[valid_idx].reset_index(drop=True)
df_out = pd.concat([df_valid, df_desc.reset_index(drop=True)], axis=1)
out_name = 'molecular_descriptors_output.csv'
df_out.to_csv(out_name, index=False)
print(f'Saved descriptors to {out_name}')
files.download(out_name)
