# User API examples

Set up a Python environment and install PoseBusters to run this notebook.

```bash
conda create -n posebusters python=3.10 jupyter notebook
conda activate posebusters
pip install posebusters --upgrade
```

In [1]:
from posebusters import PoseBusters
from pathlib import Path

In [2]:
# Example input files, all located in the local "inputs" directory:
#   pred_file - predicted or generated molecules
#   true_file - "ground truth" molecules
#   cond_file - conditioning molecule
pred_file = Path("inputs", "generated_molecules.sdf")
true_file = Path("inputs", "crystal_ligand.sdf")
cond_file = Path("inputs", "protein.pdb")

## PoseBusters default configs

### redock
The `redock` mode is for ligands docked into their cognate receptor crystal structures.

In [3]:
# Create a checker with the "redock" configuration and run it on the predicted
# molecules, passing the "ground truth" ligand file and the conditioning
# protein file as the second and third arguments.
# by default only the binary test report columns are returned
buster = PoseBusters(config="redock")
df = buster.bust([pred_file], true_file, cond_file)
# Print the table dimensions, then show the table itself as the cell output.
print(df.shape)
df




(3, 25)


Unnamed: 0_level_0,Unnamed: 1_level_0,mol_pred_loaded,mol_true_loaded,mol_cond_loaded,sanitization,all_atoms_connected,molecular_formula,molecular_bonds,double_bond_stereochemistry,tetrahedral_chirality,bond_lengths,...,protein-ligand_maximum_distance,minimum_distance_to_protein,minimum_distance_to_organic_cofactors,minimum_distance_to_inorganic_cofactors,minimum_distance_to_waters,volume_overlap_with_protein,volume_overlap_with_organic_cofactors,volume_overlap_with_inorganic_cofactors,volume_overlap_with_waters,rmsd_≤_2å
file,molecule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
inputs/generated_molecules.sdf,molecule_1,True,True,True,True,True,False,False,True,False,False,...,False,True,True,True,True,True,True,True,True,False
inputs/generated_molecules.sdf,molecule_2,True,True,True,True,True,False,False,True,True,True,...,False,True,True,True,True,True,True,True,True,False
inputs/generated_molecules.sdf,molecule_3,True,True,True,True,True,False,False,True,True,True,...,False,True,True,True,True,True,True,True,True,False


### dock
The `dock` mode is for *de-novo* generated molecules for a given receptor or for ligands docked into a non-cognate receptor.

In [4]:
# Create a checker with the "dock" configuration and run it on the predicted
# molecules together with the conditioning protein file.
# NOTE(review): true_file is passed here as well, yet the report shown below
# has no true-ligand columns (no `mol_true_loaded`, no RMSD check) --
# presumably the "dock" configuration does not use it; confirm.
buster = PoseBusters(config="dock")
df = buster.bust([pred_file], true_file, cond_file)
# Print the table dimensions, then show the table itself as the cell output.
print(df.shape)
df

(3, 19)


Unnamed: 0_level_0,Unnamed: 1_level_0,mol_pred_loaded,mol_cond_loaded,sanitization,all_atoms_connected,bond_lengths,bond_angles,internal_steric_clash,aromatic_ring_flatness,double_bond_flatness,internal_energy,protein-ligand_maximum_distance,minimum_distance_to_protein,minimum_distance_to_organic_cofactors,minimum_distance_to_inorganic_cofactors,minimum_distance_to_waters,volume_overlap_with_protein,volume_overlap_with_organic_cofactors,volume_overlap_with_inorganic_cofactors,volume_overlap_with_waters
file,molecule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
inputs/generated_molecules.sdf,molecule_1,True,True,True,True,False,False,True,True,True,True,False,True,True,True,True,True,True,True,True
inputs/generated_molecules.sdf,molecule_2,True,True,True,True,True,True,True,True,True,True,False,True,True,True,True,True,True,True,True
inputs/generated_molecules.sdf,molecule_3,True,True,True,True,True,True,True,True,True,True,False,True,True,True,True,True,True,True,True


### mol
The `mol` mode is for *de-novo* generated molecules or for generated molecular conformations.

In [5]:
# Create a checker with the "mol" configuration and run it on the predicted
# molecules only: no true ligand and no conditioning molecule are supplied
# (both arguments are None).
buster = PoseBusters(config="mol")
df = buster.bust([pred_file], None, None)
# Print the table dimensions, then show the table itself as the cell output.
print(df.shape)
df

(3, 9)


Unnamed: 0_level_0,Unnamed: 1_level_0,mol_pred_loaded,sanitization,all_atoms_connected,bond_lengths,bond_angles,internal_steric_clash,aromatic_ring_flatness,double_bond_flatness,internal_energy
file,molecule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
inputs/generated_molecules.sdf,molecule_1,True,True,True,False,False,True,True,True,True
inputs/generated_molecules.sdf,molecule_2,True,True,True,True,True,True,True,True,True
inputs/generated_molecules.sdf,molecule_3,True,True,True,True,True,True,True,True,True


## Output formatting

### full report
The `full_report` option of `bust` will return all columns of the test reports, not only the binary columns. This is useful for debugging and for further analysis of the results.

In [6]:
# Same "mol" configuration and inputs as the plain `mol` example, but with
# full_report=True so that all columns of the test reports are returned,
# not only the binary pass/fail columns.
buster = PoseBusters(config="mol")
df = buster.bust([pred_file], None, None, full_report=True)
# Print the table dimensions, then show the table itself as the cell output.
print(df.shape)
df

(3, 36)


Unnamed: 0_level_0,Unnamed: 1_level_0,mol_pred_loaded,sanitization,all_atoms_connected,bond_lengths,bond_angles,internal_steric_clash,aromatic_ring_flatness,double_bond_flatness,internal_energy,mol_true_loaded,...,number_valid_noncov_pairs,number_aromatic_rings_checked,number_aromatic_rings_pass,aromatic_ring_maximum_distance_from_plane,number_double_bonds_checked,number_double_bonds_pass,double_bond_maximum_distance_from_plane,ensemble_avg_energy,mol_pred_energy,energy_ratio
file,molecule,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
inputs/generated_molecules.sdf,molecule_1,True,True,True,False,False,True,True,True,True,False,...,235,0,0,,0,0,,608.632249,1377.86973,2.263879
inputs/generated_molecules.sdf,molecule_2,True,True,True,True,True,True,True,True,True,False,...,260,1,1,0.078919,0,0,,259.822517,253.508495,0.975699
inputs/generated_molecules.sdf,molecule_3,True,True,True,True,True,True,True,True,True,False,...,257,1,1,0.091866,0,0,,301.579457,341.205292,1.131394
