In [5]:
# Development convenience: (re)load IPython's autoreload extension and switch it
# to mode 2 so that edits to imported modules are picked up without restarting
# the kernel.
%reload_ext autoreload
%autoreload 2

In [11]:
import logging
from typing import List, Optional
import pandas as pd

from polymetrix.featurizers.polymer import Polymer
from polymetrix.featurizers.molecule import Molecule
from polymetrix.featurizers.chemical_featurizer import (
    NumHBondDonors,
    NumHBondAcceptors,
    NumRotatableBonds,
    NumRings,
    NumNonAromaticRings,
    NumAromaticRings,
    NumAtoms,
    TopologicalSurfaceArea,
    FractionBicyclicRings,
    NumAliphaticHeterocycles,
    SlogPVSA1,
    BalabanJIndex,
    MolecularWeight,
    Sp3CarbonCountFeaturizer,
    Sp2CarbonCountFeaturizer,
    MaxEStateIndex,
    SmrVSA5,
    FpDensityMorgan1,
    HalogenCounts,
    BondCounts,
    BridgingRingsCount,
    MaxRingSize,
    HeteroatomCount,
    HeteroatomDensity,
)
from polymetrix.featurizers.sidechain_backbone_featurizer import (
    SideChainFeaturizer,
    NumSideChainFeaturizer,
    BackBoneFeaturizer,
    NumBackBoneFeaturizer,
    FullPolymerFeaturizer,
    SidechainLengthToStarAttachmentDistanceRatioFeaturizer,
    StarToSidechainMinDistanceFeaturizer,
    SidechainDiversityFeaturizer,
)

from polymetrix.featurizers.molecule import FullMolecularFeaturizer

from polymetrix.featurizers.multiple_featurizer import MultipleFeaturizer

from polymetrix.featurizers.comparator import PolymerMoleculeComparator

### Full Polymer Featurization

In [6]:
# Example polymer pSMILES strings; the side-chain and backbone cells below
# iterate over this same list.
psmiles_list = ["c1ccccc1[*]CCO[*]", "CC[*]CCCC[*]"]

# One FullPolymerFeaturizer per descriptor, each wrapping a freshly
# constructed descriptor instance with its default arguments.
full_featurizers = [
    FullPolymerFeaturizer(descriptor())
    for descriptor in (NumRings, MolecularWeight, TopologicalSurfaceArea)
]
# Bundle the individual featurizers so they can be run with a single call.
full_multi_featurizer = MultipleFeaturizer(full_featurizers)

for psmiles in psmiles_list:
    polymer = Polymer.from_psmiles(psmiles)
    features = full_multi_featurizer.featurize(polymer)
    labels = full_multi_featurizer.feature_labels()

    # Report every feature as "<label>: <value>" with two decimal places.
    for label, value in zip(labels, features):
        print(f"{label}: {value:.2f}")

num_rings_sum_fullpolymerfeaturizer: 1.00
molecular_weight_sum_fullpolymerfeaturizer: 121.07
topological_surface_area_sum_fullpolymerfeaturizer: 9.23
num_rings_sum_fullpolymerfeaturizer: 0.00
molecular_weight_sum_fullpolymerfeaturizer: 85.10
topological_surface_area_sum_fullpolymerfeaturizer: 0.00


### Side Chain Featurization

In [5]:
# NumSideChainFeaturizer takes no descriptor; the remaining entries wrap a
# descriptor configured with the "sum" aggregation in a SideChainFeaturizer.
sidechain_featurizers = [NumSideChainFeaturizer()] + [
    SideChainFeaturizer(descriptor(agg=["sum"]))
    for descriptor in (NumAtoms, NumHBondDonors, NumRotatableBonds)
]
sidechain_multi_featurizer = MultipleFeaturizer(sidechain_featurizers)

# Reuses `psmiles_list` defined in the full-polymer cell above.
for psmiles in psmiles_list:
    polymer = Polymer.from_psmiles(psmiles)
    features = sidechain_multi_featurizer.featurize(polymer)
    labels = sidechain_multi_featurizer.feature_labels()

    # Report every feature as "<label>: <value>" with two decimal places.
    for label, value in zip(labels, features):
        print(f"{label}: {value:.2f}")


numsidechainfeaturizer: 1.00
num_atoms_sidechainfeaturizer_sum: 6.00
num_hbond_donors_sidechainfeaturizer_sum: 0.00
num_rotatable_bonds_sidechainfeaturizer_sum: 0.00
numsidechainfeaturizer: 1.00
num_atoms_sidechainfeaturizer_sum: 2.00
num_hbond_donors_sidechainfeaturizer_sum: 0.00
num_rotatable_bonds_sidechainfeaturizer_sum: 0.00


### Backbone Featurization

In [None]:
# NumBackBoneFeaturizer takes no descriptor; the remaining entries wrap a
# default-constructed descriptor in a BackBoneFeaturizer.
backbone_featurizers = [
    NumBackBoneFeaturizer(),
    *(
        BackBoneFeaturizer(descriptor())
        for descriptor in (NumRings, NumAtoms, TopologicalSurfaceArea)
    ),
]
backbone_multi_featurizer = MultipleFeaturizer(backbone_featurizers)

# Reuses `psmiles_list` defined in the full-polymer cell above.
for psmiles in psmiles_list:
    polymer = Polymer.from_psmiles(psmiles)
    features = backbone_multi_featurizer.featurize(polymer)
    labels = backbone_multi_featurizer.feature_labels()

    # Report every feature as "<label>: <value>" with two decimal places.
    for label, value in zip(labels, features):
        print(f"{label}: {value:.2f}")

numbackbonefeaturizer: 1.00
num_rings_sum_backbonefeaturizer: 0.00
num_atoms_sum_backbonefeaturizer: 5.00
topological_surface_area_sum_backbonefeaturizer: 9.23
numbackbonefeaturizer: 1.00
num_rings_sum_backbonefeaturizer: 0.00
num_atoms_sum_backbonefeaturizer: 6.00
topological_surface_area_sum_backbonefeaturizer: 0.00


### Full Molecular Featurization

In [27]:
# Plain SMILES strings for small molecules. These get their own name instead of
# reusing `psmiles_list`, so the polymer cells above still see their pSMILES
# inputs if they are re-run after this cell.
smiles_list = [
    "CCCC",
    "NC(=O)c1ccc2c(c1)nc(C1CCC(O)CC1)n2CCCO",
    "CNC(=S)Nc1cccc(-c2cnc3ccccc3n2)c1",
    "C#Cc1ccc(-c2nc(-c3cc[nH]c(=O)c3)c(-c3ccc(F)cc3)[nH]2)cc1",
]

# Molecule-level featurizers, named separately so they do not overwrite the
# polymer-level `full_featurizers` / `full_multi_featurizer` objects.
molecular_featurizers = [
    FullMolecularFeaturizer(NumRings()),
    FullMolecularFeaturizer(MolecularWeight()),
    FullMolecularFeaturizer(TopologicalSurfaceArea()),
]
molecular_multi_featurizer = MultipleFeaturizer(molecular_featurizers)

for smiles in smiles_list:
    # The parsed object is a Molecule, so it is named accordingly.
    molecule = Molecule.from_smiles(smiles)
    features = molecular_multi_featurizer.featurize(molecule)
    labels = molecular_multi_featurizer.feature_labels()

    # Report every feature as "<label>: <value>" with two decimal places.
    for label, value in zip(labels, features):
        print(f"{label}: {value:.2f}")

num_rings_sum_fullmolecularfeaturizer: 0.00
molecular_weight_sum_fullmolecularfeaturizer: 58.08
topological_surface_area_sum_fullmolecularfeaturizer: 0.00
num_rings_sum_fullmolecularfeaturizer: 3.00
molecular_weight_sum_fullmolecularfeaturizer: 317.17
topological_surface_area_sum_fullmolecularfeaturizer: 101.37
num_rings_sum_fullmolecularfeaturizer: 3.00
molecular_weight_sum_fullmolecularfeaturizer: 294.09
topological_surface_area_sum_fullmolecularfeaturizer: 49.84
num_rings_sum_fullmolecularfeaturizer: 4.00
molecular_weight_sum_fullmolecularfeaturizer: 355.11
topological_surface_area_sum_fullmolecularfeaturizer: 61.54


### Using Comparators to Compare Polymer and Molecule Features

In [12]:
# The two objects to compare: a polymer parsed from a pSMILES string and a
# molecule parsed from a plain SMILES string.
polymer = Polymer.from_psmiles("*CCCCCCNC(=O)c1ccc(C(=O)N*)c(Sc2ccccc2)c1")
molecule = Molecule.from_smiles("CC(=O)OC1=CC=CC=C1C(=O)O")

# The same four descriptors, in the same order, are applied on both sides:
# first wrapped for the polymer, then wrapped for the molecule.
polymer_featurizers = [
    FullPolymerFeaturizer(descriptor())
    for descriptor in (
        MolecularWeight,
        NumHBondDonors,
        NumHBondAcceptors,
        NumRotatableBonds,
    )
]
molecule_featurizers = [
    FullMolecularFeaturizer(descriptor())
    for descriptor in (
        MolecularWeight,
        NumHBondDonors,
        NumHBondAcceptors,
        NumRotatableBonds,
    )
]

polymer_multi = MultipleFeaturizer(polymer_featurizers)
molecule_multi = MultipleFeaturizer(molecule_featurizers)

# The comparator is built from the polymer-side and molecule-side featurizers.
comparator = PolymerMoleculeComparator(polymer_multi, molecule_multi)
difference = comparator.compare(polymer, molecule)
labels = comparator.feature_labels()

# Show the raw label list and difference values first ...
print(f"Labels: {labels}")
print(f"Differences: {difference}")

# ... then print one "<label>: <difference>" line per feature.
for label, diff in zip(labels, difference):
    print(f"  {label}: {diff}")

Labels: ['molecular_weight_sum_fullpolymerfeaturizer_difference', 'num_hbond_donors_sum_fullpolymerfeaturizer_difference', 'num_hbond_acceptors_sum_fullpolymerfeaturizer_difference', 'num_rotatable_bonds_sum_fullpolymerfeaturizer_difference']
Differences: [174.09794021   1.           0.           8.        ]
  molecular_weight_sum_fullpolymerfeaturizer_difference: 174.097940208
  num_hbond_donors_sum_fullpolymerfeaturizer_difference: 1.0
  num_hbond_acceptors_sum_fullpolymerfeaturizer_difference: 0.0
  num_rotatable_bonds_sum_fullpolymerfeaturizer_difference: 8.0
