In [1]:
%load_ext autoreload
%autoreload 2

import datamol as dm

Any Molfeat featurizer should be serializable. This makes it easy to share a specific instantiation of a featurizer.
Any featurizer can thus be created from - or saved to - a state dictionary. We natively support saving these state dicts to both YAML and JSON.

## Example

In [2]:
from molfeat.trans import MoleculeTransformer

# Build the transformer whose state is serialized in the cells below.
trans = MoleculeTransformer("ecfp")

In [3]:
# Show the featurizer state serialized as YAML text.
print(trans.to_state_yaml())

_molfeat_version: 0.0.1
args:
  dtype: null
  featurizer: ecfp
  n_jobs: 1
  parallel_kwargs: null
  verbose: false
name: MoleculeTransformer



In [4]:
# Show the same state serialized as a JSON string.
print(trans.to_state_json())

{"name": "MoleculeTransformer", "args": {"featurizer": "ecfp", "n_jobs": 1, "verbose": false, "dtype": null, "parallel_kwargs": null}, "_molfeat_version": "0.0.1"}


In [5]:
# Show the state as a plain Python dictionary.
print(trans.to_state_dict())

{'name': 'MoleculeTransformer', 'args': {'featurizer': 'ecfp', 'n_jobs': 1, 'verbose': False, 'dtype': None, 'parallel_kwargs': None}, '_molfeat_version': '0.0.1'}


In [6]:
# NOTE(review): this repeats the JSON export already shown in an earlier cell — confirm whether it is intentional.
print(trans.to_state_json())

{"name": "MoleculeTransformer", "args": {"featurizer": "ecfp", "n_jobs": 1, "verbose": false, "dtype": null, "parallel_kwargs": null}, "_molfeat_version": "0.0.1"}


In [7]:
# You can also save the state directly to a file and load it back:
import tempfile

# The temporary directory, and the state file inside it, is removed when the `with` block exits.
with tempfile.TemporaryDirectory() as tmpdir:
    path = dm.fs.join(tmpdir, "state_dict.yml")
    # Write the current state to a YAML file ...
    trans.to_state_yaml_file(path)
    # ... then rebuild a transformer from that file. Note that this rebinds `trans`.
    trans = MoleculeTransformer.from_state_yaml_file(path)

## Loop over multiple featurizers
One implication of this is that, despite the various different interfaces, you can easily loop over multiple featurizers.

In [8]:
# One state dict per fingerprint; only the `featurizer` argument differs between entries.
FEATURIZERS = [
    {
        "name": "MoleculeTransformer",
        "args": {
            "featurizer": featurizer,
            "n_jobs": 1,
            "verbose": False,
            "dtype": None,
            "parallel_kwargs": None,
        },
        "_molfeat_version": "0.0.1",
    }
    for featurizer in ("ecfp", "maccs", "fcfp")
]

In [9]:
# Rebuild one featurizer per state dict; `trans` is rebound on every iteration.
for state_dict in FEATURIZERS:
    trans = MoleculeTransformer.from_state_dict(state_dict)
    # ... Work your magic!