In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to local packages take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from collections import defaultdict
import itertools
import os
import pandas as pd
import matplotlib.pyplot as plt 
import numpy as np
from IPython.display import Markdown
from matminer.datasets import load_dataset
from pymatgen.core import Composition

from modnet.preprocessing import MODData
#from modnet.models import MODNetModel
from modnet.featurizers import MODFeaturizer
from modnet.featurizers.presets import DeBreuck2020Featurizer

In [3]:
from gptchem.data import get_matbench_glass

In [4]:
# Load the glass dataset through the gptchem helper. Later cells read the
# "composition" and "gfa" columns of this frame.
df = get_matbench_glass()

In [5]:
# Show the installed modnet version in the cell output, for provenance of the
# results below.
import modnet
modnet.__version__

'0.1.12'

In [19]:
class CompositionOnlyFeaturizer(DeBreuck2020Featurizer):
    """DeBreuck2020Featurizer preset with three featurizer groups emptied.

    After the parent constructor runs, the oxidation-state composition,
    structure and site featurizer groups are replaced with empty tuples.
    Every other attribute set up by the parent is left untouched.
    """

    # Attribute names of the featurizer groups that get blanked out.
    _DISABLED_GROUPS = (
        "oxid_composition_featurizers",
        "structure_featurizers",
        "site_featurizers",
    )

    def __init__(self):
        super().__init__()
        for group_name in self._DISABLED_GROUPS:
            setattr(self, group_name, ())

In [22]:
# Location of the cached, already-featurized MODData object.
PRECOMPUTED_MODDATA = "./precomputed/glass_benchmark_moddata.pkl.gz"

# Featurization is slow, so reuse the cached MODData when it exists and only
# featurize (then cache) when it does not.
if os.path.isfile(PRECOMPUTED_MODDATA):
    # NOTE(review): MODData.load presumably unpickles this file -- only load
    # caches that were produced locally / come from a trusted source.
    data = MODData.load(PRECOMPUTED_MODDATA)
else:
    # Create the cache directory *before* the expensive work. Otherwise a
    # missing directory would only surface at data.save(), after featurization
    # has already run.
    cache_dir = os.path.dirname(PRECOMPUTED_MODDATA)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    # Convert each entry of the "composition" column to a pymatgen Composition.
    # The result is stored in a "structure" column and handed to MODData as the
    # materials list.
    df["structure"] = df["composition"].map(Composition)

    data = MODData(
        materials=df["structure"].tolist(),
        targets=df["gfa"].tolist(),
        target_names=["gfa"],
        featurizer=CompositionOnlyFeaturizer(),
        # NOTE(review): num_classes={'gfa': 2} is deliberately left disabled
        # here, while best_settings further down declares it -- confirm whether
        # feature selection should treat "gfa" as a class label.
    )
    data.featurize()
    # As this is a small data/feature set, order all features
    data.feature_selection(n=-1)
    data.save(PRECOMPUTED_MODDATA)

2023-02-06 15:08:30,725 - modnet - INFO - Loaded CompositionOnlyFeaturizer featurizer.
2023-02-06 15:08:30,731 - modnet - INFO - Computing features, this can take time...
2023-02-06 15:08:30,740 - modnet - INFO - Applying composition featurizers...
2023-02-06 15:08:30,752 - modnet - INFO - Applying featurizers (AtomicOrbitals(), AtomicPackingEfficiency(), BandCenter(), ElementFraction(), ElementProperty(data_source=<matminer.utils.data.MagpieData object at 0x296346550>,
                features=['Number', 'MendeleevNumber', 'AtomicWeight',
                          'MeltingT', 'Column', 'Row', 'CovalentRadius',
                          'Electronegativity', 'NsValence', 'NpValence',
                          'NdValence', 'NfValence', 'NValence', 'NsUnfilled',
                          'NpUnfilled', 'NdUnfilled', 'NfUnfilled', 'NUnfilled',
                          'GSvolume_pa', 'GSbandgap', 'GSmagmom',
                          'SpaceGroupNumber'],
                stats=['minimum', 'm

MultipleFeaturizer:   0%|          | 0/5680 [00:00<?, ?it/s]

  alp_a = np.multiply(1.5, np.power(v_a, 2 / 3)) / reduce(lambda x, y: 1 / x + 1 / y, np.power(n_ws, 1 / 3))
  alp_a = np.multiply(1.5, np.power(v_a, 2 / 3)) / reduce(lambda x, y: 1 / x + 1 / y, np.power(n_ws, 1 / 3))
  alp_a = np.multiply(1.5, np.power(v_a, 2 / 3)) / reduce(lambda x, y: 1 / x + 1 / y, np.power(n_ws, 1 / 3))
  alp_a = np.multiply(1.5, np.power(v_a, 2 / 3)) / reduce(lambda x, y: 1 / x + 1 / y, np.power(n_ws, 1 / 3))
  alp_a = np.multiply(1.5, np.power(v_a, 2 / 3)) / reduce(lambda x, y: 1 / x + 1 / y, np.power(n_ws, 1 / 3))
  alp_a = np.multiply(1.5, np.power(v_a, 2 / 3)) / reduce(lambda x, y: 1 / x + 1 / y, np.power(n_ws, 1 / 3))
  alp_a = np.multiply(1.5, np.power(v_a, 2 / 3)) / reduce(lambda x, y: 1 / x + 1 / y, np.power(n_ws, 1 / 3))
  alp_a = np.multiply(1.5, np.power(v_a, 2 / 3)) / reduce(lambda x, y: 1 / x + 1 / y, np.power(n_ws, 1 / 3))
  alp_a = np.multiply(1.5, np.power(v_a, 2 / 3)) / reduce(lambda x, y: 1 / x + 1 / y, np.power(n_ws, 1 / 3))
  alp_a = np.multip

In [None]:
# Display the featurized table held by the MODData object as the cell output
# (presumably a pandas DataFrame -- confirm against the MODData API).
data.df_featurized

In [None]:
# Settings bundle for the model; where it is consumed is not visible in this
# part of the notebook (presumably passed to a MODNet fit call -- confirm).
best_settings = dict(
    increase_bs=True,
    num_neurons=[[128], [64], [16], []],
    n_feat=150,
    lr=0.002,
    epochs=200,
    verbose=0,
    act="elu",
    batch_size=64,
    num_classes={"gfa": 2},
    loss="categorical_crossentropy",
    # "xscale": "standard" is intentionally left disabled.
)