# MODNet (v0.1.10)

For now, this benchmark file simply loads our existing full benchmark results (265 MB) from [ml-evs/modnet-matbench](https://github.com/ml-evs/modnet-matbench) and exports them in the matbench format. Code for featurisation, hyperparameter optimisation and the final predictions themselves can be found in the aforementioned repository or in the illustrative `run.ipynb` notebook.

In [6]:
def download_and_extract(url, fname):
    """Download a zip archive and unpack it into the current working directory.

    If ``fname`` already exists, a notice is printed and the function returns
    immediately without downloading or extracting anything.

    Args:
        url: Location of the zip archive to fetch.
        fname: Local path under which the downloaded archive is stored.

    Returns:
        None.

    Raises:
        Any exception raised by ``urllib.request.urlretrieve`` (failed
        download) or by ``zipfile.ZipFile`` (invalid archive) is propagated.
    """
    # A bare `import urllib` does not load the `request` submodule, so
    # `urllib.request` is only resolvable if something else imported it first.
    import urllib.request
    from zipfile import ZipFile
    import os

    if os.path.exists(fname):
        print(f"File {fname} already found, will not redownload.")
        return

    # Fetch into a scratch file and rename only once the transfer succeeded,
    # so an interrupted download can never be mistaken for a complete archive
    # by the existence check above on a later run.
    partial_fname = f"{fname}.part"
    try:
        urllib.request.urlretrieve(url, partial_fname)
        os.replace(partial_fname, fname)
    finally:
        if os.path.exists(partial_fname):
            os.remove(partial_fname)

    with ZipFile(fname, "r") as _zip:
        _zip.extractall(".")
        
# Release tag of the ml-evs/modnet-matbench repository to fetch.
version = "0.2.1"
url = f"https://github.com/ml-evs/modnet-matbench/archive/refs/tags/v{version}.zip"

# Base name: the local archive is stored as `<fname>.zip`, and the results
# paths built further down are rooted at `<fname>/`.
fname = f"modnet-matbench-{version}"

download_and_extract(url=url, fname=fname + ".zip")

In [7]:
import pickle
import numpy as np
from matbench.bench import MatbenchBenchmark
from matbench.constants import CLF_KEY

# Tasks whose stored predictions are exported by this notebook.
_task_subset = [
    'matbench_dielectric',
    'matbench_jdft2d',
    'matbench_steels',
    'matbench_expt_gap',
    'matbench_phonons',
    'matbench_log_gvrh',
    'matbench_log_kvrh',
    # 'matbench_glass', # classification tasks omitted due to ROC_AUC score issue, see GitHub issues.
    # 'matbench_expt_is_metal',
]

# autoload=False: each dataset is loaded explicitly, one task at a time, later on.
mb = MatbenchBenchmark(autoload=False, subset=_task_subset)

# Path of the pickled results for every task, keyed by dataset name.
results_locs = {
    task.dataset_name: f"{fname}/{task.dataset_name}/results/{task.dataset_name}_results.pkl"
    for task in mb.tasks
}

# Both elastic tasks share a single results file, so their entries are
# redirected to it.
_elastic_results_loc = f"{fname}/matbench_elastic/results/matbench_elastic_results.pkl"
results_locs["matbench_log_gvrh"] = _elastic_results_loc
results_locs["matbench_log_kvrh"] = _elastic_results_loc

# Key used to pick each elastic task's predictions out of that shared file.
target_key_map = {
    "matbench_log_gvrh": "log10G_VRH",
    "matbench_log_kvrh": "log10K_VRH",
}

2021-08-07 14:16:06 INFO     Initialized benchmark 'matbench_v0.1' with 7 tasks: 
['matbench_dielectric',
 'matbench_jdft2d',
 'matbench_steels',
 'matbench_expt_gap',
 'matbench_phonons',
 'matbench_log_gvrh',
 'matbench_log_kvrh']


In [8]:
def _select_fold_predictions(task, fold_predictions):
    """Return the flat array of predictions to record for one fold of ``task``.

    Args:
        task: The benchmark task the predictions belong to.
        fold_predictions: The stored predictions object for a single fold
            (exposes ``.values`` and key-based selection).
    """
    # Classification tasks require different loading as two values are reported per prediction
    if task.metadata.task_type == CLF_KEY:
        return fold_predictions.values[:, 0].astype(bool).flatten()

    # Handle predictions that were made with joint/multitarget learning
    joint_target = target_key_map.get(task.dataset_name)
    if joint_target is not None:
        return fold_predictions[joint_target].values.flatten()

    return fold_predictions.values.flatten()


for task in mb.tasks:

    task.load()

    # NOTE(review): pickle.load executes arbitrary code from the file it reads;
    # these files come from the downloaded archive, so only run this against a
    # source you trust.
    with open(results_locs[task.dataset_name], "rb") as results_file:
        results = pickle.load(results_file)

    for fold_ind, fold in enumerate(task.folds):
        predictions = _select_fold_predictions(task, results["predictions"][fold_ind])
        task.record(fold, predictions)

    # Report the mean score of the metric matching the task type.
    if task.metadata.task_type != CLF_KEY:
        print(f"{task.dataset_name}: MAE {task.scores['mae']['mean']}")
    else:
        print(f"{task.dataset_name}: Accuracy score {task.scores['accuracy']['mean']}")

    # Drop the reference to the loaded dataset before moving on to the next task.
    task.df = None

2021-08-07 14:16:07 INFO     Loading dataset 'matbench_dielectric'...


Reading file /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_log_kvrh.json.gz: 10987it [01:39, 110.35it/s]  
Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_log_kvrh.json.gz: 100%|##########| 10987/10987 [01:39<00:00, 110.35it/s] 
Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_dielectric.json.gz:  98%|#########7| 4649/4764 [00:04<00:00, 1804.81it/s]

2021-08-07 14:16:11 INFO     Dataset 'matbench_dielectric loaded.
2021-08-07 14:16:11 INFO     Recorded fold matbench_dielectric-0 successfully.
2021-08-07 14:16:11 INFO     Recorded fold matbench_dielectric-1 successfully.
2021-08-07 14:16:11 INFO     Recorded fold matbench_dielectric-2 successfully.
2021-08-07 14:16:11 INFO     Recorded fold matbench_dielectric-3 successfully.
2021-08-07 14:16:11 INFO     Recorded fold matbench_dielectric-4 successfully.
matbench_dielectric: MAE 0.2969698688737498
2021-08-07 14:16:11 INFO     Loading dataset 'matbench_jdft2d'...


Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_jdft2d.json.gz:  76%|#######5  | 481/636 [00:00<00:00, 2730.56it/s]

2021-08-07 14:16:12 INFO     Dataset 'matbench_jdft2d loaded.
2021-08-07 14:16:12 INFO     Recorded fold matbench_jdft2d-0 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_jdft2d-1 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_jdft2d-2 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_jdft2d-3 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_jdft2d-4 successfully.
matbench_jdft2d: MAE 34.53678641963336
2021-08-07 14:16:12 INFO     Loading dataset 'matbench_steels'...


Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_steels.json.gz: 0it [00:00, ?it/s]

2021-08-07 14:16:12 INFO     Dataset 'matbench_steels loaded.


Reading file /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_steels.json.gz: 0it [00:00, ?it/s]
Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_steels.json.gz: 0it [00:00, ?it/s]

2021-08-07 14:16:12 INFO     Recorded fold matbench_steels-0 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_steels-1 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_steels-2 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_steels-3 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_steels-4 successfully.
matbench_steels: MAE 96.21387590993324
2021-08-07 14:16:12 INFO     Loading dataset 'matbench_expt_gap'...



Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_expt_gap.json.gz: 0it [00:00, ?it/s]

2021-08-07 14:16:12 INFO     Dataset 'matbench_expt_gap loaded.
2021-08-07 14:16:12 INFO     Recorded fold matbench_expt_gap-0 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_expt_gap-1 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_expt_gap-2 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_expt_gap-3 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_expt_gap-4 successfully.
matbench_expt_gap: MAE 0.3470153653294551
2021-08-07 14:16:12 INFO     Loading dataset 'matbench_phonons'...


Reading file /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_dielectric.json.gz: 4764it [00:05, 882.79it/s] 
Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_dielectric.json.gz: 100%|##########| 4764/4764 [00:05<00:00, 882.82it/s] 
Reading file /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_jdft2d.json.gz: 636it [00:00, 699.69it/s]
Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_jdft2d.json.gz: 100%|##########| 636/636 [00:00<00:00, 700.19it/s] 
Reading file /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_expt_gap.json.gz: 0it [00:00, ?it/s]
Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_expt_gap.json.gz: 0it [00:00, ?it/s]

2021-08-07 14:16:12 INFO     Dataset 'matbench_phonons loaded.
2021-08-07 14:16:12 INFO     Recorded fold matbench_phonons-0 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_phonons-1 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_phonons-2 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_phonons-3 successfully.
2021-08-07 14:16:12 INFO     Recorded fold matbench_phonons-4 successfully.
matbench_phonons: MAE 38.7524344203875
2021-08-07 14:16:12 INFO     Loading dataset 'matbench_log_gvrh'...



Reading file /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_phonons.json.gz: 1265it [00:00, 1419.27it/s]
Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_phonons.json.gz: 100%|##########| 1265/1265 [00:00<00:00, 1419.82it/s]
Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_log_gvrh.json.gz: 100%|#########9| 10937/10987 [00:05<00:00, 2905.96it/s]

2021-08-07 14:16:18 INFO     Dataset 'matbench_log_gvrh loaded.
2021-08-07 14:16:18 INFO     Recorded fold matbench_log_gvrh-0 successfully.
2021-08-07 14:16:18 INFO     Recorded fold matbench_log_gvrh-1 successfully.
2021-08-07 14:16:18 INFO     Recorded fold matbench_log_gvrh-2 successfully.
2021-08-07 14:16:18 INFO     Recorded fold matbench_log_gvrh-3 successfully.
2021-08-07 14:16:18 INFO     Recorded fold matbench_log_gvrh-4 successfully.
matbench_log_gvrh: MAE 0.07311620406947483
2021-08-07 14:16:18 INFO     Loading dataset 'matbench_log_kvrh'...


Reading file /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_log_gvrh.json.gz: 10987it [00:06, 1774.05it/s]
Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_log_gvrh.json.gz: 100%|##########| 10987/10987 [00:06<00:00, 1774.07it/s]
Decoding objects from /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_log_kvrh.json.gz:  98%|#########8| 10768/10987 [00:05<00:00, 1986.07it/s]

2021-08-07 14:16:23 INFO     Dataset 'matbench_log_kvrh loaded.
2021-08-07 14:16:23 INFO     Recorded fold matbench_log_kvrh-0 successfully.
2021-08-07 14:16:23 INFO     Recorded fold matbench_log_kvrh-1 successfully.
2021-08-07 14:16:23 INFO     Recorded fold matbench_log_kvrh-2 successfully.
2021-08-07 14:16:23 INFO     Recorded fold matbench_log_kvrh-3 successfully.
2021-08-07 14:16:24 INFO     Recorded fold matbench_log_kvrh-4 successfully.
matbench_log_kvrh: MAE 0.05477001646276852


Reading file /Users/ppdebreuck/anaconda3/envs/modnet/lib/python3.8/site-packages/matminer/datasets/matbench_log_kvrh.json.gz: 10987it [00:17, 12705.05it/s]

In [9]:
# Write the benchmark, including every fold recorded above, to a gzipped JSON file.
mb.to_file("results.json.gz")

2021-08-07 14:17:17 INFO     Successfully wrote MatbenchBenchmark to file 'results.json.gz'.
