# MODNet (v0.1.12)

For now, this benchmark file simply loads our existing full benchmark results (~100 MB) from [ml-evs/modnet-matbench](https://github.com/ml-evs/modnet-matbench) and exports them in the matbench format. Code for featurisation, hyperparameter optimisation and the final predictions themselves can be found in the aforementioned repository or in the illustrative run.py file.

In [1]:
def download_and_extract(url, fname):
    """Download a zip archive to ``fname`` and unpack it into the current directory.

    If ``fname`` already exists, a message is printed and nothing is
    downloaded or extracted.

    Args:
        url: Location of the zip archive to fetch.
        fname: Local path the downloaded archive is saved to.

    Returns:
        None.
    """
    # `import urllib` alone does not load the `request` submodule, so it has to
    # be imported explicitly for `urllib.request.urlretrieve` to be available.
    import urllib.request
    from zipfile import ZipFile
    import os

    if os.path.exists(fname):
        print(f"File {fname} already found, will not redownload.")
        return

    print(url)

    # Download under a temporary name and rename only once the transfer has
    # finished, so an interrupted download is not mistaken for a complete
    # archive by the existence check above on the next run.
    partial_fname = f"{fname}.part"
    urllib.request.urlretrieve(url, partial_fname)
    os.replace(partial_fname, fname)

    with ZipFile(fname, "r") as _zip:
        _zip.extractall(".")


# GitHub repository holding the precomputed results, and the tag (leading "v")
# or branch name to fetch from it.
repo = "ml-evs/modnet-matbench"
version = "v0.4.0"

# Base name of the downloaded archive; it is reused later as the directory the
# results are read from (presumably the top-level folder inside the GitHub
# archive -- confirm if the naming scheme changes).
# Only the leading "v" of a tag is dropped: removing every "v" would mangle
# names such as "dev" into "de".
_version_label = version[1:] if version.startswith("v") else version
fname = f'modnet-matbench-{_version_label.replace("#", "-").replace("/", "-")}'

# Versions starting with "v" are treated as tags, anything else as a branch.
if version.startswith("v"):
    url = f"https://github.com/{repo}/archive/refs/tags/{version}.zip"
else:
    # "#" must be percent-encoded, otherwise it would start a URL fragment.
    url = f'https://github.com/{repo}/archive/refs/heads/{version.replace("#", "%23")}.zip'


download_and_extract(url, fname + ".zip")

File modnet-matbench-0.4.0.zip already found, will not redownload.


In [2]:
import pickle
from matbench.bench import MatbenchBenchmark
from matbench.constants import CLF_KEY

# Benchmark restricted to the tasks listed below; datasets are not loaded yet
# (autoload=False) and are loaded task by task later on.
mb = MatbenchBenchmark(
    autoload=False,
    subset=[
        "matbench_dielectric",
        "matbench_jdft2d",
        "matbench_steels",
        "matbench_expt_gap",
        "matbench_phonons",
        "matbench_log_gvrh",
        "matbench_log_kvrh",
        "matbench_glass",
        "matbench_expt_is_metal",
        "matbench_perovskites",
        "matbench_mp_gap",
        "matbench_mp_is_metal",
        "matbench_mp_e_form",
    ],
)

# Path of the pickled results for every task, keyed by dataset name.
results_locs = dict(
    (name, f"{fname}/{name}/results/{name}_results.pkl")
    for name in (task.dataset_name for task in mb.tasks)
)

# The two elastic tasks were learned jointly, so both read the same results
# file; `target_key_map` names the column holding each task's values.
elastic_results_loc = f"{fname}/matbench_elastic/results/matbench_elastic_results.pkl"
results_locs.update(
    {
        "matbench_log_gvrh": elastic_results_loc,
        "matbench_log_kvrh": elastic_results_loc,
    }
)
target_key_map = dict(
    matbench_log_gvrh="log10G_VRH",
    matbench_log_kvrh="log10K_VRH",
)


2022-03-18 16:35:53 INFO     Initialized benchmark 'matbench_v0.1' with 13 tasks: 
['matbench_dielectric',
 'matbench_jdft2d',
 'matbench_steels',
 'matbench_expt_gap',
 'matbench_phonons',
 'matbench_log_gvrh',
 'matbench_log_kvrh',
 'matbench_glass',
 'matbench_expt_is_metal',
 'matbench_perovskites',
 'matbench_mp_gap',
 'matbench_mp_is_metal',
 'matbench_mp_e_form']


In [3]:
# Record the stored per-fold predictions of every task and print its mean scores.
for task in mb.tasks:
    task.load()

    # NOTE: unpickling can execute arbitrary code -- only load results archives
    # obtained from a trusted source.
    with open(results_locs[task.dataset_name], "rb") as results_file:
        results = pickle.load(results_file)

    is_classification = task.metadata.task_type == CLF_KEY
    # Column to pick for jointly-learned (multi-target) tasks, None otherwise.
    target_column = target_key_map.get(task.dataset_name)

    for fold_ind, fold in enumerate(task.folds):
        fold_predictions = results["predictions"][fold_ind]
        fold_stds = results["stds"][fold_ind]
        if target_column is not None:
            fold_predictions = fold_predictions[target_column]
            fold_stds = fold_stds[target_column]
        predictions = fold_predictions.values
        stds = fold_stds.values

        if is_classification:
            # Keep only the second prediction column and record no
            # uncertainties (presumably column 1 is the positive-class
            # value -- confirm against the stored results).
            predictions = predictions[:, 1].flatten()
            stds = None
        else:
            predictions = predictions.flatten()
            stds = stds.flatten()

        task.record(fold, predictions, std=stds)

    if is_classification:
        print(f"{task.dataset_name}: Accuracy score {task.scores['accuracy']['mean']}")
        print(f"{task.dataset_name}: ROC score {task.scores['rocauc']['mean']}")
    else:
        print(f"{task.dataset_name}: MAE {task.scores['mae']['mean']}")

    # Drop the reference to the loaded dataset before moving to the next task.
    task.df = None

2022-03-18 16:35:53 INFO     Loading dataset 'matbench_dielectric'...
2022-03-18 16:35:58 INFO     Dataset 'matbench_dielectric loaded.
2022-03-18 16:35:58 INFO     Recorded fold matbench_dielectric-0 successfully.
2022-03-18 16:35:58 INFO     Recorded fold matbench_dielectric-1 successfully.
2022-03-18 16:35:58 INFO     Recorded fold matbench_dielectric-2 successfully.
2022-03-18 16:35:58 INFO     Recorded fold matbench_dielectric-3 successfully.
2022-03-18 16:35:58 INFO     Recorded fold matbench_dielectric-4 successfully.
matbench_dielectric: MAE 0.2711019241663236
2022-03-18 16:35:58 INFO     Loading dataset 'matbench_jdft2d'...
2022-03-18 16:35:58 INFO     Dataset 'matbench_jdft2d loaded.
2022-03-18 16:35:58 INFO     Recorded fold matbench_jdft2d-0 successfully.
2022-03-18 16:35:58 INFO     Recorded fold matbench_jdft2d-1 successfully.
2022-03-18 16:35:58 INFO     Recorded fold matbench_jdft2d-2 successfully.
2022-03-18 16:35:58 INFO     Recorded fold matbench_jdft2d-3 successfull

In [4]:
mb.to_file("results.json.gz")

2022-03-18 16:43:47 INFO     Successfully wrote MatbenchBenchmark to file 'results.json.gz'.
