First, we import the necessary packages, e.g. MatCalc, MatGL, and pymatgen.

In [1]:
from __future__ import annotations

import json
import random
import warnings

import matgl
import numpy as np
from matcalc.elasticity import ElasticityCalc
from matcalc.phonon import PhononCalc
from matgl.ext.ase import PESCalculator
from pymatgen.core import Structure

# Silence every warning for the rest of the session so the benchmark output
# stays readable. NOTE: this filter is global and also hides warnings that may
# matter (e.g. numerical RuntimeWarnings); remove it when debugging.
warnings.simplefilter("ignore")

We leverage the physical constants provided by SciPy to convert stress units, ensuring compatibility with the ASE calculator.

In [2]:
from scipy import constants

# Multiplicative factor taking a pressure in eV/Å^3 to GPa:
# (joules per eV) / (cubic metres per Å^3) gives pascals; dividing by 1e9 gives GPa.
eVA3ToGPa = constants.e / constants.angstrom**3 / constants.giga

Next, I will show you how to run MatCalc-Bench. Here, TensorNet-MatPES-PBE-v2025.1-PES and M3GNet-MatPES-PBE-v2025.1-PES are used to perform elastic modulus and constant-volume heat capacity calculations. The approach is similar for other models and other benchmarks.

For demonstration purposes only, I randomly sample 1% of the entire test dataset for benchmarking. Consequently, the values presented here may differ from those reported on our website and in the paper. 

In [3]:
# -------------------------------
# Elastic Property Benchmarking
# -------------------------------
# Benchmark two ML potentials on elastic properties: for each model, compute the
# VRH bulk and shear moduli of the sampled structures and report the mean
# absolute error (MAE, in GPa) against the reference values.

# Load the benchmark elasticity data once; it does not depend on the model.
# Each entry is read below for its "structure", "bulk_modulus_vrh" and
# "shear_modulus_vrh" fields.
with open("matcalc-bench-data/elastic.json", encoding="utf-8") as f:
    entries = json.load(f)

# Draw a fixed-seed 1% sample to reduce computational cost. Sampling once, outside
# the model loop, guarantees that every model is evaluated on the same structures.
random.seed(2025)
sampled_entries = random.sample(entries, len(entries) // 100)

for model_name in [
    "TensorNet-MatPES-PBE-v2025.1-PES",
    "M3GNet-MatPES-PBE-v2025.1-PES",
]:
    # Load the ML potential model specified by model_name.
    potential = matgl.load_model(model_name)

    # Initialize the PES calculator with the loaded potential.
    # stress_weight is the inverse of the eV/Å^3 -> GPa factor, i.e. it rescales
    # the stress from GPa into eV/Å^3 for compatibility with ASE. The moduli
    # computed below are therefore in eV/Å^3 and are converted back to GPa
    # before being compared with the reference data.
    calculator = PESCalculator(potential, stress_weight=1 / eVA3ToGPa)

    # Output the model name for clarity in the benchmarking log.
    print(model_name)

    # Create an instance of the elasticity calculator.
    # fmax sets the maximum force tolerance and relax_structure=True enables
    # structure relaxation during calculations.
    elastic_calc = ElasticityCalc(calculator, fmax=0.05, relax_structure=True)

    # Absolute errors (GPa) for the bulk and shear moduli of each benchmark entry.
    bulk_modulus_ae = []
    shear_modulus_ae = []

    for entry in sampled_entries:
        # Convert the stored dictionary representation into a pymatgen Structure.
        structure = Structure.from_dict(entry["structure"])

        # Calculate elastic moduli for the given structure.
        properties = elastic_calc.calc(structure)

        # The calculated moduli are converted from eV/Å^3 to GPa for direct
        # comparison with the benchmark values.
        bulk_modulus_err = abs(entry["bulk_modulus_vrh"] - properties["bulk_modulus_vrh"] * eVA3ToGPa)
        shear_modulus_err = abs(entry["shear_modulus_vrh"] - properties["shear_modulus_vrh"] * eVA3ToGPa)

        bulk_modulus_ae.append(bulk_modulus_err)
        shear_modulus_ae.append(shear_modulus_err)

    # Compute the mean absolute error (MAE) for both bulk and shear moduli.
    bulk_modulus_mae = np.mean(bulk_modulus_ae)
    shear_modulus_mae = np.mean(shear_modulus_ae)

    # Print out the MAE results for the current model.
    print(f"Bulk Modulus: {bulk_modulus_mae}")
    print(f"Shear Modulus: {shear_modulus_mae}")

TensorNet-MatPES-PBE-v2025.1-PES
Bulk Modulus: 14.198907191944855
Shear Modulus: 13.262304643180377
M3GNet-MatPES-PBE-v2025.1-PES
Bulk Modulus: 29.734312380439942
Shear Modulus: 16.055060961437153


In [4]:
# -------------------------------
# Phonon Property Benchmarking
# -------------------------------
# Benchmark the same two ML potentials on phonon properties: for each model,
# compute the constant-volume heat capacity of the sampled structures and report
# the mean absolute error (MAE) against the reference values.

# Load the benchmark phonon data once; it does not depend on the model.
# Each entry is read below for its "structure" and "heat_capacity" fields.
with open("matcalc-bench-data/phonon.json", encoding="utf-8") as f:
    entries = json.load(f)

# Draw a fixed-seed 1% sample to reduce computational cost. Sampling once, outside
# the model loop, guarantees that every model is evaluated on the same structures.
random.seed(2025)
sampled_entries = random.sample(entries, len(entries) // 100)

for model_name in [
    "TensorNet-MatPES-PBE-v2025.1-PES",
    "M3GNet-MatPES-PBE-v2025.1-PES",
]:
    # Load the ML potential model.
    potential = matgl.load_model(model_name)

    # Initialize the PES calculator with the loaded potential, using the same
    # stress rescaling (inverse of the eV/Å^3 -> GPa factor) as the elastic benchmark.
    calculator = PESCalculator(potential, stress_weight=1 / eVA3ToGPa)

    # Output the model name for clarity in the benchmarking log.
    print(model_name)

    # Create an instance of the phonon calculator.
    # fmax sets the force tolerance, relax_structure=True allows for relaxation, and
    # write_phonon=False disables writing phonon output files.
    phonon_calc = PhononCalc(calculator, fmax=0.05, relax_structure=True, write_phonon=False)

    # Absolute error in constant-volume heat capacity for each entry.
    heat_capacity_ae = []

    for entry in sampled_entries:
        # Convert the structure dictionary into a pymatgen Structure object.
        structure = Structure.from_dict(entry["structure"])

        # Calculate the phonon properties for the structure.
        properties = phonon_calc.calc(structure)

        # Compare against the reference heat capacity at 300 K.
        # NOTE(review): index 30 is taken to be 300 K, which presumes the
        # calculator's temperature grid starts at 0 K with 10 K steps — confirm
        # against the PhononCalc defaults if they are ever overridden.
        heat_capacity_err = abs(entry["heat_capacity"] - properties["thermal_properties"]["heat_capacity"][30])

        heat_capacity_ae.append(heat_capacity_err)

    # Compute the MAE for the constant-volume heat capacity, ignoring NaN values
    # (np.nanmean skips NaN entries, whereas np.mean would return NaN if any
    # single calculation produced one).
    heat_capacity_mae = np.nanmean(heat_capacity_ae)

    # Print out the MAE result for the current model.
    print(f"Constant-Volume Heat Capacity: {heat_capacity_mae}")

TensorNet-MatPES-PBE-v2025.1-PES
Constant-Volume Heat Capacity: 9.051091815387984
M3GNet-MatPES-PBE-v2025.1-PES
Constant-Volume Heat Capacity: 12.975157587868738


You can certainly perform the full benchmark by slightly modifying the code provided above, but please be aware that the whole process is considerably time-consuming. Therefore, I recommend submitting separate jobs to your cluster for every material—or every few materials, depending on your resource availability and workflow preferences.