This notebook generates a small statistics data file (`<functional>_stats.json`) for each of the `pbe` and `r2scan` datasets, for display on the front end.

In [None]:
from __future__ import annotations

import collections
import itertools
import json

import numpy as np

from matpes.db import MatPESDB

In [None]:
matpes = MatPESDB()


def _histogram_dict(values, bins):
    """Histogram ``values`` and return JSON-serializable counts and bin edges.

    Args:
        values: Array-like of numbers to histogram.
        bins: Passed straight to ``np.histogram`` (a bin count or explicit edges).

    Returns:
        A dict with ``"counts"`` and ``"bins"`` as plain Python lists.
    """
    counts, edges = np.histogram(values, bins=bins)
    return {"counts": counts.tolist(), "bins": edges.tolist()}


# Write one stats JSON file per label; the label selects the dataframe and names the file.
for fun in ["pbe", "r2scan"]:
    df = matpes.get_df(fun)
    # Row-wise norm of every force array.
    # NOTE(review): assumes each "forces" entry is 2-D with one row per site — confirm.
    df["abs_forces"] = df["forces"].map(lambda a: np.linalg.norm(a, axis=1))
    # Discard the columns that are not summarized below.
    df = df.drop(["matpes_id", "bandgap", "energy", "forces", "formation_energy_per_atom", "formula_pretty"], axis=1)
    stats = {"nstructures": len(df)}
    # Number of rows in which each element appears in the "elements" column.
    stats["element_counts"] = dict(collections.Counter(itertools.chain.from_iterable(df["elements"])))
    for c in ["cohesive_energy_per_atom", "nsites"]:
        stats[c] = _histogram_dict(df[c], bins=50)
    # Pool the norms from all rows into one flat list. from_iterable walks the
    # column lazily instead of unpacking one positional argument per row.
    stats["abs_forces"] = _histogram_dict(list(itertools.chain.from_iterable(df["abs_forces"])), bins=50)
    # Edges 0.5, 1.5, ..., 8.5 give unit-width bins centred on the integers 1-8.
    # NOTE(review): np.histogram ignores values outside the edges, so rows with
    # nelements >= 9 would not be counted — confirm the dataset maximum.
    stats["nelements"] = _histogram_dict(df["nelements"], bins=np.arange(0.5, 9.5, 1))
    with open(f"../pages/{fun}_stats.json", "w", encoding="utf-8") as f:
        json.dump(stats, f)
    print(stats)

{'nstructures': 434712, 'element_counts': {'Ti': 19582, 'Y': 11913, 'N': 46032, 'Zn': 15747, 'Sn': 16844, 'U': 9978, 'Ni': 29953, 'Zr': 12234, 'Ga': 14424, 'Gd': 4073, 'O': 117733, 'P': 20139, 'C': 25549, 'Th': 6839, 'Tc': 7282, 'Sb': 17902, 'Cd': 11301, 'La': 12460, 'Te': 13663, 'Cu': 24215, 'S': 21187, 'Ag': 11239, 'F': 18416, 'B': 17186, 'As': 11601, 'Ge': 15543, 'Li': 29705, 'Tl': 9729, 'Os': 10237, 'Si': 25229, 'Mo': 13265, 'V': 18753, 'Pd': 12550, 'Hg': 9439, 'Pu': 9842, 'W': 14791, 'K': 14456, 'Al': 20315, 'Nb': 13854, 'Rh': 12674, 'Ce': 9680, 'Ru': 12588, 'Sc': 9416, 'Co': 27048, 'Be': 7618, 'Au': 12663, 'Mg': 33692, 'Eu': 6954, 'Cl': 11168, 'Fe': 36116, 'Cr': 18516, 'Mn': 30568, 'Br': 7690, 'H': 15441, 'Re': 9901, 'Se': 14238, 'Sr': 16097, 'In': 12260, 'Pt': 12678, 'Cs': 8554, 'Ir': 9692, 'Hf': 10140, 'I': 7016, 'Bi': 13712, 'Pa': 4577, 'Np': 5453, 'Ac': 2708, 'Na': 15718, 'Lu': 3398, 'Kr': 26, 'Ta': 12552, 'Rb': 9213, 'Ca': 15258, 'Pb': 9609, 'He': 88, 'Tm': 2961, 'Yb': 7452,