In [None]:
"""
Read the source ACWF verification files and prepare a single JSON file that contains all the
relevant data and will be included in the application (as opposed to queried directly).

The source files are listed in the `labels.json` file and contain the following keys:

[
    BM_fit_data, completely_off, eos_data, failed_wfs, missing_outputs,
    num_atoms_in_sim_cell, script_version, set_name, stress_data, uuid_mapping
]

This script merges these files into a single JSON file with the following format:
{
    metadata : {
        date : <date>, 
        methods: { <code> : {
            oxides : {filename: <x>, md5: <x>},
            unaries : {filename: <x>, md5: <x>},
            short_label : <x>
    }}}
    data : {
        <element> : { <crystalType (X2O, X2O3, X/BCC, ...)> : { <code> : {
            eos_data_per_atom: <x>,
            bm_fit_per_atom: <x>
        }}}
    }
}

where, in `eos_data_per_atom` and `bm_fit_per_atom`, the energies and volumes are scaled to be per atom.
If "per formula unit" values are needed, multiply by the number of atoms in the formula unit.
"""

import requests
import os
from datetime import datetime
import hashlib
import json
import collections

def nested_dict():
    """Return an auto-vivifying dictionary.

    Looking up a missing key creates another ``nested_dict`` in its place, so
    deep paths such as ``d["a"]["b"]["c"] = 1`` can be assigned without
    creating the intermediate levels first.
    """
    autoviv = collections.defaultdict(nested_dict)
    return autoviv

def scale_bm_fit(bm_fit, num_atoms_in_cell):
    """Return a copy of a BM fit with its cell-sized quantities given "per atom".

    ``E0`` and ``min_volume`` are divided by ``num_atoms_in_cell``;
    ``bulk_deriv``, ``bulk_modulus_ev_ang3`` and ``residuals`` are copied
    unchanged. Any other key of ``bm_fit`` is dropped, and ``bm_fit`` itself
    is not modified.

    Raises ``KeyError`` if one of the five expected keys is missing.
    """
    # Keys whose values get divided by the number of atoms.
    divided_keys = ("E0", "min_volume")
    # Output keys, in the order they appear in the result.
    output_keys = (
        "E0",
        "bulk_deriv",
        "bulk_modulus_ev_ang3",
        "min_volume",
        "residuals",
    )
    return {
        key: (bm_fit[key] / num_atoms_in_cell) if key in divided_keys else bm_fit[key]
        for key in output_keys
    }

def scale_eos_data(eos_data, num_atoms_in_cell):
    """Return the EOS points with both of their values given "per atom".

    For every entry of ``eos_data`` the first two items are divided by
    ``num_atoms_in_cell`` and returned as a new two-element list; any further
    items of an entry are ignored. The input is left untouched.
    """
    # presumably each entry is a (volume, energy) pair; verify against the source files
    scaled_points = []
    for point in eos_data:
        first = point[0] / num_atoms_in_cell
        second = point[1] / num_atoms_in_cell
        scaled_points.append([first, second])
    return scaled_points

def process_json(source_json, current_data, code):
    """Merge one source file into ``current_data``, scaled to be "per atom".

    ``source_json`` holds the data for one specific <code> and one of
    <oxides/unaries>. Its ``BM_fit_data`` entries (required) and ``eos_data``
    entries (optional) are keyed by ``"<element>-<crystal type>"``. Every
    entry that is not ``None`` is scaled with the matching entry of
    ``num_atoms_in_sim_cell`` and stored at
    ``current_data[element][crystal_type][code]`` under ``bm_fit_per_atom``
    or ``eos_data_per_atom`` respectively.

    Args:
        source_json: parsed content of one source file.
        current_data: nested mapping that is populated in place; it has to
            create missing levels on access (e.g. a ``nested_dict``).
        code: label of the code/method the file belongs to.

    Entries whose value is ``None`` are reported and skipped *before* their
    key is split or their atom count is looked up, so a missing
    ``num_atoms_in_sim_cell`` entry for a skipped system is not an error.
    """

    # Process BM fit
    for elem_crystal, bm_fit in source_json["BM_fit_data"].items():
        if bm_fit is None:
            print(f"  {code} BM_fit_data {elem_crystal} is None, skipping.")
            continue

        elem, crystal_type = elem_crystal.split("-")
        num_atoms_in_sim_cell = source_json["num_atoms_in_sim_cell"][elem_crystal]
        current_data[elem][crystal_type][code]["bm_fit_per_atom"] = scale_bm_fit(
            bm_fit, num_atoms_in_sim_cell
        )

    # Process EOS data (not every source file provides it)
    if "eos_data" in source_json:
        for elem_crystal, eos_data in source_json["eos_data"].items():
            if eos_data is None:
                print(f"  {code} eos_data {elem_crystal} is None, skipping.")
                continue

            elem, crystal_type = elem_crystal.split("-")
            num_atoms_in_sim_cell = source_json["num_atoms_in_sim_cell"][elem_crystal]
            current_data[elem][crystal_type][code]["eos_data_per_atom"] = scale_eos_data(
                eos_data, num_atoms_in_sim_cell
            )

In [None]:
# Base URL of the folder holding the source files; it ends with a slash so that
# file names can be appended to it directly.
data_folder_url = "https://raw.githubusercontent.com/aiidateam/acwf-verification-scripts/main/acwf_paper_plots/code-data/"
# No leading slash here: the base URL already ends with one, and a second one
# would produce a ".../code-data//labels.json" path.
labels_file = data_folder_url + "labels.json"

r = requests.get(labels_file)
# Stop on an HTTP error instead of trying to parse an error page as JSON.
r.raise_for_status()
labels_json = r.json()

In [None]:
# `meta` records when the file was generated and where each data set came from;
# `data` collects the per-atom results of all methods.
meta = {"date": datetime.today().strftime('%Y-%m-%d'), "methods": nested_dict()}
data = nested_dict()

# References and main methods are processed the same way. If a label occurs in
# both, the 'methods-main' entry wins (right operand of the dict union).
methods = labels_json['references'] | labels_json['methods-main']

for method in methods:

    # Methods without a short label get none, except for the hard-coded
    # "all-electron average" fallback.
    if "short_label" in methods[method]:
        meta["methods"][method]["short_label"] = methods[method]["short_label"]
    elif method == "all-electron average":
        meta["methods"][method]["short_label"] = "AE average"

    # Note: `crystal_type` here is the name of the data set (unaries/oxides),
    # not a crystal structure label such as X2O.
    for crystal_type in ['unaries', 'oxides']:

        fname = methods[method][crystal_type]

        print(f'Loading {fname}')

        r = requests.get(data_folder_url + fname)
        # Fail on HTTP errors right away; otherwise the checksum below would be
        # taken over the error body and r.json() would fail with a misleading
        # decoding error.
        r.raise_for_status()
        # Checksum of the raw downloaded bytes, stored next to the file name.
        md5 = hashlib.md5(r.content).hexdigest()
        meta["methods"][method][crystal_type]["filename"] = fname
        meta["methods"][method][crystal_type]["md5"] = md5

        process_json(r.json(), data, method)

In [None]:
# Combine the provenance metadata and the processed data under the two
# top-level keys of the output file.
final_data = dict(metadata=meta, data=data)

fname = "data.json"
with open(fname, 'w') as f:
    # Compact (unindented) output; pass indent=2 to json.dump for a
    # human-readable file.
    json.dump(final_data, f)
    print(f"Wrote {fname}!")