In [2]:
import os, json
import numpy as np

from monty.re import regrep
from pymatgen.io.vasp.inputs import Incar
from ipywidgets import interact, fixed, FloatSlider, Text

# Processing the output into one datafile

The function below was used to produce the `.json` files for each of the systems we have studied. It can take a while to run, as it needs to parse the `OUTCAR` files of all the calculations. Directly loading the data from a `.json` file is therefore faster, and that is the approach used in [the parallel_analysis.ipynb notebook](parallel_analysis.ipynb).

In [3]:
def _labelled_subdirs(parent, label):
    """Yield ``(value, path)`` for every sub-directory of `parent` that encodes an integer.

    A directory name is decoded exactly as before: the characters of `label`
    are stripped from both ends and the remainder is converted with ``int``.
    Plain files are ignored, and directories whose name does not decode to an
    integer are reported and skipped instead of aborting the whole scan.
    Entries are visited in sorted name order so the output is reproducible.
    """
    for name in sorted(os.listdir(parent)):
        path = os.path.join(parent, name)
        if not os.path.isdir(path):
            # Stray files next to the run directories are not calculations.
            continue
        try:
            value = int(name.strip(label))
        except ValueError:
            print("Skipped: " + path)
            continue
        yield value, path


def process_parallel(data_dir, output_filename="parallel.json"):
    """Collect the average loop timing of every run below `data_dir` into a JSON file.

    The directory tree is expected to be laid out as
    ``<data_dir>/<nodes dir>/<kpar dir>/<npar dir>/``, where each directory
    name reduces to an integer once the characters of ``"nodes"``, ``"kpar"``
    or ``"npar"`` are stripped from its ends. Each innermost directory must
    hold an ``INCAR`` and an ``OUTCAR`` file.

    For every run, the "real time" values of the ``LOOP:`` lines in the
    ``OUTCAR`` are extracted. The first ``abs(NELMDL)`` entries are discarded
    (``NELMDL`` is read from the ``INCAR``, defaulting to 5 when absent) and
    the remaining ones are averaged. Runs with no entries left after that
    cut-off are omitted from the output without a message.

    Args:
        data_dir: Root directory holding the ``nodes`` directories.
        output_filename: Path of the JSON file to write.

    Returns:
        None. A JSON list is written to `output_filename`; each element is a
        dict with the keys ``"nodes"``, ``"kpar"``, ``"ncore"``, ``"npar"``
        and ``"timing"`` (the averaged loop time, in whatever unit the
        ``OUTCAR`` reports).

    Runs that cannot be processed (missing or malformed files, no core count
    in the ``OUTCAR``, ...) are reported on stdout and skipped.
    """
    timing_list = []

    # Both patterns are searched in a single pass so each OUTCAR is read once.
    patterns = {
        "cores": r"\s+running\son\s+(\S+)\stotal\scores",
        "loop": r"\s+LOOP:\s+cpu\stime\s+\S+:\sreal\stime\s+(\S+)",
    }

    for nodes, nodes_path in _labelled_subdirs(data_dir, "nodes"):
        for kpar, kpar_path in _labelled_subdirs(nodes_path, "kpar"):
            for npar, run_path in _labelled_subdirs(kpar_path, "npar"):
                try:
                    incar = Incar.from_file(os.path.join(run_path, "INCAR"))
                    # NELMDL may be negative in the INCAR; only its magnitude
                    # is used as the number of leading loop entries to drop.
                    nelmdl = abs(int(incar.get("NELMDL", 5)))

                    matches = regrep(
                        filename=os.path.join(run_path, "OUTCAR"),
                        patterns=patterns,
                    )
                    loop_timing = matches["loop"]

                    if len(loop_timing) > nelmdl:
                        # Each match is [[captured groups], line number].
                        loop_times = [float(e[0][0]) for e in loop_timing[nelmdl:]]
                        total_cores = int(matches["cores"][0][0][0])
                        ncore = total_cores // kpar // npar

                        timing_list.append(
                            {"nodes": nodes, "kpar": kpar, "ncore": ncore,
                             "npar": npar, "timing": float(np.mean(loop_times))}
                        )
                except Exception as exc:
                    # Best effort: one broken run must not abort the scan, but
                    # Ctrl-C still works and the reason is reported.
                    print("Failed: {} {} {} ({}: {})".format(
                        nodes, npar, kpar, type(exc).__name__, exc))

    with open(output_filename, "w") as file:
        json.dump(timing_list, file)

The cells below each process one set of data into a corresponding `.json` file and save it to the `data` directory.

**Note: The notebook does not have access to any of the `data_dir`s used below when running on Binder. It is simply added to the repository for completeness.**

### Leibniz

In [16]:
# Gather the timings found under the source directory into the target JSON file.
process_parallel(
    data_dir="/mnt/data/mbercx/leibniz/U4O8/84bands_455kpoints/",
    output_filename="data/leibniz_UO2_A12_K455_pbe.json",
)

In [15]:
# Gather the timings found under the source directory into the target JSON file.
process_parallel(
    data_dir="/mnt/data/mbercx/leibniz/U60O140/1008bands_6kpoints/",
    output_filename="data/leibniz_U3O7_A200_K6_pbe.json",
)

### Breniac

In [4]:
# Gather the timings found under the source directory into the target JSON file.
process_parallel(
    data_dir="/mnt/data/mbercx/breniac/U4O8/84bands_455kpoints/",
    output_filename="data/breniac_UO2_A12_K455_pbe.json",
)

In [6]:
# Gather the timings found under the source directory into the target JSON file.
process_parallel(
    data_dir="/mnt/data/mbercx/breniac/U12O32/420bands_60kpoints/",
    output_filename="data/breniac_U3O8_A44_K60_pbe.json",
)

Failed: 16 7 2


In [22]:
# Gather the timings found under the source directory into the target JSON file.
process_parallel(
    data_dir="/mnt/data/mbercx/breniac/U60O140/1008bands_6kpoints/",
    output_filename="data/breniac_U3O7_A200_K6_pbe.json",
)

In [17]:
# Gather the timings found under the source directory into the target JSON file.
process_parallel(
    data_dir="/mnt/data/mbercx/breniac/Al2CuS4_hse/56bands_40kpoints/",
    output_filename="data/breniac_Al2CuS4_A8_K40_hse.json",
)