In [None]:
import glob
import json
import os
import random
import re
from os.path import join
from random import shuffle

import ase.io

In [None]:
# Input files used by the exploratory cells of this notebook.
# Alternative test input, kept for reference:
#test_vasprun_file = "./aln_gan_sic_vaspruns/equil/aln/533_orthog/vasprun.xml"
# vasprun file used as the main test case.
test_vasprun_file = "./aln_gan_sic_vaspruns/aimd/interface_aln-gan_aln-lp/set1_nvt/300K/vasprun1443.xml"
# Sample Al vasprun file, used for the energy vs. free-energy comparison.
metal_al_vasprun = "./aln_gan_sic_vaspruns/sample_al_vasprun.xml"


In [None]:
# Parse both vasprun files with ASE; each result carries the calculator whose
# stored results are inspected in the following cells.
test_structure = ase.io.read(test_vasprun_file)
al_structure = ase.io.read(metal_al_vasprun)

In [None]:
# Total energy reported by ASE for the test structure; the note below compares
# it with the raw <energy> entries of the vasprun XML.
test_structure.get_total_energy()

-2891.63088361
Compare this with the corresponding `<energy>` block of the vasprun XML:
```
2976   <energy>
2977    <i name="e_fr_energy">  -2891.63088361 </i>
2978    <i name="e_wo_entrp">  -2891.63088361 </i>
2979    <i name="e_0_energy">     -0.00000000 </i>
```

In [None]:
# Write the parsed test structure to an extended-XYZ file.
ase.io.write("test_vasprun_AlN-GaN_set1_300K_1443.xyz", test_structure, format="extxyz")

In [None]:
# Compare the free energy stored on the calculator with the default potential
# energy. Only the last expression of a cell is displayed, so both values are
# returned together as a (free_energy, potential_energy) tuple.
calc = test_structure.calc
calc.get_property("free_energy"), test_structure.get_potential_energy()

In [None]:
# Print, in order: the default potential energy, the force-consistent (free)
# energy, the free energy stored on the calculator, and the full results dict.
# The calculator is reached through the `.calc` attribute throughout.
al_calc = al_structure.calc
print(al_structure.get_potential_energy())
print(al_structure.get_potential_energy(force_consistent=True))
print(al_calc.get_property("free_energy"))
print(al_calc.results)

For the (metallic) Al system the free energy does differ from the energy (sigma --> 0):
-537.64340722
-537.64368131
-537.64368131
{'energy': -537.64340722, 'forces': array([[-0.,  0., -0.],
...
       [ 0., -0., -0.]]), 'stress': array([-0.02452045, -0.03148586,  0.00181242, -0.        , -0.00148918,
       -0.        ]), 'free_energy': -537.64368131}

In [None]:
# Experiment: overwrite only the stored "energy" entry, then print the
# potential energy without and with force_consistent to see which of the two
# calls picks up the edited value.
al_results = al_structure.calc.results
al_results["energy"] = 1.0
print(al_structure.get_potential_energy())
print(al_structure.get_potential_energy(force_consistent=True))


In [None]:
def compute_binding_energy(raw_atoms, atomic_energy_dict):
    """Return the free energy of ``raw_atoms`` minus its per-atom reference energies.

    For MLIP fitting only the free energy (the quantity consistent with the
    forces) matters; see https://libatoms.github.io/GAP/gap_fit.html

    Parameters
    ----------
    raw_atoms
        Structure exposing ``get_chemical_symbols()``,
        ``get_potential_energy(force_consistent=True)`` and a ``calc``
        attribute whose ``get_property("free_energy")`` returns the stored
        free energy.
    atomic_energy_dict
        Mapping from chemical symbol to the reference energy that is
        subtracted once for every atom of that species.

    Returns
    -------
    float
        Free energy minus the sum of the reference energies of all atoms.

    Raises
    ------
    KeyError
        If a species in ``raw_atoms`` has no entry in ``atomic_energy_dict``.
    AssertionError
        If the force-consistent energy differs from the stored free energy.
    """
    elems = raw_atoms.get_chemical_symbols()

    pot_eng = raw_atoms.get_potential_energy(force_consistent=True)
    # Sanity check: the force-consistent energy must be the stored free energy.
    # The calculator is reached through `.calc`, as everywhere else in this notebook.
    stored_free_energy = raw_atoms.calc.get_property("free_energy")
    assert pot_eng == stored_free_energy, (
        f"force-consistent energy {pot_eng!r} != stored free energy {stored_free_energy!r}"
    )

    try:
        # Start from 0.0 so the result is a float even for an empty structure.
        atomic_e_contrib = sum((atomic_energy_dict[elem] for elem in elems), 0.0)
    except KeyError as err:
        # Same exception type as a plain lookup failure, but says what is missing.
        raise KeyError(
            f"no reference atomic energy for element {err.args[0]!r}"
        ) from err

    return pot_eng - atomic_e_contrib


In [None]:
# Per-element reference energies; compute_binding_energy subtracts one entry
# per atom of the matching species.
# NOTE(review): provenance and units are not recorded in this notebook --
# presumably isolated-atom energies in the same units as the vasprun energies;
# confirm and document the source.
atomic_energy_dict = {"Al": -0.01050279,
                      "Ga": -0.01074943, 
                      "N" : -0.01594183,
                      "Si": -0.01624861, 
                      "C" : -0.01677834,}

In [None]:
# Replace only the stored "energy" entry with the binding energy; the
# "free_energy" entry is not touched. Re-running this cell gives the same
# value because compute_binding_energy reads the force-consistent energy,
# not the entry overwritten here.
test_structure.calc.results["energy"] = compute_binding_energy(test_structure, atomic_energy_dict)

In [None]:
# Write the test structure to a second extended-XYZ file (separate from the
# earlier export so the two files can be compared).
ase.io.write("test2_vasprun_AlN-GaN_set1_300K_1443.xyz", test_structure, format="extxyz")

In [None]:
test_folder = "./aln_gan_sic_vaspruns/aimd/interface_aln-gan_aln-lp/set1_nvt/300K/"
vasprun_fnames = join(test_folder, "vasprun*.xml")

# Pair every glob hit with the number in its file name. The glob pattern also
# matches an unnumbered "vasprun.xml"; such files have no sort key and are
# skipped instead of crashing on a failed regex match.
vasprun_number_re = re.compile(r".*vasprun(\d+)\.xml$")
numbered_vaspruns = []
for fname in glob.glob(vasprun_fnames):
    number_match = vasprun_number_re.match(fname)
    if number_match:
        numbered_vaspruns.append((int(number_match.group(1)), fname))

# Sort on the number only, so equal numbers keep their glob order.
vasprun_files = [fname for _, fname in sorted(numbered_vaspruns, key=lambda pair: pair[0])]

In [None]:
# Read every snapshot of the test folder and store its binding energy under
# the calculator's "energy" entry before collecting it.
configs = []
for vasp_xml_fname in vasprun_files:
    cfg = ase.io.read(vasp_xml_fname)
    binding_energy = compute_binding_energy(cfg, atomic_energy_dict)
    cfg.calc.results["energy"] = binding_energy
    configs.append(cfg)

In [None]:
# Write all collected configurations into a single extended-XYZ file.
ase.io.write("multiple_files_test.xyz", configs, format="extxyz")

In [None]:
# from scripts I used to convert vaspruns to raw arrays/numpy arrays
def get_xml_files(xmldir, file_nums):
    """Collect the ``vasprun*.xml`` paths in ``xmldir`` selected by ``file_nums``.

    Parameters
    ----------
    xmldir : str
        Directory that holds the vasprun files.
    file_nums : str or list
        ``'*'`` selects every ``vasprun*.xml`` found in ``xmldir``.
        Otherwise a list whose entries are either a single number
        (``"12"`` or the integer ``12``) or an inclusive range written
        ``"first..last"`` with ``first <= last``.

    Returns
    -------
    list of str
        The selected paths, sorted by the integers that appear in each path.
        Single numbers are turned into a path without checking that the file
        exists; ranges only return files that are present on disk. A malformed
        entry is reported with ``print`` and skipped.
    """
    # Number taken from the file name itself, never from digits in a directory name.
    numbered_name = re.compile(r'(?:^|[\\/])vasprun(\d+)\.xml$')

    filelist = []
    if file_nums == '*':
        filelist = glob.glob(join(xmldir, 'vasprun*.xml'))

    elif isinstance(file_nums, list):
        numbered_vaspruns = None  # (number, path) pairs, globbed once on first use
        for num_range in file_nums:
            spec = str(num_range)  # entries may arrive as bare integers (e.g. from JSON)
            single_num = re.match(r'^(\d+)$', spec)
            number_range = re.match(r'^(\d+)\.\.(\d+)$', spec)

            if single_num:
                filelist.append(join(xmldir, 'vasprun{:d}.xml'.format(int(single_num[1]))))
            elif number_range and (int(number_range[1]) <= int(number_range[2])):
                first, last = int(number_range[1]), int(number_range[2])
                if numbered_vaspruns is None:
                    numbered_vaspruns = []
                    for f in glob.glob(join(xmldir, 'vasprun*.xml')):
                        hit = numbered_name.search(f)
                        if hit:  # an unnumbered vasprun.xml cannot belong to a range
                            numbered_vaspruns.append((int(hit.group(1)), f))
                filelist = filelist + [f for num, f in numbered_vaspruns if first <= num <= last]
            else:
                print("ERROR: incorrectly specifiec number range for xml files for " + xmldir)
    else:
        print("ERROR: incorrectly specifiec file_nums dict value for " + xmldir)

    filelist = sorted(filelist, key=lambda s: list(map(int, re.findall(r'\d+', s))))
    return filelist

In [None]:
# Build one extended-XYZ file per system for each set type: read the set
# specification from JSON, gather the listed vasprun files, shuffle them, and
# write every configuration with its binding energy stored as "energy".
settype_dict = {"trainval": "simple_sic-gan-aln_trainval_data.json", 
                "test"    : "simple_sic-gan-aln_test_data.json"}
xyz_repo_dir = "./xyz_files"

# Dedicated, seeded generator so the shuffled order (and therefore the written
# files) is the same on every Restart & Run All.
shuffle_rng = random.Random(0)

for (settype, json_fname) in settype_dict.items():
    print(settype)
    outdir = join(xyz_repo_dir, settype)
    # Make sure the output folder exists before anything is written into it.
    os.makedirs(outdir, exist_ok=True)

    with open(json_fname, "r") as inputjsonfile:
        set_spec = json.load(inputjsonfile)
    for system in set_spec["systems"]:
        print(system["name"])

        sys_xml_files = []
        # NOTE(review): the "trainval" key is read for both set types, including
        # "test" -- confirm the test JSON really stores its file sets under it.
        for file_set in system["trainval"]:
            xml_files = get_xml_files(file_set["xml_dir"],file_set["file_nums"])
            sys_xml_files += xml_files
        print(sys_xml_files)

        shuffle_rng.shuffle(sys_xml_files)

        # The equilibrium structure is prepended after shuffling, so it is
        # always the first configuration of a trainval file.
        if settype == "trainval":
            equil_vasprun = system["equil"][0]["xml_file"]
            sys_xml_files = [equil_vasprun] + sys_xml_files

        sys_configs = []
        for sys_xml_file in sys_xml_files:
            cfg = ase.io.read(sys_xml_file)
            # Only the "energy" entry is replaced; "free_energy" stays as read.
            cfg.calc.results["energy"] = compute_binding_energy(cfg,atomic_energy_dict)
            sys_configs.append(cfg)

        out_fname = f'{system["name"]}_{settype}.xyz'
        print(join(outdir,out_fname))
        ase.io.write(join(outdir,out_fname), sys_configs, format="extxyz")
        