In [1]:
import numpy as np
from pymatgen.io.vasp.outputs import Vasprun, Outcar
from pymatgen.io.ase import AseAtomsAdaptor
from ase.io import read
from pymatgen.io.vasp import Chgcar, Potcar

path = "/root/github_dev/github_pyiron/pyiron_atomistics/tests/static/vasp_test_files/full_job_sample"

def parse_vasp_output_pymatgen(path):
    # Parse the vasprun.xml file
    vr = Vasprun(filename=f"{path}/vasprun.xml",
                 parse_projected_eigen=True,
                 separate_spins=False)

    # Initialize the output dictionary
    output = {}
    output["generic"] = {}
    output["description"] = 'This contains all the output static from this particular VASP run'

    # 1. Get ASE atoms object via pymatgen
    output["structure"] = AseAtomsAdaptor.get_atoms(vr.final_structure)

    # # 2. Get ASE atoms object from ASE
    # output["structure"] = read(f"{path}/CONTCAR")

    # 3. Get charge density from CHGCAR
    # For charge density, I'm not too clear on the details of what exactly is happening,
    # but it appears it is spawned from pymatgen's parser originally anyway (per their comments in vasp.volumetric_data)
    output["charge_density"] = Chgcar.from_file(f"{path}/CHGCAR")

    # 4. Extract generic information
    output["generic"]["temperature"] = vr.parameters['TEBEG'] # this is just wrong in pyiron's current parsing. defaults to 0, is not 0.
    # this is NOT actually 0, vasp does calculations at finite temperatures, even in DFT. This is for faster convergence - 0.0001 K is the default

    # Here we use the convention that positive stress is compressive, as is done in ASE
    # Units here are eV/A^3
    output["generic"]["stresses"] = [-np.array(step["stress"])/1600.21766208 for step in vr.ionic_steps]
    # Don't use pressures entry anymore, I have no idea what it even is - just for the love of god, just let it die!!!
    output["generic"]["pressures"] = output["generic"]["stresses"]
    output["generic"]["forces"] = [step['forces'] for step in vr.ionic_steps]
    # Similarly to pressures, the cell object in generic interface just needs to die - all useful information is in the structure
    # The edge case is where structures are extremely large and might not fit into memory, but cells do,
    # but it's so niche and pollutes the interface for no good reason. 
    # I feel like most users aren't doing calculations with structures that don't fit into memory
    output["generic"]["cells"] = [AseAtomsAdaptor.get_atoms(step["structure"]).cell for step in vr.ionic_steps]
    # See above - is this necessary?
    output["generic"]["volume"] = [step["structure"].lattice.volume for step in vr.ionic_steps]
    # output_pyiron["generic"]["energy_pot"] This is e_fr_energy in pymatgen
    # in pymatgen
    # 'e_fr_energy': -17.73798679,
    #  'e_wo_entrp': -17.72353582,
    #  'e_0_energy': -17.7331698}
    # pyiron: 
    # output_pyiron["generic"]["energy_pot"] -> array([-17.73798679])
    # output_pyiron["generic"]["energy_tot"] -> array([-17.73798679])
    output["generic"]["energy_pot"] = [step['electronic_steps'][-1]['e_wo_entrp'] for step in vr.ionic_steps]
    # output_pyiron["generic"]["energy_tot"] This is also equivalent to e_fr_energy in pymatgen (e_tot = e_pot (!?))
    output["generic"]["energy_tot"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]
    # output["generic"]["energy_tot"] = [step['electronic_steps'][-1]['e_0_energy'] for step in vasprun.ionic_steps] # energy sigma-> 0 ("real" free energy at 0K)

    # For some reason steps is acquired by calling np.arange(len(steps)) in current pyiron parser. Surely returning a list when 
    # the expected value is an integer is nonsensical? arange also generates 0 in the event of len=1...
    output["generic"]["steps"] = len(vr.ionic_steps)
    # Another grievous offender that pollutes the interface for no good reason...
    # introducing "positions", when "structures" is present in output is clearly enough. Doubling storage in hdf for no new information is just nasty work
    output["generic"]["positions"] = [AseAtomsAdaptor.get_atoms(step["structure"]).positions for step in vr.ionic_steps]

    # 5. Read elastic constants from OUTCAR
    try:
        elastic_constants = Outcar(filename=f"{path}/OUTCAR").read_elastic_tensor()
    except Exception as e:
        elastic_constants = None
    output["generic"]["elastic_constants"] = elastic_constants

    # 6. Extract DFT information
    # Notes: here is the limitations of pymatgen parsing - they only parse the final step.
    outcar = Outcar(filename=f"{path}/OUTCAR")
    output["generic"]["dft"] = {}
    output["generic"]["dft"]["n_elect"] = vr.parameters["NELECT"]

    # NOTE: Ahmed look here (?) probably needs to just use our own parser
    output["generic"]["dft"]["potentiostat_output"] = [] 

    output["generic"]["dft"]["magnetization"] = [outcar.magnetization]
    # pymatgen Outcar parser alternative only allows us to parse last step, but does not affect final_magmoms here
    output["generic"]["dft"]["final_magmoms"] = [ionic_entry["tot"] for ionic_entry in outcar.magnetization]
    # pymatgen only allows the calculation of the fermi energy on the last step
    output["generic"]["dft"]["e_fermi_list"] = np.array(vr.calculate_efermi())
    # pymatgen only allows the calculation of the valence band maximum on the last step
    # NOTE: SERIOUS PROBLEMS - VBM CALCULATION IS DIFFERENT IN PYIRON VS PYMATGEN - (I (Han) have no idea :))
    output["generic"]["dft"]["vbm_list"] = vr.eigenvalue_band_properties[1]
    # pymatgen only allows the calculation of the conduction band minimum on the last step
    # NOTE: SERIOUS PROBLEMS - CBM CALCULATION IS DIFFERENT IN PYIRON VS PYMATGEN - (I (Han) have no idea :))
    output["generic"]["dft"]["cbm_list"] = vr.eigenvalue_band_properties[2]
    # pymatgen only allows parsing of the final total energy contribution...
    output["generic"]["dft"]["ediel_sol"] = outcar.final_energy_contribs["Ediel_sol"]
 

    output["generic"]["dft"]["ediel_sol"] = outcar.final_energy_contribs["Ediel_sol"]
    output["generic"]["dft"]["potentiostat_output"] = []

    # Read POTCAR file for valence charges - no support in pymatgen for extracting valence from vasprun (although possible)
    # NOTE: Commented out because we don't ahve valid POTCAR for testing here
    # potcar = Potcar.from_file(f"{path}/POTCAR")
    #output["generic"]["dft"]["valence_charges"] = [ps.nelectrons for ps in potcar]

    # SCF energies
    output["generic"]["dft"]["scf_dipole_mom"] = [] # TODO: Likely needs to default to OUTCAR parsing via pyiron OUTCAR
    output["generic"]["dft"]["scf_energy_int"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]
    output["generic"]["dft"]["scf_energy_free"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]
    output["generic"]["dft"]["scf_energy_zero"] = [step['electronic_steps'][-1]['e_0_energy'] for step in vr.ionic_steps]
    output["generic"]["dft"]["energy_int"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]
    output["generic"]["dft"]["energy_free"] = [step['electronic_steps'][-1]['e_fr_energy'] for step in vr.ionic_steps]
    output["generic"]["dft"]["energy_zero"] = [step['electronic_steps'][-1]['e_0_energy'] for step in vr.ionic_steps]

    # 7. Extract band structure and DOS
    output["generic"]["dft"]["bands"] = {}
    # NOTE: I'm in favour of just doing:
    # output["generic"]["dft"]["bands"] = vr.get_band_structure().as_dict()
    # BUT if you want to keep the prior structured output
    vr_bs_dict = vr.get_band_structure().as_dict()
    output["generic"]["dft"]["bands"]["k_points"] = vr.actual_kpoints
    return output


In [2]:
path = "/root/github_dev/github_pyiron/pyiron_atomistics/tests/static/vasp_test_files/full_job_sample"

In [3]:
%%timeit -n100
output = parse_vasp_output_pymatgen(path)

/root/github_dev/github_pyiron/pyiron_atomistics/tests/static/vasp_test_files/full_job_sample/POTCAR


33.4 ms ± 665 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [4]:
from pyiron_atomistics.vasp.output import parse_vasp_output

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
%%timeit -n100
output_pyiron = parse_vasp_output(path)

21.3 ms ± 1.04 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [6]:
path_real = "/root/github_dev/dev_vasp_scraper/As-30-d-2.5"

In [7]:
%%timeit -n1
output = parse_vasp_output_pymatgen(path_real)

3.34 s ± 104 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [8]:
%%timeit -n1
output_pyiron = parse_vasp_output(path_real)

2.92 s ± 61.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
