# Exercise 2: Working with pyiron tables to analyze data

The `PyironTable` class is used to analyze the data generated in the previous notebook. Make sure you have run the first notebook before going through this one.

In [1]:
import numpy as np
import matplotlib.pylab as plt

In [2]:
from pyiron import Project
from pyiron.table.datamining import PyironTable

In [3]:
# Project handle for "demo/potential_scan"; per the introduction above it is
# expected to already contain the jobs created by the previous notebook.
pr = Project("demo/potential_scan")

## Creating a pyiron table

The idea behind pyiron tables is to analyze a dataset by applying a set of functions to it in a highly automated way.

In [4]:
# Table object named "murn_table" attached to project `pr`; the filter and the
# column functions are assigned to it in the cells that follow.
pt = PyironTable(pr, "murn_table")

### Defining a 'filter' function

The dataset needs to be filtered before the functions (some of which are expensive) are applied to it.

In [14]:
def filter_function(job):
    """Decide whether a job should be included in the table.

    A job qualifies when its ``status`` equals "finished" and its ``job_name``
    contains the substring "murn".
    """
    is_finished = job.status == "finished"
    is_murn_job = "murn" in job.job_name
    return is_finished & is_murn_job

In [15]:
# Hand the filter to the table so that the (partly expensive) column functions
# are only evaluated for jobs the filter accepts.
pt.filter_function = filter_function

## Defining the set of functions

The following functions are going to be used on the dataset

In [7]:
def get_lattice_parameter(job):
    """Return the cube root of the job's equilibrium volume.

    Reads "output/equilibrium_volume" from the job. The cube root is a lattice
    constant only if that volume belongs to a cubic cell.
    NOTE(review): presumably true for the structures fitted in the previous
    notebook - confirm before reusing with other cells.
    """
    equilibrium_volume = job["output/equilibrium_volume"]
    return equilibrium_volume ** (1/3)

def get_bm(job):
    """Return the value stored under "output/equilibrium_bulk_modulus" in the job."""
    bulk_modulus = job["output/equilibrium_bulk_modulus"]
    return bulk_modulus

def get_pot(job):
    """Return the potential name recorded by the job's first child.

    The first entry of "output/id" is taken as the id of a child job, which is
    inspected through ``job.project``; its "input/potential/Name" is returned.
    """
    inspect_job = job.project.inspect
    first_child = inspect_job(job["output/id"][0])
    return first_child["input/potential/Name"]

def get_bulk_energy_size(job, size=4):
    """Return total energy and atom count of a relaxed defect-free supercell.

    The reference calculation is a LAMMPS minimisation named
    ``rj_<potential>_s_<size>`` (dashes replaced by underscores) in the
    module-level project ``pr``. It is set up and run only when no finished job
    of that name is listed in ``pr.job_table()``; otherwise the stored result
    is reused.

    Args:
        job: Job exposing "output/equilibrium_volume",
            "output/structure/species" and "output/id".
        size: Repetition passed to ``.repeat()`` when building the supercell.

    Returns:
        tuple: ``(e_ref, n_ref)`` - the last entry of
        "output/generic/energy_tot" and the second dimension of
        "output/generic/positions" of the reference job.
    """
    pot = get_pot(job)
    ref_job_name = "rj_{}_s_{}".format(pot, size).replace("-", "_")
    job_table = pr.job_table()
    finished_jobs = job_table[job_table.status == "finished"].job.to_list()
    if ref_job_name not in finished_jobs:
        # The structure is only needed when the reference job actually has to
        # run, so the volume/species reads and the supercell construction are
        # skipped whenever a finished result can be reused.
        a = get_lattice_parameter(job)
        el = job["output/structure/species"][0]
        ref_job = pr.create_job(pr.job_type.Lammps, ref_job_name)
        ref_job.structure = pr.create_ase_bulk(el, a=a).repeat(size)
        ref_job.potential = pot
        ref_job.calc_minimize()
        ref_job.run()
    # Inspect the job once and read both quantities from the same handle.
    ref_result = pr.inspect(ref_job_name)
    e_ref = ref_result["output/generic/energy_tot"][-1]
    n_ref = ref_result["output/generic/positions"].shape[1]
    return e_ref, n_ref

def get_vac_formation_energy(job, size=4):
    """Return the vacancy formation energy from two relaxed supercells.

    The defect-free reference energy and atom count come from
    ``get_bulk_energy_size``. The defective cell is the same supercell with its
    last atom removed, minimised in a LAMMPS job named
    ``dj_<potential>_s_<size>`` (dashes replaced by underscores) in the
    module-level project ``pr``. The defect job is only set up and run when no
    finished job of that name is listed in ``pr.job_table()``.

    Args:
        job: Job exposing "output/equilibrium_volume",
            "output/structure/species" and "output/id".
        size: Repetition passed to ``.repeat()`` when building the supercell.

    Returns:
        ``e_def - e_ref * (n_def / n_ref)``, i.e. the defect-cell energy minus
        the reference energy rescaled to the defect cell's atom count.
    """
    # Reuse the shared helper for the defect-free reference instead of
    # repeating its job set-up here.
    e_ref, n_ref = get_bulk_energy_size(job, size)
    pot = get_pot(job)
    def_job_name = "dj_{}_s_{}".format(pot, size).replace("-", "_")
    job_table = pr.job_table()
    finished_jobs = job_table[job_table.status == "finished"].job.to_list()
    if def_job_name not in finished_jobs:
        a = get_lattice_parameter(job)
        el = job["output/structure/species"][0]
        ref_supercell = pr.create_ase_bulk(el, a=a).repeat(size)
        def_job = pr.create_job(pr.job_type.Lammps, def_job_name)
        # Dropping the last atom of the perfect supercell creates the vacancy.
        def_job.structure = ref_supercell[0:-1]
        def_job.potential = pot
        def_job.calc_minimize()
        def_job.run()
    # Inspect the job once and read both quantities from the same handle.
    def_result = pr.inspect(def_job_name)
    e_def = def_result["output/generic/energy_tot"][-1]
    n_def = def_result["output/generic/positions"].shape[1]
    return e_def - e_ref * (n_def / n_ref)

def get_cohesive_energy(job, size=4):
    """Return the bulk energy per atom minus the energy of an isolated atom.

    The bulk part comes from ``get_bulk_energy_size``. The isolated atom is
    evaluated with a static LAMMPS job named ``aj_<potential>_s_<size>``
    (dashes replaced by underscores) in the module-level project ``pr``; that
    job is only set up and run when no finished job of that name is listed in
    ``pr.job_table()``.

    Args:
        job: Job exposing "output/structure/species" plus the entries read by
            ``get_bulk_energy_size`` and ``get_pot``.
        size: Supercell repetition forwarded to ``get_bulk_energy_size``; it is
            also part of the isolated-atom job name.

    Returns:
        ``e_bulk / n_bulk - e_atom``.
    """
    bulk_energy, bulk_atom_count = get_bulk_energy_size(job, size)
    potential_name = get_pot(job)
    isolated_job_name = "aj_{}_s_{}".format(potential_name, size).replace("-", "_")
    table = pr.job_table()
    finished_names = table[table.status == "finished"].job.to_list()
    if isolated_job_name not in finished_names:
        isolated_job = pr.create_job(pr.job_type.Lammps, isolated_job_name)
        species = job["output/structure/species"][0]
        # One atom placed at the centre of a cubic cell with edge length 20.
        isolated_job.structure = pr.create_atoms(
            species,
            cell=np.eye(3) * 20,
            scaled_positions=[[0.5, 0.5, 0.5]],
        )
        isolated_job.potential = potential_name
        isolated_job.calc_static()
        isolated_job.run()
    isolated_energy = pr.inspect(isolated_job_name)["output/generic/energy_tot"][-1]
    return bulk_energy / bulk_atom_count - isolated_energy


### The functions are appended to the table

In [8]:
# Register one function per table column: the key is the column name that
# shows up in the resulting dataframe, the function computes its value per job.
pt.add["a_eq"] = get_lattice_parameter
pt.add["bulk_modulus"] = get_bm
pt.add["potential"] = get_pot
# The next two functions may submit additional LAMMPS jobs to `pr` when their
# results are not available yet, so they can be expensive on a first run.
pt.add["vac_formation"] = get_vac_formation_energy
pt.add["ecoh"] = get_cohesive_energy

In [16]:
# Decides whether the individual jobs are loaded as full objects while the
# table is created; set to False here so they are not loaded.
pt.convert_to_object = False

### Creation of the table

All functions are now applied

In [18]:
# Build the table: the registered functions are applied to the jobs selected
# by the filter (progress is reported per job in the output below).
pt.create_table()

100%|██████████| 9/9 [00:03<00:00,  2.33it/s]


In [None]:
### Output the computed dataset as a pandas dataframe

In [11]:
# Fetch the result as a pandas dataframe and remove the columns "col_0",
# "col_1" and "col_2".
# NOTE(review): these look like auto-generated placeholder columns - confirm
# they exist on a fresh run, because drop() raises a KeyError for missing labels.
df = pt.get_dataframe().drop(["col_0", "col_1", "col_2"], axis=1)

In [12]:
# Index the rows by potential name.
# NOTE: this cell is not idempotent - running it a second time raises a
# KeyError because "potential" is then the index and no longer a column.
df = df.set_index("potential")

In [13]:
df
# Optional: persist the table to disk, e.g. df.to_csv("potential_validation_Al.csv")

Unnamed: 0_level_0,job_id,a_eq,bulk_modulus,vac_formation,ecoh
potential,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Al_Mg_Mendelev_eam,732,4.045415,89.015487,0.667786,-3.410657
Zope_Ti_Al_2003_eam,744,4.049946,80.836779,0.720309,-3.298766
Al_H_Ni_Angelo_eam,756,4.049954,81.040445,0.546216,-3.36
2000--Landa-A--Al-Pb--LAMMPS--ipr1,770,4.031246,78.213776,0.688258,-3.35928
2004--Zhou-X-W--Al--LAMMPS--ipr2,782,4.050316,71.546634,0.66806,-3.579979
2003--Zope-R-R--Al--LAMMPS--ipr1,794,4.049946,80.836777,0.720309,-3.353921
2015--Mendelev-M-I--Al-Sm--LAMMPS--ipr1,806,4.041196,85.017411,0.76364,-3.905149
1997--Liu-X-Y--Al-Mg--LAMMPS--ipr1,818,4.032659,83.49856,0.70411,-3.360052
2004--Zhou-X-W--Al--LAMMPS--ipr1,830,4.050315,71.546946,0.668085,-3.579978
