In [23]:
import os, ase
from ase.db import connect
from ase import Atoms
from ase.io import read
import glob, re

# The functions in the cell below write relaxed structures to an ASE database.

In [2]:
def is_converged(dir=".") -> bool:
    """Report whether the OUTCAR inside ``dir`` contains the word "accuracy".

    Parameters
    ----------
    dir : str
        Calculation directory expected to contain an ``OUTCAR`` file.

    Returns
    -------
    bool
        True if at least one line of ``OUTCAR`` matches the pattern
        "accuracy", False otherwise.

    Raises
    ------
    FileNotFoundError
        If ``dir`` does not exist or contains no ``OUTCAR`` file.
    """
    if not os.path.exists(dir):
        raise FileNotFoundError(f"{dir} directory does not exist !")

    # Build the path explicitly instead of chdir-ing into ``dir``: the working
    # directory of the notebook is never touched, so an exception cannot leave
    # the kernel stranded in a calculation directory.
    outcar_path = os.path.join(dir, "OUTCAR")
    if not os.path.isfile(outcar_path):
        raise FileNotFoundError(f"No OUTCAR file was found inside {dir} directory !")

    # NOTE(review): the pattern presumably targets VASP's "reached required
    # accuracy" message; it is kept exactly as broad as before.
    with open(outcar_path, "r") as outcar:
        return any(re.search("accuracy", line) for line in outcar)

def read_incar(dir=".") -> dict:
    """Parse the ``KEY = VALUE`` tags of the INCAR file inside ``dir``.

    The first line of the file is always skipped (presumably a title or
    comment line — TODO confirm against the INCAR files in use). On the
    remaining lines, text after ``#`` or ``!`` is dropped, and lines without
    an ``=`` sign (blank lines, pure comments) are ignored.

    Parameters
    ----------
    dir : str
        Calculation directory expected to contain an ``INCAR`` file.

    Returns
    -------
    dict
        Mapping of tag name to its value; both are whitespace-stripped strings.

    Raises
    ------
    FileNotFoundError
        If ``dir`` does not exist or contains no ``INCAR`` file.
    """
    if not os.path.exists(dir):
        raise FileNotFoundError(f"{dir} directory does not exist !")

    # Read through an explicit path so the notebook's working directory is
    # never changed, even when an exception is raised.
    incar_path = os.path.join(dir, "INCAR")
    if not os.path.isfile(incar_path):
        raise FileNotFoundError(f"No INCAR file was found inside {dir} directory !")

    calc_params = {}
    with open(incar_path, "r") as fi:
        next(fi, None)  # skip the first line; the default avoids StopIteration on an empty file
        for line in fi:
            # Drop inline comments before looking for the assignment.
            for marker in ("#", "!"):
                line = line.split(marker, 1)[0]
            key, sep, value = line.partition("=")
            key, value = key.strip(), value.strip()
            if not sep or not key:
                continue  # blank line, comment-only line, or malformed entry
            calc_params[key] = value
    return calc_params

def get_relaxed_structure(dir=".") -> "ase.Atoms":
    """Return the last structure stored in the ``vasprun.xml`` inside ``dir``.

    Parameters
    ----------
    dir : str
        Calculation directory expected to contain a ``vasprun.xml`` file.

    Returns
    -------
    ase.Atoms
        The final image (``index=-1``) read with the ``vasp-xml`` format.

    Raises
    ------
    FileNotFoundError
        If ``dir`` does not exist or contains no ``vasprun.xml`` file.
    """
    if not os.path.exists(dir):
        raise FileNotFoundError(f"{dir} directory does not exist !")

    # Use an explicit path rather than chdir: a missing file or a parse error
    # can no longer leave the notebook in a different working directory.
    vasprun_path = os.path.join(dir, "vasprun.xml")
    if not os.path.isfile(vasprun_path):
        raise FileNotFoundError(f"No vasprun.xml file was found inside {dir} directory !")

    return read(vasprun_path, format="vasp-xml", index=-1)

def add_to_db(ase_db, atoms, keys=None, meta_data=None, dir="."):
    """Write ``atoms`` as one row of ``ase_db`` while working inside ``dir``.

    Parameters
    ----------
    ase_db
        Database object exposing ``write(atoms, key_value_pairs=..., data=...)``.
    atoms
        Structure to store.
    keys : dict, optional
        Passed to ``write`` as ``key_value_pairs``.
    meta_data : dict, optional
        Passed to ``write`` as ``data``.
    dir : str
        Directory that is made the working directory for the duration of the
        write.

    Raises
    ------
    FileNotFoundError
        If ``dir`` does not exist.
    """
    if not os.path.exists(dir):
        raise FileNotFoundError(f"{dir} directory does not exist !")

    start_dir = os.getcwd()
    os.chdir(dir)
    try:
        ase_db.write(atoms, key_value_pairs=keys, data=meta_data)
    finally:
        # Always return to the starting directory, even if the write fails,
        # so later relative paths in the notebook keep resolving correctly.
        os.chdir(start_dir)

def write_relaxed_db(ase_db, dir):
    """Store the relaxed structure found in ``dir`` as one row of ``ase_db``.

    The row carries two kinds of annotation:

    * ``data`` (meta-data): the INCAR values of ENCUT, EDIFF and EDIFFG under
      lower-cased names, plus ``xc = "PBE+D3(BJ)"`` when the INCAR has
      ``GGA = PE`` and ``IVDW = 12``.
    * ``key_value_pairs``: ``structure_name`` (the second "/"-separated
      component of ``dir``) and ``relaxed`` (result of ``is_converged``).

    Parameters
    ----------
    ase_db
        Database object handed on to ``add_to_db``.
    dir : str
        Calculation directory; must contain at least one "/" so that the
        structure name can be extracted.
    """
    relaxed_atoms = get_relaxed_structure(dir)

    # ---------------- Meta-data for the db row ----------------
    incar = read_incar(dir)
    meta_data = {}
    for tag in ("ENCUT", "EDIFF", "EDIFFG"):
        meta_data[tag.lower()] = incar[tag]
    if incar["GGA"] == "PE" and incar["IVDW"] == "12":
        meta_data["xc"] = "PBE+D3(BJ)"

    # ---------------- Keys for the db row ----------------
    keys = {
        "structure_name": dir.split("/")[1],
        "relaxed": is_converged(dir),
    }

    add_to_db(ase_db, relaxed_atoms, keys, meta_data, dir)

# The cell below collects the search directories and writes relaxed_structures.db.

In [3]:
# All glob patterns and the database path below are relative to this directory.
os.chdir("/global/u2/s/sudheesh/mofs_1/dft")

# Comprehension-local loop names are used throughout so that the builtin
# ``dir`` is not overwritten in the notebook's global namespace.

# Linker calculations: skip AIMD runs and everything matching "linker-7".
linker_dirs = sorted(
    path
    for path in glob.glob("./linker-*/*/*/")
    if not any(tag in path for tag in ("aimd", "linker-7"))
)

# MOF calculations: the pattern also matches the combined "mof-*_linker-*"
# directories, hence the "linker" exclusion; AIMD, mof-7 and the isif=2 /
# isif=4 runs are skipped as well.
mof_dirs = sorted(
    path
    for path in glob.glob("./mof-*/*/*/")
    if not any(tag in path for tag in ("aimd", "linker", "mof-7", "isif=2", "isif=4"))
)

# Combined MOF + linker systems: skip AIMD, ts0_relax, linker-7 and NEB runs.
system_dirs = sorted(
    path
    for path in glob.glob("./mof-*_linker-*/*/*/")
    if not any(tag in path for tag in ("aimd", "ts0_relax", "linker-7", "diffusion_neb"))
)

search_dirs = linker_dirs + mof_dirs + system_dirs

# append=False starts a new database rather than appending to an existing one.
ase_db = connect("../databases/relaxed_structures.db", append=False)

for calc_dir in search_dirs:
    write_relaxed_db(ase_db, calc_dir)

# The functions in the cell below write MD trajectories to an ASE database.

In [24]:
def read_incar(dir=".") -> dict:
    """Parse the ``KEY = VALUE`` tags of the INCAR file inside ``dir``.

    NOTE: this notebook defines ``read_incar`` twice; this later definition
    replaces the earlier one once the cell has been executed.

    The first line of the file is always skipped (presumably a title or
    comment line — TODO confirm against the INCAR files in use). On the
    remaining lines, text after ``#`` or ``!`` is dropped, and lines without
    an ``=`` sign (blank lines, pure comments) are ignored.

    Parameters
    ----------
    dir : str
        Calculation directory expected to contain an ``INCAR`` file.

    Returns
    -------
    dict
        Mapping of tag name to its value; both are whitespace-stripped strings.

    Raises
    ------
    FileNotFoundError
        If ``dir`` does not exist or contains no ``INCAR`` file.
    """
    if not os.path.exists(dir):
        raise FileNotFoundError(f"{dir} directory does not exist !")

    # Read through an explicit path so the notebook's working directory is
    # never changed, even when an exception is raised.
    incar_path = os.path.join(dir, "INCAR")
    if not os.path.isfile(incar_path):
        raise FileNotFoundError(f"No INCAR file was found inside {dir} directory !")

    calc_params = {}
    with open(incar_path, "r") as fi:
        next(fi, None)  # skip the first line; the default avoids StopIteration on an empty file
        for line in fi:
            # Strip inline comments so values stay convertible with float().
            for marker in ("#", "!"):
                line = line.split(marker, 1)[0]
            key, sep, value = line.partition("=")
            key, value = key.strip(), value.strip()
            if not sep or not key:
                continue  # blank line, comment-only line, or malformed entry
            calc_params[key] = value
    return calc_params

def check_md_completion(dir=".") -> bool:
    """Tell whether the last line of the OSZICAR inside ``dir`` contains "T".

    NOTE(review): the "T" presumably identifies the per-step summary line
    (``T= ...``) that closes a finished MD step — confirm against real output.

    Parameters
    ----------
    dir : str
        Calculation directory expected to contain an ``OSZICAR`` file.

    Returns
    -------
    bool
        True if the final line of ``OSZICAR`` contains the character "T";
        False otherwise, including when the file is empty.

    Raises
    ------
    FileNotFoundError
        If ``dir`` does not exist, or (raised by ``open``) if it contains no
        ``OSZICAR`` file.
    """
    if not os.path.exists(dir):
        raise FileNotFoundError(f"{dir} directory does not exist !")

    # Open via an explicit path: a missing OSZICAR can no longer leave the
    # notebook's working directory changed.
    last_line = ""
    with open(os.path.join(dir, "OSZICAR"), "r") as fi:
        for last_line in fi:  # keep only the final line; no need to hold the whole file
            pass
    return "T" in last_line
        
def get_md_trajectories(dir="."):
    """Collect every MD frame stored in the vasprun files inside ``dir``.

    Files whose name contains ``vasprun_`` are read first, ordered by the
    integer that follows the first ``_`` or ``.`` separator in the name
    (e.g. ``vasprun_2.xml`` -> 2); ``vasprun.xml`` is read last.

    Parameters
    ----------
    dir : str
        Calculation directory; must contain ``vasprun.xml``.

    Returns
    -------
    list
        All frames in reading order. When ``check_md_completion(dir)`` is
        False, the final frame is dropped.

    Raises
    ------
    FileNotFoundError
        If ``dir`` does not exist or contains no ``vasprun.xml`` file.
    ValueError
        If a ``vasprun_*`` file name does not carry an integer in the
        expected position (raised by ``int``).
    """
    if not os.path.exists(dir):
        raise FileNotFoundError(f"{dir} directory does not exist !")
    if not os.path.isfile(os.path.join(dir, "vasprun.xml")):
        raise FileNotFoundError(f"No vasprun.xml file was found inside {dir} directory !")

    # Work with explicit paths instead of chdir so that a failing read cannot
    # leave the notebook's working directory changed.
    vasprun_files = [name for name in os.listdir(dir) if "vasprun_" in name]
    vasprun_files.sort(key=lambda name: int(re.split("[_.]", name)[1]))
    vasprun_files.append("vasprun.xml")  # the current run always comes last

    atoms_list = []
    for name in vasprun_files:
        atoms_list.extend(read(os.path.join(dir, name), index=":", format="vasp-xml"))

    return atoms_list if check_md_completion(dir) else atoms_list[:-1]

def write_md_db(db_path, dir):
    """Append every MD frame found in ``dir`` to the ASE database at ``db_path``.

    Each row receives

    * ``data`` (meta-data): ``encut`` and ``ediff`` as floats (None when the
      tag is absent from the INCAR), plus ``xc = "PBE+D3(BJ)"`` when the INCAR
      has ``GGA = PE`` and ``IVDW = 12``;
    * ``key_value_pairs``: ``calc_type`` ("nvt" or "npt", inferred from the
      directory path), ``sim_temp`` (TEBEG as float) and ``sim_press``
      (1000 x PSTRESS); the latter two are NaN when the tag is absent.

    Parameters
    ----------
    db_path : str
        Path of the database file; opened with ``append=True``.
    dir : str
        MD calculation directory; its path must contain "nvt" or "npt".

    Raises
    ------
    ValueError
        If neither "nvt" nor "npt" occurs in ``dir``.
    KeyError
        If the INCAR lacks the GGA or IVDW tag.
    """
    # Resolve the ensemble first: previously an unrecognised path surfaced as
    # an UnboundLocalError only after the whole trajectory had been parsed.
    if "nvt" in dir:
        calc_name = "nvt"
    elif "npt" in dir:
        calc_name = "npt"
    else:
        raise ValueError(f"Cannot infer the MD ensemble (nvt/npt) from directory {dir} !")

    atoms_list = get_md_trajectories(dir)
    vasp_params = read_incar(dir)

    ################## Meta-data for each row of ase db ####################
    meta_data = {}
    for tag in ("ENCUT", "EDIFF"):
        raw = vasp_params.get(tag)
        meta_data[tag.lower()] = float(raw) if raw else None
    if vasp_params["GGA"] == "PE" and vasp_params["IVDW"] == "12":
        meta_data["xc"] = "PBE+D3(BJ)"
    ########################################################################

    ################## Keys for each row of ase db #########################
    tebeg = vasp_params.get("TEBEG")
    pstress = vasp_params.get("PSTRESS")
    keys = {
        "calc_type": calc_name,
        # stores in K
        "sim_temp": float(tebeg) if tebeg else float("nan"),
        # stores in bar as in VASP, PSTRESS is defined in kbar
        "sim_press": 1000 * float(pstress) if pstress else float("nan"),
    }
    ########################################################################

    with connect(db_path, append=True) as ase_db:
        for atoms in atoms_list:
            ase_db.write(atoms, key_value_pairs=keys, data=meta_data)

# The cell below collects the search directories and is used to write the MD trajectories to ASE databases.

In [40]:
# All glob patterns and database paths below are relative to this directory.
os.chdir("/global/u2/s/sudheesh/mofs_linkers/dft")

# Comprehension-local loop names are used throughout so that the builtin
# ``dir`` is not overwritten in the notebook's global namespace.

# Linker MD runs: directories only, skipping relaxations and "linker-7".
linker_dirs = sorted(
    path
    for path in glob.glob("./linker-*/*/*/*")
    if os.path.isdir(path)
    and not any(tag in path for tag in ("relaxation", "linker-7"))
)

# MOF MD runs: the pattern also matches the combined "mof-*_linker-*"
# directories, hence the "linker" exclusion.
mof_dirs = sorted(
    path
    for path in glob.glob("./mof-*/*/*/*")
    if os.path.isdir(path)
    and not any(tag in path for tag in ("relaxation", "linker", "mof-7"))
)

# Combined MOF + linker systems: skip relaxations, NEB runs, plain "/nvt/"
# runs and linker-7; empty directories are ignored.
system_dirs = sorted(
    path
    for path in glob.glob("./mof-*_linker-*/*/*/*")
    if os.path.isdir(path)
    and not any(tag in path for tag in ("relaxation", "diffusion_neb", "/nvt/", "linker-7"))
    and os.listdir(path)  # checking for non-empty directories
)

search_dirs = linker_dirs + mof_dirs + system_dirs

for i, md_dir in enumerate(search_dirs):
    # One database per system; constrained runs go to a separate "<name>_pore" db.
    db_name = md_dir.split("/")[1]
    if "constrained" in md_dir:
        db_name += "_pore"
    db_path = f"../databases/{db_name}.db"
    try:
        # NOTE: the write call is currently disabled, so this loop only
        # reports which database each directory would be written to.
        # write_md_db(db_path, md_dir)
        print(f"{i+1}: Structures from {md_dir} written into {db_name} ase db successfully.")
    except Exception as err:
        # Catch Exception (not a bare except) so Ctrl-C / kernel interrupts
        # still stop the loop, and report what actually went wrong.
        print(f"\n{i+1}): Failed to write ase db from {md_dir}: {err!r}\n")

1: Structures from ./mof-2_linker-2(1)/aimd/constrained_nvt/T=400_encut=400 written into mof-2_linker-2(1)_pore ase db successfully.
2: Structures from ./mof-3_linker-3(1)/aimd/constrained_nvt/T=400_encut=400 written into mof-3_linker-3(1)_pore ase db successfully.
3: Structures from ./mof-4_linker-4(1)/aimd/constrained_nvt/T=400_encut=400 written into mof-4_linker-4(1)_pore ase db successfully.
4: Structures from ./mof-5_linker-5(1)/aimd/constrained_nvt/T=400_encut=400 written into mof-5_linker-5(1)_pore ase db successfully.
5: Structures from ./mof-6_linker-6(1)/aimd/constrained_nvt/T=400_encut=400 written into mof-6_linker-6(1)_pore ase db successfully.
