In [1]:
from pathlib import Path
# import nfp
import numpy as np
import pandas as pd
import tarfile
from pymatgen.core import Structure
import json
import gzip
from glob import glob
from tqdm.notebook import tqdm
tqdm.pandas()

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(context='talk', style='ticks',
        color_codes=True, rc={'legend.frameon': False})

%matplotlib inline

In [3]:
def extract_strc_from_tar(tar, filename):
    """Read a POSCAR-formatted member of an open tar archive as a structure dict.

    Parameters
    ----------
    tar : tarfile.TarFile
        Archive that is already open for reading.
    filename : str
        Name of the member inside the archive.

    Returns
    -------
    dict
        Result of ``Structure.from_str(..., fmt='poscar').as_dict()``.

    Raises
    ------
    ValueError
        If the member is not a regular file (``extractfile`` returned ``None``).
        Presumably also raised by the POSCAR parser on malformed input --
        TODO confirm against pymatgen.
    KeyError
        If ``filename`` is not present in the archive (raised by ``tarfile``).
    """
    member_file = tar.extractfile(filename)
    if member_file is None:
        # Directories and other special members have no byte content; surface
        # this as ValueError rather than an AttributeError on ``None.read``.
        raise ValueError(f"{filename} is not a regular file in the archive")
    # Close the extracted file object as soon as its bytes have been read.
    with member_file:
        contents = member_file.read().decode()
    return Structure.from_str(contents, fmt='poscar').as_dict()

In [5]:
# Prashun shared the volume-relaxed output files as .tar.gz archives;
# read them directly instead of extracting them to disk.
comp_type_archives = glob("/projects/rlmolecule/pgorai/volrelax/relaxed_output/*.tar.gz")

# Structure parsing is switched off, so this cell only scans the stdout logs
# for distance warnings. Set a flag to True to repopulate the matching dict.
PARSE_POSCAR = False
PARSE_CONTCAR = False

unrel_poscar = {}   # strc_id -> structure dict parsed from POSCAR
rel_contcar = {}    # strc_id -> structure dict parsed from CONTCAR
failed = set()      # strc_ids whose structure file raised ValueError while parsing
flagged = set()     # strc_ids whose stdout contains the distance warning
flagged_out_dir = "/projects/rlmolecule/jlaw/inputs/structures/volrelax/distance_warnings"
# Make sure the log directory exists before opening the output file.
Path(flagged_out_dir).mkdir(parents=True, exist_ok=True)

# The context manager closes the log even if an archive fails mid-scan.
with open(f"{flagged_out_dir}/distance_warnings.txt", 'w') as flagged_out:
    for comp_type_archive in tqdm(comp_type_archives):
        print(comp_type_archive)
        with tarfile.open(comp_type_archive, "r:gz") as tar:
            for tarinfo in tar:
                path_parts = tarinfo.name.split('/')
                if len(path_parts) <= 2:
                    # Too shallow to carry a structure id; skip it so the id
                    # of the previous member is never reused by accident.
                    continue
                strc_id = path_parts[2]

                if tarinfo.name.endswith("POSCAR"):
                    if not PARSE_POSCAR:
                        continue
                    try:
                        unrel_poscar[strc_id] = extract_strc_from_tar(tar, tarinfo.name)
                    except ValueError:
                        failed.add(strc_id)
                elif tarinfo.name.endswith("CONTCAR"):
                    if not PARSE_CONTCAR:
                        continue
                    try:
                        rel_contcar[strc_id] = extract_strc_from_tar(tar, tarinfo.name)
                    except ValueError:
                        failed.add(strc_id)
                elif tarinfo.name.endswith("stdout"):
                    # Passing the TarInfo avoids a second lookup by name.
                    log_file = tar.extractfile(tarinfo)
                    if log_file is None:
                        # Not a regular file (e.g. a directory named "...stdout").
                        continue
                    with log_file:
                        # Replace undecodable bytes instead of aborting the scan.
                        content = log_file.read().decode(errors="replace")
                    if 'distance between some ions is very small' in content:
                        flagged.add(strc_id)
                        flagged_out.write(strc_id + "\n")
                        flagged_out.write(content + "\n\n")

  0%|          | 0/49 [00:00<?, ?it/s]

/projects/rlmolecule/pgorai/volrelax/relaxed_output/_1_2_3_3.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_3_3_4.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_1_4_5.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_2_2_3.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_1_4_4.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_1_3_3_3.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_1_3_4.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_1_1_1_5.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_1_1_1.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_1_1_1_2_2.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_2_3.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_1_1_1_2_5.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_1_1_4_4.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_2_3_3.tar.gz
/projects/rlmolecule/pgorai/volrelax/relaxed_output/_1_1_7.tar

In [77]:
# Report how many structure ids were flagged and how many entries each
# structure dict holds. NOTE(review): with the POSCAR/CONTCAR parsing disabled
# in the archive-scan cell, both dicts are empty on a fresh kernel.
print(f"{len(flagged)} flagged, {len(unrel_poscar)} unrel_poscar, {len(rel_contcar)} rel_contcar")

8014 flagged, 30522 unrel_poscar, 30324 rel_contcar


In [70]:
def write_structures_file(structures_file, structures_dict, round_float=None):
    """Serialize a dictionary of structures to a gzip-compressed JSON file.

    structures_file: path of the ``.json.gz`` file to create (overwritten if present).
    structures_dict: JSON-serializable mapping to write.
    round_float: when not None, every float nested inside dicts, lists and
        tuples is rounded to this many decimal places before writing.
    """

    def _rounded(obj):
        # Walk the nested structure, rounding floats and rebuilding containers;
        # tuples come back as lists, anything else is returned untouched.
        if isinstance(obj, float):
            result = round(obj, round_float)
        elif isinstance(obj, dict):
            result = {key: _rounded(value) for key, value in obj.items()}
        elif isinstance(obj, (list, tuple)):
            result = [_rounded(item) for item in obj]
        else:
            result = obj
        return result

    to_write = structures_dict if round_float is None else _rounded(structures_dict)

    print(f"writing {structures_file}")
    payload = json.dumps(to_write).encode()
    with gzip.open(structures_file, 'w') as out:
        out.write(payload)

In [79]:
out_dir = "/projects/rlmolecule/jlaw/inputs/structures/volrelax"
# the poscar file was not properly formatted for pymatgen
# unrel_file = f"{out_dir}/battery_unrelaxed_vol_structures.json.gz"
rel_file = f"{out_dir}/battery_relaxed_vol_structures.json.gz"
# write_structures_file(unrel_file, unrel_poscar, round_float=5)
if rel_contcar:
    write_structures_file(rel_file, rel_contcar, round_float=5)
else:
    # An empty dict means the CONTCAR files were not parsed in this session;
    # writing it would replace any existing structures file with "{}".
    print(f"rel_contcar is empty; not overwriting {rel_file}")

writing /projects/rlmolecule/jlaw/inputs/volrelax/battery_unrelaxed_vol_structures.json.gz
writing /projects/rlmolecule/jlaw/inputs/volrelax/battery_relaxed_vol_structures.json.gz


In [161]:
out_dir = "/projects/rlmolecule/jlaw/inputs/structures/volrelax"
# Write one flagged structure id per line. `flagged` is a set, whose iteration
# order is not stable between runs, so sort to keep the file reproducible.
with open(f"{out_dir}/distance_warning_strcs.txt", 'w') as out:
    out.write('\n'.join(sorted(flagged)))