# Parse Atoms Objects for IrO2 and IrO3 Unique Prototypes
---

In [1]:
%%capture
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

import pickle

from ase import io
from ase.visualize import view
import pandas as pd


import bulk_enumerator as be
import time

from pymatgen.io.vasp.inputs import Poscar
from pymatgen.io.ase import AseAtomsAdaptor

# pd.set_option('display.max_rows', None)

# Script Inputs

In [10]:
# NOTE(review): placeholder value; no other cell visible in this notebook reads `tmp`.
tmp = 42

# Reading Structures

In [3]:
# Root directory that is walked for structure files.
root_path = os.path.join(
    os.environ["PROJ_irox"],
    "chris_prototypes_structures",
    )

# Stored paths are expressed relative to the directory that contains the
# project folder. It is derived from PROJ_irox so the notebook runs on any
# machine; previously a hard-coded absolute prefix from one workstation was
# stripped, which silently left absolute paths in place everywhere else.
projects_dir = os.path.dirname(os.path.normpath(os.environ["PROJ_irox"]))

master_list = []
for root, dirs, files in os.walk(root_path):
    if ".ipynb_checkpoints" in root:
        continue

    # Stoichiometry and data source are inferred from the directory path.
    if "iro2" in root:
        stoich_i = "AB2"
    elif "iro3" in root:
        stoich_i = "AB3"
    else:
        stoich_i = None

    if "oqmd" in root:
        source_i = "oqmd"
    else:
        source_i = "chris"

    # Identical for every file in this directory, so computed once per directory.
    path_i = os.path.relpath(root, projects_dir)

    for file_i in files:
        if ".POSCAR" in file_i or ".cif" in file_i:
            # The token before the first underscore is parsed as the old integer id.
            id_i = file_i.split("_")[0]

            atoms_i = io.read(
                os.path.join(root, file_i))

            sys_i = {
                "id_old": int(id_i),
                "atoms": atoms_i,
                "stoich": stoich_i,
                "path": path_i,
                "source": source_i,
                }
            master_list.append(sys_i)

# One row per structure file that was read.
df_struct = pd.DataFrame(master_list)

# Setting Unique ID Tag

In [4]:
# CSV providing the "stoich", "id" and "unique_ids" columns used below.
path_i = os.path.join(os.environ["PROJ_irox"], "data/ml_irox_data", "unique_ids.csv")
df_id = pd.read_csv(path_i)

# One lookup table per stoichiometry: "id" column -> "unique_ids" column.
rows_ab2 = df_id.loc[df_id["stoich"] == "AB2"]
id_mapp_iro2 = dict(zip(rows_ab2["id"], rows_ab2["unique_ids"]))

rows_ab3 = df_id.loc[df_id["stoich"] == "AB3"]
id_mapp_iro3 = dict(zip(rows_ab3["id"], rows_ab3["unique_ids"]))

In [5]:
def method(row_i):
    """Look up the unique ID for one structure row.

    Parameters
    ----------
    row_i : mapping-like (e.g. a DataFrame row)
        Must provide the keys "id_old" and "stoich".

    Returns
    -------
    The entry of `id_mapp_iro2` (stoich "AB2") or `id_mapp_iro3`
    (stoich "AB3") keyed by `row_i["id_old"]`. For any other stoich value a
    warning naming the row is printed and None is returned.

    Raises
    ------
    KeyError
        If the stoich is "AB2"/"AB3" but the id is missing from its mapping.
    """
    id_i = row_i["id_old"]
    stoich_i = row_i["stoich"]

    if stoich_i == "AB2":
        unique_id_i = id_mapp_iro2[id_i]
    elif stoich_i == "AB3":
        unique_id_i = id_mapp_iro3[id_i]
    else:
        # Identify the offending row so it can be traced back to its file.
        print(
            "WARNING: no unique-id mapping for stoich={!r} (id_old={!r}); "
            "id_unique set to None".format(stoich_i, id_i)
            )
        unique_id_i = None

    return unique_id_i

# Compute the unique ID row by row, then promote it to the frame's index.
df_struct["id_unique"] = df_struct.apply(method, axis="columns")
df_struct.set_index(keys="id_unique", inplace=True)

# Analyzing Structures with Bulk Enumerator

In [6]:
t0 = time.time()

# Run the bulk enumerator on every structure; one record per structure.
data_list = []
for id_i, row_i in df_struct.iterrows():
    # ASE atoms -> pymatgen structure -> POSCAR-formatted string.
    poscar_str_i = Poscar(
        AseAtomsAdaptor.get_structure(row_i["atoms"])
        ).get_string()

    b = be.bulk.BULK()
    b.set_structure_from_file(poscar_str_i)

    # Getters are called in the same order as the keys are listed.
    data_list.append({
        "id": id_i,
        "spacegroup_i": b.get_spacegroup(),
        "species_i": b.get_species(),
        "wyckoff_i": b.get_wyckoff(),
        "name_i": b.get_name(),
        "parameter_values_i": b.get_parameter_values(),
        })

t1 = time.time()
print("time to complete for loop: ")
print(t1 - t0)

df_proto = pd.DataFrame(data_list).set_index("id")

# Total number of names, followed by the number of distinct names.
names_i = df_proto["name_i"].to_list()
print(len(names_i))
print(len(set(names_i)))

time to complete for loop: 
159.689435005188
967
942


# Save data to pickle

In [7]:
# Write both frames as pickle files in the current working directory.
for file_name_i, obj_i in [
    ("data_structures.pickle", df_struct),
    ("data_prototypes.pickle", df_proto),
    ]:
    with open(file_name_i, "wb") as fle:
        pickle.dump(obj_i, fle)

In [8]:
# Preview only the first rows instead of rendering the whole frame.
df_struct.head(10)

Unnamed: 0_level_0,atoms,id_old,path,source,stoich
id_unique,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
95c29e9f6h,"(Atom('O', [0.0, 0.0, 1.8684114399999998], tag...",0,04_irox_surfaces_oer/chris_prototypes_structur...,chris,AB2
8ixs9szims,"(Atom('O', [0.0, 0.0, 1.413785], tag=0, index=...",100,04_irox_surfaces_oer/chris_prototypes_structur...,chris,AB2
be7wm39lmp,"(Atom('O', [6.6350561574, 3.5561670516, 1.1026...",101,04_irox_surfaces_oer/chris_prototypes_structur...,chris,AB2
bynub4vic2,"(Atom('O', [9.828870512, 10.196679652, 0.0], t...",102,04_irox_surfaces_oer/chris_prototypes_structur...,chris,AB2
mqxi8q7kvd,"(Atom('O', [9.0657, 11.106933011999999, 0.0], ...",103,04_irox_surfaces_oer/chris_prototypes_structur...,chris,AB2
81meck64ba,"(Atom('O', [0.0, 4.2656295, 9.564676192], tag=...",104,04_irox_surfaces_oer/chris_prototypes_structur...,chris,AB2
7fvk74mhcj,"(Atom('O', [11.94255, 5.6734275, 0.0], tag=0, ...",105,04_irox_surfaces_oer/chris_prototypes_structur...,chris,AB2
zon4z3vpvd,"(Atom('O', [15.380149139, 1.4932307459999998, ...",106,04_irox_surfaces_oer/chris_prototypes_structur...,chris,AB2
c48lx363be,"(Atom('Ir', [0.0, 0.0, 0.0], tag=0, index=0), ...",107,04_irox_surfaces_oer/chris_prototypes_structur...,chris,AB2
vhbaxp9wmh,"(Atom('O', [2.428007823, 3.72765, 9.73096929],...",108,04_irox_surfaces_oer/chris_prototypes_structur...,chris,AB2


In [9]:
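# NOTE(review): everything in this cell is disabled (commented-out) code.
# It appears to be the generator of the 10-character IDs that are now read
# from unique_ids.csv -- TODO confirm, then move it to a module or remove it.

# from random import choice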

# def GetFriendlyID():
#     """
#     Create an ID string we can recognise.
#     (Think Italian or Japanese or Native American.)
#     """
#     v = 'qwertyuiopasdfghjkl12345'
#     c = 'zxcvbnm6789'
    
#     return ''.join([choice(v if i%2 else c) for i in range(10)])

# def GetUniqueFriendlyID(used_ids):
#     """
#     Return an ID that is not in our list of already used IDs.
#     """
#     # trying infinitely is a bad idea
#     LIMIT = 1000

#     count = 0
#     while count < LIMIT:
#         id = GetFriendlyID()
#         if id not in used_ids:
#             break
#         count += 1
#         id = ''
#     return id

# used_ids = set()
# for i in range(len(df_struct)):
#     id = GetUniqueFriendlyID(used_ids)
#     if not id:
#         print('something broke')
#         break
#     used_ids.add(id)

# df_struct.index = used_ids