# Creating a mapping between the old and new prototype numbering systems for IrO2
---

# Import Modules

In [1]:
import os
import sys

import json
import random
import pickle
import collections

import numpy as np
from ase import io

import pandas as pd

from jupyter_modules.jupyter_helpers.following_tail import FollowingTail

from pymatgen.io.vasp.inputs import Poscar
from pymatgen.io.ase import AseAtomsAdaptor

import bulk_enumerator as be

# Script Inputs

In [2]:
# When True, the CIF files are re-read, re-analyzed with bulk_enumerator and
# the resulting dataframes are pickled; when False, the pickled dataframes
# are loaded instead.
parse_files = False
# When True, the old -> new ID mapping is recomputed and written to JSON;
# when False, a previously stored JSON mapping is loaded.
compare_old_new = False

# Methods

In [3]:
def compare_bulk_protos(row_i, row_j):
    """Compare the prototype classification of two structures.

    Parameters
    ----------
    row_i, row_j : mapping-like
        Objects supporting ``row["prototype_info"][key]`` for the keys
        ``"name_i"``, ``"spacegroup_i"``, ``"species_i"`` and
        ``"wyckoff_i"``.

    Returns
    -------
    list of bool
        Four flags, in the order: name, spacegroup, species, wyckoff.
        Name and spacegroup are compared with ``==``; species and wyckoff
        are compared as sets, i.e. ignoring order and multiplicity.
        ``"parameter_values_i"`` is not compared and contributes no entry.
    """
    info_i = row_i["prototype_info"]
    info_j = row_j["prototype_info"]

    # Sets are compared directly: converting them to lists first would make
    # the result depend on set iteration order, which is not guaranteed to
    # be the same for two sets holding the same elements.
    return [
        bool(info_i["name_i"] == info_j["name_i"]),
        bool(info_i["spacegroup_i"] == info_j["spacegroup_i"]),
        set(info_i["species_i"]) == set(info_j["species_i"]),
        set(info_i["wyckoff_i"]) == set(info_j["wyckoff_i"]),
        ]

# Reading Files

## Reading Old Prototype Structures

In [4]:
if parse_files:
    # Directory holding the CIF files of the old prototype numbering.
    root_path = os.path.join(
        os.environ["PROJ_irox"],
        "chris_prototypes_structures/fixedprototypesIrO2_old")

    # One record per CIF file: the parsed structure and the ID taken from
    # the part of the file name before the first underscore.
    master_list = []
    for root, dirs, files in os.walk(root_path):
        for file_i in files:
            # Test the extension itself; a substring test would also pick
            # up names such as "x.cif.bak".
            if not file_i.endswith(".cif"):
                continue

            master_list.append({
                "atoms": io.read(os.path.join(root, file_i)),
                "id": file_i.split("_")[0],
                })

    df_old = pd.DataFrame(master_list)

## Reading New Prototype Structures

In [5]:
if parse_files:
    # Directory holding the CIF files of the new prototype numbering.
    root_path = os.path.join(
        os.environ["PROJ_irox"],
        "chris_prototypes_structures/fixedprototypesIrO2")

    # One record per CIF file: the parsed structure and the ID taken from
    # the part of the file name before the first underscore.
    master_list = []
    for root, dirs, files in os.walk(root_path):
        for file_i in files:
            # Test the extension itself; a substring test would also pick
            # up names such as "x.cif.bak".
            if not file_i.endswith(".cif"):
                continue

            master_list.append({
                "atoms": io.read(os.path.join(root, file_i)),
                "id": file_i.split("_")[0],
                })

    df_new = pd.DataFrame(master_list)

# Analyzing Prototype Structure

## Analyzing Old Prototypes

In [6]:
if parse_files:
    # Run bulk_enumerator on every old structure and collect its prototype
    # descriptors, one dict per row of df_old (same row order).
    data_list = []
    for i_cnt, row_i in df_old.iterrows():
        atoms_i = row_i["atoms"]
        structure_i = AseAtomsAdaptor.get_structure(atoms_i)

        # The structure is handed over as POSCAR-formatted text.
        b = be.bulk.BULK()
        b.set_structure_from_file(Poscar(structure_i).get_string())

        data_list.append({
            "spacegroup_i": b.get_spacegroup(),
            "species_i": b.get_species(),
            "wyckoff_i": b.get_wyckoff(),
            "name_i": b.get_name(),
            "parameter_values_i": b.get_parameter_values(),
            })

    # Put the original columns and the descriptors side by side under a
    # two-level column index. `join_axes` was removed from pd.concat in
    # pandas 1.0; reindexing on the original index is the documented
    # equivalent and also works on older pandas versions.
    df_old = pd.concat(
        [df_old, pd.DataFrame(data_list)],
        axis=1,
        keys=["default_columns", "prototype_info"],
        ).reindex(df_old.index)

## Analyzing New Prototypes

In [7]:
if parse_files:
    # Run bulk_enumerator on every new structure and collect its prototype
    # descriptors, one dict per row of df_new (same row order).
    data_list = []
    for i_cnt, row_i in df_new.iterrows():
        atoms_i = row_i["atoms"]
        structure_i = AseAtomsAdaptor.get_structure(atoms_i)

        # The structure is handed over as POSCAR-formatted text.
        b = be.bulk.BULK()
        b.set_structure_from_file(Poscar(structure_i).get_string())

        data_list.append({
            "spacegroup_i": b.get_spacegroup(),
            "species_i": b.get_species(),
            "wyckoff_i": b.get_wyckoff(),
            "name_i": b.get_name(),
            "parameter_values_i": b.get_parameter_values(),
            })

    # Put the original columns and the descriptors side by side under a
    # two-level column index. `join_axes` was removed from pd.concat in
    # pandas 1.0; reindexing on the original index is the documented
    # equivalent and also works on older pandas versions.
    df_new = pd.concat(
        [df_new, pd.DataFrame(data_list)],
        axis=1,
        keys=["default_columns", "prototype_info"],
        ).reindex(df_new.index)

## Writing/Reading Dataframes to File

In [8]:
# Either cache the freshly parsed dataframes on disk, or restore them from
# the cache written by a previous run.
# NOTE: pickle files must only be loaded when they come from a trusted source.
if parse_files:
    with open("tmp_df_new.pickle", "wb") as fle:
        pickle.dump(df_new, fle)

    with open("tmp_df_old.pickle", "wb") as fle:
        pickle.dump(df_old, fle)
else:
    with open("tmp_df_old.pickle", "rb") as fle:
        df_old = pickle.load(fle)

    with open("tmp_df_new.pickle", "rb") as fle:
        df_new = pickle.load(fle)

# Comparing Old Structures to New Structures

## Shortening old df to save time

In [9]:
# index_list = df_old.index.tolist()
        
# filtered_index_list = []
# for index_i in index_list:
#     if random.random() > 0.96:  # 0.60
#         filtered_index_list.append(index_i)

# print("Shortened length of df_old: ", len(filtered_index_list))

# df_old = df_old.loc[filtered_index_list,:]

## Constructing mapping between old and new labels

In [10]:
if not compare_old_new:
    # Reuse a previously stored mapping instead of recomputing it.
    with open("mapping_iro2_ids_190326_1.json", "r") as fle:
        mapping_dict = json.load(fle)

else:
    follow_tail = FollowingTail(n=35)
    follow_tail.activate()

    # old ID -> list of new IDs whose prototype descriptors all match.
    mapping_dict = {}
    for tmp_i, (i_cnt, row_i) in enumerate(df_old.iterrows()):
        old_id = row_i["default_columns"]["id"]
        follow_tail(str(tmp_i) + " | Processing old id: " + str(old_id))

        matches = []
        mapping_dict[old_id] = matches

        for j_cnt, row_j in df_new.iterrows():
            if not all(compare_bulk_protos(row_i, row_j)):
                continue

            new_id = row_j["default_columns"]["id"]
            matches.append(new_id)
            follow_tail("-   id match found: " + str(new_id))

        follow_tail("----------------------------")

    print("--------------------------> Done! <--------------------------")

    with open("mapping_dict.json", "w") as fle:
        json.dump(mapping_dict, fle, indent=2)

## Analysing Mapping Dictionary

In [None]:
# mapping_dict

In [12]:
# Every new ID that appears in the mapping, once per old ID pointing at it.
flattened_values = [
    new_id
    for new_ids in mapping_dict.values()
    for new_id in new_ids
    ]

# New IDs that are the target of more than one old ID.
id_counts = collections.Counter(flattened_values)
shared_new_ids = [item for item, count in id_counts.items() if count > 1]

print(
    "The following new IDs have more than 1 corresponding old ID: ",
    "\n",
    shared_new_ids
    )

The following new IDs have more than 1 corresponding old ID:  
 ['0', '308', '245', '604', '449', '455', '655', '570', '682', '58']


In [13]:
# Report every old ID that matched more than one new ID.
for old_id, new_ids in mapping_dict.items():
    if len(new_ids) <= 1:
        continue

    print("old_id: ", old_id, " | new_ids", new_ids)
    print("")

old_id:  225  | new_ids ['0', '308']

old_id:  241  | new_ids ['245', '604']

old_id:  255  | new_ids ['449', '455']

old_id:  258  | new_ids ['0', '308']

old_id:  265  | new_ids ['449', '455']

old_id:  356  | new_ids ['149', '31']

old_id:  40  | new_ids ['107', '259']

old_id:  43  | new_ids ['570', '682']

old_id:  46  | new_ids ['245', '604']

old_id:  92  | new_ids ['570', '682']



In [14]:
# Report every old ID that matched no new ID at all.
for old_id, new_ids in mapping_dict.items():
    if len(new_ids) != 0:
        continue

    print("The following old ID doesn't have a mapping into the new ID list")
    print("old_id: ", old_id)

The following old ID doesn't have a mapping into the new ID list
old_id:  210
The following old ID doesn't have a mapping into the new ID list
old_id:  72


In [31]:
follow_tail = FollowingTail(n=35)
follow_tail.activate()

# Ordered pairs [id_i, id_j] of distinct rows of df_new whose prototype
# descriptors all match. Each duplicate pair is recorded in both orders.
duplicates_list = []

for tmp_i, (i_cnt, row_i) in enumerate(df_new.iterrows()):
    id_i = row_i["default_columns"]["id"]
    # These are IDs of the new numbering, so the message says so.
    follow_tail(str(tmp_i) + " | Processing new id: " + str(id_i))

    # mapping_dict (old ID -> new IDs) is deliberately left untouched here:
    # writing new-ID keys into it would corrupt the old -> new mapping.

    for j_cnt, row_j in df_new.iterrows():
        # Skip comparing a row with itself.
        if i_cnt == j_cnt:
            continue

        if all(compare_bulk_protos(row_i, row_j)):
            id_j = row_j["default_columns"]["id"]
            follow_tail(id_i)
            follow_tail(id_j)
            duplicates_list.append([id_i, id_j])

Output()

In [47]:
# Turn each [id_i, id_j] pair into a set so that pair order no longer matters.
duplicates_list_2 = list(map(set, duplicates_list))
# [set(i) for i in duplicates_list][0] == [set(i) for i in duplicates_list][8]

In [57]:
# Keep only the first occurrence of each unordered pair of IDs.
duplicates_list_pruned = []
for pair_i in duplicates_list_2:
    if pair_i not in duplicates_list_pruned:
        duplicates_list_pruned.append(pair_i)

# Plain loop for the printing side effect; a list comprehension would build
# (and echo as cell output) a useless list of None values.
for pair_i in duplicates_list_pruned:
    print(pair_i)

{'0', '308'}
{'259', '107'}
{'140', '23'}
{'31', '149'}
{'604', '245'}
{'420', '270'}
{'449', '455'}
{'478', '544'}
{'682', '570'}


[None, None, None, None, None, None, None, None, None]

In [63]:
# df_m = df_new
# df_m[
#     (df_m["default_columns"]["id"] == "0") &
# #     (df_m[""] == "") &
# #     (df_m[""] == "") &
#     [True for i in range(len(df_m))]
#     ]

In [64]:
# df_m[
#     (df_m["default_columns"]["id"] == "308") &
# #     (df_m[""] == "") &
# #     (df_m[""] == "") &
#     [True for i in range(len(df_m))]
#     ]

In [None]:
# # from time import sleep
# # from jupyter_modules.jupyter_helpers.following_tail import FollowingTail


# for i in range(300):
#     follow_tail(i)
#     sleep(0.2)

In [None]:
# index_list = df_old.index.tolist()
        
# filtered_index_list = []
# for index_i in index_list:
#     if random.random() > 0.9:
#         filtered_index_list.append(index_i)

# len(filtered_index_list)

# df_old = df_old.loc[filtered_index_list,:]

In [None]:
# index_list = df_old.index.tolist()

# filtered_index_list = []
# for index_i in index_list:
#     if random.random() > 0.9:
#         filtered_index_list.append(index_i)

# len(filtered_index_list)

# df_old = df_old.loc[filtered_index_list,:]

In [None]:
# row_i = df_old[df_old["default_columns"]["id"] == "225"].iloc[0]

# for j_cnt, row_j in df_new.iterrows():
#     comp_list_j = compare_bulk_protos(row_i, row_j)
#     if all(comp_list_j):
#         print(row_j)
#         print("")
#         print("")
#         print("")
# #         print("lskfksf")

In [None]:
# df_new.loc[0]

# df_new.loc[231]

In [None]:
# val_i = row_i["prototype_info"]["wyckoff_i"]
# val_j = row_j["prototype_info"]["wyckoff_i"]

# print(val_i)
# print(val_j)

# list(set(val_i)) == list(set(val_j))