# Import Modules

In [1]:
import os
print(os.getcwd())
import sys

import json

import numpy as np
import pandas as pd

from pymatgen.io.ase import AseAtomsAdaptor

from mpcontribs.client import load_client

/mnt/f/Dropbox/01_norskov/00_git_repos/PROJ_IrOx_Active_Learning_OER/CatHub_MPContribs_upload/MPContribs_upload


# Read IrOx DFT Data

In [2]:
# Load the pickled DFT dataframe (df_dft_final_no_dupl.pickle) from the
# directory tree rooted at the PROJ_irox environment variable.
import os
import pickle

path_i = os.path.join(
    os.environ["PROJ_irox"],
    "workflow/ml_modelling/processing_bulk_dft/creating_final_dataset_for_upload",
    "out_data/df_dft_final_no_dupl.pickle",
)

# NOTE(review): pickle.load can execute arbitrary code from the file;
# presumably this file is generated by the project itself -- confirm it is trusted.
with open(path_i, "rb") as fle:
    df_dft = pickle.load(fle)

# Remove four columns before any further processing.
_COLUMNS_TO_DROP = ["id", "form_e_chris", "path", "source"]
df_dft = df_dft.drop(columns=_COLUMNS_TO_DROP)

In [3]:
# Order rows by stoichiometry and then by dH so that the leading-rows subset
# taken below is the same on every run for the same data.
df_dft = df_dft.sort_values(["stoich", "dH"])

# Number of leading rows kept for upload.
# Values tried previously: 80, 260, or a random draw via df_dft.sample(n=10).
N_ROWS_TO_UPLOAD = 16

# .copy() detaches the subset from the sorted parent frame. Without it, the
# later `df_i["formula"] = ...` assignment writes to a slice of another
# DataFrame, which pandas flags with SettingWithCopyWarning.
df_dft = df_dft.iloc[0:N_ROWS_TO_UPLOAD].copy()

In [4]:
%%capture

sys.path.insert(0, 
    os.path.join(
        os.environ["PROJ_irox"],
        "workflow/ml_modelling"))

from ml_methods import get_ml_dataframes
DF_dict = get_ml_dataframes(
    names=[
        'bulk_dft_data_path',
        'unique_ids_path',
        'prototypes_data_path',
        'static_irox_structures_path',
        'static_irox_structures_kirsten_path',
        'oqmd_irox_data_path',
        'df_features_pre_opt_path',
        'df_features_pre_opt_kirsten_path',
        'df_features_post_opt_path',
        'oer_bulk_structures_path',
        'df_ccf_path',
        'df_dij_path',
        'ids_to_discard__too_many_atoms_path',
        'df_prototype_dft_path',
        'df_prototype_static_path',
        ]
    )

bulk_dft_data = DF_dict["bulk_dft_data"]
unique_ids = DF_dict["unique_ids"]
prototypes_data = DF_dict["prototypes_data"]
static_irox_structures = DF_dict["static_irox_structures"]
static_irox_structures_kirsten = DF_dict["static_irox_structures_kirsten"]
oqmd_irox_data = DF_dict["oqmd_irox_data"]
df_features_pre_opt = DF_dict["df_features_pre_opt"]
df_features_pre_opt_kirsten = DF_dict["df_features_pre_opt_kirsten"]
df_features_post_opt = DF_dict["df_features_post_opt"]
oer_bulk_structures = DF_dict["oer_bulk_structures"]
df_ccf = DF_dict["df_ccf"]
df_dij = DF_dict["df_dij"]
ids_to_discard__too_many_atoms = DF_dict["ids_to_discard__too_many_atoms"]
df_prototype_static = DF_dict["df_prototype_static"]
df_prototype_dft = DF_dict["df_prototype_dft"]

In [5]:
# assert False

In [6]:
# Second, untouched load of the same pickle file, kept under `df_dft_new`
# (no columns dropped, no sorting or slicing applied).
# NOTE(review): `df_dft_new` is not referenced by any other visible cell --
# confirm whether this cell is still needed.
import os
import pickle

path_i = os.path.join(
    os.environ["PROJ_irox"],
    "workflow/ml_modelling/processing_bulk_dft/creating_final_dataset_for_upload",
    "out_data/df_dft_final_no_dupl.pickle",
)

# NOTE(review): pickle.load can execute arbitrary code from the file;
# confirm the file comes from a trusted source.
with open(path_i, "rb") as fle:
    df_dft_new = pickle.load(fle)

In [7]:
# print("Why are these numbers different")
# print(df_prototype_dft.shape)
# print(df_dft.shape)

In [8]:
def chunks(lst, n):
    """Yield successive n-sized chunks from lst.

    Args:
        lst: Sequence supporting ``len()`` and slicing (list, str, ...).
        n: Chunk size; must be positive.

    Yields:
        Consecutive slices ``lst[i:i + n]``. The final chunk is shorter when
        ``len(lst)`` is not a multiple of ``n``.

    Raises:
        ValueError: If ``n`` is zero or negative. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    # A negative step would make range() empty and silently drop every item;
    # fail loudly instead so no upload batch is lost unnoticed.
    if n <= 0:
        raise ValueError("chunk size n must be positive, got {!r}".format(n))

    for start in range(0, len(lst), n):
        yield lst[start:start + n]

# Add 'Formula' Column to df

In [9]:
def method(row_i):
    """Map a row's stoichiometry label to its chemical formula.

    Args:
        row_i: Object exposing a ``stoich`` attribute (e.g. a DataFrame row).

    Returns:
        "IrO2" for "AB2", "IrO3" for "AB3", and None for any other label.
    """
    formula_by_stoich = dict(AB2="IrO2", AB3="IrO3")
    return formula_by_stoich.get(row_i.stoich, None)

# `df_i` is an alias of `df_dft` (same object), so the new "formula" column
# also appears on `df_dft`, which the upload cells read later.
df_i = df_dft
df_i["formula"] = df_i.apply(method, axis="columns")

# MPContribs

In [10]:
import yaml

# Read the project's config file located at $PROJ_irox/config/config.yml.
path_i = os.path.join(os.environ["PROJ_irox"], "config", "config.yml")
with open(path_i) as file:
    # yaml.full_load(stream) is shorthand for yaml.load(stream, Loader=yaml.FullLoader).
    # NOTE(review): FullLoader is not intended for untrusted input; the config
    # is presumably a local, trusted file -- confirm.
    config_dict = yaml.full_load(file)

# The API key comes from the config file rather than being written in the notebook.
api_key = config_dict["mpcontrib"]["api_key"]

In [11]:
# Name of the MPContribs project that every request below targets.
project = "active_learned_irox_polymorphs"

# Client built from the API key read from the config file.
client = load_client(api_key)

# Deleting all data to start over

In [12]:
# Delete the project's contributions in repeated requests until the server
# reports that none are left.
while True:
    deleted = client.contributions.delete_entries(project=project).result()

    total_count = deleted["total_count"]
    num_deleted = deleted["count"]

    print(
        num_deleted, 'contribution(s) deleted',
        "|",
        total_count, "contribution(s) remaining")

    if total_count == 0:
        break

    # Entries are still reported but this request removed none: further
    # requests would not make progress, so stop instead of looping forever.
    if num_deleted == 0:
        raise RuntimeError(
            "delete_entries removed nothing although {} contribution(s) "
            "are still reported for project {!r}".format(total_count, project))

# Delete entire project to start from scratch
# DON'T DO THIS (PATRICK HAS TO MANUALLY APPROVE PROJECT EVERY TIME)
if False:
    results = client.projects.delete_entry(pk=project).result()

1 contribution(s) deleted | 1 contribution(s) remaining
0 contribution(s) deleted | 0 contribution(s) remaining


# Create Project (Once)

In [13]:
# Visibility flag; reused for the project metadata update and for every
# contribution built later in the notebook.
is_public = True
info = dict(project=project)

# Create the project from scratch (do this rarely).
# DON'T DO THIS (PATRICK HAS TO MANUALLY APPROVE PROJECT EVERY TIME)
if False:
    client.projects.create_entry(project=info).result()

In [14]:
# Disabled by default: flip the condition to True to fetch the project entry
# with all fields into `all_data` for manual inspection.
if False:
    all_data = client.projects.get_entry(pk=project, _fields=['_all']).result()

# all_data

# Update Project Properties

In [15]:
# Manual toggle: change to False to skip updating the project metadata.
if True:
    # The indented multi-line text is collapsed into a single paragraph with
    # words separated by exactly one space.
    description_text = " ".join("""
        Materials science is primarily concerned with the underlying relationship between a material's structure and functionality,
        where the knowledge of viable polymorphic forms of crystals plays an indispensable role.
        Machine-learning based surrogate models have the potential to accelerate this process of creating the knowledge-base for materials polymorphs for target applications in under-explored chemistries.
        Herein, we report on a readily generalizable active-learning (AL) accelerated algorithm for the targeted identification of novel and stable IrOx (x=2 or 3) polymorphs and subsequent thermochemical analyses of the activity of these discovered structures towards the oxygen evolution reaction (OER).
        We demonstrate that compared to a random search,
        the AL framework more than doubles the efficiency of using DFT to find stable polymorphs out of a large array of prototypical structures.
        We find nearly 195 IrO2 polymorphs within the thermodynamic synthesizability limit and reaffirm the rutile ground state.
        For IrO3, we find 74 unique synthesizable polymorphs and report a previously unknown FeF3-like ground state.
        The algorithm is exceptionally adept at quickly picking out the most stable polymorphs, with the most stable α-IrO3 phase discovered on average in only 4.3 generations.
        An analysis of the structural properties of these metastable polymorphs reveals that octahedral local coordination environments are preferred for all low energy structures.
        """.replace("\n", "").split())

    # Links shown with the project.
    project_urls = {
        "PaperGit": "https://github.com/raulf2012/PAPER_IrOx_Active_Learning_OER",
        "ProjGit": "https://github.com/raulf2012/PROJ_IrOx_Active_Learning_OER",
        # NOTE(review): same URL as "ProjGit" -- confirm whether a link to the
        # paper itself was intended here.
        "PaperURL": "https://github.com/raulf2012/PROJ_IrOx_Active_Learning_OER",
    }

    # Human-readable descriptions of the data keys uploaded per contribution.
    column_descriptions = {
        "InternalID": "Unique ID used internally, including for posterity in case anybody wants to dig through the project's Git repo",
        "ΔH|formation": "Heat of formation",
        # NOTE(review): the contribution cell stores the per-atom energy
        # (`energy_pa`, "eV/atom") under "EnergyDFT" -- confirm this wording.
        "EnergyDFT": "Raw DFT VASP energy",
        # NOTE(review): the contribution cell stores the per-atom volume under
        # "Volume" and the total cell volume under "Volume|UnitCell" --
        # confirm that this description matches the uploaded value.
        "Volume": "Total computational cell volume",
        "StructurePrototype|PreDFT": "Structural prototype of initial pre-DFT optimized structure candidate",
        "StructurePrototype|PostDFT": "Structural prototype of post-DFT relaxed structure",
        "SpaceGroupNumber|PreDFT": "Space group of pre-DFT structure candidate",
        "SpaceGroupNumber|PostDFT": "Space group of post-DFT structure candidate",
    }

    results = client.projects.update_entry(
        pk=project,
        project={
            "is_public": is_public,
            "title": "Active Learned IrOx Polymorphs",
            "owner": "raulf2012@gmail.com",
            "authors": "Raul A. Flores, Christopher Paolucci, Kirsten T. Winther, Ankit Jain, Jose Antonio Garrido Torres, Muratahan Aykol, Joseph Montoya, Jens K. Nørskov",
            "description": description_text,
            "urls": project_urls,
            "other": column_descriptions,
        },
    )
    # Resolve the request; the returned value is not used.
    results.result()

# Adding new data rows

In [16]:
def _prototype_info(df_prototype, struct_id):
    """Look up prototype name and space group number for one structure id.

    Args:
        df_prototype: DataFrame indexed by structure id with `p_name` and
            `spacegroup` columns.
        struct_id: Index label to look up.

    Returns:
        Tuple ``(p_name, int(spacegroup))``; ``("", None)`` after printing a
        notice when `struct_id` is not in the index.
    """
    if struct_id not in df_prototype.index:
        print("Woops", struct_id)
        return "", None

    proto_row = df_prototype.loc[struct_id]
    return proto_row.p_name, int(proto_row.spacegroup)


# One contribution payload per DFT row, keyed by the row's index label.
contributions = dict()
for ind_i, row_i in df_dft.iterrows():
    # Numeric quantities are sent as strings with their unit appended.
    data_i = {
        "InternalID": row_i.name,

        "ΔH|formation": str(row_i.dH) + " eV/atom",
        "Formula": row_i.formula,
        "EnergyDFT": str(row_i.energy_pa) + " eV/atom",
        "NumberOfAtoms": row_i.num_atoms,
        "Volume|UnitCell": str(row_i.volume) + " angstrom**3",
        "Volume": str(row_i.volume_pa) + " angstrom**3/atom",
    }

    # Post-DFT lookup first, then pre-DFT (static), matching the print order
    # of the "Woops" notices.
    prototype_name_dft, spacegroup_dft = _prototype_info(
        df_prototype_dft, ind_i)
    prototype_name_static, spacegroup_static = _prototype_info(
        df_prototype_static, ind_i)

    data_i.update({
        "StructurePrototype|PreDFT": prototype_name_static,
        "StructurePrototype|PostDFT": prototype_name_dft,
        "SpaceGroupNumber|PreDFT": spacegroup_static,
        "SpaceGroupNumber|PostDFT": spacegroup_dft,
    })

    contributions[ind_i] = {
        "contrib": {
            "identifier": ind_i,
            "project": project,
            "is_public": is_public,
            "data": data_i,
        },
    }

In [17]:
# Manual toggle for the upload step.
if True:
    contribs = [entry["contrib"] for entry in contributions.values()]

    # Batch size per request; the structure-upload cell reuses this value.
    chunk_size = 20
    df_mp_list = []
    for contribs_chunk_i in chunks(contribs, chunk_size):
        created = client.contributions.create_entries(
            contributions=contribs_chunk_i).result()

        # Keep the server's response rows, indexed by contribution identifier.
        df_mp_list.append(
            pd.DataFrame(created["data"]).set_index("identifier"))


# Single frame of all created contributions across the batches.
df_mp = pd.concat(df_mp_list)

# Add Structures

In [18]:
def _structure_entry(cid, name, label, atoms):
    """Build one structure payload attached to contribution `cid`.

    The `atoms` object is converted with AseAtomsAdaptor.get_structure and the
    resulting structure's dict is merged on top of the contribution/name/label
    fields.
    """
    entry = dict(contribution=cid, name=name, label=label)
    entry.update(AseAtomsAdaptor.get_structure(atoms).as_dict())
    return entry


# Upload two structures per contribution (final DFT structure and initial
# prototype), one request per chunk of contribution identifiers.
for id_chunk_i in chunks(df_mp.index.tolist(), chunk_size):
    structure_contribs = []
    for id_i, row_i in df_mp.loc[id_chunk_i].iterrows():
        print(id_i)

        # Contribution id from the upload response.
        cid = row_i.id

        atoms_final = df_dft.loc[id_i].atoms
        atoms_init = static_irox_structures.loc[id_i].atoms

        structure_contribs.append(_structure_entry(
            cid, id_i + "_final", "Final_DFT_Optimized", atoms_final))
        structure_contribs.append(_structure_entry(
            cid, id_i + "_init", "Initial_Prototype", atoms_init))

    sid = client.structures.create_entries(structures=structure_contribs).result()

64cg6j9any
n36axdbw65
clc2b1mavs
ck638t75z3
mkbj6e6e9p
b49kx4c19q
85z4msnl6o
bpc2nk6qz1
926dnunrxf
mwmg9p7s6o
ze8ymybjct
nu94n3v4n4
xemg9wb27y
m2bs8w82x5
v1xpx482ba
nu7hbg6rnt


In [19]:
# Always raises AssertionError. Presumably a deliberate hard stop so that
# "Run All" does not continue into the scratch cells below -- confirm.
assert False

AssertionError: 

# MISC
---

# Extracting data with `get_entries` methods

In [None]:
# identifier = "mp-1234"

# client.contributions.get_entries(
#     project=project,
#     identifier=identifier,
#     # _fields=["formula"],
#     ).result()
#     # ).result()['data']

# # client.projects.get_entries?

In [None]:
# identifier = 'mp-1002-temp'
# cid = client.contributions.create_entry(contribution={
#     'project': project, 'identifier': identifier,
#     'data': {'E': '3.33 eV', 'E|V': {'a': 1, 'b': '3 cm'}}
# }).result()['id']

In [None]:
# is_public = True

# # dir(client.contributions)
# # client.contributions.create_entries?

# contributions = {
#     "mp-001": {
#         "contrib": {
#             "identifier": "mp-001",
#             "project": project, "is_public": is_public,
#             "data": {"temp0": 0, "temp1": 1},
#             }
#         },

#     "mp-002": {
#         "contrib": {
#             "identifier": "mp-002",
#             "project": project, "is_public": is_public,
#             "data": {"temp0": 8, "temp1": 9},
#             }
#         },

# #     "mp-002": ,
# #     "mp-003": ,
#     }

In [None]:
# row_i = df_dft.iloc[0]
# atoms = row_i.atoms
# structure = AseAtomsAdaptor.get_structure(atoms)

In [None]:
# # structure = Structure.from_file('Fe3O4.cif')
# # for i in created:

# sdct = dict(contribution=cid_tmp, name='Fe3O4',
#     label="temp_label",
#     # name=
#     )
# sdct.update(structure.as_dict())


# ## sid = client.structures.create_entries(structure=[sdct]).result()['id']
# # sid = client.structures.create_entries(structures=[sdct])

In [None]:
# identifier = 'mp-1002'
# cid = client.contributions.create_entry(contribution={
#     'project': project, 'identifier': identifier,
#     'data': {'E': '3.33 eV', 'E|V': {'a': 1, 'b': '3 cm'}}
# }).result()['id']

In [None]:
# get_ml_dataframes(
#     names=[
#         # 'bulk_dft_data_path',
#         # 'unique_ids_path',
#         # 'prototypes_data_path',
#         # 'static_irox_structures_path',
#         # 'static_irox_structures_kirsten_path',
#         # 'oqmd_irox_data_path',
#         # 'df_features_pre_opt_path',
#         # 'df_features_pre_opt_kirsten_path',
#         # 'df_features_post_opt_path',
#         # 'oer_bulk_structures_path',
#         # 'df_ccf_path',
#         # 'df_dij_path',
#         # 'ids_to_discard__too_many_atoms_path',
#         'df_prototype_dft_path',
#         'df_prototype_static_path',
#         ]
#     )

In [None]:
# row_proto_static_i

# df_dft

# df_prototype_static.shape

# if ind_i in df_prototype_static.index:



In [None]:
# row_i.name

# ind_i

In [None]:
# # df_prototype_dft.loc["8p8evt9pcg"]

# df_prototype_dft.head()
# df_prototype_dft.index.unique()

# df_dft.index

In [None]:
# # df_prototype_dft.shape
# # df_prototype_static.shape

# for ind_i, row_i in df_dft.iterrows():
#     tmp = 42
    
# #     if ind_i not in df_prototype_dft.index:
# #         tmp = 42
        
#     if ind_i not in df_prototype_static.index:
#         print(ind_i)
#         tmp = 42

In [None]:
# assert False