# Sandbox to troubleshoot DOS calcs
---

### Import Modules

In [None]:
import os
print(os.getcwd())
import sys

import numpy as np
import pandas as pd

import plotly.graph_objs as go

from methods import (
    get_df_dft,
    get_df_features_targets,
    get_df_slab_ids,
    read_pdos_data,
    )

### Read Data

In [None]:
# Load the project tables through the helper functions imported from `methods`.
df_dft = get_df_dft()

df_features_targets = get_df_features_targets()

df_slab_ids = get_df_slab_ids()
# Re-index by `slab_id` so a single slab's row can be fetched with `.loc`.
df_slab_ids_i = df_slab_ids.set_index("slab_id")

### Searching for jobs that are made from R-IrO2

In [None]:
# Bulk ID whose slabs should be listed
# (NOTE(review): presumably the R-IrO2 bulk, going by the section title).
bulk_id_to_find = "64cg6j9any"

# Expand the index of the features/targets table into a frame so that the
# `slab_id` level can be read as an attribute of each row.
df_ind = df_features_targets.index.to_frame()
for name_i, row_i in df_ind.iterrows():
    slab_id_i = row_i.slab_id

    # Bulk and facet belonging to this slab.
    row_ids = df_slab_ids_i.loc[slab_id_i]
    bulk_id_i = row_ids.bulk_id
    facet_i = row_ids.facet

    # Skip every slab that was not cut from the bulk of interest.
    if bulk_id_i != bulk_id_to_find:
        continue

    print(slab_id_i, facet_i, name_i, sep=" | ")

In [None]:
# Index key of the R-IrO2 (110) entry in the features/targets table.
index_r_iro2_110 = ('sherlock', 'tetuwido_70', 25.0)

row_feat_tar = df_features_targets.loc[index_r_iro2_110]

# p-band center stored under features -> o for that entry.
row_feat_tar["features"]["o"]["p_band_center"]

In [None]:
# Colin 2p band center -2.44 eV

# My 2p band center -3.07 eV

In [None]:
# Display the value stored under data -> job_id_o for the selected row.
row_feat_tar["data"]["job_id_o"]

In [None]:
# Display the "targets" entries of the selected row.
row_feat_tar["targets"]

In [None]:
# Put the project's "data" directory at the front of the import path. Its
# location comes from the PROJ_irox environment variable, so this raises
# KeyError when that variable is not set.
sys.path.insert(0, os.path.join(
    os.environ["PROJ_irox"],
    "data"))

# Reference values that are inspected in the following cells.
from proj_data_irox import (
    h2_ref,
    h2o_ref,
    )

In [None]:
# from proj_data_irox import
# Scratch arithmetic: half of -9.88557216.
# NOTE(review): the origin of this number is not shown here — confirm.
-9.88557216 / 2

In [None]:
# Display the H2 reference value imported from proj_data_irox.
h2_ref

In [None]:
# Display the H2O reference value imported from proj_data_irox.
h2o_ref

In [None]:
# Scratch arithmetic.
# NOTE(review): when run as a notebook cell only the last expression is
# displayed, so the result of the first line is discarded — confirm intent.
-1.00 + 0.044
-2.66 + 0.2945

## Plotting my and Colin's DOS files for R-IrO2

In [None]:
# Read the PDOS table (and the band-center table) for "gasedilo_55".
df_pdos_i, df_band_centers_i = read_pdos_data("gasedilo_55")

# Hint: df_pdos_i.columns.tolist() lists the available columns.

# x: the table's index values; y: the "p_tot_sum" column of "O17".
x = df_pdos_i.index.tolist()
y = df_pdos_i["O17"]["p_tot_sum"]

trace = go.Scatter(x=x, y=y)
data = [trace]

fig = go.Figure(data=data)
fig.show()

### Colin's DOS Data

In [None]:
# Path to one of Colin's PDOS CSV files, located below the directory named by
# the `dropbox` environment variable (KeyError when that variable is unset).
colin_file_path = os.path.join(
    os.environ["dropbox"],
    "06_downloads/04_firefox_download/1-s2.0-S003960281830760X-mmc1/IrO2_systems",
    "IrO2-1c-O.csv",
    )

# Load the CSV into a DataFrame.
df_dos_colin = pd.read_csv(colin_file_path)

In [None]:
# Plot Colin's O 2p PDOS against the energy relative to the Fermi level.
x = df_dos_colin["Energy relative to fermi level (eV)"]
y = df_dos_colin["O 2p-PDOS"]

trace = go.Scatter(x=x, y=y)
data = [trace]

fig = go.Figure(data=data)
fig.show()

In [None]:
# Tidy Colin's table in one pass: shorten the energy column name, use it as
# the index and discard the "Total DOS" column.
# Rows containing NaN are kept on purpose (no dropna()).
df_dos_colin = (
    df_dos_colin
    .rename(columns={"Energy relative to fermi level (eV)": "energy"})
    .set_index("energy")
    .drop(columns=["Total DOS"])
)

In [None]:
# O 2p band center of Colin's PDOS over a fixed energy window:
#     eps_2p = integral(rho * eps) / integral(rho)
# with both integrals evaluated by the trapezoidal rule.
df = df_dos_colin

# Keep energies strictly inside (-10, 2) eV relative to the Fermi level.
# No backslash continuations are needed inside the brackets.
df = df[
    (df.index > -10) &
    # Alternative lower bound: (df.index > -5.87) &
    (df.index < 2.)
    ]

pho_i = df["O 2p-PDOS"]
eps = np.array(df.index.tolist())

# `np.trapz` is deprecated since NumPy 2.0 in favour of `np.trapezoid`;
# use the new name when available and fall back on older NumPy releases.
_trapezoid = getattr(np, "trapezoid", None) or np.trapz

band_center_up = _trapezoid(pho_i * eps, x=eps) / _trapezoid(pho_i, x=eps)

print(
    "ϵ_2p:",
    np.round(band_center_up, 4),
    " eV",
    sep="")

### Reading all files

In [None]:
# Load the tabulated values from "colin_si_data.csv" (path relative to the
# current working directory).
df_colin_si = pd.read_csv("colin_si_data.csv")
# Replace missing values by the literal string "NaN" so they can be matched
# with `==` (the file-walking cell compares `coord` against "NaN").
df_colin_si = df_colin_si.replace(np.nan, "NaN", regex=True)

# Display the table.
df_colin_si

In [None]:
def process_colin_pdos(file_path, e_min=-5.87, e_max=2.):
    """Compute the O 2p band center from one PDOS CSV file.

    The file is read with ``pandas.read_csv`` and must contain the columns
    ``"Energy relative to fermi level (eV)"``, ``"Total DOS"`` and
    ``"O 2p-PDOS"``. The band center is the first moment of the O 2p PDOS
    over the energy window::

        eps_2p = integral(rho * eps) / integral(rho)

    with both integrals evaluated by the trapezoidal rule.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file.
    e_min : float, optional
        Exclusive lower bound of the integration window, in the units of the
        energy column. Defaults to -5.87.
    e_max : float, optional
        Exclusive upper bound of the integration window. Defaults to 2.0.

    Returns
    -------
    float
        The band center. The result is ``nan`` when the integrated PDOS in
        the window is zero, e.g. when no energy falls inside the window.

    Raises
    ------
    KeyError
        If one of the required columns is missing from the file.
    """
    df_dos_colin = pd.read_csv(file_path)

    df_dos_colin = df_dos_colin.rename(columns={
        "Energy relative to fermi level (eV)": "energy",
        })
    df_dos_colin = df_dos_colin.set_index("energy")
    df_dos_colin = df_dos_colin.drop(columns=["Total DOS"])

    # Rows with NaN are deliberately kept (no dropna()).

    # Restrict the data to energies strictly inside (e_min, e_max).
    in_window = (df_dos_colin.index > e_min) & (df_dos_colin.index < e_max)
    df = df_dos_colin[in_window]

    pho_i = df["O 2p-PDOS"].to_numpy()
    eps = df.index.to_numpy()

    # `np.trapz` is deprecated since NumPy 2.0 in favour of `np.trapezoid`;
    # use the new name when available and fall back on older NumPy releases.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz

    band_center_up = trapezoid(pho_i * eps, x=eps) / trapezoid(pho_i, x=eps)

    return band_center_up

In [None]:
# Walk Colin's SI_Data tree, compute the O 2p band center of every PDOS CSV
# with `process_colin_pdos`, and pair it with the value tabulated in
# `df_colin_si` (column "eps_2p") when exactly one row matches.
# #########################################################
data_dict_list = []
# #########################################################
root_dir = os.path.join(
    os.environ["dropbox"],
    "06_downloads/04_firefox_download/1-s2.0-S003960281830760X-mmc1/SI_Data")
# #########################################################
for subdir, dirs, files in os.walk(root_dir):
    for file in files:
        file_path_i = os.path.join(subdir, file)

        file_ext_i = file.split(".")[-1]
        num_dots = file.count(".")

        # Only handle plain "<name>.csv" files (exactly one dot) and skip
        # anything with "kink" in its name.
        if file_ext_i != "csv" or num_dots != 1 or "kink" in file:
            continue

        # File names look like "<formula>-<coord>-<adsorbate>.csv",
        # e.g. "IrO2-1c-O.csv".
        file_wo_ext_i = file.split(".")[0]
        name_parts_i = file_wo_ext_i.split("-")

        elem_i = name_parts_i[0]
        coord_i = name_parts_i[1]

        # Drop the trailing character of the coordination token
        # (e.g. "1c" -> "1") before converting it to a number.
        coord_i_2 = coord_i[0:-1]

        if coord_i_2 == "None":
            # `df_colin_si` stores missing values as the string "NaN".
            coord_i_3 = "NaN"
        else:
            coord_i_3 = float(coord_i_2)

        ads_i = name_parts_i[2]

        p_band_i = process_colin_pdos(file_path_i)

        # Look up the tabulated value for this formula/coordination pair.
        # The former `& [True, ...]` list mask was a no-op and combining a
        # Series with a plain list via `&` is deprecated in recent pandas.
        df = df_colin_si
        df = df[
            (df["formula"] == elem_i) &
            (df["coord"] == coord_i_3)
            ]

        # Only trust the lookup when it is unambiguous.
        p_band_colin_i = None
        if df.shape[0] == 1:
            row_si_i = df.iloc[0]
            p_band_colin_i = row_si_i["eps_2p"]

        # #############################################
        data_dict_i = {
            "p_band": p_band_i,
            "p_band_colin": p_band_colin_i,
            "file_path": file_path_i,
            "file_name": file,
            "file_name_wo_ext": file_wo_ext_i,
            "elem": elem_i,
            "coord": coord_i_3,
            "ads": ads_i,
            }
        # #############################################
        data_dict_list.append(data_dict_i)
        # #############################################

# #########################################################
df_colin_pdos = pd.DataFrame(data_dict_list)
# Missing values are written as the string "NaN", as in `df_colin_si`.
df_colin_pdos = df_colin_pdos.replace(np.nan, "NaN", regex=True)
# #########################################################

In [None]:
# Compare the band centers computed here (y) with Colin's tabulated
# values (x), one marker per PDOS file.
p_band = df_colin_pdos.p_band
p_band_colin = df_colin_pdos.p_band_colin

trace = go.Scatter(x=p_band_colin, y=p_band, mode="markers")
data = [trace]

fig = go.Figure(data=data)
fig.show()

In [None]:
# bulk_id_i

# df_dft

In [None]:
# eps.shape

In [None]:
# pho_i

In [None]:
# # band_center_up = 

# # np.trapz(pho_i * eps, x=eps)
# np.trapz(pho_i, x=eps)

In [None]:
# len(df_dos_colin.index.tolist())

# len(df_dos_colin.index.tolist())

# len(df_dos_colin["O 2p-PDOS"].tolist())

In [None]:
# file_wo_ext_i

In [None]:
# df_colin_pdos

In [None]:
# p_band_i

In [None]:
# df

In [None]:
# # df_colin_pdos.coord.tolist()
# df_colin_pdos

# df = df_colin_pdos
# df = df[
#     (df["elem"] == elem_i) &
# #     (df["coord"] == coord_i_3) &

#     # (df[""] == "") &
#     [True for i in range(len(df))]
#     ]
# df

In [None]:
# elem_i

In [None]:
# df_colin_si.head()

In [None]:
# coord_i_3

In [None]:
# coord_i_3

In [None]:
# df_colin_pdos