# Combining features and adsorption energies into one dataframe
---



### Import Modules

In [1]:
import os
print(os.getcwd())
import sys
import time; ti = time.time()

import pickle
import copy

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

pd.set_option("display.max_columns", None)

import plotly.graph_objs as go

# #########################################################
from methods import (
    get_df_dft,
    get_df_job_ids,
    get_df_slab,
    get_df_jobs,
    get_df_jobs_data,
    get_df_ads,
    get_df_features,
    get_df_octa_vol_init,
    )

/mnt/f/Dropbox/01_norskov/00_git_repos/PROJ_IrOx_OER/workflow/feature_engineering


In [2]:
from methods import isnotebook

# Choose the tqdm flavour that matches the runtime; output is only verbose
# when the code runs inside a notebook.
isnotebook_i = isnotebook()
if not isnotebook_i:
    from tqdm import tqdm
    verbose = False
else:
    from tqdm.notebook import tqdm
    verbose = True

### Script Inputs

In [3]:
# Columns of `df_ads` that end up under the top-level "targets" column group
target_cols = [
    "g_o",
    "g_oh",
    "e_o",
    "e_oh",
    ]

### Read Data

In [4]:
# Adsorption energies keyed by (compenv, slab_id, active_site); the key
# columns are also kept as ordinary columns (drop=False).
df_ads = get_df_ads().set_index(
    ["compenv", "slab_id", "active_site", ], drop=False)

# Features table with the index level at position 5 removed
df_features = get_df_features()
df_features.index = df_features.index.droplevel(level=5)

df_slab = get_df_slab()

df_jobs = get_df_jobs()

# Missing `rerun_from_oh` entries are treated as False
df_jobs_data = get_df_jobs_data()
df_jobs_data["rerun_from_oh"] = \
    df_jobs_data["rerun_from_oh"].fillna(value=False)

df_dft = get_df_dft()

# Index by job id, keeping only the first row for any repeated id
df_job_ids = get_df_job_ids().set_index("job_id")
df_job_ids = df_job_ids.loc[~df_job_ids.index.duplicated(keep="first")]

df_octa_vol_init = get_df_octa_vol_init()

In [5]:
# Names of all columns under the "features" column group
feature_cols = df_features["features"].columns.to_list()

### Collecting other relevant data columns from various data objects

In [6]:
# #########################################################
# For every row of `df_ads`, look up the slab phase (`df_slab`) and the bulk
# stoichiometry (job id -> bulk id through `df_job_ids`, then `df_dft`).
# #########################################################
ads_index_names = list(df_ads.index.names)

data_dict_list = []
for index_i, row_i in df_ads.iterrows():
    # *O job id -> bulk id -> stoichiometry
    bulk_id_i = df_job_ids.loc[row_i.job_id_o].bulk_id
    stoich_i = df_dft.loc[bulk_id_i].stoich

    # slab id -> phase
    phase_i = df_slab.loc[row_i.slab_id].phase

    data_dict_i = {
        "phase": phase_i,
        "stoich": stoich_i,
        }
    data_dict_i.update(zip(ads_index_names, index_i))
    data_dict_list.append(data_dict_i)

# #########################################################
df_extra_data = pd.DataFrame(data_dict_list).set_index(
    ["compenv", "slab_id", "active_site", ], drop=True)

# Lift the flat column names into the 3-level ("data", <name>, "") layout
new_columns = [("data", col_i, "") for col_i in df_extra_data.columns]
df_extra_data.columns = pd.MultiIndex.from_tuples(new_columns)
# #########################################################

### Collating features data by looping over `df_ads`

In [7]:
# Feature columns that carry the DOS / Bader information
dos_bader_feature_cols = ["Ir*O_bader", "Ir_bader", "O_bader", "p_band_center"]

In [8]:
# #########################################################
# For every row of `df_ads`, pick the matching *O and *OH rows out of
# `df_features` (matched on `data.job_id_max`) and collect them.
# #########################################################
o_rows_list = []
o_index_list = []
# #########################################################
oh_rows_list = []
oh_index_list = []
# #########################################################
# *OH job ids for which features exist for the site, but none of the rows
# has that job id as `job_id_max`
failed_indices_oh = []

idx_slice = pd.IndexSlice
mess_i = "There should only be one row after the previous filtering"

for index_i, row_i in df_ads.iterrows():

    # #####################################################
    index_dict_i = dict(zip(list(df_ads.index.names), index_i))
    # #####################################################
    job_id_o_i = row_i.job_id_o
    job_id_oh_i = row_i.job_id_oh
    # #####################################################

    # #####################################################
    # *O
    ads_i = "o"

    df_feat_i = df_features.loc[idx_slice[
        index_dict_i["compenv"],
        index_dict_i["slab_id"],
        ads_i,
        index_dict_i["active_site"],
        :], :]

    row_feat_i = df_feat_i[df_feat_i.data.job_id_max == job_id_o_i]
    assert row_feat_i.shape[0] == 1, mess_i
    row_feat_i = row_feat_i.iloc[0]

    # If the selected row is missing any DOS/Bader value, fall back to the row
    # of this site with the fewest missing DOS/Bader values.
    dos_bader_vals_i = list(
        row_feat_i["features"][dos_bader_feature_cols].to_dict().values())
    if any(np.isnan(val) for val in dos_bader_vals_i):
        df_dos_bader_i = df_feat_i["features"][dos_bader_feature_cols]

        # NaN count per candidate row, in positional order. The loop variables
        # are deliberately NOT called `row_i`, so the outer `df_ads` row is
        # not overwritten.
        nan_counts = [
            sum(bool(np.isnan(val)) for val in feat_row_j.to_dict().values())
            for name_j, feat_row_j in df_dos_bader_i.iterrows()
            ]

        # Ties are resolved in favour of the last row with the fewest NaNs
        fewest_nan = min(nan_counts)
        best_pos = max(
            pos for pos, cnt in enumerate(nan_counts) if cnt == fewest_nan)

        row_feat_i = df_feat_i.iloc[best_pos]

    # #####################################################
    o_rows_list.append(row_feat_i)
    o_index_list.append(row_feat_i.name)

    # #####################################################
    # *OH
    ads_i = "oh"

    df_feat_i = df_features.loc[idx_slice[
        index_dict_i["compenv"],
        index_dict_i["slab_id"],
        ads_i,
        index_dict_i["active_site"],
        :], :]

    if df_feat_i.shape[0] > 0:
        row_feat_i = df_feat_i[df_feat_i.data.job_id_max == job_id_oh_i]

        if row_feat_i.shape[0] > 0:
            assert row_feat_i.shape[0] == 1, mess_i
            row_feat_i = row_feat_i.iloc[0]

            # #############################################
            oh_rows_list.append(row_feat_i)
            oh_index_list.append(row_feat_i.name)
        else:
            failed_indices_oh.append(job_id_oh_i)

# #########################################################
# Rebuild the frames from the collected rows and drop index levels 2 and 4
idx = pd.MultiIndex.from_tuples(o_index_list, names=df_features.index.names)
df_o = pd.DataFrame(o_rows_list, idx)
df_o.index = df_o.index.droplevel(level=[2, 4, ])
# #########################################################
idx = pd.MultiIndex.from_tuples(oh_index_list, names=df_features.index.names)
df_oh = pd.DataFrame(oh_rows_list, idx)
df_oh.index = df_oh.index.droplevel(level=[2, 4, ])
# #########################################################

### Checking failed_indices_oh against systems that couldn't be processed

In [9]:
from methods import get_df_atoms_sorted_ind

df_atoms_sorted_ind = get_df_atoms_sorted_ind()

# Rows that belong to the *OH jobs collected in `failed_indices_oh`
failed_oh_mask = df_atoms_sorted_ind.job_id.isin(failed_indices_oh)
df_atoms_sorted_ind_i = df_atoms_sorted_ind.loc[failed_oh_mask]

# Of those, keep the rows where `failed_to_sort` is False
df_tmp_8 = df_atoms_sorted_ind_i.loc[
    df_atoms_sorted_ind_i.failed_to_sort == False]

if len(df_tmp_8) > 0:
    print("Check out df_tmp_8, there where some *OH rows that weren't processed but maybe should be")

### Processing and combining feature data columns

In [10]:
from local_methods import combine_dfs_with_same_cols

# Merge the "data" column groups of the *OH and *O frames
df_dict_i = dict(
    oh=df_oh[["data"]],
    o=df_o[["data"]],
    )
df_data_comb = combine_dfs_with_same_cols(df_dict=df_dict_i, verbose=False)

# Pad the 2-level column index with an empty third level
new_cols = [(col_i[0], col_i[1], "", ) for col_i in df_data_comb.columns]
df_data_comb.columns = pd.MultiIndex.from_tuples(new_cols)

### Creating `df_features_comb` and adding another column level for ads

In [11]:
# #########################################################
def _features_with_ads_level(df_ads_rows, ads):
    """Return the "features" columns of `df_ads_rows` with `ads` inserted as
    the middle column level: (group, name) -> (group, ads, name)."""
    df_feat = df_ads_rows[["features"]]
    df_feat.columns = pd.MultiIndex.from_tuples(
        [(col_i[0], ads, col_i[1]) for col_i in df_feat.columns])
    return df_feat

# #########################################################
df_features_o = _features_with_ads_level(df_o, "o")
df_features_oh = _features_with_ads_level(df_oh, "oh")

# #########################################################
# Side-by-side: *O feature columns first, then *OH
df_features_comb = pd.concat([df_features_o, df_features_oh], axis=1)

### Rounding `effective_ox_state` to deal with groupby floating point issues

In [12]:
# Merge the per-adsorbate effective oxidation states into a single column:
# use the *OH value and fall back to the *O value when the *OH one is NaN
# (the result stays NaN when both are missing). Values are rounded to
# 6 decimals.
#
# The previous version guarded this with `oh != oh` (a self-comparison), which
# only worked through the NaN != NaN quirk and left its `elif` branch
# unreachable; the resulting values are the same.
eff_ox_state_o = df_features_comb[("features", "o", "effective_ox_state", )]
eff_ox_state_oh = df_features_comb[("features", "oh", "effective_ox_state", )]

eff_ox_state_list = [
    np.round(eff_ox_state_o_i if np.isnan(eff_ox_state_oh_i) else eff_ox_state_oh_i, 6)
    for eff_ox_state_o_i, eff_ox_state_oh_i
    in zip(eff_ox_state_o, eff_ox_state_oh)
    ]

df_features_comb[("features", "effective_ox_state", "")] = eff_ox_state_list

In [13]:
# Remove the per-adsorbate effective oxidation state columns
eff_ox_state_cols = [
    ("features", ads_j, "effective_ox_state", )
    for ads_j in ("o", "oh")
    ]
df_features_comb = df_features_comb.drop(columns=eff_ox_state_cols)

In [14]:
# Features that are kept once (from the *O columns) instead of per adsorbate.
# "effective_ox_state" is deliberately not part of this list.
non_ads_features = ["dH_bulk", "volume_pa", "bulk_oxid_state"]

In [15]:
# Build the new column labels for `df_features_comb`:
#   * ("features", "o", <non-ads feature>)  -> ("features", <feature>, "")
#   * ("features", "oh", <non-ads feature>) -> label unchanged, marked for removal
#   * everything else                       -> label unchanged
cols_to_drop = []
new_cols = []

for col_i in df_features_comb.columns:
    top_i, mid_i, low_i = col_i[0], col_i[1], col_i[2]

    is_non_ads_feature = (top_i == "features") and (low_i in non_ads_features)
    if not is_non_ads_feature:
        new_cols.append(col_i)
        continue

    print(col_i)
    if mid_i == "oh":
        cols_to_drop.append(col_i)
        new_cols.append(col_i)
    elif mid_i == "o":
        new_cols.append((top_i, low_i, "", ))
                
# non_ads_features

('features', 'o', 'dH_bulk')
('features', 'o', 'volume_pa')
('features', 'o', 'bulk_oxid_state')
('features', 'oh', 'dH_bulk')
('features', 'oh', 'volume_pa')
('features', 'oh', 'bulk_oxid_state')


In [16]:
# Apply the new column labels, then remove the columns marked for dropping
df_features_comb.columns = pd.MultiIndex.from_tuples(new_cols)
df_features_comb = df_features_comb.drop(columns=cols_to_drop)

In [17]:
# Reorder the columns: *OH features first, then *O, then everything else
all_cols = list(df_features_comb.columns)

oh_features = [col_i for col_i in all_cols if col_i[1] == "oh"]
o_features = [col_i for col_i in all_cols if col_i[1] == "o"]
other_features = [
    col_i for col_i in all_cols
    if col_i[1] != "oh" and col_i[1] != "o"
    ]

df_features_comb = df_features_comb[oh_features + o_features + other_features]

In [18]:
# Pad the flat `df_ads` column names to three levels, (<name>, "", ""), so the
# frame can be concatenated with the other 3-level frames
new_cols = [(col_i, "", "", ) for col_i in df_ads.columns]
df_ads.columns = pd.MultiIndex.from_tuples(new_cols)

### Combining all dataframes

In [19]:
# Column-wise join of the feature, data, adsorption-energy and extra-data frames
frames_to_combine = [
    df_features_comb,
    df_data_comb,
    df_ads,
    df_extra_data,
    ]
df_features_targets = pd.concat(frames_to_combine, axis=1)

### Removing the p-band center and Bader charge features for *OH (there are none)

In [20]:
# Drop the DOS/Bader feature columns of the *OH group in a single call
oh_cols_to_remove = [
    ("features", "oh", feature_j)
    for feature_j in ("p_band_center", "Ir_bader", "O_bader", "Ir*O_bader")
    ]
df_features_targets = df_features_targets.drop(columns=oh_cols_to_remove)

### Create `name_str` column

In [21]:
def method(row_i):
    """Build the string identifier of a row from its index tuple.

    Parameters
    ----------
    row_i : object with a `.name` attribute
        `.name` must be indexable; its first three entries are read as
        (compenv, slab_id, active_site).

    Returns
    -------
    str
        "<compenv>__<slab_id>__<active_site>", where the active site is cast
        to int and zero-padded to at least 3 characters.
    """
    index_key = row_i.name
    compenv_i = index_key[0]
    slab_id_i = index_key[1]
    active_site_str = str(int(index_key[2])).zfill(3)

    return "__".join([compenv_i, slab_id_i, active_site_str])

# One identifier string per row, stored under the "data" column group
name_str_col = ("data", "name_str", "")
df_features_targets[name_str_col] = df_features_targets.apply(method, axis=1)

In [22]:
# Top-level names of the `df_ads` columns, minus the target columns
df_ads_columns = [col_tuple_i[0] for col_tuple_i in df_ads.columns]

for target_col_i in target_cols:
    df_ads_columns.remove(target_col_i)

In [23]:
# Names that already exist under the "data" column group
data_columns_all = [col_i[0] for col_i in df_features_targets["data"].columns]

# `df_ads` columns that are new move into "data"; the ones already present
# there are duplicates and get dropped.
df_ads_columns_to_add = [
    col_i for col_i in df_ads_columns if col_i not in data_columns_all]
df_ads_columns_to_drop = [
    col_i for col_i in df_ads_columns if col_i in data_columns_all]

# #########################################################
for col_i in df_ads_columns_to_drop:
    df_features_targets = df_features_targets.drop(columns=(col_i, "", ""))

# #########################################################
def _regrouped_label(col_i):
    """Map a column label onto its final ("data" / "targets") column group."""
    if col_i[0] in df_ads_columns_to_add:
        return ("data", col_i[0], "", )
    if col_i[0] in target_cols:
        return ("targets", col_i[0], "", )
    return col_i

new_columns = [_regrouped_label(col_i) for col_i in df_features_targets.columns]
df_features_targets.columns = pd.MultiIndex.from_tuples(new_columns)

### Adding surface area as a coverage-type descriptor

In [24]:
# Show the slab table for reference; its `surf_area` column is used below
df_slab

Unnamed: 0_level_0,slab_id,bulk_id,facet,slab_thick,num_atoms,slab_final,loop_time,iter_time_i,facet_rank,slab_final_old,phase,source,surf_area,cell_mag_x,cell_mag_y,is_repeated,repeat_list,unique_slab
slab_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
solaleda_75,solaleda_75,b19q9p6k72,101,16.904966,47,"(Atom('O', [4.75225265889749, 4.39208082233217...",0.015629,64.860491,,"(Atom('O', [4.810698012632608, 4.4071623244747...",1,,34.570442,6.26928,6.34295,False,"(1, 1, 1)",True
kalisule_45,kalisule_45,b5cgvsb16w,111,15.860712,74,"(Atom('O', [6.711635630399488, 4.4893403350572...",0.003215,83.882123,,"(Atom('O', [6.642076465290764, 4.4669830159832...",1,,62.003800,6.64208,9.90815,False,"(1, 1, 1)",True
dilesupa_08,dilesupa_08,z5ms8a629a,331,15.565875,136,"(Atom('O', [12.359090625361683, 2.900563738601...",0.004074,163.634185,,"(Atom('O', [12.43314892823446, 2.9126665226056...",1,,123.166349,11.70320,11.70320,False,"(1, 1, 1)",True
fosurufu_23,fosurufu_23,8fxi6rmp75,012,15.490507,46,"(Atom('O', [1.3549083766114465, 0.791460566806...",0.003653,29.055411,,"(Atom('O', [1.7814138333909553, 0.838007836083...",1,,42.972470,5.47458,7.84947,False,"(1, 1, 1)",True
wefedifi_91,wefedifi_91,9573vicg7f,110,15.894913,100,"(Atom('O', [2.07475035525, 6.961648771000001, ...",0.005465,100.591014,,"(Atom('O', [1.8429251517, 6.847386362, 14.9998...",1,,73.923701,7.25247,10.19290,False,"(1, 1, 1)",True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
naronusu_67,naronusu_67,b5cgvsb16w,100,12.094815,44,"(Atom('O', [1.4210449328999994, 1.303056967200...",0.002864,66.837721,,"(Atom('O', [0.9976645171999999, 1.3028824818, ...",2,manual,40.974930,4.69666,8.72427,False,"(1, 1, 1)",True
nofabigo_84,nofabigo_84,8l919k6s7p,111,12.720018,67,"(Atom('O', [2.6248994889927393, 5.358722907485...",0.003700,140.388980,,"(Atom('O', [2.6191472834794673, 5.337107715832...",2,manual,67.002837,7.08305,10.05760,False,"(1, 1, 1)",True
kodefivo_37,kodefivo_37,8l919k6s7p,001,14.280394,51,"(Atom('O', [7.748192707052521, 1.3877448242371...",0.004000,183.312692,,"(Atom('O', [7.6286762848727125, 1.350392486573...",2,manual,43.948252,7.08305,7.08305,False,"(1, 1, 1)",True
wovaseli_71,wovaseli_71,v1xpx482ba,20-23,12.586243,283,"(Atom('O', [4.593202717659148, 2.9077162821537...",0.021844,1369.497710,4.0,"(Atom('O', [4.63295681033422, 2.79671585048171...",2,xrd,310.386147,16.30220,19.07510,False,"(1, 1, 1)",True


In [25]:
from methods import get_df_magmoms, read_magmom_comp_data

df_magmoms = get_df_magmoms()

# Look up the slab surface area for every row (the slab id is the second
# entry of the row index)
index_names = df_features_targets.index.names

data_dict_list = []
for name_i, row_i in df_features_targets.iterrows():
    surf_area_i = df_slab.loc[name_i[1]].surf_area

    data_dict_i = dict(zip(index_names, name_i))
    data_dict_i["surf_area"] = surf_area_i
    data_dict_list.append(data_dict_i)

# #########################################################
df_tmp = pd.DataFrame(data_dict_list).set_index(
    ["compenv", "slab_id", "active_site", ])

# #########################################################
# Store the new column under the "features" column group
new_cols = [("features", col_i, "") for col_i in df_tmp.columns]
df_tmp.columns = pd.MultiIndex.from_tuples(new_cols)

df_features_targets = pd.concat([df_features_targets, df_tmp], axis=1)

### Adding magmom comparison data

In [26]:
def process_df_magmoms_comp_i(df_magmoms_comp_i):
    """Index a magmom-comparison frame by its sorted job-id triplet.

    For every row the entries of `job_ids_tri` are sorted with `np.sort` and
    the first three are written to the new columns `job_id_0`, `job_id_1`
    and `job_id_2`. The returned frame uses these three columns as its
    MultiIndex.

    Parameters
    ----------
    df_magmoms_comp_i : pandas.DataFrame
        Must contain a `job_ids_tri` column whose entries are iterables with
        at least three job ids.

    Returns
    -------
    pandas.DataFrame
        The input rows, indexed by (job_id_0, job_id_1, job_id_2).
    """
    def method(row_i):
        sorted_ids = list(np.sort(list(row_i.job_ids_tri)))

        # Read all three ids before touching the row
        first_id, second_id, third_id = \
            sorted_ids[0], sorted_ids[1], sorted_ids[2]

        row_i["job_id_0"] = first_id
        row_i["job_id_1"] = second_id
        row_i["job_id_2"] = third_id
        return row_i

    df_with_ids = df_magmoms_comp_i.apply(method, axis=1)
    return df_with_ids.set_index(["job_id_0", "job_id_1", "job_id_2", ])

In [27]:
from methods import get_df_magmoms, read_magmom_comp_data

df_magmoms = get_df_magmoms()


# For every row that has magmom comparison data, look up the comparison entry
# belonging to its (*O, *OH, bare) job-id triplet.
data_dict_list = []
for name_i, row_i in df_features_targets.iterrows():
    # #####################################################
    index_dict_i = dict(zip(df_features_targets.index.names, name_i))
    # #####################################################

    magmom_data_i = read_magmom_comp_data(name=name_i)
    if magmom_data_i is None:
        # Rows without magmom comparison data are left out; they show up as
        # missing values after the concat below.
        continue

    df_magmoms_comp_i = process_df_magmoms_comp_i(
        magmom_data_i["df_magmoms_comp"])

    # Collect the job ids that are actually set. `pd.notna` rejects None as
    # well as NaN (a NaN job id would otherwise be counted as a valid id).
    job_ids = []
    for ads_j in ["o", "oh", "bare", ]:
        job_id_j = row_i["data"]["job_id_" + ads_j][""]
        if pd.notna(job_id_j):
            job_ids.append(job_id_j)

    # Both values default to None so that a row without a complete triplet
    # never raises NameError or inherits the value of the previous row.
    sum_norm_abs_magmom_diff_i = None
    norm_sum_norm_abs_magmom_diff_i = None
    if len(job_ids) == 3:
        job_ids = list(np.sort(job_ids))

        row_mags_i = df_magmoms_comp_i.loc[
            (job_ids[0], job_ids[1], job_ids[2], )
            ]
        sum_norm_abs_magmom_diff_i = row_mags_i.sum_norm_abs_magmom_diff
        norm_sum_norm_abs_magmom_diff_i = sum_norm_abs_magmom_diff_i / 3

    # #####################################################
    data_dict_i = dict()
    data_dict_i.update(index_dict_i)
    data_dict_i["sum_norm_abs_magmom_diff"] = sum_norm_abs_magmom_diff_i
    data_dict_i["norm_sum_norm_abs_magmom_diff"] = norm_sum_norm_abs_magmom_diff_i
    data_dict_list.append(data_dict_i)
    # #####################################################

# #########################################################
df_tmp = pd.DataFrame(data_dict_list)
df_tmp = df_tmp.set_index(["compenv", "slab_id", "active_site", ])

# #########################################################
# Store the new columns under the "data" column group
new_cols = [("data", col_i, "") for col_i in df_tmp.columns]
idx = pd.MultiIndex.from_tuples(new_cols)
df_tmp.columns = idx

df_features_targets = pd.concat([df_features_targets, df_tmp], axis=1)

### Add OER overpotential data

In [28]:
# #########################################################
import pickle; import os
directory = os.path.join(
    os.environ["PROJ_irox_oer"],
    "workflow/oer_analysis",
    "out_data")
path_i = os.path.join(
    directory,
    "df_overpot.pickle")
with open(path_i, "rb") as fle:
    df_overpot = pickle.load(fle)
# #########################################################

In [29]:
# Drop the `name` column and move the remaining columns under the "data"
# column group
df_overpot = df_overpot.drop(columns="name")

new_cols = [("data", col_i, "", ) for col_i in df_overpot.columns]
df_overpot.columns = pd.MultiIndex.from_tuples(new_cols)

In [30]:
# Append the overpotential columns
df_features_targets = pd.concat(
    [df_features_targets, df_overpot],
    axis=1,
    )

### Adding surface energy data

In [31]:
from methods import get_df_SE

# Surface energy table, with its columns moved under the "data" column group
df_SE = get_df_SE()

new_cols = [("data", col_i, "", ) for col_i in df_SE.columns]
df_SE.columns = pd.MultiIndex.from_tuples(new_cols)

# Only add the columns that `df_features_targets` does not have yet
existing_cols = df_features_targets.columns.tolist()
cols_to_remove = [
    col_i for col_i in df_SE.columns.tolist()
    if col_i in existing_cols
    ]

df_features_targets = pd.concat(
    [df_features_targets, df_SE.drop(columns=cols_to_remove)],
    axis=1,
    )

### Adding plot format properties

In [32]:
from proj_data import stoich_color_dict

# #########################################################
# Per-row plot formatting: a colour chosen by stoichiometry plus the
# normalized magmom difference (copied as a second colour value)
# #########################################################
format_index_names = list(df_features_targets.index.names)

data_dict_list = []
for index_i, row_i in df_features_targets.iterrows():
    row_data_i = row_i["data"]
    stoich_i = row_data_i["stoich"][""]
    norm_sum_norm_abs_magmom_diff_i = \
        row_data_i["norm_sum_norm_abs_magmom_diff"][""]

    # Anything other than AB2 / AB3 falls back to the "None" colour
    color_key_i = "None"
    for known_stoich_j in ("AB2", "AB3"):
        if stoich_i == known_stoich_j:
            color_key_i = known_stoich_j
            break
    color__stoich_i = stoich_color_dict[color_key_i]

    data_dict_i = {
        ("format", "color", "stoich"): color__stoich_i,
        ("format", "color", "norm_sum_norm_abs_magmom_diff"):
            norm_sum_norm_abs_magmom_diff_i,
        }
    data_dict_i.update(zip(format_index_names, index_i))
    data_dict_list.append(data_dict_i)

# #########################################################
df_format = pd.DataFrame(data_dict_list).set_index(
    ["compenv", "slab_id", "active_site", ])
df_format.columns = pd.MultiIndex.from_tuples(df_format.columns)
# #########################################################

In [33]:
# Append the plot-format columns
df_features_targets = pd.concat([df_features_targets, df_format], axis=1)

### Mixing Bader charges with bond lengths

In [34]:
# New *O feature: the Ir*O Bader product divided by the mean Ir-O bond length
ir_o_bader_o = df_features_targets[("features", "o", "Ir*O_bader", )]
ir_o_mean_o = df_features_targets[("features", "o", "ir_o_mean", )]

df_features_targets[("features", "o", "Ir*O_bader/ir_o_mean", )] = \
    ir_o_bader_o / ir_o_mean_o

### Calculating ΔG_OmOH target column

In [35]:
# Difference targets, *O minus *OH, for the "g" columns (ΔG_O-OH) and then
# the "e" columns (ΔE_O-OH)
for prefix_i in ("g", "e"):
    ads_o_i = df_features_targets[("targets", prefix_i + "_o", "")]
    ads_oh_i = df_features_targets[("targets", prefix_i + "_oh", "")]

    df_features_targets[("targets", prefix_i + "_o_m_oh", "")] = \
        ads_o_i - ads_oh_i

### Adding in pre-DFT features

In [36]:
# #########################################################
# For every row, fetch the "features" entries of `df_octa_vol_init` for the
# *O system of the same (compenv, slab_id, active_site).
# #########################################################
data_dict_list = []
for name_i, row_i in df_features_targets.iterrows():
    compenv_i, slab_id_i, active_site_i = name_i[0], name_i[1], name_i[2]

    job_id_o_i = row_i[("data", "job_id_o", "")]

    # NOTE(review): the trailing 1 is a hard-coded value for the last index
    # level of `df_octa_vol_init` — confirm what it stands for
    name_octa_i = (compenv_i, slab_id_i, "o", active_site_i, 1, )
    row_octa_i = df_octa_vol_init.loc[name_octa_i]

    data_dict_i = {
        "compenv": compenv_i,
        "slab_id": slab_id_i,
        "active_site": active_site_i,
        }
    data_dict_i.update(row_octa_i["features"].to_dict())
    data_dict_list.append(data_dict_i)

# #########################################################
df_feat_pre = pd.DataFrame(data_dict_list).set_index(
    ["compenv", "slab_id", "active_site", ])

# Suffix the names with "__pre" and group them under "features_pre_dft"
new_cols = [
    ("features_pre_dft", col_i + "__pre", "")
    for col_i in df_feat_pre.columns
    ]
df_feat_pre.columns = pd.MultiIndex.from_tuples(new_cols)

df_features_targets = pd.concat([df_features_targets, df_feat_pre], axis=1)

### Reindexing the column MultiIndex to order the top-level column groups

In [45]:
# Desired order of the top-level column groups
column_group_order = [
    'targets',
    'data',
    'format',
    'features',
    'features_pre_dft',
    'features_stan',
    ]
df_features_targets = df_features_targets.reindex(
    columns=column_group_order, level=0)

### Removing rows that aren't supposed to be processed (bad slabs)

In [46]:
from methods import get_df_slabs_to_run

# Slab ids whose status is "bad"
df_slabs_to_run = get_df_slabs_to_run()
df_slabs_to_not_run = df_slabs_to_run.loc[df_slabs_to_run.status == "bad"]
slab_ids_to_not_include = df_slabs_to_not_run.slab_id.tolist()

# Keep only the rows whose slab id is not in that list
df_index = df_features_targets.index.to_frame()
keep_row_mask = ~df_index.slab_id.isin(slab_ids_to_not_include)
df_features_targets = df_features_targets.loc[keep_row_mask]

### Keeping only phase > 1 systems (OLD DEPRECATED | Getting rid of NERSC jobs is commented out)

In [47]:
# print("Getting rid of NERSC jobs and phase 1 systems")

# Getting rid of NERSC jobs

# indices_to_keep = []
# for i in df_features_targets.index:
#     if i[0] != "nersc":
#         indices_to_keep.append(i)

# df_features_targets = df_features_targets.loc[
#     indices_to_keep
#     ]

# Keep only the rows whose slab phase is greater than 1
phase_gt_1_mask = df_features_targets["data"]["phase"] > 1
df_features_targets = df_features_targets[phase_gt_1_mask]

### Printing how many `NaN` rows there are for each feature

In [48]:
# Report, per feature column, how many rows are NaN (only in verbose mode)
for col_i in df_features_targets["features"].columns:
    if not verbose:
        continue
    is_nan_i = df_features_targets["features"][col_i].isna()
    df_tmp_i = df_features_targets[is_nan_i]
    print(col_i, ":", len(df_tmp_i))

('oh', 'O_magmom') : 1
('oh', 'Ir_magmom') : 1
('oh', 'active_o_metal_dist') : 8
('oh', 'angle_O_Ir_surf_norm') : 2
('oh', 'closest_Ir_dist') : 1
('oh', 'closest_O_dist') : 1
('oh', 'ir_o_mean') : 8
('oh', 'ir_o_std') : 8
('oh', 'octa_vol') : 8
('oh', 'oxy_opp_as_bl') : 1
('oh', 'degrees_off_of_straight__as_opp') : 1
('oh', 'as_ir_opp_bl_ratio') : 8
('o', 'O_magmom') : 0
('o', 'Ir_magmom') : 0
('o', 'Ir*O_bader') : 39
('o', 'Ir_bader') : 39
('o', 'O_bader') : 39
('o', 'active_o_metal_dist') : 4
('o', 'angle_O_Ir_surf_norm') : 0
('o', 'closest_Ir_dist') : 4
('o', 'closest_O_dist') : 4
('o', 'ir_o_mean') : 4
('o', 'ir_o_std') : 4
('o', 'octa_vol') : 4
('o', 'p_band_center') : 39
('o', 'oxy_opp_as_bl') : 0
('o', 'degrees_off_of_straight__as_opp') : 0
('o', 'as_ir_opp_bl_ratio') : 4
('dH_bulk', '') : 0
('volume_pa', '') : 0
('bulk_oxid_state', '') : 0
('effective_ox_state', '') : 4
('surf_area', '') : 0
('o', 'Ir*O_bader/ir_o_mean') : 43


In [49]:
# assert False

### Write data to pickle

In [50]:
# Pickling data ###########################################
# Output goes to <PROJ_irox_oer>/workflow/feature_engineering/out_data
file_name_i = "df_features_targets.pickle"
directory = os.path.join(
    os.environ["PROJ_irox_oer"],
    "workflow/feature_engineering",
    "out_data")
path_i = os.path.join(directory, file_name_i)

# Create the output directory on first use
if not os.path.exists(directory):
    os.makedirs(directory)

with open(path_i, "wb") as fle:
    pickle.dump(df_features_targets, fle)
# #########################################################

In [51]:
# Load the dataframe through the project's reader method and preview its
# first rows (presumably this reads back the pickle written by this notebook
# -- confirm against `methods.get_df_features_targets`)
from methods import get_df_features_targets

df_features_targets_reloaded = get_df_features_targets()
df_features_targets_reloaded.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,targets,targets,targets,targets,targets,targets,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,data,format,format,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features,features_pre_dft,features_pre_dft,features_pre_dft,features_pre_dft
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,g_o,g_oh,e_o,e_oh,g_o_m_oh,e_o_m_oh,active_site,compenv,slab_id,active_site_orig__o,active_site_orig__oh,ads__o,ads__oh,att_num__o,att_num__oh,found_active_Ir__o,found_active_Ir__oh,from_oh__o,from_oh__oh,job_id_max__o,job_id_max__oh,num_missing_Os__o,num_missing_Os__oh,orig_slab_good__o,orig_slab_good__oh,used_unrelaxed_df_coord__o,used_unrelaxed_df_coord__oh,job_id_o,job_id_oh,job_id_bare,all_done,any_bare_done,any_oh_done,any_o_done,any_o_w_as_done,low_e_not_from_oh__o,low_e_not_from_oh__bare,phase,stoich,name_str,sum_norm_abs_magmom_diff,norm_sum_norm_abs_magmom_diff,overpot,lim_step,lim_step_str,lim_step_num,SE__area_J_m2,num_nonstoich_O,N_stoich_units,color,color,oh,oh,oh,oh,oh,oh,oh,oh,oh,oh,oh,oh,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,dH_bulk,volume_pa,bulk_oxid_state,effective_ox_state,surf_area,o,active_o_metal_dist__pre,ir_o_mean__pre,ir_o_std__pre,octa_vol__pre
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,stoich,norm_sum_norm_abs_magmom_diff,O_magmom,Ir_magmom,active_o_metal_dist,angle_O_Ir_surf_norm,closest_Ir_dist,closest_O_dist,ir_o_mean,ir_o_std,octa_vol,oxy_opp_as_bl,degrees_off_of_straight__as_opp,as_ir_opp_bl_ratio,O_magmom,Ir_magmom,Ir*O_bader,Ir_bader,O_bader,active_o_metal_dist,angle_O_Ir_surf_norm,closest_Ir_dist,closest_O_dist,ir_o_mean,ir_o_std,octa_vol,p_band_center,oxy_opp_as_bl,degrees_off_of_straight__as_opp,as_ir_opp_bl_ratio,Unnamed: 82_level_2,Unnamed: 83_level_2,Unnamed: 84_level_2,Unnamed: 85_level_2,Unnamed: 86_level_2,Ir*O_bader/ir_o_mean,Unnamed: 88_level_2,Unnamed: 89_level_2,Unnamed: 90_level_2,Unnamed: 91_level_2
compenv,slab_id,active_site,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3,Unnamed: 33_level_3,Unnamed: 34_level_3,Unnamed: 35_level_3,Unnamed: 36_level_3,Unnamed: 37_level_3,Unnamed: 38_level_3,Unnamed: 39_level_3,Unnamed: 40_level_3,Unnamed: 41_level_3,Unnamed: 42_level_3,Unnamed: 43_level_3,Unnamed: 44_level_3,Unnamed: 45_level_3,Unnamed: 46_level_3,Unnamed: 47_level_3,Unnamed: 48_level_3,Unnamed: 49_level_3,Unnamed: 50_level_3,Unnamed: 51_level_3,Unnamed: 52_level_3,Unnamed: 53_level_3,Unnamed: 54_level_3,Unnamed: 55_level_3,Unnamed: 56_level_3,Unnamed: 57_level_3,Unnamed: 58_level_3,Unnamed: 59_level_3,Unnamed: 60_level_3,Unnamed: 61_level_3,Unnamed: 62_level_3,Unnamed: 63_level_3,Unnamed: 64_level_3,Unnamed: 65_level_3,Unnamed: 66_level_3,Unnamed: 67_level_3,Unnamed: 68_level_3,Unnamed: 69_level_3,Unnamed: 70_level_3,Unnamed: 71_level_3,Unnamed: 72_level_3,Unnamed: 73_level_3,Unnamed: 74_level_3,Unnamed: 75_level_3,Unnamed: 76_level_3,Unnamed: 77_level_3,Unnamed: 78_level_3,Unnamed: 79_level_3,Unnamed: 80_level_3,Unnamed: 81_level_3,Unnamed: 82_level_3,Unnamed: 83_level_3,Unnamed: 84_level_3,Unnamed: 85_level_3,Unnamed: 86_level_3,Unnamed: 87_level_3,Unnamed: 88_level_3,Unnamed: 89_level_3,Unnamed: 90_level_3,Unnamed: 91_level_3
nersc,buvivore_13,38.0,1.516495,0.145745,1.472495,-0.148755,1.37075,1.62125,38.0,nersc,buvivore_13,38,38.0,o,oh,1,0.0,True,True,True,True,fidivuwi_89,nipidida_98,0.0,0.0,True,True,False,False,fidivuwi_89,nipidida_98,halavamu_98,True,True,True,True,True,False,False,2,AB2,nersc__buvivore_13__038,0.23826,0.07942,0.720936,"[ooh, bulk]",ooh__bulk,4.0,0.586438,8.0,24.0,#46cf44,0.07942,0.124,0.405,1.924408,1.782806,3.633028,3.224902,2.002212,0.051808,10.295925,2.02749,9.275869,0.949158,0.696,0.73,,,,1.782397,1.4565,3.686693,3.267705,1.999222,0.104375,10.174465,,2.105314,11.713935,0.846618,-0.56845,12.531646,4,5.666667,76.649271,,2.00216,2.015599,0.019019,10.482594
nersc,dakoputu_58,74.0,2.743977,1.35568,2.699977,1.06118,1.388297,1.638797,74.0,nersc,dakoputu_58,74,74.0,o,oh,1,3.0,True,True,True,True,habamehi_10,buvawasa_30,2.0,0.0,True,False,True,False,habamehi_10,buvawasa_30,dibomehi_65,True,True,True,True,True,False,False,2,AB2,nersc__dakoputu_58__074,0.115908,0.038636,0.398612,"[o, ooh]",o__ooh,3.0,0.997535,11.0,23.0,#46cf44,0.038636,0.01,0.294,1.902083,85.900628,3.611606,2.60448,1.940913,0.150763,9.397154,2.135911,13.825899,0.890525,0.014,0.032,-1.994416,2.873393,-0.694098,1.750519,92.325157,3.591594,2.687644,1.948565,0.219401,9.31443,-3.041147,2.19741,20.148772,0.796628,-0.735138,11.952412,4,8.333333,82.764943,-1.023531,2.049825,2.018042,0.027765,10.462885
nersc,dakoputu_58,75.0,2.792433,1.294538,2.748433,1.000038,1.497896,1.748396,75.0,nersc,dakoputu_58,75,75.0,o,oh,1,1.0,True,True,True,True,hatabufi_70,kutabadu_95,0.0,0.0,True,True,False,False,hatabufi_70,kutabadu_95,sanegusa_24,True,True,True,True,True,False,False,2,AB2,nersc__dakoputu_58__075,0.172235,0.057412,0.27923,"[o, ooh]",o__ooh,3.0,0.997535,11.0,23.0,#46cf44,0.057412,0.006,0.428,1.899401,83.096801,3.436343,2.705932,1.979701,0.122135,9.928546,2.034289,19.243276,0.933693,0.185,0.523,-1.699462,2.506493,-0.678024,1.761406,84.838339,3.435932,2.862594,1.972141,0.163273,9.692582,-2.77186,2.078597,25.459917,0.847401,-0.735138,11.952412,4,7.333333,82.764943,-0.861735,1.968475,2.018114,0.027837,10.464302
nersc,dakoputu_58,76.0,2.578245,1.452632,2.534245,1.158132,1.125613,1.376113,76.0,nersc,dakoputu_58,76,76.0,o,oh,1,1.0,True,True,True,True,ditogotu_52,vokuvige_58,0.0,0.0,True,False,False,False,ditogotu_52,vokuvige_58,metuwifa_55,True,True,True,True,True,False,False,2,AB2,nersc__dakoputu_58__076,0.083184,0.027728,0.676808,"[o, ooh]",o__ooh,3.0,0.997146,11.0,23.0,#46cf44,0.027728,0.012,0.503,1.927104,60.545047,3.487364,2.987158,1.954253,0.166545,9.647097,1.928698,22.135452,0.999174,0.005,0.233,-2.048709,2.856678,-0.717165,1.744237,63.032976,3.217964,2.898253,1.947287,0.211677,9.327337,-2.94343,1.97013,30.241444,0.885341,-0.735138,11.952412,4,8.333333,82.764943,-1.052084,2.009364,2.018042,0.027765,10.462885
nersc,dakoputu_58,77.0,2.471656,0.888724,2.427656,0.594224,1.582932,1.833432,77.0,nersc,dakoputu_58,77,77.0,o,oh,1,1.0,True,True,True,True,timeviri_63,madopana_89,1.0,0.0,True,False,True,False,timeviri_63,madopana_89,dunopafu_19,True,True,True,True,True,False,False,2,AB2,nersc__dakoputu_58__077,0.126044,0.042015,0.352932,"[oh, o]",oh__o,2.0,0.998416,11.0,23.0,#46cf44,0.042015,0.014,0.082,1.936845,43.906615,3.622015,2.663475,1.942756,0.151443,9.367321,2.049189,6.602723,0.945176,0.004,0.013,-1.818367,2.878498,-0.631707,1.761731,43.199083,3.661047,2.702575,1.944606,0.214784,9.279538,-2.778007,2.284935,5.990649,0.77102,-0.735138,11.952412,4,8.333333,82.764943,-0.935083,2.020909,2.018042,0.027765,10.462885


In [52]:
# Display the feature columns of the combined dataframe
df_features_targets["features"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,oh,oh,oh,oh,oh,oh,oh,oh,oh,oh,oh,oh,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,dH_bulk,volume_pa,bulk_oxid_state,effective_ox_state,surf_area,o
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,O_magmom,Ir_magmom,active_o_metal_dist,angle_O_Ir_surf_norm,closest_Ir_dist,closest_O_dist,ir_o_mean,ir_o_std,octa_vol,oxy_opp_as_bl,degrees_off_of_straight__as_opp,as_ir_opp_bl_ratio,O_magmom,Ir_magmom,Ir*O_bader,Ir_bader,O_bader,active_o_metal_dist,angle_O_Ir_surf_norm,closest_Ir_dist,closest_O_dist,ir_o_mean,ir_o_std,octa_vol,p_band_center,oxy_opp_as_bl,degrees_off_of_straight__as_opp,as_ir_opp_bl_ratio,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Ir*O_bader/ir_o_mean
compenv,slab_id,active_site,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2
nersc,buvivore_13,38.0,0.124,0.405,1.924408,1.782806,3.633028,3.224902,2.002212,0.051808,10.295925,2.027490,9.275869,0.949158,0.696,0.730,,,,1.782397,1.456500,3.686693,3.267705,1.999222,0.104375,10.174465,,2.105314,11.713935,0.846618,-0.568450,12.531646,4,5.666667,76.649271,
nersc,dakoputu_58,74.0,0.010,0.294,1.902083,85.900628,3.611606,2.604480,1.940913,0.150763,9.397154,2.135911,13.825899,0.890525,0.014,0.032,-1.994416,2.873393,-0.694098,1.750519,92.325157,3.591594,2.687644,1.948565,0.219401,9.314430,-3.041147,2.197410,20.148772,0.796628,-0.735138,11.952412,4,8.333333,82.764943,-1.023531
nersc,dakoputu_58,75.0,0.006,0.428,1.899401,83.096801,3.436343,2.705932,1.979701,0.122135,9.928546,2.034289,19.243276,0.933693,0.185,0.523,-1.699462,2.506493,-0.678024,1.761406,84.838339,3.435932,2.862594,1.972141,0.163273,9.692582,-2.771860,2.078597,25.459917,0.847401,-0.735138,11.952412,4,7.333333,82.764943,-0.861735
nersc,dakoputu_58,76.0,0.012,0.503,1.927104,60.545047,3.487364,2.987158,1.954253,0.166545,9.647097,1.928698,22.135452,0.999174,0.005,0.233,-2.048709,2.856678,-0.717165,1.744237,63.032976,3.217964,2.898253,1.947287,0.211677,9.327337,-2.943430,1.970130,30.241444,0.885341,-0.735138,11.952412,4,8.333333,82.764943,-1.052084
nersc,dakoputu_58,77.0,0.014,0.082,1.936845,43.906615,3.622015,2.663475,1.942756,0.151443,9.367321,2.049189,6.602723,0.945176,0.004,0.013,-1.818367,2.878498,-0.631707,1.761731,43.199083,3.661047,2.702575,1.944606,0.214784,9.279538,-2.778007,2.284935,5.990649,0.771020,-0.735138,11.952412,4,8.333333,82.764943,-0.935083
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
slac,wonataro_02,64.0,0.003,0.049,1.928650,66.183779,3.679039,3.076929,1.926832,0.101255,9.105478,1.917148,18.214122,1.006000,0.220,0.068,-1.746720,2.748144,-0.635600,1.764480,66.748371,3.747862,3.038895,1.911879,0.114769,8.817489,-2.413283,1.955354,23.005478,0.902384,-0.599425,15.036089,6,8.000000,82.216630,-0.913615
slac,wowukoli_59,27.0,0.019,0.066,1.935593,56.411728,3.641954,2.890194,1.978550,0.162758,10.188347,2.099253,1.741675,0.922039,0.025,0.061,-1.761933,2.708886,-0.650427,1.753292,65.694611,3.568594,2.624510,1.992317,0.238801,10.175932,-3.058257,2.222942,11.185747,0.788726,-0.691297,10.810322,4,8.000000,22.461235,-0.884363
slac,wufulafe_03,56.0,0.078,0.095,1.916197,16.396962,3.603952,2.778521,1.985642,0.040405,10.171034,2.001183,2.913593,0.957532,0.461,0.380,-1.431377,2.328897,-0.614616,1.766557,24.970682,3.438168,2.709465,1.971451,0.102071,9.923953,-2.824867,2.063066,11.513787,0.856278,-0.700424,11.389867,4,5.666667,57.210419,-0.726053
slac,wufulafe_03,57.0,0.019,0.041,1.938808,31.792455,3.563218,2.759678,1.947774,0.058912,9.723260,2.015630,3.596863,0.961887,0.483,0.406,-1.487388,2.446655,-0.607927,1.780165,31.750716,3.499725,2.726953,1.944713,0.092489,9.545081,-2.630303,2.075884,6.387289,0.857545,-0.700424,11.389867,4,6.333333,57.210419,-0.764836


In [53]:
# #########################################################
# Closing banner: completion message, elapsed wall time in minutes since
# `ti` was recorded, and the notebook name
banner_line = 20 * "# # "
elapsed_min = np.round((time.time() - ti) / 60, 3)

print(banner_line)
print("All done!")
print("Run time:", elapsed_min, "min")
print("combine_features_targets.ipynb")
print(banner_line)
# #########################################################

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
All done!
Run time: 1.763 min
combine_features_targets.ipynb
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
