In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from config.config_modeling import CAT_COLS, TRAIN_SIZE, TEST_FROM_VAL, RANDOM_STATE
from src.modeling.create_data_split import split_data
from src.utils.models_pkl import load_pickle
from scipy import stats
from src.fairness.FPDP import (
    fairness_pdp_cat,
    fairness_pdp_num,
    fairness_pdp_ohe,
    fpdp_dataset,
    remove_sensitive_variable,
)
from src.explanation.global_.utils import get_underscore, ohe_filter
import warnings

In [None]:
warnings.filterwarnings("ignore")

# Initializing the data, model, and feature groups

In [None]:
# Locations are relative to the notebook's working directory.
DATA_DIR = Path("../data")
MODEL_DIR = Path("../models")

# Input table and pickled model consumed by the cells below.
IN_PATH = DATA_DIR.joinpath("data.csv")
MODEL_PATH = MODEL_DIR.joinpath("XGB.pkl")

In [None]:
# Read the input CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer=IN_PATH)

# Restore the fitted model from disk.
# NOTE(review): load_pickle presumably unpickles the file; unpickling can run
# arbitrary code, so MODEL_PATH must only ever point at a trusted artifact.
model = load_pickle(MODEL_PATH)

In [None]:
# Arguments for the project splitter, all taken from the modeling config.
split_kwargs = {
    "cols": CAT_COLS,
    "df": data,
    "train_size": TRAIN_SIZE,
    "test_size": TEST_FROM_VAL,
    "random_state": RANDOM_STATE,
}
ohe_data = split_data(**split_kwargs)

# Each partition is indexed positionally: slot 0 is bound to the X_* names and
# slot 1 (train only) to Y_train — presumably features and target; confirm
# against split_data.
train_split = ohe_data["train"]
X_train, Y_train = train_split[0], train_split[1]
X_test = ohe_data["test"][0]

In [None]:
# Column names of the training matrix, as a plain Python list.
cols = X_train.columns.values.tolist()

# get_underscore returns an indexable pair; slot 0 / slot 1 are bound to the
# "underscore" / "non underscore" names below.
# NOTE(review): presumably columns with an underscore are one-hot dummies — confirm.
underscore = get_underscore(cols)
underscore_cols, non_underscore_cols = underscore[0], underscore[1]

# ohe_filter yields two collections: slot 0 feeds the non-one-hot feature list
# (first item of every entry), slot 1 is used as the one-hot groups.
ohe_results = ohe_filter(non_underscore_cols, underscore_cols)
ohe = ohe_results[1]
non_ohe = [group[0] for group in ohe_results[0]]

# Numerical features get their own plot type, so take them out of the
# categorical list. list.remove raises ValueError if a name is missing.
numerical = ["Year"]
for numerical_feature in numerical:
    non_ohe.remove(numerical_feature)

# Gender

In [None]:
# Protected attribute analysed in this section.
sensitive_topic = "Gender"

# Sensitive-attribute values used by the fairness plots below.
# NOTE(review): split_data is called with random_state=RANDOM_STATE but this
# call is not; confirm fpdp_dataset reproduces the same test rows as X_test.
sensitive_values = fpdp_dataset(
    data, sensitive_topic, cat_cols=CAT_COLS, train_size=TRAIN_SIZE, test_size=TEST_FROM_VAL
)

# Always start from the full one-hot group list rather than from whatever `ohe`
# currently holds, so this cell gives the same result no matter which section
# ran last or how many times it is re-run. A shallow copy is passed so the
# master list in ohe_results is not handed out directly.
# NOTE(review): assumes ohe_results[1] is a list — confirm against ohe_filter.
ohe = remove_sensitive_variable(list(ohe_results[1]), sensitive_topic)

### One Hot Encoded variables

In [None]:
# Arguments that stay the same for every one-hot group in the Gender section.
gender_ohe_kwargs = dict(
    protected_group="F",
    X=X_test,
    sensitive_values=sensitive_values,
    fig_size=(10, 5),
    p_value_threshold=0.05,
)

# One fairness PDP call per one-hot group. `fairness_pdp` is rebound on every
# pass, so only the result for the last group is still available afterwards.
for element in ohe:
    fairness_pdp = fairness_pdp_ohe(model, feature_names=element, **gender_ohe_kwargs)

### Categorical, not one hot encoded variables

In [None]:
# Arguments that stay the same for every categorical feature in the Gender section.
gender_cat_kwargs = dict(
    protected_group="F",
    X=X_test,
    sensitive_values=sensitive_values,
    fig_size=(10, 5),
    p_value_threshold=0.05,
)

# One fairness PDP call per non-one-hot categorical feature; the feature name
# doubles as the x-axis label. Only the last result stays bound to `fairness_pdp`.
for element in non_ohe:
    fairness_pdp = fairness_pdp_cat(
        model, feature=element, x_label=element, **gender_cat_kwargs
    )

### Numerical variables

In [None]:
# The only numerical feature configured in `numerical`.
numerical_feature_name = numerical[0]

# Fairness PDP for the numerical feature with "F" as the protected group.
# NOTE(review): step=1.0 is presumably the grid spacing over the feature's
# values — confirm against fairness_pdp_num.
Year_FPDP = fairness_pdp_num(
    model=model,
    X=X_test,
    sensitive_values=sensitive_values,
    protected_group="F",
    feature=numerical_feature_name,
    step=1.0,
    p_value_threshold=0.05,
    fig_size=(10, 5),
)

# Race

In [None]:
# Reset `ohe` to the full set of one-hot groups before filtering for the next
# protected attribute. A shallow copy is taken so that later changes made
# through `ohe` cannot alter the master list stored in ohe_results.
# NOTE(review): assumes ohe_results[1] is a list — confirm against ohe_filter.
ohe = list(ohe_results[1])

In [None]:
# Protected attribute analysed in this section.
sensitive_topic = "Race"

# Sensitive-attribute values used by the fairness plots below.
# NOTE(review): split_data is called with random_state=RANDOM_STATE but this
# call is not; confirm fpdp_dataset reproduces the same test rows as X_test.
sensitive_values = fpdp_dataset(
    data, sensitive_topic, cat_cols=CAT_COLS, train_size=TRAIN_SIZE, test_size=TEST_FROM_VAL
)

# Always start from the full one-hot group list rather than from whatever `ohe`
# currently holds, so this cell gives the same result no matter which section
# ran last or how many times it is re-run. A shallow copy is passed so the
# master list in ohe_results is not handed out directly.
# NOTE(review): assumes ohe_results[1] is a list — confirm against ohe_filter.
ohe = remove_sensitive_variable(list(ohe_results[1]), sensitive_topic)

### One Hot Encoded variables

In [None]:
# Arguments that stay the same for every one-hot group in the Race section.
race_ohe_kwargs = dict(
    protected_group="BLACK",
    X=X_test,
    sensitive_values=sensitive_values,
    fig_size=(10, 5),
    p_value_threshold=0.05,
)

# One fairness PDP call per one-hot group. `fairness_pdp` is rebound on every
# pass, so only the result for the last group is still available afterwards.
for element in ohe:
    fairness_pdp = fairness_pdp_ohe(model, feature_names=element, **race_ohe_kwargs)

### Categorical, not one hot encoded variables

In [None]:
# Arguments that stay the same for every categorical feature in the Race section.
race_cat_kwargs = dict(
    protected_group="BLACK",
    X=X_test,
    sensitive_values=sensitive_values,
    fig_size=(10, 5),
    p_value_threshold=0.05,
)

# One fairness PDP call per non-one-hot categorical feature; the feature name
# doubles as the x-axis label. Only the last result stays bound to `fairness_pdp`.
for element in non_ohe:
    fairness_pdp = fairness_pdp_cat(
        model, feature=element, x_label=element, **race_cat_kwargs
    )

### Numerical variables

In [None]:
# The only numerical feature configured in `numerical`.
numerical_feature_name = numerical[0]

# Fairness PDP for the numerical feature with "BLACK" as the protected group.
# This rebinds Year_FPDP, replacing any value an earlier section stored there.
# NOTE(review): step=1.0 is presumably the grid spacing over the feature's
# values — confirm against fairness_pdp_num.
Year_FPDP = fairness_pdp_num(
    model=model,
    X=X_test,
    sensitive_values=sensitive_values,
    protected_group="BLACK",
    feature=numerical_feature_name,
    step=1.0,
    p_value_threshold=0.05,
    fig_size=(10, 5),
)