# INSTALLATION

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import sys
from pathlib import Path

# Add the src directory to the Python path so that packages located there
# can be imported by later cells.
# The path is resolved to an absolute one (relative to the current working
# directory at the time this cell runs) so the entry stays valid even if the
# working directory changes afterwards.
src_path = Path("../src").resolve()

# Insert only once: re-running this cell must not pile up duplicate entries.
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))

# DATASET AND MODEL

In [None]:
import pandas as pd
import numpy as np

# Configuration shared by the cells below: test split fraction, random seed
# and number of trees for the random forest.
TEST_SIZE = 0.2
RANDOM_STATE = 42
N_ESTIMATORS = 30

# Download dataset
url = "https://raw.githubusercontent.com/christophM/interpretable-ml-book/master/data/bike.csv"
df_raw = pd.read_csv(url)

# Columns kept for this notebook; "cnt" is the target, the rest are features.
columns = ["temp", "hum", "windspeed", "mnth", "yr", "cnt"]

# Month label -> month number
mnth_map = { "JAN": 1, "FEB": 2, "MAR": 3, "APR": 4, "MAY": 5, "JUN": 6, "JUL": 7, "AUG": 8, "SEP": 9, "OCT": 10, "NOV": 11, "DEC": 12 }

# Series.map silently turns every label that is missing from mnth_map into NaN,
# which would then leak into model training. Fail loudly instead.
unknown_months = df_raw.loc[~df_raw["mnth"].isin(list(mnth_map)), "mnth"].unique()
if len(unknown_months) > 0:
    raise ValueError(
        f"Values in 'mnth' without an entry in mnth_map: {list(unknown_months)}"
    )

# Build the working frame from the untouched raw frame: select the columns and
# replace the month labels by their numbers. `.assign` returns a new frame, so
# df_raw is left as downloaded.
df = df_raw[columns].assign(mnth=lambda frame: frame["mnth"].map(mnth_map))

# Show dataset
df.head()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Separate the feature columns from the target column "cnt"
df_x = df.drop(columns=["cnt"])
df_y = df["cnt"]

# Divide in train and test set
X_train, X_test, y_train, y_test = train_test_split(
    df_x,
    df_y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# Train model.
# The number of trees comes from the N_ESTIMATORS configuration constant
# instead of a hard-coded literal, so changing the constant actually takes effect.
model = RandomForestRegressor(
    random_state=RANDOM_STATE,
    n_estimators=N_ESTIMATORS,
)
model.fit(X_train, y_train)

# Evaluate model on both splits
r2_train = model.score(X_train, y_train)
r2_test = model.score(X_test, y_test)

print(f"R2 on train set: {r2_train:.3f}")
print(f"R2 on test set: {r2_test:.3f}")

# EXPLANATIONS

In [None]:
from faex.core.DataCore import DataCore
from faex.core.ExplanationCore import ExplanationCore

# Feature to study and the `bins` setting handed to DataCore
# (presumably the resolution along the feature axis; confirm against faex).
FEATURE = "temp"
BINS = 200

# DataCore receives the feature frame, the fitted model and the single
# feature under study.
datacore = DataCore(df_X=df_x, model=model, study_features=[FEATURE], bins=BINS)

# The explainer used by the visualization cells is built on top of the DataCore.
explainer = ExplanationCore(datacore=datacore)

# VISUALIZATION

In [None]:
# Value passed as the `matplotlib` argument of every visualization call.
USE_MATPLOTLIB = True

# Explanations requested for the PDP view
pdp_explanations = [
    "real-prediction",
    "histogram",
    "distribution",
    "ice",
    "pdp",
    "pdp-d",
]

visualization = explainer.visualize_doubleplot(
    explanations=pdp_explanations, matplotlib=USE_MATPLOTLIB
)
visualization.show()

In [None]:
# Explanations requested for the "mice" / "mpdp" view.
# NOTE(review): the joint cell further down spells the key "m-pdp" while this
# cell uses "mpdp" -- confirm which identifiers visualize_doubleplot accepts.
mpdp_explanations = [
    "real-prediction",
    "histogram",
    "distribution",
    "mice",
    "mpdp",
]

visualization = explainer.visualize_doubleplot(
    explanations=mpdp_explanations, matplotlib=USE_MATPLOTLIB
)
visualization.show()

In [None]:
# Explanations requested for the l-PDP view
lpdp_explanations = [
    "real-prediction",
    "histogram",
    "distribution",
    "KernelNormalizer",
    "l-pdp",
    "l-pdp-d",
]

visualization = explainer.visualize_doubleplot(
    explanations=lpdp_explanations, matplotlib=USE_MATPLOTLIB
)
visualization.show()

In [None]:
# Joint view: several explanations in one plot, with per-explanation
# plot arguments.
joint_explanations = [
    "real-prediction",
    "histogram",
    "distribution",
    "KernelNormalizer",
    "l-pdp",
    "m-pdp",
    "pdp",
]

# "real-prediction" gets alpha 0.2; each of the three PDP variants gets its
# own {"alpha": 0.8} dict, in the order l-pdp, m-pdp, pdp.
joint_plot_arguments = {"real-prediction": {"alpha": 0.2}}
joint_plot_arguments.update(
    {name: {"alpha": 0.8} for name in ("l-pdp", "m-pdp", "pdp")}
)

visualization = explainer.visualize_doubleplot(
    explanations=joint_explanations,
    plot_arguments=joint_plot_arguments,
    matplotlib=USE_MATPLOTLIB,
)
visualization.show()