# INSTALLATION

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import sys
from pathlib import Path

# Make the in-repo package under ../src importable without installing it.
# The path is resolved to an absolute location so the entry stays valid even
# if the working directory changes after this cell has run.
src_path = Path("../src").resolve()

# Guard the insert so re-running this cell does not stack duplicate entries.
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))

In [3]:
import logging

# Configure the root logger at WARNING so other modules stay quiet
logging.basicConfig(level=logging.WARNING)

# Lower only the "faex" logger to DEBUG so the library's own traces are shown
faex_logger = logging.getLogger("faex")
faex_logger.setLevel(logging.DEBUG)

# DATASET AND MODEL

In [4]:
import pandas as pd
import numpy as np

# Tunable settings used by this cell and the model cell below
TEST_SIZE = 0.2
RANDOM_STATE = 42
N_ESTIMATORS = 30

# Download dataset (kept under its own name so the raw frame is never overwritten)
url = "https://raw.githubusercontent.com/christophM/interpretable-ml-book/master/data/bike.csv"
bikes_raw = pd.read_csv(url)

# Columns kept for the study; "cnt" is the target
columns = ["temp", "hum", "windspeed", "mnth", "yr", "cnt"]

# Month abbreviation -> month number
# NOTE(review): Series.map yields NaN for any label missing from this dict;
# confirm the CSV uses exactly these abbreviations.
mnth_map = { "JAN": 1, "FEB": 2, "MAR": 3, "APR": 4, "MAY": 5, "JUN": 6, "JUL": 7, "AUG": 8, "SEP": 9, "OCT": 10, "NOV": 11, "DEC": 12 }

# Build the working frame in a single expression: select the columns, then
# replace "mnth" via assign(), which returns a new frame instead of writing
# into a column subset (avoids SettingWithCopyWarning and keeps the cell idempotent).
df = bikes_raw[columns].assign(mnth=lambda frame: frame["mnth"].map(mnth_map))

# Show dataset
df.head()

Unnamed: 0,temp,hum,windspeed,mnth,yr,cnt
0,1.229108,43.7273,16.636703,1,2011,1349
1,1.4,59.0435,10.739832,1,2011,1562
2,2.666979,43.6957,12.5223,1,2011,1600
3,1.604356,51.8261,6.000868,1,2011,1606
4,1.236534,49.8696,11.304642,1,2011,1510


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# Separate the predictors from the target column "cnt"
df_x = df.drop(columns=["cnt"])
df_y = df["cnt"]

# Divide in train and test set
X_train, X_test, y_train, y_test = train_test_split(
    df_x,
    df_y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# Train model; the forest size comes from N_ESTIMATORS (defined with the other
# settings) instead of a repeated literal, so changing the constant takes effect.
model = RandomForestRegressor(random_state=RANDOM_STATE, n_estimators=N_ESTIMATORS)
model.fit(X_train, y_train)

# Evaluate model: score() is reported as R2 on both splits
r2_train = model.score(X_train, y_train)
r2_test = model.score(X_test, y_test)

print(f"R2 on train set: {r2_train:.3f}")
print(f"R2 on test set: {r2_test:.3f}")

R2 on train set: 0.978
R2 on test set: 0.829


# EXPLANATIONS

In [6]:
from faex.core.DataCore import DataCore
from faex.core.ExplanationCore import ExplanationCore

# Feature under study and the bins setting handed to DataCore
FEATURE = "hum"
BINS = 30

# DataCore is built from the full feature frame, the fitted model and the studied feature
datacore = DataCore(df_X=df_x, model=model, study_features=[FEATURE], bins=BINS)

# The explainer wraps the DataCore and is reused by the visualization cells
explainer = ExplanationCore(datacore=datacore)

DEBUG:faex.utils.SingletonFactory:Singleton created for class ExplainerFactory
DEBUG:faex.explaining.ExplainerFactory:Registering explainer 'ice' with aliases: ['ice']
DEBUG:faex.explaining.ExplainerFactory:Registering explainer 'icescatter' with aliases: ['ice-scatter']
DEBUG:faex.explaining.ExplainerFactory:Registering explainer 'lice' with aliases: ['l-ice', 'local-ice']
DEBUG:faex.explaining.ExplainerFactory:Registering explainer 'lpdp' with aliases: ['l-pdp', 'local-pdp']
DEBUG:faex.explaining.ExplainerFactory:Registering explainer 'lpdpdistribution' with aliases: ['l-pdp-d']
DEBUG:faex.explaining.ExplainerFactory:Registering explainer 'lpdperror' with aliases: ['l-pdp-e']
DEBUG:faex.explaining.ExplainerFactory:Registering explainer 'mice' with aliases: ['m-ice', 'marginal-ice']
DEBUG:faex.explaining.ExplainerFactory:Registering explainer 'mpdp' with aliases: ['m-pdp', 'marginal-pdp']
DEBUG:faex.explaining.ExplainerFactory:Registering explainer 'pdp' with aliases: ['pdp', 'Partial

# VISUALIZATION

In [7]:
# Flag forwarded to visualize_doubleplot(matplotlib=...) by every visualization cell
USE_MATPLOTLIB = False

# PDP view: histogram and distribution alongside "pdp" and "pdp-d"
pdp_explanations = ["histogram", "distribution", "pdp", "pdp-d"]

visualization = explainer.visualize_doubleplot(
    explanations=pdp_explanations,
    matplotlib=USE_MATPLOTLIB,
)
visualization.show()

DEBUG:faex.explaining.ExplainerFactory:Creating explainer for technique: ice
DEBUG:faex.explaining.explainers.ICE:ICE explanation generation
DEBUG:faex.explaining.explainers.ICE:ICE grid shape: (30,)
DEBUG:faex.explaining.explainers.ICE:ICE grid to predict dataframe size: (21840, 5)
DEBUG:faex.explaining.ExplainerFactory:Creating explainer for technique: histogram
DEBUG:faex.explaining.ExplainerFactory:Creating explainer for technique: distribution
DEBUG:faex.explaining.ExplainerFactory:Creating explainer for technique: pdp
DEBUG:faex.explaining.explainers.PDP:PDP explanation generation
DEBUG:faex.explaining.explainers.PDP:PDP grid shape: (30,)
DEBUG:faex.explaining.ExplainerFactory:Creating explainer for technique: pdpdistribution
DEBUG:faex.explaining.explainers.PDP:PDP distribution visualization generation
DEBUG:faex.explaining.explainers.PDP:PDP distribution explanation generation
DEBUG:faex.data.holder_to_plotter:X values: (30,)
DEBUG:faex.data.holder_to_plotter:Mean values: (30,)

In [8]:
# Explaining m-ICE / m-PDP on top of the real predictions, histogram and distribution
marginal_explanations = ["real-prediction", "histogram", "distribution", "mice", "mpdp"]

visualization = explainer.visualize_doubleplot(
    explanations=marginal_explanations,
    matplotlib=USE_MATPLOTLIB,
)
visualization.show()

DEBUG:faex.explaining.ExplainerFactory:Creating explainer for technique: realprediction
DEBUG:faex.explaining.ExplainerFactory:Creating explainer for technique: mice
DEBUG:faex.explaining.explainers.M_ICE:m-ICE visualization generation
DEBUG:faex.explaining.explainers.M_ICE:m-ICE explanation generation
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 20 plotters to Plotly.
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 1 plotters to Plotly.
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 1 plotters to Plotly.
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 4 plotters to Plotly.
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 6 plotters to Plotly.
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 10 plotters to Plotly.
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 30 plotters to Plotly.
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 39 plotters to Plotly.
DEBUG:faex.plotting.DataPlot

In [9]:
# Explaining l-PDP: the kernel normalizer plus "l-pdp", "l-pdp-d" and "l-pdp-e"
local_explanations = [
    "real-prediction",
    "histogram",
    "distribution",
    "KernelNormalizer",
    "l-pdp",
    "l-pdp-d",
    "l-pdp-e",
]

visualization = explainer.visualize_doubleplot(
    explanations=local_explanations,
    matplotlib=USE_MATPLOTLIB,
)
visualization.show()

DEBUG:faex.explaining.explainers.kernel:Plotting kernel normalizer
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 2 plotters to Plotly.
DEBUG:faex.explaining.explainers.L_PDP:Plotting l-PDP with params: {'color': 'darkgreen', 'label': 'l-PDP', 'linewidth': 3, 'opacity': 1.0}
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 29 plotters to Plotly.
DEBUG:faex.explaining.explainers.L_PDP:PDP distribution visualization generation
DEBUG:faex.data.holder_to_plotter:X values: (30,)
DEBUG:faex.data.holder_to_plotter:Mean values: (30,)
DEBUG:faex.data.holder_to_plotter:Std values: (30,)
DEBUG:faex.data.holder_to_plotter:Sigma level 1: x=(30,) y_min=(30,), y_max=(30,)
DEBUG:faex.data.holder_to_plotter:Added area for sigma level 1: y_min=[1307.50493175 1277.99377617 4415.09048676 2693.80862189 2116.22016136
 2226.10928597 2564.74016187 2495.35021456 2090.54640145 2120.71070254
 2378.81644222 2605.95041194 2651.97985488 2819.26354204 2970.6751729
 3251.14056266 3262.992909

In [10]:
# Joint explanations: l-PDP, m-PDP and PDP in one figure, with per-explanation plot arguments
joint_explanations = [
    "real-prediction",
    "histogram",
    "distribution",
    "KernelNormalizer",
    "l-pdp",
    "m-pdp",
    "pdp",
]

# "real-prediction" gets alpha 0.2; each of the three PDP variants gets its own {"alpha": 0.8} dict
joint_plot_arguments = {
    "real-prediction": {"alpha": 0.2},
    **{name: {"alpha": 0.8} for name in ("l-pdp", "m-pdp", "pdp")},
}

visualization = explainer.visualize_doubleplot(
    explanations=joint_explanations,
    plot_arguments=joint_plot_arguments,
    matplotlib=USE_MATPLOTLIB,
)
visualization.show()

DEBUG:faex.explaining.explainers.kernel:Plotting kernel normalizer
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 2 plotters to Plotly.
DEBUG:faex.explaining.explainers.L_PDP:Plotting l-PDP with params: {'alpha': 0.8, 'color': 'darkgreen', 'label': 'l-PDP', 'linewidth': 3, 'opacity': 1.0}
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 29 plotters to Plotly.
DEBUG:faex.explaining.explainers.M_PDP:m-PDP visualization generation
DEBUG:faex.plotting.DataPlotter:Plotting DP2_Collection with 20 plotters to Plotly.
