In [None]:
from interpret import show
from interpret.glassbox import ExplainableBoostingRegressor

from sklearn.model_selection import train_test_split
from sklearn import datasets

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# Load the scikit-learn diabetes dataset, then split it into the feature
# matrix and the regression target.
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target

In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Names for the ten columns of the feature matrix, in column order.
feature_names = ["age", "sex", "bmi", "bp", "s1", "s2", "s3", "s4", "s5", "s6"]

# One type entry per feature name; every column is declared continuous.
feature_types = ["continuous"] * len(feature_names)


In [5]:
# EBM regressor configuration
#
# smoothing_rounds: used to initiate the boosting process in a non-greedy
#   manner before regular boosting takes over.
# interactions: controls how interaction terms are handled by the model, i.e.
#   terms that capture the combined effect of multiple features beyond their
#   individual contributions. It can be specified in more than one way:
#   1. Integer (1 or greater): the exact number of pairwise interactions to be
#      selected automatically and included in the model. interactions=3 detects
#      and includes the 3 strongest pairwise interactions.
#   2. List of tuples, e.g. [(feature_1, feature_2), (feature_3, feature_4)]:
#      manually specifies the interactions to include. This gives explicit
#      control over which interactions are considered, so the model can be
#      customized from domain knowledge or prior analysis.
#      Example: interactions=[(4, 5), (7, 8, 9)] would include the pairwise
#      interaction between columns 4 and 5 (s1 and s2: total cholesterol and
#      LDL per the dataset description) and a three-way interaction among
#      columns 7, 8 and 9 (s4, s5 and s6), allowing higher-order interactions
#      to be considered.

# All settings are passed by keyword so the call does not depend on the
# positional order of the estimator's constructor parameters.
ebm = ExplainableBoostingRegressor(
    feature_names=feature_names,
    feature_types=feature_types,
    interactions=3,
    smoothing_rounds=5000,
    reg_alpha=10,
)
print(ebm)

ExplainableBoostingRegressor(feature_names=['age', 'sex', 'bmi', 'bp', 's1',
                                            's2', 's3', 's4', 's5', 's6'],
                             feature_types=['continuous', 'continuous',
                                            'continuous', 'continuous',
                                            'continuous', 'continuous',
                                            'continuous', 'continuous',
                                            'continuous', 'continuous'],
                             interactions=3, reg_alpha=10,
                             smoothing_rounds=5000)


In [None]:
# Train the EBM on the training split; the fitted estimator is the cell's
# last expression, so the notebook displays it.
ebm.fit(X=X_train, y=y_train)


0,1,2
,feature_names,"['age', 'sex', ...]"
,feature_types,"['continuous', 'continuous', ...]"
,max_bins,1024
,max_interaction_bins,64
,interactions,3
,exclude,
,validation_size,0.15
,outer_bags,14
,inner_bags,0
,learning_rate,0.04


In [12]:
# `show` comes from the top-level `interpret` package.
from interpret import show

# Ask the fitted EBM for its global explanation and render it in the
# interpret viewer.
interaction_exp = ebm.explain_global()
show(interaction_exp)

Bottom graph, for an individual feature: how much the feature function contributes to the prediction. Individual density plot: shows which regions of the feature space the model has seen a lot of data in.

In [None]:
from interpret.perf import RegressionPerf

AttributeError: 'RegressionPerf' object has no attribute 'selector'

In [18]:
# Per-sample explanations for the held-out rows, with the true targets passed
# alongside, rendered in the interpret viewer.
ebm_local = ebm.explain_local(X=X_test, y=y_test)
show(ebm_local)