In [1]:
!pip3 install interpret

Collecting interpret
  Downloading interpret-0.6.9-py3-none-any.whl.metadata (1.0 kB)
Collecting interpret-core==0.6.9 (from interpret-core[aplr,dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.9->interpret)
  Downloading interpret_core-0.6.9-py3-none-any.whl.metadata (2.8 kB)
Collecting SALib>=1.3.3 (from interpret-core[aplr,dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.9->interpret)
  Downloading salib-1.5.1-py3-none-any.whl.metadata (11 kB)
Collecting dill>=0.2.5 (from interpret-core[aplr,dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.9->interpret)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting aplr>=10.6.1 (from interpret-core[aplr,dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.9->interpret)
  Downloading aplr-10.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)
Collecting dash>=1.0.0 (from interpret-core[aplr,dash,debug,linear,notebook,plotly,sensitivity,shap]==0.6.9->interpret)
  Download

In [2]:
# load dependencies
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from interpret.glassbox import LinearRegression
from interpret import show
import matplotlib.pyplot as plt


In [43]:
# Seed NumPy's global (legacy) random number generator with a fixed value so
# that code drawing from np.random produces the same numbers on every run.
np.random.seed(42)

In [4]:

# Generate a synthetic regression dataset with a known linear ground truth.
#
# The draws come from a generator created and seeded inside this cell, so
# re-running the cell on its own always rebuilds the same data and neither
# depends on nor advances NumPy's global random state. RandomState is the
# legacy generator behind np.random.seed / np.random.*, so seeding it with 42
# and keeping the draw order below reproduces the values recorded in this
# notebook's outputs.
n_samples = 1000
rng = np.random.RandomState(42)

# features (the order of these draws is part of the reproducible stream)
feature1 = rng.normal(0, 1, n_samples)  # normally distributed feature
feature2 = rng.uniform(0, 10, n_samples)  # uniformly distributed feature
feature3 = rng.binomial(1, 0.5, n_samples)  # binary feature
feature4 = rng.exponential(2, n_samples)  # exponentially distributed feature

# target is a fixed linear combination of the features plus small Gaussian noise:
# target = 2 * feature1 + 0.5 * feature2 - 3 * feature3 + 0.1 * feature4 + noise
noise = rng.normal(0, 0.1, n_samples)
target = (2 * feature1 + 0.5 * feature2 - 3 * feature3 + 0.1 * feature4 + noise)

# collect features and target into a single dataframe
data = pd.DataFrame({
    'feature1': feature1,
    'feature2': feature2,
    'feature3': feature3,
    'feature4': feature4,
    'target': target
})

# preview the first few records
data.head()

Unnamed: 0,feature1,feature2,feature3,feature4,target
0,0.496714,1.674826,0,2.090056,2.03245
1,-0.138264,1.045678,0,0.15399,0.254143
2,0.647689,6.364302,0,0.941508,4.768933
3,1.52303,7.064757,0,3.25739,6.765579
4,-0.234153,0.315861,1,1.136458,-3.146171


In [5]:
# separate the predictor columns from the response, then split both into
# training and testing parts (test_size=0.2, fixed random_state for repeatability)
feature_columns = ['feature1', 'feature2', 'feature3', 'feature4']
X = data[feature_columns]
y = data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Fit InterpretML's glassbox LinearRegression on the training split.
# The fitted estimator is kept in `lr_model`, which the prediction and
# explanation cells of this notebook call.
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

<interpret.glassbox._linear.LinearRegression at 0x7ad41b737390>

In [7]:
# predict the target for every row of the held-out test features
y_pred = lr_model.predict(X_test)

In [8]:
# score the test-set predictions: coefficient of determination and mean squared error
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print(f"R Square Score: {r2:.4f}")
print(f"Mean Squared Error: {mse:.4f}")

R Square Score: 0.9988
Mean Squared Error: 0.0099


In [14]:
# build local (per-record) explanations for the first two rows of the test split,
# selected by position
first_two_features = X_test.iloc[:2]
first_two_targets = y_test.iloc[:2]
local_explanation = lr_model.explain_local(
    first_two_features,
    first_two_targets,
    name='Linear Regression Local Explanation',
)

In [38]:
# Pull the explanation payload for the first explained record (position 0 of
# the rows passed to explain_local) and print it as plain numbers.
first_record_explanation = local_explanation.data(0)

# This is a regression model, so the output is reported as a predicted *value*
# against the true *value* rather than as a class / label. Both rows are
# selected by position so they refer to the same test record.
first_record_prediction = lr_model.predict(X_test.iloc[:1])[0]
first_record_actual = y_test.iloc[0]

print("Local Explanation for First Record:")
print(f"Predicted Value: {first_record_prediction}")
print(f"True Value: {first_record_actual}")
print("Feature Contributions:")
# 'names' and 'scores' are read as parallel sequences: one score per feature name
for feature_name, contribution in zip(first_record_explanation['names'], first_record_explanation['scores']):
    print(f"{feature_name}: {contribution}")

Local Explanation for First Record:
Predicted Class: 5.273822447922763
True Label: 5.195288049667121
Feature Contributions:
feature1: 1.0855998010881616
feature2: 3.847432002462039
feature3: -0.0
feature4: 0.35087395489306394


In [39]:
# visualize the local explanations (the explanation object covers both
# explained test rows, not only the first one)
show(local_explanation)

In [40]:
# build the model-level (global) explanation of the fitted linear model
global_explanation = lr_model.explain_global(name='Linear Regression Global Explanation')


In [41]:
# Pull the data behind the global explanation and print one score per feature.
# NOTE: in the recorded run these scores are signed and line up with the
# coefficients used to build the synthetic target (2, 0.5, -3, 0.1).
global_data = global_explanation.data()

print("Global Feature Importance:")
names_and_scores = zip(global_data['names'], global_data['scores'])
for name, score in names_and_scores:
    print(f"{name}: {score}")

Global Feature Importance:
feature1: 1.9979376780128109
feature2: 0.5001711159206623
feature3: -2.999199794321629
feature4: 0.10113213137763244


In [42]:
# display the global explanation using interpret's show()
show(global_explanation)

In [None]:
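# optional: save plots if needed (uncomment to save)
# NOTE(review): plt.savefig writes the current matplotlib figure; confirm that show() actually draws one, otherwise these calls may save blank images
# plt.savefig('global_explanation.png')
# plt.savefig('local_explanation.png')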
