# Explanaible AI - alibi

**Links**

- alibi documentation main page: https://docs.seldon.io/projects/alibi/en/stable/
- alibi repo github: https://github.com/SeldonIO/alibi

In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
import sklearn

import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.express as px

# data
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# models
from sklearn.linear_model import LinearRegression # lr
from sklearn.linear_model import Ridge # ridge
from sklearn.linear_model import Lasso # lasso
from sklearn.tree import DecisionTreeRegressor # tree
from sklearn.ensemble import GradientBoostingRegressor #gb
from sklearn.ensemble import RandomForestRegressor #rf
from xgboost import XGBRegressor # xgb
from  sklearn.neural_network import MLPRegressor # mlp

### 1. Load data

In [2]:
##### load data
data_X, data_y = fetch_california_housing(return_X_y=True, as_frame=True)

##### joinn and rename dataset
data = data_X.copy()
data['Price'] = data_y

##### dropduplicates - some kind of plots in plotly return error with duplicated values
data = data.drop_duplicates()

##### delete outliers
percentil_lower = data.quantile(0.01)
percentil_upper = data.quantile(0.99)

data_raw = data[(data >= percentil_lower) & (data <= percentil_upper)]
data_raw.dropna(inplace = True)


##### split into train/test
target = 'Price'
list_features = list(set(data_raw.columns) - set([target]))

X_train, X_test, y_train, y_test = train_test_split(data_raw[list_features], 
                                                    data_raw[[target]], 
                                                    test_size = 0.2, 
                                                    random_state=42
                                                   )

In [3]:
list_features

['AveBedrms',
 'HouseAge',
 'Population',
 'MedInc',
 'Latitude',
 'AveRooms',
 'AveOccup',
 'Longitude']

In [4]:
X_train.head()

Unnamed: 0,AveBedrms,HouseAge,Population,MedInc,Latitude,AveRooms,AveOccup,Longitude
5712,1.09434,34.0,926.0,4.1523,34.22,4.641509,2.495957,-118.24
15731,1.06375,52.0,1588.0,3.3882,37.78,3.77125,1.985,-122.44
2894,0.990991,38.0,743.0,1.7292,35.37,4.135135,3.346847,-118.99
3333,1.055046,13.0,249.0,1.8417,38.92,4.770642,2.284404,-122.62
8410,1.026738,33.0,2649.0,2.3375,33.93,3.636364,4.721925,-118.35


### 2. Train models

#### 2.1 Linear regression

In [5]:
# train lr
lr = LinearRegression()
lr.fit(X_train, y_train)

In [6]:
# r2 score
r2_lr_train = lr.score(X_train, y_train)
r2_lr_test = lr.score(X_test, y_test)

print('r2_train: ', r2_lr_train)
print('r2_test: ', r2_lr_test)

r2_train:  0.6811276939092024
r2_test:  0.6813140626891638


#### 2.2 Random Forest Simple

In [7]:
# train rf simple
param_n_trees = 3
rf_simple = RandomForestRegressor(n_estimators = param_n_trees,
                                  random_state = 42,
                                 min_samples_split = 0.2,
                                    min_samples_leaf = 0.1)
                                   #max_depth = 2)
rf_simple.fit(X_train, y_train)

In [8]:
# r2 score
r2_rf_simple_train = rf_simple.score(X_train, y_train)
r2_rf_simple_test = rf_simple.score(X_test, y_test)

print('r2_train: ', r2_rf_simple_train)
print('r2_test: ', r2_rf_simple_test)

r2_train:  0.45686524868931044
r2_test:  0.43402741471810513


#### 2.3 Random Forest Default

In [9]:
# train rf default hiperparameters
rf_default = RandomForestRegressor(random_state = 42)
rf_default.fit(X_train, y_train)

In [10]:
# r2 score
r2_rf_default_train = rf_default.score(X_train, y_train)
r2_rf_default_test = rf_default.score(X_test, y_test)

print('r2_train: ', r2_rf_default_train)
print('r2_test: ', r2_rf_default_test)

r2_train:  0.9742324992188779
r2_test:  0.8097542157672419


### 3. ALIBI - Transform data - To use alibi it is neccesary the data need to be in format np.array()

In [12]:
# transform data into numpy array
X_train_numpy = X_train.to_numpy()
y_train_numpy = y_train.to_numpy()

X_test_numpy = X_test.to_numpy()
y_test_numpy = y_test.to_numpy()

### 4. ALIBI - ALE - Accumulated local effects

In [13]:
# list features
list_features

['AveBedrms',
 'HouseAge',
 'Population',
 'MedInc',
 'Latitude',
 'AveRooms',
 'AveOccup',
 'Longitude']

In [14]:
# target into format list
list_target = [target]