# ___

# [ Machine Learning in Geosciences ]

### | DEMO: **LIME and SHAP explanation** 


.....................................................................................................................

Author: *Lukas Brodsky* *lukas.brodsky@natur.cuni.cz* 

---

# TASK: Explain Machine Learning models 


In [None]:
import pandas as pd
import lime
import lime.lime_tabular
import shap

from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from lime.lime_tabular import LimeTabularExplainer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [None]:
# Data: load the iris dataset that ships with scikit-learn's `datasets` module
data = datasets.load_iris()

### Classification

In [None]:
# Model
# Seed the forest so the fitted model -- and therefore the LIME / SHAP
# explanations derived from it -- is reproducible from run to run. The value
# matches the random_state used for the train/test split.
classifier = RandomForestClassifier(random_state=42)

In [None]:
# Targets and their human-readable class labels come straight from the dataset
y = data.target
class_names = data.target_names

# Features as a DataFrame whose columns carry the dataset's feature names
X = pd.DataFrame(data=data.data, columns=data.feature_names)
feature_names = X.columns

In [None]:
# feature_names

In [None]:
# Train/test split: hold out 20% of the rows, with a fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit the classifier on the training split only
classifier.fit(X_train, y_train)

### LIME explanation

In [None]:
# Create a LIME explainer object.
# - Build it from the training split only, so held-out rows do not leak into
#   the statistics LIME uses to perturb samples.
# - Pass the feature and class names so the explanation is labelled with them
#   rather than with positional indices.
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train.values,
    mode='classification',
    feature_names=list(feature_names),
    class_names=list(class_names),
)

In [None]:
# Select an instance to be explained: the first row of the dataset's raw feature array
instance = data.data[0]

In [None]:
# Generate an explanation for the instance. LIME queries the classifier through
# its predict_proba method; num_features caps how many features are reported.
explanation = explainer.explain_instance(instance, classifier.predict_proba, num_features = 5)

In [None]:
# Display the explanation inline in the notebook
explanation.show_in_notebook()

### Regression 

In [None]:
# Synthetic regression data: only the first three of the five features drive
# the target (weights 2, 3 and 1); the rest is standard-normal noise.
np.random.seed(42)
n_samples, n_features = 100, 5
X = np.random.rand(n_samples, n_features)
noise = np.random.randn(n_samples)
y = 2 * X[:, 0] + 3 * X[:, 1] + 1 * X[:, 2] + noise

In [None]:
# Split the synthetic data into training and testing sets (20% held out, fixed seed).
# NOTE: this rebinds X_train / X_test / y_train / y_test, replacing the iris
# split created in the classification section.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Train a simple linear regression model on the training split
model = LinearRegression()
model.fit(X_train, y_train)

In [None]:
# Initialize a LimeTabularExplainer in regression mode, with the training split
# as its reference data. This rebinds `explainer` from the classification section.
explainer = LimeTabularExplainer(training_data=X_train, mode="regression")

In [None]:
# Select a sample instance for explanation: the first row of the test split
sample_instance = X_test[0]

In [None]:
# Explain the prediction for the sample instance; LIME queries the regressor
# through model.predict
explanation = explainer.explain_instance(sample_instance, model.predict)

In [None]:
# Show the explanation inline in the notebook
explanation.show_in_notebook()
# The rendered output is laid out like the classification explanation above.

### SHAP Explanation

In [None]:
# Load SHAP's JavaScript visualisation code into the notebook so that the
# force plots below can render
shap.initjs()

#### Explain a single prediction from the test set

In [None]:

# The regression demo above rebinds X_train / X_test to 5-column NumPy arrays,
# which neither match the 4-feature iris classifier nor support `.iloc`.
# Rebuild the iris split here with the same test_size and random_state that
# were used when the classifier was trained, so the rows are partitioned the
# same way.
iris_X = pd.DataFrame(data.data, columns=data.feature_names)
iris_X_train, iris_X_test = train_test_split(iris_X, test_size=0.2, random_state=42)

# Model-agnostic SHAP explainer; the iris training rows act as background data
explainer = shap.KernelExplainer(classifier.predict_proba, iris_X_train)

# SHAP values for the first held-out iris sample
shap_values = explainer.shap_values(iris_X_test.iloc[0, :])

# Plot the contributions towards class 0 (pairs with expected_value[0]).
# NOTE(review): `shap_values[:, 0]` assumes a (features, classes) array, as
# returned by recent SHAP releases -- confirm against the installed version.
shap.force_plot(explainer.expected_value[0], shap_values[:, 0], iris_X_test.iloc[0, :])

#### Explain all the predictions in the test set

In [None]:
# X_test was rebound by the regression demo to a 5-column NumPy array, so it no
# longer holds iris rows. Rebuild the iris split with the same test_size and
# random_state used when the classifier was trained and explain its test part.
iris_X = pd.DataFrame(data.data, columns=data.feature_names)
iris_X_train, iris_X_test = train_test_split(iris_X, test_size=0.2, random_state=42)

# SHAP values for every held-out iris sample
shap_values = explainer.shap_values(iris_X_test)

# Stacked force plot of the contributions towards class 0 (pairs with expected_value[0]).
# NOTE(review): `shap_values[..., 0]` assumes the class axis is last, as in
# recent SHAP releases -- confirm against the installed version.
shap.force_plot(explainer.expected_value[0], shap_values[..., 0], iris_X_test)
