# SHAP etc.

## Set up

#### User-specified parameters

In [1]:
python_material_folder_name = "python-material"

### Import libraries

In [2]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Check if in Google Colab environment
try:
    from google.colab import drive
    # Mount drive
    drive.mount('/content/drive')
    # Set up path to Python material parent folder
    path_python_material = rf"drive/MyDrive/{python_material_folder_name}"
        # If unsure, print current directory path by executing the following in a new cell:
        # !pwd
    IN_COLAB = True
except:
    IN_COLAB = False
    # If working locally on Jupyter Notebook, parent folder is one folder up (assuming you are using the folder structure shared at the beginning of the course)
    path_python_material = ".."

In [3]:
if IN_COLAB == True:
  !pip install fastapi uvicorn

## Data import

In [4]:
# Build quick model here for fraud data
import pandas as pd

df = pd.read_csv(f"{path_python_material}/data/1-raw/dsif11-fraud-detection/synthetic_transaction_data.csv")
df.head()


Unnamed: 0,transaction_amount,transaction_date,transaction_time,customer_age,customer_balance,is_fraud
0,46.926809,2023-02-09,55817,43,9143.802446,0
1,301.012143,2023-01-28,9356,60,3126.627558,0
2,131.674569,2023-11-13,33099,33,4316.836831,0
3,91.294255,2023-03-26,3190,18,4235.945356,0
4,16.962487,2023-12-07,13332,49,5491.237144,0


In [5]:
df.shape

(100000, 6)

# dsif11app.py

### set up

In [6]:
model_id = "lr1"
import pickle 
import shap

transaction_amount = 20000
customer_age = 20
customer_balance = 20000

data_point = [[
        transaction_amount,
        customer_age,
        customer_balance
    ]]

In [7]:
# Load the pipeline
with open(f"{path_python_material}/models/{model_id}-pipeline.pkl", "rb") as f:
    loaded_pipeline = pickle.load(f)


In [12]:
# Shap values
path = f"{path_python_material}/data/2-intermediate/dsif11-X_train_scaled.npy"
print(path)
X_train_scaled = np.load(path) #pd.DataFrame(np.load(path))

explainer = shap.LinearExplainer(loaded_pipeline[1], data_point)
shap_values = explainer.shap_values(data_point)
shap_values

../data/2-intermediate/dsif11-X_train_scaled.npy


array([[-0.06281656,  0.02766043,  0.27972252],
       [-0.03251329,  0.02611688, -1.74136979],
       [-0.02208351,  0.01376847,  0.26444425],
       ...,
       [-0.07080837,  0.02302977,  1.03543975],
       [ 0.04647673,  0.00759427, -0.21013666],
       [-0.0561283 , -0.00321059, -0.06440905]])

In [None]:
X_train_scaled = 
# X_train_scaled.head()


In [None]:
explainer = shap.LinearExplainer(loaded_pipeline[1], X_train_scaled)

# dsif11app-fraud-streamlit.py 

### set up

In [None]:
api_url = "http://localhost:8502"

import streamlit as st
import requests
import numpy as np
import matplotlib.pyplot as plt
import shap

transaction_amount = 20000
customer_age = 20
customer_balance = 20000

data = {
        "transaction_amount": transaction_amount,
        "customer_age": customer_age,
        "customer_balance": customer_balance
    }

## if st.button("Predict"):

In [None]:
response = requests.post(f"{api_url}/predict/",
                         json=data)
result = response.json()
result

In [None]:
import seaborn as sns

######### SHAP #########
# Extract SHAP values and feature names
shap_values = np.array(result['shap_values'])
features = result['features']

# Bar plot for SHAP values
fig, ax = plt.subplots()
ax.barh(features, shap_values[0])
ax.set_xlabel('SHAP Value (Impact on Model Output)')
sns.pyplot(fig)

## if st.button("Show Feature Importance"):

In [None]:
## Now kick off app from terminal:
## uvicorn dsif11app-fraud:app --reload --port 8502

In [None]:
import matplotlib.pyplot as plt
response = requests.get(f"{api_url}/feature-importance")
response


In [None]:
feature_importance = response.json().get('feature_importance', {})
feature_importance

In [None]:
features = list(feature_importance.keys())
importance = list(feature_importance.values())


In [None]:

fig, ax = plt.subplots()
ax.barh(features, importance)
ax.set_xlabel('Importance')
ax.set_title('Feature Importance')
st.pyplot(fig)