**Initial Setup**

In [None]:
# Import python packages
import lime
import lime.lime_tabular
import pandas as pd
import numpy as np

# Obtain the active Snowpark session. Later cells in this notebook use `session`
# to load tables into pandas, write DataFrames back as tables, and run SQL.
from snowflake.snowpark.context import get_active_session
session = get_active_session()


**Create Training View** - adapted from the Snowflake quickstart "Lead Scoring with ML-Powered Classification": https://quickstarts.snowflake.com/guide/lead_scoring_with_ml_powered_classification/index.html?index=..%2F..index#0

In [None]:
-- Training view: customer attributes plus a spend-based SEGMENT label.
-- Only customers whose join_date is before 2024-02-11 are included.
create or replace view customer_training as
select
    age_band,
    household_income,
    marital_status,
    household_size,
    case
        when total_order_value < 10 then 'BRONZE'
        -- Values below 10 were matched by the branch above, so this branch covers
        -- 10 through 25 inclusive. Previously a value of exactly 10 matched neither
        -- "<10" nor ">10" and was silently labelled 'GOLD'.
        when total_order_value <= 25 then 'SILVER'
        else 'GOLD'
    end as segment
from customers
where join_date < '2024-02-11'::date;

**Encode Textual Labels, as LIME does not work on Textual Values**

In [None]:
from sklearn.preprocessing import LabelEncoder

# One encoder instance is shared by both columns; each fit_transform call refits it,
# so once this cell finishes `le` holds only the MARITAL_STATUS mapping.
le = LabelEncoder()

# Feature column names as they exist once the text columns have been encoded.
feat = ['AGE_BAND', 'HOUSEHOLD_INCOME_le', 'MARITAL_STATUS_le', 'HOUSEHOLD_SIZE']

# Pull the training view into pandas.
df_training_data = session.table('customer_training').to_pandas()

# Swap every text column for an integer-coded "<NAME>_le" column.
text_columns = ["HOUSEHOLD_INCOME", "MARITAL_STATUS"]
for text_column in text_columns:
    df_training_data[f"{text_column}_le"] = le.fit_transform(df_training_data[text_column])
df_training_data = df_training_data.drop(columns=text_columns)

# Persist the encoded frame, overwriting any existing table with this name.
sp_df = session.create_dataframe(df_training_data)
sp_df.write.mode("overwrite").save_as_table("customer_training_encoded")

**View new Table with Encoded Values**

In [None]:
-- Inspect the encoded training table.
select *
from customer_training_encoded;

**Train ML Powered Classification Model on Encoded Training View**

In [None]:
-- create the classification model
-- (replaces any existing model named customer_classification_model)
CREATE OR REPLACE SNOWFLAKE.ML.CLASSIFICATION customer_classification_model(
    -- training input: the encoded table saved by the label-encoding cell
    INPUT_DATA => SYSTEM$REFERENCE('table', 'customer_training_encoded'),
    -- name of the label column the model learns to predict
    TARGET_COLNAME => 'segment'
);

**Create Temporary Table for Predictions**

In [None]:
-- Stage the raw feature columns of every customer in a temporary table so the
-- text columns can be encoded before scoring.
CREATE OR REPLACE TEMPORARY TABLE customer_predictions_prep AS
SELECT
    age_band,
    household_income,
    marital_status,
    household_size
FROM customers;

**Encode Textual Values in Prediction Table and Save**

In [None]:
# Load the staged prediction inputs (still holding the raw text categories).
df_predictions_data = session.table('customer_predictions_prep').to_pandas()

feat = ['AGE_BAND', 'HOUSEHOLD_INCOME_le', 'MARITAL_STATUS_le', 'HOUSEHOLD_SIZE']

# Label-encode using the TRAINING vocabulary. Re-fitting an encoder on the prediction
# data assigns codes from whichever categories happen to be present here, and those
# codes need not match the ones the model was trained on. Fitting on the training
# view and calling transform() keeps both tables on the same mapping.
# Note: LabelEncoder.transform raises ValueError for a category that never appears
# in the training view, instead of silently giving it a shifted code.
training_categories = (
    session.table('customer_training')
    .select('HOUSEHOLD_INCOME', 'MARITAL_STATUS')
    .to_pandas()
)
for text_column in ['HOUSEHOLD_INCOME', 'MARITAL_STATUS']:
    column_encoder = LabelEncoder().fit(training_categories[text_column])
    df_predictions_data[f'{text_column}_le'] = column_encoder.transform(df_predictions_data[text_column])
df_predictions_data = df_predictions_data.drop(columns=["HOUSEHOLD_INCOME","MARITAL_STATUS"])

# Save the encoded inputs so the scoring query can read them.
sp_df = session.create_dataframe(df_predictions_data)
sp_df.write.mode("overwrite").save_as_table("customer_predictions_encoded")

**Run Predictions on the Encoded Table and Save the Results to a New Table**

In [None]:
-- Run the model over every encoded row and save the results: the feature columns,
-- the raw prediction object, and the predicted class label.
CREATE OR REPLACE TABLE customer_predictions AS
SELECT
    AGE_BAND,
    "HOUSEHOLD_INCOME_le",
    "MARITAL_STATUS_le",
    HOUSEHOLD_SIZE,
    customer_classification_model!PREDICT(INPUT_DATA => object_construct(*)) AS predictions,
    -- predictions is already semi-structured, so it is indexed directly rather
    -- than being serialised and re-parsed with PARSE_JSON
    predictions['class']::STRING AS predicted_segment
FROM customer_predictions_encoded;

**Load Predictions Table into a pandas DataFrame**

In [None]:
# Read the saved predictions table and convert it to a pandas DataFrame.
predictions_table = session.table('customer_predictions')
df_prediction_data = predictions_table.to_pandas()

**Set Up LIME TABULAR Explainer for Classification**

In [None]:
# The explainer's reference data and labels come from the encoded training table.
df_training_data_encoded = session.table('customer_training_encoded').to_pandas()
feat = ['AGE_BAND', 'HOUSEHOLD_INCOME_le', 'MARITAL_STATUS_le', 'HOUSEHOLD_SIZE']

explainer = lime.lime_tabular.LimeTabularExplainer(
    df_training_data_encoded[feat].astype(int).values,
    mode='classification',
    training_labels=np.array(df_training_data_encoded['SEGMENT']),
    feature_names=feat,
    # The *_le columns hold label-encoded categories, not quantities; marking them
    # categorical makes LIME sample observed codes rather than perturbing them as
    # continuous numbers.
    categorical_features=[feat.index('HOUSEHOLD_INCOME_le'), feat.index('MARITAL_STATUS_le')],
    # Must follow the column order of the probability array given to
    # explain_instance (bronze, silver, gold); without it classes show as 0/1/2.
    class_names=['BRONZE', 'SILVER', 'GOLD'],
    # Fixed seed so the sampled neighbourhood, and hence the explanation, is repeatable.
    random_state=42,
)

**Ask the Explainer to Explain the Row of Predictions at Index 1**

In [None]:
#ML Powered Classification does not have a predict_proba function, so we need to create a custom function
#to generate the probabilities for each class and return them in a 2D numpy array
def prob(data):
    """Return class probabilities from customer_classification_model for each row.

    Parameters
    ----------
    data : iterable of row sequences
        Each row supplies, in order, AGE_BAND, HOUSEHOLD_INCOME_le,
        MARITAL_STATUS_le and HOUSEHOLD_SIZE. Values are converted with float().

    Returns
    -------
    numpy.ndarray
        Float array of shape (n_rows, 3) whose columns are the BRONZE, SILVER and
        GOLD probabilities, in that order. Empty input yields shape (0, 3).

    Notes
    -----
    One parameterised query is issued per row through the notebook-level `session`.
    The probabilities are read straight from the PREDICT result, so no scratch
    temporary tables are created and no FLATTEN (which only duplicated the result
    row once per key of the prediction object) is needed.
    """
    # Built once: the statement is identical for every row, only the binds change.
    predict_sql = """
        SELECT
            predictions:probability:BRONZE::FLOAT AS bronze_proba,
            predictions:probability:SILVER::FLOAT AS silver_proba,
            predictions:probability:GOLD::FLOAT AS gold_proba
        FROM (
            SELECT customer_classification_model!PREDICT(INPUT_DATA => object_construct('AGE_BAND', ?, 'HOUSEHOLD_INCOME_le', ?, 'MARITAL_STATUS_le', ?, 'HOUSEHOLD_SIZE', ?)) AS predictions
        )
    """

    probabilities = []
    for row in data:
        age = float(row[0])
        hh_income = float(row[1])
        mstatus = float(row[2])
        hh_size = float(row[3])

        result = session.sql(predict_sql, params=[age, hh_income, mstatus, hh_size]).to_pandas()
        # The query returns exactly one row: [bronze, silver, gold].
        probabilities.append(result.iloc[0].tolist())

    # reshape keeps the result 2-D even when `data` is empty.
    return np.array(probabilities, dtype=float).reshape(-1, 3)

# Ask LIME to explain one prediction: the row with index label 1 in
# df_prediction_data (the second row if the frame has a default 0-based index).
i = 1
instance_values = df_prediction_data.loc[i, feat].astype(int).values
exp = explainer.explain_instance(
    instance_values,
    prob,
    num_features=4,
    num_samples=7,
)

**What Are the Feature Values of the Explained Row (Index 1)?**

In [None]:
# Print the integer feature values of the row with index label 1.
i = 1
explained_row = df_prediction_data.loc[i, feat]
print(explained_row.astype(int).values)

**Show the Explanation for the Row at Index 1 as a List**

In [None]:
# Display the explanation held in `exp` by calling its as_list() method; as the
# cell's last expression, the returned value is shown as the cell output.
exp.as_list()