## Use Customized Notebook Template (CLAIMS_RESERVE_Template)

In [2]:
from snowflake.snowpark import Session
from snowflake.ml.modeling.pipeline import Pipeline
from snowflake.ml.modeling.xgboost import XGBRegressor
from snowflake.ml.modeling.preprocessing import MinMaxScaler, OrdinalEncoder, OneHotEncoder
# import seaborn as sns
from sklearn.metrics import mean_absolute_percentage_error
# Pandas Tools
from snowflake.connector.pandas_tools import write_pandas
# Data Science Libs
import numpy as np
import pandas as pd
# create_temp_table warning suppresion
import warnings; warnings.simplefilter('ignore')
from joblib import dump, load
# FosforIO to read from snowflake
from fosforio import snowflake
# FosforML to register Model on FDC
from fosforml import *
from fosforml.constants import MLModelFlavours
import requests

# Read data using FosforML

In [3]:
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [7]:
table_name = 'AUTO_INSURANCE_CLAIMS_DATA'

In [8]:
sf_df = my_session.sql("select * from {}".format(table_name))

In [9]:
df = sf_df.to_pandas()

In [3]:
# To read a specific dataset published from a snowflake connection
#df = snowflake.get_dataframe("AUTO_INSURANCE_CLAIMS_DATA_PRODUCT")

In [10]:
df_backup = df.copy()

In [11]:
df = df.drop(['POLICY_NUMBER','MONTHS_AS_CUSTOMER','CUSTOMER_AGE','POLICY_BIND_DATE','POLICY_STATE','POLICY_CSL','UMBRELLA_LIMIT',
 'INSURED_ZIP','INSURED_SEX','INSURED_EDUCATION_LEVEL','INSURED_OCCUPATION','INSURED_HOBBIES','INSURED_RELATIONSHIP',
 'CAPITAL_GAINS','CAPITAL_LOSS', 'INCIDENT_DATE', 'AUTHORITIES_CONTACTED', 'INCIDENT_STATE', 'INCIDENT_CITY',
 'INCIDENT_LOCATION', 'INCIDENT_HOUR_OF_THE_DAY', 'INCIDENT_TIME_OF_DAY', 'WITNESSES', 'POLICE_REPORT_AVAILABLE',
 'INJURY_CLAIM', 'PROPERTY_CLAIM', 'VEHICLE_CLAIM', 'FRAUD_REPORTED'])

KeyError: "['POLICY_NUMBER', 'MONTHS_AS_CUSTOMER', 'CUSTOMER_AGE', 'POLICY_BIND_DATE', 'POLICY_STATE', 'POLICY_CSL', 'UMBRELLA_LIMIT', 'INSURED_ZIP', 'INSURED_SEX', 'INSURED_EDUCATION_LEVEL', 'INSURED_OCCUPATION', 'INSURED_HOBBIES', 'INSURED_RELATIONSHIP', 'CAPITAL_GAINS', 'CAPITAL_LOSS', 'INCIDENT_DATE', 'AUTHORITIES_CONTACTED', 'INCIDENT_STATE', 'INCIDENT_CITY', 'INCIDENT_LOCATION', 'INCIDENT_HOUR_OF_THE_DAY', 'INCIDENT_TIME_OF_DAY', 'WITNESSES', 'POLICE_REPORT_AVAILABLE', 'INJURY_CLAIM', 'PROPERTY_CLAIM', 'VEHICLE_CLAIM', 'FRAUD_REPORTED'] not found in axis"

In [19]:
df.show()

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"POLICY_DEDUCTABLE"  |"POLICY_ANNUAL_PREMIUM"  |"INCIDENT_TYPE"           |"COLLISION_TYPE"       |"INCIDENT_SEVERITY"  |"NUMBER_OF_VEHICLES_INVOLVED"  |"PROPERTY_DAMAGE"      |"BODILY_INJURIES"  |"TOTAL_CLAIM_AMOUNT_PAID"  |"AUTO_MAKE"  |"AUTO_MODEL"  |"AUTO_YEAR"  |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|537                  |697                      |Single Vehicle Collision  |Rear Collision         |Major Damage         |1                              |Details not Available  |1           

In [20]:
train_df, test_df = df.random_split([0.8,0.2], seed=69) #seed=60 for Model version 2
CATEGORICAL_COLUMNS = ["INCIDENT_TYPE", "COLLISION_TYPE","INCIDENT_SEVERITY", "PROPERTY_DAMAGE",
                       "AUTO_MAKE","AUTO_MODEL","AUTO_YEAR",]
NUMERICAL_COLUMNS = ["POLICY_DEDUCTABLE", "POLICY_ANNUAL_PREMIUM","NUMBER_OF_VEHICLES_INVOLVED", "BODILY_INJURIES"]
LABEL_COLUMNS = ["TOTAL_CLAIM_AMOUNT_PAID"]
OUTPUT_COLUMNS = ["PREDICTION"]

In [21]:
## train_df and test_df are both snowpark dataframes
pipeline = Pipeline(
    steps=[(
                "OE",
                OrdinalEncoder(
                input_cols=CATEGORICAL_COLUMNS,
                output_cols=CATEGORICAL_COLUMNS,
                handle_unknown='use_encoded_value',
                unknown_value=-1
                )),
               ("MMS",
                MinMaxScaler(
                clip=True,
                input_cols=NUMERICAL_COLUMNS,
                output_cols=NUMERICAL_COLUMNS,
                )),
               ("classification",
                XGBRegressor(
                input_cols=CATEGORICAL_COLUMNS+NUMERICAL_COLUMNS,
                label_cols=LABEL_COLUMNS,
                output_cols=OUTPUT_COLUMNS
                ))])

pipeline.fit(train_df) ## fiting the dataset
result = pipeline.predict(test_df)

The version of package 'numpy' in the local environment is 1.24.4, which does not fit the criteria for the requirement 'numpy==1.24.3'. Your UDF might not work when the package version is different between the server and your local environment.
The version of package 'numpy' in the local environment is 1.24.4, which does not fit the criteria for the requirement 'numpy==1.24.3'. Your UDF might not work when the package version is different between the server and your local environment.


In [28]:
from joblib import dump, load
filename = "Claims_Snowpark_XGB_Regression.joblib" #Model version 1
dump(pipeline, 'model_artifacts/' + filename)

['Claims_Snowpark_XGB_Regression.joblib']

In [29]:
model = load('model_artifacts/' + filename)

In [30]:
test = test_df.to_pandas().replace(np.nan, pd.isna)
train = train_df.to_pandas().replace(np.nan, pd.isna)
test["AUTO_YEAR"] = test["AUTO_YEAR"].astype(str)
train["AUTO_YEAR"] = train["AUTO_YEAR"].astype(str)

In [31]:
@scoring_func
def score(model, request):
    payload_dict = request.json["payload"]
    data = pd.DataFrame(payload_dict,index=[0])
    data = data.replace(np.nan, pd.isna)
    data["AUTO_YEAR"] = data["AUTO_YEAR"].astype(str)
    prediction = str(model.predict(data)["PREDICTION"][0])
    return prediction

In [32]:
pred = model.predict(test)
test["PREDICTION"] = pred["PREDICTION"]

In [33]:
X_train = train.drop(["TOTAL_CLAIM_AMOUNT_PAID"], axis=1)
y_train = train["TOTAL_CLAIM_AMOUNT_PAID"]

X_test = pred.drop(["TOTAL_CLAIM_AMOUNT_PAID", "PREDICTION"], axis=1)
y_test = pred["TOTAL_CLAIM_AMOUNT_PAID"]

y_pred = pred["PREDICTION"]

In [34]:
payload  = test.iloc[0].to_dict()
del payload["TOTAL_CLAIM_AMOUNT_PAID"]
del payload["PREDICTION"]
payload

{'POLICY_DEDUCTABLE': 809,
 'POLICY_ANNUAL_PREMIUM': 1064,
 'INCIDENT_TYPE': 'Single Vehicle Collision',
 'COLLISION_TYPE': 'Rear Collision',
 'INCIDENT_SEVERITY': 'Major Damage',
 'NUMBER_OF_VEHICLES_INVOLVED': 1,
 'PROPERTY_DAMAGE': 'Property Damage',
 'BODILY_INJURIES': 2,
 'AUTO_MAKE': 'Accura',
 'AUTO_MODEL': 'MDX',
 'AUTO_YEAR': '1997'}

In [35]:
req = requests.Request()
req.json = {"payload":payload}
y_req = req
y_out = score(model, y_req)
y_out

'74180.48'

In [36]:
## SAMPLE PAYLOAD
data = {
  "payload": {
    "POLICY_DEDUCTABLE": 500,
    "POLICY_ANNUAL_PREMIUM": 938,
    "INCIDENT_TYPE": "Vehicle Theft",
    "COLLISION_TYPE": "Details not Available",
    "INCIDENT_SEVERITY": "Total Loss",
    "NUMBER_OF_VEHICLES_INVOLVED": 1,
    "PROPERTY_DAMAGE": "Property Damage",
    "BODILY_INJURIES": 0,
    "AUTO_MAKE": "Honda",
    "AUTO_MODEL": "Civic",
    "AUTO_YEAR": "2002"
  }
}

In [37]:
## registering the model in Fosfor Insight Designer
model_reg = register_model(model, 
               score, 
               name="Claims_Snowpark_XGB_Regression", 
               description="AutoInsurance claims SnowflakeML XGB Regression",
               flavour=MLModelFlavours.sklearn,
               model_type="regression",
               init_script="pip install snowflake-ml-python==1.0.11",
               y_true=y_test,
               y_pred=y_pred, 
               features=X_train.columns,
               input_type="json", 
               explain_ai=True,
               x_train=X_train, 
               x_test=X_test, 
               y_train=y_train.tolist(),
               y_test=y_test.tolist(),
               feature_names=X_train.columns.tolist(),
               original_features=X_train.columns.tolist(),
               feature_ids=X_train.columns,
               kyd=True, kyd_score = True)

Calculating build time metrics

Progress: ██████████████████████████████████████████████████████████████████████ 100.0%


VBox(children=(HTML(value='<style>.grad_1{background: #2468a4;} .grad_2{ color:white; background: #2468a4;}</s…

# Code to call Claims_Snowpark_XGB_Regression internally using Insight Designer Notebook

In [38]:
def model_prediction(data):
    model = load('model_artifacts/Claims_Snowpark_XGB_Regression.joblib', mmap_mode='r')
    data = data.replace(np.nan, pd.isna)
    data["AUTO_YEAR"] = data["AUTO_YEAR"].astype(str)
    return model.predict(data)['PREDICTION'] #Only returns Prediction
    #return model.predict(data) #Returns Complete input data along with Prediction

In [39]:
test['PREDICTION'] = model_prediction(test) #If only Prediction is expected from the score function output
#test = model_prediction(test) #If complete input data along with Prediction is expected from the score function output

In [40]:
test.head()

Unnamed: 0,POLICY_DEDUCTABLE,POLICY_ANNUAL_PREMIUM,INCIDENT_TYPE,COLLISION_TYPE,INCIDENT_SEVERITY,NUMBER_OF_VEHICLES_INVOLVED,PROPERTY_DAMAGE,BODILY_INJURIES,TOTAL_CLAIM_AMOUNT_PAID,AUTO_MAKE,AUTO_MODEL,AUTO_YEAR,PREDICTION
0,809,1064,Single Vehicle Collision,Rear Collision,Major Damage,1,Property Damage,2,76833.0,Accura,MDX,1997,74180.476562
1,1116,1268,Vehicle Theft,Details not Available,Major Damage,1,No Property Damage,0,69459.0,Chevrolet,Malibu,2015,72643.09375
2,1334,1607,Single Vehicle Collision,Side Collision,Minor Damage,1,Property Damage,0,38769.0,Toyota,Camry,2008,43409.59375
3,1734,2312,Single Vehicle Collision,Details not Available,Major Damage,1,No Property Damage,1,75894.0,Nissan,Maxima,2010,70534.773438
4,1312,1660,Single Vehicle Collision,Details not Available,Major Damage,1,Property Damage,0,72553.0,BMW,M5,2000,72034.96875


In [41]:
#Drift files to be setup using snowflake
drift_old = test.head(10000)
drift_old.to_csv("/data/Output/Claims_Snowpark_XGB_Regression_Output_v1.csv", index=False)

drift_new = test.tail(10000)
drift_new.to_csv("/data/Output/Claims_Snowpark_XGB_Regression_Output_v2.csv", index=False)

# Sample Code to call Claims_Snowpark_XGB_Regression externally

In [None]:
access_token = "<Access Token here to be available in FDC 1.0>"

def model(payload, access_token):
    headers = {'Content-Type': 'application/json',
               'Authorization': 'Bearer ' + access_token}
    url = "https://prpr.fdc.leni.ai/claimssnowparkxgbregression/<model_version_id>/score"
    
    data = {"payload": payload}
    print("***********************************")
    print(data)
    print("***************************")
    
    response = requests.post(url, json=data, headers=headers)
    return response 
    return #pd.DataFrame(eval(response.json()["data"]))

In [None]:
# data_1 = pd.DataFrame(payload, index=[0])
output1 = model(payload, access_token)

In [None]:
output1.json()["data"]

# Version 2 of Snowpark XGB Regression Model

In [42]:
train_df, test_df = df.random_split([0.8,0.2], seed=60) #seed=69 for Model version 1
CATEGORICAL_COLUMNS = ["INCIDENT_TYPE", "COLLISION_TYPE","INCIDENT_SEVERITY", "PROPERTY_DAMAGE",
                       "AUTO_MAKE","AUTO_MODEL","AUTO_YEAR",]
NUMERICAL_COLUMNS = ["POLICY_DEDUCTABLE", "POLICY_ANNUAL_PREMIUM","NUMBER_OF_VEHICLES_INVOLVED", "BODILY_INJURIES"]
LABEL_COLUMNS = ["TOTAL_CLAIM_AMOUNT_PAID"]
OUTPUT_COLUMNS = ["PREDICTION"]

In [43]:
## train_df and test_df are both snowpark dataframes
pipeline = Pipeline(
    steps=[(
                "OE",
                OrdinalEncoder(
                input_cols=CATEGORICAL_COLUMNS,
                output_cols=CATEGORICAL_COLUMNS,
                handle_unknown='use_encoded_value',
                unknown_value=-1
                )),
               ("MMS",
                MinMaxScaler(
                clip=True,
                input_cols=NUMERICAL_COLUMNS,
                output_cols=NUMERICAL_COLUMNS,
                )),
               ("classification",
                XGBRegressor(
                input_cols=CATEGORICAL_COLUMNS+NUMERICAL_COLUMNS,
                label_cols=LABEL_COLUMNS,
                output_cols=OUTPUT_COLUMNS
                ))])

pipeline.fit(train_df) ## fiting the dataset
result = pipeline.predict(test_df)



In [44]:
from joblib import dump, load
filename = "Claims_Snowpark_XGB_Regression_v2.joblib" #Model version 1
dump(pipeline, 'model_artifacts/' + filename)

['Claims_Snowpark_XGB_Regression_v2.joblib']

In [45]:
model = load('model_artifacts/' + filename)

In [46]:
test = test_df.to_pandas().replace(np.nan, pd.isna)
train = train_df.to_pandas().replace(np.nan, pd.isna)
test["AUTO_YEAR"] = test["AUTO_YEAR"].astype(str)
train["AUTO_YEAR"] = train["AUTO_YEAR"].astype(str)

In [47]:
pred = model.predict(test)
test["PREDICTION"] = pred["PREDICTION"]

In [48]:
X_train = train.drop(["TOTAL_CLAIM_AMOUNT_PAID"], axis=1)
y_train = train["TOTAL_CLAIM_AMOUNT_PAID"]

X_test = pred.drop(["TOTAL_CLAIM_AMOUNT_PAID", "PREDICTION"], axis=1)
y_test = pred["TOTAL_CLAIM_AMOUNT_PAID"]

y_pred = pred["PREDICTION"]

In [49]:
payload  = test.iloc[0].to_dict()
del payload["TOTAL_CLAIM_AMOUNT_PAID"]
del payload["PREDICTION"]
payload

{'POLICY_DEDUCTABLE': 583,
 'POLICY_ANNUAL_PREMIUM': 821,
 'INCIDENT_TYPE': 'Parked Car',
 'COLLISION_TYPE': 'Side Collision',
 'INCIDENT_SEVERITY': 'Minor Damage',
 'NUMBER_OF_VEHICLES_INVOLVED': 1,
 'PROPERTY_DAMAGE': 'No Property Damage',
 'BODILY_INJURIES': 2,
 'AUTO_MAKE': 'Toyota',
 'AUTO_MODEL': 'Highlander',
 'AUTO_YEAR': '2000'}

In [50]:
req = requests.Request()
req.json = {"payload":payload}
y_req = req
y_out = score(model, y_req)
y_out

'45579.38'

In [51]:
## registering the model in Fosfor Insight Designer using same name.
model_reg = register_model(model, 
               score, 
               name="Claims_Snowpark_XGB_Regression", 
               description="AutoInsurance claims SnowflakeML XGB Regression",
               flavour=MLModelFlavours.sklearn,
               model_type="regression",
               init_script="pip install snowflake-ml-python==1.0.11",
               y_true=y_test,
               y_pred=y_pred, 
               features=X_train.columns,
               input_type="json", 
               explain_ai=True,
               x_train=X_train, 
               x_test=X_test, 
               y_train=y_train.tolist(),
               y_test=y_test.tolist(),
               feature_names=X_train.columns.tolist(),
               original_features=X_train.columns.tolist(),
               feature_ids=X_train.columns,
               kyd=True, kyd_score = True)

Calculating build time metrics

Progress: ██████████████████████████████████████████████████████████████████████ 100.0%


VBox(children=(HTML(value='<style>.grad_1{background: #2468a4;} .grad_2{ color:white; background: #2468a4;}</s…