# Use Insurance Code Template Notebook Template

## Packages needed to run this notebook if running with inbuilt snowpark 3.8 template
#### !pip install --q "snowflake-connector-python[pandas]"
#### !sudo pip install --q snowflake-ml-python==1.0.11 -U
#### !pip install --upgrade --q snowflake-snowpark-python==1.9.0
#### !pip install --q seaborn
#### !pip install --upgrade --q xgboost==1.7.3
#### !pip install --upgrade --q numpy==1.24.3
#### !pip install --upgrade --q pandas==1.5.3
#### !pip install --upgrade --q anyio==3.5.0
#### !pip install --upgrade --q packaging==23.1
#### !pip install --upgrade --q scikit-learn==1.3.0
#### !pip install --upgrade --q typing-extensions==4.7.1
#### !pip install --upgrade --q cryptography==39.0.0
#### !pip install --upgrade --q fsspec==2023.9.2
#### !pip install --q xgboost
#### !pip install --q catboost
#### !pip install --q fosforio
#### !pip install --q fosforml

In [51]:
from snowflake.snowpark import Session

# Data Science Libs
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# create_temp_table warning suppresion
import warnings; warnings.simplefilter('ignore')

#Modeling Libs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import VotingClassifier

from xgboost import XGBClassifier
from catboost import CatBoostClassifier

# FosforIO to read from snowflake
from fosforio import snowflake
# FosforML to register Model on FDC
from joblib import dump, load
from fosforml import *
from fosforml.constants import MLModelFlavours
import requests

# Read data using FosforIO

In [52]:
# To get snowflake connection object with a specific connection name
snowflake.get_connection(connection_name="FDC_Insurance_Snowflake")

Connection object created: <snowflake.connector.connection.SnowflakeConnection object at 0x7f0a995db250>
Please close the connection after use!


<snowflake.connector.connection.SnowflakeConnection at 0x7f0a995db250>

In [53]:
# To read a specific dataset published from a snowflake connection
df = snowflake.get_dataframe("AUTO_INSURANCE_CLAIMS_DATA")
#df = snowflake.get_dataframe("AUTO_INSURANCE_CLAIMS_DATA_PRODUCT")

In [54]:
df.head()

Unnamed: 0,MONTHS_AS_CUSTOMER,CUSTOMER_AGE,POLICY_NUMBER,POLICY_BIND_DATE,POLICY_STATE,POLICY_CSL,POLICY_DEDUCTABLE,POLICY_ANNUAL_PREMIUM,UMBRELLA_LIMIT,INSURED_ZIP,...,WITNESSES,POLICE_REPORT_AVAILABLE,TOTAL_CLAIM_AMOUNT_PAID,INJURY_CLAIM,PROPERTY_CLAIM,VEHICLE_CLAIM,AUTO_MAKE,AUTO_MODEL,AUTO_YEAR,FRAUD_REPORTED
0,37,58,459791,27-04-2010,CT,250/500,1171,1394,4344805,444558,...,2,Police Report Available,77586.0,13159.0,888.0,63539.0,BMW,3 Series,2010,No Fraud Reported
1,152,31,460016,25-06-2009,CT,100/300,1508,1773,3302239,604138,...,0,No Police Report Available,83454.0,12951.0,2165.0,68338.0,Chevrolet,Tahoe,2014,No Fraud Reported
2,288,54,460722,06-04-2007,CT,100/300,1095,1385,3019065,466201,...,3,Police Report Available,68192.0,365.0,20936.0,46891.0,Ford,F150,2004,No Fraud Reported
3,69,61,461172,25-07-1996,CT,250/500,1177,1322,6133054,445856,...,1,Police Report Available,75917.0,11604.0,19876.0,44437.0,Jeep,Wrangler,1997,Fraud Reported
4,279,41,461397,17-07-2013,CT,100/300,959,1278,2321250,434342,...,0,No Police Report Available,65547.0,7447.0,18436.0,39664.0,Saab,95,2015,No Fraud Reported


# Read data using Snowflake's Snowpark

In [55]:
#Import all snowflake connection details from Template or Project variables.
db_user = os.getenv('Snowflake_user')
db_password =  os.getenv('Snowflake_password')
db_account = os.getenv('Snowflake_Account')
db_database =  os.getenv('Snowflake_Database')
db_role = os.getenv('Snowflake_user')
db_warehouse = os.getenv('Snowflake_Warehouse')
db_schema = os.getenv('Snowflake_Schema')

In [56]:
from snowflake.snowpark.session import Session
connection_params = {
    'user': db_user,
    'password': db_password,
    'account': db_account,
    'warehouse': db_warehouse,
    'database': db_database,
    'schema': db_schema,
    'role': db_role
}
session = Session.builder.configs(connection_params).create()

In [57]:
session.sql('use warehouse FOSFOR_SOLUTIONS_WH;').collect()
session.sql('use database FDC_Insurance;').collect()
session.sql('use schema FDC_Insurance.PUBLIC;').collect()

df = session.table('FDC_Insurance.PUBLIC.AUTO_INSURANCE_CLAIMS_DATA')
#df = session.table('FDC_Insurance.PUBLIC.AUTO_INSURANCE_CLAIMS_DATA_PRODUCT')
insurance_claim = df.to_pandas()

In [58]:
insurance_claim.columns = insurance_claim.columns.str.lower()

In [59]:
insurance_claim.columns

Index(['months_as_customer', 'customer_age', 'policy_number',
       'policy_bind_date', 'policy_state', 'policy_csl', 'policy_deductable',
       'policy_annual_premium', 'umbrella_limit', 'insured_zip', 'insured_sex',
       'insured_education_level', 'insured_occupation', 'insured_hobbies',
       'insured_relationship', 'capital_gains', 'capital_loss',
       'incident_date', 'incident_type', 'collision_type', 'incident_severity',
       'authorities_contacted', 'incident_state', 'incident_city',
       'incident_location', 'incident_hour_of_the_day', 'incident_time_of_day',
       'number_of_vehicles_involved', 'property_damage', 'bodily_injuries',
       'witnesses', 'police_report_available', 'total_claim_amount_paid',
       'injury_claim', 'property_claim', 'vehicle_claim', 'auto_make',
       'auto_model', 'auto_year', 'fraud_reported'],
      dtype='object')

In [60]:
insurance_claim.rename(columns = {'total_claim_amount_paid': 'total_claim_amount'}, inplace=True)

In [61]:
insurance_claim=insurance_claim.replace("?",np.NaN)

In [62]:
df = insurance_claim.copy()

In [63]:
actual_inference = df.copy()
actual_inference.drop('fraud_reported', axis = 1, inplace=True)

In [64]:
actual_inference.head()

Unnamed: 0,months_as_customer,customer_age,policy_number,policy_bind_date,policy_state,policy_csl,policy_deductable,policy_annual_premium,umbrella_limit,insured_zip,...,bodily_injuries,witnesses,police_report_available,total_claim_amount,injury_claim,property_claim,vehicle_claim,auto_make,auto_model,auto_year
0,170,60,429383,05-05-2013,CT,500/1000,537,697,0,466132,...,1,0,No Police Report Available,101257.0,21316.0,6844.0,73097.0,Dodge,Neon,2013
1,50,28,460493,18-04-2013,CT,500/1000,1727,1918,2919469,463809,...,0,0,Police Report Available,80444.0,472.0,1165.0,78807.0,Accura,TL,2005
2,31,28,448474,31-07-2004,CT,250/500,952,1204,7957599,610393,...,2,1,Police Report Available,75641.0,7510.0,10412.0,57719.0,BMW,X5,2009
3,188,58,462247,27-12-2010,CT,100/300,1521,1728,8639634,606942,...,1,2,No Police Report Available,80621.0,10134.0,908.0,69579.0,BMW,X5,2015
4,24,65,436220,13-01-2003,CT,500/1000,1090,1535,3551771,431277,...,0,1,Unknown,59144.0,305.0,15598.0,43241.0,Nissan,Pathfinder,2014


In [65]:
# dropping columns which are not necessary for prediction

to_drop = ['policy_number','policy_bind_date','policy_state','insured_zip','incident_location','incident_date',
           'incident_state','incident_city','insured_hobbies','auto_make','auto_model','auto_year']

df.drop(to_drop, inplace=True, axis=1)

In [66]:
# separating the feature and target columns

X = df.drop('fraud_reported', axis = 1)
y = df['fraud_reported']

In [67]:
# extracting categorical columns
cat_df = X.select_dtypes(include = ['object'])

In [68]:
cat_df.columns

Index(['customer_age', 'policy_csl', 'insured_sex', 'insured_education_level',
       'insured_occupation', 'insured_relationship', 'incident_type',
       'collision_type', 'incident_severity', 'authorities_contacted',
       'incident_time_of_day', 'property_damage', 'police_report_available'],
      dtype='object')

In [69]:
# printing unique values of each column
for col in cat_df.columns:
    print(f"{col}: \n{cat_df[col].unique()}\n")

customer_age: 
['60' '28' '58' '65' '34' '33' '20' '29' '27' '64' '50' '35' '43' '23'
 '19' '25' '68' '46' '31' '24' '30' '69' '37' '57' '55' '45' '49' '59'
 '53' '56' '42' '26' '41' '52' '36' '44' '40' '67' '51' '54' '38' '63'
 '61' '48' '22' '32' '39' '21' '47' '62' '66']

policy_csl: 
['500/1000' '250/500' '100/300']

insured_sex: 
['FEMALE' 'MALE']

insured_education_level: 
['MD' 'High School' 'Associate' 'Masters' 'College' 'JD' 'PhD']

insured_occupation: 
['other-service' 'prof-specialty' 'tech-support' 'craft-repair'
 'priv-house-serv' 'adm-clerical' 'sales' 'farming-fishing' 'armed-forces'
 'protective-serv' 'exec-managerial' 'machine-op-inspct'
 'handlers-cleaners' 'transport-moving']

insured_relationship: 
['unmarried' 'own-child' 'husband' 'other-relative' 'wife' 'not-in-family']

incident_type: 
['Single Vehicle Collision' 'Multi-vehicle Collision' 'Parked Car'
 'Vehicle Theft']

collision_type: 
['Rear Collision' 'Front Collision' 'Side Collision'
 'Details not Availabl

In [70]:
cat_df = pd.get_dummies(cat_df, drop_first = True)

In [71]:
num_df = df.select_dtypes(include = ['int16','int8','int32','float64'])

In [72]:
num_df.columns

Index(['months_as_customer', 'policy_deductable', 'policy_annual_premium',
       'umbrella_limit', 'capital_gains', 'capital_loss',
       'incident_hour_of_the_day', 'number_of_vehicles_involved',
       'bodily_injuries', 'witnesses', 'total_claim_amount', 'injury_claim',
       'property_claim', 'vehicle_claim'],
      dtype='object')

In [73]:
# combining the Numerical and Categorical dataframes to get the final dataset

X = pd.concat([num_df, cat_df], axis = 1)

In [74]:
# splitting data into training set and test set

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

In [75]:
num_df = X_train[['months_as_customer', 'policy_deductable', 'umbrella_limit',
       'capital_gains', 'capital_loss', 'incident_hour_of_the_day',
       'number_of_vehicles_involved', 'bodily_injuries', 'witnesses', 'injury_claim', 'property_claim',
       'vehicle_claim']]

In [76]:
num_df_test = X_test[['months_as_customer', 'policy_deductable', 'umbrella_limit',
       'capital_gains', 'capital_loss', 'incident_hour_of_the_day',
       'number_of_vehicles_involved', 'bodily_injuries', 'witnesses', 'injury_claim', 'property_claim',
       'vehicle_claim']]

In [77]:
# Scaling the numeric values in the dataset

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_data = scaler.fit_transform(num_df)

In [78]:
scaled_data_test = scaler.transform(num_df_test)

In [79]:
scaled_num_df = pd.DataFrame(data = scaled_data, columns = num_df.columns, index = X_train.index)
scaled_num_df.head()

Unnamed: 0,months_as_customer,policy_deductable,umbrella_limit,capital_gains,capital_loss,incident_hour_of_the_day,number_of_vehicles_involved,bodily_injuries,witnesses,injury_claim,property_claim,vehicle_claim
193141,2.090619,-1.287403,0.336185,1.635393,-1.253865,-0.179726,-0.678236,0.005975,-1.347864,1.35594,1.334588,0.244942
87067,-0.98738,-0.158154,0.133299,-0.738531,-0.691794,-0.521337,0.749108,0.005975,1.342984,0.938088,1.355046,0.116724
37173,0.034955,0.180247,0.484473,-0.564482,0.291002,0.161885,-0.678236,-1.217664,1.342984,-1.015758,-0.985495,-1.078747
228110,-0.316816,2.003124,-0.480264,1.405643,0.422948,0.674301,2.176452,0.005975,-0.450915,1.510644,1.580841,-1.554832
70577,1.782819,-0.543296,0.076485,-0.842096,0.227505,-2.400197,0.749108,1.229615,-0.450915,-0.064759,0.685793,-0.223364


In [80]:
scaled_num_df_test = pd.DataFrame(data = scaled_data_test, columns = num_df_test.columns, index = X_test.index)
scaled_num_df_test.head()

Unnamed: 0,months_as_customer,policy_deductable,umbrella_limit,capital_gains,capital_loss,incident_hour_of_the_day,number_of_vehicles_involved,bodily_injuries,witnesses,injury_claim,property_claim,vehicle_claim
80335,0.62857,-0.846174,-0.951944,-0.517123,1.38388,-2.571002,-0.678236,-1.217664,0.446034,-1.029314,-0.953348,-1.327724
211998,-0.151923,1.068315,-0.634356,1.379212,0.372671,0.674301,0.749108,0.005975,1.342984,-0.554443,-0.963513,-1.234254
33582,0.62857,1.101968,1.2935,-1.715106,-0.439653,0.674301,-0.678236,-1.217664,0.446034,-1.021488,-0.503029,-0.177904
145858,-0.921423,-0.975177,0.030728,0.648317,0.154818,-2.571002,-0.678236,0.005975,1.342984,1.586249,-0.966309,0.007506
58658,-0.195895,-1.00883,0.934363,-1.715106,0.721715,0.161885,-0.678236,-1.217664,-0.450915,-1.011426,-0.946105,1.564998


In [81]:
X_train.drop(columns = scaled_num_df.columns, inplace = True)

In [82]:
X_train = pd.concat([scaled_num_df, X_train], axis = 1)

In [83]:
X_test.drop(columns = scaled_num_df_test.columns, inplace = True)

In [84]:
X_test = pd.concat([scaled_num_df_test, X_test], axis = 1)

# Decision Tree

In [85]:
#from sklearn.tree import DecisionTreeClassifier
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)

In [86]:
y_pred = dtc.predict(X_test)

In [87]:
dtc_train_acc = accuracy_score(y_train, dtc.predict(X_train))
dtc_test_acc = accuracy_score(y_test, y_pred)

print(f"Training accuracy of Decision Tree is : {dtc_train_acc}")
print(f"Test accuracy of Decision Tree is : {dtc_test_acc}")

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Training accuracy of Decision Tree is : 1.0
Test accuracy of Decision Tree is : 0.9941605340891628
[[ 8505   130]
 [  212 49720]]
                   precision    recall  f1-score   support

   Fraud Reported       0.98      0.98      0.98      8635
No Fraud Reported       1.00      1.00      1.00     49932

         accuracy                           0.99     58567
        macro avg       0.99      0.99      0.99     58567
     weighted avg       0.99      0.99      0.99     58567



In [88]:
actual_model_columns = ['months_as_customer',
 'policy_deductable',
 'umbrella_limit',
 'capital_gains',
 'capital_loss',
 'incident_hour_of_the_day',
 'number_of_vehicles_involved',
 'bodily_injuries',
 'witnesses',
 'injury_claim',
 'property_claim',
 'vehicle_claim',
 'policy_annual_premium',
 'total_claim_amount',
 'customer_age_20',
 'customer_age_21',
 'customer_age_22',
 'customer_age_23',
 'customer_age_24',
 'customer_age_25',
 'customer_age_26',
 'customer_age_27',
 'customer_age_28',
 'customer_age_29',
 'customer_age_30',
 'customer_age_31',
 'customer_age_32',
 'customer_age_33',
 'customer_age_34',
 'customer_age_35',
 'customer_age_36',
 'customer_age_37',
 'customer_age_38',
 'customer_age_39',
 'customer_age_40',
 'customer_age_41',
 'customer_age_42',
 'customer_age_43',
 'customer_age_44',
 'customer_age_45',
 'customer_age_46',
 'customer_age_47',
 'customer_age_48',
 'customer_age_49',
 'customer_age_50',
 'customer_age_51',
 'customer_age_52',
 'customer_age_53',
 'customer_age_54',
 'customer_age_55',
 'customer_age_56',
 'customer_age_57',
 'customer_age_58',
 'customer_age_59',
 'customer_age_60',
 'customer_age_61',
 'customer_age_62',
 'customer_age_63',
 'customer_age_64',
 'customer_age_65',
 'customer_age_66',
 'customer_age_67',
 'customer_age_68',
 'customer_age_69',
 'policy_csl_250/500',
 'policy_csl_500/1000',
 'insured_sex_MALE',
 'insured_education_level_College',
 'insured_education_level_High School',
 'insured_education_level_JD',
 'insured_education_level_MD',
 'insured_education_level_Masters',
 'insured_education_level_PhD',
 'insured_occupation_armed-forces',
 'insured_occupation_craft-repair',
 'insured_occupation_exec-managerial',
 'insured_occupation_farming-fishing',
 'insured_occupation_handlers-cleaners',
 'insured_occupation_machine-op-inspct',
 'insured_occupation_other-service',
 'insured_occupation_priv-house-serv',
 'insured_occupation_prof-specialty',
 'insured_occupation_protective-serv',
 'insured_occupation_sales',
 'insured_occupation_tech-support',
 'insured_occupation_transport-moving',
 'insured_relationship_not-in-family',
 'insured_relationship_other-relative',
 'insured_relationship_own-child',
 'insured_relationship_unmarried',
 'insured_relationship_wife',
 'incident_type_Parked Car',
 'incident_type_Single Vehicle Collision',
 'incident_type_Vehicle Theft',
 'collision_type_Front Collision',
 'collision_type_Rear Collision',
 'collision_type_Side Collision',
 'incident_severity_Minor Damage',
 'incident_severity_Total Loss',
 'authorities_contacted_Fire',
 'authorities_contacted_None',
 'authorities_contacted_Other',
 'authorities_contacted_Police',
 'incident_time_of_day_Early Morning Hours',
 'incident_time_of_day_Morning to Noon',
 'incident_time_of_day_Night Time',
 'property_damage_No Property Damage',
 'property_damage_Property Damage',
 'police_report_available_Police Report Available',
 'police_report_available_Unknown']

In [89]:
@scoring_func
def score(model, request):
    payload_dict = request.json["payload"]
    df = pd.DataFrame(payload_dict,index=[0])
    df = df.fillna(0)
    df.columns = df.columns.str.lower()
    df.drop(to_drop, inplace=True, axis=1)
    
    # extracting categorical columns
    cat_df = df[['customer_age','policy_csl','insured_sex','insured_education_level','insured_occupation','insured_relationship','incident_type','collision_type','incident_severity','authorities_contacted','incident_time_of_day','property_damage','police_report_available']]
    cat_df = pd.get_dummies(cat_df, drop_first = True)

    num_df = df[['months_as_customer', 'policy_deductable', 'policy_annual_premium',
       'umbrella_limit', 'capital_gains', 'capital_loss',
       'incident_hour_of_the_day', 'number_of_vehicles_involved',
       'bodily_injuries', 'witnesses', 'total_claim_amount', 'injury_claim',
       'property_claim', 'vehicle_claim']]
    
    X = pd.concat([num_df, cat_df], axis = 1)
    
    num_df_test = X[['months_as_customer', 'policy_deductable', 'umbrella_limit',
       'capital_gains', 'capital_loss', 'incident_hour_of_the_day',
       'number_of_vehicles_involved', 'bodily_injuries', 'witnesses', 'injury_claim', 'property_claim',
       'vehicle_claim']]

    scaled_data_test = scaler.transform(num_df_test)
    scaled_num_df_test = pd.DataFrame(data = scaled_data_test, columns = num_df_test.columns, index = X.index)
    
    X.drop(columns = scaled_num_df_test.columns, inplace = True)
    X = pd.concat([scaled_num_df_test, X], axis = 1)
    
    missing_features = [missing_col for missing_col in actual_model_columns if missing_col not in X.columns]
    X[missing_features] = 0
    
    prediction = model.predict(X[actual_model_columns])
    probability = model.predict_proba(X)[:,1]
    return {"prediction" : prediction, "probability" : probability}

# Dump all the required artifacts into Pickle file

In [90]:
import pickle

In [91]:
with open('/data/Output/Fraud_Classifier/Decisiontree_Fraudclassifier_v1.pkl', 'wb') as f:  # open a text file
    pickle.dump(dtc, f) # serialize the list
    
with open('model_artifacts/Decisiontree_Fraudclassifier_v1.pkl', 'wb') as f:  # open a text file
    pickle.dump(dtc, f) # serialize the list

In [92]:
with open('/data/Output/Fraud_Classifier/to_drop.pkl', 'wb') as f:  # open a text file
    pickle.dump(to_drop, f) # serialize the list
    
with open('model_artifacts/to_drop.pkl', 'wb') as f:  # open a text file
    pickle.dump(to_drop, f) # serialize the list

In [93]:
cat_col = ['customer_age','policy_csl','insured_sex','insured_education_level','insured_occupation','insured_relationship',
'incident_type','collision_type','incident_severity','authorities_contacted','incident_time_of_day','property_damage','police_report_available']

with open('/data/Output/Fraud_Classifier/cat_col.pkl', 'wb') as f:  # open a text file
    pickle.dump(cat_col, f) # serialize the list

with open('model_artifacts/cat_col.pkl', 'wb') as f:  # open a text file
    pickle.dump(cat_col, f) # serialize the list

In [94]:
num_col = ['months_as_customer', 'policy_deductable', 'policy_annual_premium',
       'umbrella_limit', 'capital_gains', 'capital_loss',
       'incident_hour_of_the_day', 'number_of_vehicles_involved',
       'bodily_injuries', 'witnesses', 'total_claim_amount', 'injury_claim',
       'property_claim', 'vehicle_claim']

with open('/data/Output/Fraud_Classifier/num_col.pkl', 'wb') as f:  # open a text file
    pickle.dump(num_col, f) # serialize the list
    
with open('model_artifacts/num_col.pkl', 'wb') as f:  # open a text file
    pickle.dump(num_col, f) # serialize the list

In [95]:
num_col_test = ['months_as_customer', 'policy_deductable', 'umbrella_limit',
       'capital_gains', 'capital_loss', 'incident_hour_of_the_day',
       'number_of_vehicles_involved', 'bodily_injuries', 'witnesses', 'injury_claim', 'property_claim',
       'vehicle_claim']

with open('/data/Output/Fraud_Classifier/num_col_test.pkl', 'wb') as f:  # open a text file
    pickle.dump(num_col_test, f) # serialize the list

with open('model_artifacts/num_col_test.pkl', 'wb') as f:  # open a text file
    pickle.dump(num_col_test, f) # serialize the list

In [96]:
with open('/data/Output/Fraud_Classifier/scaler.pkl', 'wb') as f:  # open a text file
    pickle.dump(scaler, f) # serialize the list

with open('model_artifacts/scaler.pkl', 'wb') as f:  # open a text file
    pickle.dump(scaler, f) # serialize the list

In [97]:
with open('/data/Output/Fraud_Classifier/actual_model_columns.pkl', 'wb') as f:  # open a text file
    pickle.dump(actual_model_columns, f) # serialize the list
    
with open('model_artifacts/actual_model_columns.pkl', 'wb') as f:  # open a text file
    pickle.dump(actual_model_columns, f) # serialize the list

# Sample Code for Payload

In [110]:
insurance_claim.columns = insurance_claim.columns.str.upper()
insurance_claims = insurance_claim.copy()
insurance_claims.drop('FRAUD_REPORTED', axis = 1, inplace=True)
#payload = insurance_claims.iloc[0].to_dict()
payload = actual_inference.iloc[0].to_dict()

In [111]:
payload

{'months_as_customer': 213,
 'customer_age': '66',
 'policy_number': 432849,
 'policy_bind_date': '30-10-2001',
 'policy_state': 'CT',
 'policy_csl': '100/300',
 'policy_deductable': 1546,
 'policy_annual_premium': 2208,
 'umbrella_limit': 4146654,
 'insured_zip': '604147',
 'insured_sex': 'MALE',
 'insured_education_level': 'JD',
 'insured_occupation': 'craft-repair',
 'insured_hobbies': 'camping',
 'insured_relationship': 'not-in-family',
 'capital_gains': 67272,
 'capital_loss': -65017,
 'incident_date': datetime.date(2021, 11, 12),
 'incident_type': 'Single Vehicle Collision',
 'collision_type': 'Details not Available',
 'incident_severity': 'Major Damage',
 'authorities_contacted': 'Other',
 'incident_state': 'CT',
 'incident_city': 'Hartford',
 'incident_location': 'Flute',
 'incident_hour_of_the_day': 20,
 'incident_time_of_day': 'Night Time',
 'number_of_vehicles_involved': 1,
 'property_damage': 'No Property Damage',
 'bodily_injuries': 2,
 'witnesses': 3,
 'police_report_avai

In [112]:
req = requests.Request()
req.json = {"payload":payload}
y_req = req
y_out = score(dtc, y_req)
y_out

{'prediction': array(['No Fraud Reported'], dtype=object),
 'probability': array([1.])}

In [113]:
req.json

{'payload': {'months_as_customer': 213,
  'customer_age': '66',
  'policy_number': 432849,
  'policy_bind_date': '30-10-2001',
  'policy_state': 'CT',
  'policy_csl': '100/300',
  'policy_deductable': 1546,
  'policy_annual_premium': 2208,
  'umbrella_limit': 4146654,
  'insured_zip': '604147',
  'insured_sex': 'MALE',
  'insured_education_level': 'JD',
  'insured_occupation': 'craft-repair',
  'insured_hobbies': 'camping',
  'insured_relationship': 'not-in-family',
  'capital_gains': 67272,
  'capital_loss': -65017,
  'incident_date': datetime.date(2021, 11, 12),
  'incident_type': 'Single Vehicle Collision',
  'collision_type': 'Details not Available',
  'incident_severity': 'Major Damage',
  'authorities_contacted': 'Other',
  'incident_state': 'CT',
  'incident_city': 'Hartford',
  'incident_location': 'Flute',
  'incident_hour_of_the_day': 20,
  'incident_time_of_day': 'Night Time',
  'number_of_vehicles_involved': 1,
  'property_damage': 'No Property Damage',
  'bodily_injuries':

In [115]:
y_pred = dtc.predict(X_test)
y_prob = dtc.predict_proba(X_test)[:,1]

In [116]:
y_pred

array(['Fraud Reported', 'No Fraud Reported', 'No Fraud Reported', ...,
       'No Fraud Reported', 'No Fraud Reported', 'Fraud Reported'],
      dtype=object)

In [117]:
y_prob

array([0., 1., 1., ..., 1., 1., 0.])

In [118]:
## registering the model in Fosfor Insight Designer.
model_reg = register_model(dtc, 
               score, 
               name="Claims_Fraud_DTree_Classifier", 
               description="Insurance claim's model trained for Fraud Classification",
               flavour=MLModelFlavours.sklearn,
               model_type="classification",
               init_script="snowflake-connector-python[pandas]",
               y_true=y_test,
               y_pred=y_pred, 
               prob=y_prob,
               features=X_train.columns,
               input_type="json", 
               explain_ai=True,
               x_train=X_train, 
               x_test=X_test, 
               y_train=y_train.tolist(),
               y_test=y_test.tolist(),
               feature_names=X_train.columns.tolist(),
               original_features=X_train.columns.tolist(),
               feature_ids=X_train.columns,
               kyd=True, kyd_score = True)

Calculating build time metrics

Progress: ██████████████████████████████████████████████████████████████████████ 100.0%


VBox(children=(HTML(value='<style>.grad_1{background: #2468a4;} .grad_2{ color:white; background: #2468a4;}</s…

# Version 2 of Decision Tree Fraud Classifier Model

In [125]:
dtc = DecisionTreeClassifier(random_state=7)
dtc.fit(X_train, y_train)

In [126]:
y_pred = dtc.predict(X_test)
y_prob = dtc.predict_proba(X_test)[:,1]

In [127]:
dtc_train_acc = accuracy_score(y_train, dtc.predict(X_train))
dtc_test_acc = accuracy_score(y_test, y_pred)

print(f"Training accuracy of Decision Tree is : {dtc_train_acc}")
print(f"Test accuracy of Decision Tree is : {dtc_test_acc}")

print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Training accuracy of Decision Tree is : 1.0
Test accuracy of Decision Tree is : 0.9941263851657076
[[ 8305   156]
 [  188 49918]]
                   precision    recall  f1-score   support

   Fraud Reported       0.98      0.98      0.98      8461
No Fraud Reported       1.00      1.00      1.00     50106

         accuracy                           0.99     58567
        macro avg       0.99      0.99      0.99     58567
     weighted avg       0.99      0.99      0.99     58567



In [128]:
## registering the model in Fosfor Insight Designer using same name.
model_reg = register_model(dtc, 
               score, 
               name="Claims_Fraud_DTree_Classifier", 
               description="Insurance claim's model trained for Fraud Classification",
               flavour=MLModelFlavours.sklearn,
               model_type="classification",
               init_script="snowflake-connector-python[pandas]",
               y_true=y_test,
               y_pred=y_pred, 
               prob=y_prob,
               features=X_train.columns,
               input_type="json", 
               explain_ai=True,
               x_train=X_train, 
               x_test=X_test, 
               y_train=y_train.tolist(),
               y_test=y_test.tolist(),
               feature_names=X_train.columns.tolist(),
               original_features=X_train.columns.tolist(),
               feature_ids=X_train.columns,
               kyd=True, kyd_score = True)

Calculating build time metrics

Progress: ██████████████████████████████████████████████████████████████████████ 100.0%


VBox(children=(HTML(value='<style>.grad_1{background: #2468a4;} .grad_2{ color:white; background: #2468a4;}</s…