# Use Snowpark_OpenAI Notebook template

In [178]:
!!pip install "snowflake-connector-python[pandas]"

['Collecting snowflake-connector-python[pandas]',
 '  Using cached https://files.pythonhosted.org/packages/f8/c7/03d2ca5e460acb3a31d70ee17584fbc6fe828a52c9f83bbb20a392b8988b/snowflake_connector_python-3.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl',
 'Collecting idna<4,>=2.5',
 '  Using cached https://files.pythonhosted.org/packages/c2/e7/a82b05cf63a603df6e68d59ae6a68bf5064484a0718ea5033660af4b54a9/idna-3.6-py3-none-any.whl',
 'Collecting pytz',
 '  Using cached https://files.pythonhosted.org/packages/9c/3d/a121f284241f08268b21359bd425f7d4825cffc5ac5cd0e1b3d82ffd2b10/pytz-2024.1-py2.py3-none-any.whl',
 'Collecting tomlkit',
 '  Using cached https://files.pythonhosted.org/packages/07/fa/c96545d741f2fd47f565e4e06bfef0962add790cb9c2289d900102b55eca/tomlkit-0.12.4-py3-none-any.whl',
 'Collecting certifi>=2017.4.17',
 '  Using cached https://files.pythonhosted.org/packages/ba/06/a07f096c664aeb9f01624f858c3add0a4e913d6c96257acb4fce61e7de14/certifi-2024.2.2-py3-none-any.whl',


In [1]:
import pandas as pd # package for high-performance, easy-to-use data structures and data analysis
import numpy as np # fundamental package for scientific computing with Python

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

import datetime

# Preprocessing, modelling and evaluating
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_auc_score, classification_report, accuracy_score
from sklearn.model_selection import StratifiedKFold, cross_val_score, KFold
from xgboost import XGBClassifier
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from imblearn.over_sampling import RandomOverSampler
from collections import Counter

# Read customer and payment data from their respective Snowflake tables

In [2]:
# Snowflake connection settings, read from the template-provided environment variables.
import os  # required for os.getenv; not imported by any other visible cell

db_user = os.getenv('sf_user')
# SECURITY: the password is read from the environment only. A password must never be
# written as a literal in the notebook; any credential that was ever committed in this
# file should be treated as leaked and rotated.
db_password = os.getenv('sf_password')
db_account = os.getenv('sf_account')
db_database = os.getenv('sf_db')
db_role = os.getenv('sf_role')
db_warehouse = 'FOSFOR_FDC'
db_schema = 'PUBLIC'

In [3]:
from snowflake.snowpark.session import Session

# Collect the connection settings into the mapping handed to the Snowpark session builder.
connection_params = dict(
    user=db_user,
    password=db_password,
    account=db_account,
    warehouse=db_warehouse,
    database=db_database,
    schema=db_schema,
    role=db_role,
)

# Open the Snowpark session that the following cells query through.
session = Session.builder.configs(connection_params).create()

In [4]:
# Select the warehouse, database and schema for this session, in that order.
for use_statement in (
    'use warehouse FOSFOR_FDC;',
    'use database FDC_DATA_MANISH;',
    'use schema FDC_DATA_MANISH.PUBLIC;',
):
    session.sql(use_statement).collect()

# Load the customer and payment tables in full as pandas DataFrames.
cc_customer = session.table('FDC_DATA_MANISH.PUBLIC.CC_CUSTOMER_DATA').to_pandas()
cc_payment = session.table('FDC_DATA_MANISH.PUBLIC.CC_PAYMENT_DATA').to_pandas()

# Reverse the data engineering to recover the raw data

In [5]:
# Turn every 0 in FEA_2 into NaN so it is treated as missing.
cc_customer['FEA_2'] = cc_customer['FEA_2'].replace({0: np.nan})

In [6]:
# Amount columns: turn every 0 into NaN.
for amount_col in ('PROD_LIMIT', 'HIGHEST_BALANCE'):
    cc_payment[amount_col] = cc_payment[amount_col].replace({0: np.nan})

# Date columns: turn the '31/12/9999' value into NaN.
for date_col in ('UPDATE_DATE', 'REPORT_DATE'):
    cc_payment[date_col] = cc_payment[date_col].replace({'31/12/9999': np.nan})

In [13]:
# Fill the missing FEA_2 values with the mean of the column (NaNs are skipped by mean()).
cc_customer['FEA_2'] = cc_customer['FEA_2'].fillna(cc_customer['FEA_2'].mean())

In [14]:
# Fill the missing HIGHEST_BALANCE values with 0.
# NOTE(review): an earlier cell turns the zeros of this column into NaN, so this
# appears to undo that step for HIGHEST_BALANCE - confirm that is intended.
cc_payment['HIGHEST_BALANCE'] = cc_payment['HIGHEST_BALANCE'].fillna(0)

In [16]:
# Names of the eleven feature columns, FEA_1 through FEA_11, in numeric order.
Total_features = [f'FEA_{idx}' for idx in range(1, 12)]

In [17]:
# Feature columns split into the categorical group and the numerical group.
cat_features = [f'FEA_{idx}' for idx in (3, 5, 6, 9)]
num_features = [f'FEA_{idx}' for idx in (1, 2, 4, 7, 8, 10, 11)]

In [18]:
def standerdisation(df,num_cols):
    """Return a copy of ``df`` with each column in ``num_cols`` standardised.

    Each listed column is scaled on its own by a fresh ``StandardScaler`` that is
    fitted on that column of the copy. The frame passed in is not modified.

    Parameters
    ----------
    df : DataFrame to standardise.
    num_cols : iterable of names of the columns to scale.

    Returns
    -------
    The standardised copy of ``df``; columns not listed are left unchanged.
    """
    scaled = df.copy()
    for column in num_cols:
        # fit_transform fits the scaler on this one-column frame and transforms it in one call
        scaled[column] = StandardScaler().fit_transform(scaled[[column]])
    return scaled

In [19]:
# Standardise the numerical feature columns of the customer table (returns a new frame).
cc_customer_std = standerdisation(df=cc_customer, num_cols=num_features)

In [20]:
# Model inputs: every column of the standardised frame except LABEL and ID.
feat_cols = [name for name in cc_customer_std.columns if name not in ('LABEL', 'ID')]

# Preparing the data for the model

In [None]:
# Feature matrix (the feat_cols columns) and target (LABEL, kept as a one-column frame).
X, y = cc_customer_std[feat_cols], cc_customer_std[['LABEL']]

In [22]:
# Preview the first five rows of the feature matrix.
X.head(n=5)

Unnamed: 0,FEA_1,FEA_2,FEA_3,FEA_4,FEA_5,FEA_6,FEA_7,FEA_8,FEA_9,FEA_10,FEA_11
802,1.097352,-1.263992,3.0,-0.552944,2.0,11.0,-1.964028,1.101281,5.0,-0.087361,0.287234
567,-1.072281,0.416866,3.0,-0.971468,2.0,8.0,0.056269,0.767492,4.0,-0.607523,0.577444
574,-1.072281,-1.357373,3.0,-0.824419,2.0,8.0,0.056269,-0.901452,5.0,-0.607516,0.462693
411,1.097352,-0.952722,3.0,-0.620813,2.0,11.0,0.056269,-1.819371,3.0,1.872046,0.706044
70,1.097352,0.759263,1.0,-0.021306,2.0,11.0,0.056269,-0.400769,3.0,-0.685954,0.68522


In [23]:
# Preview the first five rows of the target.
y.head(n=5)

Unnamed: 0,LABEL
802,0.0
567,1.0
574,1.0
411,0.0
70,0.0


In [24]:
# Renumber the rows of X and y from 0.
# reset_index(drop=True) discards the old index in a single step, and assigning the
# result (instead of mutating in place) avoids pandas' SettingWithCopyWarning, because
# X and y are column selections taken from cc_customer_std.
X = X.reset_index(drop=True)
y = y.reset_index(drop=True)

print('Shape of Train features :-',X.shape)
print('Shape of Train Target   :-',y.shape)

# Collapse the one-column target frame into a 1-D Series.
y = y['LABEL'].copy()

Shape of Train features :- (956, 11)
Shape of Test features  :- (169, 11)
Shape of Train Target   :- (956, 1)
Shape of Test Target    :- (169, 1)


# Registering the finalized model using the SDK

In [127]:
!pip install refractml

Collecting refractml
[?25l  Downloading https://files.pythonhosted.org/packages/f9/0d/023d845cf453feb632b08435c91f7e5d050c0df73c5be66bdbbca2f6ba87/refractml-1.0.3-py2.py3-none-any.whl (42kB)
[K     |████████████████████████████████| 51kB 4.0MB/s eta 0:00:01
[?25hCollecting urllib3==1.26.15
[?25l  Downloading https://files.pythonhosted.org/packages/7b/f5/890a0baca17a61c1f92f72b81d3c31523c99bec609e60c292ea55b387ae8/urllib3-1.26.15-py2.py3-none-any.whl (140kB)
[K     |████████████████████████████████| 143kB 17.0MB/s eta 0:00:01
[?25hCollecting requests-toolbelt==0.9.1
[?25l  Downloading https://files.pythonhosted.org/packages/60/ef/7681134338fc097acef8d9b2f8abe0458e4d87559c689a8c306d0957ece5/requests_toolbelt-0.9.1-py2.py3-none-any.whl (54kB)
[K     |████████████████████████████████| 61kB 23.9MB/s eta 0:00:01
[?25hCollecting cloudpickle==1.6.0
  Downloading https://files.pythonhosted.org/packages/e7/e3/898487e5dbeb612054cf2e0c188463acb358167fef749c53c8bb8918cea1/cloudpickle-1.6.0

You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [129]:
from refractml import *
from refractml.constants import MLModelFlavours
import requests

In [130]:
@scoring_func
def score(model, request):
    """Score a request payload with ``model`` and return the predictions as a list.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    request : object whose ``json`` attribute is a mapping with a ``"payload"`` entry.
        The payload must be a string holding the literal form of a
        ``DataFrame.to_dict()`` result (column name -> {row index -> value}).

    Returns
    -------
    The predictions as a list, or the string ``"This method is not allowed"`` when
    the payload is not a string.

    Raises
    ------
    ValueError, SyntaxError
        If the payload string is not a valid Python literal.
    """
    # Imported at function scope so the scorer does not depend on notebook-level imports.
    import ast

    payload = request.json["payload"]
    if not isinstance(payload, str):
        return "This method is not allowed"

    # SECURITY: the payload is supplied by the caller of the scoring endpoint, so it must
    # never be passed to eval(), which would execute arbitrary code. literal_eval accepts
    # only Python literals (dicts, lists, numbers, strings, ...), which covers the
    # to_dict() string format this scorer expects.
    payload_data = ast.literal_eval(payload)
    data = pd.DataFrame(payload_data)
    prediction = model.predict(data)
    return prediction.tolist()

In [132]:
# Show the row at position 1 of the test features as a plain Python list.
# NOTE(review): newX_test is not defined in any cell visible here - confirm where it is created.
newX_test.iloc[1].tolist()

[1.0973519472438964,
 0.5413737175808717,
 1.0,
 0.2162349125628913,
 2.0,
 11.0,
 0.05626900238247806,
 -0.9014522862446314,
 5.0,
 -0.6862227070682823,
 0.5774440911347185]

In [141]:
# Check the scoring function with a single row as input.
# The payload is the string form of the first test row's to_dict() mapping.
payload = str(newX_test.head(1).to_dict())
req = requests.Request()
req.json = dict(payload=payload)  # the scorer reads the body through request.json
print(req.json)
print(score(randmf, req))

{'payload': "{'FEA_1': {0: -1.0722806373771643}, 'FEA_2': {0: -0.6414521108514917}, 'FEA_3': {0: 3.0}, 'FEA_4': {0: -0.41720689397770927}, 'FEA_5': {0: 2.0}, 'FEA_6': {0: 8.0}, 'FEA_7': {0: 0.05626900238247806}, 'FEA_8': {0: 0.5171501143501661}, 'FEA_9': {0: 4.0}, 'FEA_10': {0: -0.6075163209160109}, 'FEA_11': {0: 0.3394082336702392}}"}
[1.0]


In [143]:
# Check the scoring function with two rows as input.
# The payload is the string form of the first two test rows' to_dict() mapping.
payload = str(newX_test.head(2).to_dict())
req = requests.Request()
req.json = dict(payload=payload)  # the scorer reads the body through request.json
print(req.json)
print(score(randmf, req))

{'payload': "{'FEA_1': {0: -1.0722806373771643, 1: 1.0973519472438964}, 'FEA_2': {0: -0.6414521108514917, 1: 0.5413737175808717}, 'FEA_3': {0: 3.0, 1: 1.0}, 'FEA_4': {0: -0.41720689397770927, 1: 0.2162349125628913}, 'FEA_5': {0: 2.0, 1: 2.0}, 'FEA_6': {0: 8.0, 1: 11.0}, 'FEA_7': {0: 0.05626900238247806, 1: 0.05626900238247806}, 'FEA_8': {0: 0.5171501143501661, 1: -0.9014522862446314}, 'FEA_9': {0: 4.0, 1: 5.0}, 'FEA_10': {0: -0.6075163209160109, 1: -0.6862227070682823}, 'FEA_11': {0: 0.3394082336702392, 1: 0.5774440911347185}}"}
[1.0, 0.0]


In [150]:
# Register the model and its scoring function with the SDK.
# Keyword arguments are grouped by purpose; the values are the same as before.
register_model(
    randmf,
    score,
    # identity
    name="Credit_Risk_Model",
    description="Credit_Risk_Model_RandomForest",
    flavour=MLModelFlavours.sklearn,
    model_type="classification",
    input_type="json",
    # feature metadata, all derived from the training columns
    features=newX_train.columns,
    feature_ids=newX_train.columns,
    feature_names=newX_train.columns.tolist(),
    original_features=newX_train.columns.tolist(),
    # training and test data
    x_train=newX_train,
    x_test=newX_test,
    y_train=newy_train['LABEL'].tolist(),
    y_test=newy_test['LABEL'].tolist(),
    # evaluation inputs
    y_true=newy_test,
    y_pred=newy_pred_rf,
    # NOTE(review): prob receives the same object as y_pred - confirm whether
    # class probabilities were intended here.
    prob=newy_pred_rf,
    # feature flags
    explain_ai=True,
    kyd=True,
    kyd_score=True,
)

Calculating build time metrics

Progress: ██████████████████████████████████████████████████████████████████████ 100.0%


VBox(children=(HTML(value='<style>.grad_1{background: #2468a4;} .grad_2{ color:white; background: #2468a4;}</s…

In [149]:
# Sample payload for future reference: displays the current value of `payload`,
# i.e. the string most recently built for a scoring check.
payload

"{'FEA_1': {0: -1.0722806373771643, 1: 1.0973519472438964}, 'FEA_2': {0: -0.6414521108514917, 1: 0.5413737175808717}, 'FEA_3': {0: 3.0, 1: 1.0}, 'FEA_4': {0: -0.41720689397770927, 1: 0.2162349125628913}, 'FEA_5': {0: 2.0, 1: 2.0}, 'FEA_6': {0: 8.0, 1: 11.0}, 'FEA_7': {0: 0.05626900238247806, 1: 0.05626900238247806}, 'FEA_8': {0: 0.5171501143501661, 1: -0.9014522862446314}, 'FEA_9': {0: 4.0, 1: 5.0}, 'FEA_10': {0: -0.6075163209160109, 1: -0.6862227070682823}, 'FEA_11': {0: 0.3394082336702392, 1: 0.5774440911347185}}"