# Use Snowpark_OpenAI Notebook template

In [1]:
!!pip install "snowflake-connector-python[pandas]"

['Collecting snowflake-connector-python[pandas]',
 '  Downloading https://files.pythonhosted.org/packages/f8/c7/03d2ca5e460acb3a31d70ee17584fbc6fe828a52c9f83bbb20a392b8988b/snowflake_connector_python-3.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.6MB)',
 'Collecting pyjwt<3.0.0',
 '  Downloading https://files.pythonhosted.org/packages/2b/4f/e04a8067c7c96c364cef7ef73906504e2f40d690811c021e1a1901473a19/PyJWT-2.8.0-py3-none-any.whl',
 'Collecting asn1crypto<2.0.0,>0.24.0',
 '  Downloading https://files.pythonhosted.org/packages/c9/7f/09065fd9e27da0eda08b4d6897f1c13535066174cc023af248fc2a8d5e5a/asn1crypto-1.5.1-py2.py3-none-any.whl (105kB)',
 'Collecting typing-extensions<5,>=4.3',
 '  Using cached https://files.pythonhosted.org/packages/01/f3/936e209267d6ef7510322191003885de524fc48d1b43269810cd589ceaf5/typing_extensions-4.11.0-py3-none-any.whl',
 'Collecting urllib3<2.0.0,>=1.21.1; python_version < "3.10"',
 '  Downloading https://files.pythonhosted.org/packages/b0/53/a

In [2]:
import pandas as pd # package for high-performance, easy-to-use data structures and data analysis
import numpy as np # fundamental package for scientific computing with Python

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

import datetime

# Preprocessing, modelling and evaluating
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_auc_score, classification_report, accuracy_score
from sklearn.model_selection import StratifiedKFold, cross_val_score, KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# Read Customer and Payment data from Snowflake respective tables

In [3]:
#Import all snowflake connection details from template variables.

db_user = os.getenv('sf_user')
db_password =  'Password@2023' #os.getenv('sf_password')
db_account = os.getenv('sf_account')
db_database =  os.getenv('sf_db')
db_role = os.getenv('sf_role')
db_warehouse = 'FOSFOR_FDC'
db_schema = 'PUBLIC'

In [4]:
from snowflake.snowpark.session import Session
connection_params = {
    'user': db_user,
    'password': db_password,
    'account': db_account,
    'warehouse': db_warehouse,
    'database': db_database,
    'schema': db_schema,
    'role': db_role
}
session = Session.builder.configs(connection_params).create()

In [5]:
session.sql('use warehouse FOSFOR_FDC;').collect()
session.sql('use database FDC_DATA_MANISH;').collect()
session.sql('use schema FDC_DATA_MANISH.PUBLIC;').collect()

cc_customer = session.table('FDC_DATA_MANISH.PUBLIC.CC_CUSTOMER_DATA').to_pandas()
cc_payment = session.table('FDC_DATA_MANISH.PUBLIC.CC_PAYMENT_DATA').to_pandas()

# Reverse Data Engineering to get Raw data

In [6]:
cc_customer['FEA_2'] = cc_customer['FEA_2'].replace([0],np.nan)

In [7]:
cc_payment['PROD_LIMIT'] = cc_payment['PROD_LIMIT'].replace([0],np.nan)
cc_payment['HIGHEST_BALANCE'] = cc_payment['HIGHEST_BALANCE'].replace([0],np.nan)

cc_payment['UPDATE_DATE'] = cc_payment['UPDATE_DATE'].replace(['31/12/9999'],np.nan)
cc_payment['REPORT_DATE'] = cc_payment['REPORT_DATE'].replace(['31/12/9999'],np.nan)

In [8]:
cc_customer['FEA_2'] = cc_customer['FEA_2'].replace([np.nan],cc_customer['FEA_2'].mean())

In [9]:
cc_payment['HIGHEST_BALANCE'] = cc_payment['HIGHEST_BALANCE'].replace([np.nan],0)

In [10]:
Total_features=['FEA_1', 'FEA_2', 'FEA_3', 'FEA_4', 'FEA_5', 'FEA_6',
       'FEA_7', 'FEA_8', 'FEA_9', 'FEA_10', 'FEA_11']

In [11]:
cat_features = ['FEA_3','FEA_5','FEA_6','FEA_9']
num_features = ['FEA_1','FEA_2','FEA_4','FEA_7','FEA_8','FEA_10','FEA_11']

In [12]:
def standerdisation(df,num_cols):
# copy of datasets
    df_stand = df.copy()

    # apply standardization on numerical features
    for i in num_cols:

        # fit on training data column
        scale = StandardScaler().fit(df_stand[[i]])

        # transform the training data column
        df_stand[i] = scale.transform(df_stand[[i]])
        
    return df_stand

In [13]:
cc_customer_std =  standerdisation(cc_customer,num_features)

In [14]:
feat_cols=[cols for cols  in cc_customer_std.columns if cols not in ['LABEL','ID']]

# Preparing data for model

In [15]:
X = cc_customer_std[feat_cols]
y = cc_customer_std[['LABEL']]

In [16]:
X.head()

Unnamed: 0,FEA_1,FEA_2,FEA_3,FEA_4,FEA_5,FEA_6,FEA_7,FEA_8,FEA_9,FEA_10,FEA_11
0,-0.34907,-0.797087,3.0,-0.496387,2.0,15.0,0.056269,0.684045,5.0,-0.087361,0.976754
1,-1.072281,2.720263,3.0,-0.315404,2.0,8.0,0.056269,1.017833,4.0,-0.087361,1.2336
2,-1.072281,-0.703706,3.0,0.182301,2.0,8.0,0.056269,-0.901452,5.0,-0.607523,-1.190396
3,1.097352,-1.326246,3.0,-0.507699,2.0,11.0,-0.953879,-0.901452,5.0,-0.614069,-1.190396
4,-0.34907,0.0,2.0,-0.801797,2.0,15.0,0.056269,0.600597,4.0,1.871935,0.302052


In [17]:
y.head()

Unnamed: 0,LABEL
0,1.0
1,0.0
2,0.0
3,0.0
4,0.0


In [18]:
X.reset_index(inplace=True)
y.reset_index(inplace=True)

X.drop(columns=['index'],axis=1,inplace=True)
y.drop(columns=['index'],axis=1,inplace=True)

print('Shape of Train features :-',X.shape)
print('Shape of Train Target   :-',y.shape)
y = y['LABEL'].copy()

Shape of Train features :- (1125, 11)
Shape of Train Target   :- (1125, 1)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X.drop(columns=['index'],axis=1,inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y.drop(columns=['index'],axis=1,inplace=True)


In [None]:
import requests
def generate_response(input_payload):
    headers={"Content-type":"application/json","Accept":"application/json"}
    #url = 'http://svc-c321d4a7-c61f-4bae-b94f-b00d974b6f6f:5001/bankchurn/7713fb40-f52e-4d0c-a504-80b05a94df40/score'
    url = ''
    data={"payload" : input_payload}
    resp = requests.post(url, json=data, headers=headers)
    response_json = resp.json()
    return response_json

In [20]:
model_url = 'https://qa.fdc.leni.ai/creditriskmodel/35cc1f4c-fd73-492e-8b8e-f1197e97f62e'

In [21]:
model_url.predict(X)

AttributeError: 'str' object has no attribute 'predict'

In [130]:

def score(model, request):
    import json
    payload = request.json["payload"]
    if isinstance(request.json["payload"],str):
        payload_data = eval(payload)
        data = pd.DataFrame(payload_data)
        prediction = model.predict(data)
        return prediction.tolist()
    return "This method is not allowed"

In [141]:
# one row as input
payload = str(newX_test.iloc[:1].to_dict())
req = requests.Request()
req.json= {'payload': payload}
print({'payload': payload})
print(score(randmf, req))

{'payload': "{'FEA_1': {0: -1.0722806373771643}, 'FEA_2': {0: -0.6414521108514917}, 'FEA_3': {0: 3.0}, 'FEA_4': {0: -0.41720689397770927}, 'FEA_5': {0: 2.0}, 'FEA_6': {0: 8.0}, 'FEA_7': {0: 0.05626900238247806}, 'FEA_8': {0: 0.5171501143501661}, 'FEA_9': {0: 4.0}, 'FEA_10': {0: -0.6075163209160109}, 'FEA_11': {0: 0.3394082336702392}}"}
[1.0]


In [143]:
# Two rows as input
payload = str(newX_test.iloc[:2].to_dict())
req = requests.Request()
req.json= {'payload': payload}
print({'payload': payload})
print(score(randmf, req))

{'payload': "{'FEA_1': {0: -1.0722806373771643, 1: 1.0973519472438964}, 'FEA_2': {0: -0.6414521108514917, 1: 0.5413737175808717}, 'FEA_3': {0: 3.0, 1: 1.0}, 'FEA_4': {0: -0.41720689397770927, 1: 0.2162349125628913}, 'FEA_5': {0: 2.0, 1: 2.0}, 'FEA_6': {0: 8.0, 1: 11.0}, 'FEA_7': {0: 0.05626900238247806, 1: 0.05626900238247806}, 'FEA_8': {0: 0.5171501143501661, 1: -0.9014522862446314}, 'FEA_9': {0: 4.0, 1: 5.0}, 'FEA_10': {0: -0.6075163209160109, 1: -0.6862227070682823}, 'FEA_11': {0: 0.3394082336702392, 1: 0.5774440911347185}}"}
[1.0, 0.0]


In [149]:
# Sample Payload for future reference
payload

"{'FEA_1': {0: -1.0722806373771643, 1: 1.0973519472438964}, 'FEA_2': {0: -0.6414521108514917, 1: 0.5413737175808717}, 'FEA_3': {0: 3.0, 1: 1.0}, 'FEA_4': {0: -0.41720689397770927, 1: 0.2162349125628913}, 'FEA_5': {0: 2.0, 1: 2.0}, 'FEA_6': {0: 8.0, 1: 11.0}, 'FEA_7': {0: 0.05626900238247806, 1: 0.05626900238247806}, 'FEA_8': {0: 0.5171501143501661, 1: -0.9014522862446314}, 'FEA_9': {0: 4.0, 1: 5.0}, 'FEA_10': {0: -0.6075163209160109, 1: -0.6862227070682823}, 'FEA_11': {0: 0.3394082336702392, 1: 0.5774440911347185}}"