# Use Credit Risk Analytics Notebook Template

In [1]:
from snowflake.snowpark import Session
from snowflake.connector.pandas_tools import write_pandas
# Data Science Libs
import pandas as pd
import numpy as np
from scipy.stats import skew
import xgboost as xgb
from sklearn.model_selection import KFold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, recall_score, precision_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from math import sqrt
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from catboost import CatBoostClassifier
import gc

# FosforML to register Model on FDC
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

from joblib import dump, load
import requests

In [2]:
NFOLDS = 3
SEED = 0
NROWS = None

In [3]:
table_name = 'CRA_APPLICATION_TRAIN_DETAILS'

sf_df = my_session.sql("select * from {}".format(table_name))
application_train_sf = sf_df.to_pandas()

In [4]:
table_name = 'CRA_APPLICATION_TEST_DETAILS'

sf_df = my_session.sql("select * from {}".format(table_name))
application_test_sf = sf_df.to_pandas()

In [5]:
table_name = 'CRA_PREVIOUS_APPLICATION_DETAILS'

sf_df = my_session.sql("select * from {}".format(table_name))
previous_application_sf = sf_df.to_pandas()

In [60]:
#application_train_sf  = snowflake.get_dataframe("CRA_APPLICATION_TRAIN_DETAILS")
#application_test_sf  = snowflake.get_dataframe("CRA_APPLICATION_TEST_DETAILS")
#previous_application_sf  = snowflake.get_dataframe("CRA_PREVIOUS_APPLICATION_DETAILS")

# Convert Snowflake data into Pandas dataframes

In [6]:
data = application_train_sf.copy()
test = application_test_sf.copy()
prev = previous_application_sf.copy()

In [7]:
data.drop(['CREATED_BY','CREATED_AT'], axis=1, inplace=True)
test.drop(['CREATED_BY','CREATED_AT'], axis=1, inplace=True)
prev.drop(['CREATED_BY','CREATED_AT'], axis=1, inplace=True)

In [8]:
categorical_feats = [
    f for f in data.columns if data[f].dtype == 'object'
]

In [9]:
for f_ in categorical_feats:
    data[f_], indexer = pd.factorize(data[f_])
    test[f_] = indexer.get_indexer(test[f_])

In [10]:
gc.enable()

y_train = data['TARGET']
del data['TARGET']

In [11]:
prev_cat_features = [
    f_ for f_ in prev.columns if prev[f_].dtype == 'object'
]
for f_ in prev_cat_features:
    prev[f_], _ = pd.factorize(prev[f_])

In [12]:
avg_prev = prev.groupby('SK_ID_CURR').mean()
cnt_prev = prev[['SK_ID_CURR', 'SK_ID_PREV']].groupby('SK_ID_CURR').count()
avg_prev['nb_app'] = cnt_prev['SK_ID_PREV']
del avg_prev['SK_ID_PREV']

In [13]:
x_train = data.merge(right=avg_prev.reset_index(), how='left', on='SK_ID_CURR')
x_test = test.merge(right=avg_prev.reset_index(), how='left', on='SK_ID_CURR')

x_train = x_train.fillna(0)
x_test= x_test.fillna(0)

ntrain = x_train.shape[0]
ntest = x_test.shape[0]

In [14]:
excluded_feats = ['SK_ID_CURR']
features = [f_ for f_ in x_train.columns if f_ not in excluded_feats]

In [15]:
x_train = x_train[features]
x_test = x_test[features]

kf = KFold(n_splits = NFOLDS, shuffle=True, random_state=SEED)

In [16]:
class SklearnWrapper(object):
    def __init__(self, clf, seed=0, params=None):
        params['random_state'] = seed
        self.clf = clf(**params)

    def train(self, x_train, y_train):
        self.clf.fit(x_train, y_train)

    def predict(self, x):
        return self.clf.predict(x)

    def predict_proba(self, x):
        return self.clf.predict_proba(x)[:,1]

In [17]:
class CatboostWrapper(object):
    def __init__(self, clf, seed=0, params=None):
        params['random_seed'] = seed
        self.clf = clf(**params)

    def train(self, x_train, y_train):
        self.clf.fit(x_train, y_train)

    def predict(self, x):
        return self.clf.predict(x)
    
    def predict_proba(self, x):
        return self.clf.predict_proba(x)[:,1]

In [18]:
class XgbWrapper(object):
    def __init__(self, seed=0, params=None):
        self.param = params
        self.param['seed'] = seed
        self.nrounds = params.pop('nrounds', 250)

    def train(self, x_train, y_train):
        dtrain = xgb.DMatrix(x_train, label=y_train)
        self.gbdt = xgb.train(self.param, dtrain, self.nrounds)

    def predict(self, x):
        return self.gbdt.predict(xgb.DMatrix(x))
    
    def predict_proba(self, x):
        return self.gbdt.predict_proba(xgb.DMatrix(x))[:,1]

In [19]:
def get_oof(clf):
    oof_train = np.zeros((ntrain,))
    oof_test = np.zeros((ntest,))
    oof_test_skf = np.empty((NFOLDS, ntest))

    for i, (train_index, test_index) in enumerate(kf.split(x_train)):
        x_tr = x_train.loc[train_index]
        y_tr = y_train.loc[train_index]
        x_te = x_train.loc[test_index]

        clf.train(x_tr, y_tr)

        oof_train[test_index] = clf.predict(x_te)
        oof_test_skf[i, :] = clf.predict(x_test)

    oof_test[:] = oof_test_skf.mean(axis=0)
    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)

In [20]:
et_params = {
    'n_jobs': 16,
    'n_estimators': 200,
    'max_features': 0.5,
    'max_depth': 12,
    'min_samples_leaf': 2,
    'random_state':0
}

In [21]:
rf_params = {
    'n_jobs': 16,
    'n_estimators': 200,
    'max_features': 0.2,
    'max_depth': 12,
    'min_samples_leaf': 2,
    'random_state':0
}

In [22]:
xgb_params = {
    'seed': 0,
    'colsample_bytree': 0.7,
    'silent': 1,
    'subsample': 0.7,
    'learning_rate': 0.075,
    'objective': 'binary:logistic',
    'max_depth': 4,
    'num_parallel_tree': 1,
    'min_child_weight': 1,
    'nrounds': 200
}

In [23]:
catboost_params = {
    'iterations': 200,
    'learning_rate': 0.5,
    'depth': 3,
    'l2_leaf_reg': 40,
    'bootstrap_type': 'Bernoulli',
    'subsample': 0.7,
    'scale_pos_weight': 5,
    'eval_metric': 'AUC',
    'od_type': 'Iter',
    'allow_writing_files': False,
    'random_seed':0
}

In [24]:
xg = XgbWrapper(seed=SEED, params=xgb_params)
et = SklearnWrapper(clf=ExtraTreesClassifier, seed=SEED, params=et_params)
rf = SklearnWrapper(clf=RandomForestClassifier, seed=SEED, params=rf_params)
cb = CatboostWrapper(clf= CatBoostClassifier, seed = SEED, params=catboost_params)

# XGBoost Classifier

In [25]:
xg_oof_train, xg_oof_test = get_oof(xg)

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.



In [26]:
param = xgb_params
param['seed'] = 0
nrounds = 250

dtrain = xgb.DMatrix(x_train, label=y_train)
gbdt = xgb.train(param, dtrain, nrounds)

Parameters: { "silent" } are not used.



In [None]:
dtest = xgb.DMatrix(x_test, label=y_test)

In [82]:
#y_prob = gbdt.predict(xgb.DMatrix(x))
#y_pred = np.round(y_prob)

In [27]:
y_prob = gbdt.predict(xgb.DMatrix(x_train))

In [28]:
y_pred = np.round(y_prob)

In [29]:
y_prob

array([0.0776754 , 0.0133677 , 0.05679303, ..., 0.1400755 , 0.05083345,
       0.03295279], dtype=float32)

In [30]:
y_pred

array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)

In [34]:
type(x_train),type(x_test), type(y_train),type(y_test), type(y_pred), type(y_prob)

NameError: name 'y_test' is not defined

In [None]:
## registering the model in Fosfor Insight Designer.
register_model(
    model_obj=gbdt, 
    session=my_session,
    x_train=x_train,
    y_train=y_train,
    x_test=x_test,
    y_test=y_test,
    y_pred=y_pred,
    prob=y_prob
    source="Notebook",
    dataset_name="CRA_APPLICATION_TRAIN_DETAILS",
    dataset_source="Snowflake",
    #dataset_source="InMemory",
    name="Credit_Risk_XGB_Classifier",
    description="Credit Risk XGBoost Classification Model",
    flavour="sklearn",
    model_type="classification",
    conda_dependencies=["scikit-learn==1.3.2"]
)

In [91]:
## registering the model in Fosfor.
model_reg = register_model(gbdt,
               score, 
               name="Credit_Risk_XGB_Classifier", 
               description="Credit Risk XGBoost Classification Model",
               flavour=MLModelFlavours.sklearn,
               model_type="classification",
               #init_script="pip install snowflake-ml-python==1.0.11",
               init_script="\\n pip install fosforml \\n pip install fosforio[snowflake] \\n pip install seaborn \\n pip install plotly \\n pip install statsmodels \\n pip install xgboost \\n pip install catboost \\n pip install snowflake-connector-python[pandas]",                           
               y_true=y_train,
               y_pred=y_pred,
               prob=y_prob,
               features=x_train.columns,
               input_type="json", 
               explain_ai=True,
               x_train=x_train, 
               x_test=x_train, 
               y_train=y_train.tolist(),
               y_test=y_train.tolist(),
               feature_names=x_train.columns.tolist(),
               original_features=x_train.columns.tolist(),
               feature_ids=x_train.columns,
               kyd=True, kyd_score = True)

Calculating build time metrics

Progress: ██████████████████████████████████████████████████████████████████████ 100.0%


VBox(children=(HTML(value='<style>.grad_1{background: #2468a4;} .grad_2{ color:white; background: #2468a4;}</s…

In [92]:
import pickle

In [93]:
with open('/data/Output/CreditRisk_XGBclassifier_v1.pkl', 'wb') as f:  # open a text file
    pickle.dump(gbdt, f) # serialize the list

In [94]:
#with open('model_artifacts/CreditRisk_XGBclassifier_v1.pkl', 'wb') as f:  # open a text file
#    pickle.dump(gbdt, f) # serialize the list

# RandomForest Classifier

In [None]:
RFC = RandomForestClassifier(**rf_params)
RFC.fit(x_train, y_train)

In [None]:
y_pred = RFC.predict(x_train)

In [None]:
y_prob = RFC.predict_proba(x_train)[:,1]

In [None]:
y_pred

In [None]:
y_prob

In [None]:
@scoring_func
def score(model, request):
    payload_dict = request.json["payload"]
    data = pd.DataFrame(payload_dict,index=[0])
    y_pred = model.predict(data)
    y_prob = model.predict_proba(data)[:,1]
    #temp_dict = {"Prediction: ": np.round(y_pred), "Probability: ": y_prob }
    temp_dict = str([np.round(y_pred)[0], y_prob[0]])
    return temp_dict

In [None]:
payload  = x_train.iloc[0].to_dict()
print ('{ "payload": ', payload, "}")

In [None]:
req = requests.Request()
req.json = {"payload":payload}
y_req = req
score(RFC, y_req)

In [None]:
## registering the model in Fosfor.
model_reg = register_model(RFC,
               score, 
               name="Credit_Risk_RandomForest_Classifier", 
               description="Credit Risk RandomForest Classification Model",
               flavour=MLModelFlavours.sklearn,
               model_type="classification",
               #init_script="pip install snowflake-ml-python==1.0.11",
               init_script="\\n pip install fosforml \\n pip install fosforio[snowflake] \\n pip install seaborn \\n pip install plotly \\n pip install statsmodels \\n pip install xgboost \\n pip install catboost \\n pip install snowflake-connector-python[pandas]",                           
               y_true=y_train,
               y_pred=y_pred,
               prob=y_prob,
               features=x_train.columns,
               input_type="json", 
               explain_ai=True,
               x_train=x_train, 
               x_test=x_train, 
               y_train=y_train.tolist(),
               y_test=y_train.tolist(),
               feature_names=x_train.columns.tolist(),
               original_features=x_train.columns.tolist(),
               feature_ids=x_train.columns,
               kyd=True, kyd_score = True)

In [None]:
with open('/data/Output/CreditRisk_RFclassifier_v1.pkl', 'wb') as f:  # open a text file
    pickle.dump(RFC, f) # serialize the list

In [None]:
#with open('model_artifacts/CreditRisk_RFclassifier_v1.pkl', 'wb') as f:  # open a text file
#    pickle.dump(RFC, f) # serialize the list

# Extra Tree Classifier

In [None]:
ETC = ExtraTreesClassifier(**et_params)
ETC.fit(x_train, y_train)

In [None]:
y_pred = ETC.predict(x_train)
y_prob = ETC.predict_proba(x_train)[:,1]

In [None]:
y_pred

In [None]:
y_prob

In [None]:
@scoring_func
def score(model, request):
    payload_dict = request.json["payload"]
    data = pd.DataFrame(payload_dict,index=[0])
    y_pred = model.predict(data)
    y_prob = model.predict_proba(data)[:,1]
    #temp_dict = {"Prediction: ": np.round(y_pred), "Probability: ": y_prob }
    temp_dict = str([np.round(y_pred)[0], y_prob[0]])
    return temp_dict

In [None]:
payload  = x_train.iloc[0].to_dict()
print ('{ "payload": ', payload, "}")

In [None]:
req = requests.Request()
req.json = {"payload":payload}
y_req = req
score(ETC, y_req)

In [None]:
## registering the model in Fosfor.
model_reg = register_model(ETC,
               score, 
               name="Credit_Risk_ExtraTree_Classifier", 
               description="Credit Risk ExtraTree Classification Model",
               flavour=MLModelFlavours.sklearn,
               model_type="classification",
               #init_script="pip install snowflake-ml-python==1.0.11",
               init_script="\\n pip install fosforml \\n pip install fosforio[snowflake] \\n pip install seaborn \\n pip install plotly \\n pip install statsmodels \\n pip install xgboost \\n pip install catboost \\n pip install snowflake-connector-python[pandas]",                              
               y_true=y_train,
               y_pred=y_pred,
               prob=y_prob,
               features=x_train.columns,
               input_type="json", 
               explain_ai=True,
               x_train=x_train, 
               x_test=x_train, 
               y_train=y_train.tolist(),
               y_test=y_train.tolist(),
               feature_names=x_train.columns.tolist(),
               original_features=x_train.columns.tolist(),
               feature_ids=x_train.columns,
               kyd=True, kyd_score = True)

In [None]:
with open('/data/Output/CreditRisk_ETclassifier_v1.pkl', 'wb') as f:  # open a text file
    pickle.dump(ETC, f) # serialize the list

In [None]:
#with open('model_artifacts/CreditRisk_ETclassifier_v1.pkl', 'wb') as f:  # open a text file
#    pickle.dump(ETC, f) # serialize the list

# CATBoost Classifier

In [None]:
CBC = CatBoostClassifier(**catboost_params)
CBC.fit(x_train, y_train)

In [None]:
y_pred = CBC.predict(x_train)
y_prob = CBC.predict_proba(x_train)[:,1]

In [None]:
y_pred

In [None]:
y_prob

In [None]:
@scoring_func
def score(model, request):
    payload_dict = request.json["payload"]
    data = pd.DataFrame(payload_dict,index=[0])
    y_pred = model.predict(data)
    y_prob = model.predict_proba(data)[:,1]
    #temp_dict = {"Prediction: ": np.round(y_pred), "Probability: ": y_prob }
    temp_dict = str([np.round(y_pred)[0], y_prob[0]])
    return temp_dict

In [None]:
payload  = x_train.iloc[0].to_dict()
print ('{ "payload": ', payload, "}")

In [None]:
req = requests.Request()
req.json = {"payload":payload}
y_req = req
score(CBC, y_req)

In [None]:
## registering the model in Fosfor.
model_reg = register_model(CBC,
               score, 
               name="Credit_Risk_CATBoost_Classifier", 
               description="Credit Risk CatBoost Classification Model",
               flavour=MLModelFlavours.sklearn,
               model_type="classification",
               #init_script="pip install snowflake-ml-python==1.0.11",
               init_script="\\n pip install fosforml \\n pip install fosforio[snowflake] \\n pip install seaborn \\n pip install plotly \\n pip install statsmodels \\n pip install xgboost \\n pip install catboost \\n pip install snowflake-connector-python[pandas]",                              
               y_true=y_train,
               y_pred=y_pred,
               prob=y_prob,
               features=x_train.columns,
               input_type="json", 
               explain_ai=True,
               x_train=x_train, 
               x_test=x_train, 
               y_train=y_train.tolist(),
               y_test=y_train.tolist(),
               feature_names=x_train.columns.tolist(),
               original_features=x_train.columns.tolist(),
               feature_ids=x_train.columns,
               kyd=True, kyd_score = True)

In [None]:
with open('/data/Output/CreditRisk_CatBoostclassifier_v1.pkl', 'wb') as f:  # open a text file
    pickle.dump(CBC, f) # serialize the list

In [None]:
#with open('model_artifacts/CreditRisk_CatBoostclassifier_v1.pkl', 'wb') as f:  # open a text file
#    pickle.dump(CBC, f) # serialize the list

# Version 1 of Stacking Classifier

In [None]:
x_train_XGB = gbdt.predict(xgb.DMatrix(x_train))
x_train_RFC = RFC.predict_proba(x_train)[:,1]
x_train_ETC = ETC.predict_proba(x_train)[:,1]
x_train_CBC = CBC.predict_proba(x_train)[:,1]

In [None]:
x_test_XGB = gbdt.predict(xgb.DMatrix(x_test))
x_test_RFC = RFC.predict_proba(x_test)[:,1]
x_test_ETC = ETC.predict_proba(x_test)[:,1]
x_test_CBC = CBC.predict_proba(x_test)[:,1]

In [None]:
x_train_XGBT = x_train_XGB.reshape(-1,1)
x_train_RFCT = x_train_RFC.reshape(-1,1)
x_train_ETCT = x_train_ETC.reshape(-1,1)
x_train_CBCT = x_train_CBC.reshape(-1,1)

In [None]:
x_train_stk = np.concatenate((x_train_XGBT, x_train_RFCT, x_train_ETCT, x_train_CBCT),axis=1)

In [None]:
x_train_stk.shape

In [None]:
x_test_XGBT = x_test_XGB.reshape(-1,1)
x_test_RFCT = x_test_RFC.reshape(-1,1)
x_test_ETCT = x_test_ETC.reshape(-1,1)
x_test_CBCT = x_test_CBC.reshape(-1,1)

In [None]:
x_test_stk = np.concatenate((x_test_XGBT, x_test_RFCT, x_test_ETCT, x_test_CBCT),axis=1)
x_test_stk.shape

In [None]:
RFC_Stack = RandomForestClassifier()
RFC_Stack.fit(x_train_stk,y_train)

In [None]:
y_pred_stk = RFC_Stack.predict(x_train_stk)
y_prob_stk = RFC_Stack.predict_proba(x_train_stk)[:,1]

In [None]:
ytest_pred_stk = RFC_Stack.predict(x_test_stk)
ytest_prob_stk = RFC_Stack.predict_proba(x_test_stk)[:,1]

In [None]:
y_pred_stk

In [None]:
y_prob_stk

In [None]:
@scoring_func
def score(model, request):
    payload_dict = request.json["payload"]
    data = pd.DataFrame(payload_dict,index=[0])
    
    data_XGB = gbdt.predict(xgb.DMatrix(data))
    data_RFC = RFC.predict_proba(data)[:,1]
    data_ETC = ETC.predict_proba(data)[:,1]
    data_CBC = CBC.predict_proba(data)[:,1]
    
    data_XGBT = data_XGB.reshape(-1,1)
    data_RFCT = data_RFC.reshape(-1,1)
    data_ETCT = data_ETC.reshape(-1,1)
    data_CBCT = data_CBC.reshape(-1,1)
    
    data = np.concatenate((data_XGBT, data_RFCT, data_ETCT, data_CBCT), axis=1)
    
    y_pred = model.predict(data)
    y_prob = model.predict_proba(data)[:,1]
    #temp_dict = {"Prediction: ": np.round(y_pred), "Probability: ": y_prob }
    temp_dict = str([np.round(y_pred)[0],y_prob[0]])
    return temp_dict

In [None]:
payload  = x_train.iloc[0].to_dict()
print ('{ "payload": ', payload, "}")

In [None]:
req = requests.Request()
req.json = {"payload":payload}
y_req = req
score(RFC_Stack, y_req)

In [None]:
## registering the model in Fosfor.
model_reg = register_model(RFC_Stack,
               score, 
               name="Credit_Risk_StackedRF_Classifier", 
               description="Credit Risk Stacked RF Classification Model",
               flavour=MLModelFlavours.sklearn,
               model_type="classification",
               #init_script="pip install snowflake-ml-python==1.0.11",
               init_script="\\n pip install fosforml \\n pip install fosforio[snowflake] \\n pip install seaborn \\n pip install plotly \\n pip install statsmodels \\n pip install xgboost \\n pip install catboost \\n pip install snowflake-connector-python[pandas]",                              
               y_true=y_train,
               y_pred=y_pred,
               prob=y_prob,
               features=x_train.columns,
               input_type="json", 
               explain_ai=True,
               x_train=x_train, 
               x_test=x_train, 
               y_train=y_train.tolist(),
               y_test=y_train.tolist(),
               feature_names=x_train.columns.tolist(),
               original_features=x_train.columns.tolist(),
               feature_ids=x_train.columns,
               kyd=True, kyd_score = True)

In [None]:
with open('/data/Output/CreditRisk_StackedRFclassifier_v1.pkl', 'wb') as f:  # open a text file
    pickle.dump(RFC_Stack, f) # serialize the list

In [None]:
#with open('model_artifacts/CreditRisk_StackedRFclassifier_v1.pkl', 'wb') as f:  # open a text file
#    pickle.dump(RFC_Stack, f) # serialize the list

# Version 2 of Stacking Classifier

In [None]:
x_train_stk = np.concatenate((x_train_RFCT, x_train_ETCT, x_train_CBCT),axis=1)
x_test_stk = np.concatenate(( x_test_RFCT, x_test_ETCT, x_test_CBCT),axis=1)

RFC_Stack = RandomForestClassifier()
RFC_Stack.fit(x_train_stk,y_train)

y_pred_stk = RFC_Stack.predict(x_train_stk)
y_prob_stk = RFC_Stack.predict_proba(x_train_stk)[:,1]

ytest_pred_stk = RFC_Stack.predict(x_test_stk)
ytest_prob_stk = RFC_Stack.predict_proba(x_test_stk)[:,1]

y_pred_stk
y_prob_stk

In [None]:
@scoring_func
def score(model, request):
    payload_dict = request.json["payload"]
    data = pd.DataFrame(payload_dict,index=[0])
    
    #data_XGB = gbdt.predict(xgb.DMatrix(data))
    data_RFC = RFC.predict_proba(data)[:,1]
    data_ETC = ETC.predict_proba(data)[:,1]
    data_CBC = CBC.predict_proba(data)[:,1]
    
    #data_XGBT = data_XGB.reshape(-1,1)
    data_RFCT = data_RFC.reshape(-1,1)
    data_ETCT = data_ETC.reshape(-1,1)
    data_CBCT = data_CBC.reshape(-1,1)
    
    #data = np.concatenate((data_XGBT, data_RFCT, data_ETCT, data_CBCT), axis=1)
    data = np.concatenate((data_RFCT, data_ETCT, data_CBCT), axis=1)
    
    y_pred = model.predict(data)
    y_prob = model.predict_proba(data)[:,1]
    #temp_dict = {"Prediction: ": np.round(y_pred), "Probability: ": y_prob }
    temp_dict = str([np.round(y_pred)[0], y_prob[0]])
    return temp_dict

In [None]:
## registering the model in Fosfor with correct output (y_pred_stk, y_prob_stk.
model_reg = register_model(RFC_Stack,
               score, 
               name="Credit_Risk_StackedRF_Classifier", 
               description="Credit Risk Stacked RF Classification Model",
               flavour=MLModelFlavours.sklearn,
               model_type="classification",
               #init_script="pip install snowflake-ml-python==1.0.11",
               init_script="\\n pip install fosforml \\n pip install fosforio[snowflake] \\n pip install seaborn \\n pip install plotly \\n pip install statsmodels \\n pip install xgboost \\n pip install catboost \\n pip install snowflake-connector-python[pandas]",                              
               y_true=y_train,
               y_pred=y_pred_stk,
               prob=y_prob_stk,
               features=x_train.columns,
               input_type="json", 
               explain_ai=True,
               x_train=x_train, 
               x_test=x_train, 
               y_train=y_train.tolist(),
               y_test=y_train.tolist(),
               feature_names=x_train.columns.tolist(),
               original_features=x_train.columns.tolist(),
               feature_ids=x_train.columns,
               kyd=True, kyd_score = True)

In [None]:
with open('/data/Output/CreditRisk_StackedRFclassifier_v2.pkl', 'wb') as f:  # open a text file
    pickle.dump(RFC_Stack, f) # serialize the list

In [None]:
#with open('model_artifacts/CreditRisk_StackedRFclassifier_v2.pkl', 'wb') as f:  # open a text file
#    pickle.dump(RFC_Stack, f) # serialize the list

# Create Sample Dataframe/Table for Monitoring Setup 

In [None]:
x_train.shape, y_train.shape, y_pred_stk.shape, y_prob_stk.shape

In [None]:
x_test.shape ,  ytest_pred_stk.shape, ytest_prob_stk.shape

In [None]:
type(x_train), type(y_train), type(y_pred_stk), type(y_prob_stk)

In [None]:
df_pred_stk = pd.DataFrame(y_pred_stk, columns=['Prediction'])

In [None]:
df_prob_stk = pd.DataFrame(y_prob_stk, columns=['Probability'])

In [None]:
df_train = pd.DataFrame(y_train, columns=['TARGET'])

In [None]:
temp_df = pd.concat([x_train, df_train, df_pred_stk, df_prob_stk], axis=1)

In [None]:
temp_df.to_csv('/data/Output/application_train_output.csv')

In [None]:
temp_df = pd.read_csv('/data/Output/application_train_output.csv')

In [None]:
temp_df_1 = temp_df.sample(frac = 0.7)
temp_df_2 = temp_df.drop(temp_df_1.index)

In [None]:
print (temp_df.shape)
print (temp_df_1.shape)
print (temp_df_2.shape)

In [None]:
type(temp_df_1)

In [None]:
<Suppose to fail here>

In [None]:
df_model=session.createDataFrame(
        temp_df_1.values.tolist(),
        schema=temp_df_1.columns.tolist())
df_model.write.mode("overwrite").save_as_table("FDC_Banking_FS.PUBLIC.CRA_APPLICATION_OUTPUT_BATCH1")

In [None]:
df_model=session.createDataFrame(
        temp_df_2.values.tolist(),
        schema=temp_df_2.columns.tolist())
df_model.write.mode("overwrite").save_as_table("FDC_Banking_FS.PUBLIC.CRA_APPLICATION_OUTPUT_BATCH2")