In [2]:
# !pip install --q fosforml==1.0.1b1

In [1]:
#Snowpark lib
from snowflake.snowpark import Session

# Data Science Libs
import numpy as np
import pandas as pd

# create_temp_table warning suppresion
import warnings; warnings.simplefilter('ignore')

#ConfigParser to read ini file
import configparser

import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.cluster import DBSCAN
import pickle

from fosforml import *
from fosforml import scoring_func, register_model
from fosforml.constants import MLModelFlavours
import requests

np.random.seed(0)

config = configparser.ConfigParser()
config.read("/notebooks/notebooks/credentials.ini")

connection_parameters = {
    "user": f'{config["Snowflake"]["user"]}',
    "password": f'{config["Snowflake"]["password"]}',
    "account": f'{config["Snowflake"]["account"]}',
    "WAREHOUSE": f'{config["Snowflake"]["WAREHOUSE"]}',
    "DATABASE": f'{config["Snowflake"]["DATABASE"]}',
    "SCHEMA": f'{config["Snowflake"]["SCHEMA"]}'
}

def snowflake_connector(conn):
    try:
        session = Session.builder.configs(conn).create()
        print("connection successful!")
    except:
        raise ValueError("error while connecting with db")
    return session

session = snowflake_connector(connection_parameters)

connection successful!


In [2]:
with open('churn_model.pkl','rb') as f:  ## use rb while reading the fie
    churn_model = pickle.load(f)

In [3]:
@scoring_func
def score(model, request):
    payload_dict = eval(request.json["payload"])
    data = pd.DataFrame.from_dict(payload_dict)
    frame = data[['MEMBER_GENDER', 'MEMBER_STATE', 'MEMBER_CONTACT_VERIFIED','FUND_TOTAL_ASSETS','FUND_RETURN_TARGET_PERCENTAGE',
            'INVESTMENT_RISK_CATEGORY', 'CASH_BENCHMARK_ALLOCATION','FIXED_INCOME_BENCHMARK_ALLOCATION',
            'DOMESTIC_LISTED_EQUITY_BENCHMARK_ALLOCATION','INTERNATIONAL_LISTED_EQUITY_BENCHMARK_ALLOCATION',
            'UNLISTED_EQUITY_BENCHMARK_ALLOCATION', 'EQUITY_BENCHMARK_ALLOCATION','PROPERTY_BENCHMARK_ALLOCATION',
            'INFRA_BENCHMARK_ALLOCATION','COMMODITIES_BENCHMARK_ALLOCATION', 'OTHERS_BENCHMARK_ALLOCATION','FUND_RISK_LEVEL', 
            'FUND_RISK_CATEGORY','NEG_NETRETURN_SINCE_INCEPTION', 'YEAR_1_RETURNS', 'YEAR_3_RETURNS','YEAR_5_RETURNS',
            'YEAR_7_RETURNS','YEAR_10_RETURNS', 'SUPER_FEES','PENSION_FEES',
            'INVESTMENT_AGE_GROUP', 'RETIREMENT_AGE_GROUP', 'TOTAL_FUNDS_INVESTED']].copy()
    result = model.predict(frame)
    prediction = pd.DataFrame({"CHURN_PREDICTION":result})
    prediction["CHURN_PREDICTION"] = prediction["CHURN_PREDICTION"].apply(lambda x: "Y" if x == 1 else "N")
    probability = model.predict_proba(frame)
    prediction["Probability"] = [round(k[1],4) for k in probability]
    prediction = str(prediction.to_dict())
    return prediction

In [4]:
df = session.table("MEMBER_FUNDS_ENRICHED_DETAILS").to_pandas()
df = df.drop(["MEMBER_DOB_DT", 'ALLOCATION_DT', "CHURN_DT", "RETIREMENT_DT"], axis =1 )

In [5]:
payload = df.head(10).to_dict()
req = requests.Request()
req.json = {"payload":str(payload)}
y = req
yo = score(churn_model, y)
yo

"{'CHURN_PREDICTION': {0: 'Y', 1: 'Y', 2: 'Y', 3: 'Y', 4: 'Y', 5: 'Y', 6: 'Y', 7: 'Y', 8: 'Y', 9: 'Y'}, 'Probability': {0: 0.9907, 1: 0.6788, 2: 0.9668, 3: 0.7273, 4: 0.9719, 5: 0.9878, 6: 0.967, 7: 0.5665, 8: 0.814, 9: 0.8004}}"

In [6]:
frame = df[['MEMBER_GENDER', 'MEMBER_STATE', 'MEMBER_CONTACT_VERIFIED','FUND_TOTAL_ASSETS','FUND_RETURN_TARGET_PERCENTAGE',
            'INVESTMENT_RISK_CATEGORY', 'CASH_BENCHMARK_ALLOCATION','FIXED_INCOME_BENCHMARK_ALLOCATION',
            'DOMESTIC_LISTED_EQUITY_BENCHMARK_ALLOCATION','INTERNATIONAL_LISTED_EQUITY_BENCHMARK_ALLOCATION',
            'UNLISTED_EQUITY_BENCHMARK_ALLOCATION', 'EQUITY_BENCHMARK_ALLOCATION','PROPERTY_BENCHMARK_ALLOCATION',
            'INFRA_BENCHMARK_ALLOCATION','COMMODITIES_BENCHMARK_ALLOCATION', 'OTHERS_BENCHMARK_ALLOCATION','FUND_RISK_LEVEL', 
            'FUND_RISK_CATEGORY','NEG_NETRETURN_SINCE_INCEPTION', 'YEAR_1_RETURNS', 'YEAR_3_RETURNS','YEAR_5_RETURNS',
            'YEAR_7_RETURNS','YEAR_10_RETURNS', 'SUPER_FEES','PENSION_FEES',
            'INVESTMENT_AGE_GROUP', 'RETIREMENT_AGE_GROUP', 'TOTAL_FUNDS_INVESTED','CHURN_FLAG']].copy()

X = frame.drop("CHURN_FLAG", axis=1)
y = frame["CHURN_FLAG"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [7]:
payload = X_test.to_dict()
req = requests.Request()
req.json = {"payload":str(payload)}
y = req
pred_dic = score(churn_model, y)

In [8]:
temp_ = eval(pred_dic)
print("string to dict")
pred = pd.DataFrame.from_dict(temp_)

string to dict


In [9]:
y_pred = pred["CHURN_PREDICTION"]
y_prob = pred["Probability"]

In [10]:
y_test

96958     N
19699     N
6860      N
261926    N
34660     N
         ..
187949    Y
269979    N
16694     Y
46808     N
91910     N
Name: CHURN_FLAG, Length: 67200, dtype: object

In [11]:
y_pred

0        N
1        N
2        N
3        N
4        N
        ..
67195    Y
67196    N
67197    Y
67198    N
67199    N
Name: CHURN_PREDICTION, Length: 67200, dtype: object

In [20]:
## registering the model in Fosfor.
model_reg = register_model(churn_model,
               score, 
               name="CHURN_BINARY_MODEL", 
               description="PREDICTING_FUND_CHURN",
               flavour=MLModelFlavours.sklearn,
               model_type="classification",
               init_script="\\n pip install --q fosforml==1.0.1b1",
               y_true=y_test,
               y_pred=y_pred,
               prob=y_prob,
               features=X_train.columns,
               input_type="json", 
               explain_ai=True,
               x_train=X_train, 
               x_test=X_test, 
               y_train=y_train,
               y_test=y_test,
               feature_names=X_train.columns.tolist(),
               original_features=X_train.columns.tolist(),
               feature_ids=X_train.columns,
               kyd=True, kyd_score = True)

Calculating build time metrics

Progress: ██████████████████████████████████████████████████████████████████████ 100.0%


VBox(children=(HTML(value='<style>.grad_1{background: #2468a4;} .grad_2{ color:white; background: #2468a4;}</s…

# CHURN REASON MODEL

In [12]:
with open('reason_model.pkl','rb') as f:  ## use rb while reading the fie
    reason_model = pickle.load(f)

In [13]:
churned = df[df["CHURN_FLAG"] == "Y"]

In [15]:
r2id = {'NO REASON IDENTIFIED': 0,
 'HIGH ACCOUNT FEE': 1,
 'POOR FUND PERFORMANCE': 2,
 'HIGH TRANSACTION FEE': 3,
 'FUND REPUTATION DECLINING': 4,
 'LIFE EVENT': 5}
id2r = {v:k for k, v in r2id.items()}
id2r

{0: 'NO REASON IDENTIFIED',
 1: 'HIGH ACCOUNT FEE',
 2: 'POOR FUND PERFORMANCE',
 3: 'HIGH TRANSACTION FEE',
 4: 'FUND REPUTATION DECLINING',
 5: 'LIFE EVENT'}

In [18]:
@scoring_func
def score(model, request):
    payload_dict = eval(request.json["payload"])
    data = pd.DataFrame.from_dict(payload_dict)
    frame = churned[['MEMBER_GENDER', 'MEMBER_STATE', 'MEMBER_CONTACT_VERIFIED','FUND_TOTAL_ASSETS','FUND_RETURN_TARGET_PERCENTAGE',
            'INVESTMENT_RISK_CATEGORY', 'CASH_BENCHMARK_ALLOCATION','FIXED_INCOME_BENCHMARK_ALLOCATION',
            'DOMESTIC_LISTED_EQUITY_BENCHMARK_ALLOCATION','INTERNATIONAL_LISTED_EQUITY_BENCHMARK_ALLOCATION',
            'UNLISTED_EQUITY_BENCHMARK_ALLOCATION', 'EQUITY_BENCHMARK_ALLOCATION','PROPERTY_BENCHMARK_ALLOCATION',
            'INFRA_BENCHMARK_ALLOCATION','COMMODITIES_BENCHMARK_ALLOCATION', 'OTHERS_BENCHMARK_ALLOCATION','FUND_RISK_LEVEL', 
            'FUND_RISK_CATEGORY','NEG_NETRETURN_SINCE_INCEPTION', 'YEAR_1_RETURNS', 'YEAR_3_RETURNS','YEAR_5_RETURNS',
            'YEAR_7_RETURNS','YEAR_10_RETURNS', 'SUPER_FEES','PENSION_FEES',
            'INVESTMENT_AGE_GROUP', 'RETIREMENT_AGE_GROUP', 'TOTAL_FUNDS_INVESTED']].copy()
    id2r = {0: 'NO REASON IDENTIFIED',
             1: 'HIGH ACCOUNT FEE',
             2: 'POOR FUND PERFORMANCE',
             3: 'HIGH TRANSACTION FEE',
             4: 'FUND REPUTATION DECLINING',
             5: 'LIFE EVENT'}
    result = model.predict(frame)
    prediction = pd.DataFrame({"REASON_PREDICTION":result})
    prediction["REASON_PREDICTION"] = prediction["REASON_PREDICTION"].apply(lambda x: id2r[x])
    prediction = str(prediction.to_dict())
    return prediction

In [19]:
payload = churned.head(10).to_dict()
req = requests.Request()
req.json = {"payload":str(payload)}
y = req
yo = score(reason_model, y)
yo

"{'REASON_PREDICTION': {0: 'NO REASON IDENTIFIED', 1: 'NO REASON IDENTIFIED', 2: 'HIGH ACCOUNT FEE', 3: 'POOR FUND PERFORMANCE', 4: 'HIGH TRANSACTION FEE', 5: 'HIGH TRANSACTION FEE', 6: 'FUND REPUTATION DECLINING', 7: 'FUND REPUTATION DECLINING', 8: 'HIGH TRANSACTION FEE', 9: 'FUND REPUTATION DECLINING', 10: 'POOR FUND PERFORMANCE', 11: 'NO REASON IDENTIFIED', 12: 'NO REASON IDENTIFIED', 13: 'LIFE EVENT', 14: 'NO REASON IDENTIFIED', 15: 'HIGH TRANSACTION FEE', 16: 'HIGH ACCOUNT FEE', 17: 'FUND REPUTATION DECLINING', 18: 'HIGH ACCOUNT FEE', 19: 'NO REASON IDENTIFIED', 20: 'FUND REPUTATION DECLINING', 21: 'FUND REPUTATION DECLINING', 22: 'NO REASON IDENTIFIED', 23: 'HIGH TRANSACTION FEE', 24: 'NO REASON IDENTIFIED', 25: 'FUND REPUTATION DECLINING', 26: 'NO REASON IDENTIFIED', 27: 'FUND REPUTATION DECLINING', 28: 'HIGH TRANSACTION FEE', 29: 'HIGH ACCOUNT FEE', 30: 'FUND REPUTATION DECLINING', 31: 'FUND REPUTATION DECLINING', 32: 'FUND REPUTATION DECLINING', 33: 'HIGH ACCOUNT FEE', 34: 'FU