# Use SUPER_ANNU_Template customized notebook template

In [1]:
#Snowpark lib
from snowflake.snowpark import Session

# Data Science Libs
import numpy as np
import pandas as pd

# create_temp_table warning suppression
import warnings; warnings.simplefilter('ignore')

# Sklearn Libraries
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.cluster import DBSCAN

np.random.seed(0)

In [2]:
# Obtain a session through fosforml's snowflakesession.get_session helper.
# `my_session` is used by later cells to run the source-table query and is
# passed to register_model.
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [3]:
import pickle
# Load the pickled model object that later cells score with and register.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load reason_model.pkl from a trusted location.
with open('reason_model.pkl','rb') as f:  # pickles must be opened in binary mode ('rb') when reading the file
    reason_model = pickle.load(f)

In [4]:
# Read every column of the source table through the session and materialise the
# result as a local pandas DataFrame.
table_name = 'MEMBER_FUNDS_ENRICHED_DETAILS'
sf_df = my_session.sql(f"select * from {table_name}")
df = sf_df.to_pandas()

In [5]:
# Keep only the rows whose CHURN_FLAG equals "Y".
churned = df.loc[df["CHURN_FLAG"].eq("Y")]

In [6]:
# Reason label -> integer id; the id is the label's position in the list below.
r2id = {
    reason: idx
    for idx, reason in enumerate([
        'NO REASON IDENTIFIED',
        'HIGH ACCOUNT FEE',
        'POOR FUND PERFORMANCE',
        'HIGH TRANSACTION FEE',
        'FUND REPUTATION DECLINING',
        'LIFE EVENT',
    ])
}
# Inverse lookup: integer id -> reason label.
id2r = dict(zip(r2id.values(), r2id.keys()))
id2r

{0: 'NO REASON IDENTIFIED',
 1: 'HIGH ACCOUNT FEE',
 2: 'POOR FUND PERFORMANCE',
 3: 'HIGH TRANSACTION FEE',
 4: 'FUND REPUTATION DECLINING',
 5: 'LIFE EVENT'}

In [36]:
# Remove the raw date columns from the working frame. errors="ignore" keeps the
# cell idempotent: re-running it after the columns are already gone is a no-op
# instead of raising KeyError.
# NOTE(review): `churned` was sliced from `df` in an earlier cell, so it is not
# affected by this drop - confirm whether the dates should be removed there too.
df = df.drop(
    columns=["MEMBER_DOB_DT", "ALLOCATION_DT", "CHURN_DT", "RETIREMENT_DT"],
    errors="ignore",
)

In [7]:
def score(model, request):
    """Predict a churn-reason label for every row in the request payload.

    Args:
        model: Estimator exposing ``predict``; its predictions must be the
            integer ids 0-5 that ``id2r`` below maps to reason labels.
        request: Object whose ``json`` attribute is a mapping with a
            ``"payload"`` entry. The payload is either a column-oriented dict
            (``{column: {row: value}}``, as produced by ``DataFrame.to_dict()``)
            or the ``str()`` of such a dict.

    Returns:
        str: ``str()`` of ``{"REASON_PREDICTION": {position: reason_label}}``,
        where positions run 0..n-1 in the row order of the parsed payload.

    Raises:
        KeyError: If a required feature column is missing from the payload, or
            the model predicts an id that is not a key of ``id2r``.
    """
    payload_dict = request.json["payload"]
    if isinstance(payload_dict, str):
        # SECURITY: eval() executes arbitrary Python taken from the request body.
        # It is kept for backward compatibility because stringified payloads may
        # presumably contain non-literal reprs (e.g. datetime.date(...)) that
        # ast.literal_eval would reject. Only expose this to trusted callers, or
        # send the payload as a dict so this branch is skipped entirely.
        payload_dict = eval(payload_dict)
    data = pd.DataFrame.from_dict(payload_dict)

    # Select the feature columns in a fixed order; any other payload columns are ignored.
    frame = data[['MEMBER_GENDER', 'MEMBER_STATE', 'MEMBER_CONTACT_VERIFIED','FUND_TOTAL_ASSETS','FUND_RETURN_TARGET_PERCENTAGE',
            'INVESTMENT_RISK_CATEGORY', 'CASH_BENCHMARK_ALLOCATION','FIXED_INCOME_BENCHMARK_ALLOCATION',
            'DOMESTIC_LISTED_EQUITY_BENCHMARK_ALLOCATION','INTERNATIONAL_LISTED_EQUITY_BENCHMARK_ALLOCATION',
            'UNLISTED_EQUITY_BENCHMARK_ALLOCATION', 'EQUITY_BENCHMARK_ALLOCATION','PROPERTY_BENCHMARK_ALLOCATION',
            'INFRA_BENCHMARK_ALLOCATION','COMMODITIES_BENCHMARK_ALLOCATION', 'OTHERS_BENCHMARK_ALLOCATION','FUND_RISK_LEVEL',
            'FUND_RISK_CATEGORY','NEG_NETRETURN_SINCE_INCEPTION', 'YEAR_1_RETURNS', 'YEAR_3_RETURNS','YEAR_5_RETURNS',
            'YEAR_7_RETURNS','YEAR_10_RETURNS', 'SUPER_FEES','PENSION_FEES',
            'INVESTMENT_AGE_GROUP', 'RETIREMENT_AGE_GROUP', 'TOTAL_FUNDS_INVESTED']].copy()

    # Defined locally so the function does not depend on notebook globals.
    id2r = {0: 'NO REASON IDENTIFIED',
             1: 'HIGH ACCOUNT FEE',
             2: 'POOR FUND PERFORMANCE',
             3: 'HIGH TRANSACTION FEE',
             4: 'FUND REPUTATION DECLINING',
             5: 'LIFE EVENT'}

    result = model.predict(frame)
    # The prediction frame gets a fresh 0..n-1 index; the payload's row labels are not carried over.
    prediction = pd.DataFrame({"REASON_PREDICTION":result})
    prediction["REASON_PREDICTION"] = prediction["REASON_PREDICTION"].apply(lambda x: id2r[x])
    # Callers receive the stringified dict and parse it back themselves.
    prediction = str(prediction.to_dict())
    return prediction

In [11]:
import requests
import datetime

In [12]:
# Smoke-test score() on the first 10 churned rows. A bare requests.Request is
# used only as a stand-in object to carry the `.json` attribute that score() reads.
payload = churned.head(10).to_dict()
req = requests.Request()
req.json = {"payload":str(payload)}  # the payload is sent as the str() of the column-oriented dict
y = req  # NOTE(review): `y` is rebound to the target Series in a later cell
yo = score(reason_model, y)
yo

"{'REASON_PREDICTION': {0: 'HIGH TRANSACTION FEE', 1: 'NO REASON IDENTIFIED', 2: 'NO REASON IDENTIFIED', 3: 'HIGH ACCOUNT FEE', 4: 'FUND REPUTATION DECLINING', 5: 'NO REASON IDENTIFIED', 6: 'HIGH ACCOUNT FEE', 7: 'FUND REPUTATION DECLINING', 8: 'LIFE EVENT', 9: 'FUND REPUTATION DECLINING'}}"

In [13]:
# Separate the CHURN_REASON target from the remaining columns, then hold out
# 20% of the churned rows; random_state fixes the shuffle so the split repeats.
y = churned["CHURN_REASON"]
X = churned.drop(columns=["CHURN_REASON"])

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [14]:
# Score the whole hold-out set through the same request-shaped wrapper used above.
payload = X_test.to_dict()
req = requests.Request()
req.json = {"payload":str(payload)}
y = req  # NOTE(review): this overwrites the target Series `y` from the split cell with the request object
pred_dic = score(reason_model, y)  # str() of {"REASON_PREDICTION": {...}}

In [15]:
import ast

# score() returns the str() of a dict holding only int keys and string labels.
# Parse it back with ast.literal_eval, which accepts Python literals only,
# instead of eval(), which would execute any code embedded in the string.
temp_ = ast.literal_eval(pred_dic)
print("string to dict")
pred = pd.DataFrame.from_dict(temp_)

string to dict


In [17]:
# Predicted reason labels as a Series.
# NOTE(review): score() builds its output with a fresh 0..n-1 index, so y_pred
# lines up with y_test by position, not by index label - confirm that
# register_model does not align the two on index.
y_pred = pred["REASON_PREDICTION"]

In [18]:
# Class probabilities for the hold-out rows, taken directly from the model.
# Unlike score(), the full X_test frame is passed here without first selecting
# the feature columns.
y_prob = reason_model.predict_proba(X_test)

In [19]:
# Display the hold-out labels (a Series at this point; converted to a frame in a later cell).
y_test

240048         NO REASON IDENTIFIED
154866         HIGH TRANSACTION FEE
158950         HIGH TRANSACTION FEE
228750         NO REASON IDENTIFIED
247815    FUND REPUTATION DECLINING
                    ...            
302560         NO REASON IDENTIFIED
208132    FUND REPUTATION DECLINING
264294                   LIFE EVENT
326956             HIGH ACCOUNT FEE
223727    FUND REPUTATION DECLINING
Name: CHURN_REASON, Length: 18491, dtype: object

In [23]:
# register_model is handed DataFrames below, so promote each label Series to a
# single-column frame. The isinstance guard keeps the cell safe to re-run:
# DataFrame has no .to_frame(), so an unconditional call raises AttributeError
# the second time the cell is executed.
y_train = y_train.to_frame() if isinstance(y_train, pd.Series) else y_train
y_test = y_test.to_frame() if isinstance(y_test, pd.Series) else y_test
y_pred = y_pred.to_frame() if isinstance(y_pred, pd.Series) else y_pred

In [28]:
# Show the concrete type of each object that is passed to register_model.
tuple(type(obj) for obj in (X_train, X_test, y_train, y_test, y_pred))

(pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame)

In [26]:
from fosforml import register_model

In [38]:
# Wrap the predict_proba output in a DataFrame. In this notebook it is only
# referenced by the commented-out y_prob argument of register_model.
y_prob_df = pd.DataFrame(y_prob)

In [42]:
# Register the loaded model through fosforml, together with the train/test
# split and the predictions obtained via score().
# NOTE(review): the output recorded for this cell is the string
# 'Failed to load model artifacts. EOL while scanning string literal (<string>, line 1)',
# which suggests the registration did not succeed as written. The root cause is
# not visible in this notebook - investigate before relying on the registered model.
register_model(
    model_obj=reason_model, 
    session=my_session,
    x_train=X_train,
    y_train=y_train,
    x_test=X_test,
    y_test=y_test,
    y_pred=y_pred,
    #y_prob = y_prob_df,
    source="Notebook",
    dataset_name="MEMBER_FUNDS_ENRICHED_DETAILS",  # same table the data was read from
    dataset_source="Snowflake",
    #dataset_source="InMemory",
    name="Churn_Reason_Classifier_Model",
    description="PREDICTING_FUND_REASON",
    flavour="sklearn",
    model_type="classification",
    conda_dependencies=["scikit-learn==1.3.2"]  # pinned scikit-learn version passed to register_model
)

'Failed to load model artifacts. EOL while scanning string literal (<string>, line 1)'