# Use SUPER_ANNU_Template customized notebook template

In [30]:
#Snowpark lib
from snowflake.snowpark import Session

# Data Science Libs
import numpy as np
import pandas as pd

# create_temp_table warning suppresion
import warnings; warnings.simplefilter('ignore')

# Sklearn Libraries
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.cluster import DBSCAN

np.random.seed(0)

In [31]:
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [32]:
table_name = 'MEMBER_FUNDS_ENRICHED_DETAILS'

In [33]:
sf_df = my_session.sql("select * from {}".format(table_name))
df = sf_df.to_pandas()

In [34]:
df.shape

(335999, 50)

In [35]:
frame = df[['MEMBER_GENDER', 'MEMBER_STATE', 'MEMBER_CONTACT_VERIFIED','FUND_TOTAL_ASSETS','FUND_RETURN_TARGET_PERCENTAGE',
            'INVESTMENT_RISK_CATEGORY', 'CASH_BENCHMARK_ALLOCATION','FIXED_INCOME_BENCHMARK_ALLOCATION',
            'DOMESTIC_LISTED_EQUITY_BENCHMARK_ALLOCATION','INTERNATIONAL_LISTED_EQUITY_BENCHMARK_ALLOCATION',
            'UNLISTED_EQUITY_BENCHMARK_ALLOCATION', 'EQUITY_BENCHMARK_ALLOCATION','PROPERTY_BENCHMARK_ALLOCATION',
            'INFRA_BENCHMARK_ALLOCATION','COMMODITIES_BENCHMARK_ALLOCATION', 'OTHERS_BENCHMARK_ALLOCATION','FUND_RISK_LEVEL', 
            'FUND_RISK_CATEGORY','NEG_NETRETURN_SINCE_INCEPTION', 'YEAR_1_RETURNS', 'YEAR_3_RETURNS','YEAR_5_RETURNS',
            'YEAR_7_RETURNS','YEAR_10_RETURNS', 'SUPER_FEES','PENSION_FEES',
            'INVESTMENT_AGE_GROUP', 'RETIREMENT_AGE_GROUP', 'TOTAL_FUNDS_INVESTED','CHURN_FLAG']].copy()

In [36]:
frame["CHURN_FLAG"] = frame["CHURN_FLAG"].apply(lambda x: 1 if x =="Y" else 0)

In [37]:
numeric_features = ["FUND_TOTAL_ASSETS", 'FUND_RETURN_TARGET_PERCENTAGE','CASH_BENCHMARK_ALLOCATION',
       'FIXED_INCOME_BENCHMARK_ALLOCATION','DOMESTIC_LISTED_EQUITY_BENCHMARK_ALLOCATION','INTERNATIONAL_LISTED_EQUITY_BENCHMARK_ALLOCATION',
       'UNLISTED_EQUITY_BENCHMARK_ALLOCATION', 'EQUITY_BENCHMARK_ALLOCATION','PROPERTY_BENCHMARK_ALLOCATION', 'INFRA_BENCHMARK_ALLOCATION',
       'COMMODITIES_BENCHMARK_ALLOCATION', 'OTHERS_BENCHMARK_ALLOCATION', 'YEAR_1_RETURNS', 'YEAR_3_RETURNS',
       'YEAR_5_RETURNS', 'YEAR_7_RETURNS', 'YEAR_10_RETURNS', 'SUPER_FEES','PENSION_FEES',]
numeric_transformer = Pipeline(
    steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())]
)

categorical_features = ["MEMBER_GENDER", "MEMBER_STATE", "MEMBER_CONTACT_VERIFIED","INVESTMENT_RISK_CATEGORY",'FUND_RISK_LEVEL',
                        'FUND_RISK_CATEGORY',"NEG_NETRETURN_SINCE_INCEPTION", 'INVESTMENT_AGE_GROUP', 'RETIREMENT_AGE_GROUP', "TOTAL_FUNDS_INVESTED"]
categorical_transformer = Pipeline(
    steps=[
        ("encoder", OneHotEncoder(handle_unknown="ignore")),
        ("selector", SelectPercentile(chi2, percentile=50)),
    ]
)
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

In [38]:
clf = Pipeline(
    steps=[("preprocessor", preprocessor), ("classifier", RandomForestClassifier(n_estimators=500, max_depth=8, 
                                                                                 max_features=0.6,
                                                                                 bootstrap=True, max_samples=0.8))]
)

In [39]:
X = frame.drop("CHURN_FLAG", axis=1)
y = frame["CHURN_FLAG"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [40]:
clf.fit(X_train, y_train)
print("model score: %.3f" % clf.score(X_test, y_test))

model score: 0.989


In [41]:
frame_1 = clf.predict(frame)
df["CHURN_PREDICTED"] = frame_1

In [42]:
frame_2 = clf.predict_proba(frame)

In [43]:
frame_2[:,1]

array([0.85256477, 0.98829558, 0.56701558, ..., 0.87599826, 0.87532976,
       0.96371547])

In [44]:
frame_1

array([1, 1, 1, ..., 1, 1, 1])

In [45]:
df["CHURN_PROBABILITY"] =  frame_2[:,1]
y_pred = frame_2[:,1]

In [46]:
from fosforml import register_model

In [47]:
from snowflake.ml.registry import Registry

model_registry = Registry(session=my_session,
                          database_name='FDC_Banking_FS',
                          schema_name='BFS_SUPER_ANNU_SCHEMA'
                         )

In [49]:
model_registry.log_model(clf,
                         model_name="Binary_Churn_RF_Classifier",
                         version_name="v2",
                         comment="Random Forest Churn Classifier",
                         conda_dependencies=["scikit-learn==1.3.2"],
                         metrics=[],
                         sample_input_data= X_train,
                         python_version='3.9')

<snowflake.ml.model._client.model.model_version_impl.ModelVersion at 0x7fa81c007d30>

In [50]:
y_pred_series = pd.Series(y_pred)
y_pred_series.name = 'CHURN_PREDICTED'

In [51]:
type(X_train), type(X_test), type(y_train), type(y_test), type(y_pred_series)

(pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame,
 pandas.core.series.Series,
 pandas.core.series.Series,
 pandas.core.series.Series)

In [53]:
print(y_train.name)
print(y_test.name)
print(y_pred_series.name)

CHURN_FLAG
CHURN_FLAG
CHURN_PREDICTED


In [54]:
y_train_df = y_train.to_frame()
y_test_df = y_test.to_frame()
y_pred_df = y_pred_series.to_frame()

In [None]:
## registering the model in Fosfor Insight Designer.
register_model(
    model_obj=clf, 
    session=my_session,
    x_train=X_train,
    y_train=y_train_df,
    x_test=X_test,
    y_test=y_test_df,
    y_pred=y_pred_df,
    y_prob = y_pred_df,
    source="Notebook",
    dataset_name="MEMBER_FUNDS_ENRICHED_DETAILS",
    dataset_source="Snowflake",
    #dataset_source="InMemory",
    name="Binary_Churn_RF_Classifier",
    description="Random_Forest_Churn_Classifier",
    flavour="sklearn",
    model_type="classification",
    conda_dependencies=["scikit-learn==1.3.2"]
)

## Attempt 2 ##

In [70]:
im
with open('Churn_Classifier.pkl','rb') as f:  ## use rb while reading the fie
    churn_model = pickle.load(f)

NameError: name 'pickle' is not defined

In [57]:
table_name = 'MEMBER_FUNDS_ENRICHED_DETAILS'
sf_df = my_session.sql("select * from {}".format(table_name))
df = sf_df.to_pandas()

In [58]:
df = df.drop(["MEMBER_DOB_DT", 'ALLOCATION_DT', "CHURN_DT", "RETIREMENT_DT"], axis =1 )

In [59]:
frame = df[['MEMBER_GENDER', 'MEMBER_STATE', 'MEMBER_CONTACT_VERIFIED','FUND_TOTAL_ASSETS','FUND_RETURN_TARGET_PERCENTAGE',
            'INVESTMENT_RISK_CATEGORY', 'CASH_BENCHMARK_ALLOCATION','FIXED_INCOME_BENCHMARK_ALLOCATION',
            'DOMESTIC_LISTED_EQUITY_BENCHMARK_ALLOCATION','INTERNATIONAL_LISTED_EQUITY_BENCHMARK_ALLOCATION',
            'UNLISTED_EQUITY_BENCHMARK_ALLOCATION', 'EQUITY_BENCHMARK_ALLOCATION','PROPERTY_BENCHMARK_ALLOCATION',
            'INFRA_BENCHMARK_ALLOCATION','COMMODITIES_BENCHMARK_ALLOCATION', 'OTHERS_BENCHMARK_ALLOCATION','FUND_RISK_LEVEL', 
            'FUND_RISK_CATEGORY','NEG_NETRETURN_SINCE_INCEPTION', 'YEAR_1_RETURNS', 'YEAR_3_RETURNS','YEAR_5_RETURNS',
            'YEAR_7_RETURNS','YEAR_10_RETURNS', 'SUPER_FEES','PENSION_FEES',
            'INVESTMENT_AGE_GROUP', 'RETIREMENT_AGE_GROUP', 'TOTAL_FUNDS_INVESTED','CHURN_FLAG']].copy()

In [60]:
X = frame.drop("CHURN_FLAG", axis=1)
y = frame["CHURN_FLAG"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [61]:
def score(model, request):
    payload_dict = eval(request.json["payload"])
    data = pd.DataFrame.from_dict(payload_dict)
    frame = data[['MEMBER_GENDER', 'MEMBER_STATE', 'MEMBER_CONTACT_VERIFIED','FUND_TOTAL_ASSETS','FUND_RETURN_TARGET_PERCENTAGE',
            'INVESTMENT_RISK_CATEGORY', 'CASH_BENCHMARK_ALLOCATION','FIXED_INCOME_BENCHMARK_ALLOCATION',
            'DOMESTIC_LISTED_EQUITY_BENCHMARK_ALLOCATION','INTERNATIONAL_LISTED_EQUITY_BENCHMARK_ALLOCATION',
            'UNLISTED_EQUITY_BENCHMARK_ALLOCATION', 'EQUITY_BENCHMARK_ALLOCATION','PROPERTY_BENCHMARK_ALLOCATION',
            'INFRA_BENCHMARK_ALLOCATION','COMMODITIES_BENCHMARK_ALLOCATION', 'OTHERS_BENCHMARK_ALLOCATION','FUND_RISK_LEVEL', 
            'FUND_RISK_CATEGORY','NEG_NETRETURN_SINCE_INCEPTION', 'YEAR_1_RETURNS', 'YEAR_3_RETURNS','YEAR_5_RETURNS',
            'YEAR_7_RETURNS','YEAR_10_RETURNS', 'SUPER_FEES','PENSION_FEES',
            'INVESTMENT_AGE_GROUP', 'RETIREMENT_AGE_GROUP', 'TOTAL_FUNDS_INVESTED']].copy()
    result = model.predict(frame)
    prediction = pd.DataFrame({"CHURN_PREDICTION":result})
    prediction["CHURN_PREDICTION"] = prediction["CHURN_PREDICTION"].apply(lambda x: "Y" if x == 1 else "N")
    probability = model.predict_proba(frame)
    prediction["Probability"] = [round(k[1],4) for k in probability]    
    prediction = prediction.to_dict()
    return pd.DataFrame(prediction)

In [63]:
import requests

In [64]:
payload = df.head(10).to_dict()
req = requests.Request()
req.json = {"payload":str(payload)}
y = req
yo = score(clf, y)
yo

Unnamed: 0,CHURN_PREDICTION,Probability
0,Y,0.8526
1,Y,0.9883
2,Y,0.567
3,Y,0.7961
4,Y,0.8032
5,Y,0.5919
6,Y,0.8782
7,Y,0.5755
8,Y,0.9691
9,Y,0.9606


In [66]:
y_pred = yo["CHURN_PREDICTION"]
y_prob = yo["Probability"]

In [67]:
y_train = y_train.to_frame()
y_test = y_test.to_frame()
y_pred = y_pred.to_frame()
y_prob = y_prob.to_frame()

In [68]:
type(X_train), type(X_test),type(y_train), type(y_test), type(y_pred)

(pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame,
 pandas.core.frame.DataFrame)

In [69]:
register_model(
    model_obj=clf, 
    session=my_session,
    x_train=X_train,
    y_train=y_train,
    x_test=X_test,
    y_test=y_test,
    y_pred=y_pred,
    y_prob = y_prob,
    source="Notebook",
    dataset_name="MEMBER_FUNDS_ENRICHED_DETAILS",
    dataset_source="Snowflake",
    #dataset_source="InMemory",
    name="Binary_Churn_RF_Classifier",
    description="PREDICTING_FUND_CHURN",
    flavour="sklearn",
    model_type="classification",
    conda_dependencies=["scikit-learn==1.3.2"]
)

'Failed to load model artifacts. EOL while scanning string literal (<string>, line 1)'