# Use SUPER_ANNU_Template customized notebook template

In [1]:
#Snowpark lib
from snowflake.snowpark import Session

# Data Science Libs
import numpy as np
import pandas as pd

# create_temp_table warning suppresion
import warnings; warnings.simplefilter('ignore')

#ConfigParser to read ini file
import configparser

import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.cluster import DBSCAN

np.random.seed(0)

connection successful!


In [None]:
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [None]:
table_name = 'MEMBER_FUNDS_ENRICHED_DETAILS'

In [None]:
sf_df = my_session.sql("select * from {}".format(table_name))
df = sf_df.to_pandas()

In [3]:
frame = df[['MEMBER_GENDER', 'MEMBER_STATE', 'MEMBER_CONTACT_VERIFIED','FUND_TOTAL_ASSETS','FUND_RETURN_TARGET_PERCENTAGE',
            'INVESTMENT_RISK_CATEGORY', 'CASH_BENCHMARK_ALLOCATION','FIXED_INCOME_BENCHMARK_ALLOCATION',
            'DOMESTIC_LISTED_EQUITY_BENCHMARK_ALLOCATION','INTERNATIONAL_LISTED_EQUITY_BENCHMARK_ALLOCATION',
            'UNLISTED_EQUITY_BENCHMARK_ALLOCATION', 'EQUITY_BENCHMARK_ALLOCATION','PROPERTY_BENCHMARK_ALLOCATION',
            'INFRA_BENCHMARK_ALLOCATION','COMMODITIES_BENCHMARK_ALLOCATION', 'OTHERS_BENCHMARK_ALLOCATION','FUND_RISK_LEVEL', 
            'FUND_RISK_CATEGORY','NEG_NETRETURN_SINCE_INCEPTION', 'YEAR_1_RETURNS', 'YEAR_3_RETURNS','YEAR_5_RETURNS',
            'YEAR_7_RETURNS','YEAR_10_RETURNS', 'SUPER_FEES','PENSION_FEES',
            'INVESTMENT_AGE_GROUP', 'RETIREMENT_AGE_GROUP', 'TOTAL_FUNDS_INVESTED','CHURN_FLAG']].copy()

In [4]:
frame["CHURN_FLAG"] = frame["CHURN_FLAG"].apply(lambda x: 1 if x =="Y" else 0)

The distribution of CHURN is pretty common bettween the employers and hence wont contribute inn modelling

# Pull Model to Predict Churn Reason

In [9]:
import pickle

with open('reason_model.pkl','rb') as f:  ## use rb while reading the fie
    clf = pickle.load(f)

In [11]:
frame = clf.predict(frame)
df["CHURN_PREDICTED"] = frame

# FUND RECOMMENDATION

In [None]:
table_name = 'FUND_ALLOCATION_MASTER'

sf_df = my_session.sql("select * from {}".format(table_name))
fund_allocation = sf_df.to_pandas()

In [None]:
table_name = 'FUND_MASTER'

sf_df = my_session.sql("select * from {}".format(table_name))
fund_master = sf_df.to_pandas()

In [None]:
table_name = 'FUND_PERFORMANCE_MASTER'

sf_df = my_session.sql("select * from {}".format(table_name))
fund_performance = sf_df.to_pandas()

In [24]:
fund = pd.merge(fund_allocation, fund_master, on="FUND_ID")
fund_total = pd.merge(fund, fund_performance, on="FUND_ID")
fund_charge = [float(i) for i in fund_total["FUND_FEES_CHARGES"] if i != 'Not Available']
avg_fund_charge = sum(fund_charge) / len(fund_charge)

fund_total["FUND_FEES_CHARGES"] = fund_total["FUND_FEES_CHARGES"].apply(lambda x: avg_fund_charge if x == 'Not Available' else x)
fund_total["FUND_FEES_CHARGES"] = fund_total["FUND_FEES_CHARGES"].astype(float) 

fund_meta = fund_total[["FUND_ID","FUNDNAME"]]
fund_cluster = fund_total.drop(["FUND_ID","FUNDNAME","FUND_NAME_y","FUND_TRUSTEE","FUND_INCEPTION_DATE"], axis=1)

In [25]:
fund_data = pd.get_dummies(fund_cluster)

fund_data = fund_data.replace(False, 0)
fund_data = fund_data.replace(True, 1)

In [26]:
clustering = DBSCAN(eps=2, min_samples=3).fit(fund_data)

In [27]:
fund_total["cluster"] = clustering.labels_
fund_total["avg_return"] = fund_total[["YEAR_1_RETURNS","YEAR_3_RETURNS",
                                 "YEAR_5_RETURNS","YEAR_7_RETURNS",
                                 "YEAR_10_RETURNS"]].apply(lambda x:
                                                        (x[0] + (0.9*x[1]) + (0.75*x[2]) + (0.6*x[3]) + (0.55*x[4]))/5, axis=1)

In [28]:
def recommendation(fund_id, reason):
    if reason == 'NO REASON IDENTIFIED' or reason == "NOT APPLICABLE":
        return "Not Applicable"
    elif reason == 'HIGH ACCOUNT FEE' or reason == 'HIGH TRANSACTION FEE':
        clus_frame = fund_total[fund_total["FUND_ID"] == fund_id]
        clus = list(clus_frame["cluster"])[0]
        clus_frame["charge"] = clus_frame[["SUPER_FEES","PENSION_FEES","FUND_FEES_CHARGES"]].apply(lambda x: (x[0]+x[1]+x[2])/3, axis=1)
        total_charge = list(clus_frame["charge"])[0]
        group = fund_total[fund_total["cluster"] == clus]
        group["avg_fee"] = group[["SUPER_FEES","PENSION_FEES","FUND_FEES_CHARGES"]].apply(lambda x: (x[0]+x[1]+x[2])/3, axis=1)
        group["rec"] = group["avg_fee"].apply(lambda x: 1 if x < total_charge else 0)
        group = group.sort_values("avg_return")
        recommendations = list(group[group["rec"] == 1]["FUND_ID"])
        if fund_id in recommendations:
            recommendations.remove(fund_id)
        recommendations = recommendations[:5]
        if len(recommendations) >= 1:
            recommendations = np.random.choice(recommendations,1)[0]
        else:
            return "Not Applicable"
        return recommendations

    elif reason == 'FUND REPUTATION DECLINING' or reason == 'POOR FUND PERFORMANCE':
        clus_frame = fund_total[fund_total["FUND_ID"] == fund_id]
        clus = list(clus_frame["cluster"])[0]
        group = fund_total[fund_total["cluster"] == clus]
        group = group.sort_values("avg_return")
        recommendations = list(group["FUND_ID"])
        if fund_id in recommendations:
            recommendations.remove(fund_id)
        recommendations = recommendations[:5]
        if len(recommendations) >= 1:
            recommendations = np.random.choice(recommendations,1)[0]
        else:
            return "Not Applicable"
        return recommendations
    
    else:
        return "Not Applicable"

rec = recommendation("FID000014","HIGH ACCOUNT FEE")
print(len(rec))

9


In [29]:
rec

'FID000016'

In [29]:
from tqdm import tqdm

In [30]:
rec_fund = []
for fund_id, f_reason, churn in tqdm(zip(df["FUND_ID"], df["CHURN_REASON"], df["CHURN_PREDICTED"])):
    if churn == 1:
        rec = recommendation(fund_id,f_reason)
        rec_fund.append(rec)
    else:
        rec_fund.append("Not Applicable")
    

335999it [03:33, 1576.72it/s]


In [31]:
df["funds_rec"] = rec_fund

In [32]:
#df.to_csv("/data/funds_recommendated.csv", index=False)

In [33]:
#fund_total.to_csv("/data/FUNDS_COMPLETE_DATA.csv", index=False)

In [100]:
df = df.rename(columns={'FUNDS RECOMMENDATIONS': 'FUNDS_RECOMMENDATIONS'})
df["FUNDS_RECOMMENDATIONS"] = df["FUNDS_RECOMMENDATIONS"].astype(str)

In [98]:
for i, val in tqdm(enumerate(df['FUNDS_RECOMMENDATIONS'])):
    df["FUNDS_RECOMMENDATIONS"].iloc[i] = str(val)

In [110]:
from snowflake.snowpark.types import StructType, StructField, StringType, IntegerType, FloatType

schema = StructType([
    StructField("MEMBER_ID", StringType()),
    StructField("MEMBER_NAME", StringType()),
    StructField("MEMBER_EMPLOYMENT", StringType()),
    StructField("MEMBER_GENDER", StringType()),
    StructField("MEMBER_CITY_TOWN", StringType()),
    StructField("MEMBER_STATE", StringType()),
    StructField("MEMBER_CONTACT_VERIFIED", StringType()),
    StructField("FUND_ID", StringType()),
    StructField("CHURN_REASON", StringType()),
    StructField("LATITUDE", FloatType()),
    StructField("LONGITUDE", FloatType()),
    StructField("COUNTRY", StringType()),
    StructField("COUNTRYCODE", StringType()),
    StructField("CAPITAL", StringType()),
    StructField("POPULATION", IntegerType()),
    StructField("FUND_TOTAL_ASSETS", FloatType()),
    StructField("FUND_RETURN_TARGET_PERCENTAGE", FloatType()),
    StructField("INVESTMENT_RISK_LEVEL", FloatType()),
    StructField("INVESTMENT_RISK_CATEGORY", StringType()),
    StructField("CASH_BENCHMARK_ALLOCATION", FloatType()),
    StructField("FIXED_INCOME_BENCHMARK_ALLOCATION", FloatType()),
    StructField("DOMESTIC_LISTED_EQUITY_BENCHMARK_ALLOCATION", FloatType()),
    StructField("INTERNATIONAL_LISTED_EQUITY_BENCHMARK_ALLOCATION", FloatType()),
    StructField("UNLISTED_EQUITY_BENCHMARK_ALLOCATION", FloatType()),
    StructField("EQUITY_BENCHMARK_ALLOCATION", FloatType()),
    StructField("PROPERTY_BENCHMARK_ALLOCATION", FloatType()),
    StructField("INFRA_BENCHMARK_ALLOCATION", FloatType()),
    StructField("COMMODITIES_BENCHMARK_ALLOCATION", FloatType()),
    StructField("OTHERS_BENCHMARK_ALLOCATION", FloatType()),
    StructField("FUND_RISK_LEVEL", FloatType()),
    StructField("FUND_RISK_CATEGORY", StringType()),
    StructField("NEG_NETRETURN_SINCE_INCEPTION", StringType()),
    StructField("YEAR_1_RETURNS", FloatType()),
    StructField("YEAR_3_RETURNS", FloatType()),
    StructField("YEAR_5_RETURNS", FloatType()),
    StructField("YEAR_7_RETURNS", FloatType()),
    StructField("YEAR_10_RETURNS", FloatType()),
    StructField("SUPER_FEES", FloatType()),
    StructField("PENSION_FEES", FloatType()),
    StructField("CHURN_FLAG", StringType()),
    StructField("MEMBER_AGE", IntegerType()),
    StructField("INVESTMENT_AGE_GROUP", StringType()),
    StructField("RETIREMENT_AGE", IntegerType()),
    StructField("RETIREMENT_AGE_GROUP", StringType()),
    StructField("TOTAL_FUNDS_INVESTED", IntegerType()),
    StructField("NO_OF_CHURN", StringType()),
    StructField("MEMBER_DOB_DT", StringType()),
    StructField("ALLOCATION_DT", StringType()),
    StructField("CHURN_DT", StringType()),
    StructField("RETIREMENT_DT", StringType()),
    StructField("CHURN_PREDICTED", IntegerType()),
    StructField("unique_id", IntegerType()),
    StructField("CHURN_REASON_PREDICTED", StringType()),
    StructField("FUNDS_RECOMMENDATIONS", StringType())
])

In [111]:
df_snowflake = session.createDataFrame(df.values.tolist(), schema=schema)

In [112]:
df_snowflake.write.mode("overwrite").save_as_table("FDC_Banking_FS.BFS_SUPER_ANNU_SCHEMA.FUNDS_COMPLETE_DATA")

In [108]:
temp_ = {}
for i, val in tqdm(enumerate(df['FUNDS_RECOMMENDATIONS'])):
    df["FUNDS_RECOMMENDATIONS"].iloc[i] = str(val)
#     temp_[val] =type(val)
    

335999it [02:19, 2414.77it/s]


In [115]:
fund_master["WEIGHTED_AVG_RETURN"] = fund_total["avg_return"]

In [116]:
fund_master["AVG_FEES"] = fund_total[["SUPER_FEES","PENSION_FEES","FUND_FEES_CHARGES"]].apply(lambda x: (x[0]+x[1]+x[2])/3, axis=1)

In [120]:
def get_return_rating(x):
    if x == '2 to 3':
        return 4.0
    if x == '1 to 2': 
        return 4.2
    if x == '4 to 6':
        return 3.5
    if x == '0.5 to 1':
        return  4.5
    if x == '3 to 4':
        return 3.8
    else:
        return 4.8

fund_master["NEG_RETURN_RATING"] = fund_total["NEG_NETRETURN_SINCE_INCEPTION"].apply(get_return_rating)

In [122]:
fund_master = fund_master.drop("AVG_RETURN", axis=1)

In [123]:
df_snowflake = session.createDataFrame(fund_master.values.tolist(), schema=fund_master.columns.tolist())

In [124]:
df_snowflake.write.mode("overwrite").save_as_table("FDC_Banking_FS.BFS_SUPER_ANNU_SCHEMA.FUND_MASTER")