In [263]:
import tensorflow as tf
import pandas as pd
from tensorflow.keras.models import load_model
import pickle
from sklearn.preprocessing import MinMaxScaler
from flask import jsonify
from flask import request, make_response,Response
from tensorflow.keras.initializers import glorot_uniform
import numpy as np
import warnings
import json 
from flask import Flask
warnings.filterwarnings("ignore")

In [259]:
template={"score":"","max_score": "700", "improve":""}

In [266]:
def get_response(json):
    try:
        
        return Response(json, mimetype='application/json')
    except Exception as ex:
        print(ex,json)
        return get_exception_response(ex)

In [265]:
def get_exception_response(ex):
    return Response(ex,500)

In [313]:
def get_scoring_result(user_profile):
    try:
        user_df=pd.read_json(user_profile)
        user_data=get_formatted_input(user_df) 
        score=get_user_score(user_data)
        improvements=get_improvement_prediction(user_data)
        template["score"]=score
        template["improve"]=improvements
        template["max_score"]="700"
        
        json_data=json.dumps(template)
        return score,improvements
    except Exception as ex:
        return get_exception_response(ex)

In [278]:
def get_user_score(user_data):
    
    #Reading the model from JSON file
    with open("model_user_score.json", 'r') as json_file:
        json_savedModel= json_file.read()
    #load the model architecture 
    model = tf.keras.models.model_from_json(json_savedModel)    
    model.load_weights("Weights-048--21.52199.hdf5") # load it
    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])    
    
    predictions = int(model.predict(user_data))
    return predictions

In [154]:
def get_improvement_prediction(user_data):
    IMPROVEMENT_MODEL="improvement_nn_model.h5"
    model = load_model(IMPROVEMENT_MODEL, compile = False)
    
    predictions = model.predict(user_data)
    top_2=(-predictions.ravel()).argsort()[:2]
    improvements=pd.read_pickle("improvement_mapping.pkl")
    indexs=improvements[(improvements["index"].isin(top_2))]["improvement"].to_list()
    improve_df=pd.DataFrame(improvement_dict)
    improve_text=improve_df[improve_df["improvement"].isin(indexs)]["text"].to_list()
    return improve_text

In [129]:
improvement_dict=[{'improvement': 'diversity_score',
  'text': "Diversify your mutual funds. Don't invest only in one type of funds"},
 {'improvement': 'roi_score',
  'text': 'Your overall mutual fund rate of return is low. Try to invest in high return funds'},
 {'improvement': 'risk_score',
  'text': 'Diversify your risk. Most of the funds are under same risk.'},
 {'improvement': 'investment_score',
  'text': "You can have funds available for investments. It's a good practice to invest 10-15% of your funds"},
 {'improvement': 'time_score',
  'text': 'Its good to invest the mutual funds for a longer period to get the power of compound interest'},
 {'improvement': 'credit_score',
  'text': 'Try to pay outstanding on time as impacts'},
 {'improvement': 'confidence_score',
  'text': 'Try to invest for more time and more funds, to get higher returns using the power of compound interest'}]

In [130]:
def cat_types_weights(cat_types_freq):
    total=np.sum(list(cat_types_freq.values()))
    mf_types_weight =np.sum([0.2*(freq/total) for mf,freq in cat_types_freq.items()]) 
    return mf_types_weight

In [68]:
def calculate_cumulative_freq(df,col,freq_col):
        cumulative_dict=df[col].value_counts().to_dict()
        print(cumulative_dict)
        cumulative_sum= np.sum([row[col]*row[freq_col] for idx,row in df.iterrows()])
        return cumulative_sum

In [104]:
def get_freq(df,col,key):
    cumulative_dict=df[col].value_counts().to_dict()
    if key not in cumulative_dict.keys():
        return 0
    return cumulative_dict[key]

In [84]:
def calculate_cumulative_sum(df,col,categoy=False,func=None):
        if categoy:
            df[col]=df[col].astype("category").cat.codes
            df[col]=df[col]+1
        
        cumulative_dict=df[col].value_counts().to_dict()
        if func is not None:
            return func(cumulative_dict)
        cumulative_sum= np.sum([k*v for k,v in cumulative_dict.items()])
        return cumulative_sum

In [281]:
def get_formatted_input(df):    
    #Converting the datatype to Datetime
    df['last_payment_date']=pd.to_datetime(df["last_payment_date"],format='%Y-%m-%d')
    #Converting the datatype to Datetime
    df['first_loan_start_date']=pd.to_datetime(df["first_loan_start_date"],format='%Y-%m-%d')
    #credit history
    df['credit_history']=(df['last_payment_date']-df['first_loan_start_date'])/np.timedelta64(1, 'D')

    #current level of indebtedness
    df['indebtedness']=df['loan_payment_till_date']/df['total_loan']*100
    if (df['indebtedness'].isnull().values.any()):
        df['indebtedness']=0
        
    df["cumulative_tenure"]=calculate_cumulative_sum(df,"mutual_fund_held_since")


    df["cumulative_inv_amount"]=calculate_cumulative_sum(df,"mutual_fund_amount")
    df["cumulative_risk"]=calculate_cumulative_sum(df,"mutual_fund_risk",False,cat_types_weights)

    df["cumulative_mf_cat"]=calculate_cumulative_sum(df,"mutual_fund_category",False,cat_types_weights)
    df["total_tenure"]=np.sum(df["mutual_fund_held_since"])
    df["avg_mutual_fund_amount"]=np.sum(df["mutual_fund_amount"])
    df["avg_mutual_fund_return"]=np.sum(df["mutual_fund_return"])
    
    
    mf_cat_cols={"Equity Scheme":'mf_cat_equity_scheme',
     "Debt Scheme":'mf_cat_debt_scheme',
     "Other Scheme":'mf_cat_other_scheme',
     "Hybrid Scheme":'mf_cat_hybrid_scheme',
     "Solution Scheme":'mf_cat_solution_scheme',
     "Others":'mf_cat_others'}

    for k,v in mf_cat_cols.items():
        df[v]=get_freq(df,"mutual_fund_category",v)

    model_columns=['months_since_default', 'credit_history', 'open_acc', 'indebtedness',
           'avg_mutual_fund_amount', 'avg_mutual_fund_return', 'account_balance', 'cumulative_tenure',
           'cumulative_inv_amount', 'cumulative_risk', 'cumulative_mf_cat', 'total_tenure',
           'mf_cat_equity_scheme', 'mf_cat_debt_scheme', 'mf_cat_other_scheme',
           'mf_cat_hybrid_scheme', 'mf_cat_solution_scheme', 'mf_cat_others']
    
    input_data=df[model_columns].drop_duplicates().head(1)
#     scaler=MinMaxScaler()
#     user_data=scaler.fit_transform(input_data)
    return input_data

In [117]:
df.to_pickle("test_train_score_data.pkl.bz2",compression="bz2")

In [212]:
df2=pd.read_pickle("../test_train_scoring_data.pkl.bz2",compression="bz2")

In [213]:
df2

Unnamed: 0,last_payment_date,first_loan_start_date,open_acc,loan_payment_till_date,total_loan,member_id,mutual_fund_amount,mutual_fund_id,defaul_payment_hist,mutual_fund_return,account_balance,mutual_fund_held_since
0,2015-01-01,1985-01-01,3,0.0,4975.0,1296599,16153,101186,28.0,1696065.00,108776.0,13
1,2015-01-01,1985-01-01,3,0.0,4975.0,1296599,7764,126708,28.0,1871124.00,108776.0,12
2,2015-01-01,1985-01-01,3,0.0,4975.0,1296599,11489,119819,28.0,11489.00,108776.0,18
3,2015-01-01,1985-01-01,3,0.0,4975.0,1296599,7467,141288,28.0,485355.00,108776.0,8
4,2015-01-01,1985-01-01,3,0.0,4975.0,1296599,19289,141929,28.0,19289.00,108776.0,5
...,...,...,...,...,...,...,...,...,...,...,...,...
383076,2008-01-01,1988-11-01,17,0.0,650.0,89243,16057,118318,22.0,15254.15,292704.5,5
383077,2010-06-01,2003-10-01,7,0.0,800.0,86999,8805,138897,0.0,7660.35,61465.5,13
383078,2010-06-01,2003-10-01,7,0.0,800.0,86999,4798,102760,0.0,273486.00,61465.5,7
383079,2010-06-01,2003-10-01,7,0.0,800.0,86999,14989,144579,0.0,14989.00,61465.5,19


In [286]:
user_sample=df3[df3["member_id"]==89243]

for col in user_sample.columns:
    user_sample[col]=user_sample[col].astype(str)

user_sample.to_json("test_data_user1_score.json",orient="records")

In [143]:
df["first_loan_start_date"]=df["first_loan_start_date"].astype(str)

In [194]:
df["member_id"].value_counts()

1219327    1
804094     1
713301     1
367929     1
1108890    1
          ..
1110004    1
448641     1
809383     1
713397     1
1190406    1
Name: member_id, Length: 128, dtype: int64

In [169]:
df.tail(1).fillna(0).to_json("test_data_user_score.json",orient="records")

In [209]:
mf_df=pd.read_pickle("mf_data.pkl.bz2",compression="bz2")

In [214]:
df3=df2.merge(mf_df,left_on="mutual_fund_id",right_on="scheme_code",how="inner").drop("scheme_code",axis=1)

In [216]:
cols=["scheme_name","scheme_category","popularity","rank","mf_sub_category"]

In [217]:
df3.drop(cols,axis=1,inplace=True)

In [229]:
df3.rename(columns={"defaul_payment_hist":"months_since_default"},inplace=True)

In [None]:
df3.rename(columns={})

In [291]:
pd.read_json("test_data_user1_score.json").T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
last_payment_date,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01,2008-01-01
first_loan_start_date,1988-11-01,1988-11-01,1988-11-01,1988-11-01,1988-11-01,1988-11-01,1988-11-01,1988-11-01,1988-11-01,1988-11-01,1988-11-01,1988-11-01,1988-11-01,1988-11-01,1988-11-01,1988-11-01
open_acc,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17
loan_payment_till_date,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
total_loan,650,650,650,650,650,650,650,650,650,650,650,650,650,650,650,650
member_id,89243,89243,89243,89243,89243,89243,89243,89243,89243,89243,89243,89243,89243,89243,89243,89243
mutual_fund_amount,6310,12790,13966,12503,17539,19310,4278,11551,6300,15075,13875,10910,3192,19538,14459,16057
mutual_fund_id,121145,120391,129736,128989,147866,148266,129330,146609,126399,128924,144337,125328,146595,107525,143433,118318
months_since_default,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22,22
mutual_fund_return,1.24307e+06,121505,11731.4,1.76292e+06,1.70128e+06,1.25515e+06,551862,11551,5859,1.10048e+06,3.89888e+06,9600.8,20748,18561.1,2.21223e+06,15254.1


In [292]:
user_formatted=get_formatted_input(pd.read_json("test_data_user1_score.json"))

In [293]:
model_columns=['months_since_default', 'credit_history', 'open_acc', 'indebtedness',
           'avg_mutual_fund_amount', 'avg_mutual_fund_return', 'account_balance', 'cumulative_tenure',
           'cumulative_inv_amount', 'cumulative_risk', 'cumulative_mf_cat', 'total_tenure',
           'mf_cat_equity_scheme', 'mf_cat_debt_scheme', 'mf_cat_other_scheme',
           'mf_cat_hybrid_scheme', 'mf_cat_solution_scheme', 'mf_cat_others']

In [294]:
user_formatted

Unnamed: 0,months_since_default,credit_history,open_acc,indebtedness,avg_mutual_fund_amount,avg_mutual_fund_return,account_balance,cumulative_tenure,cumulative_inv_amount,cumulative_risk,cumulative_mf_cat,total_tenure,mf_cat_equity_scheme,mf_cat_debt_scheme,mf_cat_other_scheme,mf_cat_hybrid_scheme,mf_cat_solution_scheme,mf_cat_others
0,22,7000.0,17,0.0,197653,13940675.49,292704.5,188,197653,0.2,0.2,188,0,0,0,0,0,0


In [324]:
df_user=pd.DataFrame(columns=["user","score","improvement1","improvement2"])

In [325]:
from tqdm.notebook import tqdm

In [None]:
df_user["user"]=list(set(df3["member_id"]))
for idx,user in tqdm(df_user["user"].items(),total=len(df_user["user"])):
    user_sample=df3[df3["member_id"]==user]

    for col in user_sample.columns:
        user_sample[col]=user_sample[col].astype(str)

    json_sample=user_sample.to_json(orient="records")
    score,improve=get_scoring_result(json_sample)
    df_user.at[idx,"score"]=score
    df_user.at[idx,"improvement1"]=improve[0]
    df_user.at[idx,"improvement2"]=improve[1]
    

  0%|          | 0/37693 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [300]:
get_scoring_result("test_data_user1_score.json")

{'score': 178, 'max_score': '700', 'improve': ['Its good to invest the mutual funds for a longer period to get the power of compound interest', 'Try to invest for more time and more funds, to get higher returns using the power of compound interest']}


178

In [329]:
df_user["score"].value_counts()

188    19
187    19
190    19
189    17
191    16
       ..
224     1
220     1
218     1
396     1
162     1
Name: score, Length: 90, dtype: int64