In [1]:
#import data processing libraries

import os
import pandas as pd
import numpy as np
import math as math
import datetime as dt
from scipy import stats

#db connection libraries
import psycopg2
from sqlalchemy import create_engine
import psycopg2.extras as extras

In [2]:
pd.set_option('display.max_colwidth', 500)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 300)

#suppress scientific notation to 2 decimal places

pd.set_option('display.float_format', lambda x: '%.2f' % x)

In [3]:
host = '157.245.248.249'
port = int(5432)
dbname = 'ubuntu'
user = 'jacklinengenia'
password = 'x3MX&8#!'


def get_query_results_postgres():
    with psycopg2.connect(host = host,
                          port = port,
                          database = dbname,
                          user = user,
                          password = password) as conn:
        sql = "SELECT * FROM bloomlive.idm_refresh_view"
        df = pd.read_sql(sql, conn)
        
    return df

    conn.close()
    
if __name__ == "__main__":
    get_query_results_postgres() 

In [4]:
idm_df = get_query_results_postgres()

idm_df.head(3)

Unnamed: 0,id,idnumber,mydpdl90d,maxloanamamountlast6months,nownloans,nrepaidloans,nrepaidloanshighutiliz,maxowndpdever,recommendeddecision,creditlimit7days,creditlimit30days,cipscore,cipriskgrade,mobilescore,mobilescoreriskgrade,idm_refresh_date,created_at,full_name,date_of_birth,age,gender,marital_status,employment_status,reference_number,rnk
0,85996,0,,,,,,,Reject,,,,,,,2022-06-18,2022-06-19 12:39:55.131599,Subject Not Found in IPRS and CBS,,,,,,31769321-0,1
1,176400,193,,,,,,,Reject,,,,,,,2022-07-05,2022-07-06 12:22:00.039305,Subject Not Found in IPRS and CBS,,,,,,32113515-941433372,1
2,192877,6255,,,,,,,Approve,0.0,0.0,,,,,2022-07-19,2022-07-20 12:22:19.238376,Felix Kibue,1951-12-30,70.0,Male,NotSpecified,NotSpecified,32314943-792461395,1


In [5]:
#rename columns

idm_df.rename(columns = {"idnumber": "national_id",
                        "recommendeddecision": "IDM_recommendation",
                        "creditlimit30days": "IDM_limit"}, inplace = True)

In [6]:
#trim df to only the relevant columns

idm_df = idm_df[["national_id","IDM_recommendation","IDM_limit"]]

In [7]:
# remove any rows without a national id
idm_df = idm_df[idm_df["national_id"].notnull()]


#convert national id to string column type
idm_df["national_id"] = idm_df["national_id"].astype(str)

In [8]:
#remove duplicate national_id column

idm_df = idm_df.loc[:,~idm_df.columns.duplicated()]

In [9]:
#filter out rows where "IDM_recommendation" column is blank

idm_df = idm_df.loc[idm_df["IDM_recommendation"].notnull()]

In [10]:
#remove any whitespaces that may cause issues for specific column

idm_df["national_id"] = idm_df["national_id"].apply(lambda x: x.split(".")[0])
idm_df["national_id"] = idm_df["national_id"].apply(lambda x: x.split(" ")[0])

In [11]:
idm_df.shape

(122572, 3)

In [12]:
idm_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 122572 entries, 0 to 122592
Data columns (total 3 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   national_id         122572 non-null  object 
 1   IDM_recommendation  122572 non-null  object 
 2   IDM_limit           99225 non-null   float64
dtypes: float64(1), object(2)
memory usage: 3.7+ MB


In [13]:
idm_df['IDM_recommendation'].value_counts()

Reject     87217
Approve    35355
Name: IDM_recommendation, dtype: int64

In [14]:
host = '157.245.248.249'
port = int(5432)
dbname = 'ubuntu'
user = 'jacklinengenia'
password = 'x3MX&8#!'


def get_query_results_postgres():
    with psycopg2.connect(host = host,
                          port = port,
                          database = dbname,
                          user = user,
                          password = password) as conn:
        sql = "select * from bloomlive.client_summary_view csv2 where is_iprs_validated is true"
        df = pd.read_sql(sql, conn)
        
    return df

    conn.close()

In [15]:
# loading data from scoring results table
store_nums_df = get_query_results_postgres()

store_nums_df.head(3)

Unnamed: 0,bloom_version,surrogate_id,mifos_id,mobile_number,store_number,national_id,first_name,middle_name,last_name,iprs_first_name,iprs_other_name,iprs_surname,date_of_birth,gender,status,submitted_on_date,client_type,company_name,provided_first_name,iprs_name_matched,is_iprs_checked,is_iprs_validated
0,2.0,174245,92337,254727059406,7357462,28167918,Sylvia,Mumbi,Ngunga,Sylvia,Mumbi,Ngunga,2022-04-29,,Active,2022-04-29,,SYLVIA MUMBI,,True,True,True
1,2.0,173856,91945,254714958267,7783183,25721072,Edigar,Litunda,Segero,Edigar,Litunda,Segero,2022-04-28,,Active,2022-04-28,,EDIGAR LITUNDA,,True,True,True
2,2.0,173340,91422,254724904951,7476806,25243763,Salome,Wairimu,Ingosi,Salome,Wairimu,Ingosi,2022-04-27,,Active,2022-04-27,,SALOME WAIRIMU,,True,True,True


In [16]:
store_nums_df['bloom_version'].nunique()

2

In [17]:
store_nums_df['is_iprs_validated'].nunique()

1

In [18]:
store_nums_df.shape

(130914, 22)

In [19]:
store_nums_df = store_nums_df.drop_duplicates(subset=['store_number'], keep='first')

store_nums_df.shape

(95046, 22)

In [20]:
#load mapping for id and store numbers
#store_number_path = "C:\\Project_summaries\\Bloom\\Bloom all_loans\\20220721\\"


#load df
#store_nums_df = pd.read_excel(store_number_path+"client_summaries.xlsx")


#trim df to only target cols
store_nums_cols = ["national_id", "store_number", "is_iprs_validated"]
store_nums_df = store_nums_df[store_nums_cols]


#convert col to string
store_nums_df["national_id"] = store_nums_df["national_id"].astype("str")
store_nums_df["is_iprs_validated"] = store_nums_df["is_iprs_validated"].astype("str")

#store_nums_df["store_number"] = store_nums_df["store_number"].astype("str")



#clean up mobile number column
#store_nums_df[["national_id_2", "temp"]] = store_nums_df["national_id"].astype("str").str.split(".", expand=True)


#drop newly created temp column
#store_nums_df.drop(["temp","national_id"], axis=1, inplace=True)


#rename columns
#store_nums_df.rename(columns={"SHORT_CODE":"store_number","national_id":"national_id"}, inplace=True)

In [21]:
store_nums_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 95046 entries, 0 to 130911
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   national_id        95046 non-null  object
 1   store_number       95045 non-null  object
 2   is_iprs_validated  95046 non-null  object
dtypes: object(3)
memory usage: 2.9+ MB


In [22]:
#left merge to assign idm df with store numbers

idm_df = pd.merge(idm_df, store_nums_df, how="left", on="national_id")

In [23]:
idm_df.head(3)

Unnamed: 0,national_id,IDM_recommendation,IDM_limit,store_number,is_iprs_validated
0,0,Reject,,,
1,193,Reject,,,
2,6255,Approve,0.0,986801.0,True


In [24]:
idm_df['IDM_recommendation'].value_counts()

Reject     90514
Approve    36787
Name: IDM_recommendation, dtype: int64

In [25]:
idm_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 127301 entries, 0 to 127300
Data columns (total 5 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   national_id         127301 non-null  object 
 1   IDM_recommendation  127301 non-null  object 
 2   IDM_limit           103916 non-null  float64
 3   store_number        95053 non-null   object 
 4   is_iprs_validated   95054 non-null   object 
dtypes: float64(1), object(4)
memory usage: 5.8+ MB


In [26]:
idm_df.shape

(127301, 5)

---

#### Import Ubuntu scoring summary data

In [27]:
#till_data path

till_data_path = "C:\\Project_summaries\\Bloom\\Bloom all_loans\\20220721\\Till_data_summaries\\"

#load till df
ubuntu_scoring_summary_df = pd.read_excel(till_data_path+"Bloom_scoring_trx_data_20220721.xlsx")


#drop any extra unnecessary columns
ubuntu_scoring_summary_df = ubuntu_scoring_summary_df[ubuntu_scoring_summary_df.columns.drop(list(ubuntu_scoring_summary_df.filter(regex="Unnamed")))]


# remove any rows without a national id
ubuntu_scoring_summary_df = ubuntu_scoring_summary_df[ubuntu_scoring_summary_df["store_number"].notnull()]

In [28]:
#create  copy of df

till_data = ubuntu_scoring_summary_df.copy()

#remove duplicate rows from till_data
till_duplicates = till_data.loc[till_data.duplicated()]
till_data = till_data.loc[~till_data.duplicated()]


#convert national_id column to string
till_data["national_id"] = till_data["national_id"].astype(str)


#remove any whitespaces that may cause issues for specific column
till_data["national_id"] = till_data["national_id"].apply(lambda x: x.split(".")[0])
till_data["national_id"] = till_data["national_id"].apply(lambda x: x.split(" ")[0])

#drop national id column
till_data.drop(columns="national_id", inplace=True)

In [29]:
idm_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 127301 entries, 0 to 127300
Data columns (total 5 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   national_id         127301 non-null  object 
 1   IDM_recommendation  127301 non-null  object 
 2   IDM_limit           103916 non-null  float64
 3   store_number        95053 non-null   object 
 4   is_iprs_validated   95054 non-null   object 
dtypes: float64(1), object(4)
memory usage: 5.8+ MB


In [30]:
till_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 80289 entries, 0 to 80288
Data columns (total 12 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   mobile_number                      64309 non-null  object        
 1   store_number                       80289 non-null  int64         
 2   approx_30_days_trx_val             80289 non-null  float64       
 3   most_recent_trx_date_past_30_days  80289 non-null  datetime64[ns]
 4   last_trx_date                      80289 non-null  datetime64[ns]
 5   expected_trx_days                  80289 non-null  int64         
 6   actual_trx_days                    80289 non-null  int64         
 7   page_active_days                   80289 non-null  float64       
 8   inference_col                      80289 non-null  object        
 9   days_since_last_trx                80289 non-null  int64         
 10  transacted_last_5_days            

In [31]:
till_data['store_number'] = till_data['store_number'].astype(str)

In [32]:
#merge idm_df & till_data

df = till_data.merge(idm_df, how="left", on="store_number")

In [33]:
df.head(3)

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,IDM_recommendation,IDM_limit,is_iprs_validated
0,254711519875,101212,12530.0,2022-07-04,2022-07-21,18,6,0.33,No_rules_relaxed,4,Yes,1.0,8026792,Approve,5000.0,True
1,254716180492,105295,543428.75,2022-06-26,2022-07-24,29,28,0.97,relax_rules,1,Yes,1.0,27881033,Approve,4500.0,True
2,254701582431,105581,3603.0,2022-07-05,2022-07-22,18,4,0.22,No_rules_relaxed,3,Yes,1.0,32339396,Reject,0.0,True


In [34]:
df['IDM_recommendation'].value_counts()

Reject     42083
Approve    22228
Name: IDM_recommendation, dtype: int64

In [35]:
df.shape

(80297, 16)

---

#### Load loans summary

In [36]:
#load loans summary df

path_2 = "C:\\Project_summaries\\Bloom\\Bloom all_loans\\20220721\\Analysis_summaries\\"

loans_summary = pd.read_excel(path_2+"Bloom_clients_loans_summary_20220721.xlsx")


#drop unnecassary column
loans_summary = loans_summary[loans_summary.columns.drop(list(loans_summary.filter(regex="Unnamed")))]

In [37]:
loans_summary.head(3)

Unnamed: 0,client_mobile_number,store_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,inference_col,weight_dpd
0,254726604388.0,6,7,300,7,200000,0.0,2022-07-24,2022-07-31,NaT,2022-07-31,-7.0,2,current_active,200000,2022-07-04,3,0.43,0,No_rules_relaxed,0.0
1,,7,21,300,7,1300,0.0,2022-07-23,2022-07-30,NaT,2022-07-30,-6.0,2,current_active,200000,2022-04-12,10,0.48,1,No_rules_relaxed,0.0
2,254720272826.0,11,3,600,30,5000,5000.0,2019-12-01,2019-12-31,2019-12-30,2019-12-31,-1.0,1,closed_early_repayment,5000,2019-12-01,3,1.0,966,relax_rules,1.0


In [38]:
loans_summary.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45309 entries, 0 to 45308
Data columns (total 21 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   client_mobile_number              42645 non-null  float64       
 1   store_number                      45309 non-null  int64         
 2   loan_count                        45309 non-null  int64         
 3   loan_status                       45309 non-null  int64         
 4   term_frequency                    45309 non-null  int64         
 5   principal_disbursed               45309 non-null  int64         
 6   principal_repaid                  45309 non-null  float64       
 7   disbursed_on_date                 45309 non-null  datetime64[ns]
 8   expected_matured_on_date          45309 non-null  datetime64[ns]
 9   closed_on_date                    14642 non-null  datetime64[ns]
 10  due_date_fixed                    45309 non-nu

In [39]:
#which customers do we have loans history but no till data summaries?

bloom_customers_no_till = list(set(loans_summary["store_number"]) - set(df["store_number"]))

In [40]:
#which customers do we have loans history but no reported store_number?
bloom_customers_no_store_num = loans_summary[loans_summary["store_number"].isnull()][["client_mobile_number","store_number","bloom_version","loan_count","loan_repayment_status"]]


#remove customers whom we don't have tills for
#loans_summary = loans_summary[loans_summary["store_number"].notnull()]

In [41]:
loans_summary.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45309 entries, 0 to 45308
Data columns (total 21 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   client_mobile_number              42645 non-null  float64       
 1   store_number                      45309 non-null  int64         
 2   loan_count                        45309 non-null  int64         
 3   loan_status                       45309 non-null  int64         
 4   term_frequency                    45309 non-null  int64         
 5   principal_disbursed               45309 non-null  int64         
 6   principal_repaid                  45309 non-null  float64       
 7   disbursed_on_date                 45309 non-null  datetime64[ns]
 8   expected_matured_on_date          45309 non-null  datetime64[ns]
 9   closed_on_date                    14642 non-null  datetime64[ns]
 10  due_date_fixed                    45309 non-nu

In [42]:
loans_summary['store_number'] = loans_summary['store_number'].astype(str)

In [43]:
#merge filtered_df & till_data

df = pd.merge(df, loans_summary, how="left", on="store_number")

In [44]:
df.shape

(80297, 36)

In [45]:
#delete duplicate errors created because of broken ranking feature
df = df.sort_values("disbursed_on_date", ascending=True).drop_duplicates("store_number",keep='last')


#remove duplicate rows from df
df_duplicates = df.loc[df.duplicated()]
df = df.loc[~df.duplicated()]

In [46]:
~df.duplicated()

73971    True
67827    True
1168     True
46470    True
79597    True
         ... 
80280    True
80289    True
80291    True
80293    True
80295    True
Length: 80289, dtype: bool

In [47]:
df.shape

(80289, 36)

In [48]:
#fillna loan_count column

cols_fillna = ["loan_count"]
# replace 'NaN' with zero in these columns
for col in cols_fillna:
    df[col].fillna(0,inplace=True)

In [49]:
#fillna IDM_recommendation column

cols_fillna = ["IDM_recommendation"]
# replace 'NaN' with zero in these columns
for col in cols_fillna:
    df[col].fillna("Reject",inplace=True)

In [50]:
df.drop(columns="inference_col_y", inplace=True)
df.rename(columns = {"inference_col_x":"inference_col"}, inplace=True)

In [51]:
df['IDM_recommendation'].value_counts()

Reject     58063
Approve    22226
Name: IDM_recommendation, dtype: int64

In [52]:
df.head()

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,IDM_recommendation,IDM_limit,is_iprs_validated,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd
73971,254722978849,822369,2020.0,2022-07-21,2022-07-21,1,1,1.0,relax_rules,4,Yes,1.0,22128407,Approve,0.0,True,254722978849.0,4.0,600.0,7.0,5000.0,5000.0,2017-12-10,2017-12-17,2017-12-10,2017-12-17,-7.0,1.0,closed_early_repayment,5000.0,2017-12-10,4.0,1.0,1687.0,1.0
67827,254723173634,786152,1280.0,2022-07-15,2022-07-15,1,1,1.0,No_rules_relaxed,10,No,0.0,494024,Approve,0.0,True,254723173634.0,1.0,601.0,30.0,12698.0,9206.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,12698.0,2017-12-18,0.0,0.0,1679.0,0.0
1168,254723916436,165978,906714.4,2022-06-26,2022-07-24,29,28,0.97,No_rules_relaxed,1,Yes,1.0,20417564,Approve,0.0,True,254723916436.0,1.0,601.0,30.0,20000.0,12800.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,20000.0,2017-12-18,0.0,0.0,1679.0,0.0
46470,254720475133,745142,176190.62,2022-06-27,2022-07-24,28,27,0.96,No_rules_relaxed,1,Yes,1.0,22893884,Reject,0.0,True,254720475133.0,1.0,601.0,30.0,28684.0,0.0,2017-12-21,2018-01-20,2019-12-31,2018-01-20,710.0,1.0,written-off_default,28684.0,2017-12-21,0.0,0.0,1676.0,0.0
79597,254724756423,978161,1100.0,2022-07-12,2022-07-15,4,2,0.5,No_rules_relaxed,10,No,0.0,13261352,Approve,0.0,True,254724756423.0,2.0,601.0,30.0,28600.0,0.0,2018-01-07,2018-02-06,2018-12-31,2018-02-06,328.0,1.0,written-off_default,28680.0,2017-12-19,1.0,0.5,1659.0,0.0


In [53]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 80289 entries, 73971 to 80295
Data columns (total 35 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   mobile_number                      64309 non-null  object        
 1   store_number                       80289 non-null  object        
 2   approx_30_days_trx_val             80289 non-null  float64       
 3   most_recent_trx_date_past_30_days  80289 non-null  datetime64[ns]
 4   last_trx_date                      80289 non-null  datetime64[ns]
 5   expected_trx_days                  80289 non-null  int64         
 6   actual_trx_days                    80289 non-null  int64         
 7   page_active_days                   80289 non-null  float64       
 8   inference_col                      80289 non-null  object        
 9   days_since_last_trx                80289 non-null  int64         
 10  transacted_last_5_days        

#### Stabilisation & scoring adjustments

In [54]:
#create new loan count column

df["adjusted_loan_count"] = df["loan_count"]

In [55]:
#adjust loan count col

df.loc[(df["IDM_recommendation"]=="Reject")&(df["loan_count"]<6), "adjusted_loan_count"] = 0
df.loc[df["num_days_since_last_disbursement"]> 180, "adjusted_loan_count"] = 0

In [56]:
def calc_limit_factor_21(df):
    
    idm_recommendation = df["IDM_recommendation"]
    CRB_approve_limit_factor = 0.50
    CRB_reject_limit_factor = 0.35
    
    if idm_recommendation == 'Approve':
        return CRB_approve_limit_factor
    else:
        return CRB_reject_limit_factor    
    
    
def calc_limit_factor_7(df):
    
    idm_recommendation = df["IDM_recommendation"]
    CRB_approve_limit_factor = 0.17
    CRB_reject_limit_factor = 0.12
    
    if idm_recommendation == 'Approve':
        return CRB_approve_limit_factor
    else:
        return CRB_reject_limit_factor
    
    
    
def calc_limit_factor_1(df):
    
    idm_recommendation = df["IDM_recommendation"]
    CRB_approve_limit_factor = 0.17
    CRB_reject_limit_factor = 0.12
    
    if idm_recommendation == 'Approve':
        return CRB_approve_limit_factor
    else:
        return CRB_reject_limit_factor

In [57]:
df['limit_factor_21'] = df.apply(lambda x: calc_limit_factor_21(x), axis = 1)
df['limit_factor_7'] = df.apply(lambda x: calc_limit_factor_7(x), axis = 1)
df['limit_factor_1'] = df.apply(lambda x: calc_limit_factor_1(x), axis = 1)

df.head(3)

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,IDM_recommendation,IDM_limit,is_iprs_validated,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1
73971,254722978849,822369,2020.0,2022-07-21,2022-07-21,1,1,1.0,relax_rules,4,Yes,1.0,22128407,Approve,0.0,True,254722978849.0,4.0,600.0,7.0,5000.0,5000.0,2017-12-10,2017-12-17,2017-12-10,2017-12-17,-7.0,1.0,closed_early_repayment,5000.0,2017-12-10,4.0,1.0,1687.0,1.0,0.0,0.5,0.17,0.17
67827,254723173634,786152,1280.0,2022-07-15,2022-07-15,1,1,1.0,No_rules_relaxed,10,No,0.0,494024,Approve,0.0,True,254723173634.0,1.0,601.0,30.0,12698.0,9206.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,12698.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17
1168,254723916436,165978,906714.4,2022-06-26,2022-07-24,29,28,0.97,No_rules_relaxed,1,Yes,1.0,20417564,Approve,0.0,True,254723916436.0,1.0,601.0,30.0,20000.0,12800.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,20000.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17


In [58]:
# adding IDM_factor_21, IDM_factor_7, IDM_factor_1 columns to the dataframe
df['IDM_factor_21'] = df['limit_factor_21'] / 0.5
df['IDM_factor_7'] = df['limit_factor_7'] / 0.17
df['IDM_factor_1'] = df['limit_factor_1'] / 0.17

df.head(3)

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,IDM_recommendation,IDM_limit,is_iprs_validated,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,IDM_factor_21,IDM_factor_7,IDM_factor_1
73971,254722978849,822369,2020.0,2022-07-21,2022-07-21,1,1,1.0,relax_rules,4,Yes,1.0,22128407,Approve,0.0,True,254722978849.0,4.0,600.0,7.0,5000.0,5000.0,2017-12-10,2017-12-17,2017-12-10,2017-12-17,-7.0,1.0,closed_early_repayment,5000.0,2017-12-10,4.0,1.0,1687.0,1.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0
67827,254723173634,786152,1280.0,2022-07-15,2022-07-15,1,1,1.0,No_rules_relaxed,10,No,0.0,494024,Approve,0.0,True,254723173634.0,1.0,601.0,30.0,12698.0,9206.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,12698.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0
1168,254723916436,165978,906714.4,2022-06-26,2022-07-24,29,28,0.97,No_rules_relaxed,1,Yes,1.0,20417564,Approve,0.0,True,254723916436.0,1.0,601.0,30.0,20000.0,12800.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,20000.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0


In [59]:
# defining a function for the limit factors

# defining a function to create bands for trading consistency
def calc_trading_consistency_bands(df):
    count = df['page_active_days']
    
    if count >= 0 and count < 0.30:
        return 'Band 1'
    elif count >= 0.30 and count < 0.50:
        return 'Band 2'
    elif count >= 0.50 and count < 0.60:
        return 'Band 3'
    elif count >= 0.60 and count < 0.70:
        return 'Band 4'
    elif count >= 0.70 and count < 0.80:
        return 'Band 5'
    elif count >= 0.80 and count < 0.90:
        return 'Band 6'
    elif count >= 0.90 and count <= 1.00:
        return 'Band 7'


# defining a function to create bands for loan count
def calc_loan_count_bands(df):
    count = df['loan_count']
  
    if count == 0:
        return 'Band 1'
    elif count == 1 or count == 2:
        return 'Band 2'
    elif count == 3 or count == 4:
        return 'Band 3'
    elif count == 5 or count == 6:
        return 'Band 4'
    elif count == 7 or count == 8:
        return 'Band 5'
    elif count == 9 or count == 10:
        return 'Band 6'
    elif count == 11 or count == 12:
        return 'Band 7'
    elif count > 12:
        return 'Band 8'


# defining a function to use the bands for both trading consistency and loan count to get limit factors
def calc_limit_factor_21(df):
    trading_consistency = df['trading_consistency_bands']
    loan_count = df['loan_count_bands']
  
    if (trading_consistency == 'Band 1' and loan_count == 'Band 1') or (trading_consistency == 'Band 1' and loan_count == 'Band 2') or (trading_consistency == 'Band 1' and loan_count == 'Band 3')  or (trading_consistency == 'Band 2' and loan_count == 'Band 1') or (trading_consistency == 'Band 2' and loan_count == 'Band 2') or (trading_consistency == 'Band 3' and loan_count == 'Band 1') or (trading_consistency == 'Band 4' and loan_count == 'Band 1') or (trading_consistency == 'Band 5' and loan_count == 'Band 1') or (trading_consistency == 'Band 6' and loan_count == 'Band 1') or (trading_consistency == 'Band 7' and loan_count == 'Band 1'):
        return 0.00
    elif (trading_consistency == 'Band 1' and loan_count == 'Band 4') or (trading_consistency == 'Band 1' and loan_count == 'Band 5') or (trading_consistency == 'Band 2' and loan_count == 'Band 3') or (trading_consistency == 'Band 2' and loan_count == 'Band 4') or (trading_consistency == 'Band 3' and loan_count == 'Band 2') or (trading_consistency == 'Band 3' and loan_count == 'Band 3'):
        return 0.05
    elif (trading_consistency == 'Band 1' and loan_count == 'Band 6') or (trading_consistency == 'Band 2' and loan_count == 'Band 5') or (trading_consistency == 'Band 3' and loan_count == 'Band 4') or (trading_consistency == 'Band 4' and loan_count == 'Band 2') or (trading_consistency == 'Band 4' and loan_count == 'Band 3'):
        return 0.10
    elif (trading_consistency == 'Band 1' and loan_count == 'Band 7') or (trading_consistency == 'Band 2' and loan_count == 'Band 6') or (trading_consistency == 'Band 3' and loan_count == 'Band 5') or (trading_consistency == 'Band 4' and loan_count == 'Band 4') or (trading_consistency == 'Band 5' and loan_count == 'Band 2') or (trading_consistency == 'Band 5' and loan_count == 'Band 3'):
        return 0.15
    elif (trading_consistency == 'Band 1' and loan_count == 'Band 8') or (trading_consistency == 'Band 2' and loan_count == 'Band 7') or (trading_consistency == 'Band 3' and loan_count == 'Band 6') or (trading_consistency == 'Band 4' and loan_count == 'Band 5') or (trading_consistency == 'Band 5' and loan_count == 'Band 4') or (trading_consistency == 'Band 6' and loan_count == 'Band 2'):
        return 0.20
    elif (trading_consistency == 'Band 2' and loan_count == 'Band 8') or (trading_consistency == 'Band 3' and loan_count == 'Band 7') or (trading_consistency == 'Band 4' and loan_count == 'Band 6') or (trading_consistency == 'Band 5' and loan_count == 'Band 5') or (trading_consistency == 'Band 6' and loan_count == 'Band 3') or (trading_consistency == 'Band 7' and loan_count == 'Band 2'):
        return 0.25
    elif (trading_consistency == 'Band 3' and loan_count == 'Band 8') or (trading_consistency == 'Band 4' and loan_count == 'Band 7') or (trading_consistency == 'Band 5' and loan_count == 'Band 6') or (trading_consistency == 'Band 6' and loan_count == 'Band 4') or (trading_consistency == 'Band 7' and loan_count == 'Band 3'):
        return 0.30
    elif (trading_consistency == 'Band 4' and loan_count == 'Band 8') or (trading_consistency == 'Band 5' and loan_count == 'Band 7') or (trading_consistency == 'Band 6' and loan_count == 'Band 5') or (trading_consistency == 'Band 7' and loan_count == 'Band 4'):
        return 0.35
    elif (trading_consistency == 'Band 5' and loan_count == 'Band 8') or (trading_consistency == 'Band 6' and loan_count == 'Band 6') or (trading_consistency == 'Band 7' and loan_count == 'Band 5'):
        return 0.40
    elif (trading_consistency == 'Band 6' and loan_count == 'Band 7') or (trading_consistency == 'Band 7' and loan_count == 'Band 6'):
        return 0.45
    elif (trading_consistency == 'Band 6' and loan_count == 'Band 8') or (trading_consistency == 'Band 7' and loan_count == 'Band 7'):
        return 0.50
    elif trading_consistency == 'Band 7' and loan_count == 'Band 8':
        return 0.55



def calc_limit_factor_7(df):
    trading_consistency = df['trading_consistency_bands']
    loan_count = df['loan_count_bands']
  
    if (trading_consistency == 'Band 1' and loan_count == 'Band 1') or (trading_consistency == 'Band 1' and loan_count == 'Band 2') or (trading_consistency == 'Band 1' and loan_count == 'Band 3') or (trading_consistency == 'Band 2' and loan_count == 'Band 1') or (trading_consistency == 'Band 2' and loan_count == 'Band 2'):
        return 0.00
    elif (trading_consistency == 'Band 1' and loan_count == 'Band 4') or (trading_consistency == 'Band 1' and loan_count == 'Band 5') or (trading_consistency == 'Band 2' and loan_count == 'Band 3') or (trading_consistency == 'Band 3' and loan_count == 'Band 1') or (trading_consistency == 'Band 3' and loan_count == 'Band 2') or (trading_consistency == 'Band 4' and loan_count == 'Band 1'):
        return 0.075
    elif (trading_consistency == 'Band 1' and loan_count == 'Band 6') or (trading_consistency == 'Band 2' and loan_count == 'Band 4') or (trading_consistency == 'Band 2' and loan_count == 'Band 5') or (trading_consistency == 'Band 3' and loan_count == 'Band 3') or (trading_consistency == 'Band 4' and loan_count == 'Band 2') or (trading_consistency == 'Band 5' and loan_count == 'Band 1'):
        return 0.10
    elif (trading_consistency == 'Band 1' and loan_count == 'Band 7') or (trading_consistency == 'Band 2' and loan_count == 'Band 6') or (trading_consistency == 'Band 3' and loan_count == 'Band 4') or (trading_consistency == 'Band 4' and loan_count == 'Band 3') or (trading_consistency == 'Band 5' and loan_count == 'Band 2') or (trading_consistency == 'Band 6' and loan_count == 'Band 1') or (trading_consistency == 'Band 7' and loan_count == 'Band 1'):
        return 0.125
    elif (trading_consistency == 'Band 1' and loan_count == 'Band 8') or (trading_consistency == 'Band 2' and loan_count == 'Band 7') or (trading_consistency == 'Band 3' and loan_count == 'Band 5') or (trading_consistency == 'Band 3' and loan_count == 'Band 6') or (trading_consistency == 'Band 4' and loan_count == 'Band 4') or (trading_consistency == 'Band 4' and loan_count == 'Band 5') or (trading_consistency == 'Band 5' and loan_count == 'Band 3') or (trading_consistency == 'Band 6' and loan_count == 'Band 2') or (trading_consistency == 'Band 6' and loan_count == 'Band 3') or (trading_consistency == 'Band 7' and loan_count == 'Band 2'):
        return 0.15
    elif (trading_consistency == 'Band 2' and loan_count == 'Band 8') or (trading_consistency == 'Band 3' and loan_count == 'Band 7') or (trading_consistency == 'Band 4' and loan_count == 'Band 6') or (trading_consistency == 'Band 5' and loan_count == 'Band 4') or (trading_consistency == 'Band 5' and loan_count == 'Band 5') or (trading_consistency == 'Band 6' and loan_count == 'Band 4') or (trading_consistency == 'Band 7' and loan_count == 'Band 3'):
        return 0.175
    elif (trading_consistency == 'Band 3' and loan_count == 'Band 8') or (trading_consistency == 'Band 4' and loan_count == 'Band 7') or (trading_consistency == 'Band 5' and loan_count == 'Band 6') or (trading_consistency == 'Band 6' and loan_count == 'Band 5') or (trading_consistency == 'Band 7' and loan_count == 'Band 4'):
        return 0.20
    elif (trading_consistency == 'Band 4' and loan_count == 'Band 8') or (trading_consistency == 'Band 5' and loan_count == 'Band 7') or (trading_consistency == 'Band 6' and loan_count == 'Band 6') or (trading_consistency == 'Band 7' and loan_count == 'Band 5'):
        return 0.225
    elif (trading_consistency == 'Band 5' and loan_count == 'Band 8') or (trading_consistency == 'Band 6' and loan_count == 'Band 7') or (trading_consistency == 'Band 7' and loan_count == 'Band 6'):
        return 0.25
    elif (trading_consistency == 'Band 6' and loan_count == 'Band 8') or (trading_consistency == 'Band 7' and loan_count == 'Band 7'):
        return 0.275
    elif (trading_consistency == 'Band 7' and loan_count == 'Band 8'):
        return 0.30




def calc_limit_factor_1(df):
    trading_consistency = df['trading_consistency_bands']
    loan_count = df['loan_count_bands']
  
    if (trading_consistency == 'Band 1' and loan_count == 'Band 1') or (trading_consistency == 'Band 1' and loan_count == 'Band 2') or (trading_consistency == 'Band 1' and loan_count == 'Band 3') or (trading_consistency == 'Band 2' and loan_count == 'Band 1') or (trading_consistency == 'Band 2' and loan_count == 'Band 2'):
        return 0.00
    elif (trading_consistency == 'Band 1' and loan_count == 'Band 4') or (trading_consistency == 'Band 1' and loan_count == 'Band 5') or (trading_consistency == 'Band 2' and loan_count == 'Band 3') or (trading_consistency == 'Band 3' and loan_count == 'Band 1') or (trading_consistency == 'Band 3' and loan_count == 'Band 2') or (trading_consistency == 'Band 4' and loan_count == 'Band 1'):
        return 0.075
    elif (trading_consistency == 'Band 1' and loan_count == 'Band 6') or (trading_consistency == 'Band 2' and loan_count == 'Band 4') or (trading_consistency == 'Band 2' and loan_count == 'Band 5') or (trading_consistency == 'Band 3' and loan_count == 'Band 3') or (trading_consistency == 'Band 4' and loan_count == 'Band 2') or (trading_consistency == 'Band 5' and loan_count == 'Band 1'):
        return 0.10
    elif (trading_consistency == 'Band 1' and loan_count == 'Band 7') or (trading_consistency == 'Band 2' and loan_count == 'Band 6') or (trading_consistency == 'Band 3' and loan_count == 'Band 4') or (trading_consistency == 'Band 4' and loan_count == 'Band 3') or (trading_consistency == 'Band 5' and loan_count == 'Band 2') or (trading_consistency == 'Band 6' and loan_count == 'Band 1') or (trading_consistency == 'Band 7' and loan_count == 'Band 1'):
        return 0.125
    elif (trading_consistency == 'Band 1' and loan_count == 'Band 8') or (trading_consistency == 'Band 2' and loan_count == 'Band 7') or (trading_consistency == 'Band 3' and loan_count == 'Band 5') or (trading_consistency == 'Band 3' and loan_count == 'Band 6') or (trading_consistency == 'Band 4' and loan_count == 'Band 4') or (trading_consistency == 'Band 4' and loan_count == 'Band 5') or (trading_consistency == 'Band 5' and loan_count == 'Band 3') or (trading_consistency == 'Band 6' and loan_count == 'Band 2') or (trading_consistency == 'Band 6' and loan_count == 'Band 3') or (trading_consistency == 'Band 7' and loan_count == 'Band 2'):
        return 0.15
    elif (trading_consistency == 'Band 2' and loan_count == 'Band 8') or (trading_consistency == 'Band 3' and loan_count == 'Band 7') or (trading_consistency == 'Band 4' and loan_count == 'Band 6') or (trading_consistency == 'Band 5' and loan_count == 'Band 4') or (trading_consistency == 'Band 5' and loan_count == 'Band 5') or (trading_consistency == 'Band 6' and loan_count == 'Band 4') or (trading_consistency == 'Band 7' and loan_count == 'Band 3'):
        return 0.175
    elif (trading_consistency == 'Band 3' and loan_count == 'Band 8') or (trading_consistency == 'Band 4' and loan_count == 'Band 7') or (trading_consistency == 'Band 5' and loan_count == 'Band 6') or (trading_consistency == 'Band 6' and loan_count == 'Band 5') or (trading_consistency == 'Band 7' and loan_count == 'Band 4'):
        return 0.20
    elif (trading_consistency == 'Band 4' and loan_count == 'Band 8') or (trading_consistency == 'Band 5' and loan_count == 'Band 7') or (trading_consistency == 'Band 6' and loan_count == 'Band 6') or (trading_consistency == 'Band 7' and loan_count == 'Band 5'):
        return 0.225
    elif (trading_consistency == 'Band 5' and loan_count == 'Band 8') or (trading_consistency == 'Band 6' and loan_count == 'Band 7') or (trading_consistency == 'Band 7' and loan_count == 'Band 6'):
        return 0.25
    elif (trading_consistency == 'Band 6' and loan_count == 'Band 8') or (trading_consistency == 'Band 7' and loan_count == 'Band 7'):
        return 0.275
    elif (trading_consistency == 'Band 7' and loan_count == 'Band 8'):
        return 0.30

In [60]:
# adding trading_consistency_bands, loan_count_bands and limit_factor columns to the dataframe by applying the functions to the dataframe
df['trading_consistency_bands'] = df.apply(lambda x: calc_trading_consistency_bands(x), axis = 1)
df['loan_count_bands'] = df.apply(lambda x: calc_loan_count_bands(x), axis = 1)

df['new_limit_factor_21'] = df.apply(lambda x: calc_limit_factor_21(x), axis = 1)
df['new_limit_factor_7'] = df.apply(lambda x: calc_limit_factor_7(x), axis = 1)
df['new_limit_factor_1'] = df.apply(lambda x: calc_limit_factor_1(x), axis = 1)

df.head(3)

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,IDM_recommendation,IDM_limit,is_iprs_validated,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,IDM_factor_21,IDM_factor_7,IDM_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1
73971,254722978849,822369,2020.0,2022-07-21,2022-07-21,1,1,1.0,relax_rules,4,Yes,1.0,22128407,Approve,0.0,True,254722978849.0,4.0,600.0,7.0,5000.0,5000.0,2017-12-10,2017-12-17,2017-12-10,2017-12-17,-7.0,1.0,closed_early_repayment,5000.0,2017-12-10,4.0,1.0,1687.0,1.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 3,0.3,0.17,0.17
67827,254723173634,786152,1280.0,2022-07-15,2022-07-15,1,1,1.0,No_rules_relaxed,10,No,0.0,494024,Approve,0.0,True,254723173634.0,1.0,601.0,30.0,12698.0,9206.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,12698.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15
1168,254723916436,165978,906714.4,2022-06-26,2022-07-24,29,28,0.97,No_rules_relaxed,1,Yes,1.0,20417564,Approve,0.0,True,254723916436.0,1.0,601.0,30.0,20000.0,12800.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,20000.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15


In [61]:
def cal_weight_dpd(df):
    weight_dpd = df['weight_dpd']
    loan_count = df['loan_count']
    
    if loan_count == 0:
        return 1
    else:
        return weight_dpd
    
    
def cal_good_loans_repayment_ratio(df):
    good_loans_repayment_ratio = df['good_loans_repayment_ratio']
    loan_count = df['loan_count']
    
    if loan_count == 0:
        return 1
    else:
        return good_loans_repayment_ratio
    
    
# defining a function for consistency weights
def calc_weight_consistency(df):
    page_active_days = df['page_active_days']
  
    if page_active_days >= 0.7:
        return 1.0
    elif page_active_days <= 0.69 and page_active_days >= 0.63:
        return 0.9
    elif page_active_days <= 0.62 and page_active_days >= 0.56:
        return 0.8
    elif page_active_days <= 0.57 and page_active_days >= 0.49:
        return 0.7
    elif page_active_days < 0.49:
        return 0

In [62]:
df['weight_dpd'] = df.apply(lambda x: cal_weight_dpd(x), axis = 1)
df['good_loans_repayment_ratio'] = df.apply(lambda x: cal_good_loans_repayment_ratio(x), axis = 1)
df['weight_consistency'] = df.apply(lambda x: calc_weight_consistency(x), axis = 1)

df.head(3)

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,IDM_recommendation,IDM_limit,is_iprs_validated,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,IDM_factor_21,IDM_factor_7,IDM_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency
73971,254722978849,822369,2020.0,2022-07-21,2022-07-21,1,1,1.0,relax_rules,4,Yes,1.0,22128407,Approve,0.0,True,254722978849.0,4.0,600.0,7.0,5000.0,5000.0,2017-12-10,2017-12-17,2017-12-10,2017-12-17,-7.0,1.0,closed_early_repayment,5000.0,2017-12-10,4.0,1.0,1687.0,1.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 3,0.3,0.17,0.17,1.0
67827,254723173634,786152,1280.0,2022-07-15,2022-07-15,1,1,1.0,No_rules_relaxed,10,No,0.0,494024,Approve,0.0,True,254723173634.0,1.0,601.0,30.0,12698.0,9206.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,12698.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15,1.0
1168,254723916436,165978,906714.4,2022-06-26,2022-07-24,29,28,0.97,No_rules_relaxed,1,Yes,1.0,20417564,Approve,0.0,True,254723916436.0,1.0,601.0,30.0,20000.0,12800.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,20000.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15,1.0


In [63]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 80289 entries, 73971 to 80295
Data columns (total 48 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   mobile_number                      64309 non-null  object        
 1   store_number                       80289 non-null  object        
 2   approx_30_days_trx_val             80289 non-null  float64       
 3   most_recent_trx_date_past_30_days  80289 non-null  datetime64[ns]
 4   last_trx_date                      80289 non-null  datetime64[ns]
 5   expected_trx_days                  80289 non-null  int64         
 6   actual_trx_days                    80289 non-null  int64         
 7   page_active_days                   80289 non-null  float64       
 8   inference_col                      80289 non-null  object        
 9   days_since_last_trx                80289 non-null  int64         
 10  transacted_last_5_days        

In [64]:
# adding risk_rules_factor column to the dataframe
df['risk_rules_factor'] = (df['weight_dpd'] + df['weight_till_recency'] + df['weight_consistency'] + df['good_loans_repayment_ratio']) / 4

df.head(3)

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,IDM_recommendation,IDM_limit,is_iprs_validated,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,IDM_factor_21,IDM_factor_7,IDM_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor
73971,254722978849,822369,2020.0,2022-07-21,2022-07-21,1,1,1.0,relax_rules,4,Yes,1.0,22128407,Approve,0.0,True,254722978849.0,4.0,600.0,7.0,5000.0,5000.0,2017-12-10,2017-12-17,2017-12-10,2017-12-17,-7.0,1.0,closed_early_repayment,5000.0,2017-12-10,4.0,1.0,1687.0,1.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 3,0.3,0.17,0.17,1.0,1.0
67827,254723173634,786152,1280.0,2022-07-15,2022-07-15,1,1,1.0,No_rules_relaxed,10,No,0.0,494024,Approve,0.0,True,254723173634.0,1.0,601.0,30.0,12698.0,9206.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,12698.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15,1.0,0.25
1168,254723916436,165978,906714.4,2022-06-26,2022-07-24,29,28,0.97,No_rules_relaxed,1,Yes,1.0,20417564,Approve,0.0,True,254723916436.0,1.0,601.0,30.0,20000.0,12800.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,20000.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15,1.0,0.5


In [65]:
df['good_loans_repayment_ratio'].mean()

0.8317425799299984

In [66]:
# adding ultimate_factor_21, ultimate_factor_7, ultimate_factor_1 columns to the dataframe
df['ultimate_factor_21'] = df['risk_rules_factor'] * df['IDM_factor_21'] * df['new_limit_factor_21']
df['ultimate_factor_7'] = df['risk_rules_factor'] * df['IDM_factor_7'] * df['new_limit_factor_7']
df['ultimate_factor_1'] = df['risk_rules_factor'] * df['IDM_factor_1'] * df['new_limit_factor_1']

In [67]:
df['limit_21_day'] = df['approx_30_days_trx_val'] * df['ultimate_factor_21']
df['limit_7_day'] = df['approx_30_days_trx_val'] * df['ultimate_factor_7']
df['limit_1_day'] = df['approx_30_days_trx_val'] * df['ultimate_factor_1']

df.head(3)

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,IDM_recommendation,IDM_limit,is_iprs_validated,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,IDM_factor_21,IDM_factor_7,IDM_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day
73971,254722978849,822369,2020.0,2022-07-21,2022-07-21,1,1,1.0,relax_rules,4,Yes,1.0,22128407,Approve,0.0,True,254722978849.0,4.0,600.0,7.0,5000.0,5000.0,2017-12-10,2017-12-17,2017-12-10,2017-12-17,-7.0,1.0,closed_early_repayment,5000.0,2017-12-10,4.0,1.0,1687.0,1.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 3,0.3,0.17,0.17,1.0,1.0,0.3,0.17,0.17,606.0,353.5,353.5
67827,254723173634,786152,1280.0,2022-07-15,2022-07-15,1,1,1.0,No_rules_relaxed,10,No,0.0,494024,Approve,0.0,True,254723173634.0,1.0,601.0,30.0,12698.0,9206.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,12698.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15,1.0,0.25,0.06,0.04,0.04,80.0,48.0,48.0
1168,254723916436,165978,906714.4,2022-06-26,2022-07-24,29,28,0.97,No_rules_relaxed,1,Yes,1.0,20417564,Approve,0.0,True,254723916436.0,1.0,601.0,30.0,20000.0,12800.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,20000.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15,1.0,0.5,0.12,0.07,0.07,113339.3,68003.58,68003.58


In [68]:
df['IDM_recommendation'].value_counts()

Reject     58063
Approve    22226
Name: IDM_recommendation, dtype: int64

In [69]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 80289 entries, 73971 to 80295
Data columns (total 55 columns):
 #   Column                             Non-Null Count  Dtype         
---  ------                             --------------  -----         
 0   mobile_number                      64309 non-null  object        
 1   store_number                       80289 non-null  object        
 2   approx_30_days_trx_val             80289 non-null  float64       
 3   most_recent_trx_date_past_30_days  80289 non-null  datetime64[ns]
 4   last_trx_date                      80289 non-null  datetime64[ns]
 5   expected_trx_days                  80289 non-null  int64         
 6   actual_trx_days                    80289 non-null  int64         
 7   page_active_days                   80289 non-null  float64       
 8   inference_col                      80289 non-null  object        
 9   days_since_last_trx                80289 non-null  int64         
 10  transacted_last_5_days        

In [70]:
df["inference_col"].value_counts()

No_rules_relaxed    64585
relax_rules         15704
Name: inference_col, dtype: int64

In [71]:
def limits_decrease_zeroization_21(df):
    """
    function to adjust allocated limits in line with past repayment behavior
    
    Inputs:
    1) num of loans taken,
    2) previously allocated limit,
    3) term frequency for a loan,
    4) days past due for the most recent loan
    5) good loans repayment ratio
    6) inference variable i.e declaring whether borrower qualifies for limit stabilization OR not
    
    Output:
    adjusted limit based on rollover/default patterns of most recent loan
    """
    dpd_allowance = 10
    repayment_ratio_threshold = 0.6
    zeroize = 0
    
    #loan_count = df["loan_count"]
    loan_count = df["adjusted_loan_count"]
    limit_col = df["limit_21_day"]
    latest_loan_dpd = df["days_past_due"]
    term_frequency = df["term_frequency"]
    repayment_ratio = df["good_loans_repayment_ratio"]
    inference_col = df["inference_col"]
    
    
    conditions = [
        loan_count.isna(),
        loan_count.eq(0),
        inference_col.str.match("relax_rules"),
        inference_col.str.match("No_rules_relaxed") & latest_loan_dpd.ge(term_frequency+30),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.lt(repayment_ratio_threshold) & latest_loan_dpd.gt(term_frequency + dpd_allowance),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.lt(repayment_ratio_threshold) & latest_loan_dpd.le(term_frequency + dpd_allowance),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.ge(repayment_ratio_threshold) & latest_loan_dpd.ge(term_frequency + 30),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.ge(repayment_ratio_threshold) & latest_loan_dpd.le(term_frequency + dpd_allowance),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.ge(repayment_ratio_threshold) & latest_loan_dpd.ge(term_frequency + 20),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.ge(repayment_ratio_threshold) & latest_loan_dpd.ge(term_frequency + 29),
        
    ]  
    
    
    choices = [
        limit_col,
        limit_col,
        limit_col,
        zeroize,
        zeroize,
        limit_col,
        zeroize,
        limit_col,
        limit_col * 0.7,
        limit_col * 0.3,
        
    ]
    
    limit_column = np.select(conditions, choices)
    
    return limit_column


#apply the function to the df to create 21 day adjusted product limit allocation
df["adjusted_21_limit"] = limits_decrease_zeroization_21(df)

In [72]:
def limits_decrease_zeroization_7(df):
    """
    function to adjust allocated limits in line with past repayment behavior
    
    Inputs:
    1) num of loans taken,
    2) previously allocated limit,
    3) term frequency for a loan,
    4) days past due for the most recent loan
    5) good loans repayment ratio
    6) inference variable i.e declaring whether borrower qualifies for limit stabilization OR not
    
    Output:
    adjusted limit based on rollover/default patterns of most recent loan
    """
    dpd_allowance = 10
    repayment_ratio_threshold = 0.6
    zeroize = 0
    
    #loan_count = df["loan_count"]
    loan_count = df["adjusted_loan_count"]
    limit_col = df["limit_7_day"]
    latest_loan_dpd = df["days_past_due"]
    term_frequency = df["term_frequency"]
    repayment_ratio = df["good_loans_repayment_ratio"]
    inference_col = df["inference_col"]
    
    
    conditions = [
        loan_count.isna(),
        loan_count.eq(0),
        inference_col.str.match("relax_rules"),
        inference_col.str.match("No_rules_relaxed") & latest_loan_dpd.ge(term_frequency+30),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.lt(repayment_ratio_threshold) & latest_loan_dpd.gt(term_frequency + dpd_allowance),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.lt(repayment_ratio_threshold) & latest_loan_dpd.le(term_frequency + dpd_allowance),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.ge(repayment_ratio_threshold) & latest_loan_dpd.ge(term_frequency + 30),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.ge(repayment_ratio_threshold) & latest_loan_dpd.le(term_frequency + dpd_allowance),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.ge(repayment_ratio_threshold) & latest_loan_dpd.ge(term_frequency + 20),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.ge(repayment_ratio_threshold) & latest_loan_dpd.ge(term_frequency + 29),
        
    ]  
    
    
    choices = [
        limit_col,
        limit_col,
        limit_col,
        zeroize,
        zeroize,
        limit_col,
        zeroize,
        limit_col,
        limit_col * 0.7,
        limit_col * 0.3,
        
    ]
    
    limit_column = np.select(conditions, choices)
    
    return limit_column


#apply the function to the df to create 21 day adjusted product limit allocation
df["adjusted_7_limit"] = limits_decrease_zeroization_7(df)

In [73]:
def limits_decrease_zeroization_1(df):
    """
    function to adjust allocated limits in line with past repayment behavior
    
    Inputs:
    1) num of loans taken,
    2) previously allocated limit,
    3) term frequency for a loan,
    4) days past due for the most recent loan
    5) good loans repayment ratio
    6) inference variable i.e declaring whether borrower qualifies for limit stabilization OR not
    
    Output:
    adjusted limit based on rollover/default patterns of most recent loan
    """
    dpd_allowance = 10
    repayment_ratio_threshold = 0.6
    zeroize = 0
    
    #loan_count = df["loan_count"]
    loan_count = df["adjusted_loan_count"]
    limit_col = df["limit_1_day"]
    latest_loan_dpd = df["days_past_due"]
    term_frequency = df["term_frequency"]
    repayment_ratio = df["good_loans_repayment_ratio"]
    inference_col = df["inference_col"]
    
    
    conditions = [
        loan_count.isna(),
        loan_count.eq(0),
        inference_col.str.match("relax_rules"),
        inference_col.str.match("No_rules_relaxed") & latest_loan_dpd.ge(term_frequency+30),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.lt(repayment_ratio_threshold) & latest_loan_dpd.gt(term_frequency + dpd_allowance),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.lt(repayment_ratio_threshold) & latest_loan_dpd.le(term_frequency + dpd_allowance),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.ge(repayment_ratio_threshold) & latest_loan_dpd.ge(term_frequency + 30),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.ge(repayment_ratio_threshold) & latest_loan_dpd.le(term_frequency + dpd_allowance),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.ge(repayment_ratio_threshold) & latest_loan_dpd.ge(term_frequency + 20),
        inference_col.str.match("No_rules_relaxed") & repayment_ratio.ge(repayment_ratio_threshold) & latest_loan_dpd.ge(term_frequency + 29),
        
    ]  
    
    
    choices = [
        limit_col,
        limit_col,
        limit_col,
        zeroize,
        zeroize,
        limit_col,
        zeroize,
        limit_col,
        limit_col * 0.7,
        limit_col * 0.3,
        
    ]
    
    limit_column = np.select(conditions, choices)
    
    return limit_column


#apply the function to the df to create 21 day adjusted product limit allocation
df["adjusted_1_limit"] = limits_decrease_zeroization_1(df)

In [74]:
df[["adjusted_21_limit","adjusted_7_limit","adjusted_1_limit"]].sum()

adjusted_21_limit   1132987721.35
adjusted_7_limit     947672320.83
adjusted_1_limit     947672320.83
dtype: float64

In [75]:
df.shape

(80289, 58)

In [76]:
def limit_zeroization_till_summary_21(df):
    """
    function to adjust limits based on till summaries i.e till activity and recency
    
    Inputs:
    1) recency of transactions boolean check,
    2) till consistency calculated probability,
    3) previously allocated limit,
    4) inference variable i.e declaring whether borrower qualifies for limit stabilization OR not
    
    Output:
    adjusted limits in line with till summaries i.e limits zeroized for any customer who does not meet set threshold
    """
    transaction_boolean_accepted = "Yes"
    transaction_boolean_rejected = "No"
    consistency_threshold = 0.7
    zeroize = 0
    
    transaction_boolean_col = df["transacted_last_5_days"]
    consistency_col = df["page_active_days"]
    limit_col = df["adjusted_21_limit"]
    inference_col = df["inference_col"]
    
    conditions = [
        inference_col.str.match("relax_rules"),
        inference_col.str.match("No_rules_relaxed") & transaction_boolean_col.str.contains(transaction_boolean_accepted) & consistency_col.ge(consistency_threshold),
        inference_col.str.match("No_rules_relaxed") & transaction_boolean_col.str.contains(transaction_boolean_accepted) & consistency_col.lt(consistency_threshold),
        inference_col.str.match("No_rules_relaxed") & transaction_boolean_col.str.contains(transaction_boolean_rejected) & consistency_col.ge(consistency_threshold),
        inference_col.str.match("No_rules_relaxed") & transaction_boolean_col.str.contains(transaction_boolean_rejected) & consistency_col.lt(consistency_threshold),
        
        
    ]
    
    choices = [
        limit_col,
        limit_col,
        zeroize,
        zeroize,
        zeroize,
    ]
    
    limit_column = np.select(conditions, choices)
    
    return limit_column

#apply the function to the df to create 21 day adjusted product limit allocation
df["adjusted_21_limit"] = limit_zeroization_till_summary_21(df)

In [77]:
def limit_zeroization_till_summary_7(df):
    """
    function to adjust limits based on till summaries i.e till activity and recency
    
    Inputs:
    1) recency of transactions boolean check,
    2) till consistency calculated probability,
    3) previously allocated limit,
    4) inference variable i.e declaring whether borrower qualifies for limit stabilization OR not
    
    Output:
    adjusted limits in line with till summaries i.e limits zeroized for any customer who does not meet set threshold
    """
    
    transaction_boolean_accepted = "Yes"
    transaction_boolean_rejected = "No"
    consistency_threshold = 0.7
    zeroize = 0
    
    transaction_boolean_col = df["transacted_last_5_days"]
    consistency_col = df["page_active_days"]
    limit_col = df["adjusted_7_limit"]
    inference_col = df["inference_col"]
    
    conditions = [
        inference_col.str.match("relax_rules"),
        inference_col.str.match("No_rules_relaxed") & transaction_boolean_col.str.contains(transaction_boolean_accepted) & consistency_col.ge(consistency_threshold),
        inference_col.str.match("No_rules_relaxed") & transaction_boolean_col.str.contains(transaction_boolean_accepted) & consistency_col.lt(consistency_threshold),
        inference_col.str.match("No_rules_relaxed") & transaction_boolean_col.str.contains(transaction_boolean_rejected) & consistency_col.ge(consistency_threshold),
        inference_col.str.match("No_rules_relaxed") & transaction_boolean_col.str.contains(transaction_boolean_rejected) & consistency_col.lt(consistency_threshold),
        
        
    ]
    
    choices = [
        limit_col,
        limit_col,
        zeroize,
        zeroize,
        zeroize,
    ]
    
    limit_column = np.select(conditions, choices)
    
    return limit_column

#apply the function to the df to create 21 day adjusted product limit allocation
df["adjusted_7_limit"] = limit_zeroization_till_summary_7(df)

In [78]:
def limit_zeroization_till_summary_1(df):
    """
    function to adjust limits based on till summaries i.e till activity and recency
    
    Inputs:
    1) recency of transactions boolean check,
    2) till consistency calculated probability,
    3) previously allocated limit,
    4) inference variable i.e declaring whether borrower qualifies for limit stabilization OR not
    
    Output:
    adjusted limits in line with till summaries i.e limits zeroized for any customer who does not meet set threshold
    """
    transaction_boolean_accepted = "Yes"
    transaction_boolean_rejected = "No"
    consistency_threshold = 0.7
    zeroize = 0
    
    transaction_boolean_col = df["transacted_last_5_days"]
    consistency_col = df["page_active_days"]
    limit_col = df["adjusted_1_limit"]
    inference_col = df["inference_col"]
    
    conditions = [
        inference_col.str.match("relax_rules"),
        inference_col.str.match("No_rules_relaxed") & transaction_boolean_col.str.contains(transaction_boolean_accepted) & consistency_col.ge(consistency_threshold),
        inference_col.str.match("No_rules_relaxed") & transaction_boolean_col.str.contains(transaction_boolean_accepted) & consistency_col.lt(consistency_threshold),
        inference_col.str.match("No_rules_relaxed") & transaction_boolean_col.str.contains(transaction_boolean_rejected) & consistency_col.ge(consistency_threshold),
        inference_col.str.match("No_rules_relaxed") & transaction_boolean_col.str.contains(transaction_boolean_rejected) & consistency_col.lt(consistency_threshold),
        
        
    ]
    
    choices = [
        limit_col,
        limit_col,
        zeroize,
        zeroize,
        zeroize,
    ]
    
    limit_column = np.select(conditions, choices)
    
    return limit_column

#apply the function to the df to create 21 day adjusted product limit allocation
df["adjusted_1_limit"] = limit_zeroization_till_summary_1(df)

In [79]:
df[["adjusted_21_limit","adjusted_7_limit","adjusted_1_limit"]].sum()

adjusted_21_limit   1127461900.67
adjusted_7_limit     928649606.39
adjusted_1_limit     928649606.39
dtype: float64

In [80]:
def adjust_limits_to_loan_bands_21(df):
    """
    function to limit loan limits based on loan bands
    
    Inputs:
    1) loan count of a borrower, 
    2) allocated limit
    
    Output:
    adjusted limit based on loan band caps
    """
    #loan band matrix
    no_loans = 0
    
    
    #loan_count = df["loan_count"]
    loan_count = df["adjusted_loan_count"]
    limit_col = df["adjusted_21_limit"]

    conditions = [
        loan_count.isna(),
        loan_count.isna(),
        loan_count.eq(0),
        loan_count.eq(0),
        loan_count.gt(0)
    ]
    
    
    choices = [
        no_loans,
        no_loans,
        no_loans,
        no_loans,
        limit_col
    ]
    
    limit_column = np.select(conditions, choices)
    
    return limit_column


df["adjusted_21_limit"] = adjust_limits_to_loan_bands_21(df)

In [81]:
df[["adjusted_21_limit","adjusted_7_limit","adjusted_1_limit"]].sum()

adjusted_21_limit   838487053.89
adjusted_7_limit    928649606.39
adjusted_1_limit    928649606.39
dtype: float64

In [82]:
# defining a function for the limit caps
def calc_new_final_21_limit(df):
    adjusted_21_limit = df['adjusted_21_limit']
  
    if adjusted_21_limit < 1000:
        return 0
    elif adjusted_21_limit > 200000:
        return 200000
    else:
        return adjusted_21_limit


def calc_new_final_7_limit(df):
    adjusted_7_limit = df['adjusted_7_limit']
  
    if adjusted_7_limit < 1000:
        return 0
    elif adjusted_7_limit > 200000:
        return 200000
    else:
        return adjusted_7_limit


def calc_new_final_1_limit(df):
    adjusted_1_limit = df['adjusted_1_limit']
  
    if adjusted_1_limit < 200:
        return 0
    elif adjusted_1_limit > 200000:
        return 200000
    else:
        return adjusted_1_limit

In [83]:
# adding new_final_21_limit, new_final_7_limit and new_final_1_limit columns to the dataframe by applying the functions to the dataframe
df['final_21_limit'] = df.apply(lambda x: calc_new_final_21_limit(x), axis = 1)
df['final_7_limit'] = df.apply(lambda x: calc_new_final_7_limit(x), axis = 1)
df['final_1_limit'] = df.apply(lambda x: calc_new_final_1_limit(x), axis = 1)

df.head(3)

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,IDM_recommendation,IDM_limit,is_iprs_validated,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,IDM_factor_21,IDM_factor_7,IDM_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit
73971,254722978849,822369,2020.0,2022-07-21,2022-07-21,1,1,1.0,relax_rules,4,Yes,1.0,22128407,Approve,0.0,True,254722978849.0,4.0,600.0,7.0,5000.0,5000.0,2017-12-10,2017-12-17,2017-12-10,2017-12-17,-7.0,1.0,closed_early_repayment,5000.0,2017-12-10,4.0,1.0,1687.0,1.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 3,0.3,0.17,0.17,1.0,1.0,0.3,0.17,0.17,606.0,353.5,353.5,0.0,353.5,353.5,0.0,0.0,353.5
67827,254723173634,786152,1280.0,2022-07-15,2022-07-15,1,1,1.0,No_rules_relaxed,10,No,0.0,494024,Approve,0.0,True,254723173634.0,1.0,601.0,30.0,12698.0,9206.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,12698.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15,1.0,0.25,0.06,0.04,0.04,80.0,48.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0
1168,254723916436,165978,906714.4,2022-06-26,2022-07-24,29,28,0.97,No_rules_relaxed,1,Yes,1.0,20417564,Approve,0.0,True,254723916436.0,1.0,601.0,30.0,20000.0,12800.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,20000.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15,1.0,0.5,0.12,0.07,0.07,113339.3,68003.58,68003.58,0.0,68003.58,68003.58,0.0,68003.58,68003.58


In [84]:
#convert store_number col to string type
df['final_21_limit'] = df['final_21_limit'].fillna(0)
df['final_7_limit'] = df['final_7_limit'].fillna(0)
df['final_1_limit'] = df['final_1_limit'].fillna(0)

In [85]:
#apply ceiling 100 to final limit columns to adjust limits to the nearest 100 value 

df["final_21_limit"] = (np.ceil(df["final_21_limit"] / 100) * 100).astype(int)
df["final_7_limit"] = (np.ceil(df["final_7_limit"] / 100) * 100).astype(int)
df["final_1_limit"] = (np.ceil(df["final_1_limit"] / 100) * 100).astype(int)

In [86]:
#drop duplicates based on store_number/national id

df = df.drop_duplicates(subset="store_number", keep="first")

In [87]:
df[["final_21_limit","final_7_limit","final_1_limit"]].sum()

final_21_limit    592468100
final_7_limit     805389500
final_1_limit     808429700
dtype: int64

In [88]:
df.shape

(80289, 61)

In [89]:
df['IDM_recommendation'].value_counts()

Reject     58063
Approve    22226
Name: IDM_recommendation, dtype: int64

In [90]:
#check against MFS list of defaults

mfs_default_path = "C:\\Project_summaries\\Bloom\\Bloom all_loans\\20220721\\MFS_crosscheck\\"

mfs_defaulters_list = pd.read_excel(mfs_default_path+"mfs_defaulters_list.xlsx")

In [91]:
mfs_defaulters_list.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8191 entries, 0 to 8190
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   store_number  8191 non-null   int64 
 1   national_id   8191 non-null   object
dtypes: int64(1), object(1)
memory usage: 128.1+ KB


In [92]:
mfs_defaulters_list['store_number'] = mfs_defaulters_list['store_number'].astype(str)

mfs_defaulters_list.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8191 entries, 0 to 8190
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   store_number  8191 non-null   object
 1   national_id   8191 non-null   object
dtypes: object(2)
memory usage: 128.1+ KB


In [93]:
#get list of defaulters

mfs_defaulters_list = list(mfs_defaulters_list["store_number"].unique())

In [94]:
#assign a zero limit to every national id that matches the mfs defaulters list

df.loc[(df["store_number"].isin(mfs_defaulters_list))&(df["final_21_limit"]>0), "final_21_limit"] = 0
df.loc[(df["store_number"].isin(mfs_defaulters_list))&(df["final_7_limit"]>0), "final_7_limit"] = 0
df.loc[(df["store_number"].isin(mfs_defaulters_list))&(df["final_1_limit"]>0), "final_1_limit"] = 0

In [95]:
df[["final_21_limit","final_7_limit","final_1_limit"]].sum()

final_21_limit    592468100
final_7_limit     801434200
final_1_limit     804397800
dtype: int64

In [96]:
#exempt totally new customers who have not taken any loan but have failed our rules

excluded_first_time_df = df[((df["loan_count"]==0)&(df["final_7_limit"]==0)&(df["final_1_limit"]==0))]
df = df[~((df["loan_count"]==0)&(df["final_7_limit"]==0)&(df["final_1_limit"]==0))]

In [97]:
df.shape

(53407, 61)

In [98]:
df[["final_21_limit","final_7_limit","final_1_limit"]].sum()

final_21_limit    592468100
final_7_limit     801434200
final_1_limit     804397800
dtype: int64

In [99]:
df['IDM_recommendation'].value_counts()

Reject     37088
Approve    16319
Name: IDM_recommendation, dtype: int64

In [100]:
def calc_blacklist_flag(df):
    
    days_past_due = 
    bloom_version = 
    loan_status = 
    
    if (bloom_version == 1 and loan_status == 300) or days_past_due >= 60:
        return 1
    else:
        return 0

In [101]:
df['blacklist_flag'] = df.apply(lambda x: calc_blacklist_flag(x), axis = 1)

df.head(3)

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,IDM_recommendation,IDM_limit,is_iprs_validated,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,IDM_factor_21,IDM_factor_7,IDM_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag
73971,254722978849,822369,2020.0,2022-07-21,2022-07-21,1,1,1.0,relax_rules,4,Yes,1.0,22128407,Approve,0.0,True,254722978849.0,4.0,600.0,7.0,5000.0,5000.0,2017-12-10,2017-12-17,2017-12-10,2017-12-17,-7.0,1.0,closed_early_repayment,5000.0,2017-12-10,4.0,1.0,1687.0,1.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 3,0.3,0.17,0.17,1.0,1.0,0.3,0.17,0.17,606.0,353.5,353.5,0.0,353.5,353.5,0,0,400,0
67827,254723173634,786152,1280.0,2022-07-15,2022-07-15,1,1,1.0,No_rules_relaxed,10,No,0.0,494024,Approve,0.0,True,254723173634.0,1.0,601.0,30.0,12698.0,9206.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,12698.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15,1.0,0.25,0.06,0.04,0.04,80.0,48.0,48.0,0.0,0.0,0.0,0,0,0,1
1168,254723916436,165978,906714.4,2022-06-26,2022-07-24,29,28,0.97,No_rules_relaxed,1,Yes,1.0,20417564,Approve,0.0,True,254723916436.0,1.0,601.0,30.0,20000.0,12800.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,20000.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15,1.0,0.5,0.12,0.07,0.07,113339.3,68003.58,68003.58,0.0,68003.58,68003.58,0,68100,68100,1


In [102]:
df['blacklist_flag'].value_counts()

0    41657
1    11750
Name: blacklist_flag, dtype: int64

In [103]:
df[df['store_number'] == 7926899]

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,IDM_recommendation,IDM_limit,is_iprs_validated,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,IDM_factor_21,IDM_factor_7,IDM_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag


In [104]:
#remove any customers not blacklist and limits allocated across board is zero

#df = df.loc[~((df["blacklist_flag"]==0)&(df["final_7_limit"]==0)&(df["final_7_limit"]==0)&(df["final_1_limit"]==0))]

In [105]:
#make all column headers to be in small letters & trim out any trailing or white spaces

df.columns = df.columns.str.lower()

In [106]:
df['blacklist_flag'].value_counts()

0    41657
1    11750
Name: blacklist_flag, dtype: int64

In [107]:
#add limit factor column

def declare_limit_factors_21(df):
    """
    function to declare limit factors used to allocate limits based on defined conditions i.e IDM recommendation
    
    Inputs:
    1) IDM recommendation column i.e IDM Approve VS IDM Reject
    
    Output:
    assigned limit factors used to allocate limits based on defined conditions i.e IDM recommendation
    """
    
    idm_recommendation = df["idm_recommendation"]
    
    limit_21_approve = 0.5
    limit_21_reject = 0.35
   
    conditions = [
        idm_recommendation.str.match("Approve"),
        idm_recommendation.str.match("Reject"),              
    ]
    
    choices = [
        limit_21_approve,
        limit_21_reject,
    ]
    
    limit_factor_21 = np.select(conditions, choices)
    
    return limit_factor_21

df["limit_factor_21"] = declare_limit_factors_21(df)

In [108]:
#add limit factor column

def declare_limit_factors_7(df):
    """
    function to declare limit factors used to allocate limits based on defined conditions i.e IDM recommendation
    
    Inputs:
    1) IDM recommendation column i.e IDM Approve VS IDM Reject
    
    Output:
    assigned limit factors used to allocate limits based on defined conditions i.e IDM recommendation
    """
    idm_recommendation = df["idm_recommendation"]
    
    limit_7_approve = 0.17
    limit_7_reject = 0.12
   
    conditions = [
        idm_recommendation.str.match("Approve"),
        idm_recommendation.str.match("Reject"),              
    ]
    
    choices = [
        limit_7_approve,
        limit_7_reject,
    ]
    
    limit_factor_7 = np.select(conditions, choices)
    
    return limit_factor_7

df["limit_factor_7"] = declare_limit_factors_7(df)

In [109]:
#add limit factor column

def declare_limit_factors_1(df):
    """
    function to declare limit factors used to allocate limits based on defined conditions i.e IDM recommendation
    
    Inputs:
    1) IDM recommendation column i.e IDM Approve VS IDM Reject
    
    Output:
    assigned limit factors used to allocate limits based on defined conditions i.e IDM recommendation
    """
    idm_recommendation = df["idm_recommendation"]
    
    limit_1_approve = 0.17
    limit_1_reject = 0.12
   
    conditions = [
        idm_recommendation.str.match("Approve"),
        idm_recommendation.str.match("Reject"),              
    ]
    
    choices = [
        limit_1_approve,
        limit_1_reject,
    ]
    
    limit_factor_1 = np.select(conditions, choices)
    
    return limit_factor_1

df["limit_factor_1"] = declare_limit_factors_1(df)

In [110]:
df.head(3)

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,idm_recommendation,idm_limit,is_iprs_validated,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,idm_factor_21,idm_factor_7,idm_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag
73971,254722978849,822369,2020.0,2022-07-21,2022-07-21,1,1,1.0,relax_rules,4,Yes,1.0,22128407,Approve,0.0,True,254722978849.0,4.0,600.0,7.0,5000.0,5000.0,2017-12-10,2017-12-17,2017-12-10,2017-12-17,-7.0,1.0,closed_early_repayment,5000.0,2017-12-10,4.0,1.0,1687.0,1.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 3,0.3,0.17,0.17,1.0,1.0,0.3,0.17,0.17,606.0,353.5,353.5,0.0,353.5,353.5,0,0,400,0
67827,254723173634,786152,1280.0,2022-07-15,2022-07-15,1,1,1.0,No_rules_relaxed,10,No,0.0,494024,Approve,0.0,True,254723173634.0,1.0,601.0,30.0,12698.0,9206.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,12698.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15,1.0,0.25,0.06,0.04,0.04,80.0,48.0,48.0,0.0,0.0,0.0,0,0,0,1
1168,254723916436,165978,906714.4,2022-06-26,2022-07-24,29,28,0.97,No_rules_relaxed,1,Yes,1.0,20417564,Approve,0.0,True,254723916436.0,1.0,601.0,30.0,20000.0,12800.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,20000.0,2017-12-18,0.0,0.0,1679.0,0.0,0.0,0.5,0.17,0.17,1.0,1.0,1.0,Band 7,Band 2,0.25,0.15,0.15,1.0,0.5,0.12,0.07,0.07,113339.3,68003.58,68003.58,0.0,68003.58,68003.58,0,68100,68100,1


In [111]:
df.shape

(53407, 62)

In [112]:
df[df['store_number'] == 7926899]

Unnamed: 0,mobile_number,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,national_id,idm_recommendation,idm_limit,is_iprs_validated,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,idm_factor_21,idm_factor_7,idm_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag


In [113]:
df['idm_recommendation'].value_counts()

Reject     37088
Approve    16319
Name: idm_recommendation, dtype: int64

In [114]:
df[["final_21_limit","final_7_limit","final_1_limit"]].sum()

final_21_limit    592468100
final_7_limit     801434200
final_1_limit     804397800
dtype: int64

#### no new 21-day customers

In [115]:
host = '157.245.248.249'
port = int(5432)
dbname = 'ubuntu'
user = 'jacklinengenia'
password = 'x3MX&8#!'


def get_query_results_postgres():
    with psycopg2.connect(host = host,
                          port = port,
                          database = dbname,
                          user = user,
                          password = password) as conn:
        sql = "SELECT * FROM bloomlive.scoring_results"
        df = pd.read_sql(sql, conn)
        
    return df

    conn.close()
    
if __name__ == "__main__":
    get_query_results_postgres() 

In [None]:
scoring_results = get_query_results_postgres()

scoring_results.head(3)

In [None]:
scoring_results_sum = scoring_results.groupby(['store_number'], as_index=False)['final_21_limit'].sum()
scoring_results_sum = scoring_results_sum.rename(columns={'final_21_limit':'total_final_21_limit'})

scoring_results_sum.head(10)

In [None]:
df = pd.merge(df, scoring_results_sum, on = 'store_number', how = 'left')

df.head(3)

In [None]:
df.info()

In [None]:
cols_fillna = ["total_final_21_limit"]
# replace 'NaN' with zero in these columns
for col in cols_fillna:
    df[col].fillna(0,inplace=True)

In [None]:
df.info()

In [None]:
def calc_final_21_limit(df):
    total_final_21_limit = df['total_final_21_limit']
    final_21_limit = df['final_21_limit']

    if total_final_21_limit == 0:
        return 0
    elif total_final_21_limit > 0:
        return final_21_limit

In [None]:
df['final_21_limit'] = df.apply(lambda x: calc_final_21_limit(x), axis = 1)

In [None]:
df[["final_21_limit","final_7_limit","final_1_limit"]].sum()

#### cap limits by up to 25% of previous limit

In [None]:
#pull data from db so as to cap limits by up to 25% of previous limit

# Connection parameters
param_dic = {
    "host"      : "157.245.248.249",
    "database"  : "ubuntu",
    "user"      : "jacklinengenia",
    "password"  : "x3MX&8#!"
}

def connect(params_dic):
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        sys.exit(1) 
    print("Connection successful")
    return conn

In [None]:
def postgresql_to_dataframe(conn, select_query, column_names):
    """
    Tranform a SELECT query into a pandas dataframe
    """
    cursor = conn.cursor()
    try:
        cursor.execute(select_query)
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        cursor.close()
        return 1
    
    # Naturally we get a list of tupples
    tupples = cursor.fetchall()
    cursor.close()
    
    # Convert the list of tuples in to a pandas dataframe
    previous_results_df = pd.DataFrame(tupples, columns=column_names)
    return previous_results_df

In [None]:
# Connect to the database and load the scoring data bloomlive table
conn = connect(param_dic)

columns = "store_number, previous_21_limit, previous_7_limit, previous_1_limit"

column_names = columns.strip().split(",")
# Execute the "SELECT cols" query
previous_results_df = postgresql_to_dataframe(conn,
                             "select \
                             store_number, final_21_limit as previous_21_limit,final_7_limit as previous_7_limit,final_1_limit as previous_1_limit\
                             from bloomlive.scoring_results\
                             where model_version='2022-004[2022-05-14, 2022-06-30]'",
                             column_names)

#drop duplicates
previous_results_df = previous_results_df.drop_duplicates(subset="store_number", keep="first")

In [None]:
previous_results_df.head(3)

In [None]:
#convert store number col to str type

df["store_number"] = df["store_number"].astype("str")

In [None]:
#left merge with main df to assess for previous limit allocation

df = pd.merge(df, previous_results_df, how="left", on="store_number")

In [None]:
#strip out white spaces from column names

df = df.rename(columns=lambda x: x.strip())

In [None]:
df["previous_21_limit"].unique()

In [None]:
#convert object type columns to int

#df.loc[df["previous_21_limit"].notnull()].astype("float")
df["previous_21_limit"] = pd.to_numeric(df["previous_21_limit"], errors='coerce')
df["previous_7_limit"] = pd.to_numeric(df["previous_7_limit"], errors='coerce')
df["previous_1_limit"] = pd.to_numeric(df["previous_1_limit"], errors='coerce')

In [None]:
#fillna previous_limit column

cols_fillna = ["previous_21_limit"]
# replace 'NaN' with zero in these columns
for col in cols_fillna:
    df[col].fillna(0,inplace=True)

cols_fillna = ["previous_7_limit"]
# replace 'NaN' with zero in these columns
for col in cols_fillna:
    df[col].fillna(0,inplace=True)

cols_fillna = ["previous_1_limit"]
# replace 'NaN' with zero in these columns
for col in cols_fillna:
    df[col].fillna(0,inplace=True)

In [None]:
df.info()

In [None]:
df['previous_21_limit'] = df['previous_21_limit'].astype(int)
df['previous_7_limit'] = df['previous_7_limit'].astype(int)
df['previous_1_limit'] = df['previous_1_limit'].astype(int)


df['limit_21_cap'] = df['previous_21_limit'] * 1.25
df['limit_7_cap'] = df['previous_7_limit'] * 1.25
df['limit_1_cap'] = df['previous_1_limit'] * 1.25

# defining a function for the limit caps
def calc_final_21_limit(df):

    limit_21_cap = df['limit_21_cap']
    final_21_limit = df['final_21_limit']
    previous_21_limit = df['previous_21_limit']
  
    if previous_21_limit == 0:
        return final_21_limit
    elif final_21_limit <=  previous_21_limit:
        return final_21_limit
    elif final_21_limit >  previous_21_limit & final_21_limit < limit_21_cap:
        return final_21_limit
    elif final_21_limit >  previous_21_limit & final_21_limit >= limit_21_cap:
        return limit_21_cap
    else:
        return final_21_limit

    

def calc_final_7_limit(df):

    limit_7_cap = df['limit_7_cap']
    final_7_limit = df['final_7_limit']
    previous_7_limit = df['previous_7_limit']
  
    if previous_7_limit == 0:
        return final_7_limit
    elif final_7_limit <=  previous_7_limit:
        return final_7_limit
    elif final_7_limit >  previous_7_limit & final_7_limit < limit_7_cap:
        return final_7_limit
    elif final_7_limit >  previous_7_limit & final_7_limit >= limit_7_cap:
        return limit_7_cap
    else:
        return final_7_limit



def calc_final_1_limit(df):

    limit_1_cap = df['limit_1_cap']
    final_1_limit = df['final_1_limit']
    previous_1_limit = df['previous_1_limit']
  
    if previous_1_limit == 0:
        return final_1_limit
    elif final_1_limit <=  previous_1_limit:
        return final_1_limit
    elif final_1_limit >  previous_1_limit & final_1_limit < limit_1_cap:
        return final_1_limit
    elif final_1_limit >  previous_1_limit & final_1_limit >= limit_1_cap:
        return limit_1_cap
    else:
        return final_1_limit


In [None]:
df['final_21_limit'] = df.apply(lambda x: calc_final_21_limit(x), axis = 1)
df['final_7_limit'] = df.apply(lambda x: calc_final_7_limit(x), axis = 1)
df['final_1_limit'] = df.apply(lambda x: calc_final_1_limit(x), axis = 1)

In [None]:
df[["final_21_limit","final_7_limit","final_1_limit"]].sum()

In [None]:
df.shape

In [None]:
previous_results_df.head()

In [None]:
df[["final_21_limit","final_7_limit","final_1_limit"]].sum()

#### limits adjustments to merchants with DPD 30 plus

In [None]:
# Limit adjustment rule 1: Merchants that have DPD 30 plus on loans clients get a 50% limit reduction across all limits (1-day, 7-day and 21-day, subject to product minimum rules) 
def calc_final_21_limit(df):
    
    days_past_due = df["days_past_due"]
    final_21_limit = df["final_21_limit"]
    
    if days_past_due >= 30:
        return 0.5 * final_21_limit
    else:
        return final_21_limit
    
    
def calc_final_7_limit(df):
    
    days_past_due = df["days_past_due"]
    final_7_limit = df["final_7_limit"]
    
    if days_past_due >= 30:
        return 0.5 * final_7_limit
    else:
        return final_7_limit    
    
    
def calc_final_1_limit(df):
    
    days_past_due = df["days_past_due"]
    final_1_limit = df["final_1_limit"]
    
    if days_past_due >= 30:
        return 0.5 * final_1_limit
    else:
        return final_1_limit  

In [None]:
df['final_21_limit'] = df.apply(lambda x: calc_final_21_limit(x), axis = 1)
df['final_7_limit'] = df.apply(lambda x: calc_final_7_limit(x), axis = 1)
df['final_1_limit'] = df.apply(lambda x: calc_final_1_limit(x), axis = 1)

In [None]:
df[["final_21_limit","final_7_limit","final_1_limit"]].sum()

In [None]:
#apply ceiling 100 to final limit columns to adjust limits to the nearest 100 value 

df["final_21_limit"] = (np.ceil(df["final_21_limit"] / 100) * 100).astype(int)
df["final_7_limit"] = (np.ceil(df["final_7_limit"] / 100) * 100).astype(int)
df["final_1_limit"] = (np.ceil(df["final_1_limit"] / 100) * 100).astype(int)

In [None]:
df[["final_21_limit","final_7_limit","final_1_limit"]].sum()

In [None]:
# defining a function for the limit caps
def calc_final_21_limit(df):
    final_21_limit = df['final_21_limit']
  
    if final_21_limit < 1000:
        return 0
    elif final_21_limit > 200000:
        return 200000
    else:
        return final_21_limit


def calc_final_7_limit(df):
    final_7_limit = df['final_7_limit']
  
    if final_7_limit < 1000:
        return 0
    elif final_7_limit > 200000:
        return 200000
    else:
        return final_7_limit


def calc_final_1_limit(df):
    final_1_limit = df['final_1_limit']
  
    if final_1_limit < 200:
        return 0
    elif final_1_limit > 200000:
        return 200000
    else:
        return final_1_limit

In [None]:
df['final_21_limit'] = df.apply(lambda x: calc_final_21_limit(x), axis = 1)
df['final_7_limit'] = df.apply(lambda x: calc_final_7_limit(x), axis = 1)
df['final_1_limit'] = df.apply(lambda x: calc_final_1_limit(x), axis = 1)

In [None]:
df[["final_21_limit","final_7_limit","final_1_limit"]].sum()

###==> resume logic

In [None]:
# removing the test accounts from dataframe
removed_lst = ['573691', '219091', '884766', '735346', '5009587']
df = df[~df['store_number'].isin(removed_lst)]

In [None]:
df[df['store_number'] == 7926899]

In [None]:
def label_model(df):
    """
    function to label model version and track model changes i.e.\
    model index/rank e.g 2022_001,
    model_start_date e.g 2022,2,24 reported as year-month-day,
    model_latest_date e.g today() reported as year-month-day,
    this is full is combined to i,e 2022_001[2022-2-24, 2022-3-24]
    
    Inputs:
    Model start date
    Current latest refresh date for the model
    Model index/rank
    
    Outputs:
    model version that dynamically tracks the dates of refresh for a particulay model 
    
    """
    model_start_date = dt.datetime(2022,5,14)
    model_latest_date = (pd.Timestamp.today()).strftime("%Y-%m-%d")
    model_index = "2022-004"
    
    model_version  = model_index + "[" + model_start_date.strftime("%Y-%m-%d") + "," + " "+ model_latest_date +"]"
    
    return model_version


df["model_version"] = label_model(df)

In [None]:
#add created at column

model_latest_date = (pd.Timestamp.today()).strftime("%Y-%m-%d %H:%M:%S")

df["created_at"] = model_latest_date

#convert column to timestamp

df["created_at"] = df["created_at"].apply(pd.to_datetime, errors="coerce")


In [None]:
#add record_added_to_warehouse_on_timestamp column

model_latest_date = (pd.Timestamp.today()).strftime("%Y-%m-%d %H:%M:%S:%f")

df["record_added_to_warehouse_on_timestamp"] = model_latest_date

#convert column to timestamp

#f["created_at"] = df["created_at"].apply(pd.to_datetime, errors="coerce")
#record_added_to_warehouse_on_timestamp

In [None]:
df.shape

In [None]:
df.head(3)

In [None]:
df[["final_21_limit","final_7_limit","final_1_limit"]].sum()

In [None]:
df['idm_recommendation'].value_counts()

In [None]:
print("num of customers who've been scored: {}".format(df["store_number"].nunique()))
print("\n")
print("gross limit allocation for 21 day: {}".format(df["final_21_limit"].sum()))
print("gross num of store nums allocated 21 day limit: {}".format(df[df["final_21_limit"]>0]["store_number"].nunique()))
print("effective limit allocation for 21 day  with blacklist check: {}".format(df.loc[df["blacklist_flag"]==0]["final_21_limit"].sum()))
print("effective num of store nums allocated 21 day limit: {}".format(df.loc[(df["final_21_limit"]>0)&(df["blacklist_flag"]==0)]["store_number"].nunique()))
print("\n")
print("gross limit allocation for 7 day: {}".format(df["final_7_limit"].sum()))
print("gross num of store nums allocated 7 day limit: {}".format(df[df["final_7_limit"]>0]["store_number"].nunique()))
print("effective limit allocation for 7 day  with blacklist check: {}".format(df.loc[df["blacklist_flag"]==0]["final_7_limit"].sum()))
print("effective num of store nums allocated 7 day limit: {}".format(df.loc[(df["final_7_limit"]>0)&(df["blacklist_flag"]==0)]["store_number"].nunique()))
print("\n")
print("gross limit allocation for 1 day: {}".format(df["final_1_limit"].sum()))
print("gross num of store nums allocated 1 day limit: {}".format(df[df["final_1_limit"]>0]["store_number"].nunique()))
print("effective limit allocation for 1 day with blacklist check: {}".format(df.loc[df["blacklist_flag"]==0]["final_1_limit"].sum()))
print("effective num of store nums allocated 1 day limit: {}".format(df.loc[(df["final_1_limit"]>0)&(df["blacklist_flag"]==0)]["store_number"].nunique()))

In [None]:
df.head()

In [None]:
df.info()

In [None]:
cols_fillna = ["is_iprs_validated"]
# replace 'NaN' with zero in these columns
for col in cols_fillna:
    df[col].fillna('False',inplace=True)

In [None]:
df['is_iprs_validated'].value_counts()

In [None]:
# Limit adjustment rule 1: Merchants that have DPD 30 plus on loans clients get a 50% limit reduction across all limits (1-day, 7-day and 21-day, subject to product minimum rules) 
def calc_final_21_limit(df):
    
    is_iprs_validated = df["is_iprs_validated"]
    final_21_limit = df["final_21_limit"]
    
    if is_iprs_validated == 'False':
        return 0
    else:
        return final_21_limit
    
    
def calc_final_7_limit(df):
    
    is_iprs_validated = df["is_iprs_validated"]
    final_7_limit = df["final_7_limit"]
    
    if is_iprs_validated == 'False':
        return 0
    else:
        return final_7_limit    
    
    
def calc_final_1_limit(df):
    
    is_iprs_validated = df["is_iprs_validated"]
    final_1_limit = df["final_1_limit"]
    
    if is_iprs_validated == 'False':
        return 0
    else:
        return final_1_limit  

In [None]:
df['final_21_limit'] = df.apply(lambda x: calc_final_21_limit(x), axis = 1)
df['final_7_limit'] = df.apply(lambda x: calc_final_7_limit(x), axis = 1)
df['final_1_limit'] = df.apply(lambda x: calc_final_1_limit(x), axis = 1)

In [None]:
df[["final_21_limit","final_7_limit","final_1_limit"]].sum()

In [None]:
print("num of customers who've been scored: {}".format(df["store_number"].nunique()))
print("\n")
print("gross limit allocation for 21 day: {}".format(df["final_21_limit"].sum()))
print("gross num of store nums allocated 21 day limit: {}".format(df[df["final_21_limit"]>0]["store_number"].nunique()))
print("effective limit allocation for 21 day  with blacklist check: {}".format(df.loc[df["blacklist_flag"]==0]["final_21_limit"].sum()))
print("effective num of store nums allocated 21 day limit: {}".format(df.loc[(df["final_21_limit"]>0)&(df["blacklist_flag"]==0)]["store_number"].nunique()))
print("\n")
print("gross limit allocation for 7 day: {}".format(df["final_7_limit"].sum()))
print("gross num of store nums allocated 7 day limit: {}".format(df[df["final_7_limit"]>0]["store_number"].nunique()))
print("effective limit allocation for 7 day  with blacklist check: {}".format(df.loc[df["blacklist_flag"]==0]["final_7_limit"].sum()))
print("effective num of store nums allocated 7 day limit: {}".format(df.loc[(df["final_7_limit"]>0)&(df["blacklist_flag"]==0)]["store_number"].nunique()))
print("\n")
print("gross limit allocation for 1 day: {}".format(df["final_1_limit"].sum()))
print("gross num of store nums allocated 1 day limit: {}".format(df[df["final_1_limit"]>0]["store_number"].nunique()))
print("effective limit allocation for 1 day with blacklist check: {}".format(df.loc[df["blacklist_flag"]==0]["final_1_limit"].sum()))
print("effective num of store nums allocated 1 day limit: {}".format(df.loc[(df["final_1_limit"]>0)&(df["blacklist_flag"]==0)]["store_number"].nunique()))

In [None]:
df.to_excel("C:\\Project_summaries\\Bloom\\Bloom all_loans\\20220721\\Analysis_summaries\\Limits_refresh_summary_20220721_multiple_products.xlsx")

In [None]:
pd.DataFrame(bloom_customers_no_till, columns = ["national_id"]).to_excel("C:\\Project_summaries\\Bloom\\Bloom all_loans\\20220721\\Analysis_summaries\\"+"bloom_customers_no_till.xlsx")

In [None]:
excluded_first_time_df.to_excel("C:\\Project_summaries\\Bloom\\Bloom all_loans\\20220721\\Analysis_summaries\\excluded_new_customers_20220721.xlsx")

---
#### Add reasons for rejection

In [6]:
df.inference_col.unique()

array(['relax_rules', 'No_rules_relaxed'], dtype=object)

In [7]:
df.loc[(df["inference_col"]=="relax_rules")&(df["days_since_last_trx"]==7)].head()

Unnamed: 0.1,Unnamed: 0,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,idm_recommendation,idm_limit,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,idm_factor_21,idm_factor_7,idm_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag,previous_21_limit,previous_7_limit,previous_1_limit,previous_limit_reason,model_version,created_at,record_added_to_warehouse_on_timestamp,national_id,mobile_number,ID Completeness Check,Mobile Number Completeness Check,KYC Completeness Check
170,170,896236,70.0,2022-06-22,2022-06-22,1,1,1.0,relax_rules,7,Yes,0.7,Reject,,254726230280.0,3,600.0,7.0,15000.0,15000.0,2018-06-22,2018-06-29,2018-06-24,2018-06-29,-5.0,1.0,closed_early_repayment,15000.0,2018-06-22,3.0,1.0,1467.0,1.0,0,0.35,0.12,0.12,0.7,0.71,0.71,Band 7,Band 3,0.3,0.17,0.17,1.0,0.93,0.19,0.11,0.11,13.6,8.0,8.0,0.0,8.0,8.0,0,0,0,0,,,,,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,896236,254722958092,complete,complete,KYC complete
273,273,646667,60.0,2022-06-22,2022-06-22,1,1,1.0,relax_rules,7,Yes,0.7,Reject,,254720374848.0,8,601.0,30.0,10000.0,0.0,2018-09-24,2018-10-24,2021-08-24,2018-10-24,1035.0,1.0,written-off_default,12698.0,2018-02-23,7.0,0.88,1373.0,0.0,0,0.35,0.12,0.12,0.7,0.71,0.71,Band 7,Band 5,0.4,0.23,0.23,1.0,0.65,0.18,0.1,0.1,10.84,6.15,6.15,0.0,6.15,6.15,0,0,0,1,,,,,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,646667,254720374848,complete,complete,KYC complete
1045,1045,627556,710.0,2022-06-20,2022-06-22,3,2,0.67,relax_rules,7,Yes,0.7,Reject,,254727575539.0,9,601.0,30.0,15000.0,0.0,2019-05-08,2019-06-07,2021-08-24,2019-06-07,809.0,1.0,written-off_default,50000.0,2018-09-05,7.0,0.78,1147.0,0.0,0,0.35,0.12,0.12,0.7,0.71,0.71,Band 4,Band 6,0.25,0.17,0.17,0.9,0.59,0.1,0.07,0.07,73.93,52.18,52.18,0.0,52.18,52.18,0,0,0,1,0.0,0.0,0.0,A2,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,627556,254727575539,complete,complete,KYC complete
1098,1098,527922,15840.0,2022-06-02,2022-06-22,21,7,0.33,relax_rules,7,Yes,0.7,Reject,,254722647233.0,3,600.0,7.0,5000.0,5000.0,2019-05-24,2019-05-31,2019-05-31,2019-05-31,0.0,1.0,closed_on_time,25000.0,2018-06-28,3.0,1.0,1131.0,1.0,0,0.35,0.12,0.12,0.7,0.71,0.71,Band 2,Band 3,0.05,0.07,0.07,0.0,0.68,0.02,0.04,0.04,374.22,566.05,566.05,0.0,566.05,566.05,0,0,600,0,0.0,0.0,900.0,B2,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,93800180,254721709157,complete,complete,KYC complete
1171,1171,801036,870.0,2022-06-04,2022-06-22,19,2,0.11,relax_rules,7,Yes,0.7,Reject,0.0,254716514795.0,21,601.0,30.0,25000.0,0.0,2019-06-07,2019-07-07,2021-08-24,2019-07-07,779.0,1.0,written-off_default,50000.0,2018-10-26,18.0,0.86,1117.0,0.0,0,0.35,0.12,0.12,0.7,0.71,0.71,Band 1,Band 8,0.2,0.15,0.15,0.0,0.39,0.05,0.04,0.04,47.5,35.93,35.93,0.0,35.93,35.93,0,0,0,1,0.0,0.0,0.0,A2,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,23562559,254716514795,complete,complete,KYC complete


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51050 entries, 0 to 51049
Data columns (total 72 columns):
 #   Column                                  Non-Null Count  Dtype         
---  ------                                  --------------  -----         
 0   Unnamed: 0                              51050 non-null  int64         
 1   store_number                            51050 non-null  int64         
 2   approx_30_days_trx_val                  51050 non-null  float64       
 3   most_recent_trx_date_past_30_days       51050 non-null  datetime64[ns]
 4   last_trx_date                           51050 non-null  datetime64[ns]
 5   expected_trx_days                       51050 non-null  int64         
 6   actual_trx_days                         51050 non-null  int64         
 7   page_active_days                        51050 non-null  float64       
 8   inference_col                           51050 non-null  object        
 9   days_since_last_trx                     51050 non-

In [12]:
def rejection_reasons(df):
    """
    """
    good_loans_repayment_ratio = df["good_loans_repayment_ratio"]
    days_past_due = df["days_past_due"]
    till_recency = df["transacted_last_5_days"]
    till_consistency = df["page_active_days"]
    inference_col = df["inference_col"]
    idm_recommendation = df["idm_recommendation"]
    loan_count = df["loan_count"]
    num_days_since_last_disbursement = df["num_days_since_last_disbursement"]
    initial_21_day_limit = df["limit_21_day"]
    initial_7_day_limit = df["limit_7_day"]
    initial_1_day_limit = df["limit_1_day"]
    kyc_completeness_check = df["KYC Completeness Check"]
    
    
    repayment_ratio_response = "bad repayment history:A1"
    dpd_response = "bad repayment history:A2"
    till_recency_response = "lower than expected trading activity:B1"
    till_consistency_response = "lower than expected trading activity:B2"
    idm_recommendation_response = "inadequate CRB risk profile:C1"
    num_days_since_last_disbursement_response = "insufficient recent credit activity:D1"
    trivial_limits_cut_off = "limit assigned less than product thresholds:E1"
    kyc_check = "limits zeroized due to incomplete KYC:H1"
    
    
    conditions = [
        loan_count.gt(0) & good_loans_repayment_ratio.lt(0.6),
        days_past_due.gt(30) & inference_col.str.match("No_rules_relaxed"),
        days_past_due.gt(41) & inference_col.str.match("relax_rules"),
        till_recency.str.match("No"),
        till_consistency.lt(0.7) & inference_col.str.match("No_rules_relaxed"),
        till_consistency.lt(0.49) & inference_col.str.match("relax_rules"),
        loan_count.lt(6) & idm_recommendation.str.match("Reject"),
        num_days_since_last_disbursement.gt(180),
        initial_21_day_limit.lt(1000) | initial_7_day_limit.lt(1000) | initial_7_day_limit.lt(200),
        kyc_completeness_check.str.match("KYC incomplete")
    ]
    
    choices = [
        repayment_ratio_response,
        dpd_response,
        dpd_response,
        till_recency_response,
        till_consistency_response,
        till_consistency_response,
        idm_recommendation_response,
        num_days_since_last_disbursement_response,
        trivial_limits_cut_off,
        kyc_check
    ]
    
    reasons_col = np.select(conditions, choices)
    
    return reasons_col

#apply function
df["rules_summary_narration"] = rejection_reasons(df)

#fix unassigned rows
df.loc[(df["rules_summary_narration"]=="0")&(df["blacklist_flag"]==1), "rules_summary_narration"] = "part of Mifos recon list:F1"
df.loc[(df["rules_summary_narration"]=="0")&(df["blacklist_flag"]==0), "rules_summary_narration"] = "all rules passed:G1"

In [13]:
df["rules_summary_narration"].unique()

array(['lower than expected trading activity:B1',
       'bad repayment history:A1',
       'lower than expected trading activity:B2',
       'inadequate CRB risk profile:C1',
       'insufficient recent credit activity:D1',
       'bad repayment history:A2', 'all rules passed:G1',
       'limit assigned less than product thresholds:E1',
       'limits zeroized due to incomplete KYC:H1',
       'part of Mifos recon list:F1'], dtype=object)

In [14]:
#split rules summary narration column to get split of narration and code

df[["rules_summary_narration","limit_reason"]] = df["rules_summary_narration"].astype("str").str.split(":", expand=True)

In [15]:
df.head(3)

Unnamed: 0.1,Unnamed: 0,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,idm_recommendation,idm_limit,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,idm_factor_21,idm_factor_7,idm_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag,previous_21_limit,previous_7_limit,previous_1_limit,previous_limit_reason,model_version,created_at,record_added_to_warehouse_on_timestamp,national_id,mobile_number,ID Completeness Check,Mobile Number Completeness Check,KYC Completeness Check,rules_summary_narration,limit_reason
0,0,822369,470.0,2022-06-18,2022-06-18,1,1,1.0,relax_rules,11,No,0.0,Reject,,254722978849.0,4,600.0,7.0,5000.0,5000.0,2017-12-10,2017-12-17,2017-12-10,2017-12-17,-7.0,1.0,closed_early_repayment,5000.0,2017-12-10,4.0,1.0,1661.0,1.0,0,0.35,0.12,0.12,0.7,0.71,0.71,Band 7,Band 3,0.3,0.17,0.17,1.0,0.75,0.16,0.09,0.09,74.02,43.54,43.54,0.0,43.54,43.54,0,0,0,0,0.0,0.0,0.0,C1,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,822369,254722978849,complete,complete,KYC complete,lower than expected trading activity,B1
1,1,786152,3190.0,2022-06-03,2022-06-24,22,9,0.41,No_rules_relaxed,5,Yes,1.0,Approve,0.0,254723173634.0,1,601.0,30.0,12698.0,9206.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,12698.0,2017-12-18,0.0,0.0,1653.0,0.0,0,0.5,0.17,0.17,1.0,1.0,1.0,Band 2,Band 2,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,1,0.0,0.0,0.0,A1,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,786152,254723173634,complete,complete,KYC complete,bad repayment history,A1
2,2,165978,868610.98,2022-05-31,2022-06-28,29,29,1.0,No_rules_relaxed,1,Yes,1.0,Reject,,254723916436.0,1,601.0,30.0,20000.0,12800.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,20000.0,2017-12-18,0.0,0.0,1653.0,0.0,0,0.35,0.12,0.12,0.7,0.71,0.71,Band 7,Band 2,0.25,0.15,0.15,1.0,0.5,0.09,0.05,0.05,76003.46,45985.29,45985.29,0.0,45985.29,45985.29,0,23000,23000,1,0.0,33500.0,33500.0,A1,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,165978,254723916436,complete,complete,KYC complete,bad repayment history,A1


In [16]:
df[df['store_number'] == '5028261']

Unnamed: 0.1,Unnamed: 0,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,idm_recommendation,idm_limit,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,idm_factor_21,idm_factor_7,idm_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag,previous_21_limit,previous_7_limit,previous_1_limit,previous_limit_reason,model_version,created_at,record_added_to_warehouse_on_timestamp,national_id,mobile_number,ID Completeness Check,Mobile Number Completeness Check,KYC Completeness Check,rules_summary_narration,limit_reason


In [18]:
df.head()

Unnamed: 0.1,Unnamed: 0,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,idm_recommendation,idm_limit,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,idm_factor_21,idm_factor_7,idm_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag,previous_21_limit,previous_7_limit,previous_1_limit,previous_limit_reason,model_version,created_at,record_added_to_warehouse_on_timestamp,national_id,mobile_number,ID Completeness Check,Mobile Number Completeness Check,KYC Completeness Check,rules_summary_narration,limit_reason
0,0,822369,470.0,2022-06-18,2022-06-18,1,1,1.0,relax_rules,11,No,0.0,Reject,,254722978849.0,4,600.0,7.0,5000.0,5000.0,2017-12-10,2017-12-17,2017-12-10,2017-12-17,-7.0,1.0,closed_early_repayment,5000.0,2017-12-10,4.0,1.0,1661.0,1.0,0,0.35,0.12,0.12,0.7,0.71,0.71,Band 7,Band 3,0.3,0.17,0.17,1.0,0.75,0.16,0.09,0.09,74.02,43.54,43.54,0.0,43.54,43.54,0,0,0,0,0.0,0.0,0.0,C1,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,822369,254722978849,complete,complete,KYC complete,lower than expected trading activity,B1
1,1,786152,3190.0,2022-06-03,2022-06-24,22,9,0.41,No_rules_relaxed,5,Yes,1.0,Approve,0.0,254723173634.0,1,601.0,30.0,12698.0,9206.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,12698.0,2017-12-18,0.0,0.0,1653.0,0.0,0,0.5,0.17,0.17,1.0,1.0,1.0,Band 2,Band 2,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,1,0.0,0.0,0.0,A1,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,786152,254723173634,complete,complete,KYC complete,bad repayment history,A1
2,2,165978,868610.98,2022-05-31,2022-06-28,29,29,1.0,No_rules_relaxed,1,Yes,1.0,Reject,,254723916436.0,1,601.0,30.0,20000.0,12800.0,2017-12-18,2018-01-17,2019-12-31,2018-01-17,713.0,1.0,written-off_default,20000.0,2017-12-18,0.0,0.0,1653.0,0.0,0,0.35,0.12,0.12,0.7,0.71,0.71,Band 7,Band 2,0.25,0.15,0.15,1.0,0.5,0.09,0.05,0.05,76003.46,45985.29,45985.29,0.0,45985.29,45985.29,0,23000,23000,1,0.0,33500.0,33500.0,A1,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,165978,254723916436,complete,complete,KYC complete,bad repayment history,A1
3,3,836820,60.0,2022-06-03,2022-06-03,1,1,1.0,No_rules_relaxed,26,No,0.0,Reject,,254721718637.0,1,601.0,30.0,12600.0,5669.0,2017-12-19,2018-01-18,2018-12-31,2018-01-18,347.0,1.0,written-off_default,12600.0,2017-12-19,0.0,0.0,1652.0,0.0,0,0.35,0.12,0.12,0.7,0.71,0.71,Band 7,Band 2,0.25,0.15,0.15,1.0,0.25,0.04,0.03,0.03,2.62,1.59,1.59,0.0,0.0,0.0,0,0,0,1,0.0,0.0,0.0,A1,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,836820,254721718637,complete,complete,KYC complete,bad repayment history,A1
4,4,745142,193884.81,2022-05-31,2022-06-28,29,28,0.97,No_rules_relaxed,1,Yes,1.0,Reject,,254720475133.0,1,601.0,30.0,28684.0,0.0,2017-12-21,2018-01-20,2019-12-31,2018-01-20,710.0,1.0,written-off_default,28684.0,2017-12-21,0.0,0.0,1650.0,0.0,0,0.35,0.12,0.12,0.7,0.71,0.71,Band 7,Band 2,0.25,0.15,0.15,1.0,0.5,0.09,0.05,0.05,16964.92,10264.49,10264.49,0.0,10264.49,10264.49,0,0,0,1,0.0,0.0,0.0,A1,"2022-004[2022-05-14, 2022-06-29]",2022-06-29 18:41:13,2022-06-29 18:41:17:704455,745142,254720475133,complete,complete,KYC complete,bad repayment history,A1


In [19]:
# including the test accounts with new limits assigned
df2 = pd.DataFrame({'store_number': ['573691', '219091', '884766', '735346', '5009587'],
                   'blacklist_flag': [0, 0, 0, 0, 0],
                   'final_21_limit': [1000, 1000, 1000, 1000, 1000],
                   'final_7_limit': [500, 500, 500, 500, 1000],
                   'final_1_limit': [300, 300, 300, 300, 1000]})

# adding the test accounts into the dataframe
#df = df.append(df2,ignore_index=True)

In [20]:
df2.head(3)

Unnamed: 0,store_number,blacklist_flag,final_21_limit,final_7_limit,final_1_limit
0,573691,0,1000,500,300
1,219091,0,1000,500,300
2,884766,0,1000,500,300


In [21]:
def label_model(df):
    """
    function to label model version and track model changes i.e.\
    model index/rank e.g 2022_001,
    model_start_date e.g 2022,2,24 reported as year-month-day,
    model_latest_date e.g today() reported as year-month-day,
    this is full is combined to i,e 2022_001[2022-2-24, 2022-3-24]
    
    Inputs:
    Model start date
    Current latest refresh date for the model
    Model index/rank
    
    Outputs:
    model version that dynamically tracks the dates of refresh for a particulay model 
    
    """
    model_start_date = dt.datetime(2022,5,14)
    model_latest_date = (pd.Timestamp.today()).strftime("%Y-%m-%d")
    model_index = "2022-004"
    
    model_version  = model_index + "[" + model_start_date.strftime("%Y-%m-%d") + "," + " "+ model_latest_date +"]"
    
    return model_version


df2["model_version"] = label_model(df)

In [22]:
#add created at column

model_latest_date = (pd.Timestamp.today()).strftime("%Y-%m-%d %H:%M:%S")

df2["created_at"] = model_latest_date

#convert column to timestamp

df2["created_at"] = df2["created_at"].apply(pd.to_datetime, errors="coerce")

In [23]:
#add record_added_to_warehouse_on_timestamp column

model_latest_date = (pd.Timestamp.today()).strftime("%Y-%m-%d %H:%M:%S:%f")

df2["record_added_to_warehouse_on_timestamp"] = model_latest_date

#convert column to timestamp

#f["created_at"] = df["created_at"].apply(pd.to_datetime, errors="coerce")
#record_added_to_warehouse_on_timestamp

In [24]:
df2.head(3)

Unnamed: 0,store_number,blacklist_flag,final_21_limit,final_7_limit,final_1_limit,model_version,created_at,record_added_to_warehouse_on_timestamp
0,573691,0,1000,500,300,"2022-004[2022-05-14, 2022-06-30]",2022-06-30 11:31:08,2022-06-30 11:31:09:674307
1,219091,0,1000,500,300,"2022-004[2022-05-14, 2022-06-30]",2022-06-30 11:31:08,2022-06-30 11:31:09:674307
2,884766,0,1000,500,300,"2022-004[2022-05-14, 2022-06-30]",2022-06-30 11:31:08,2022-06-30 11:31:09:674307


In [25]:
# adding the test accounts into the dataframe
df = df.append(df2,ignore_index=True)

In [26]:
df[df['store_number'] == '573691']

Unnamed: 0.1,Unnamed: 0,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,idm_recommendation,idm_limit,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,idm_factor_21,idm_factor_7,idm_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag,previous_21_limit,previous_7_limit,previous_1_limit,previous_limit_reason,model_version,created_at,record_added_to_warehouse_on_timestamp,national_id,mobile_number,ID Completeness Check,Mobile Number Completeness Check,KYC Completeness Check,rules_summary_narration,limit_reason
51050,,573691,,NaT,NaT,,,,,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000,500,300,0,,,,,"2022-004[2022-05-14, 2022-06-30]",2022-06-30 11:31:08,2022-06-30 11:31:09:674307,,,,,,,


In [27]:
df[df['store_number'] == '219091']

Unnamed: 0.1,Unnamed: 0,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,idm_recommendation,idm_limit,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,idm_factor_21,idm_factor_7,idm_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag,previous_21_limit,previous_7_limit,previous_1_limit,previous_limit_reason,model_version,created_at,record_added_to_warehouse_on_timestamp,national_id,mobile_number,ID Completeness Check,Mobile Number Completeness Check,KYC Completeness Check,rules_summary_narration,limit_reason
51051,,219091,,NaT,NaT,,,,,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000,500,300,0,,,,,"2022-004[2022-05-14, 2022-06-30]",2022-06-30 11:31:08,2022-06-30 11:31:09:674307,,,,,,,


In [28]:
df[df['store_number'] == '884766']

Unnamed: 0.1,Unnamed: 0,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,idm_recommendation,idm_limit,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,idm_factor_21,idm_factor_7,idm_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag,previous_21_limit,previous_7_limit,previous_1_limit,previous_limit_reason,model_version,created_at,record_added_to_warehouse_on_timestamp,national_id,mobile_number,ID Completeness Check,Mobile Number Completeness Check,KYC Completeness Check,rules_summary_narration,limit_reason
51052,,884766,,NaT,NaT,,,,,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000,500,300,0,,,,,"2022-004[2022-05-14, 2022-06-30]",2022-06-30 11:31:08,2022-06-30 11:31:09:674307,,,,,,,


In [29]:
df[df['store_number'] == '735346']

Unnamed: 0.1,Unnamed: 0,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,idm_recommendation,idm_limit,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,idm_factor_21,idm_factor_7,idm_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag,previous_21_limit,previous_7_limit,previous_1_limit,previous_limit_reason,model_version,created_at,record_added_to_warehouse_on_timestamp,national_id,mobile_number,ID Completeness Check,Mobile Number Completeness Check,KYC Completeness Check,rules_summary_narration,limit_reason
51053,,735346,,NaT,NaT,,,,,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000,500,300,0,,,,,"2022-004[2022-05-14, 2022-06-30]",2022-06-30 11:31:08,2022-06-30 11:31:09:674307,,,,,,,


In [30]:
df[df['store_number'] == '5009587']

Unnamed: 0.1,Unnamed: 0,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,idm_recommendation,idm_limit,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,idm_factor_21,idm_factor_7,idm_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag,previous_21_limit,previous_7_limit,previous_1_limit,previous_limit_reason,model_version,created_at,record_added_to_warehouse_on_timestamp,national_id,mobile_number,ID Completeness Check,Mobile Number Completeness Check,KYC Completeness Check,rules_summary_narration,limit_reason
51054,,5009587,,NaT,NaT,,,,,,,,,,,,,,,,NaT,NaT,NaT,NaT,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,1000,1000,1000,0,,,,,"2022-004[2022-05-14, 2022-06-30]",2022-06-30 11:31:08,2022-06-30 11:31:09:674307,,,,,,,


In [31]:
df[df['store_number'] == '610896']

Unnamed: 0.1,Unnamed: 0,store_number,approx_30_days_trx_val,most_recent_trx_date_past_30_days,last_trx_date,expected_trx_days,actual_trx_days,page_active_days,inference_col,days_since_last_trx,transacted_last_5_days,weight_till_recency,idm_recommendation,idm_limit,client_mobile_number,loan_count,loan_status,term_frequency,principal_disbursed,principal_repaid,disbursed_on_date,expected_matured_on_date,closed_on_date,due_date_fixed,days_past_due,bloom_version,loan_repayment_status,max_principal_amount,max_loan_disbursement_date,count_good_loans,good_loans_repayment_ratio,num_days_since_last_disbursement,weight_dpd,adjusted_loan_count,limit_factor_21,limit_factor_7,limit_factor_1,idm_factor_21,idm_factor_7,idm_factor_1,trading_consistency_bands,loan_count_bands,new_limit_factor_21,new_limit_factor_7,new_limit_factor_1,weight_consistency,risk_rules_factor,ultimate_factor_21,ultimate_factor_7,ultimate_factor_1,limit_21_day,limit_7_day,limit_1_day,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,blacklist_flag,previous_21_limit,previous_7_limit,previous_1_limit,previous_limit_reason,model_version,created_at,record_added_to_warehouse_on_timestamp,national_id,mobile_number,ID Completeness Check,Mobile Number Completeness Check,KYC Completeness Check,rules_summary_narration,limit_reason


---
#### Push limits to DB ==> bloomlive.scoring_results

In [32]:
#temp_df = pd.read_excel("C:\\Project_summaries\\Bloom\\Bloom all_loans\\20220520\\Analysis_summaries\\db_upload_20220520.xlsx")

In [33]:
#get copy of df
scoring_results = df.copy()


#trim df to only match DB column names
db_cols = [
    "store_number", "national_id","final_21_limit",
    "final_7_limit","final_1_limit", "idm_recommendation",
    "limit_factor_21", "limit_factor_7", "limit_factor_1",
    "model_version", "blacklist_flag", "created_at", "record_added_to_warehouse_on_timestamp","limit_reason"
]

scoring_results = scoring_results[db_cols]

In [34]:
#convert store_number col to string type

scoring_results["store_number"] = scoring_results["store_number"].astype(str)

In [35]:
scoring_results.head(3)

Unnamed: 0,store_number,national_id,final_21_limit,final_7_limit,final_1_limit,idm_recommendation,limit_factor_21,limit_factor_7,limit_factor_1,model_version,blacklist_flag,created_at,record_added_to_warehouse_on_timestamp,limit_reason
0,822369,822369,0,0,0,Reject,0.35,0.12,0.12,"2022-004[2022-05-14, 2022-06-29]",0,2022-06-29 18:41:13,2022-06-29 18:41:17:704455,B1
1,786152,786152,0,0,0,Approve,0.5,0.17,0.17,"2022-004[2022-05-14, 2022-06-29]",1,2022-06-29 18:41:13,2022-06-29 18:41:17:704455,A1
2,165978,165978,0,23000,23000,Reject,0.35,0.12,0.12,"2022-004[2022-05-14, 2022-06-29]",1,2022-06-29 18:41:13,2022-06-29 18:41:17:704455,A1


In [36]:
scoring_results.shape

(51055, 14)

In [37]:
#function to write pandas scoring results df to db table 
def write_results_to_db(conn, df_final, table):
  
    tuples = [tuple(x) for x in df_final.to_numpy()]
  
    cols = ','.join(list(df_final.columns))
    # SQL query to execute
    query = "INSERT INTO %s(%s) VALUES %%s" % (table, cols)
    cursor = conn.cursor()
    try:
        extras.execute_values(cursor, query, tuples)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    print("the dataframe is inserted")
    cursor.close()
  
  
conn = psycopg2.connect(
    database="ubuntu", user="jacklinengenia", password="x3MX&8#!", host="157.245.248.249", port='5432'
)
  
df_final = scoring_results.copy()
  
write_results_to_db(conn, df_final, "bloomlive.scoring_results")

the dataframe is inserted


In [3]:
import requests
import datetime as dt
from dateutil import tz
from tzlocal import get_localzone
local_tz = get_localzone()
import datetime

In [4]:
model_version = '2022-004[2022-05-14, 2022-06-30]'

def trigger_bloom_limit_refresh_push(model_version, is_initial_run):
    response = requests.post(
        url='https://192.241.150.164/airflow/api/v1/dags/Bloom_limit_refresh_push/dagRuns',
        headers={
            'Content-type': 'application/json',
            'Accept': 'application/json'
        },
        json={
            "execution_date": str(datetime.datetime.now().replace(tzinfo=local_tz)),
            "conf": {
                'model_version': model_version,
                'is_initial_run': is_initial_run,
            }
        },
        auth=requests.auth.HTTPBasicAuth("jackline.ngenia@asantefinancegroup.com", '=tR"5TK(9)'),
        verify=False
    )
    
    print(response.status_code)
    print(response.text)

trigger_bloom_limit_refresh_push(model_version=model_version, is_initial_run='true')



200
{
  "conf": {
    "is_initial_run": "true",
    "model_version": "2022-004[2022-05-14, 2022-06-30]"
  },
  "dag_id": "Bloom_limit_refresh_push",
  "dag_run_id": "manual__2022-07-20T09:22:13.709387+03:00",
  "end_date": null,
  "execution_date": "2022-07-20T09:22:13.709387+03:00",
  "external_trigger": true,
  "logical_date": "2022-07-20T09:22:13.709387+03:00",
  "start_date": null,
  "state": "queued"
}



In [40]:
#convert store_number col to string type
df['final_21_limit'] = df['final_21_limit'].fillna(0)
df['final_7_limit'] = df['final_7_limit'].fillna(0)
df['final_1_limit'] = df['final_1_limit'].fillna(0)

df["most_recent_trx_date_past_30_days"] = df["most_recent_trx_date_past_30_days"].astype(str)
df["last_trx_date"] = df["last_trx_date"].astype(str)
df["national_id"] = df["national_id"].astype(str)
df["disbursed_on_date"] = df["disbursed_on_date"].astype(str)
df["expected_matured_on_date"] = df["expected_matured_on_date"].astype(str)
df["closed_on_date"] = df["closed_on_date"].astype(str)
df["due_date_fixed"] = df["due_date_fixed"].astype(str)
df["max_loan_disbursement_date"] = df["max_loan_disbursement_date"].astype(str)
df["due_date_fixed"] = df["due_date_fixed"].astype(str)
df["due_date_fixed"] = df["due_date_fixed"].astype(str)
df["created_at"] = df["created_at"].astype(str)
df["record_added_to_warehouse_on_timestamp"] = df["record_added_to_warehouse_on_timestamp"].astype(str)

df["adjusted_loan_count"] = df["adjusted_loan_count"].astype(float)
df["blacklist_flag"] = df["blacklist_flag"].astype(float)
df["due_date_fixed"] = df["due_date_fixed"].astype(str)

In [41]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51055 entries, 0 to 51054
Data columns (total 74 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Unnamed: 0                              51050 non-null  float64
 1   store_number                            51055 non-null  object 
 2   approx_30_days_trx_val                  51050 non-null  float64
 3   most_recent_trx_date_past_30_days       51055 non-null  object 
 4   last_trx_date                           51055 non-null  object 
 5   expected_trx_days                       51050 non-null  float64
 6   actual_trx_days                         51050 non-null  float64
 7   page_active_days                        51050 non-null  float64
 8   inference_col                           51050 non-null  object 
 9   days_since_last_trx                     51050 non-null  float64
 10  transacted_last_5_days                  51050 non-null  ob

In [42]:
df.shape

(51055, 74)

In [48]:
#get copy of df
df = df.copy()


#trim df to only match DB column names
db_cols = [
    "store_number", "approx_30_days_trx_val", "most_recent_trx_date_past_30_days", "last_trx_date","expected_trx_days", "actual_trx_days", "page_active_days",
    "days_since_last_trx", "transacted_last_5_days", "weight_till_recency", "national_id", "client_mobile_number", "loan_count", "loan_status", "term_frequency",
    "principal_disbursed",  "principal_repaid", "disbursed_on_date", "expected_matured_on_date", "closed_on_date", "due_date_fixed", "days_past_due", "bloom_version",
    "loan_repayment_status", "max_principal_amount", "max_loan_disbursement_date", "count_good_loans", "good_loans_repayment_ratio", "num_days_since_last_disbursement",
    "weight_dpd", "adjusted_loan_count", "limit_21_day", "limit_7_day", "limit_1_day", "adjusted_21_limit", "adjusted_7_limit", "adjusted_1_limit", "final_21_limit",
    "final_7_limit", "final_1_limit", "model_version", "blacklist_flag", "limit_factor_21", "limit_factor_7", "limit_factor_1", "created_at", 
    "record_added_to_warehouse_on_timestamp"
]

df = df[db_cols]

In [49]:
df.shape

(51055, 47)

In [50]:
df.columns

Index(['store_number', 'approx_30_days_trx_val',
       'most_recent_trx_date_past_30_days', 'last_trx_date',
       'expected_trx_days', 'actual_trx_days', 'page_active_days',
       'days_since_last_trx', 'transacted_last_5_days', 'weight_till_recency',
       'national_id', 'client_mobile_number', 'loan_count', 'loan_status',
       'term_frequency', 'principal_disbursed', 'principal_repaid',
       'disbursed_on_date', 'expected_matured_on_date', 'closed_on_date',
       'due_date_fixed', 'days_past_due', 'bloom_version',
       'loan_repayment_status', 'max_principal_amount',
       'max_loan_disbursement_date', 'count_good_loans',
       'good_loans_repayment_ratio', 'num_days_since_last_disbursement',
       'weight_dpd', 'adjusted_loan_count', 'limit_21_day', 'limit_7_day',
       'limit_1_day', 'adjusted_21_limit', 'adjusted_7_limit',
       'adjusted_1_limit', 'final_21_limit', 'final_7_limit', 'final_1_limit',
       'model_version', 'blacklist_flag', 'limit_factor_21', 'lim

In [51]:
#db connection libraries
import psycopg2
from sqlalchemy import create_engine
import psycopg2.extras as extras

In [52]:
#function to write pandas scoring results df to db table 
def write_results_to_db(conn, df_final, table):
  
    tuples = [tuple(x) for x in df_final.to_numpy()]
  
    cols = ','.join(list(df_final.columns))
    # SQL query to execute
    query = "INSERT INTO %s(%s) VALUES %%s" % (table, cols)
    cursor = conn.cursor()
    try:
        extras.execute_values(cursor, query, tuples)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    print("the dataframe is inserted")
    cursor.close()
  
  
conn = psycopg2.connect(
    database="ubuntu", user="jacklinengenia", password="x3MX&8#!", host="157.245.248.249", port='5432'
)
  
df_final = scoring_results.copy()
  
write_results_to_db(conn, df_final, "bloomlive.temp_scoring_summaries")

Error: column "idm_recommendation" of relation "temp_scoring_summaries" does not exist
LINE 1: ...nal_id,final_21_limit,final_7_limit,final_1_limit,idm_recomm...
                                                             ^



1