In [1]:
#import data processing libraries
import os
import pandas as pd
import numpy as np
import math as math
import datetime as dt
from scipy import stats

#import visualization libraries
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
sns.set_style('darkgrid')
%matplotlib inline

#db connection libraries
import psycopg2
from sqlalchemy import create_engine
import psycopg2.extras as extras

In [2]:
def _two_decimals(x):
    """Format a float with exactly two digits after the decimal point."""
    return '%.2f' % x


# Display limits: up to 300 rows, every column, up to 500 characters per cell.
pd.set_option('display.max_rows', 300)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 500)

# Show floats as fixed-point with two decimals instead of scientific notation.
pd.set_option('display.float_format', _two_decimals)

In [3]:
# Connection settings for the Postgres database, read from the environment.
# The password must never be stored in the notebook: export BLOOM_DB_PASSWORD
# before starting the kernel. The other settings fall back to the values this
# notebook has always used, and can be overridden the same way.
# NOTE: the password that used to be hardcoded here has been exposed in plain
# text and should be rotated.
host = os.environ.get('BLOOM_DB_HOST', '157.245.248.249')
port = int(os.environ.get('BLOOM_DB_PORT', 5432))
dbname = os.environ.get('BLOOM_DB_NAME', 'ubuntu')
user = os.environ.get('BLOOM_DB_USER', 'jacklinengenia')
password = os.environ.get('BLOOM_DB_PASSWORD')  # None when the variable is unset


def get_query_results_postgres():
    """Fetch the loan-level summary view from Postgres as a DataFrame.

    Connects with the module-level ``host``, ``port``, ``dbname``, ``user``
    and ``password`` settings and runs a fixed SELECT against
    ``bloomlive.loans_fact_table_materialized_summary_view``.

    Returns:
        pandas.DataFrame: the query result, with the 22 columns named in the
        SELECT list.

    Raises:
        Whatever ``psycopg2.connect`` or ``pd.read_sql`` raise on connection
        or query failure; the connection is closed in either case.
    """
    sql = 'select loan_mifos_id, store_number, loan_status, term_frequency, principal_disbursed, principal_outstanding, interest_outstanding, fee_charges_charged, fee_charges_outstanding, penalty_charges_charged, penalty_charges_outstanding, total_expected_repayment, total_repayment, total_outstanding, safaricom_loan_balance, bloom_version, disbursed_on_date, expected_matured_on_date, end_rollvr_dt, dpd_30, dpd_d60, dpd_d90 from bloomlive.loans_fact_table_materialized_summary_view'

    conn = psycopg2.connect(host = host,
                            port = port,
                            database = dbname,
                            user = user,
                            password = password)
    # psycopg2's `with connection:` only ends the transaction; it does not
    # close the connection, and a close() placed after `return` never runs.
    # try/finally guarantees the connection is released.
    try:
        df = pd.read_sql(sql, conn)
    finally:
        conn.close()

    return df

In [4]:
# Load the loan-level data with the query helper defined in the previous cell.
df_loans = get_query_results_postgres()

# Preview the first three rows.
df_loans.head(3)



Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90
0,98788,7191795,600,7,54600.0,0.0,0.0,0.0,0.0,0.0,0.0,56019.61,56019.61,0.0,0.0,2.0,2022-05-19,2022-05-26,2022-05-29,2022-06-28,2022-07-28,2022-08-27
1,98686,7498133,600,7,3300.0,0.0,0.0,0.0,0.0,0.0,0.0,3385.81,3385.81,0.0,0.0,2.0,2022-05-19,2022-05-26,2022-05-29,2022-06-28,2022-07-28,2022-08-27
2,101319,799916,600,21,160000.0,0.0,0.0,0.0,0.0,0.0,0.0,172160.01,172160.01,0.0,0.0,2.0,2022-05-20,2022-06-10,2022-06-15,2022-07-15,2022-08-14,2022-09-13


In [5]:
# (rows, columns) of the loaded loans frame.
df_loans.shape

(474953, 22)

In [6]:
# Add `loan_count`: the number of rows (loans) sharing each row's store_number,
# broadcast back onto every loan row.
# NOTE(review): the preview shows floats (3.0), which suggests some rows end up
# NaN here (e.g. a missing store_number) - confirm.
loans_by_store = df_loans.groupby("store_number")
df_loans["loan_count"] = loans_by_store["store_number"].transform('size')

df_loans.head(3)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count
0,98788,7191795,600,7,54600.0,0.0,0.0,0.0,0.0,0.0,0.0,56019.61,56019.61,0.0,0.0,2.0,2022-05-19,2022-05-26,2022-05-29,2022-06-28,2022-07-28,2022-08-27,3.0
1,98686,7498133,600,7,3300.0,0.0,0.0,0.0,0.0,0.0,0.0,3385.81,3385.81,0.0,0.0,2.0,2022-05-19,2022-05-26,2022-05-29,2022-06-28,2022-07-28,2022-08-27,5.0
2,101319,799916,600,21,160000.0,0.0,0.0,0.0,0.0,0.0,0.0,172160.01,172160.01,0.0,0.0,2.0,2022-05-20,2022-06-10,2022-06-15,2022-07-15,2022-08-14,2022-09-13,26.0


In [7]:
# Current timestamp as a pandas Timestamp.
curr_time = pd.to_datetime('today')

# Parse the loan date columns in place; values that cannot be parsed become NaT.
for _date_col in ('disbursed_on_date', 'end_rollvr_dt', 'expected_matured_on_date'):
    df_loans[_date_col] = pd.to_datetime(df_loans[_date_col], errors='coerce')

In [8]:
def saf_loan_balance(df):
    """Pick the balance to use for a single loan row.

    NOTE: a later cell in this notebook defines another function with this
    same name that returns 'yes'/'no' instead; once that cell has run, this
    definition is shadowed.

    Args:
        df: one row (anything indexable by column name) providing
            'total_outstanding' and 'safaricom_loan_balance'.

    Returns:
        The row's 'safaricom_loan_balance' when it is >= 0; otherwise
        (negative, or NaN, for which the comparison is False) the row's
        'total_outstanding'.
    """
    fallback = df['total_outstanding']
    reported = df['safaricom_loan_balance']

    return reported if reported >= 0 else fallback

In [9]:
# Vectorised form of the row rule in saf_loan_balance(): keep the Safaricom
# balance where it is >= 0, otherwise (negative or NaN) use total_outstanding.
# Avoids a Python-level call per row, and does not depend on which
# `saf_loan_balance` definition is currently bound in the kernel (a later
# cell reuses that name for a function returning 'yes'/'no').
df_loans['safaricom_loan_balance'] = df_loans['safaricom_loan_balance'].where(
    df_loans['safaricom_loan_balance'] >= 0,
    df_loans['total_outstanding'],
)

df_loans.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count
0,98788,7191795,600,7,54600.0,0.0,0.0,0.0,0.0,0.0,0.0,56019.61,56019.61,0.0,0.0,2.0,2022-05-19,2022-05-26,2022-05-29,2022-06-28,2022-07-28,2022-08-27,3.0
1,98686,7498133,600,7,3300.0,0.0,0.0,0.0,0.0,0.0,0.0,3385.81,3385.81,0.0,0.0,2.0,2022-05-19,2022-05-26,2022-05-29,2022-06-28,2022-07-28,2022-08-27,5.0
2,101319,799916,600,21,160000.0,0.0,0.0,0.0,0.0,0.0,0.0,172160.01,172160.01,0.0,0.0,2.0,2022-05-20,2022-06-10,2022-06-15,2022-07-15,2022-08-14,2022-09-13,26.0
3,110344,7276538,300,7,2500.0,2500.0,65.01,59.13,59.13,950.87,456.44,3575.01,494.43,3080.58,3502.41,2.0,2022-05-31,2022-06-07,2022-06-10,2022-07-10,2022-08-09,2022-09-08,8.0
4,108190,7376448,300,7,3400.0,3400.0,88.41,85.0,85.0,1562.05,1500.82,5135.46,61.23,5074.23,5593.73,2.0,2022-05-27,2022-06-03,2022-06-06,2022-07-06,2022-08-05,2022-09-04,1.0


In [10]:
def cal_loan_count_flag(df):
    """Label a loan row by how many loans its store has taken.

    Args:
        df: one row (indexable by column name) providing 'loan_count'.

    Returns:
        'New Client' when loan_count equals 1, 'Repeat Client' for any other
        value (including NaN).
    """
    return 'New Client' if df['loan_count'] == 1 else 'Repeat Client'

In [11]:
# Tag every loan row as 'New Client' or 'Repeat Client' (row-wise apply).
df_loans['Loan Count Flag'] = df_loans.apply(cal_loan_count_flag, axis=1)

df_loans.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag
0,98788,7191795,600,7,54600.0,0.0,0.0,0.0,0.0,0.0,0.0,56019.61,56019.61,0.0,0.0,2.0,2022-05-19,2022-05-26,2022-05-29,2022-06-28,2022-07-28,2022-08-27,3.0,Repeat Client
1,98686,7498133,600,7,3300.0,0.0,0.0,0.0,0.0,0.0,0.0,3385.81,3385.81,0.0,0.0,2.0,2022-05-19,2022-05-26,2022-05-29,2022-06-28,2022-07-28,2022-08-27,5.0,Repeat Client
2,101319,799916,600,21,160000.0,0.0,0.0,0.0,0.0,0.0,0.0,172160.01,172160.01,0.0,0.0,2.0,2022-05-20,2022-06-10,2022-06-15,2022-07-15,2022-08-14,2022-09-13,26.0,Repeat Client
3,110344,7276538,300,7,2500.0,2500.0,65.01,59.13,59.13,950.87,456.44,3575.01,494.43,3080.58,3502.41,2.0,2022-05-31,2022-06-07,2022-06-10,2022-07-10,2022-08-09,2022-09-08,8.0,Repeat Client
4,108190,7376448,300,7,3400.0,3400.0,88.41,85.0,85.0,1562.05,1500.82,5135.46,61.23,5074.23,5593.73,2.0,2022-05-27,2022-06-03,2022-06-06,2022-07-06,2022-08-05,2022-09-04,1.0,New Client


In [12]:
# Current defaulters: a penalty has been charged, both balance figures are
# still positive, and loan_status is 300.
current_defaulters = df_loans[
    (df_loans['penalty_charges_charged'] > 0)
    & (df_loans['total_outstanding'] > 0)
    & (df_loans['safaricom_loan_balance'] > 0)
    & (df_loans['loan_status'] == 300)  # presumably the "active" status code - confirm
]

current_defaulters.shape

(26251, 24)

In [13]:
# Every loan that has ever been charged a penalty, whatever its status.
defaulters = df_loans[df_loans['penalty_charges_charged'] > 0]

defaulters.shape

(67303, 24)

In [14]:
# Penalised loans with bloom_version 1.
defaulters_bloom_one = defaulters[defaulters['bloom_version'] == 1]

defaulters_bloom_one.shape

(21120, 24)

In [15]:
# Penalised loans with bloom_version 2.
defaulters_bloom_two = defaulters[defaulters['bloom_version'] == 2]

defaulters_bloom_two.shape

(46183, 24)

In [16]:
# Spot check: penalised Bloom 2 loans for one store (store_number is compared as a string).
defaulters_bloom_two[(defaulters_bloom_two['store_number'] == '7468569')]

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag
68394,106687,7468569,600,21,8500.0,0.0,0.0,149.14,0.0,3.31,0.0,9298.46,9298.46,0.0,0.0,2.0,2022-05-26,2022-06-16,2022-06-21,2022-07-21,2022-08-20,2022-09-19,21.0,Repeat Client
440914,127075,7468569,300,21,7000.0,34.29,0.0,24.0,0.0,10.28,0.0,7566.29,7532.0,34.29,0.0,2.0,2022-06-17,2022-07-08,2022-07-13,2022-08-12,2022-09-11,2022-10-11,21.0,Repeat Client
469253,228595,7468569,300,21,7400.0,164.72,0.0,150.0,0.0,14.72,0.0,8239.6,8074.88,164.72,0.0,2.0,2022-10-12,2022-11-02,2022-11-07,2022-12-07,2023-01-06,2023-02-05,21.0,Repeat Client


In [17]:
# Per store: how many Bloom 1 loans have been charged a penalty.
defaulters_bloom_one_count = (
    defaulters_bloom_one
    .groupby(['store_number'], as_index=False)['loan_mifos_id']
    .count()
    .rename(columns={'loan_mifos_id':'defaulters_bloom_one_count'})
)

defaulters_bloom_one_count.head()

Unnamed: 0,store_number,defaulters_bloom_one_count
0,58169,1
1,101705,1
2,103914,1
3,105639,1
4,105649,1


In [18]:
# Per store: how many Bloom 2 loans have been charged a penalty.
defaulters_bloom_two_count = (
    defaulters_bloom_two
    .groupby(['store_number'], as_index=False)['loan_mifos_id']
    .count()
    .rename(columns={'loan_mifos_id':'defaulters_bloom_two_count'})
)

defaulters_bloom_two_count.head()

Unnamed: 0,store_number,defaulters_bloom_two_count
0,30,1
1,3227,5
2,58133,1
3,105570,1
4,105652,3


In [19]:
# Side-by-side Bloom 1 / Bloom 2 penalised-loan counts per store. The outer
# join keeps stores present on either side; the missing side is filled with 0.
defaulters_count = pd.merge(
    defaulters_bloom_one_count,
    defaulters_bloom_two_count,
    on = 'store_number',
    how = 'outer',
).fillna(0)

defaulters_count.head(15)

Unnamed: 0,store_number,defaulters_bloom_one_count,defaulters_bloom_two_count
0,58169,1.0,0.0
1,101705,1.0,0.0
2,103914,1.0,0.0
3,105639,1.0,0.0
4,105649,1.0,0.0
5,105754,1.0,0.0
6,110138,2.0,0.0
7,110169,1.0,1.0
8,110398,1.0,0.0
9,110501,1.0,2.0


In [20]:
# Sanity check: the current-defaulter filter should leave only loan_status 300.
current_defaulters['loan_status'].value_counts()

300    26251
Name: loan_status, dtype: int64

In [21]:
# Current defaulters whose loan has bloom_version 1.
is_bloom_one = current_defaulters['bloom_version'] == 1
current_defaulters_bloom_one = current_defaulters[is_bloom_one]

current_defaulters_bloom_one.shape

(5735, 24)

In [22]:
# Preview the current Bloom 1 defaulters.
current_defaulters_bloom_one.head()

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag
751,230443,257900,300,30,62000.0,62000.0,4340.0,3649.0,3649.0,20181.0,12681.0,90170.0,7500.0,82670.0,82670.0,1.0,2021-11-05,2021-12-05,NaT,,,,53.0,Repeat Client
942,230662,540238,300,30,21000.0,21000.0,1470.0,1181.0,1181.0,6784.0,4744.0,30435.0,2040.0,28395.0,28395.0,1.0,2021-11-06,2021-12-06,NaT,,,,3.0,Repeat Client
984,230576,919946,300,30,25000.0,25000.0,1750.0,1472.0,1472.0,8327.0,8327.0,36549.0,0.0,36549.0,36549.0,1.0,2021-11-05,2021-12-05,NaT,,,,2.0,Repeat Client
1828,231651,607332,300,30,18900.0,18900.0,1323.0,948.0,948.0,5363.0,2363.0,26534.0,3000.0,23534.0,23534.0,1.0,2021-11-10,2021-12-10,NaT,,,,7.0,Repeat Client
4744,230995,819715,300,30,40000.0,40000.0,2800.0,2310.0,2310.0,12894.0,9137.0,58004.0,3757.0,54247.0,54247.0,1.0,2021-11-08,2021-12-08,NaT,,,,2.0,Repeat Client


In [23]:
# Attach each store's Bloom 1 penalised-loan count to its current Bloom 1 defaults.
# NOTE: this rebinds the frame it reads from, so the cell is not re-runnable:
# a second run merges again and the count column comes back with _x/_y suffixes.
current_defaulters_bloom_one = pd.merge(
    left=current_defaulters_bloom_one,
    right=defaulters_bloom_one_count,
    how='left',
    on='store_number',
)

current_defaulters_bloom_one.head()

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_one_count
0,230443,257900,300,30,62000.0,62000.0,4340.0,3649.0,3649.0,20181.0,12681.0,90170.0,7500.0,82670.0,82670.0,1.0,2021-11-05,2021-12-05,NaT,,,,53.0,Repeat Client,1
1,230662,540238,300,30,21000.0,21000.0,1470.0,1181.0,1181.0,6784.0,4744.0,30435.0,2040.0,28395.0,28395.0,1.0,2021-11-06,2021-12-06,NaT,,,,3.0,Repeat Client,1
2,230576,919946,300,30,25000.0,25000.0,1750.0,1472.0,1472.0,8327.0,8327.0,36549.0,0.0,36549.0,36549.0,1.0,2021-11-05,2021-12-05,NaT,,,,2.0,Repeat Client,1
3,231651,607332,300,30,18900.0,18900.0,1323.0,948.0,948.0,5363.0,2363.0,26534.0,3000.0,23534.0,23534.0,1.0,2021-11-10,2021-12-10,NaT,,,,7.0,Repeat Client,1
4,230995,819715,300,30,40000.0,40000.0,2800.0,2310.0,2310.0,12894.0,9137.0,58004.0,3757.0,54247.0,54247.0,1.0,2021-11-08,2021-12-08,NaT,,,,2.0,Repeat Client,1


In [24]:
def cal_first_bloom_one_default(df):
    """Classify a row by its store's Bloom 1 penalised-loan count.

    Args:
        df: one row (indexable by column name) providing
            'defaulters_bloom_one_count'.

    Returns:
        'Bloom One First Default' when the count equals 1, otherwise
        'Bloom One Subsequent Default'.
    """
    is_first = df['defaulters_bloom_one_count'] == 1
    return 'Bloom One First Default' if is_first else 'Bloom One Subsequent Default'

In [25]:
# Flag each current Bloom 1 default as a first or subsequent default (row-wise apply).
current_defaulters_bloom_one['Default Flag'] = current_defaulters_bloom_one.apply(
    cal_first_bloom_one_default,
    axis=1,
)

current_defaulters_bloom_one.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_one_count,Default Flag
0,230443,257900,300,30,62000.0,62000.0,4340.0,3649.0,3649.0,20181.0,12681.0,90170.0,7500.0,82670.0,82670.0,1.0,2021-11-05,2021-12-05,NaT,,,,53.0,Repeat Client,1,Bloom One First Default
1,230662,540238,300,30,21000.0,21000.0,1470.0,1181.0,1181.0,6784.0,4744.0,30435.0,2040.0,28395.0,28395.0,1.0,2021-11-06,2021-12-06,NaT,,,,3.0,Repeat Client,1,Bloom One First Default
2,230576,919946,300,30,25000.0,25000.0,1750.0,1472.0,1472.0,8327.0,8327.0,36549.0,0.0,36549.0,36549.0,1.0,2021-11-05,2021-12-05,NaT,,,,2.0,Repeat Client,1,Bloom One First Default
3,231651,607332,300,30,18900.0,18900.0,1323.0,948.0,948.0,5363.0,2363.0,26534.0,3000.0,23534.0,23534.0,1.0,2021-11-10,2021-12-10,NaT,,,,7.0,Repeat Client,1,Bloom One First Default
4,230995,819715,300,30,40000.0,40000.0,2800.0,2310.0,2310.0,12894.0,9137.0,58004.0,3757.0,54247.0,54247.0,1.0,2021-11-08,2021-12-08,NaT,,,,2.0,Repeat Client,1,Bloom One First Default


In [26]:
# Number of current Bloom 1 defaults in each 'Default Flag' category.
current_defaulters_bloom_one['Default Flag'].value_counts()

Bloom One First Default         4433
Bloom One Subsequent Default    1302
Name: Default Flag, dtype: int64

In [27]:
# Current defaulters whose loan has bloom_version 2.
is_bloom_two = current_defaulters['bloom_version'] == 2
current_defaulters_bloom_two = current_defaulters[is_bloom_two]

current_defaulters_bloom_two.shape

(20516, 24)

In [28]:
# Attach each store's Bloom 2 penalised-loan count to its current Bloom 2 defaults.
# NOTE: this rebinds the frame it reads from, so the cell is not re-runnable:
# a second run merges again and the count column comes back with _x/_y suffixes.
current_defaulters_bloom_two = pd.merge(
    left=current_defaulters_bloom_two,
    right=defaulters_bloom_two_count,
    how='left',
    on='store_number',
)

current_defaulters_bloom_two.head()

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count
0,110344,7276538,300,7,2500.0,2500.0,65.01,59.13,59.13,950.87,456.44,3575.01,494.43,3080.58,3502.41,2.0,2022-05-31,2022-06-07,2022-06-10,2022-07-10,2022-08-09,2022-09-08,8.0,Repeat Client,1
1,108190,7376448,300,7,3400.0,3400.0,88.41,85.0,85.0,1562.05,1500.82,5135.46,61.23,5074.23,5593.73,2.0,2022-05-27,2022-06-03,2022-06-06,2022-07-06,2022-08-05,2022-09-04,1.0,New Client,1
2,156885,7278403,300,21,7900.0,7900.0,600.41,135.33,135.33,4071.57,4071.57,12707.31,0.0,12707.31,13467.12,2.0,2022-07-15,2022-08-05,2022-08-10,2022-09-09,2022-10-09,2022-11-08,17.0,Repeat Client,5
3,179316,7593440,300,7,1700.0,831.41,0.0,92.13,0.0,465.07,0.0,2301.41,1470.0,831.41,812.02,2.0,2022-07-28,2022-08-04,2022-08-07,2022-09-06,2022-10-06,2022-11-05,3.0,Repeat Client,3
4,200776,7270475,300,7,6500.0,49.71,0.0,201.08,0.0,49.71,0.0,6953.59,6903.88,49.71,6903.88,2.0,2022-08-27,2022-09-03,2022-09-06,2022-10-06,2022-11-05,2022-12-05,4.0,Repeat Client,3


In [29]:
# Spot check: current Bloom 2 defaults for one store (no rows in the saved output).
current_defaulters_bloom_two[(current_defaulters_bloom_two['store_number'] == '7468569')]

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count


In [30]:
def cal_first_bloom_two_default(df):
    """Classify a row by its store's Bloom 2 penalised-loan count.

    Args:
        df: one row (indexable by column name) providing
            'defaulters_bloom_two_count'.

    Returns:
        'Bloom Two First Default' when the count equals 1, otherwise
        'Bloom Two Subsequent Default'.
    """
    is_first = df['defaulters_bloom_two_count'] == 1
    return 'Bloom Two First Default' if is_first else 'Bloom Two Subsequent Default'

In [31]:
# Flag each current Bloom 2 default as a first or subsequent default (row-wise apply).
current_defaulters_bloom_two['Default Flag'] = current_defaulters_bloom_two.apply(
    cal_first_bloom_two_default,
    axis=1,
)

current_defaulters_bloom_two.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag
0,110344,7276538,300,7,2500.0,2500.0,65.01,59.13,59.13,950.87,456.44,3575.01,494.43,3080.58,3502.41,2.0,2022-05-31,2022-06-07,2022-06-10,2022-07-10,2022-08-09,2022-09-08,8.0,Repeat Client,1,Bloom Two First Default
1,108190,7376448,300,7,3400.0,3400.0,88.41,85.0,85.0,1562.05,1500.82,5135.46,61.23,5074.23,5593.73,2.0,2022-05-27,2022-06-03,2022-06-06,2022-07-06,2022-08-05,2022-09-04,1.0,New Client,1,Bloom Two First Default
2,156885,7278403,300,21,7900.0,7900.0,600.41,135.33,135.33,4071.57,4071.57,12707.31,0.0,12707.31,13467.12,2.0,2022-07-15,2022-08-05,2022-08-10,2022-09-09,2022-10-09,2022-11-08,17.0,Repeat Client,5,Bloom Two Subsequent Default
3,179316,7593440,300,7,1700.0,831.41,0.0,92.13,0.0,465.07,0.0,2301.41,1470.0,831.41,812.02,2.0,2022-07-28,2022-08-04,2022-08-07,2022-09-06,2022-10-06,2022-11-05,3.0,Repeat Client,3,Bloom Two Subsequent Default
4,200776,7270475,300,7,6500.0,49.71,0.0,201.08,0.0,49.71,0.0,6953.59,6903.88,49.71,6903.88,2.0,2022-08-27,2022-09-03,2022-09-06,2022-10-06,2022-11-05,2022-12-05,4.0,Repeat Client,3,Bloom Two Subsequent Default


In [32]:
# Number of current Bloom 2 defaults in each 'Default Flag' category.
current_defaulters_bloom_two['Default Flag'].value_counts()

Bloom Two First Default         13631
Bloom Two Subsequent Default     6885
Name: Default Flag, dtype: int64

In [33]:
# Column dtypes and non-null counts for the current Bloom 2 defaulters.
current_defaulters_bloom_two.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20516 entries, 0 to 20515
Data columns (total 26 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   loan_mifos_id                20516 non-null  int64         
 1   store_number                 20516 non-null  object        
 2   loan_status                  20516 non-null  int64         
 3   term_frequency               20516 non-null  int64         
 4   principal_disbursed          20516 non-null  float64       
 5   principal_outstanding        20516 non-null  float64       
 6   interest_outstanding         20516 non-null  float64       
 7   fee_charges_charged          20516 non-null  float64       
 8   fee_charges_outstanding      20516 non-null  float64       
 9   penalty_charges_charged      20516 non-null  float64       
 10  penalty_charges_outstanding  20516 non-null  float64       
 11  total_expected_repayment     20516 non-nu

In [34]:
def cal_occurred_after_first_limit_reductions(df):
    """Flag a loan row relative to the 2022-07-27 cut-off date.

    Args:
        df: one row (indexable by column name) providing 'disbursed_on_date'
            and 'expected_matured_on_date' as datetime-like values.

    Returns:
        'Default occurred after first limit reductions' when the loan was
        disbursed on or before 2022-07-27 AND its expected maturity is after
        that date; 'Default occurred before first limit reductions' in every
        other case (including NaT dates, which compare as False).

    NOTE(review): a loan disbursed after the cut-off is labelled "before",
    although any default on it necessarily comes after the cut-off. The test
    is really "loan was open across the cut-off" - confirm the labels say
    what is intended.
    """
    cutoff = pd.to_datetime('2022-07-27')
    matured = df['expected_matured_on_date']
    disbursed = df['disbursed_on_date']

    if disbursed <= cutoff and matured > cutoff:
        return 'Default occurred after first limit reductions'
    return 'Default occurred before first limit reductions'

In [35]:
# Flag each current Bloom 2 default against the first limit-reduction date (row-wise apply).
current_defaulters_bloom_two['First Limit Reductions Flag'] = current_defaulters_bloom_two.apply(
    cal_occurred_after_first_limit_reductions,
    axis=1,
)

current_defaulters_bloom_two.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag
0,110344,7276538,300,7,2500.0,2500.0,65.01,59.13,59.13,950.87,456.44,3575.01,494.43,3080.58,3502.41,2.0,2022-05-31,2022-06-07,2022-06-10,2022-07-10,2022-08-09,2022-09-08,8.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions
1,108190,7376448,300,7,3400.0,3400.0,88.41,85.0,85.0,1562.05,1500.82,5135.46,61.23,5074.23,5593.73,2.0,2022-05-27,2022-06-03,2022-06-06,2022-07-06,2022-08-05,2022-09-04,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions
2,156885,7278403,300,21,7900.0,7900.0,600.41,135.33,135.33,4071.57,4071.57,12707.31,0.0,12707.31,13467.12,2.0,2022-07-15,2022-08-05,2022-08-10,2022-09-09,2022-10-09,2022-11-08,17.0,Repeat Client,5,Bloom Two Subsequent Default,Default occurred after first limit reductions
3,179316,7593440,300,7,1700.0,831.41,0.0,92.13,0.0,465.07,0.0,2301.41,1470.0,831.41,812.02,2.0,2022-07-28,2022-08-04,2022-08-07,2022-09-06,2022-10-06,2022-11-05,3.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions
4,200776,7270475,300,7,6500.0,49.71,0.0,201.08,0.0,49.71,0.0,6953.59,6903.88,49.71,6903.88,2.0,2022-08-27,2022-09-03,2022-09-06,2022-10-06,2022-11-05,2022-12-05,4.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions


In [36]:
def cal_occurred_after_second_limit_reductions(df):
    """Flag a loan row relative to the 2022-08-03 cut-off date.

    Args:
        df: one row (indexable by column name) providing 'disbursed_on_date'
            and 'expected_matured_on_date' as datetime-like values.

    Returns:
        'Default occurred after second limit reductions' when the loan was
        disbursed on or before 2022-08-03 AND its expected maturity is after
        that date; 'Default occurred before second limit reductions' in every
        other case (including NaT dates, which compare as False).

    NOTE(review): a loan disbursed after the cut-off is labelled "before",
    although any default on it necessarily comes after the cut-off. The test
    is really "loan was open across the cut-off" - confirm the labels say
    what is intended.
    """
    cutoff = pd.to_datetime('2022-08-03')
    matured = df['expected_matured_on_date']
    disbursed = df['disbursed_on_date']

    if disbursed <= cutoff and matured > cutoff:
        return 'Default occurred after second limit reductions'
    return 'Default occurred before second limit reductions'

In [37]:
# Flag each current Bloom 2 default against the second limit-reduction date (row-wise apply).
current_defaulters_bloom_two['Second Limit Reductions Flag'] = current_defaulters_bloom_two.apply(
    cal_occurred_after_second_limit_reductions,
    axis=1,
)

current_defaulters_bloom_two.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag
0,110344,7276538,300,7,2500.0,2500.0,65.01,59.13,59.13,950.87,456.44,3575.01,494.43,3080.58,3502.41,2.0,2022-05-31,2022-06-07,2022-06-10,2022-07-10,2022-08-09,2022-09-08,8.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions
1,108190,7376448,300,7,3400.0,3400.0,88.41,85.0,85.0,1562.05,1500.82,5135.46,61.23,5074.23,5593.73,2.0,2022-05-27,2022-06-03,2022-06-06,2022-07-06,2022-08-05,2022-09-04,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions
2,156885,7278403,300,21,7900.0,7900.0,600.41,135.33,135.33,4071.57,4071.57,12707.31,0.0,12707.31,13467.12,2.0,2022-07-15,2022-08-05,2022-08-10,2022-09-09,2022-10-09,2022-11-08,17.0,Repeat Client,5,Bloom Two Subsequent Default,Default occurred after first limit reductions,Default occurred after second limit reductions
3,179316,7593440,300,7,1700.0,831.41,0.0,92.13,0.0,465.07,0.0,2301.41,1470.0,831.41,812.02,2.0,2022-07-28,2022-08-04,2022-08-07,2022-09-06,2022-10-06,2022-11-05,3.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred after second limit reductions
4,200776,7270475,300,7,6500.0,49.71,0.0,201.08,0.0,49.71,0.0,6953.59,6903.88,49.71,6903.88,2.0,2022-08-27,2022-09-03,2022-09-06,2022-10-06,2022-11-05,2022-12-05,4.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions


In [38]:
# Number of current Bloom 2 defaults in each 'First Limit Reductions Flag' category.
current_defaulters_bloom_two['First Limit Reductions Flag'].value_counts()

Default occurred before first limit reductions    19460
Default occurred after first limit reductions      1056
Name: First Limit Reductions Flag, dtype: int64

In [39]:
# Number of current Bloom 2 defaults in each 'Second Limit Reductions Flag' category.
current_defaulters_bloom_two['Second Limit Reductions Flag'].value_counts()

Default occurred before second limit reductions    19403
Default occurred after second limit reductions      1113
Name: Second Limit Reductions Flag, dtype: int64

In [40]:
# New versus repeat clients among the current Bloom 2 defaults.
current_defaulters_bloom_two['Loan Count Flag'].value_counts()

Repeat Client    15789
New Client        4727
Name: Loan Count Flag, dtype: int64

In [41]:
def saf_loan_balance(df):
    """Report whether a row carries a non-negative Safaricom balance.

    NOTE: this reuses the name of an earlier function in this notebook that
    returns a balance rather than 'yes'/'no'; from this cell onwards the
    earlier definition is shadowed.

    Args:
        df: one row (indexable by column name) providing
            'safaricom_loan_balance'.

    Returns:
        'yes' when the balance is >= 0, 'no' otherwise (negative or NaN).
    """
    has_balance = df['safaricom_loan_balance'] >= 0
    return 'yes' if has_balance else 'no'

In [42]:
# 'yes'/'no' column: does the row have a non-negative Safaricom balance? (row-wise apply)
current_defaulters_bloom_two['saf_loan_balance'] = current_defaulters_bloom_two.apply(
    saf_loan_balance,
    axis=1,
)

current_defaulters_bloom_two.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance
0,110344,7276538,300,7,2500.0,2500.0,65.01,59.13,59.13,950.87,456.44,3575.01,494.43,3080.58,3502.41,2.0,2022-05-31,2022-06-07,2022-06-10,2022-07-10,2022-08-09,2022-09-08,8.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes
1,108190,7376448,300,7,3400.0,3400.0,88.41,85.0,85.0,1562.05,1500.82,5135.46,61.23,5074.23,5593.73,2.0,2022-05-27,2022-06-03,2022-06-06,2022-07-06,2022-08-05,2022-09-04,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes
2,156885,7278403,300,21,7900.0,7900.0,600.41,135.33,135.33,4071.57,4071.57,12707.31,0.0,12707.31,13467.12,2.0,2022-07-15,2022-08-05,2022-08-10,2022-09-09,2022-10-09,2022-11-08,17.0,Repeat Client,5,Bloom Two Subsequent Default,Default occurred after first limit reductions,Default occurred after second limit reductions,yes
3,179316,7593440,300,7,1700.0,831.41,0.0,92.13,0.0,465.07,0.0,2301.41,1470.0,831.41,812.02,2.0,2022-07-28,2022-08-04,2022-08-07,2022-09-06,2022-10-06,2022-11-05,3.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred after second limit reductions,yes
4,200776,7270475,300,7,6500.0,49.71,0.0,201.08,0.0,49.71,0.0,6953.59,6903.88,49.71,6903.88,2.0,2022-08-27,2022-09-03,2022-09-06,2022-10-06,2022-11-05,2022-12-05,4.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes


In [43]:
def calc_loan_balance(df):
    """Choose the balance figure for a row from its 'saf_loan_balance' flag.

    Args:
        df: one row (indexable by column name) providing 'saf_loan_balance',
            'total_outstanding' and 'safaricom_loan_balance'.

    Returns:
        The row's 'safaricom_loan_balance' when the flag is exactly 'yes';
        the row's 'total_outstanding' for any other flag value.
    """
    flag = df['saf_loan_balance']
    outstanding = df['total_outstanding']
    safaricom = df['safaricom_loan_balance']

    return safaricom if flag == 'yes' else outstanding

In [44]:
# Final balance per row, chosen by calc_loan_balance (row-wise apply).
current_defaulters_bloom_two['loan_balance'] = current_defaulters_bloom_two.apply(
    calc_loan_balance,
    axis=1,
)

current_defaulters_bloom_two.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance,loan_balance
0,110344,7276538,300,7,2500.0,2500.0,65.01,59.13,59.13,950.87,456.44,3575.01,494.43,3080.58,3502.41,2.0,2022-05-31,2022-06-07,2022-06-10,2022-07-10,2022-08-09,2022-09-08,8.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,3502.41
1,108190,7376448,300,7,3400.0,3400.0,88.41,85.0,85.0,1562.05,1500.82,5135.46,61.23,5074.23,5593.73,2.0,2022-05-27,2022-06-03,2022-06-06,2022-07-06,2022-08-05,2022-09-04,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,5593.73
2,156885,7278403,300,21,7900.0,7900.0,600.41,135.33,135.33,4071.57,4071.57,12707.31,0.0,12707.31,13467.12,2.0,2022-07-15,2022-08-05,2022-08-10,2022-09-09,2022-10-09,2022-11-08,17.0,Repeat Client,5,Bloom Two Subsequent Default,Default occurred after first limit reductions,Default occurred after second limit reductions,yes,13467.12
3,179316,7593440,300,7,1700.0,831.41,0.0,92.13,0.0,465.07,0.0,2301.41,1470.0,831.41,812.02,2.0,2022-07-28,2022-08-04,2022-08-07,2022-09-06,2022-10-06,2022-11-05,3.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred after second limit reductions,yes,812.02
4,200776,7270475,300,7,6500.0,49.71,0.0,201.08,0.0,49.71,0.0,6953.59,6903.88,49.71,6903.88,2.0,2022-08-27,2022-09-03,2022-09-06,2022-10-06,2022-11-05,2022-12-05,4.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,6903.88


In [45]:
# Connection settings for the Postgres database, read from the environment.
# The password must never be stored in the notebook: export BLOOM_DB_PASSWORD
# before starting the kernel. The other settings fall back to the values this
# notebook has always used, and can be overridden the same way.
# NOTE: the password that used to be hardcoded here has been exposed in plain
# text and should be rotated.
host = os.environ.get('BLOOM_DB_HOST', '157.245.248.249')
port = int(os.environ.get('BLOOM_DB_PORT', 5432))
dbname = os.environ.get('BLOOM_DB_NAME', 'ubuntu')
user = os.environ.get('BLOOM_DB_USER', 'jacklinengenia')
password = os.environ.get('BLOOM_DB_PASSWORD')  # None when the variable is unset


def get_query_results_postgres():
    """Load the scored limits for the four model versions used in this analysis.

    Runs a fixed SELECT against ``bloomlive.temp_scoring_summaries`` using the
    module-level ``host``, ``port``, ``dbname``, ``user`` and ``password``
    settings.

    Note: this reuses the name of the loans loader defined near the top of the
    notebook, so once this cell has run the name refers to this limits query.

    Returns:
        pandas.DataFrame: one row per query result with the columns
        ``store_number``, ``final_21_limit``, ``final_7_limit``,
        ``final_1_limit`` and ``model_version``.
    """
    sql = "select store_number, final_21_limit, final_7_limit, final_1_limit, model_version from bloomlive.temp_scoring_summaries where model_version in('2022-004[2022-05-14, 2022-06-30]', '2022-005[2022-07-27, 2022-07-27]', '2022-005[2022-07-27, 2022-08-03]', '2022-007[2022-10-21, 2022-11-03]')"

    conn = psycopg2.connect(host = host,
                            port = port,
                            database = dbname,
                            user = user,
                            password = password)
    try:
        # psycopg2's connection context manager only ends the transaction
        # (commit on success, rollback on error); it does not close the
        # connection, so that is done explicitly in the finally clause.
        with conn:
            df = pd.read_sql(sql, conn)
    finally:
        conn.close()

    return df

In [46]:
# Fetch the scored limits with the loader defined in the previous cell.
limits = get_query_results_postgres()

# Preview the first three rows.
limits.head(3)



Unnamed: 0,store_number,final_21_limit,final_7_limit,final_1_limit,model_version
0,351645,0,18400,0,"2022-007[2022-10-21, 2022-11-03]"
1,822369,0,0,0,"2022-004[2022-05-14, 2022-06-30]"
2,786152,0,0,0,"2022-004[2022-05-14, 2022-06-30]"


In [47]:
# The global limit of a row is the largest of its three final product limits.
_final_limit_columns = ["final_21_limit", "final_7_limit", "final_1_limit"]
limits["final_global_limit"] = limits[_final_limit_columns].max(axis=1)

limits.head()

Unnamed: 0,store_number,final_21_limit,final_7_limit,final_1_limit,model_version,final_global_limit
0,351645,0,18400,0,"2022-007[2022-10-21, 2022-11-03]",18400
1,822369,0,0,0,"2022-004[2022-05-14, 2022-06-30]",0
2,786152,0,0,0,"2022-004[2022-05-14, 2022-06-30]",0
3,165978,0,23000,23000,"2022-004[2022-05-14, 2022-06-30]",23000
4,836820,0,0,0,"2022-004[2022-05-14, 2022-06-30]",0


In [48]:
# Keep only the store key, the model version and the derived global limit.
_kept_limit_columns = ['store_number', 'model_version', 'final_global_limit']
limits = limits[_kept_limit_columns]

limits.head()

Unnamed: 0,store_number,model_version,final_global_limit
0,351645,"2022-007[2022-10-21, 2022-11-03]",18400
1,822369,"2022-004[2022-05-14, 2022-06-30]",0
2,786152,"2022-004[2022-05-14, 2022-06-30]",0
3,165978,"2022-004[2022-05-14, 2022-06-30]",23000
4,836820,"2022-004[2022-05-14, 2022-06-30]",0


In [49]:
# Split the limits table into one frame per model version. Each frame keeps
# every column except model_version (store_number and final_global_limit).
_model_version = limits['model_version']

previous_refresh = limits[_model_version == '2022-004[2022-05-14, 2022-06-30]'].drop(columns='model_version')

first_refresh = limits[_model_version == '2022-005[2022-07-27, 2022-07-27]'].drop(columns='model_version')

second_refresh = limits[_model_version == '2022-005[2022-07-27, 2022-08-03]'].drop(columns='model_version')

current_refresh = limits[_model_version == '2022-007[2022-10-21, 2022-11-03]'].drop(columns='model_version')


In [50]:
# Attach the global limit from each refresh to the defaulters frame. Every
# step is a left join on store_number, so defaulter rows are never dropped;
# stores without a limit in a given refresh get 0 for that refresh.
_refresh_limit_columns = (
    (previous_refresh, 'previous_final_global_limit'),
    (first_refresh, 'first_final_global_limit'),
    (second_refresh, 'second_final_global_limit'),
    (current_refresh, 'current_final_global_limit'),
)

for _refresh_limits, _limit_column in _refresh_limit_columns:
    current_defaulters_bloom_two = (
        current_defaulters_bloom_two
        .merge(_refresh_limits, on='store_number', how='left')
        .rename(columns={'final_global_limit': _limit_column})
    )
    current_defaulters_bloom_two[_limit_column] = current_defaulters_bloom_two[_limit_column].fillna(0)

current_defaulters_bloom_two.head()

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance,loan_balance,previous_final_global_limit,first_final_global_limit,second_final_global_limit,current_final_global_limit
0,110344,7276538,300,7,2500.0,2500.0,65.01,59.13,59.13,950.87,456.44,3575.01,494.43,3080.58,3502.41,2.0,2022-05-31,2022-06-07,2022-06-10,2022-07-10,2022-08-09,2022-09-08,8.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,3502.41,4600.0,0.0,0.0,0.0
1,108190,7376448,300,7,3400.0,3400.0,88.41,85.0,85.0,1562.05,1500.82,5135.46,61.23,5074.23,5593.73,2.0,2022-05-27,2022-06-03,2022-06-06,2022-07-06,2022-08-05,2022-09-04,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,5593.73,1900.0,1300.0,0.0,0.0
2,156885,7278403,300,21,7900.0,7900.0,600.41,135.33,135.33,4071.57,4071.57,12707.31,0.0,12707.31,13467.12,2.0,2022-07-15,2022-08-05,2022-08-10,2022-09-09,2022-10-09,2022-11-08,17.0,Repeat Client,5,Bloom Two Subsequent Default,Default occurred after first limit reductions,Default occurred after second limit reductions,yes,13467.12,7900.0,4900.0,0.0,0.0
3,179316,7593440,300,7,1700.0,831.41,0.0,92.13,0.0,465.07,0.0,2301.41,1470.0,831.41,812.02,2.0,2022-07-28,2022-08-04,2022-08-07,2022-09-06,2022-10-06,2022-11-05,3.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred after second limit reductions,yes,812.02,1700.0,1700.0,0.0,0.0
4,200776,7270475,300,7,6500.0,49.71,0.0,201.08,0.0,49.71,0.0,6953.59,6903.88,49.71,6903.88,2.0,2022-08-27,2022-09-03,2022-09-06,2022-10-06,2022-11-05,2022-12-05,4.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,6903.88,7000.0,7000.0,6900.0,0.0


In [51]:
# (rows, columns) of the defaulters frame after the limit merges.
current_defaulters_bloom_two.shape

(20516, 34)

In [52]:
# Column totals of the four global-limit columns.
current_defaulters_bloom_two[['previous_final_global_limit', 'first_final_global_limit', 'second_final_global_limit', 'current_final_global_limit']].sum()

previous_final_global_limit   284050800.00
first_final_global_limit      183306900.00
second_final_global_limit      62488200.00
current_final_global_limit     40705800.00
dtype: float64

In [53]:
def calc_first_zeroized(df):
    """Label a row by whether its limit dropped to zero at the first refresh.

    Args:
        df: a single row (the notebook passes each DataFrame row via
            ``apply(axis=1)``) providing ``previous_final_global_limit`` and
            ``first_final_global_limit``.

    Returns:
        ``'first zeroized'`` when ``first_final_global_limit`` equals 0 while
        ``previous_final_global_limit`` is greater than 0, otherwise
        ``'first not zeroized'``.
    """
    had_limit_before = df['previous_final_global_limit'] > 0
    zero_after_first = df['first_final_global_limit'] == 0

    return 'first zeroized' if (zero_after_first and had_limit_before) else 'first not zeroized'
    
    
def calc_second_zeroized(df):
    """Label a row by whether its limit is zero at the second refresh.

    Args:
        df: a single row (the notebook passes each DataFrame row via
            ``apply(axis=1)``) providing ``previous_final_global_limit`` and
            ``second_final_global_limit``.

    Returns:
        ``'second zeroized'`` when ``second_final_global_limit`` equals 0 while
        ``previous_final_global_limit`` is greater than 0, otherwise
        ``'second not zeroized'``.
    """
    had_limit_before = df['previous_final_global_limit'] > 0
    zero_after_second = df['second_final_global_limit'] == 0

    if not (zero_after_second and had_limit_before):
        return 'second not zeroized'
    return 'second zeroized'

In [54]:
# Label every row with its first/second "zeroized" status (row-wise apply).
for _flag_column, _row_labeller in (
    ('first zeroized', calc_first_zeroized),
    ('second zeroized', calc_second_zeroized),
):
    current_defaulters_bloom_two[_flag_column] = current_defaulters_bloom_two.apply(_row_labeller, axis=1)

current_defaulters_bloom_two.head()

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance,loan_balance,previous_final_global_limit,first_final_global_limit,second_final_global_limit,current_final_global_limit,first zeroized,second zeroized
0,110344,7276538,300,7,2500.0,2500.0,65.01,59.13,59.13,950.87,456.44,3575.01,494.43,3080.58,3502.41,2.0,2022-05-31,2022-06-07,2022-06-10,2022-07-10,2022-08-09,2022-09-08,8.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,3502.41,4600.0,0.0,0.0,0.0,first zeroized,second zeroized
1,108190,7376448,300,7,3400.0,3400.0,88.41,85.0,85.0,1562.05,1500.82,5135.46,61.23,5074.23,5593.73,2.0,2022-05-27,2022-06-03,2022-06-06,2022-07-06,2022-08-05,2022-09-04,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,5593.73,1900.0,1300.0,0.0,0.0,first not zeroized,second zeroized
2,156885,7278403,300,21,7900.0,7900.0,600.41,135.33,135.33,4071.57,4071.57,12707.31,0.0,12707.31,13467.12,2.0,2022-07-15,2022-08-05,2022-08-10,2022-09-09,2022-10-09,2022-11-08,17.0,Repeat Client,5,Bloom Two Subsequent Default,Default occurred after first limit reductions,Default occurred after second limit reductions,yes,13467.12,7900.0,4900.0,0.0,0.0,first not zeroized,second zeroized
3,179316,7593440,300,7,1700.0,831.41,0.0,92.13,0.0,465.07,0.0,2301.41,1470.0,831.41,812.02,2.0,2022-07-28,2022-08-04,2022-08-07,2022-09-06,2022-10-06,2022-11-05,3.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred after second limit reductions,yes,812.02,1700.0,1700.0,0.0,0.0,first not zeroized,second zeroized
4,200776,7270475,300,7,6500.0,49.71,0.0,201.08,0.0,49.71,0.0,6953.59,6903.88,49.71,6903.88,2.0,2022-08-27,2022-09-03,2022-09-06,2022-10-06,2022-11-05,2022-12-05,4.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,6903.88,7000.0,7000.0,6900.0,0.0,first not zeroized,second not zeroized


In [55]:
# Show the rows where previous_final_global_limit is greater than loan_balance.
_limit_above_balance = (
    current_defaulters_bloom_two['previous_final_global_limit']
    > current_defaulters_bloom_two['loan_balance']
)
current_defaulters_bloom_two[_limit_above_balance]

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance,loan_balance,previous_final_global_limit,first_final_global_limit,second_final_global_limit,current_final_global_limit,first zeroized,second zeroized
0,110344,7276538,300,7,2500.00,2500.00,65.01,59.13,59.13,950.87,456.44,3575.01,494.43,3080.58,3502.41,2.00,2022-05-31,2022-06-07,2022-06-10,2022-07-10,2022-08-09,2022-09-08,8.00,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,3502.41,4600.00,0.00,0.00,0.00,first zeroized,second zeroized
3,179316,7593440,300,7,1700.00,831.41,0.00,92.13,0.00,465.07,0.00,2301.41,1470.00,831.41,812.02,2.00,2022-07-28,2022-08-04,2022-08-07,2022-09-06,2022-10-06,2022-11-05,3.00,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred after second limit reductions,yes,812.02,1700.00,1700.00,0.00,0.00,first not zeroized,second zeroized
4,200776,7270475,300,7,6500.00,49.71,0.00,201.08,0.00,49.71,0.00,6953.59,6903.88,49.71,6903.88,2.00,2022-08-27,2022-09-03,2022-09-06,2022-10-06,2022-11-05,2022-12-05,4.00,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,6903.88,7000.00,7000.00,6900.00,0.00,first not zeroized,second not zeroized
18,140571,7523642,300,7,3700.00,3700.00,96.21,34.80,34.80,1426.25,1426.25,5257.26,0.00,5257.26,5589.41,2.00,2022-06-29,2022-07-06,2022-07-09,2022-08-08,2022-09-07,2022-10-07,2.00,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,5589.41,6000.00,2800.00,0.00,0.00,first not zeroized,second zeroized
20,210522,7801326,300,7,2300.00,560.41,0.00,71.16,0.00,417.49,0.00,2860.41,2300.00,560.41,611.97,2.00,2022-09-14,2022-09-21,2022-09-24,2022-10-24,2022-11-23,2022-12-23,1.00,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,611.97,2300.00,2300.00,2300.00,0.00,first not zeroized,second not zeroized
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20493,227880,698030,300,21,11000.00,785.03,0.00,720.20,0.00,64.83,0.00,12788.23,12003.20,785.03,12003.20,2.00,2022-10-13,2022-11-03,2022-11-08,2022-12-08,2023-01-07,2023-02-06,38.00,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,12003.20,35000.00,0.00,0.00,8000.00,first zeroized,second zeroized
20496,22878,308492,300,21,200000.00,11728.74,0.00,10039.51,0.00,1689.22,0.00,229128.74,217400.00,11728.74,11728.74,2.00,2022-02-05,2022-02-26,2022-03-03,2022-04-02,2022-05-02,2022-06-01,99.00,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,11728.74,200000.00,200000.00,0.00,200000.00,first not zeroized,second zeroized
20502,185627,4012089,300,21,67400.00,36838.43,0.00,8294.49,0.00,18967.53,0.00,99784.43,62946.00,36838.43,32061.83,2.00,2022-08-04,2022-08-25,2022-08-30,2022-09-29,2022-10-29,2022-11-28,10.00,Repeat Client,2,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,32061.83,67400.00,36900.00,0.00,0.00,first not zeroized,second zeroized
20507,16527,278826,300,7,3000.00,3000.00,102.01,75.00,75.00,1356.00,1356.00,4533.01,0.00,4533.01,4533.01,2.00,2022-01-21,2022-01-28,2022-01-31,2022-03-02,2022-04-01,2022-05-01,2.00,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,4533.01,10900.00,10400.00,0.00,0.00,first not zeroized,second zeroized


In [56]:
def calc_first_balance_limit_flag(df):
    """Compare a row's previous global limit with its loan balance.

    Args:
        df: a single row (the notebook passes each DataFrame row via
            ``apply(axis=1)``) providing ``loan_balance`` and
            ``previous_final_global_limit``.

    Returns:
        ``'Lower than balance'`` when the previous limit is below the balance,
        ``'Higher than balance'`` when it is above, ``'Equal Balances'`` when
        the two are equal, and ``None`` when none of the comparisons holds
        (for example when either value is NaN).
    """
    balance, prior_limit = df['loan_balance'], df['previous_final_global_limit']

    if prior_limit < balance:
        return 'Lower than balance'
    if prior_limit > balance:
        return 'Higher than balance'
    if balance == prior_limit:
        return 'Equal Balances'
    # Incomparable values match none of the branches above.
    return None
    

def calc_second_balance_limit_flag(df):
    """Compare a row's previous global limit with its loan balance.

    NOTE(review): this performs exactly the same comparison as
    ``calc_first_balance_limit_flag`` (both read
    ``previous_final_global_limit``). Confirm that no other limit column was
    intended for the second-refresh flag.

    Args:
        df: a single row (the notebook passes each DataFrame row via
            ``apply(axis=1)``) providing ``loan_balance`` and
            ``previous_final_global_limit``.

    Returns:
        ``'Lower than balance'``, ``'Higher than balance'`` or
        ``'Equal Balances'`` according to how the previous limit compares with
        the balance; ``None`` when none of the comparisons holds (for example
        when either value is NaN).
    """
    balance = df['loan_balance']
    prior_limit = df['previous_final_global_limit']

    # Checked in order; the first comparison that holds decides the label.
    outcomes = (
        (prior_limit < balance, 'Lower than balance'),
        (prior_limit > balance, 'Higher than balance'),
        (balance == prior_limit, 'Equal Balances'),
    )
    return next((label for holds, label in outcomes if holds), None)
    

In [57]:
# Label every row with how its previous limit compares to its loan balance.
for _flag_column, _row_labeller in (
    ('first_balance_limit_flag', calc_first_balance_limit_flag),
    ('second_balance_limit_flag', calc_second_balance_limit_flag),
):
    current_defaulters_bloom_two[_flag_column] = current_defaulters_bloom_two.apply(_row_labeller, axis=1)

current_defaulters_bloom_two.head()

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance,loan_balance,previous_final_global_limit,first_final_global_limit,second_final_global_limit,current_final_global_limit,first zeroized,second zeroized,first_balance_limit_flag,second_balance_limit_flag
0,110344,7276538,300,7,2500.0,2500.0,65.01,59.13,59.13,950.87,456.44,3575.01,494.43,3080.58,3502.41,2.0,2022-05-31,2022-06-07,2022-06-10,2022-07-10,2022-08-09,2022-09-08,8.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,3502.41,4600.0,0.0,0.0,0.0,first zeroized,second zeroized,Higher than balance,Higher than balance
1,108190,7376448,300,7,3400.0,3400.0,88.41,85.0,85.0,1562.05,1500.82,5135.46,61.23,5074.23,5593.73,2.0,2022-05-27,2022-06-03,2022-06-06,2022-07-06,2022-08-05,2022-09-04,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,5593.73,1900.0,1300.0,0.0,0.0,first not zeroized,second zeroized,Lower than balance,Lower than balance
2,156885,7278403,300,21,7900.0,7900.0,600.41,135.33,135.33,4071.57,4071.57,12707.31,0.0,12707.31,13467.12,2.0,2022-07-15,2022-08-05,2022-08-10,2022-09-09,2022-10-09,2022-11-08,17.0,Repeat Client,5,Bloom Two Subsequent Default,Default occurred after first limit reductions,Default occurred after second limit reductions,yes,13467.12,7900.0,4900.0,0.0,0.0,first not zeroized,second zeroized,Lower than balance,Lower than balance
3,179316,7593440,300,7,1700.0,831.41,0.0,92.13,0.0,465.07,0.0,2301.41,1470.0,831.41,812.02,2.0,2022-07-28,2022-08-04,2022-08-07,2022-09-06,2022-10-06,2022-11-05,3.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred after second limit reductions,yes,812.02,1700.0,1700.0,0.0,0.0,first not zeroized,second zeroized,Higher than balance,Higher than balance
4,200776,7270475,300,7,6500.0,49.71,0.0,201.08,0.0,49.71,0.0,6953.59,6903.88,49.71,6903.88,2.0,2022-08-27,2022-09-03,2022-09-06,2022-10-06,2022-11-05,2022-12-05,4.0,Repeat Client,3,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,6903.88,7000.0,7000.0,6900.0,0.0,first not zeroized,second not zeroized,Higher than balance,Higher than balance


In [58]:
# Keep one row per store: after ordering by disbursed_on_date, the last row
# of each store_number is retained.
# NOTE(review): rows of one store that share a disbursed_on_date are ordered
# by the sort itself; confirm whether an explicit tie-breaker is needed.
current_defaulters_bloom_two = (
    current_defaulters_bloom_two
    .sort_values(by='disbursed_on_date')
    .drop_duplicates(subset=['store_number'], keep='last')
)

current_defaulters_bloom_two.head()

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance,loan_balance,previous_final_global_limit,first_final_global_limit,second_final_global_limit,current_final_global_limit,first zeroized,second zeroized,first_balance_limit_flag,second_balance_limit_flag
16008,3699,735346,300,7,150.0,150.0,5.09,0.0,0.0,20.4,20.4,175.49,0.0,175.49,175.49,2.0,2021-11-05,2021-11-12,2021-11-15,2021-12-15,2022-01-14,2022-02-13,102.0,Repeat Client,10,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,175.49,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
10635,3749,438125,300,21,8000.0,8000.0,695.47,434.8,434.8,2736.0,2736.0,11866.27,0.0,11866.27,11866.27,2.0,2021-11-11,2021-12-02,2021-12-07,2022-01-06,2022-02-05,2022-03-07,6.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,11866.27,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
11988,3766,146964,300,21,23400.0,23400.0,2034.26,1271.79,1271.79,8330.4,8330.4,35036.45,0.0,35036.45,35036.45,2.0,2021-11-12,2021-12-03,2021-12-08,2022-01-07,2022-02-06,2022-03-08,5.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,35036.45,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
17482,3757,913867,300,21,7500.0,7500.0,652.01,407.63,407.63,2670.0,2670.0,11229.64,0.0,11229.64,11229.64,2.0,2021-11-12,2021-12-03,2021-12-08,2022-01-07,2022-02-06,2022-03-08,10.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,11229.64,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
7347,3769,761748,300,7,15000.0,15000.0,509.47,387.75,387.75,2880.0,2880.0,18777.22,0.0,18777.22,18777.22,2.0,2021-11-12,2021-11-19,2021-11-22,2021-12-22,2022-01-21,2022-02-20,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,18777.22,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance


In [59]:
# Narrow the frame to the store key, the flag columns, the balances and the
# four global limits.
_report_columns = [
    'store_number',
    'Loan Count Flag',
    'Default Flag',
    'First Limit Reductions Flag',
    'Second Limit Reductions Flag',
    'total_outstanding',
    'safaricom_loan_balance',
    'loan_balance',
    'previous_final_global_limit',
    'first_final_global_limit',
    'second_final_global_limit',
    'current_final_global_limit',
    'first zeroized',
    'second zeroized',
    'first_balance_limit_flag',
    'second_balance_limit_flag',
]
current_defaulters_bloom_two = current_defaulters_bloom_two[_report_columns]

current_defaulters_bloom_two.head()

Unnamed: 0,store_number,Loan Count Flag,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,total_outstanding,safaricom_loan_balance,loan_balance,previous_final_global_limit,first_final_global_limit,second_final_global_limit,current_final_global_limit,first zeroized,second zeroized,first_balance_limit_flag,second_balance_limit_flag
16008,735346,Repeat Client,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,175.49,175.49,175.49,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
10635,438125,Repeat Client,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,11866.27,11866.27,11866.27,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
11988,146964,Repeat Client,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,35036.45,35036.45,35036.45,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
17482,913867,Repeat Client,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,11229.64,11229.64,11229.64,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
7347,761748,New Client,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,18777.22,18777.22,18777.22,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance


In [60]:
# Number of distinct store_number values in the frame.
current_defaulters_bloom_two['store_number'].nunique()

20167

In [61]:
# (rows, columns) of the frame; compare the row count with the distinct-store count above.
current_defaulters_bloom_two.shape

(20167, 16)

In [62]:
# Row count for each first_balance_limit_flag value.
current_defaulters_bloom_two['first_balance_limit_flag'].value_counts()

Lower than balance     17635
Higher than balance     2532
Name: first_balance_limit_flag, dtype: int64

In [63]:
# First batch: rows that satisfy all four conditions below.
_is_first_default = current_defaulters_bloom_two['Default Flag'] == 'Bloom Two First Default'
_after_first_reduction = current_defaulters_bloom_two['First Limit Reductions Flag'] == 'Default occurred after first limit reductions'
_limit_below_balance = current_defaulters_bloom_two['first_balance_limit_flag'] == 'Lower than balance'
_first_zeroized = current_defaulters_bloom_two['first zeroized'] == 'first zeroized'

_first_batch_columns = [
    'store_number',
    'Loan Count Flag',
    'Default Flag',
    'First Limit Reductions Flag',
    'loan_balance',
    'previous_final_global_limit',
    'first_final_global_limit',
    'current_final_global_limit',
    'first zeroized',
    'first_balance_limit_flag',
]

first_batch = current_defaulters_bloom_two.loc[
    _is_first_default & _after_first_reduction & _limit_below_balance & _first_zeroized,
    _first_batch_columns,
]

first_batch.head()

Unnamed: 0,store_number,Loan Count Flag,Default Flag,First Limit Reductions Flag,loan_balance,previous_final_global_limit,first_final_global_limit,current_final_global_limit,first zeroized,first_balance_limit_flag
5491,7187782,Repeat Client,Bloom Two First Default,Default occurred after first limit reductions,10973.33,4400.0,0.0,0.0,first zeroized,Lower than balance
811,7214596,Repeat Client,Bloom Two First Default,Default occurred after first limit reductions,14859.08,5400.0,0.0,0.0,first zeroized,Lower than balance
7295,7343046,Repeat Client,Bloom Two First Default,Default occurred after first limit reductions,37699.62,22300.0,0.0,0.0,first zeroized,Lower than balance
20066,7982347,Repeat Client,Bloom Two First Default,Default occurred after first limit reductions,1861.47,1100.0,0.0,0.0,first zeroized,Lower than balance
16669,7184468,Repeat Client,Bloom Two First Default,Default occurred after first limit reductions,286231.88,169100.0,0.0,0.0,first zeroized,Lower than balance


In [64]:
# (rows, columns) of the first batch.
first_batch.shape

(56, 10)

In [65]:
# Write the first batch to an Excel workbook (relative path, so it lands in the working directory).
first_batch.to_excel('First Batch Defaulters.xlsx')

In [66]:
# Second batch: rows that satisfy all four conditions below.
_is_first_default = current_defaulters_bloom_two['Default Flag'] == 'Bloom Two First Default'
_after_second_reduction = current_defaulters_bloom_two['Second Limit Reductions Flag'] == 'Default occurred after second limit reductions'
_limit_below_balance = current_defaulters_bloom_two['second_balance_limit_flag'] == 'Lower than balance'
_second_zeroized = current_defaulters_bloom_two['second zeroized'] == 'second zeroized'

_second_batch_columns = [
    'store_number',
    'Loan Count Flag',
    'Default Flag',
    'Second Limit Reductions Flag',
    'loan_balance',
    'previous_final_global_limit',
    'second_final_global_limit',
    'current_final_global_limit',
    'second zeroized',
    'second_balance_limit_flag',
]

second_batch = current_defaulters_bloom_two.loc[
    _is_first_default & _after_second_reduction & _limit_below_balance & _second_zeroized,
    _second_batch_columns,
]

second_batch.head()

Unnamed: 0,store_number,Loan Count Flag,Default Flag,Second Limit Reductions Flag,loan_balance,previous_final_global_limit,second_final_global_limit,current_final_global_limit,second zeroized,second_balance_limit_flag
19394,7237719,Repeat Client,Bloom Two First Default,Default occurred after second limit reductions,13621.39,12400.0,0.0,0.0,second zeroized,Lower than balance
18169,7759217,Repeat Client,Bloom Two First Default,Default occurred after second limit reductions,16516.03,6800.0,0.0,0.0,second zeroized,Lower than balance
8510,779448,Repeat Client,Bloom Two First Default,Default occurred after second limit reductions,3579.6,1700.0,0.0,0.0,second zeroized,Lower than balance
9440,7565981,Repeat Client,Bloom Two First Default,Default occurred after second limit reductions,14660.42,8600.0,0.0,0.0,second zeroized,Lower than balance
14734,7536789,Repeat Client,Bloom Two First Default,Default occurred after second limit reductions,4957.25,3600.0,0.0,0.0,second zeroized,Lower than balance


In [67]:
# Write the second batch to an Excel workbook (relative path, so it lands in the working directory).
second_batch.to_excel('Second Batch Defaulters.xlsx')

In [68]:
# (rows, columns) of the second batch.
second_batch.shape

(158, 10)

In [69]:
# Column dtypes and non-null counts of the final defaulters frame.
current_defaulters_bloom_two.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20167 entries, 16008 to 9026
Data columns (total 16 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   store_number                  20167 non-null  object 
 1   Loan Count Flag               20167 non-null  object 
 2   Default Flag                  20167 non-null  object 
 3   First Limit Reductions Flag   20167 non-null  object 
 4   Second Limit Reductions Flag  20167 non-null  object 
 5   total_outstanding             20167 non-null  float64
 6   safaricom_loan_balance        20167 non-null  float64
 7   loan_balance                  20167 non-null  float64
 8   previous_final_global_limit   20167 non-null  float64
 9   first_final_global_limit      20167 non-null  float64
 10  second_final_global_limit     20167 non-null  float64
 11  current_final_global_limit    20167 non-null  float64
 12  first zeroized                20167 non-null  object 
 13

In [70]:
# current_defaulters_700 = current_defaulters[current_defaulters['loan_status'] == 700]

# current_defaulters_700 = current_defaulters_700[['loan_mifos_id', 'store_number', 'loan_status', 'principal_outstanding', 'interest_outstanding', 'fee_charges_outstanding', 'penalty_charges_outstanding', 'total_outstanding']]

# current_defaulters_700['total_outstanding_calculated'] = current_defaulters_700['principal_outstanding'] + current_defaulters_700['interest_outstanding'] + current_defaulters_700['fee_charges_outstanding'] + current_defaulters_700['penalty_charges_outstanding']

# current_defaulters_700.head()