In [1]:
#import data processing libraries
import os
import pandas as pd
import numpy as np
import math as math
import datetime as dt
from scipy import stats

#import visualization libraries
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
sns.set_style('darkgrid')
%matplotlib inline

#db connection libraries
import psycopg2
from sqlalchemy import create_engine
import psycopg2.extras as extras

In [2]:
pd.set_option('display.max_colwidth', 500)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 300)

#suppress scientific notation to 2 decimal places
pd.set_option('display.float_format', lambda x: '%.2f' % x)

In [3]:
host = '157.245.248.249'
port = int(5432)
dbname = 'ubuntu'
user = 'jacklinengenia'
password = 'x3MX&8#!'


def get_query_results_postgres():
    with psycopg2.connect(host = host,
                          port = port,
                          database = dbname,
                          user = user,
                          password = password) as conn:
        sql = 'select loan_mifos_id, store_number, loan_status, term_frequency, principal_disbursed, principal_outstanding, interest_outstanding, fee_charges_charged, fee_charges_outstanding, penalty_charges_charged, penalty_charges_outstanding, total_expected_repayment, total_repayment, total_outstanding, safaricom_loan_balance, bloom_version, disbursed_on_date, expected_matured_on_date, end_rollvr_dt, dpd_30, dpd_d60, dpd_d90 from bloomlive.loans_fact_table_materialized_summary_view'
        df = pd.read_sql(sql, conn)
        
    return df

    conn.close()

In [4]:
df_loans = get_query_results_postgres()

df_loans.head(3)



Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90
0,60553,930988,600,1,20000.0,0.0,0.0,0.0,0.0,0.0,0.0,20080.0,20080.0,0.0,,2.0,2022-04-05,2022-04-06,2022-04-07,2022-05-07,2022-06-06,2022-07-06
1,60590,7049959,600,1,2500.0,0.0,0.0,18.68,0.0,20.0,0.0,2548.68,2548.68,0.0,,2.0,2022-04-05,2022-04-06,2022-04-07,2022-05-07,2022-06-06,2022-07-06
2,60828,210507,600,1,5000.0,0.0,0.0,0.0,0.0,0.0,0.0,5020.0,5020.0,0.0,,2.0,2022-04-06,2022-04-07,2022-04-08,2022-05-08,2022-06-07,2022-07-07


In [5]:
df_loans.shape

(469068, 22)

In [6]:
#create loan count column i.e adds a new column that captures the num of loans a customer has taken
df_loans["loan_count"] = df_loans.groupby("store_number")["store_number"].transform('size')

df_loans.head(3)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count
0,60553,930988,600,1,20000.0,0.0,0.0,0.0,0.0,0.0,0.0,20080.0,20080.0,0.0,,2.0,2022-04-05,2022-04-06,2022-04-07,2022-05-07,2022-06-06,2022-07-06,26.0
1,60590,7049959,600,1,2500.0,0.0,0.0,18.68,0.0,20.0,0.0,2548.68,2548.68,0.0,,2.0,2022-04-05,2022-04-06,2022-04-07,2022-05-07,2022-06-06,2022-07-06,2.0
2,60828,210507,600,1,5000.0,0.0,0.0,0.0,0.0,0.0,0.0,5020.0,5020.0,0.0,,2.0,2022-04-06,2022-04-07,2022-04-08,2022-05-08,2022-06-07,2022-07-07,39.0


In [7]:
#converting the current time to datetime format
curr_time = pd.to_datetime('today')

df_loans['disbursed_on_date'] = pd.to_datetime(df_loans['disbursed_on_date'], errors='coerce')
df_loans['end_rollvr_dt'] = pd.to_datetime(df_loans['end_rollvr_dt'], errors='coerce')
df_loans['expected_matured_on_date'] = pd.to_datetime(df_loans['expected_matured_on_date'], errors='coerce')

In [8]:
def saf_loan_balance(df):
    
    total_outstanding = df['total_outstanding']
    safaricom_loan_balance = df['safaricom_loan_balance']
    
    if safaricom_loan_balance >= 0:
        return safaricom_loan_balance
    else:
        return total_outstanding

In [9]:
df_loans['safaricom_loan_balance'] = df_loans.apply(lambda x: saf_loan_balance(x), axis = 1)

df_loans.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count
0,60553,930988,600,1,20000.0,0.0,0.0,0.0,0.0,0.0,0.0,20080.0,20080.0,0.0,0.0,2.0,2022-04-05,2022-04-06,2022-04-07,2022-05-07,2022-06-06,2022-07-06,26.0
1,60590,7049959,600,1,2500.0,0.0,0.0,18.68,0.0,20.0,0.0,2548.68,2548.68,0.0,0.0,2.0,2022-04-05,2022-04-06,2022-04-07,2022-05-07,2022-06-06,2022-07-06,2.0
2,60828,210507,600,1,5000.0,0.0,0.0,0.0,0.0,0.0,0.0,5020.0,5020.0,0.0,0.0,2.0,2022-04-06,2022-04-07,2022-04-08,2022-05-08,2022-06-07,2022-07-07,39.0
3,60977,7278566,600,7,1800.0,0.0,0.0,0.0,0.0,0.0,0.0,1846.81,1846.81,0.0,0.0,2.0,2022-04-06,2022-04-13,2022-04-16,2022-05-16,2022-06-15,2022-07-15,5.0
4,61245,7569133,600,7,43000.0,0.0,0.0,0.0,0.0,0.0,0.0,44118.01,44118.01,0.0,0.0,2.0,2022-04-06,2022-04-13,2022-04-16,2022-05-16,2022-06-15,2022-07-15,17.0


In [10]:
def cal_loan_count_flag(df):
    
    loan_count = df['loan_count']
    
    if loan_count == 1:
        return 'New Client'
    else:
        return 'Repeat Client'

In [11]:
df_loans['Loan Count Flag'] = df_loans.apply(lambda x: cal_loan_count_flag(x), axis = 1)

df_loans.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag
0,60553,930988,600,1,20000.0,0.0,0.0,0.0,0.0,0.0,0.0,20080.0,20080.0,0.0,0.0,2.0,2022-04-05,2022-04-06,2022-04-07,2022-05-07,2022-06-06,2022-07-06,26.0,Repeat Client
1,60590,7049959,600,1,2500.0,0.0,0.0,18.68,0.0,20.0,0.0,2548.68,2548.68,0.0,0.0,2.0,2022-04-05,2022-04-06,2022-04-07,2022-05-07,2022-06-06,2022-07-06,2.0,Repeat Client
2,60828,210507,600,1,5000.0,0.0,0.0,0.0,0.0,0.0,0.0,5020.0,5020.0,0.0,0.0,2.0,2022-04-06,2022-04-07,2022-04-08,2022-05-08,2022-06-07,2022-07-07,39.0,Repeat Client
3,60977,7278566,600,7,1800.0,0.0,0.0,0.0,0.0,0.0,0.0,1846.81,1846.81,0.0,0.0,2.0,2022-04-06,2022-04-13,2022-04-16,2022-05-16,2022-06-15,2022-07-15,5.0,Repeat Client
4,61245,7569133,600,7,43000.0,0.0,0.0,0.0,0.0,0.0,0.0,44118.01,44118.01,0.0,0.0,2.0,2022-04-06,2022-04-13,2022-04-16,2022-05-16,2022-06-15,2022-07-15,17.0,Repeat Client


In [12]:
current_defaulters = df_loans[(df_loans['penalty_charges_charged'] > 0) & (df_loans['total_outstanding'] > 0) & (df_loans['safaricom_loan_balance'] > 0) & (df_loans['loan_status'] == 300)]

current_defaulters.shape

(25787, 24)

In [13]:
defaulters = df_loans[(df_loans['penalty_charges_charged'] > 0)]

defaulters.shape

(65988, 24)

In [14]:
defaulters_bloom_one = defaulters[(defaulters['bloom_version'] == 1)]

defaulters_bloom_one.shape

(21120, 24)

In [15]:
defaulters_bloom_two = defaulters[(defaulters['bloom_version'] == 2)]

defaulters_bloom_two.shape

(44868, 24)

In [16]:
defaulters_bloom_two[(defaulters_bloom_two['store_number'] == '7468569')]

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag
131290,228595,7468569,300,21,7400.0,154.38,0.0,150.0,0.0,4.38,0.0,8229.26,8074.88,154.38,0.0,2.0,2022-10-12,2022-11-02,2022-11-07,2022-12-07,2023-01-06,2023-02-05,21.0,Repeat Client
135822,106687,7468569,600,21,8500.0,0.0,0.0,149.14,0.0,3.31,0.0,9298.46,9298.46,0.0,0.0,2.0,2022-05-26,2022-06-16,2022-06-21,2022-07-21,2022-08-20,2022-09-19,21.0,Repeat Client
135892,127075,7468569,300,21,7000.0,34.29,0.0,24.0,0.0,10.28,0.0,7566.29,7532.0,34.29,0.0,2.0,2022-06-17,2022-07-08,2022-07-13,2022-08-12,2022-09-11,2022-10-11,21.0,Repeat Client


In [17]:
# grouping store_number to find the count of bloom one defaulters
defaulters_bloom_one_count = defaulters_bloom_one.groupby(['store_number'], as_index=False)['loan_mifos_id'].count()
defaulters_bloom_one_count = defaulters_bloom_one_count.rename(columns={'loan_mifos_id':'defaulters_bloom_one_count'})

defaulters_bloom_one_count.head()

Unnamed: 0,store_number,defaulters_bloom_one_count
0,58169,1
1,101705,1
2,103914,1
3,105639,1
4,105649,1


In [18]:
# grouping store_number to find the count of bloom two defaulters
defaulters_bloom_two_count = defaulters_bloom_two.groupby(['store_number'], as_index=False)['loan_mifos_id'].count()
defaulters_bloom_two_count = defaulters_bloom_two_count.rename(columns={'loan_mifos_id':'defaulters_bloom_two_count'})

defaulters_bloom_two_count.head()

Unnamed: 0,store_number,defaulters_bloom_two_count
0,30,1
1,3227,5
2,58133,1
3,105570,1
4,105652,3


In [19]:
defaulters_count = pd.merge(defaulters_bloom_one_count, defaulters_bloom_two_count, on = 'store_number', how = 'outer')

defaulters_count = defaulters_count.fillna(0)

defaulters_count.head(15)

Unnamed: 0,store_number,defaulters_bloom_one_count,defaulters_bloom_two_count
0,58169,1.0,0.0
1,101705,1.0,0.0
2,103914,1.0,0.0
3,105639,1.0,0.0
4,105649,1.0,0.0
5,105754,1.0,0.0
6,110138,2.0,0.0
7,110169,1.0,1.0
8,110398,1.0,0.0
9,110501,1.0,2.0


In [20]:
current_defaulters['loan_status'].value_counts()

300    25787
Name: loan_status, dtype: int64

In [21]:
current_defaulters_bloom_one = current_defaulters[current_defaulters['bloom_version'] == 1]

current_defaulters_bloom_one.shape

(5736, 24)

In [22]:
current_defaulters_bloom_one.head()

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag
433,170892,604194,300,30,5000.0,5000.0,350.0,294.0,294.0,2322.0,2322.0,7966.0,0.0,7966.0,7966.0,1.0,2019-12-09,2020-01-08,NaT,,,,3.0,Repeat Client
445,171103,530658,300,30,5000.0,5000.0,350.0,294.0,294.0,2322.0,2322.0,7966.0,0.0,7966.0,7966.0,1.0,2019-12-10,2020-01-09,NaT,,,,6.0,Repeat Client
453,171381,567823,300,30,5000.0,5000.0,350.0,294.0,294.0,2322.0,2222.0,7966.0,100.0,7866.0,7866.0,1.0,2019-12-11,2020-01-10,NaT,,,,3.0,Repeat Client
454,171391,740928,300,30,5000.0,5000.0,350.0,294.0,294.0,2322.0,2322.0,7966.0,0.0,7966.0,7966.0,1.0,2019-12-11,2020-01-10,NaT,,,,3.0,Repeat Client
460,171510,218210,300,30,40000.0,40000.0,2800.0,2354.0,2354.0,17866.0,15866.0,63020.0,2000.0,61020.0,61020.0,1.0,2019-12-11,2020-01-10,NaT,,,,17.0,Repeat Client


In [23]:
current_defaulters_bloom_one = pd.merge(current_defaulters_bloom_one, defaulters_bloom_one_count, on = 'store_number', how = 'left')

current_defaulters_bloom_one.head()

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_one_count
0,170892,604194,300,30,5000.0,5000.0,350.0,294.0,294.0,2322.0,2322.0,7966.0,0.0,7966.0,7966.0,1.0,2019-12-09,2020-01-08,NaT,,,,3.0,Repeat Client,1
1,171103,530658,300,30,5000.0,5000.0,350.0,294.0,294.0,2322.0,2322.0,7966.0,0.0,7966.0,7966.0,1.0,2019-12-10,2020-01-09,NaT,,,,6.0,Repeat Client,1
2,171381,567823,300,30,5000.0,5000.0,350.0,294.0,294.0,2322.0,2222.0,7966.0,100.0,7866.0,7866.0,1.0,2019-12-11,2020-01-10,NaT,,,,3.0,Repeat Client,1
3,171391,740928,300,30,5000.0,5000.0,350.0,294.0,294.0,2322.0,2322.0,7966.0,0.0,7966.0,7966.0,1.0,2019-12-11,2020-01-10,NaT,,,,3.0,Repeat Client,1
4,171510,218210,300,30,40000.0,40000.0,2800.0,2354.0,2354.0,17866.0,15866.0,63020.0,2000.0,61020.0,61020.0,1.0,2019-12-11,2020-01-10,NaT,,,,17.0,Repeat Client,1


In [24]:
def cal_first_bloom_one_default(df):
    
    defaulters_bloom_one_count = df['defaulters_bloom_one_count']
    
    if defaulters_bloom_one_count == 1:
        return 'Bloom One First Default'
    else:
        return 'Bloom One Subsequent Default'

In [25]:
current_defaulters_bloom_one['Default Flag'] = current_defaulters_bloom_one.apply(lambda x: cal_first_bloom_one_default(x), axis = 1)

current_defaulters_bloom_one.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_one_count,Default Flag
0,170892,604194,300,30,5000.0,5000.0,350.0,294.0,294.0,2322.0,2322.0,7966.0,0.0,7966.0,7966.0,1.0,2019-12-09,2020-01-08,NaT,,,,3.0,Repeat Client,1,Bloom One First Default
1,171103,530658,300,30,5000.0,5000.0,350.0,294.0,294.0,2322.0,2322.0,7966.0,0.0,7966.0,7966.0,1.0,2019-12-10,2020-01-09,NaT,,,,6.0,Repeat Client,1,Bloom One First Default
2,171381,567823,300,30,5000.0,5000.0,350.0,294.0,294.0,2322.0,2222.0,7966.0,100.0,7866.0,7866.0,1.0,2019-12-11,2020-01-10,NaT,,,,3.0,Repeat Client,1,Bloom One First Default
3,171391,740928,300,30,5000.0,5000.0,350.0,294.0,294.0,2322.0,2322.0,7966.0,0.0,7966.0,7966.0,1.0,2019-12-11,2020-01-10,NaT,,,,3.0,Repeat Client,1,Bloom One First Default
4,171510,218210,300,30,40000.0,40000.0,2800.0,2354.0,2354.0,17866.0,15866.0,63020.0,2000.0,61020.0,61020.0,1.0,2019-12-11,2020-01-10,NaT,,,,17.0,Repeat Client,1,Bloom One First Default


In [26]:
current_defaulters_bloom_one['Default Flag'].value_counts()

Bloom One First Default         4433
Bloom One Subsequent Default    1303
Name: Default Flag, dtype: int64

In [27]:
current_defaulters_bloom_two = current_defaulters[current_defaulters['bloom_version'] == 2]

current_defaulters_bloom_two.shape

(20051, 24)

In [28]:
current_defaulters_bloom_two = pd.merge(current_defaulters_bloom_two, defaulters_bloom_two_count, on = 'store_number', how = 'left')

current_defaulters_bloom_two.head()

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count
0,86961,7803525,300,1,2800.0,2800.0,11.2,21.0,21.0,915.06,847.86,3747.26,67.2,3680.06,3680.06,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,1.0,New Client,1
1,87075,916139,300,21,3300.0,3300.0,250.81,49.5,49.5,1504.78,1425.58,5105.09,79.2,5025.89,5025.89,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,12.0,Repeat Client,2
2,87171,772622,300,1,5700.0,88.35,0.0,42.75,0.0,45.6,0.0,5811.15,5722.8,88.35,88.35,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,87.0,Repeat Client,6
3,87286,7801651,300,21,28400.0,595.77,0.0,426.0,0.0,180.4,0.0,31164.81,30569.04,595.77,595.77,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,15.0,Repeat Client,7
4,87291,7965219,300,7,1200.0,959.92,0.0,30.0,0.0,427.51,0.0,1688.72,728.8,959.92,959.92,2.0,2022-05-06,2022-05-13,2022-05-16,2022-06-15,2022-07-15,2022-08-14,1.0,New Client,1


In [29]:
current_defaulters_bloom_two[(current_defaulters_bloom_two['store_number'] == '7468569')]

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count


In [30]:
def cal_first_bloom_two_default(df):
    
    defaulters_bloom_two_count = df['defaulters_bloom_two_count']
    
    if defaulters_bloom_two_count == 1:
        return 'Bloom Two First Default'
    else:
        return 'Bloom Two Subsequent Default'

In [31]:
current_defaulters_bloom_two['Default Flag'] = current_defaulters_bloom_two.apply(lambda x: cal_first_bloom_two_default(x), axis = 1)

current_defaulters_bloom_two.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag
0,86961,7803525,300,1,2800.0,2800.0,11.2,21.0,21.0,915.06,847.86,3747.26,67.2,3680.06,3680.06,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,1.0,New Client,1,Bloom Two First Default
1,87075,916139,300,21,3300.0,3300.0,250.81,49.5,49.5,1504.78,1425.58,5105.09,79.2,5025.89,5025.89,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,12.0,Repeat Client,2,Bloom Two Subsequent Default
2,87171,772622,300,1,5700.0,88.35,0.0,42.75,0.0,45.6,0.0,5811.15,5722.8,88.35,88.35,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,87.0,Repeat Client,6,Bloom Two Subsequent Default
3,87286,7801651,300,21,28400.0,595.77,0.0,426.0,0.0,180.4,0.0,31164.81,30569.04,595.77,595.77,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,15.0,Repeat Client,7,Bloom Two Subsequent Default
4,87291,7965219,300,7,1200.0,959.92,0.0,30.0,0.0,427.51,0.0,1688.72,728.8,959.92,959.92,2.0,2022-05-06,2022-05-13,2022-05-16,2022-06-15,2022-07-15,2022-08-14,1.0,New Client,1,Bloom Two First Default


In [32]:
current_defaulters_bloom_two['Default Flag'].value_counts()

Bloom Two First Default         13391
Bloom Two Subsequent Default     6660
Name: Default Flag, dtype: int64

In [33]:
current_defaulters_bloom_two.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 20051 entries, 0 to 20050
Data columns (total 26 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   loan_mifos_id                20051 non-null  int64         
 1   store_number                 20051 non-null  object        
 2   loan_status                  20051 non-null  int64         
 3   term_frequency               20051 non-null  int64         
 4   principal_disbursed          20051 non-null  float64       
 5   principal_outstanding        20051 non-null  float64       
 6   interest_outstanding         20051 non-null  float64       
 7   fee_charges_charged          20051 non-null  float64       
 8   fee_charges_outstanding      20051 non-null  float64       
 9   penalty_charges_charged      20051 non-null  float64       
 10  penalty_charges_outstanding  20051 non-null  float64       
 11  total_expected_repayment     20051 non-nu

In [34]:
def cal_occurred_after_first_limit_reductions(df):
    
    expected_matured_on_date = df['expected_matured_on_date']
    disbursed_on_date = df['disbursed_on_date']
    
    if ((disbursed_on_date <= pd.to_datetime('2022-07-27')) and (expected_matured_on_date > pd.to_datetime('2022-07-27'))):
        return 'Default occurred after first limit reductions'
    else:
        return 'Default occurred before first limit reductions'

In [35]:
current_defaulters_bloom_two['First Limit Reductions Flag'] = current_defaulters_bloom_two.apply(lambda x: cal_occurred_after_first_limit_reductions(x), axis = 1)

current_defaulters_bloom_two.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag
0,86961,7803525,300,1,2800.0,2800.0,11.2,21.0,21.0,915.06,847.86,3747.26,67.2,3680.06,3680.06,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions
1,87075,916139,300,21,3300.0,3300.0,250.81,49.5,49.5,1504.78,1425.58,5105.09,79.2,5025.89,5025.89,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,12.0,Repeat Client,2,Bloom Two Subsequent Default,Default occurred before first limit reductions
2,87171,772622,300,1,5700.0,88.35,0.0,42.75,0.0,45.6,0.0,5811.15,5722.8,88.35,88.35,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,87.0,Repeat Client,6,Bloom Two Subsequent Default,Default occurred before first limit reductions
3,87286,7801651,300,21,28400.0,595.77,0.0,426.0,0.0,180.4,0.0,31164.81,30569.04,595.77,595.77,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,15.0,Repeat Client,7,Bloom Two Subsequent Default,Default occurred before first limit reductions
4,87291,7965219,300,7,1200.0,959.92,0.0,30.0,0.0,427.51,0.0,1688.72,728.8,959.92,959.92,2.0,2022-05-06,2022-05-13,2022-05-16,2022-06-15,2022-07-15,2022-08-14,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions


In [36]:
def cal_occurred_after_second_limit_reductions(df):
    
    expected_matured_on_date = df['expected_matured_on_date']
    disbursed_on_date = df['disbursed_on_date']
    
    if ((disbursed_on_date <= pd.to_datetime('2022-08-03')) and (expected_matured_on_date > pd.to_datetime('2022-08-03'))):
        return 'Default occurred after second limit reductions'
    else:
        return 'Default occurred before second limit reductions'

In [37]:
current_defaulters_bloom_two['Second Limit Reductions Flag'] = current_defaulters_bloom_two.apply(lambda x: cal_occurred_after_second_limit_reductions(x), axis = 1)

current_defaulters_bloom_two.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag
0,86961,7803525,300,1,2800.0,2800.0,11.2,21.0,21.0,915.06,847.86,3747.26,67.2,3680.06,3680.06,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions
1,87075,916139,300,21,3300.0,3300.0,250.81,49.5,49.5,1504.78,1425.58,5105.09,79.2,5025.89,5025.89,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,12.0,Repeat Client,2,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions
2,87171,772622,300,1,5700.0,88.35,0.0,42.75,0.0,45.6,0.0,5811.15,5722.8,88.35,88.35,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,87.0,Repeat Client,6,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions
3,87286,7801651,300,21,28400.0,595.77,0.0,426.0,0.0,180.4,0.0,31164.81,30569.04,595.77,595.77,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,15.0,Repeat Client,7,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions
4,87291,7965219,300,7,1200.0,959.92,0.0,30.0,0.0,427.51,0.0,1688.72,728.8,959.92,959.92,2.0,2022-05-06,2022-05-13,2022-05-16,2022-06-15,2022-07-15,2022-08-14,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions


In [38]:
current_defaulters_bloom_two['First Limit Reductions Flag'].value_counts()

Default occurred before first limit reductions    18988
Default occurred after first limit reductions      1063
Name: First Limit Reductions Flag, dtype: int64

In [39]:
current_defaulters_bloom_two['Second Limit Reductions Flag'].value_counts()

Default occurred before second limit reductions    18931
Default occurred after second limit reductions      1120
Name: Second Limit Reductions Flag, dtype: int64

In [40]:
current_defaulters_bloom_two['Loan Count Flag'].value_counts()

Repeat Client    15422
New Client        4629
Name: Loan Count Flag, dtype: int64

In [41]:
def saf_loan_balance(df):
    
    safaricom_loan_balance = df['safaricom_loan_balance']
    
    if safaricom_loan_balance >= 0:
        return 'yes'
    else:
        return 'no'

In [42]:
current_defaulters_bloom_two['saf_loan_balance'] = current_defaulters_bloom_two.apply(lambda x: saf_loan_balance(x), axis = 1)

current_defaulters_bloom_two.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance
0,86961,7803525,300,1,2800.0,2800.0,11.2,21.0,21.0,915.06,847.86,3747.26,67.2,3680.06,3680.06,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes
1,87075,916139,300,21,3300.0,3300.0,250.81,49.5,49.5,1504.78,1425.58,5105.09,79.2,5025.89,5025.89,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,12.0,Repeat Client,2,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes
2,87171,772622,300,1,5700.0,88.35,0.0,42.75,0.0,45.6,0.0,5811.15,5722.8,88.35,88.35,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,87.0,Repeat Client,6,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes
3,87286,7801651,300,21,28400.0,595.77,0.0,426.0,0.0,180.4,0.0,31164.81,30569.04,595.77,595.77,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,15.0,Repeat Client,7,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes
4,87291,7965219,300,7,1200.0,959.92,0.0,30.0,0.0,427.51,0.0,1688.72,728.8,959.92,959.92,2.0,2022-05-06,2022-05-13,2022-05-16,2022-06-15,2022-07-15,2022-08-14,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes


In [43]:
def calc_loan_balance(df):
    
    saf_loan_balance = df['saf_loan_balance']
    total_outstanding = df['total_outstanding']
    safaricom_loan_balance = df['safaricom_loan_balance']
    
    if saf_loan_balance == 'yes':
        return safaricom_loan_balance
    else:
        return total_outstanding

In [44]:
current_defaulters_bloom_two['loan_balance'] = current_defaulters_bloom_two.apply(lambda x: calc_loan_balance(x), axis = 1)

current_defaulters_bloom_two.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance,loan_balance
0,86961,7803525,300,1,2800.0,2800.0,11.2,21.0,21.0,915.06,847.86,3747.26,67.2,3680.06,3680.06,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,3680.06
1,87075,916139,300,21,3300.0,3300.0,250.81,49.5,49.5,1504.78,1425.58,5105.09,79.2,5025.89,5025.89,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,12.0,Repeat Client,2,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,5025.89
2,87171,772622,300,1,5700.0,88.35,0.0,42.75,0.0,45.6,0.0,5811.15,5722.8,88.35,88.35,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,87.0,Repeat Client,6,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,88.35
3,87286,7801651,300,21,28400.0,595.77,0.0,426.0,0.0,180.4,0.0,31164.81,30569.04,595.77,595.77,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,15.0,Repeat Client,7,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,595.77
4,87291,7965219,300,7,1200.0,959.92,0.0,30.0,0.0,427.51,0.0,1688.72,728.8,959.92,959.92,2.0,2022-05-06,2022-05-13,2022-05-16,2022-06-15,2022-07-15,2022-08-14,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,959.92


In [45]:
host = '157.245.248.249'
port = int(5432)
dbname = 'ubuntu'
user = 'jacklinengenia'
password = 'x3MX&8#!'


def get_query_results_postgres():
    with psycopg2.connect(host = host,
                          port = port,
                          database = dbname,
                          user = user,
                          password = password) as conn:
        sql = "select store_number, adjusted_21_limit, adjusted_7_limit, adjusted_1_limit, final_21_limit, final_7_limit, final_1_limit, model_version from bloomlive.temp_scoring_summaries where model_version in('2022-005[2022-05-14, 2022-07-27]', '2022-005[2022-05-14, 2022-08-03]', '2022-007[2022-05-14, 2022-11-03]')"
        df = pd.read_sql(sql, conn)
        
    return df

    conn.close()

In [46]:
limits = get_query_results_postgres()

limits.head(3)



Unnamed: 0,store_number,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,model_version
0,7290826,0.0,262.94,262.94,0,0,0,"2022-005[2022-05-14, 2022-07-27]"
1,7373821,0.0,602.38,602.38,0,0,0,"2022-005[2022-05-14, 2022-07-27]"
2,7465042,0.0,471.0,471.0,0,0,0,"2022-005[2022-05-14, 2022-07-27]"


In [47]:
limits["adjusted_21_limit"] = (np.ceil(limits["adjusted_21_limit"] / 100) * 100).astype(int)
limits["adjusted_7_limit"] = (np.ceil(limits["adjusted_7_limit"] / 100) * 100).astype(int)
limits["adjusted_1_limit"] = (np.ceil(limits["adjusted_1_limit"] / 100) * 100).astype(int)

limits["adjusted_global_limit"] = limits[["adjusted_21_limit", "adjusted_7_limit", "adjusted_1_limit"]].max(axis=1)
limits["final_global_limit"] = limits[["final_21_limit", "final_7_limit", "final_1_limit"]].max(axis=1)

limits.head()

Unnamed: 0,store_number,adjusted_21_limit,adjusted_7_limit,adjusted_1_limit,final_21_limit,final_7_limit,final_1_limit,model_version,adjusted_global_limit,final_global_limit
0,7290826,0,300,300,0,0,0,"2022-005[2022-05-14, 2022-07-27]",300,0
1,7373821,0,700,700,0,0,0,"2022-005[2022-05-14, 2022-07-27]",700,0
2,7465042,0,500,500,0,0,0,"2022-005[2022-05-14, 2022-07-27]",500,0
3,7480355,0,300,300,0,0,0,"2022-005[2022-05-14, 2022-07-27]",300,0
4,7506256,0,600,600,0,0,0,"2022-005[2022-05-14, 2022-07-27]",600,0


In [48]:
limits = limits[['store_number', 'model_version', 'adjusted_global_limit', 'final_global_limit']]

limits.head()

Unnamed: 0,store_number,model_version,adjusted_global_limit,final_global_limit
0,7290826,"2022-005[2022-05-14, 2022-07-27]",300,0
1,7373821,"2022-005[2022-05-14, 2022-07-27]",700,0
2,7465042,"2022-005[2022-05-14, 2022-07-27]",500,0
3,7480355,"2022-005[2022-05-14, 2022-07-27]",300,0
4,7506256,"2022-005[2022-05-14, 2022-07-27]",600,0


In [49]:
first_refresh = limits[limits['model_version'] == '2022-005[2022-05-14, 2022-07-27]']
del first_refresh['model_version']

second_refresh = limits[limits['model_version'] == '2022-005[2022-05-14, 2022-08-03]']
del second_refresh['model_version']

current_refresh = limits[limits['model_version'] == '2022-007[2022-05-14, 2022-11-03]']
del current_refresh['model_version']


In [50]:
current_defaulters_bloom_two = pd.merge(current_defaulters_bloom_two, first_refresh, on = 'store_number', how = 'left')
current_defaulters_bloom_two = current_defaulters_bloom_two.rename(columns={'adjusted_global_limit':'first_adjusted_global_limit', 'final_global_limit':'first_final_global_limit'})
current_defaulters_bloom_two['first_adjusted_global_limit'] = current_defaulters_bloom_two['first_adjusted_global_limit'].fillna(0)
current_defaulters_bloom_two['first_final_global_limit'] = current_defaulters_bloom_two['first_final_global_limit'].fillna(0)

current_defaulters_bloom_two = pd.merge(current_defaulters_bloom_two, second_refresh, on = 'store_number', how = 'left')
current_defaulters_bloom_two = current_defaulters_bloom_two.rename(columns={'adjusted_global_limit':'second_adjusted_global_limit', 'final_global_limit':'second_final_global_limit'})
current_defaulters_bloom_two['second_adjusted_global_limit'] = current_defaulters_bloom_two['second_adjusted_global_limit'].fillna(0)
current_defaulters_bloom_two['second_final_global_limit'] = current_defaulters_bloom_two['second_final_global_limit'].fillna(0)

current_defaulters_bloom_two = pd.merge(current_defaulters_bloom_two, current_refresh, on = 'store_number', how = 'left')
current_defaulters_bloom_two = current_defaulters_bloom_two.rename(columns={'adjusted_global_limit':'current_adjusted_global_limit', 'final_global_limit':'current_final_global_limit'})
current_defaulters_bloom_two['current_adjusted_global_limit'] = current_defaulters_bloom_two['current_adjusted_global_limit'].fillna(0)
current_defaulters_bloom_two['current_final_global_limit'] = current_defaulters_bloom_two['current_final_global_limit'].fillna(0)

#current_defaulters_bloom_two["adjusted_global_limit"] = current_defaulters_bloom_two[["adjusted_21_limit", "adjusted_7_limit", "adjusted_1_limit"]].max(axis=1)
#current_defaulters_bloom_two["final_global_limit"] = current_defaulters_bloom_two[["final_21_limit", "final_7_limit", "final_1_limit"]].max(axis=1)

current_defaulters_bloom_two.head()

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance,loan_balance,first_adjusted_global_limit,first_final_global_limit,second_adjusted_global_limit,second_final_global_limit,current_adjusted_global_limit,current_final_global_limit
0,86961,7803525,300,1,2800.0,2800.0,11.2,21.0,21.0,915.06,847.86,3747.26,67.2,3680.06,3680.06,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,3680.06,0.0,0.0,0.0,0.0,0.0,0.0
1,87075,916139,300,21,3300.0,3300.0,250.81,49.5,49.5,1504.78,1425.58,5105.09,79.2,5025.89,5025.89,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,12.0,Repeat Client,2,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,5025.89,0.0,0.0,0.0,0.0,0.0,0.0
2,87171,772622,300,1,5700.0,88.35,0.0,42.75,0.0,45.6,0.0,5811.15,5722.8,88.35,88.35,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,87.0,Repeat Client,6,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,88.35,16200.0,5000.0,20400.0,0.0,0.0,0.0
3,87286,7801651,300,21,28400.0,595.77,0.0,426.0,0.0,180.4,0.0,31164.81,30569.04,595.77,595.77,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,15.0,Repeat Client,7,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,595.77,37000.0,26800.0,55700.0,0.0,0.0,0.0
4,87291,7965219,300,7,1200.0,959.92,0.0,30.0,0.0,427.51,0.0,1688.72,728.8,959.92,959.92,2.0,2022-05-06,2022-05-13,2022-05-16,2022-06-15,2022-07-15,2022-08-14,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,959.92,0.0,0.0,0.0,0.0,0.0,0.0


In [51]:
current_defaulters_bloom_two.shape

(20051, 36)

In [52]:
current_defaulters_bloom_two[['first_adjusted_global_limit', 'first_final_global_limit', 'second_adjusted_global_limit', 'second_final_global_limit', 'current_adjusted_global_limit', 'current_final_global_limit']].sum()

first_adjusted_global_limit     294487900.00
first_final_global_limit        178210600.00
second_adjusted_global_limit    294528600.00
second_final_global_limit        60490900.00
current_adjusted_global_limit   179870100.00
current_final_global_limit       33999900.00
dtype: float64

In [53]:
def calc_first_zeroized(df):
    
    first_adjusted_global_limit = df['first_adjusted_global_limit']
    first_final_global_limit = df['first_final_global_limit']
    
    if first_final_global_limit == 0 and first_adjusted_global_limit > 0:
        return 'first zeroized'
    else:
        return 'first not zeroized'
    
    
def calc_second_zeroized(df):
    
    second_adjusted_global_limit = df['second_adjusted_global_limit']
    second_final_global_limit = df['second_final_global_limit']
    
    if second_final_global_limit == 0 and second_adjusted_global_limit > 0:
        return 'second zeroized'
    else:
        return 'second not zeroized' 

In [54]:
current_defaulters_bloom_two['first zeroized'] = current_defaulters_bloom_two.apply(lambda x: calc_first_zeroized(x), axis = 1)
current_defaulters_bloom_two['second zeroized'] = current_defaulters_bloom_two.apply(lambda x: calc_second_zeroized(x), axis = 1)


current_defaulters_bloom_two.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance,loan_balance,first_adjusted_global_limit,first_final_global_limit,second_adjusted_global_limit,second_final_global_limit,current_adjusted_global_limit,current_final_global_limit,first zeroized,second zeroized
0,86961,7803525,300,1,2800.0,2800.0,11.2,21.0,21.0,915.06,847.86,3747.26,67.2,3680.06,3680.06,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,3680.06,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized
1,87075,916139,300,21,3300.0,3300.0,250.81,49.5,49.5,1504.78,1425.58,5105.09,79.2,5025.89,5025.89,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,12.0,Repeat Client,2,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,5025.89,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized
2,87171,772622,300,1,5700.0,88.35,0.0,42.75,0.0,45.6,0.0,5811.15,5722.8,88.35,88.35,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,87.0,Repeat Client,6,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,88.35,16200.0,5000.0,20400.0,0.0,0.0,0.0,first not zeroized,second zeroized
3,87286,7801651,300,21,28400.0,595.77,0.0,426.0,0.0,180.4,0.0,31164.81,30569.04,595.77,595.77,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,15.0,Repeat Client,7,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,595.77,37000.0,26800.0,55700.0,0.0,0.0,0.0,first not zeroized,second zeroized
4,87291,7965219,300,7,1200.0,959.92,0.0,30.0,0.0,427.51,0.0,1688.72,728.8,959.92,959.92,2.0,2022-05-06,2022-05-13,2022-05-16,2022-06-15,2022-07-15,2022-08-14,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,959.92,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized


In [55]:
current_defaulters_bloom_two[current_defaulters_bloom_two['second_adjusted_global_limit'] > current_defaulters_bloom_two['loan_balance']]

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance,loan_balance,first_adjusted_global_limit,first_final_global_limit,second_adjusted_global_limit,second_final_global_limit,current_adjusted_global_limit,current_final_global_limit,first zeroized,second zeroized
2,87171,772622,300,1,5700.00,88.35,0.00,42.75,0.00,45.60,0.00,5811.15,5722.80,88.35,88.35,2.00,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,87.00,Repeat Client,6,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,88.35,16200.00,5000.00,20400.00,0.00,0.00,0.00,first not zeroized,second zeroized
3,87286,7801651,300,21,28400.00,595.77,0.00,426.00,0.00,180.40,0.00,31164.81,30569.04,595.77,595.77,2.00,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,15.00,Repeat Client,7,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,595.77,37000.00,26800.00,55700.00,0.00,0.00,0.00,first not zeroized,second zeroized
8,155623,7186034,300,1,20900.00,1766.93,0.00,156.75,0.00,1610.18,0.00,22750.53,20983.60,1766.93,1766.93,2.00,2022-07-07,2022-07-08,2022-07-09,2022-08-08,2022-09-07,2022-10-07,16.00,Repeat Client,2,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,1766.93,80200.00,0.00,66800.00,0.00,165900.00,0.00,first zeroized,second zeroized
12,87296,7148467,300,7,23000.00,297.72,0.00,575.00,0.00,73.46,0.00,24246.47,23948.75,297.72,297.72,2.00,2022-05-06,2022-05-13,2022-05-16,2022-06-15,2022-07-15,2022-08-14,15.00,Repeat Client,2,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,297.72,41800.00,41800.00,44000.00,0.00,72300.00,0.00,first not zeroized,second zeroized
44,45895,7273381,300,7,6700.00,6700.00,174.21,13.02,13.02,1969.80,1728.60,8857.03,241.20,8615.83,8615.83,2.00,2022-03-18,2022-03-25,2022-03-28,2022-04-27,2022-05-27,2022-06-26,1.00,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,8615.83,14100.00,6300.00,13400.00,0.00,13800.00,0.00,first not zeroized,second zeroized
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20017,58744,854851,300,21,19400.00,422.70,0.00,349.37,0.00,82.67,0.00,21306.45,20883.75,422.70,422.70,2.00,2022-04-02,2022-04-23,2022-04-28,2022-05-28,2022-06-27,2022-07-27,26.00,Repeat Client,2,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,422.70,7100.00,7100.00,7100.00,0.00,6300.00,0.00,first not zeroized,second zeroized
20020,86973,619121,300,21,17600.00,369.39,0.00,264.00,0.00,110.32,0.00,19311.93,18942.54,369.39,369.39,2.00,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,36.00,Repeat Client,6,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,369.39,18900.00,18900.00,17300.00,0.00,20100.00,17700.00,first not zeroized,second zeroized
20021,86987,442317,300,21,30000.00,629.32,0.00,450.00,0.00,190.55,0.00,32920.56,32291.24,629.32,629.32,2.00,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,48.00,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,629.32,23700.00,11000.00,28600.00,11000.00,33900.00,28700.00,first not zeroized,second not zeroized
20025,88593,7918597,300,21,160000.00,2561.61,0.00,1832.41,0.00,774.86,0.00,174767.28,172205.67,2561.61,2561.61,2.00,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,14.00,Repeat Client,6,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,2561.61,142800.00,142800.00,92800.00,0.00,0.00,0.00,first not zeroized,second zeroized


In [56]:
def calc_first_balance_limit_flag(df):
    
    loan_balance = df['loan_balance']
    first_adjusted_global_limit = df['first_adjusted_global_limit']
    
    if first_adjusted_global_limit < loan_balance:
        return 'Lower than balance'
    elif first_adjusted_global_limit > loan_balance:
        return 'Higher than balance'
    elif loan_balance == first_adjusted_global_limit:
        return 'Equal Balances'
    

def calc_second_balance_limit_flag(df):
    
    loan_balance = df['loan_balance']
    second_adjusted_global_limit = df['second_adjusted_global_limit']
    
    if second_adjusted_global_limit < loan_balance:
        return 'Lower than balance'
    elif second_adjusted_global_limit > loan_balance:
        return 'Higher than balance'
    elif loan_balance == second_adjusted_global_limit:
        return 'Equal Balances'
    

In [57]:
current_defaulters_bloom_two['first_balance_limit_flag'] = current_defaulters_bloom_two.apply(lambda x: calc_first_balance_limit_flag(x), axis = 1)
current_defaulters_bloom_two['second_balance_limit_flag'] = current_defaulters_bloom_two.apply(lambda x: calc_second_balance_limit_flag(x), axis = 1)


current_defaulters_bloom_two.head(5)

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance,loan_balance,first_adjusted_global_limit,first_final_global_limit,second_adjusted_global_limit,second_final_global_limit,current_adjusted_global_limit,current_final_global_limit,first zeroized,second zeroized,first_balance_limit_flag,second_balance_limit_flag
0,86961,7803525,300,1,2800.0,2800.0,11.2,21.0,21.0,915.06,847.86,3747.26,67.2,3680.06,3680.06,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,3680.06,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
1,87075,916139,300,21,3300.0,3300.0,250.81,49.5,49.5,1504.78,1425.58,5105.09,79.2,5025.89,5025.89,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,12.0,Repeat Client,2,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,5025.89,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
2,87171,772622,300,1,5700.0,88.35,0.0,42.75,0.0,45.6,0.0,5811.15,5722.8,88.35,88.35,2.0,2022-05-06,2022-05-07,2022-05-08,2022-06-07,2022-07-07,2022-08-06,87.0,Repeat Client,6,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,88.35,16200.0,5000.0,20400.0,0.0,0.0,0.0,first not zeroized,second zeroized,Higher than balance,Higher than balance
3,87286,7801651,300,21,28400.0,595.77,0.0,426.0,0.0,180.4,0.0,31164.81,30569.04,595.77,595.77,2.0,2022-05-06,2022-05-27,2022-06-01,2022-07-01,2022-07-31,2022-08-30,15.0,Repeat Client,7,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,595.77,37000.0,26800.0,55700.0,0.0,0.0,0.0,first not zeroized,second zeroized,Higher than balance,Higher than balance
4,87291,7965219,300,7,1200.0,959.92,0.0,30.0,0.0,427.51,0.0,1688.72,728.8,959.92,959.92,2.0,2022-05-06,2022-05-13,2022-05-16,2022-06-15,2022-07-15,2022-08-14,1.0,New Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,959.92,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance


In [58]:
current_defaulters_bloom_two = current_defaulters_bloom_two.sort_values(by = 'disbursed_on_date')

current_defaulters_bloom_two = current_defaulters_bloom_two.drop_duplicates(subset=['store_number'], keep='last')

current_defaulters_bloom_two.head()

Unnamed: 0,loan_mifos_id,store_number,loan_status,term_frequency,principal_disbursed,principal_outstanding,interest_outstanding,fee_charges_charged,fee_charges_outstanding,penalty_charges_charged,penalty_charges_outstanding,total_expected_repayment,total_repayment,total_outstanding,safaricom_loan_balance,bloom_version,disbursed_on_date,expected_matured_on_date,end_rollvr_dt,dpd_30,dpd_d60,dpd_d90,loan_count,Loan Count Flag,defaulters_bloom_two_count,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,saf_loan_balance,loan_balance,first_adjusted_global_limit,first_final_global_limit,second_adjusted_global_limit,second_final_global_limit,current_adjusted_global_limit,current_final_global_limit,first zeroized,second zeroized,first_balance_limit_flag,second_balance_limit_flag
1005,3699,735346,300,7,150.0,150.0,5.09,0.0,0.0,20.4,20.4,175.49,0.0,175.49,175.49,2.0,2021-11-05,2021-11-12,2021-11-15,2021-12-15,2022-01-14,2022-02-13,100.0,Repeat Client,10,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,175.49,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
2139,3749,438125,300,21,8000.0,8000.0,695.47,434.8,434.8,2736.0,2736.0,11866.27,0.0,11866.27,11866.27,2.0,2021-11-11,2021-12-02,2021-12-07,2022-01-06,2022-02-05,2022-03-07,6.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,11866.27,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
1099,3760,970016,300,21,80000.0,80000.0,6954.74,4348.0,4348.0,27840.0,24840.0,119142.74,3000.0,116142.74,116142.74,2.0,2021-11-12,2021-12-03,2021-12-08,2022-01-07,2022-02-06,2022-03-08,2.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,116142.74,0.0,0.0,0.0,0.0,44800.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
19780,3768,583528,300,21,31000.0,31000.0,2694.96,1684.85,1684.85,11036.0,11036.0,46415.81,0.0,46415.81,46415.81,2.0,2021-11-12,2021-12-03,2021-12-08,2022-01-07,2022-02-06,2022-03-08,8.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,46415.81,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
19815,3757,913867,300,21,7500.0,7500.0,652.01,407.63,407.63,2670.0,2670.0,11229.64,0.0,11229.64,11229.64,2.0,2021-11-12,2021-12-03,2021-12-08,2022-01-07,2022-02-06,2022-03-08,10.0,Repeat Client,1,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,yes,11229.64,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance


In [59]:
current_defaulters_bloom_two = current_defaulters_bloom_two[['store_number', 'Loan Count Flag', 'Default Flag', 'First Limit Reductions Flag', 'Second Limit Reductions Flag', 'total_outstanding', 'safaricom_loan_balance', 'loan_balance', 'first_adjusted_global_limit', 'first_final_global_limit', 'second_adjusted_global_limit', 'second_final_global_limit', 'current_adjusted_global_limit', 'second_final_global_limit', 'second_final_global_limit', 'first zeroized', 'second zeroized', 'first_balance_limit_flag', 'second_balance_limit_flag']]

current_defaulters_bloom_two.head()

Unnamed: 0,store_number,Loan Count Flag,Default Flag,First Limit Reductions Flag,Second Limit Reductions Flag,total_outstanding,safaricom_loan_balance,loan_balance,first_adjusted_global_limit,first_final_global_limit,second_adjusted_global_limit,second_final_global_limit,current_adjusted_global_limit,second_final_global_limit.1,second_final_global_limit.2,first zeroized,second zeroized,first_balance_limit_flag,second_balance_limit_flag
1005,735346,Repeat Client,Bloom Two Subsequent Default,Default occurred before first limit reductions,Default occurred before second limit reductions,175.49,175.49,175.49,0.0,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
2139,438125,Repeat Client,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,11866.27,11866.27,11866.27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
1099,970016,Repeat Client,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,116142.74,116142.74,116142.74,0.0,0.0,0.0,0.0,44800.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
19780,583528,Repeat Client,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,46415.81,46415.81,46415.81,0.0,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance
19815,913867,Repeat Client,Bloom Two First Default,Default occurred before first limit reductions,Default occurred before second limit reductions,11229.64,11229.64,11229.64,0.0,0.0,0.0,0.0,0.0,0.0,0.0,first not zeroized,second not zeroized,Lower than balance,Lower than balance


In [60]:
current_defaulters_bloom_two['store_number'].nunique()

19740

In [61]:
current_defaulters_bloom_two.shape

(19740, 19)

In [62]:
current_defaulters_bloom_two['first_balance_limit_flag'].value_counts()

Lower than balance     16631
Higher than balance     3109
Name: first_balance_limit_flag, dtype: int64

In [63]:
first_batch = current_defaulters_bloom_two[(current_defaulters_bloom_two['Default Flag'] == 'Bloom Two First Default') & (current_defaulters_bloom_two['First Limit Reductions Flag'] == 'Default occurred after first limit reductions') & (current_defaulters_bloom_two['first_balance_limit_flag'] == 'Lower than balance') & (current_defaulters_bloom_two['first zeroized'] == 'first zeroized')]

first_batch = first_batch[['store_number', 'Loan Count Flag', 'Default Flag', 'First Limit Reductions Flag', 'loan_balance', 'first_adjusted_global_limit', 'first_final_global_limit', 'current_adjusted_global_limit', 'first zeroized', 'first_balance_limit_flag']]

first_batch.head()

Unnamed: 0,store_number,Loan Count Flag,Default Flag,First Limit Reductions Flag,loan_balance,first_adjusted_global_limit,first_final_global_limit,current_adjusted_global_limit,first zeroized,first_balance_limit_flag
5264,7187782,Repeat Client,Bloom Two First Default,Default occurred after first limit reductions,10973.33,100.0,0.0,0.0,first zeroized,Lower than balance
5578,7343046,Repeat Client,Bloom Two First Default,Default occurred after first limit reductions,37699.62,13800.0,0.0,0.0,first zeroized,Lower than balance
4469,7057564,Repeat Client,Bloom Two First Default,Default occurred after first limit reductions,6761.95,5000.0,0.0,2500.0,first zeroized,Lower than balance
8647,7072094,Repeat Client,Bloom Two First Default,Default occurred after first limit reductions,14217.48,11600.0,0.0,0.0,first zeroized,Lower than balance
9786,7184468,Repeat Client,Bloom Two First Default,Default occurred after first limit reductions,286231.88,12800.0,0.0,0.0,first zeroized,Lower than balance


In [64]:
first_batch.shape

(51, 10)

In [65]:
first_batch.to_excel('First Batch Defaulters.xlsx')

In [66]:
second_batch = current_defaulters_bloom_two[(current_defaulters_bloom_two['Default Flag'] == 'Bloom Two First Default') & (current_defaulters_bloom_two['Second Limit Reductions Flag'] == 'Default occurred after second limit reductions') & (current_defaulters_bloom_two['second_balance_limit_flag'] == 'Lower than balance') & (current_defaulters_bloom_two['second zeroized'] == 'second zeroized')]

second_batch = second_batch[['store_number', 'Loan Count Flag', 'Default Flag', 'Second Limit Reductions Flag', 'loan_balance', 'second_adjusted_global_limit', 'second_final_global_limit', 'current_adjusted_global_limit', 'second zeroized', 'second_balance_limit_flag']]

second_batch.head()

Unnamed: 0,store_number,Loan Count Flag,Default Flag,Second Limit Reductions Flag,loan_balance,second_adjusted_global_limit,second_final_global_limit,second_final_global_limit.1,second_final_global_limit.2,current_adjusted_global_limit,second zeroized,second_balance_limit_flag
15187,7321104,Repeat Client,Bloom Two First Default,Default occurred after second limit reductions,14983.62,4200.0,0.0,0.0,0.0,0.0,second zeroized,Lower than balance
3989,7759217,Repeat Client,Bloom Two First Default,Default occurred after second limit reductions,16516.03,6700.0,0.0,0.0,0.0,0.0,second zeroized,Lower than balance
8207,7565981,Repeat Client,Bloom Two First Default,Default occurred after second limit reductions,14660.42,8700.0,0.0,0.0,0.0,0.0,second zeroized,Lower than balance
9990,7484507,Repeat Client,Bloom Two First Default,Default occurred after second limit reductions,74165.06,34900.0,0.0,0.0,0.0,0.0,second zeroized,Lower than balance
15088,7898485,Repeat Client,Bloom Two First Default,Default occurred after second limit reductions,11932.88,3200.0,0.0,0.0,0.0,0.0,second zeroized,Lower than balance


In [67]:
second_batch.to_excel('Second Batch Defaulters.xlsx')

In [68]:
second_batch.shape

(147, 12)

In [69]:
current_defaulters_bloom_two.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 19740 entries, 1005 to 9793
Data columns (total 19 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   store_number                   19740 non-null  object 
 1   Loan Count Flag                19740 non-null  object 
 2   Default Flag                   19740 non-null  object 
 3   First Limit Reductions Flag    19740 non-null  object 
 4   Second Limit Reductions Flag   19740 non-null  object 
 5   total_outstanding              19740 non-null  float64
 6   safaricom_loan_balance         19740 non-null  float64
 7   loan_balance                   19740 non-null  float64
 8   first_adjusted_global_limit    19740 non-null  float64
 9   first_final_global_limit       19740 non-null  float64
 10  second_adjusted_global_limit   19740 non-null  float64
 11  second_final_global_limit      19740 non-null  float64
 12  current_adjusted_global_limit  19740 non-nul

In [70]:
# current_defaulters_700 = current_defaulters[current_defaulters['loan_status'] == 700]

# current_defaulters_700 = current_defaulters_700[['loan_mifos_id', 'store_number', 'loan_status', 'principal_outstanding', 'interest_outstanding', 'fee_charges_outstanding', 'penalty_charges_outstanding', 'total_outstanding']]

# current_defaulters_700['total_outstanding_calculated'] = current_defaulters_700['principal_outstanding'] + current_defaulters_700['interest_outstanding'] + current_defaulters_700['fee_charges_outstanding'] + current_defaulters_700['penalty_charges_outstanding']

# current_defaulters_700.head()