In [22]:
import os
import pandas as pd 
import glob
import random 
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 

## Goal: Truncate each loan's transaction history at an appropriate starting point. Could the first new borrowing serve as the place to start?

In [23]:
# Load the complete diaries transaction table from the parent directory.
# low_memory=False turns off pandas' chunked low-memory parsing.
trx = pd.read_csv(
    filepath_or_buffer='../diaries_transactions_all.csv',
    low_memory=False,
)

In [24]:
# Display the column labels of the transaction table.
trx.columns

Index(['hh_ids', 'unique_hhs', 'first_trx_date_hh', 'last_trx_date_hh',
       'tot_hh_daysofobs', 'tot_hh_monthsofobs', 'interview_designation',
       'int_date', 'int_month', 'int_year', 'int_yr_mo', 'first_int_date',
       'account_ids', 'unique_accnts', 'm_ids_owner', 'unique_hm_owner',
       'account_bsheet_desig', 'account_startclose_balance', 'account_formal',
       'account_liquid', 'first_trx_date_acc', 'last_trx_date_acc',
       'tot_acc_daysofobs', 'tot_acc_monthsofobs', 'trx_id', 'm_ids_trx',
       'trx_date', 'trx_month', 'trx_year', 'trx_yr_mo', 'trx_dq_round',
       'trx_stdtime_days_hh', 'trx_stdtime_mnths_hh', 'trx_stdtime_days_acc',
       'trx_stdtime_mnths_acc', 'trx_class_code', 'trx_class_desc',
       'trx_family_code', 'trx_family_desc', 'trx_type_code', 'trx_type_desc',
       'trx_prx_purpose', 'trx_prx_purpose_fd', 'trx_fee',
       'trx_bsheet_direction', 'trx_mode_code', 'trx_mode_desc',
       'trx_place_incommunity', 'trx_distance_km', 'trx_outlet'

In [25]:
# Let wide frames display up to 200 columns instead of being elided.
pd.options.display.max_columns = 200

### Subset to Formal Loans: 

In [26]:
# Keep only the rows whose transaction family code is 'FRMLN',
# then show how many rows and columns remain.
frmln_trx = trx[trx['trx_family_code'].eq('FRMLN')]
frmln_trx.shape

(1429, 58)

#### How many individual business or agriculture loans are there? 

In [27]:
# Tabulate loan type descriptions over the rows flagged unique_accnts == 1
# (presumably one flagged row per account -- TODO confirm against the codebook).
frmln_trx.loc[frmln_trx['unique_accnts'].eq(1), 'trx_type_desc'].value_counts()

Individual Business or Agriculture Loan      48
Consumer/ personal loan (not payday loan)    33
Hire Purchase                                24
Joint liability loan                         21
M-SHWARI Loan                                14
School Fees Loan                             12
Payday loan                                   2
Credit card (including store card)            2
Group Enterprise Loan                         2
Student loan                                  1
Name: trx_type_desc, dtype: int64

In [28]:
# Same tabulation as for the descriptions, but by numeric loan type code,
# so the code of each loan type can be read off by matching the counts.
frmln_trx.loc[frmln_trx['unique_accnts'].eq(1), 'trx_type_code'].value_counts()

2760    48
2762    33
3398    24
3247    21
4219    14
3397    12
2761     2
2746     2
3502     2
2759     1
Name: trx_type_code, dtype: int64

#### Subset to only the formal business or agriculture loans:

In [29]:
# Restrict the formal-loan transactions to loan type code 2760.
bus_ag_trx = frmln_trx[frmln_trx['trx_type_code'].eq(2760)]

In [30]:
# Show the number of rows and columns in the type-2760 loan subset.
bus_ag_trx.shape

(442, 58)

In [31]:
# Size of the subset restricted to rows flagged unique_accnts == 1.
bus_ag_trx[bus_ag_trx['unique_accnts'].eq(1)].shape

(48, 58)

In [32]:
# Collect the account id from every row flagged unique_accnts == 1.
unique_accnt_ids = bus_ag_trx.loc[bus_ag_trx['unique_accnts'].eq(1), 'account_ids']

In [33]:
# Number of account ids collected.
unique_accnt_ids.shape[0]

48

## Cycle through loans, calculating the evolution of balance:

In [38]:
# Show the row/column count of the loan subset before cycling through the loans.
bus_ag_trx.shape

(442, 58)

In [34]:
# Empty frame that will hold the per-loan balance evolution once the loop below has run.
bal_evol_df = pd.DataFrame(data=None)

In [35]:
# Multiplier applied to trx_value_kes for each balance-sheet direction.
_DIRECTION_SIGN = {"Increase": 1.0, "Decrease": -1.0}


def _loan_balance_evolution(loan_trx, acct_id):
    """Return one loan's transactions in time order with a signed value and running balance.

    Parameters
    ----------
    loan_trx : pandas.DataFrame
        Rows belonging to a single account. The columns
        ``trx_stdtime_days_acc``, ``trx_bsheet_direction`` and
        ``trx_value_kes`` are read.
    acct_id
        Account identifier; only used in the printed messages.

    Returns
    -------
    pandas.DataFrame
        A sorted copy of ``loan_trx`` with two added columns:
        ``trx_val_dir`` (``+trx_value_kes`` for "Increase", ``-trx_value_kes``
        for "Decrease", NaN otherwise) and ``bal_evol`` (running sum of
        ``trx_val_dir``; NaN for loans that do not have more than one row).
    """
    # mergesort is stable, so transactions that share the same day keep
    # their original relative order instead of an arbitrary one.
    loan_df = loan_trx.sort_values("trx_stdtime_days_acc", kind="mergesort").copy()

    # Directions other than "Increase"/"Decrease" (including missing ones)
    # map to NaN; report each such row as the row-wise version did.
    sign = loan_df["trx_bsheet_direction"].map(_DIRECTION_SIGN)
    for index in loan_df.index[sign.isna()]:
        print("Error neither increase nor decrease at index" + str(index))
    loan_df["trx_val_dir"] = loan_df["trx_value_kes"] * sign
    print(loan_df.shape)

    if len(loan_df) > 1:
        # skipna=False keeps the behaviour of a plain running sum: once a row
        # has no signed value, every later balance is NaN as well.
        loan_df["bal_evol"] = loan_df["trx_val_dir"].cumsum(skipna=False)
    else:
        print('Loan has only 1 entry for %s' % acct_id)
        # No balance path for a single-entry loan; keep the column so that
        # every per-loan frame has the same set of columns.
        loan_df["bal_evol"] = np.nan
    return loan_df


# Build every per-loan frame first and concatenate once. DataFrame.append was
# removed in pandas 2.0, and rebuilding the result from scratch makes this
# cell safe to re-run without duplicating loans.
loan_frames = []
for idx, acct_id in enumerate(unique_accnt_ids):
    print(idx, acct_id)
    loan_rows = bus_ag_trx.loc[bus_ag_trx['account_ids'] == acct_id]
    loan_frames.append(_loan_balance_evolution(loan_rows, acct_id))

bal_evol_df = pd.concat(loan_frames, ignore_index=True) if loan_frames else pd.DataFrame()


0 105136540140100000
Error neither increase nor decrease at index19299
(9, 59)
1 105137049319900000
Error neither increase nor decrease at index3193
(6, 59)
2 56134798164800000
Error neither increase nor decrease at index1078
(9, 59)
3 59134666467600000
Error neither increase nor decrease at index19693
(22, 59)
4 60136436488300000
Error neither increase nor decrease at index17802
(9, 59)
5 60134752045800000
Error neither increase nor decrease at index20002
(8, 59)
6 60134787021500000
Error neither increase nor decrease at index15364
(2, 59)
7 84136514083500000
Error neither increase nor decrease at index5632
(1, 58)
Loan has only 1 entry for 84136514083500000
8 59134752569200000
Error neither increase nor decrease at index19873
Error neither increase nor decrease at index24068
(14, 59)
9 60137430710900000
Error neither increase nor decrease at index27
(6, 59)
10 84136879000800000
Error neither increase nor decrease at index9403
(9, 59)
11 60138558075900000
Error neither increase nor de

In [41]:
bal_evol_df.head()

Unnamed: 0,account_bsheet_desig,account_formal,account_ids,account_liquid,account_startclose_balance,bal_evol,ddd_gift,first_int_date,first_trx_date_acc,first_trx_date_hh,hh_ids,int_date,int_month,int_year,int_yr_mo,interview_designation,last_trx_date_acc,last_trx_date_hh,m_ids_owner,m_ids_trx,tot_acc_daysofobs,tot_acc_monthsofobs,tot_hh_daysofobs,tot_hh_monthsofobs,trx_bsheet_direction,trx_class_code,trx_class_desc,trx_date,trx_direction,trx_distance_km,trx_dq_round,trx_family_code,trx_family_desc,trx_fee,trx_id,trx_inkind_units,trx_inkind_value_kes,trx_inkind_value_usd,trx_mode_code,trx_mode_desc,trx_month,trx_outlet,trx_place_incommunity,trx_prx_purpose,trx_prx_purpose_fd,trx_stdtime_days_acc,trx_stdtime_days_hh,trx_stdtime_mnths_acc,trx_stdtime_mnths_hh,trx_stdtime_mnths_hh_nr,trx_type_code,trx_type_desc,trx_val_dir,trx_value_kes,trx_value_usd,trx_year,trx_yr_mo,unique_accnts,unique_hhs,unique_hm_owner
0,Liability,Formal,105136540140100000,,Start,18000.0,0,04sep2012,18mar2013,24sep2012,KELDK37,08apr2013,4,2013,2013_04,04=Diaries Interview,04oct2013,30nov2013,65134432186900000,65134432186900000,200,6,432,14,Increase,findev,"Borrowing, lending, savings or insurance media...",18mar2013,,,6.0,FRMLN,Formal loan,0.0,105136540174100000,,,,18.0,STARTING BALANCE,3,,,1. Starting balance (today),,0,175,0,6,6.034483,2760,Individual Business or Agriculture Loan,18000.0,18000.0,211.7647,2013,2013_03,1.0,,
1,Liability,Formal,105136540140100000,,,0.0,0,04sep2012,18mar2013,24sep2012,KELDK37,08apr2013,4,2013,2013_04,04=Diaries Interview,04oct2013,30nov2013,65134432186900000,65134432186900000,200,6,432,14,Decrease,findev,"Borrowing, lending, savings or insurance media...",22mar2013,Outflow,,6.0,FRMLN,Formal loan,0.0,105136540182500000,,,,3.0,"Transfer into bank, SACCO, or MFI account",3,"01=Bank, MFI, SACCO, insurance branch",,3. Payments,Repayment on financial liability,4,179,0,6,6.172414,2760,Individual Business or Agriculture Loan,-18000.0,18000.0,211.7647,2013,2013_03,,,
2,Liability,Formal,105136540140100000,,,60000.0,0,04sep2012,18mar2013,24sep2012,KELDK37,08apr2013,4,2013,2013_04,04=Diaries Interview,04oct2013,30nov2013,65134432186900000,65134432186900000,200,6,432,14,Increase,findev,"Borrowing, lending, savings or insurance media...",05apr2013,Inflow,8.651168,6.0,FRMLN,Formal loan,0.0,105136540188600000,,,,3.0,"Transfer into bank, SACCO, or MFI account",4,"01=Bank, MFI, SACCO, insurance branch",0.0,2. New borrowing,New borrowing on financial liability,18,193,0,6,6.655172,2760,Individual Business or Agriculture Loan,60000.0,60000.0,705.8823,2013,2013_04,,,
3,Liability,Formal,105136540140100000,,,55000.0,0,04sep2012,18mar2013,24sep2012,KELDK37,26sep2013,9,2013,2013_09,04=Diaries Interview,04oct2013,30nov2013,65134432186900000,65134432186900000,200,6,432,14,Decrease,findev,"Borrowing, lending, savings or insurance media...",04jun2013,Outflow,8.651168,17.0,FRMLN,Formal loan,0.0,105138433250900000,,,,13.0,Standing order,6,"01=Bank, MFI, SACCO, insurance branch",0.0,3. Payments,Repayment on financial liability,78,253,2,8,8.724138,2760,Individual Business or Agriculture Loan,-5000.0,5000.0,58.82353,2013,2013_06,,,
4,Liability,Formal,105136540140100000,,,50000.0,0,04sep2012,18mar2013,24sep2012,KELDK37,26sep2013,9,2013,2013_09,04=Diaries Interview,04oct2013,30nov2013,65134432186900000,65134432186900000,200,6,432,14,Decrease,findev,"Borrowing, lending, savings or insurance media...",04jul2013,Outflow,8.651168,17.0,FRMLN,Formal loan,0.0,105138433256800000,,,,13.0,Standing order,7,"01=Bank, MFI, SACCO, insurance branch",0.0,3. Payments,Repayment on financial liability,108,283,3,9,9.75862,2760,Individual Business or Agriculture Loan,-5000.0,5000.0,58.82353,2013,2013_07,,,


In [36]:
bal_evol_df.shape

(442, 60)

In [40]:
bal_evol_df.trx_prx_purpose.value_counts()

3. Payments                                 301
6. Closing Balance--End of last DQ           47
2. New borrowing                             34
1. Starting balance (today)                  30
5. Interest accruing                         11
DECREASE--Unexplained balance adjustment      7
4. Any known fees                             5
INCREASE--Unexplained balance adjustment      5
7. Refund from lender                         2
Name: trx_prx_purpose, dtype: int64

## How many starting balances are zero? 

In [43]:
bal_evol_df.loc[bal_evol_df['trx_prx_purpose']=="1. Starting balance (today)"].trx_val_dir.value_counts()

0.0         6
338847.0    1
88000.0     1
3500.0      1
12000.0     1
16000.0     1
298000.0    1
148609.0    1
23000.0     1
20000.0     1
40340.0     1
108000.0    1
9500.0      1
18870.0     1
4300.0      1
1700.0      1
116000.0    1
5450.0      1
41000.0     1
100000.0    1
45000.0     1
341850.0    1
16400.0     1
126450.0    1
18000.0     1
Name: trx_val_dir, dtype: int64

## So, of the 30 recorded starting balances, only 6 are zero; for every other loan with a starting-balance row we can safely begin at the "starting balance".

## For each loan with a starting balance of zero, we want to use the first "new borrowing" as the starting point and discard the transactions that precede it:

In [None]:
# Values of trx_prx_purpose that mark the two candidate starting points.
_START_BALANCE_PURPOSE = "1. Starting balance (today)"
_NEW_BORROWING_PURPOSE = "2. New borrowing"
# Multiplier applied to trx_value_kes for each balance-sheet direction.
_SIGN_BY_DIRECTION = {"Increase": 1.0, "Decrease": -1.0}


def _truncate_zero_start_loan(loan_trx, acct_id):
    """Restart a zero-starting-balance loan at its first new borrowing.

    Parameters
    ----------
    loan_trx : pandas.DataFrame
        Rows belonging to a single account. The columns
        ``trx_stdtime_days_acc``, ``trx_prx_purpose``,
        ``trx_bsheet_direction`` and ``trx_value_kes`` are read.
    acct_id
        Account identifier; only used in the printed messages.

    Returns
    -------
    pandas.DataFrame
        A time-sorted copy of the loan. If the loan has a starting-balance
        row whose value is zero and at least one new-borrowing row, every row
        before the first new borrowing is dropped. ``trx_val_dir`` and
        ``bal_evol`` are then recomputed on the remaining rows (``bal_evol``
        is NaN for loans that do not have more than one row left).
    """
    # Stable sort: same-day transactions keep their existing relative order.
    loan_df = loan_trx.sort_values("trx_stdtime_days_acc", kind="mergesort").copy()

    purpose = loan_df["trx_prx_purpose"]
    zero_start = (purpose.eq(_START_BALANCE_PURPOSE) & loan_df["trx_value_kes"].eq(0)).any()
    if zero_start:
        is_new_borrowing = purpose.eq(_NEW_BORROWING_PURPOSE).values
        if is_new_borrowing.any():
            # argmax on a boolean array gives the position of the first True.
            first_borrowing_pos = int(is_new_borrowing.argmax())
            loan_df = loan_df.iloc[first_borrowing_pos:].copy()
        else:
            print('No new borrowing found for %s; loan left untruncated' % acct_id)

    # Directions other than "Increase"/"Decrease" (including missing ones)
    # map to NaN and are reported row by row.
    sign = loan_df["trx_bsheet_direction"].map(_SIGN_BY_DIRECTION)
    for index in loan_df.index[sign.isna()]:
        print("Error neither increase nor decrease at index" + str(index))
    # Plain assignment overwrites any trx_val_dir / bal_evol columns already
    # present on the input, so the balance reflects the truncated history.
    loan_df["trx_val_dir"] = loan_df["trx_value_kes"] * sign
    print(loan_df.shape)

    if len(loan_df) > 1:
        # skipna=False: once a row has no signed value, later balances are NaN too.
        loan_df["bal_evol"] = loan_df["trx_val_dir"].cumsum(skipna=False)
    else:
        print('Loan has only 1 entry for %s' % acct_id)
        loan_df["bal_evol"] = np.nan
    return loan_df


# Collect the per-loan results separately and rebind bal_evol_df only after
# the loop: the loop reads from bal_evol_df, so appending to it while
# iterating would duplicate every loan. Running the cell again leaves already
# truncated loans unchanged because their zero starting-balance row is gone.
truncated_frames = []
for idx, acct_id in enumerate(unique_accnt_ids):
    print(idx, acct_id)
    loan_rows = bal_evol_df.loc[bal_evol_df['account_ids'] == acct_id]
    truncated_frames.append(_truncate_zero_start_loan(loan_rows, acct_id))

bal_evol_df = (
    pd.concat(truncated_frames, ignore_index=True)
    if truncated_frames
    else bal_evol_df.iloc[0:0]
)
