In [2]:
import pandas as pd
import numpy as np

In [3]:
loans_given_data = pd.read_excel(r'lift_data\Loan Given Report - Mon Feb 13 2023.xlsx', sheet_name='Loans given')
loans_given_data.head()

Unnamed: 0,Respondent ID,Gender,Age,Number of children,Marital Status,Country of Residence,Citizenship,Firm ID,Sector type,Date firm established,...,Loan given loan duration,Loan given description,Loan given surveyor,Loan given account data ID,Loan given account,Loan given account report ID,Loan given customer,Loan given date created,Loan given last updated,Loan given edit count
0,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,0 Undefined,Lent provided to family members,Etalem,AC00079753,Account 1,AR00126787,,2021-07-18,2021-07-18,0
1,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,0 Weeks,,Etalem,AC00079764,Account 2,AR00158732,,2021-10-11,2021-10-11,0
2,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,0 Undefined,,Etalem,,,,Customer,2021-10-11,2021-10-11,0
3,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,0 Undefined,,Etalem,,,,,2021-11-08,2021-11-08,0
4,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,0 Undefined,,Etalem,,,,,2021-11-22,2021-11-22,0


In [4]:
loans_given_data.columns

Index(['Respondent ID', 'Gender', 'Age', 'Number of children',
       'Marital Status', 'Country of Residence', 'Citizenship', 'Firm ID',
       'Sector type', 'Date firm established', 'Age of Firm',
       'Number of Owners', 'Owner/s Gender', 'Number of Employees', 'Industry',
       'Location in the Country', 'Loan given data ID', 'Loan given name',
       'Loan given loan receiver', 'Loan given amount given',
       'Loan given repayment amount', 'Loan given remaining amount',
       'Loan given currency', 'Loan given date', 'Loan given interest rate',
       'Loan given payment schedule', 'Loan given loan duration',
       'Loan given description', 'Loan given surveyor',
       'Loan given account data ID', 'Loan given account',
       'Loan given account report ID', 'Loan given customer',
       'Loan given date created', 'Loan given last updated',
       'Loan given edit count'],
      dtype='object')

In [5]:
loans_given_cols = [
    'Respondent ID', 'Loan given data ID', 'Loan given name',
    'Loan given loan receiver', 'Loan given amount given',
    'Loan given date', 
]

In [8]:
loans_given_filtered = loans_given_data[loans_given_cols]
loans_given_filtered.head()

Unnamed: 0,Respondent ID,Loan given data ID,Loan given name,Loan given loan receiver,Loan given amount given,Loan given date
0,4425,LG00001020,Loan Given 1,Family,200000.0,2015-04-15
1,4425,LG00002007,Loan Given 2,Family,5000.0,2021-09-20
2,4425,LG00002008,Loan Given 3,Customer,6604.0,2021-06-20
3,4425,LG00002294,Loan Given 4,Customer,1410.0,2021-11-06
4,4425,LG00002434,Loan Given 5,Customer,769.0,2021-11-16


In [9]:
def check_condition(condition:dict, row: pd.Series) -> bool:
    for key, value in condition.items():
        if type(value) == list:
            found_one = False
            for v in value:
                if v in row[key]:
                    found_one = True
                    break
            if found_one:
                continue
            return False
        elif str(value) in str(row[key]) :
            continue

        return False
    return True

def record_default(row, id_col_name='id'):
    return [row[id_col_name], '', '', '', '']

def record_transaction(cond_map:list[tuple], frame: pd.DataFrame, id_col_name = 'id') -> pd.DataFrame:
    transactions = []
    for row in frame.iterrows():
        recorded = False
        for cond, func in cond_map:
            if check_condition(cond, row[1]):
                transactions.append(func(row[1]))
                recorded = True
                break
        if not recorded:
            transactions.append(record_default(row[1], id_col_name))

    trans_frame = pd.DataFrame(transactions, columns=[id_col_name, 'debit acc', 'debit amt', 'credit acc', 'credit amt'])
    return pd.merge(frame, trans_frame, 'outer', id_col_name)

def record_func_creator(debit_acc, credit_acc, id_col_name, amount_col):
    def func(row: pd.Series):
        return [row[id_col_name], debit_acc, row[amount_col], credit_acc, row[amount_col]]

    return func

In [10]:
loans_given_filtered.columns

Index(['Respondent ID', 'Loan given data ID', 'Loan given name',
       'Loan given loan receiver', 'Loan given amount given',
       'Loan given date'],
      dtype='object')

In [11]:
AMOUNT_COL = 'Loan given amount given'
LOAN_ID = 'Loan given data ID'

LOAN_RECEIVER = 'Loan given loan receiver'

CASH = 'cash'
AR = 'AR'
SALE = 'sale'
LOANS_TO_EMPLOYEE = 'loans to employees'
INFORMAL_LOANS_GIVEN = 'informal loans given'

condition_func_map = [
    (
        {
            LOAN_RECEIVER: ['Customer']
        },
        record_func_creator(AR, SALE, LOAN_ID, AMOUNT_COL)
    ),
    (
        {
            LOAN_RECEIVER: ['Employee']
        },
        record_func_creator(LOANS_TO_EMPLOYEE, CASH, LOAN_ID, AMOUNT_COL)
    ),
    (
        {
            LOAN_RECEIVER: ['Friend', 'Family']
        },
        record_func_creator(INFORMAL_LOANS_GIVEN, CASH, LOAN_ID, AMOUNT_COL)
    ),
]

In [13]:
loan_given_transactions = record_transaction(condition_func_map, loans_given_filtered, LOAN_ID)
loan_given_transactions

Unnamed: 0,Respondent ID,Loan given data ID,Loan given name,Loan given loan receiver,Loan given amount given,Loan given date,debit acc,debit amt,credit acc,credit amt
0,4425,LG00001020,Loan Given 1,Family,200000.0,2015-04-15,informal loans given,200000.0,cash,200000.0
1,4425,LG00002007,Loan Given 2,Family,5000.0,2021-09-20,informal loans given,5000.0,cash,5000.0
2,4425,LG00002008,Loan Given 3,Customer,6604.0,2021-06-20,AR,6604.0,sale,6604.0
3,4425,LG00002294,Loan Given 4,Customer,1410.0,2021-11-06,AR,1410.0,sale,1410.0
4,4425,LG00002434,Loan Given 5,Customer,769.0,2021-11-16,AR,769.0,sale,769.0
...,...,...,...,...,...,...,...,...,...,...
216,4864,LG00000634,Loan Given 1,Friend,6500.0,2021-06-10,informal loans given,6500.0,cash,6500.0
217,5104,LG00001428,Loan Given 1,Friend,200.0,2021-08-11,informal loans given,200.0,cash,200.0
218,5104,LG00002399,Loan Given 2,Customer,4300.0,2021-11-18,AR,4300.0,sale,4300.0
219,5127,LG00000642,Loan Given 1,Family,261000.0,2020-07-01,informal loans given,261000.0,cash,261000.0


In [14]:
loan_given_transactions.to_csv('lift_data_transactions/loans_given.csv')

In [16]:
loans_given_repay_data = pd.read_excel(r'lift_data\Loan Given Report - Mon Feb 13 2023.xlsx', sheet_name='Loan given repayments')
loans_given_repay_data.head()

Unnamed: 0,Respondent ID,Gender,Age,Number of children,Marital Status,Country of Residence,Citizenship,Firm ID,Sector type,Date firm established,...,Loan given repayment report date,Loan given repayment report surveyor,Loan given repayment report account data ID,Loan given repayment report account,Loan given repayment report account report ID,Loan given repayment report date created,Loan given repayment report last updated,Loan given repayment report edit count,Loan given repayment report description,Loan given repayment report tag
0,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,2021-12-18,Etalem,AC00079753,Account 1,AR00214287,2021-12-20,2021-12-20,0,,
1,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,2021-12-18,Etalem,AC00079753,Account 1,AR00214288,2021-12-20,2021-12-20,0,,
2,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,2021-12-18,Etalem,AC00079753,Account 1,AR00214286,2021-12-20,2021-12-20,0,,
3,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,2022-03-16,Etalem,AC00079764,Account 2,AR00356843,2022-04-14,2022-04-14,0,,
4,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,2022-03-24,Etalem,AC00079764,Account 2,AR00356845,2022-04-14,2022-04-14,0,,


In [17]:
loans_given_repay_data.columns

Index(['Respondent ID', 'Gender', 'Age', 'Number of children',
       'Marital Status', 'Country of Residence', 'Citizenship', 'Firm ID',
       'Sector type', 'Date firm established', 'Age of Firm',
       'Number of Owners', 'Owner/s Gender', 'Number of Employees', 'Industry',
       'Location in the Country', 'Loan given repayment report data ID',
       'Loan given repayment report loan given data ID',
       'Loan given repayment report source name',
       'Loan given repayment report repaid amount',
       'Loan given repayment report currency',
       'Loan given repayment report date',
       'Loan given repayment report surveyor',
       'Loan given repayment report account data ID',
       'Loan given repayment report account',
       'Loan given repayment report account report ID',
       'Loan given repayment report date created',
       'Loan given repayment report last updated',
       'Loan given repayment report edit count',
       'Loan given repayment report descript

In [18]:
repay_cols = [
    'Respondent ID', 'Loan given repayment report data ID',
    'Loan given repayment report loan given data ID',
    'Loan given repayment report date', 
    'Loan given repayment report repaid amount'
]

In [20]:
loan_repay_filtered = loans_given_repay_data[repay_cols]
loan_repay_filtered

Unnamed: 0,Respondent ID,Loan given repayment report data ID,Loan given repayment report loan given data ID,Loan given repayment report date,Loan given repayment report repaid amount
0,4425,LR00001415,LG00002008,2021-12-18,1209.0
1,4425,LR00001416,LG00002294,2021-12-18,1209.0
2,4425,LR00001414,LG00002008,2021-12-18,3255.0
3,4425,LR00001873,LG00002644,2022-03-16,2140.0
4,4425,LR00001871,LG00002008,2022-03-24,2140.0
...,...,...,...,...,...
149,4822,LR00001485,LG00002473,2021-11-27,3500.0
150,4822,LR00001477,LG00002475,2021-11-27,1800.0
151,4822,LR00001476,LG00002474,2021-11-27,2400.0
152,4822,LR00001835,LG00002471,2021-12-11,7000.0


In [22]:
loan_repay_filtered.rename(columns={'Loan given repayment report loan given data ID': LOAN_ID}, inplace=True)
loan_repay_filtered

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  loan_repay_filtered.rename(columns={'Loan given repayment report loan given data ID': LOAN_ID}, inplace=True)


Unnamed: 0,Respondent ID,Loan given repayment report data ID,Loan given data ID,Loan given repayment report date,Loan given repayment report repaid amount
0,4425,LR00001415,LG00002008,2021-12-18,1209.0
1,4425,LR00001416,LG00002294,2021-12-18,1209.0
2,4425,LR00001414,LG00002008,2021-12-18,3255.0
3,4425,LR00001873,LG00002644,2022-03-16,2140.0
4,4425,LR00001871,LG00002008,2022-03-24,2140.0
...,...,...,...,...,...
149,4822,LR00001485,LG00002473,2021-11-27,3500.0
150,4822,LR00001477,LG00002475,2021-11-27,1800.0
151,4822,LR00001476,LG00002474,2021-11-27,2400.0
152,4822,LR00001835,LG00002471,2021-12-11,7000.0


In [23]:
loan_repay_filtered = pd.merge(loan_repay_filtered, loans_given_filtered[[LOAN_ID, LOAN_RECEIVER]], 'left', on=LOAN_ID)
loan_repay_filtered

Unnamed: 0,Respondent ID,Loan given repayment report data ID,Loan given data ID,Loan given repayment report date,Loan given repayment report repaid amount,Loan given loan receiver
0,4425,LR00001415,LG00002008,2021-12-18,1209.0,Customer
1,4425,LR00001416,LG00002294,2021-12-18,1209.0,Customer
2,4425,LR00001414,LG00002008,2021-12-18,3255.0,Customer
3,4425,LR00001873,LG00002644,2022-03-16,2140.0,Customer
4,4425,LR00001871,LG00002008,2022-03-24,2140.0,Customer
...,...,...,...,...,...,...
149,4822,LR00001485,LG00002473,2021-11-27,3500.0,Customer
150,4822,LR00001477,LG00002475,2021-11-27,1800.0,Customer
151,4822,LR00001476,LG00002474,2021-11-27,2400.0,Customer
152,4822,LR00001835,LG00002471,2021-12-11,7000.0,Customer


In [24]:
REPAY_AMOUNT_COL = 'Loan given repayment report repaid amount'
LOAN_REPAY_ID = 'Loan given repayment report data ID'


repay_condition_func_map = [
    (
        {
            LOAN_RECEIVER: ['Customer']
        },
        record_func_creator(CASH, AR, LOAN_REPAY_ID, REPAY_AMOUNT_COL)
    ),
    (
        {
            LOAN_RECEIVER: ['Employee']
        },
        record_func_creator(CASH, LOANS_TO_EMPLOYEE, LOAN_REPAY_ID, REPAY_AMOUNT_COL)
    ),
    (
        {
            LOAN_RECEIVER: ['Friend', 'Family']
        },
        record_func_creator(CASH, INFORMAL_LOANS_GIVEN, LOAN_REPAY_ID, REPAY_AMOUNT_COL)
    ),
]

In [26]:
repay_transactions = record_transaction(repay_condition_func_map, loan_repay_filtered, LOAN_REPAY_ID)
repay_transactions.head()

Unnamed: 0,Respondent ID,Loan given repayment report data ID,Loan given data ID,Loan given repayment report date,Loan given repayment report repaid amount,Loan given loan receiver,debit acc,debit amt,credit acc,credit amt
0,4425,LR00001415,LG00002008,2021-12-18,1209.0,Customer,cash,1209.0,AR,1209.0
1,4425,LR00001416,LG00002294,2021-12-18,1209.0,Customer,cash,1209.0,AR,1209.0
2,4425,LR00001414,LG00002008,2021-12-18,3255.0,Customer,cash,3255.0,AR,3255.0
3,4425,LR00001873,LG00002644,2022-03-16,2140.0,Customer,cash,2140.0,AR,2140.0
4,4425,LR00001871,LG00002008,2022-03-24,2140.0,Customer,cash,2140.0,AR,2140.0


In [27]:
repay_transactions.to_csv('lift_data_transactions/loans_given_repayment.csv')