In [1]:
import pandas as pd
import numpy as np

In [3]:
# Forward slash instead of a backslash: '\A' is not a valid string escape
# (newer Python versions warn about it) and a backslash is only a path
# separator on Windows, whereas '/' is accepted on every platform.
account_report = pd.read_excel(
    'lift_data/Account Report - Mon Feb 13 2023.xlsx',
    sheet_name='Account Reports',
)

In [4]:
# Preview the first rows of the loaded sheet to sanity-check the import.
account_report.head()

Unnamed: 0,Respondent ID,Gender,Age,Number of children,Marital Status,Country of Residence,Citizenship,Firm ID,Sector type,Date firm established,...,Account report amount,Account report currency,Account report date,Account report data type,Account report surveyor,Account report date created,Account report last updated,Account report edit count,Account report description,Account report tag
0,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,1910.0,ETB,2021-03-22,Account,Etalem,2021-07-12,2021-09-12,1,,
1,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,1950.0,ETB,2021-04-13,Expense,Etalem,2021-04-28,2021-09-24,0,,
2,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,280.0,ETB,2021-04-14,Expense,Etalem,2021-04-28,2021-09-24,0,,
3,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,1000.0,ETB,2021-04-15,Expense,Etalem,2021-04-28,2021-09-24,0,,
4,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,...,2100.0,ETB,2021-04-16,Account,Etalem,2021-04-21,2021-04-21,0,,


In [5]:
# List every column label so the ones needed downstream can be picked by name.
account_report.columns

Index(['Respondent ID', 'Gender', 'Age', 'Number of children',
       'Marital Status', 'Country of Residence', 'Citizenship', 'Firm ID',
       'Sector type', 'Date firm established', 'Age of Firm',
       'Number of Owners', 'Owner/s Gender', 'Number of Employees', 'Industry',
       'Location in the Country', 'Account report data ID',
       'Account report account data ID', 'Account report account name',
       'Account report deposit/withdrawal', 'Account report amount',
       'Account report currency', 'Account report date',
       'Account report data type', 'Account report surveyor',
       'Account report date created', 'Account report last updated',
       'Account report edit count', 'Account report description',
       'Account report tag'],
      dtype='object')

In [6]:
# Columns of the account report that are carried forward for booking.
account_cols = [
    'Respondent ID',
    'Account report data ID',
    'Account report deposit/withdrawal',
    'Account report amount',
    'Account report data type',
    'Account report date created',
]

In [7]:
# Narrow the report to the selected columns, then preview the result.
account_filtered = account_report.loc[:, account_cols]
account_filtered.head()

Unnamed: 0,Respondent ID,Account report data ID,Account report deposit/withdrawal,Account report amount,Account report data type,Account report date created
0,4425,AR00044395,Deposited,1910.0,Account,2021-07-12
1,4425,AR00229106,Withdrawn,1950.0,Expense,2021-04-28
2,4425,AR00229107,Withdrawn,280.0,Expense,2021-04-28
3,4425,AR00229108,Withdrawn,1000.0,Expense,2021-04-28
4,4425,AR00022776,Withdrawn,2100.0,Account,2021-04-21


In [8]:
def check_condition(condition: dict, row: pd.Series) -> bool:
    """Return True when ``row`` satisfies every entry of ``condition``.

    Each key of ``condition`` names a field of ``row``. The expected value is
    either a single value or a list of alternatives; a field matches when the
    text form of the expected value (or of at least one alternative) occurs as
    a substring of the text form of the cell.

    Both sides are passed through ``str`` before comparing, in the list case
    as well as the single-value case, so numeric and missing (NaN) cells are
    compared by their printed form instead of raising ``TypeError`` on the
    ``in`` test.

    Args:
        condition: Mapping of field name to an expected value or a list of
            acceptable values. An empty mapping matches every row; an empty
            list of alternatives matches nothing.
        row: Record to test; only ``row[key]`` lookups are performed.

    Returns:
        True if all fields match, False as soon as one does not.

    Raises:
        KeyError: If a key of ``condition`` is absent from ``row``.
    """
    for key, expected in condition.items():
        cell_text = str(row[key])
        # Treat a single expected value as a one-element list of alternatives
        # so both shapes go through the same substring test.
        candidates = expected if isinstance(expected, list) else [expected]
        if not any(str(candidate) in cell_text for candidate in candidates):
            return False
    return True

def record_default(row, id_col_name='index'):
    """Build the placeholder entry for a row that no rule matched.

    The result holds the row's identifier (read from ``id_col_name``) followed
    by four empty strings, one for each account/amount slot of an entry.
    """
    blanks = [''] * 4
    return [row[id_col_name], *blanks]

def record_transaction(cond_map: list[tuple], frame: pd.DataFrame, id_col_name='id') -> pd.DataFrame:
    """Append double-entry columns to ``frame`` using the first matching rule per row.

    For each row, the rules in ``cond_map`` are tried in order; the first one
    whose condition passes ``check_condition`` produces the entry. Rows that
    match no rule get the blank entry from ``record_default``.

    The entries are attached to their rows by position rather than by merging
    on ``id_col_name``: a key-based merge repeats rows whenever an identifier
    occurs more than once, and an outer merge may reorder rows by key.

    Args:
        cond_map: Sequence of ``(condition, func)`` pairs. ``condition`` is a
            mapping accepted by ``check_condition``; ``func`` takes the row and
            returns ``[id, debit acc, debit amt, credit acc, credit amt]``.
            The identifier element is not used for alignment.
        frame: Rows to book. It is not modified.
        id_col_name: Name of the identifier column in ``frame``.

    Returns:
        A new DataFrame with a fresh 0..n-1 index holding the columns of
        ``frame`` followed by 'debit acc', 'debit amt', 'credit acc' and
        'credit amt', one output row per input row in the original order.
    """
    entry_cols = ['debit acc', 'debit amt', 'credit acc', 'credit amt']

    transactions = []
    for _, row in frame.iterrows():
        for cond, func in cond_map:
            if check_condition(cond, row):
                transactions.append(func(row))
                break
        else:
            # No rule matched (or there are no rules): keep the row, unbooked.
            transactions.append(record_default(row, id_col_name))

    trans_frame = pd.DataFrame(transactions, columns=[id_col_name] + entry_cols)

    # Both frames get the same 0..n-1 index, so the column-wise concat lines
    # up entry i with input row i regardless of the identifier values.
    base = frame.reset_index(drop=True)
    return pd.concat([base, trans_frame[entry_cols]], axis=1)

def record_func_creator(debit_acc, credit_acc, id_col_name, amount_col):
    """Create a row-to-entry function bound to a fixed debit/credit account pair.

    The returned callable reads the identifier from ``id_col_name`` and the
    amount from ``amount_col`` of the row it is given, and returns
    ``[id, debit_acc, amount, credit_acc, amount]`` with the same amount on
    both sides.
    """
    def make_entry(row: pd.Series):
        amount = row[amount_col]
        return [row[id_col_name], debit_acc, amount, credit_acc, amount]

    return make_entry

In [9]:
# Ledger account names used on the debit/credit side of an entry.
CASH = 'cash'
EQUITY = 'equity'
WITHDRAWAL = 'withdrawal'

# Column labels of the account report that the booking rules read.
REPORT_ID = 'Account report data ID'
REPORT_TYPE = 'Account report data type'
DEP_WITH = 'Account report deposit/withdrawal'
AMOUNT_COL = 'Account report amount'

# Ordered booking rules: each pair is (condition, entry builder).
condition_func_map = [
    # 'Account' rows marked 'Deposited': debit cash, credit equity.
    (
        {REPORT_TYPE: ['Account'], DEP_WITH: ['Deposited']},
        record_func_creator(CASH, EQUITY, REPORT_ID, AMOUNT_COL),
    ),
    # 'Account' rows marked 'Withdrawn': debit withdrawal, credit cash.
    (
        {REPORT_TYPE: ['Account'], DEP_WITH: ['Withdrawn']},
        record_func_creator(WITHDRAWAL, CASH, REPORT_ID, AMOUNT_COL),
    ),
]


In [10]:
# Run every filtered report row through the rule table, keyed by report ID,
# then preview the booked result.
account_transactions = record_transaction(
    cond_map=condition_func_map,
    frame=account_filtered,
    id_col_name=REPORT_ID,
)
account_transactions.head()

Unnamed: 0,Respondent ID,Account report data ID,Account report deposit/withdrawal,Account report amount,Account report data type,Account report date created,debit acc,debit amt,credit acc,credit amt
0,4425,AR00044395,Deposited,1910.0,Account,2021-07-12,cash,1910.0,equity,1910.0
1,4425,AR00229106,Withdrawn,1950.0,Expense,2021-04-28,,,,
2,4425,AR00229107,Withdrawn,280.0,Expense,2021-04-28,,,,
3,4425,AR00229108,Withdrawn,1000.0,Expense,2021-04-28,,,,
4,4425,AR00022776,Withdrawn,2100.0,Account,2021-04-21,withdrawal,2100.0,cash,2100.0


In [11]:
# Persist the booked rows; the row index is written as the first CSV column.
account_transactions.to_csv('lift_data_transactions/account.csv', index=True)