In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Toy frame used to exercise the condition/recording helpers below.
# The last row has an empty name on purpose.
df = pd.DataFrame(
    dict(
        name=['a', 'b', 'c', 'c', ''],
        id=[1, 2, 3, 4, 5],
    )
)

In [3]:
def check_condition(condition: dict, row: pd.Series) -> bool:
    """Return True when ``row`` satisfies every entry of ``condition``.

    Args:
        condition: Maps a key of ``row`` to either a collection of accepted
            values (list, tuple, set or frozenset) or a single value that
            must compare equal.
        row: The record to test; it is indexed with each key of ``condition``.

    Returns:
        True if all entries match (an empty ``condition`` always matches),
        False as soon as one entry does not.

    Raises:
        KeyError: If ``row`` has no entry for one of the condition keys.
    """
    for key, expected in condition.items():
        actual = row[key]
        # isinstance (rather than an exact ``type(...) == list`` check) lets
        # tuples, sets and list subclasses act as "any of these values".
        if isinstance(expected, (list, tuple, set, frozenset)):
            if actual not in expected:
                return False
        elif not (actual == expected):
            return False
    return True

In [4]:
# Print every (index, row) pair of `df` that satisfies `condition`.
condition = {'name': [''], 'id': 5}
for indexed_row in df.iterrows():
    _, series = indexed_row
    if not check_condition(condition, series):
        continue
    print(indexed_row)


(4, name     
id      5
Name: 4, dtype: object)


In [5]:
def record_a(row: pd.Series):
    """Build ``[id, 'cash', 2 * id, 'sales', 2 * id]`` from ``row['id']``."""
    row_id = row['id']
    amount = row_id * 2
    return [row_id, 'cash', amount, 'sales', amount]

def record_c(row: pd.Series):
    """Build ``[id, 'AR', 3 * id, 'sales', 3 * id]`` from ``row['id']``."""
    row_id = row['id']
    amount = row_id * 3
    return [row_id, 'AR', amount, 'sales', amount]

def record_default(row, id_col_name='id'):
    """Build a blank entry: the row's id followed by four empty strings."""
    blanks = [''] * 4
    return [row[id_col_name]] + blanks

# Ordered (condition, recorder) pairs for the toy frame: names 'a'/'b' are
# handled by record_a, name 'c' by record_c.
condition_fun_map = [
    (dict(name=['a', 'b']), record_a),
    (dict(name=['c']), record_c),
]

def record_transaction(cond_map: list[tuple], frame: pd.DataFrame, id_col_name='id') -> pd.DataFrame:
    """Append debit/credit columns to ``frame``, one entry per row.

    For each row, the first ``(condition, recorder)`` pair in ``cond_map``
    whose condition passes ``check_condition`` produces the entry; rows that
    match no condition get the blank entry from ``record_default``.

    Args:
        cond_map: Ordered ``(condition, recorder)`` pairs. Each recorder takes
            a row and returns a 5-item list:
            ``[id, debit acc, debit amt, credit acc, credit amt]``.
        frame: The rows to record.
        id_col_name: Name of the id column; forwarded to ``record_default``.

    Returns:
        A new frame (fresh 0..n-1 index) holding the columns of ``frame``
        followed by 'debit acc', 'debit amt', 'credit acc', 'credit amt'.
        It always has exactly one output row per input row, in input order.
    """
    trans_cols = ['debit acc', 'debit amt', 'credit acc', 'credit amt']

    transactions = []
    for _, row in frame.iterrows():
        for cond, func in cond_map:
            if check_condition(cond, row):
                transactions.append(func(row))
                break
        else:
            # No condition matched this row.
            transactions.append(record_default(row, id_col_name))

    trans_frame = pd.DataFrame(transactions, columns=[id_col_name] + trans_cols)

    # The entries were built in frame order, so attach them by position.
    # A key-based outer merge on the id column would multiply rows whenever
    # ids repeat or are missing (null keys are matched with each other).
    return pd.concat([frame.reset_index(drop=True), trans_frame[trans_cols]], axis=1)

# Record the toy frame with the name-based rules and display the result.
transaction_df = record_transaction(
    cond_map=condition_fun_map,
    frame=df,
)
transaction_df

Unnamed: 0,name,id,debit acc,debit amt,credit acc,credit amt
0,a,1,cash,2.0,sales,2.0
1,b,2,cash,4.0,sales,4.0
2,c,3,AR,9.0,sales,9.0
3,c,4,AR,12.0,sales,12.0
4,,5,,,,


In [6]:
# Outer-merge demo on ['id', 'name']: the left frame repeats the key (1, 'a'),
# the right frame holds one 'age' per key.
pd.merge(
    pd.DataFrame([[1, 'a'], [1, 'a'], [2, 'b']], columns=['id', 'name']),
    pd.DataFrame([[1, 'a', 20], [2, 'b', 23]], columns=['id', 'name', 'age']),
    how='outer',
    on=['id', 'name'],
)

Unnamed: 0,id,name,age
0,1,a,20
1,1,a,20
2,2,b,23


In [7]:
# True when transaction_df carries all four ledger columns.
{'debit acc', 'credit acc', 'debit amt', 'credit amt'}.issubset(transaction_df.columns)

True

In [8]:
def get_balance(frame: pd.DataFrame, acc_name: str):
    """Return total debits minus total credits booked on ``acc_name``.

    Args:
        frame: Ledger frame with the columns 'debit acc', 'debit amt',
            'credit acc' and 'credit amt'.
        acc_name: Account whose balance is computed.

    Returns:
        Sum of 'debit amt' over rows whose 'debit acc' equals ``acc_name``
        minus the sum of 'credit amt' over rows whose 'credit acc' equals
        ``acc_name``, as a float (0.0 when the account never appears).

    Raises:
        AssertionError: If ``frame`` lacks one of the ledger columns.
    """
    required_cols = ['debit acc', 'credit acc', 'debit amt', 'credit amt']
    # Validate the frame that was passed in, not a notebook-level global.
    missing_cols = [col for col in required_cols if col not in frame.columns]
    assert not missing_cols, f'frame is missing ledger columns: {missing_cols}'

    def side_total(acc_col: str, amt_col: str) -> float:
        # Amounts may be stored as objects/strings, hence the float() cast.
        amounts = frame.loc[frame[acc_col] == acc_name, amt_col]
        return sum((float(amount) for amount in amounts), 0.0)

    return side_total('debit acc', 'debit amt') - side_total('credit acc', 'credit amt')

# Print the balance of each account used in the toy ledger.
for account in ('cash', 'AR', 'sales'):
    print(get_balance(transaction_df, account))

6.0
21.0
-27.0


In [9]:
# Load the 'Income Reports' sheet of the income workbook.
# NOTE(review): absolute, machine-specific path; the notebook only runs where
# this file exists.
income_report = pd.read_excel(
    r'C:\Users\DIO\Documents\work\L-IFT\projects\p13 Financial statements\financial statements\lift_data\Income Report - Mon Feb 13 2023.xlsx',
    sheet_name='Income Reports',
)

In [10]:
# Load the 'Income Sources' sheet of the income workbook.
# NOTE(review): absolute, machine-specific path; the notebook only runs where
# this file exists.
income_sources = pd.read_excel(
    r'C:\Users\DIO\Documents\work\L-IFT\projects\p13 Financial statements\financial statements\lift_data\Income Report - Mon Feb 13 2023.xlsx',
    sheet_name='Income Sources',
)

In [11]:
# Keep only the income-source columns needed for the join with the reports.
income_source_cols = ['Respondent ID', 'Income source data ID', 'Income source sub-type']
income_source_filtered = income_sources.loc[:, income_source_cols]
income_source_filtered.head()

Unnamed: 0,Respondent ID,Income source data ID,Income source sub-type
0,4425,IS00005780,AD - Printing / photocopying OT
1,4425,IS00008153,AM - transport services OT
2,4429,IS00005883,EE-01 - Soap/liquid soap AP
3,4431,IS00005791,F - Garments production LM
4,4432,IS00005807,U-03 - Car/motorcycle repair OT


In [12]:
# Columns of the income report that are kept for recording transactions.
income_cols = [
    'Respondent ID',
    'Income report data ID',
    'Income report income source name',
    'Income report amount',
    'Income report expected amount',
    'Income report date received',
    'Income report transaction type',
    'Income report payment type',
    'Income report income source data ID',
]

In [13]:
# Group the 'Income partial payment <n> ...' columns by payment number <n>.
partial_cols: dict[int, list] = {}

for column_name in income_report.columns:
    if 'Income partial payment' not in column_name:
        continue
    # The payment number is the fourth space-separated token of the name.
    payment_no = int(column_name.split(' ')[3])
    partial_cols.setdefault(payment_no, []).append(column_name)

partial_cols

{1: ['Income partial payment 1 data ID',
  'Income partial payment 1 amount',
  'Income partial payment 1 currency',
  'Income partial payment 1 transaction type',
  'Income partial payment 1 date received',
  'Income partial payment 1 account data ID',
  'Income partial payment 1 account',
  'Income partial payment 1 account report ID'],
 2: ['Income partial payment 2 data ID',
  'Income partial payment 2 amount',
  'Income partial payment 2 currency',
  'Income partial payment 2 transaction type',
  'Income partial payment 2 date received',
  'Income partial payment 2 account data ID',
  'Income partial payment 2 account',
  'Income partial payment 2 account report ID'],
 3: ['Income partial payment 3 data ID',
  'Income partial payment 3 amount',
  'Income partial payment 3 currency',
  'Income partial payment 3 transaction type',
  'Income partial payment 3 date received',
  'Income partial payment 3 account data ID',
  'Income partial payment 3 account',
  'Income partial payment 

In [14]:
# Restrict the income report to the columns listed in income_cols.
income_filtered = income_report.loc[:, income_cols]
income_filtered.head()

Unnamed: 0,Respondent ID,Income report data ID,Income report income source name,Income report amount,Income report expected amount,Income report date received,Income report transaction type,Income report payment type,Income report income source data ID
0,4425,IN00116879,Income Source 1,1910.0,,2021-04-12,,,IS00005780
1,4425,IN00116837,Income Source 1,1465.0,,2021-04-13,,,IS00005780
2,4425,IN00116838,Income Source 1,1910.0,,2021-04-14,,,IS00005780
3,4425,IN00116839,Income Source 1,815.0,,2021-04-15,,,IS00005780
4,4425,IN00116840,Income Source 1,695.0,,2021-04-16,,,IS00005780


In [15]:
# Align the source-id column name with the one used in income_source_filtered
# so both frames can be merged on it.
income_filtered = income_filtered.rename(
    columns={'Income report income source data ID': 'Income source data ID'},
)

In [16]:
income_filtered

Unnamed: 0,Respondent ID,Income report data ID,Income report income source name,Income report amount,Income report expected amount,Income report date received,Income report transaction type,Income report payment type,Income source data ID
0,4425,IN00116879,Income Source 1,1910.0,,2021-04-12,,,IS00005780
1,4425,IN00116837,Income Source 1,1465.0,,2021-04-13,,,IS00005780
2,4425,IN00116838,Income Source 1,1910.0,,2021-04-14,,,IS00005780
3,4425,IN00116839,Income Source 1,815.0,,2021-04-15,,,IS00005780
4,4425,IN00116840,Income Source 1,695.0,,2021-04-16,,,IS00005780
...,...,...,...,...,...,...,...,...,...
22332,5883,IN00308273,Income Source 1,11050.0,,2022-02-18,Cash,Full payment,IS00006572
22333,5883,IN00308274,Income Source 1,34650.0,,2022-02-25,Cash,Full payment,IS00006572
22334,5883,IN00314821,Income Source 1,17505.0,,2022-03-04,Cash,Full payment,IS00006572
22335,5883,IN00325463,Income Source 1,22950.0,,2022-03-18,Cash,Full payment,IS00006572


In [17]:
# Attach the income-source sub-type to every report (outer join on source id
# and respondent). This rebinds income_filtered, so re-running this cell on
# its own would merge the already-merged frame again.
income_filtered = income_source_filtered.merge(
    income_filtered,
    how='outer',
    on=['Income source data ID', 'Respondent ID'],
)

In [18]:
income_filtered

Unnamed: 0,Respondent ID,Income source data ID,Income source sub-type,Income report data ID,Income report income source name,Income report amount,Income report expected amount,Income report date received,Income report transaction type,Income report payment type
0,4425,IS00005780,AD - Printing / photocopying OT,IN00116879,Income Source 1,1910.0,,2021-04-12,,
1,4425,IS00005780,AD - Printing / photocopying OT,IN00116837,Income Source 1,1465.0,,2021-04-13,,
2,4425,IS00005780,AD - Printing / photocopying OT,IN00116838,Income Source 1,1910.0,,2021-04-14,,
3,4425,IS00005780,AD - Printing / photocopying OT,IN00116839,Income Source 1,815.0,,2021-04-15,,
4,4425,IS00005780,AD - Printing / photocopying OT,IN00116840,Income Source 1,695.0,,2021-04-16,,
...,...,...,...,...,...,...,...,...,...,...
22342,5883,IS00006572,W - Construction services OT,IN00325463,Income Source 1,22950.0,,2022-03-18,Cash,Full payment
22343,5883,IS00006572,W - Construction services OT,IN00330120,Income Source 1,28950.0,,2022-03-24,Cash,Full payment
22344,5883,IS00006573,Other non-work Income,IN00146455,Income Source 2,1258.0,,2021-07-03,Cash,Full payment
22345,5883,IS00006573,Other non-work Income,IN00205324,Income Source 2,1260.0,,2021-09-18,Cash,Full payment


In [19]:
income_filtered['Income report transaction type'].unique()

array([nan, 'Cash', 'Bank transfer', 'cheque',
       'Credit given (payment after delivery)', 'Mobile money',
       'In-kind good or service', 'others',
       'In-Kind spoiled/broken/disaster'], dtype=object)

In [20]:
# Replace missing transaction types with '' so they can be matched by the
# recording rules. Assign the result back: calling fillna(inplace=True) on a
# selected column is chained assignment, which is deprecated and does not
# update the parent frame under pandas Copy-on-Write.
income_filtered['Income report transaction type'] = (
    income_filtered['Income report transaction type'].fillna('')
)

In [21]:
income_filtered['Income report transaction type'].unique()

array(['', 'Cash', 'Bank transfer', 'cheque',
       'Credit given (payment after delivery)', 'Mobile money',
       'In-kind good or service', 'others',
       'In-Kind spoiled/broken/disaster'], dtype=object)

In [22]:
def record_credit_sale(row: pd.Series):
    """Build ``[report id, 'AR', amount, 'Sales', amount]`` using 'Income report amount'."""
    report_id = row['Income report data ID']
    amount = row['Income report amount']
    return [report_id, 'AR', amount, 'Sales', amount]

def record_partial_payment(row: pd.Series):
    """Build ``[report id, 'AR', expected, 'Sales', expected]`` using 'Income report expected amount'."""
    report_id = row['Income report data ID']
    expected = row['Income report expected amount']
    return [report_id, 'AR', expected, 'Sales', expected]

def record_cash_sales(row: pd.Series):
    """Build ``[report id, 'Cash', amount, 'Sales', amount]`` using 'Income report amount'."""
    report_id = row['Income report data ID']
    amount = row['Income report amount']
    return [report_id, 'Cash', amount, 'Sales', amount]

# Ordered (condition, recorder) pairs for the income reports.
# NOTE(review): record_transaction stops at the first matching pair, so a row
# whose transaction type is in the first list is never tested against the
# 'Partial payment' rule; confirm this ordering is intended.
condition_fun_map2 = [
    (
        {'Income report transaction type': ['Cash', 'Bank transfer', 'cheque', 'Mobile money', '']},
        record_cash_sales,
    ),
    (
        {'Income report payment type': ['Partial payment']},
        record_partial_payment,
    ),
    (
        {'Income report transaction type': ['Credit given (payment after delivery)']},
        record_credit_sale,
    ),
]

# Record every income report with the income rules and preview the result.
income_transactions = record_transaction(
    cond_map=condition_fun_map2,
    frame=income_filtered,
    id_col_name='Income report data ID',
)
income_transactions.head()

Unnamed: 0,Respondent ID,Income source data ID,Income source sub-type,Income report data ID,Income report income source name,Income report amount,Income report expected amount,Income report date received,Income report transaction type,Income report payment type,debit acc,debit amt,credit acc,credit amt
0,4425,IS00005780,AD - Printing / photocopying OT,IN00116879,Income Source 1,1910.0,,2021-04-12,,,Cash,1910.0,Sales,1910.0
1,4425,IS00005780,AD - Printing / photocopying OT,IN00116837,Income Source 1,1465.0,,2021-04-13,,,Cash,1465.0,Sales,1465.0
2,4425,IS00005780,AD - Printing / photocopying OT,IN00116838,Income Source 1,1910.0,,2021-04-14,,,Cash,1910.0,Sales,1910.0
3,4425,IS00005780,AD - Printing / photocopying OT,IN00116839,Income Source 1,815.0,,2021-04-15,,,Cash,815.0,Sales,815.0
4,4425,IS00005780,AD - Printing / photocopying OT,IN00116840,Income Source 1,695.0,,2021-04-16,,,Cash,695.0,Sales,695.0


In [23]:
# to_csv does not create missing directories, so make sure the output folder
# exists before writing (needed on a fresh checkout).
Path('lift_data_transactions').mkdir(parents=True, exist_ok=True)
income_transactions.to_csv('lift_data_transactions/income.csv')

# Income Partial Payments

In [24]:
# Prototype for partial payment 1: one entry per income report row.
# Rows with an amount become ['Cash', amount, 'AR', amount]; rows without one
# get pd.NA in all four ledger columns.
partial_data_1 = income_report[['Respondent ID', 'Income report data ID'] + partial_cols[1]]

partial_1_transaction = []
for i, row in partial_data_1.iterrows():
    amount_col: str = 'Income partial payment 1 amount'
    report_id, amount = row['Income report data ID'], row[amount_col]
    if pd.isna(amount):
        entry = [report_id, pd.NA, pd.NA, pd.NA, pd.NA]
    else:
        entry = [report_id, 'Cash', amount, 'AR', amount]
    partial_1_transaction.append(entry)

partial_1_trans_df = pd.DataFrame(
    partial_1_transaction,
    columns=['Income report data ID', 'debit acc', 'debit amt', 'credit acc', 'credit amt'],
)
partial_1_trans_df

Unnamed: 0,Income report data ID,debit acc,debit amt,credit acc,credit amt
0,IN00116879,,,,
1,IN00116837,,,,
2,IN00116838,,,,
3,IN00116839,,,,
4,IN00116840,,,,
...,...,...,...,...,...
22332,IN00308273,,,,
22333,IN00308274,,,,
22334,IN00314821,,,,
22335,IN00325463,,,,


In [27]:
# Build, for every partial payment number, a frame holding that payment's
# columns plus the ledger columns. Rows with an amount are recorded as
# ['Cash', amount, 'AR', amount]; rows without one get pd.NA in all four.
partial_transaction_map = {}
for key in partial_cols:
    # Loop-invariant for the inner loop, so compute it once per payment number.
    amount_col: str = f'Income partial payment {key} amount'
    partial_data = income_report[['Respondent ID', 'Income report data ID'] + partial_cols[key]]

    partial_i_transaction = []
    for i, row in partial_data.iterrows():
        amount = row[amount_col]
        if pd.isna(amount):
            partial_i_transaction.append([pd.NA, pd.NA, pd.NA, pd.NA])
        else:
            partial_i_transaction.append(['Cash', amount, 'AR', amount])

    partial_i_trans_df = pd.DataFrame(
        partial_i_transaction,
        columns=['debit acc', 'debit amt', 'credit acc', 'credit amt'],
    )
    # The entries were built in row order, so attach them by position. A
    # key-based outer merge on 'Income report data ID' would multiply rows if
    # an id were repeated or missing.
    partial_i_trans_df = pd.concat([partial_data.reset_index(drop=True), partial_i_trans_df], axis=1)
    partial_transaction_map[key] = partial_i_trans_df

In [28]:
# to_csv does not create missing directories, so make sure the output folder
# exists before writing (needed on a fresh checkout).
Path('lift_data_transactions').mkdir(parents=True, exist_ok=True)

# Write one CSV per partial payment number.
for key, partial_df in partial_transaction_map.items():
    print(f'Writing partial payment {key}..')
    partial_df.to_csv(f'lift_data_transactions/income_partial_{key}.csv')

Writing partial payment 1..
Writing partial payment 2..
Writing partial payment 3..
Writing partial payment 4..
Writing partial payment 5..
Writing partial payment 6..
