In [1]:
import pandas as pd
import os
import data_table
import datetime as dt

from utils import filter_by_year, filter_by_year_month, filter_up_to, get_unique_months, get_unique_years, get_lastest_month, get_latest_year, get_months_list, get_years_list

from ratios import calc_current_ratio, calc_debt_ratio, calc_EM, calc_FATO, calc_GP, calc_PM, calc_quick_ratio, calc_ROA, calc_ROE, calc_TATO, calc_withdrawal_ratio


In [53]:
ASSET = [
    'cash','ar','equipment', 'building', 
    'live stock', 'land', 
    'informal loans given', 'loans to employees'
]

INCOME = [
    'sales',
]

EXPENSE = [
    'purchases', 'salary expense', 'utility expense',
    'miscellaneous expense', 'rent expense', 'professional fees',
    'transport expense', 'tax expense', 
    'loss from theft', 'bank fee', 'interest expense'
]

OTHER_INC_EXPENSE = [
    'gifts given', 'interest revenue'
]

EQUITY = [
    'withdrawal', 'equity'
]

LIABILITY = [
    'ap', 'informal loan', 'long term loan', 'short term loan'
]

#'interest revenue', 'interest expense'

In [22]:
def get_total_from_summary(summary:list[tuple[str, float]]):
    return sum([item[1] for item in summary])

def get_reverse_sign(summary:list[tuple[str, float]]):
    return [(summ[0], -1*summ[1]) for summ in summary]


def get_balance(id_filter:int, frame: pd.DataFrame, acc_name:str):
    sub = frame[frame['Respondent ID'] == id_filter]
    sub = sub[(~sub['debit amt'].isna()) & (~(sub['credit acc'].isna()))]
    debit = sum(sub[sub['debit acc'] == acc_name]['debit amt'].apply(lambda x: float(x)))
    credit = sum(sub[sub['credit acc'] == acc_name]['credit amt'].apply(lambda x: float(x)))
    # print(debit, credit)
    return debit - credit

def summarize(respondent_id:int, group:list, table: pd.DataFrame):
    return [(acc, get_balance(respondent_id, table, acc)) for acc in group]

def format_number(num):
    if num > 0:
        return f"{num:,.2f} "
    
    if num < 0:
        return f"({-1*num:,.2f})"
    
    return ''

def read_transaction_data():    
    data = []
    to_include = [
        'date', 'debit', 'credit', 'Respondent',
        'Income source',
    ]
    for p, dirs, fs in os.walk('lift_data_transactions'):
        for f in fs:
            path = os.path.join(p, f)
            # print(path)
            temp:pd.DataFrame = pd.read_csv(path)
            df_dict = {}
            for col in temp.columns:
                for hint in to_include:
                    if hint.lower() in col.lower():
                        if hint == 'date':
                            df_dict['date'] = temp[col]
                        else:
                            df_dict[col] = temp[col]
                        break
            df = pd.DataFrame(df_dict)
            # display(df.head())
            data.append(df)

    return pd.concat(data,axis=0).reset_index().drop(columns='index')

def get_retained_earnings(respondent_id, table: pd.DataFrame) -> float:
    income = get_reverse_sign(summarize(respondent_id, INCOME, table))
    total_income = get_total_from_summary(income)

    expenses = get_reverse_sign(summarize(respondent_id, EXPENSE, table))
    total_expense = get_total_from_summary(expenses)

    other = get_reverse_sign(summarize(respondent_id, OTHER_INC_EXPENSE, table))
    total_other = get_total_from_summary(other)
    
    net_income = sum([total_income, total_expense, total_other])
    
    withdrawal = get_total_from_summary(
        get_reverse_sign(summarize(respondent_id, ['withdrawal'], table))
    ) 

    retained_earnings = net_income + withdrawal
    return retained_earnings


def income_statement_and_RE(respondent_id, table: pd.DataFrame) -> pd.DataFrame:
    income = get_reverse_sign(summarize(respondent_id, INCOME, table))
    total_income = get_total_from_summary(income)

    expenses = get_reverse_sign(summarize(respondent_id, EXPENSE, table))
    total_expense = get_total_from_summary(expenses)

    other = get_reverse_sign(summarize(respondent_id, OTHER_INC_EXPENSE, table))
    total_other = get_total_from_summary(other)
    
    net_income = sum([total_income, total_expense, total_other])
    
    withdrawal = get_total_from_summary(
        get_reverse_sign(summarize(respondent_id, ['withdrawal'], table))
    ) 

    retained_earnings = net_income + withdrawal

    table_data = income + \
        [('Total Income', total_income)] + expenses + \
            [('Total Expense', total_expense)] + other + \
                [('Total other Income and expenses', total_other)] + \
                    [('Net Income', net_income), 
                     ('Withdrawal', withdrawal),
                     ('Additional Savings', retained_earnings)]

    formatted_table = pd.DataFrame(
        [
            [name.capitalize(), format_number(amount)]
            for name, amount in table_data
        ],
        columns=['Account', 'Balance']
    )
    formatted_table = formatted_table[formatted_table['Balance'] != ''].reset_index().drop(columns='index')
    return formatted_table


def balance_sheet(respondent_id, table: pd.DataFrame) -> pd.DataFrame:
    retained_earnings = get_retained_earnings(respondent_id, table)

    assets = summarize(respondent_id, ASSET, table)
    liabilities = get_reverse_sign(summarize(respondent_id, LIABILITY, table))
    equity = get_reverse_sign(summarize(respondent_id, ['equity'], table))
    
    total_a = get_total_from_summary(assets)
    total_l = get_total_from_summary(liabilities)
    total_e = get_total_from_summary(equity) + retained_earnings
    
    table_data = assets + [('Total assets', total_a)] + liabilities + [('Total liabilities', total_l)] + [('Equity', total_e), ('Total liability & Equity', total_l + total_e)]

    formatted_table = pd.DataFrame(
        [
            [name.capitalize(), format_number(amount)]
            for name, amount in table_data
        ],
        columns=['Account', 'Balance']
    )
    formatted_table= formatted_table[formatted_table['Balance'] != ''].reset_index().drop(columns='index')
    return formatted_table


def format_date(data:pd.DataFrame, date_col:str):
    dates = []
    for d in data['date']:
        if not pd.isna(d):
            year, month, day = d.split('-')
            dates.append(dt.date(int(year), int(month), int(day)))
        else:
            dates.append(pd.NA)
    data2 = data.copy()
    data2.drop(columns=date_col, inplace=True)
    data2[date_col] = dates
    return data2


def calculate_ratio_df(df: pd.DataFrame, respondent_id: int) -> pd.DataFrame:
    current_ratios = []
    quick_ratios = []
    fatos = []
    tatos = []

    d_ratios = []
    em_ratios = []

    roes = []
    roas = []
    pms = []
    gps = []

    payouts = []

    # years = []
    # months = []

    list_func_map = {
        calc_current_ratio: current_ratios, 
        calc_quick_ratio: quick_ratios,
        calc_FATO: fatos, 
        calc_TATO: tatos,
        calc_debt_ratio: d_ratios,
        calc_EM: em_ratios,
        calc_ROE: roes, 
        calc_ROA: roas,
        calc_PM: pms,
        calc_GP: gps,
        calc_withdrawal_ratio: payouts,
    }

    respondent_filtered = df[df['Respondent ID'] == respondent_id].reset_index().drop(columns='index')
    # for y in get_years_list(respondent_filtered, 'date'):
    #     for m in get_months_list(respondent_filtered, y, 'date'):
    #         monthly_data = filter_by_year_month(respondent_filtered, 'date', y, m)
    #         data_up_to = filter_up_to(respondent_filtered, 'date', y, m)

    #         temp_inc_stat = income_statement_and_RE(respondent_id, monthly_data)
    #         temp_bal_sheet = balance_sheet(respondent_id, data_up_to)
            
    #         years.append(y)
    #         months.append(m)

    #         for func, coll in list_func_map.items():
    #             coll.append(func(temp_inc_stat, temp_bal_sheet))

    inc_stat = income_statement_and_RE(respondent_id, respondent_filtered)
    bal_sheet = balance_sheet(respondent_id, respondent_filtered)

    for func, coll in list_func_map.items():
        coll.append(func(inc_stat, bal_sheet))

    ratio_data = pd.DataFrame({
        # 'Year': years,
        # 'Month': months,
        'Respondent': respondent_id,
        'Current Ratio': current_ratios,
        'Quick Ratio': quick_ratios,
        'FATO': fatos,
        'TATO': tatos,
        'Debt Ratio': d_ratios,
        'Equity Multiplier': em_ratios,
        'Gross Margin': gps,  
        'Profit Margin': pms,
        'Return on Asset': roas,
        'Return on Equity': roas,
        'Withdrawal Ratio': payouts
    })

    # ratio_dates = []
    # for i, row in ratio_data.iterrows():    
    #     y = int(row['Year'])
    #     m = int(row['Month'])
    #     p = pd.Period(f'{y}-{m}-1')
    #     last_day = p.days_in_month
    #     ratio_dates.append(dt.date(y, m, last_day))

    # ratio_data['date'] = ratio_dates

    return ratio_data


In [23]:
data_combined = pd.read_csv(r'data_combined.csv')
data_combined.drop(columns='Unnamed: 0', inplace=True)
data_combined2 = data_combined.copy()

data_combined2.loc[:, 'debit acc'] = data_combined2['debit acc'].str.lower()
data_combined2.loc[:, 'credit acc'] = data_combined2['credit acc'].str.lower()
data_combined2.loc[:, 'credit acc'] = data_combined2['credit acc'].str.replace(r'sales?', 'sales',regex=True)

data_combined2.loc[:, 'debit acc'] = data_combined2['debit acc'].str.replace('salaries and wages', 'salary expense')

data_combined2 = format_date(data_combined2, 'date')

  data_combined = pd.read_csv(r'data_combined.csv')


In [24]:
data_combined2.head()

Unnamed: 0,Respondent ID,debit acc,debit amt,credit acc,credit amt,Income source data ID,Income source sub-type,Income report income source name,date
0,4425.0,cash,1910.0,equity,1910.0,,,,2021-07-12
1,4425.0,,,,,,,,2021-04-28
2,4425.0,,,,,,,,2021-04-28
3,4425.0,,,,,,,,2021-04-28
4,4425.0,withdrawal,2100.0,cash,2100.0,,,,2021-04-21


In [25]:
ratio_df = None
for res_id in data_combined2['Respondent ID'].unique():
    temp = calculate_ratio_df(data_combined2, res_id)
    # display(temp)
    if type(ratio_df) == type(None):
        ratio_df = temp
    else:
        ratio_df = pd.concat([ratio_df, temp])
ratio_df.head()

Unnamed: 0,Respondent,Current Ratio,Quick Ratio,FATO,TATO,Debt Ratio,Equity Multiplier,Gross Margin,Profit Margin,Return on Asset,Return on Equity,Withdrawal Ratio
0,4425.0,44.843021,27.821745,0.305452,0.235211,0.005128,1.005155,0.823915,0.617714,0.145293,0.145293,1.191856
0,4429.0,-73.018893,-73.018893,0.723207,0.519278,-0.003862,0.996153,0.329987,0.203628,0.105739,0.105739,0.382327
0,4431.0,,,0.006458,0.004768,0.0,1.0,-2.071429,-2.951429,-0.014074,-0.014074,-1.082285
0,4432.0,,,0.012265,0.01219,0.0,1.0,1.0,0.637209,0.007767,0.007767,0.211679
0,4433.0,,,1.32105,0.493685,0.0,1.0,0.997352,0.516852,0.255162,0.255162,1.57904


In [26]:
ratio_df = ratio_df.reset_index().drop(columns='index')
ratio_df.head()

Unnamed: 0,Respondent,Current Ratio,Quick Ratio,FATO,TATO,Debt Ratio,Equity Multiplier,Gross Margin,Profit Margin,Return on Asset,Return on Equity,Withdrawal Ratio
0,4425.0,44.843021,27.821745,0.305452,0.235211,0.005128,1.005155,0.823915,0.617714,0.145293,0.145293,1.191856
1,4429.0,-73.018893,-73.018893,0.723207,0.519278,-0.003862,0.996153,0.329987,0.203628,0.105739,0.105739,0.382327
2,4431.0,,,0.006458,0.004768,0.0,1.0,-2.071429,-2.951429,-0.014074,-0.014074,-1.082285
3,4432.0,,,0.012265,0.01219,0.0,1.0,1.0,0.637209,0.007767,0.007767,0.211679
4,4433.0,,,1.32105,0.493685,0.0,1.0,0.997352,0.516852,0.255162,0.255162,1.57904


In [27]:
ratio_df.to_csv('ratio_df_v1.csv')

In [74]:
def get_res_statement_map(trans_df: pd.DataFrame):
    res_statement_map:dict[str, list[pd.DataFrame]] = dict()
    for res_id in trans_df['Respondent ID'].unique():
        respondent_filtered = trans_df[trans_df['Respondent ID'] == res_id].reset_index().drop(columns='index')
        temp_bal = balance_sheet(res_id, respondent_filtered)
        res_statement_map[res_id] = temp_bal
    return res_statement_map

res_statement_map = get_res_statement_map(data_combined2)

In [35]:
res_statement_map.keys()

dict_keys([4425.0, 4429.0, 4431.0, 4432.0, 4433.0, 4435.0, 4436.0, 4437.0, 4438.0, 4440.0, 4442.0, 4443.0, 4444.0, 4445.0, 4446.0, 4448.0, 4450.0, 4451.0, 4452.0, 4453.0, 4456.0, 4457.0, 4458.0, 4459.0, 4460.0, 4462.0, 4463.0, 4464.0, 4465.0, 4466.0, 4467.0, 4468.0, 4469.0, 4470.0, 4471.0, 4472.0, 4473.0, 4474.0, 4475.0, 4477.0, 4478.0, 4480.0, 4481.0, 4482.0, 4483.0, 4484.0, 4485.0, 4487.0, 4490.0, 4491.0, 4493.0, 4494.0, 4495.0, 4496.0, 4499.0, 4500.0, 4501.0, 4502.0, 4503.0, 4506.0, 4507.0, 4508.0, 4509.0, 4510.0, 4511.0, 4513.0, 4517.0, 4518.0, 4520.0, 4521.0, 4522.0, 4523.0, 4526.0, 4527.0, 4528.0, 4530.0, 4531.0, 4533.0, 4534.0, 4535.0, 4536.0, 4537.0, 4538.0, 4539.0, 4541.0, 4544.0, 4568.0, 4569.0, 4570.0, 4572.0, 4574.0, 4575.0, 4577.0, 4578.0, 4579.0, 4580.0, 4583.0, 4584.0, 4608.0, 4618.0, 4622.0, 4623.0, 4624.0, 4625.0, 4627.0, 4629.0, 4630.0, 4632.0, 4634.0, 4635.0, 4636.0, 4638.0, 4661.0, 4668.0, 4670.0, 4672.0, 4676.0, 4678.0, 4714.0, 4716.0, 4720.0, 4722.0, 4723.0, 4724.

In [40]:
res_statement_map[4435]

Unnamed: 0,Account,Balance
0,Cash,561763.00
1,Ar,5000.00
2,Equipment,121950.00
3,Informal loans given,15000.00
4,Total assets,703713.00
5,Informal loan,"(47,000.00)"
6,Total liabilities,"(47,000.00)"
7,Equity,750713.00
8,Total liability & equity,703713.00


In [48]:
from ratios import get_acc_bal_from_statement
import json

In [52]:
def check_incorrect_balances(bal_sheet: pd.DataFrame):
    acc_bal_map = dict()
    for i, row in bal_sheet.iterrows():
        if row['Account'].lower() in ASSET + LIABILITY:
            bal = get_acc_bal_from_statement(bal_sheet, row['Account'])
            if  bal < 0:
                acc_bal_map[row['Account']] = bal
    return acc_bal_map

check_incorrect_balances(res_statement_map[4435])

{'Informal loan': -47000.0}

In [50]:
print(json.dumps([
    {res_id: check_incorrect_balances(bal_sheet)} for res_id, bal_sheet in res_statement_map.items()
], indent=4))

[
    {
        "4425.0": {}
    },
    {
        "4429.0": {
            "Informal loan": -15000.0
        }
    },
    {
        "4431.0": {}
    },
    {
        "4432.0": {}
    },
    {
        "4433.0": {}
    },
    {
        "4435.0": {
            "Informal loan": -47000.0
        }
    },
    {
        "4436.0": {
            "Cash": -199804.6,
            "Ar": -76200.0,
            "Informal loan": -39000.0,
            "Long term loan": -30100.0,
            "Short term loan": -50000.0
        }
    },
    {
        "4437.0": {
            "Informal loan": -8000.0
        }
    },
    {
        "4438.0": {
            "Informal loan": -30000.0
        }
    },
    {
        "4440.0": {
            "Ar": -1855400.0
        }
    },
    {
        "4442.0": {
            "Cash": -517300.0,
            "Long term loan": -90000.0
        }
    },
    {
        "4443.0": {
            "Informal loan": -95000.0,
            "Long term loan": -150000.0
        }
    },
    {
     

In [51]:
data_combined2.columns

Index(['Respondent ID', 'debit acc', 'debit amt', 'credit acc', 'credit amt',
       'Income source data ID', 'Income source sub-type',
       'Income report income source name', 'date'],
      dtype='object')

In [60]:
def get_balance_corrected_trans_df(transaction_df: pd.DataFrame):
    CREDIT_ACC = 'credit acc'
    DEBIT_ACC = 'debit acc'
    DEBIT_AMT = 'debit amt'
    CREDIT_AMT = 'credit amt'
    RESPONDENT_ID = 'Respondent ID'

    correction_df_col = []
    #'interest revenue', 'interest expense'
    for res_id in transaction_df['Respondent ID'].unique():
        bal_sheet = balance_sheet(res_id, transaction_df)
        for i, row in bal_sheet.iterrows():
            acc_name = row['Account']
            bal = get_acc_bal_from_statement(bal_sheet, acc_name)
            if row['Account'].lower() in ASSET + LIABILITY:
                if  bal > 0:
                    continue

            if acc_name.lower() == 'cash':
                correction_df_col.append(
                    pd.DataFrame(
                        {
                        RESPONDENT_ID: [res_id],
                         DEBIT_ACC: ['cash'],
                         DEBIT_AMT: [-1 * bal],
                         CREDIT_ACC: ['equity'],
                         CREDIT_AMT: [-1 * bal]
                        }
                    )
                )
            elif acc_name.lower() == 'ar':
                correction_df_col.append(
                    pd.DataFrame(
                        {
                        RESPONDENT_ID: [res_id],
                         DEBIT_ACC: ['ar'],
                         DEBIT_AMT: [-1 * bal],
                         CREDIT_ACC: ['sales'],
                         CREDIT_AMT: [-1 * bal]
                        }
                    )
                )
            
            elif acc_name.lower() in ['informal loans given', 'loans to employees']:
                correction_df_col.append(
                    pd.DataFrame(
                        {
                        RESPONDENT_ID: [res_id],
                         DEBIT_ACC: [acc_name.lower()],
                         DEBIT_AMT: [-1 * bal],
                         CREDIT_ACC: ['interest revenue'],
                         CREDIT_AMT: [-1 * bal]
                        }
                    )
                )
            elif acc_name.lower() == 'ap':
                correction_df_col.append(
                    pd.DataFrame(
                        {
                        RESPONDENT_ID: [res_id],
                         DEBIT_ACC: ['purchases'],
                         DEBIT_AMT: [-1 * bal],
                         CREDIT_ACC: ['ap'],
                         CREDIT_AMT: [-1 * bal]
                        }
                    )
                )
            elif acc_name.lower() in ['informal loan', 'long term loan', 'short term loan']:
                correction_df_col.append(
                    pd.DataFrame(
                        {
                        RESPONDENT_ID: [res_id],
                         DEBIT_ACC: ['interest expense'],
                         DEBIT_AMT: [-1 * bal],
                         CREDIT_ACC: [acc_name.lower()],
                         CREDIT_AMT: [-1 * bal]
                        }
                    )
                )
    # print(correction_df_col)
    return pd.concat(correction_df_col + [transaction_df])

corrected_data = get_balance_corrected_trans_df(data_combined2)

In [69]:
corrected_data = corrected_data.reset_index().drop(columns='index')
corrected_data

Unnamed: 0,Respondent ID,debit acc,debit amt,credit acc,credit amt,Income source data ID,Income source sub-type,Income report income source name,date
0,4429.0,interest expense,15000.0,informal loan,15000.0,,,,
1,4435.0,interest expense,47000.0,informal loan,47000.0,,,,
2,4436.0,cash,199804.6,equity,199804.6,,,,
3,4436.0,ar,76200.0,sales,76200.0,,,,
4,4436.0,interest expense,39000.0,informal loan,39000.0,,,,
...,...,...,...,...,...,...,...,...,...
270796,5883.0,cash,9500.0,long term loan,9500.0,,,,2021-07-30
270797,5883.0,cash,9500.0,long term loan,9500.0,,,,2021-07-30
270798,5883.0,cash,9500.0,long term loan,9500.0,,,,2021-08-30
270799,5883.0,cash,19000.0,long term loan,19000.0,,,,2021-10-29


In [70]:
def get_ratio_df_for_all(transaction_df: pd.DataFrame):
    ratio_df = None
    for res_id in transaction_df['Respondent ID'].unique():
        temp = calculate_ratio_df(transaction_df, res_id)
        # display(temp)
        if type(ratio_df) == type(None):
            ratio_df = temp
        else:
            ratio_df = pd.concat([ratio_df, temp])
    return ratio_df

ratio_df2 = get_ratio_df_for_all(corrected_data)
ratio_df2.head()

Unnamed: 0,Respondent,Current Ratio,Quick Ratio,FATO,TATO,Debt Ratio,Equity Multiplier,Gross Margin,Profit Margin,Return on Asset,Return on Equity,Withdrawal Ratio
0,4429.0,,,0.723207,0.519278,0.0,1.0,0.329987,0.196191,0.101878,0.101878,0.39682
0,4435.0,,,0.947232,0.164151,0.0,1.0,0.532442,-0.101485,-0.016659,-0.016659,-19.302141
0,4436.0,,,0.161418,0.161418,0.0,1.0,0.299856,-1.377132,-0.222294,-0.222294,-0.78324
0,4437.0,,,11.576842,0.786853,0.0,1.0,0.992908,0.539844,0.424777,0.424777,0.150239
0,4438.0,,,11.300565,2.167707,0.0,1.0,0.484097,0.263669,0.571556,0.571556,1.287214


In [71]:
ratio_df2

Unnamed: 0,Respondent,Current Ratio,Quick Ratio,FATO,TATO,Debt Ratio,Equity Multiplier,Gross Margin,Profit Margin,Return on Asset,Return on Equity,Withdrawal Ratio
0,4429.0,,,0.723207,0.519278,0.0,1.0,0.329987,0.196191,0.101878,0.101878,0.396820
0,4435.0,,,0.947232,0.164151,0.0,1.0,0.532442,-0.101485,-0.016659,-0.016659,-19.302141
0,4436.0,,,0.161418,0.161418,0.0,1.0,0.299856,-1.377132,-0.222294,-0.222294,-0.783240
0,4437.0,,,11.576842,0.786853,0.0,1.0,0.992908,0.539844,0.424777,0.424777,0.150239
0,4438.0,,,11.300565,2.167707,0.0,1.0,0.484097,0.263669,0.571556,0.571556,1.287214
...,...,...,...,...,...,...,...,...,...,...,...,...
0,4862.0,,,8.062264,0.960297,0.0,1.0,0.822919,0.665341,0.638924,0.638924,0.012897
0,4864.0,,,19.464516,0.809553,0.0,1.0,0.645343,0.538863,0.436238,0.436238,0.058435
0,5104.0,,,1.559720,0.279256,0.0,1.0,0.994759,0.638088,0.178190,0.178190,1.076931
0,4759.0,,,0.000000,0.000000,0.0,1.0,,,0.000000,0.000000,


In [72]:
ratio_df2.to_csv('ratio_df_v2.csv')

In [77]:
res_statement_map2 = get_res_statement_map(corrected_data)
res_statement_map2.keys()

dict_keys([4429.0, 4435.0, 4436.0, 4437.0, 4438.0, 4440.0, 4442.0, 4443.0, 4444.0, 4446.0, 4448.0, 4452.0, 4453.0, 4456.0, 4458.0, 4462.0, 4463.0, 4464.0, 4465.0, 4467.0, 4470.0, 4475.0, 4477.0, 4480.0, 4481.0, 4484.0, 4490.0, 4493.0, 4494.0, 4496.0, 4499.0, 4500.0, 4501.0, 4503.0, 4506.0, 4507.0, 4510.0, 4511.0, 4513.0, 4518.0, 4520.0, 4521.0, 4523.0, 4528.0, 4531.0, 4533.0, 4534.0, 4536.0, 4537.0, 4544.0, 4575.0, 4577.0, 4580.0, 4583.0, 4584.0, 4608.0, 4618.0, 4622.0, 4624.0, 4625.0, 4632.0, 4634.0, 4635.0, 4638.0, 4661.0, 4716.0, 4720.0, 4724.0, 4732.0, 4737.0, 4738.0, 4743.0, 4744.0, 4745.0, 4761.0, 4764.0, 4767.0, 4788.0, 4789.0, 4793.0, 4797.0, 4816.0, 4817.0, 4822.0, 5127.0, 5883.0, 4425.0, 4431.0, 4432.0, 4433.0, 4445.0, 4450.0, 4451.0, 4457.0, 4459.0, 4460.0, 4466.0, 4468.0, 4469.0, 4471.0, 4472.0, 4473.0, 4474.0, 4478.0, 4482.0, 4483.0, 4485.0, 4487.0, 4491.0, 4495.0, 4502.0, 4508.0, 4509.0, 4517.0, 4522.0, 4526.0, 4527.0, 4530.0, 4535.0, 4538.0, 4539.0, 4541.0, 4568.0, 4569.

In [78]:
print(json.dumps([
    {res_id: check_incorrect_balances(bal_sheet)} for res_id, bal_sheet in res_statement_map2.items()
], indent=4))

[
    {
        "4429.0": {}
    },
    {
        "4435.0": {}
    },
    {
        "4436.0": {}
    },
    {
        "4437.0": {}
    },
    {
        "4438.0": {}
    },
    {
        "4440.0": {}
    },
    {
        "4442.0": {}
    },
    {
        "4443.0": {}
    },
    {
        "4444.0": {}
    },
    {
        "4446.0": {}
    },
    {
        "4448.0": {}
    },
    {
        "4452.0": {}
    },
    {
        "4453.0": {}
    },
    {
        "4456.0": {}
    },
    {
        "4458.0": {}
    },
    {
        "4462.0": {}
    },
    {
        "4463.0": {}
    },
    {
        "4464.0": {}
    },
    {
        "4465.0": {}
    },
    {
        "4467.0": {}
    },
    {
        "4470.0": {}
    },
    {
        "4475.0": {}
    },
    {
        "4477.0": {}
    },
    {
        "4480.0": {}
    },
    {
        "4481.0": {}
    },
    {
        "4484.0": {}
    },
    {
        "4490.0": {}
    },
    {
        "4493.0": {}
    },
    {
        "4494.0": {}
    },
    {
      

In [82]:
income_statement_and_RE(4467, corrected_data)

Unnamed: 0,Account,Balance
0,Sales,409100.00
1,Total income,409100.00
2,Purchases,"(452,596.00)"
3,Salary expense,"(56,200.00)"
4,Utility expense,"(14,000.00)"
5,Miscellaneous expense,(300.00)
6,Transport expense,(50.00)
7,Total expense,"(523,146.00)"
8,Net income,"(114,046.00)"
9,Withdrawal,"(28,850.00)"
