In [6]:
import pandas as pd
import os
import numpy as np

In [7]:
def read_transaction_data():    
    data = []
    to_include = [
        'date', 'debit', 'credit', 'Respondent',
        'Income source',
    ]
    for p, dirs, fs in os.walk('lift_data_transactions'):
        for f in fs:
            path = os.path.join(p, f)
            print(path)
            temp:pd.DataFrame = pd.read_csv(path)
            df_dict = {}
            for col in temp.columns:
                for hint in to_include:
                    if hint.lower() in col.lower():
                        if hint == 'date':
                            df_dict['date'] = temp[col]
                        else:
                            df_dict[col] = temp[col]
                        break
            df = pd.DataFrame(df_dict)
            # display(df.head())
            data.append(df)

    return pd.concat(data,axis=0).reset_index().drop(columns='index')

data_combined = read_transaction_data()
data_combined

lift_data_transactions\account.csv
lift_data_transactions\assets.csv
lift_data_transactions\employee.csv
lift_data_transactions\expense.csv
lift_data_transactions\expense_partial_1.csv
lift_data_transactions\expense_partial_2.csv
lift_data_transactions\income.csv
lift_data_transactions\income_partial_1.csv
lift_data_transactions\income_partial_2.csv
lift_data_transactions\income_partial_3.csv
lift_data_transactions\income_partial_4.csv
lift_data_transactions\income_partial_5.csv
lift_data_transactions\income_partial_6.csv
lift_data_transactions\loans_given.csv
lift_data_transactions\loans_given_repayment.csv
lift_data_transactions\loans_taken.csv
lift_data_transactions\loan_taken_repayment.csv


Unnamed: 0,Respondent ID,date,debit acc,debit amt,credit acc,credit amt,Income source data ID,Income source sub-type,Income report income source name
0,4425.0,2021-07-12,cash,1910.0,equity,1910.0,,,
1,4425.0,2021-04-28,,,,,,,
2,4425.0,2021-04-28,,,,,,,
3,4425.0,2021-04-28,,,,,,,
4,4425.0,2021-04-21,withdrawal,2100.0,cash,2100.0,,,
...,...,...,...,...,...,...,...,...,...
270677,5883.0,2021-07-30,cash,9500.0,long term loan,9500.0,,,
270678,5883.0,2021-07-30,cash,9500.0,long term loan,9500.0,,,
270679,5883.0,2021-08-30,cash,9500.0,long term loan,9500.0,,,
270680,5883.0,2021-10-29,cash,19000.0,long term loan,19000.0,,,


In [8]:
def get_balance(id_filter:int, frame: pd.DataFrame, acc_name:str):
    sub = frame[frame['Respondent ID'] == id_filter]
    sub = sub[(~sub['debit amt'].isna()) & (~(sub['credit acc'].isna()))]
    # display(sub)
    debit = sum(sub[sub['debit acc'] == acc_name]['debit amt'].apply(lambda x: float(x)))
    credit = sum(sub[sub['credit acc'] == acc_name]['credit amt'].apply(lambda x: float(x)))
    # print(debit, credit)
    return debit - credit


def format_number(num):
    if num > 0:
        return f"{num:.2f} "
    
    if num < 0:
        return f"({-1*num:.2f})"
    
    return ''


In [9]:
data_combined2 = data_combined.copy()

In [10]:
data_combined2.loc[:, 'debit acc'] = data_combined2['debit acc'].str.lower()
data_combined2.loc[:, 'credit acc'] = data_combined2['credit acc'].str.lower()

In [11]:
data_combined2['credit acc'].unique()

array(['equity', nan, 'cash', 'ap', 'purchases', 'sales', 'ar', 'sale',
       'informal loans given', 'loans to employees', 'informal loan',
       'long term loan'], dtype=object)

In [12]:
data_combined2.loc[:, 'credit acc'] = data_combined2['credit acc'].str.replace(r'sales?', 'sales',regex=True)

In [13]:
data_combined2['credit acc'].unique()

array(['equity', nan, 'cash', 'ap', 'purchases', 'sales', 'ar',
       'informal loans given', 'loans to employees', 'informal loan',
       'long term loan'], dtype=object)

In [14]:
data_combined2.loc[:, 'debit acc'] = data_combined2['debit acc'].str.replace('salaries and wages', 'salary expense')

In [15]:
data_combined2['debit acc'].unique()

array(['cash', nan, 'withdrawal', 'equipment', 'building', 'live stock',
       'land', 'salary expense', 'purchases', 'utility expense',
       'miscellaneous expense', 'rent expense', 'professional fees',
       'transport expense', 'tax expense', 'gifts given',
       'loss from theft', 'bank fee', 'ap', 'ar', 'informal loans given',
       'loans to employees', 'informal loan', 'long term loan',
       'short term loan'], dtype=object)

In [16]:
data_combined2['credit acc'].unique()

array(['equity', nan, 'cash', 'ap', 'purchases', 'sales', 'ar',
       'informal loans given', 'loans to employees', 'informal loan',
       'long term loan'], dtype=object)

In [17]:
data_combined2.to_csv('data_combined.csv')

In [18]:
ASSET = [
    'cash','ar','equipment', 'building', 
    'live stock', 'land', 
    'informal loans given', 'loans to employees'
]

INCOME = [
    'sales',
]

EXPENSE = [
    'purchases', 'salary expense', 'utility expense',
    'miscellaneous expense', 'rent expense', 'professional fees',
    'transport expense', 'tax expense', 
    'loss from theft', 'bank fee'
]

OTHER_INC_EXPENSE = [
    'gifts given',
]

EQUITY = [
    'withdrawal', 'equity'
]

LIABILITY = [
    'ap', 'informal loan', 'long term loan', 'short term loan'
]

In [19]:
def summarize(respondent_id:int, group:list, table: pd.DataFrame):
    return [(acc, get_balance(respondent_id, table, acc)) for acc in group]

def get_total_from_summary(summary:list[tuple[str, float]]):
    return sum([item[1] for item in summary])

def get_reverse_sign(summary:list[tuple[str, float]]):
    return [(summ[0], -1*summ[1]) for summ in summary]

In [20]:
summarize(4425, INCOME, data_combined2)

[('sales', -538940.0)]

In [21]:
def income_statement_and_RE(respondent_id, table: pd.DataFrame) -> pd.DataFrame:
    income = get_reverse_sign(summarize(respondent_id, INCOME, table))
    total_income = get_total_from_summary(income)

    expenses = get_reverse_sign(summarize(respondent_id, EXPENSE, table))
    total_expense = get_total_from_summary(expenses)

    other = get_reverse_sign(summarize(respondent_id, OTHER_INC_EXPENSE, table))
    total_other = get_total_from_summary(other)
    
    net_income = sum([total_income, total_expense, total_other])

    # print('IncomeStatement:',income, total_income, total_expense, total_other, net_income)
    
    withdrawal = get_total_from_summary(
        get_reverse_sign(summarize(respondent_id, ['withdrawal'], table))
    ) 

    retained_earnings = net_income + withdrawal

    table_data = income + \
        [('Total Income', total_income)] + expenses + \
            [('Total Expense', total_expense)] + other + \
                [('Total other Income and expenses', total_other)] + \
                    [('Net Income', net_income), 
                     ('Withdrawal', withdrawal),
                     ('Retained Earnings', retained_earnings)]

    formatted_table = pd.DataFrame(
        [
            [name.capitalize(), format_number(amount)]
            for name, amount in table_data
        ],
        columns=['Account', 'Balance']
    )
    # formatted_table= formatted_table[formatted_table['Balance'] != ''].reset_index().drop(columns='index')
    return formatted_table

income_statement_and_RE(4425, data_combined2)

Unnamed: 0,Account,Balance
0,Sales,538940.00
1,Total income,538940.00
2,Purchases,(94899.00)
3,Salary expense,(42000.00)
4,Utility expense,(3339.00)
5,Miscellaneous expense,(35752.00)
6,Rent expense,(24000.00)
7,Professional fees,(1795.00)
8,Transport expense,(600.00)
9,Tax expense,(3644.00)


In [22]:

def get_retained_earnings(respondent_id, table: pd.DataFrame) -> float:
    income = get_reverse_sign(summarize(respondent_id, INCOME, table))
    total_income = get_total_from_summary(income)

    expenses = get_reverse_sign(summarize(respondent_id, EXPENSE, table))
    total_expense = get_total_from_summary(expenses)

    other = get_reverse_sign(summarize(respondent_id, OTHER_INC_EXPENSE, table))
    total_other = get_total_from_summary(other)
    
    net_income = sum([total_income, total_expense, total_other])
    
    withdrawal = get_total_from_summary(
        get_reverse_sign(summarize(respondent_id, ['withdrawal'], table))
    ) 

    retained_earnings = net_income + withdrawal
    return retained_earnings

def balance_sheet(respondent_id, table: pd.DataFrame) -> pd.DataFrame:
    # income_stat = income_statement_and_RE(respondent_id, table)
    retained_earnings = get_retained_earnings(respondent_id, table)

    assets = summarize(respondent_id, ASSET, table)
    liabilities = get_reverse_sign(summarize(respondent_id, LIABILITY, table))
    equity = get_reverse_sign(summarize(respondent_id, ['equity'], table))
    
    total_a = get_total_from_summary(assets)
    total_l = get_total_from_summary(liabilities)
    total_e = get_total_from_summary(equity) + retained_earnings
    
    table_data = assets + [('Total assets', total_a)] + liabilities + [('Total liabilities', total_l)] + [('Equity', total_e)]

    formatted_table = pd.DataFrame(
        [
            [name.capitalize(), format_number(amount)]
            for name, amount in table_data
        ],
        columns=['Account', 'Balance']
    )
    # formatted_table= formatted_table[formatted_table['Balance'] != ''].reset_index().drop(columns='index')
    return formatted_table


balance_sheet(4425, data_combined2)

Unnamed: 0,Account,Balance
0,Cash,321389.5
1,Ar,5516.0
2,Equipment,1764400.0
3,Building,
4,Live stock,
5,Land,
6,Informal loans given,200000.0
7,Loans to employees,
8,Total assets,2291305.5
9,Ap,


In [23]:
income_statement_and_RE(4429, data_combined2)

Unnamed: 0,Account,Balance
0,Sales,2017023.00
1,Total income,2017023.00
2,Purchases,(1351431.60)
3,Salary expense,(187540.00)
4,Utility expense,(1200.00)
5,Miscellaneous expense,(22000.00)
6,Rent expense,
7,Professional fees,
8,Transport expense,(7100.00)
9,Tax expense,(37030.00)


In [24]:
data_combined2.columns

Index(['Respondent ID', 'date', 'debit acc', 'debit amt', 'credit acc',
       'credit amt', 'Income source data ID', 'Income source sub-type',
       'Income report income source name'],
      dtype='object')

In [25]:
import datetime as dt

In [26]:
def format_date(data:pd.DataFrame, date_col:str):
    dates = []
    for d in data_combined2['date']:
        if not pd.isna(d):
            year, month, day = d.split('-')
            dates.append(dt.date(int(year), int(month), int(day)))
        else:
            dates.append(pd.NA)
    data2 = data.copy()
    data2.drop(columns=date_col, inplace=True)
    data2[date_col] = dates
    return data2


In [27]:
data_combined3 = format_date(data_combined2, 'date')
data_combined3

Unnamed: 0,Respondent ID,debit acc,debit amt,credit acc,credit amt,Income source data ID,Income source sub-type,Income report income source name,date
0,4425.0,cash,1910.0,equity,1910.0,,,,2021-07-12
1,4425.0,,,,,,,,2021-04-28
2,4425.0,,,,,,,,2021-04-28
3,4425.0,,,,,,,,2021-04-28
4,4425.0,withdrawal,2100.0,cash,2100.0,,,,2021-04-21
...,...,...,...,...,...,...,...,...,...
270677,5883.0,cash,9500.0,long term loan,9500.0,,,,2021-07-30
270678,5883.0,cash,9500.0,long term loan,9500.0,,,,2021-07-30
270679,5883.0,cash,9500.0,long term loan,9500.0,,,,2021-08-30
270680,5883.0,cash,19000.0,long term loan,19000.0,,,,2021-10-29


In [28]:
data_2021 = data_combined3[(data_combined3['date'] >= dt.date(2021, 1, 1)) & (data_combined3['date'] <=dt.date(2021, 12, 31))]
data_2021.head()

Unnamed: 0,Respondent ID,debit acc,debit amt,credit acc,credit amt,Income source data ID,Income source sub-type,Income report income source name,date
0,4425.0,cash,1910.0,equity,1910.0,,,,2021-07-12
1,4425.0,,,,,,,,2021-04-28
2,4425.0,,,,,,,,2021-04-28
3,4425.0,,,,,,,,2021-04-28
4,4425.0,withdrawal,2100.0,cash,2100.0,,,,2021-04-21


In [29]:
def filter_by_year(df:pd.DataFrame, date_col: str, year:int) -> pd.DataFrame:
    return df[(df[date_col] >= dt.date(year, 1, 1)) & (df[date_col] <=dt.date(year, 12, 31))].reset_index().drop(columns='index')

def filter_by_year_month(df:pd.DataFrame, date_col: str, year:int, month:int):
    p = pd.Period(f'{year}-{month}-1')
    days = p.daysinmonth
    return df[(df[date_col] >= dt.date(year, month, 1)) & (df[date_col] <=dt.date(year, month, days))].reset_index().drop(columns='index')



In [30]:
def filter_up_to(df: pd.DataFrame, date_col: str, year: int, month: int=None):
    if month == None:
        month = 12
    p = pd.Period(f'{year}-{month}-1')
    days = p.days_in_month
    return df[df[date_col] <= dt.date(year, month, days)]

In [31]:
def get_latest_year(df:pd.DataFrame, date_col:str):
    date_ordered = df[~df[date_col].isna()][date_col].sort_values().reset_index().drop(columns='index')
    return date_ordered.loc[len(date_ordered)-1][0].year

get_latest_year(data_combined3, 'date')

2022

In [32]:
def get_lastest_month(df: pd.DataFrame, date_col:str, year:int):
    filtered = filter_by_year(df, date_col, year)
    return filtered[date_col].apply(lambda x: x.month).max()


Financial ratios and health

In [33]:

def get_unique_years(df: pd.DataFrame, date_col: str):
    not_na = df[~df[date_col].isna()]
    return not_na[date_col].apply(lambda x: x.year).unique()


def get_unique_months(df: pd.DataFrame, date_col: str):
    not_na = df[~df[date_col].isna()]
    return not_na[date_col].apply(lambda x: x.month).unique()
    

In [34]:
years = get_unique_years(data_combined3, 'date')
years[years >= 2021]

array([2021, 2022], dtype=int64)

In [35]:
get_unique_months(filter_by_year(data_combined3, 'date', 2022), 'date')

array([ 1,  2,  3,  4,  5,  6, 10, 11,  7, 12,  8,  9], dtype=int64)

In [36]:
res_id = 4425
respondent_filtered = data_combined3[data_combined3['Respondent ID'] == res_id].reset_index().drop(columns='index')

In [37]:
get_unique_months(filter_by_year(respondent_filtered, 'date', 2021), 'date')

array([ 7,  4,  5,  6,  8,  9, 10, 11, 12,  3,  2], dtype=int64)

In [38]:
def unformat_num(num_str:str) -> float:
    if num_str == '':
        return 0
    to_include = ['.']
    to_replace = {'(': '-'}
    new_form = ''
    for char in num_str:
        if char in to_replace:
            new_form += to_replace[char]
            continue
    
        if char.isdecimal() or char in to_include:
            new_form += char
            continue
    
    return float(new_form)

print(unformat_num('(34.00)'))
print(unformat_num('(3,400.00)'))
print(unformat_num('(34,000.00)'))
print(unformat_num('34,000.00'))
print(unformat_num('34,000.00'))

-34.0
-3400.0
-34000.0
34000.0
34000.0


In [39]:
inc_4425 = income_statement_and_RE(4425, data_combined3)
inc_4425['Balance2'] = inc_4425['Balance'].apply(lambda x: unformat_num(x))
display(inc_4425)
bal_4425 = balance_sheet(4425, data_combined3)
display(bal_4425)

Unnamed: 0,Account,Balance,Balance2
0,Sales,538940.00,538940.0
1,Total income,538940.00,538940.0
2,Purchases,(94899.00),-94899.0
3,Salary expense,(42000.00),-42000.0
4,Utility expense,(3339.00),-3339.0
5,Miscellaneous expense,(35752.00),-35752.0
6,Rent expense,(24000.00),-24000.0
7,Professional fees,(1795.00),-1795.0
8,Transport expense,(600.00),-600.0
9,Tax expense,(3644.00),-3644.0


Unnamed: 0,Account,Balance
0,Cash,321389.5
1,Ar,5516.0
2,Equipment,1764400.0
3,Building,
4,Live stock,
5,Land,
6,Informal loans given,200000.0
7,Loans to employees,
8,Total assets,2291305.5
9,Ap,


In [40]:
def get_acc_bal_from_statement(df: pd.DataFrame, acc_name:str) -> float:
    return df[df['Account'] == acc_name]['Balance'].apply(lambda x: unformat_num(x)).sum()

get_acc_bal_from_statement(inc_4425, 'Total expense')

-206029.0

### Liquidity

In [41]:
def check_ratio(func):
    def wrapper(inc_stat, bal_sheet):
        result = None
        try:
            result = func(inc_stat, bal_sheet)
        except ZeroDivisionError:
            pass
        
        if result == np.inf or result == -np.inf:
            return np.nan
        
        return result
    
    return wrapper

@check_ratio
def calc_current_ratio(inc_statement: pd.DataFrame, balance_sheet: pd.DataFrame) -> float:
    current_assets = 0
    for curr in ['Cash', 'Ar', 'Loans to employees', 'Informal loans given']:
        current_assets += get_acc_bal_from_statement(balance_sheet, curr)
        
    current_liab = 0
    for curr in ['Ap', 'Informal loan', 'Short term loan']:
         current_liab += get_acc_bal_from_statement(balance_sheet, curr)
        
    
    return current_assets / current_liab

@check_ratio
def calc_quick_ratio(inc_statement: pd.DataFrame, balance_sheet: pd.DataFrame):
    current_assets = 0
    for curr in ['Cash', 'Ar']:
        current_assets += get_acc_bal_from_statement(balance_sheet, curr)
       
    current_liab = 0
    for curr in ['Ap', 'Informal loan', 'Short term loan']:
         current_liab += get_acc_bal_from_statement(balance_sheet, curr)
        
    return current_assets / current_liab

print(calc_current_ratio(None, bal_4425))
print(calc_quick_ratio(None, bal_4425))


44.84302127659574
27.821744680851065


### Asset management

In [42]:
@check_ratio
def calc_FATO(inc_statement, bal_sheet: pd.DataFrame) -> float:
    fato = 0
    for item in ['Equipment', 'Building', 'Land']:
        fato += get_acc_bal_from_statement(bal_sheet, item)
    
    sales = get_acc_bal_from_statement(inc_statement, 'Sales')
    return sales / fato

@check_ratio
def calc_TATO(inc_statement:pd.DataFrame, bal_sheet: pd.DataFrame) -> float:
    sales = get_acc_bal_from_statement(inc_statement, 'Sales')
    tato = get_acc_bal_from_statement(bal_sheet, 'Total assets')
    return sales/tato

print(calc_FATO(inc_4425, bal_4425))
print(calc_TATO(inc_4425, bal_4425))

0.3054522783949218
0.2352108874176752


### Debt Management

In [43]:
@check_ratio
def calc_debt_ratio(inc_statement:pd.DataFrame, bal_sheet: pd.DataFrame) -> float:
    debt = get_acc_bal_from_statement(bal_sheet, 'Total liabilities')
    assets = get_acc_bal_from_statement(bal_sheet, 'Total assets')
    return debt/assets

@check_ratio
def calc_EM(inc_statement: pd.DataFrame, bal_sheet: pd.DataFrame) -> float:
    assets = get_acc_bal_from_statement(bal_sheet, 'Total assets')
    equity = get_acc_bal_from_statement(bal_sheet, 'Equity')
    return assets/equity

print(calc_debt_ratio(inc_4425, bal_4425))
print(calc_EM(inc_4425, bal_4425))

0.005128080912824589
1.0051545136760214


### Profitability Ratios

In [44]:
@check_ratio
def calc_PM(inc_statement: pd.DataFrame, bal_sheet: pd.DataFrame) -> float:
    ni = get_acc_bal_from_statement(inc_statement, 'Net income')
    sales = get_acc_bal_from_statement(inc_statement, 'Sales')
    return ni / sales

@check_ratio
def calc_GP(inc_statement: pd.DataFrame, bal_sheet: pd.DataFrame) -> float:
    purchases = get_acc_bal_from_statement(inc_statement, 'Purchases')
    sales = get_acc_bal_from_statement(inc_statement, 'Sales')
    return (sales + purchases) / sales

@check_ratio
def calc_ROA(inc_statement: pd.DataFrame, bal_sheet: pd.DataFrame) -> float:
    ni = get_acc_bal_from_statement(inc_statement, 'Net income')
    assets = get_acc_bal_from_statement(bal_sheet, 'Total assets')
    return ni / assets

@check_ratio
def calc_ROE(inc_statement: pd.DataFrame, bal_sheet: pd.DataFrame) -> float:
    ni = get_acc_bal_from_statement(inc_statement, 'Net income')
    equity = get_acc_bal_from_statement(bal_sheet, 'Equity')
    return ni / equity

print(calc_PM(inc_4425, bal_4425))
print(calc_GP(inc_4425, bal_4425))
print(calc_ROA(inc_4425, bal_4425))
print(calc_ROE(inc_4425, bal_4425))

0.6177144023453446
0.8239154636879801
0.14529315274632737
0.14604206828919059


In [45]:
@check_ratio
def calc_withdrawal_ratio(inc_statement: pd.DataFrame, bal_sheet: pd.DataFrame) -> float:
    ni = get_acc_bal_from_statement(inc_statement, 'Net income')
    payouts = get_acc_bal_from_statement(inc_statement, 'Withdrawal')
    return (-1 * payouts) / ni

print(calc_withdrawal_ratio(inc_4425, None))

1.1918560816554575


### Monthly ratios for each firm

In [46]:
def get_years_list(df: pd.DataFrame, date_col:str):
    years = get_unique_years(df, date_col)
    return sorted(list(years[years >= 2021]))

def get_months_list(df: pd.DataFrame, year:float, date_col:str):
    df2 = filter_by_year(df, date_col, year)
    return sorted(list(get_unique_months(df2, date_col)))

display(get_years_list(respondent_filtered, 'date'))
display(get_months_list(respondent_filtered, 2021, 'date'))

[2021, 2022]

[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

In [47]:
current_ratios = []
quick_ratios = []
fatos = []
tatos = []

d_ratios = []
em_ratios = []

roes = []
roas = []
pms = []
gps = []

payouts = []

years = []
months = []

statements = []

list_func_map = {
    calc_current_ratio: current_ratios, 
    calc_quick_ratio: quick_ratios,
    calc_FATO: fatos, 
    calc_TATO: tatos,
    calc_debt_ratio: d_ratios,
    calc_EM: em_ratios,
    calc_ROE: roes, 
    calc_ROA: roas,
    calc_PM: pms,
    calc_GP: gps,
    calc_withdrawal_ratio: payouts,
}

for y in get_years_list(respondent_filtered, 'date'):
    for m in get_months_list(respondent_filtered, y, 'date'):
        monthly_data = filter_by_year_month(respondent_filtered, 'date', y, m)
        data_up_to = filter_up_to(respondent_filtered, 'date', y, m)

        temp_inc_stat = income_statement_and_RE(4425, monthly_data)
        temp_bal_sheet = balance_sheet(4425, data_up_to)
        
        statements.append(temp_inc_stat)
        statements.append(temp_bal_sheet)
        
        years.append(y)
        months.append(m)

        for func, coll in list_func_map.items():
            coll.append(func(temp_inc_stat, temp_bal_sheet))


  return current_assets / current_liab
  return current_assets / current_liab
  return ni / sales
  return (sales + purchases) / sales
  return (-1 * payouts) / ni
  return current_assets / current_liab
  return current_assets / current_liab
  return ni / sales
  return (sales + purchases) / sales
  return (-1 * payouts) / ni
  return current_assets / current_liab
  return current_assets / current_liab
  return current_assets / current_liab
  return current_assets / current_liab
  return current_assets / current_liab
  return current_assets / current_liab
  return current_assets / current_liab
  return current_assets / current_liab


  return current_assets / current_liab
  return current_assets / current_liab
  return ni / sales
  return (sales + purchases) / sales
  return (-1 * payouts) / ni


In [48]:
ratio_data_4425 = pd.DataFrame({
    'Year': years,
    'Month': months,
    'Current Ratio': current_ratios,
    'Quick Ratio': quick_ratios,
    'FATO': fatos,
    'TATO': tatos,
    'Debt Ratio': d_ratios,
    'Equity Multiplier': em_ratios,
    'Gross Margin': gps,  
    'Profit Margin': pms,
    'Return on Asset': roas,
    'Return on Equity': roas,
    'Withdrawal Ratio': payouts
})
ratio_data_4425

Unnamed: 0,Year,Month,Current Ratio,Quick Ratio,FATO,TATO,Debt Ratio,Equity Multiplier,Gross Margin,Profit Margin,Return on Asset,Return on Equity,Withdrawal Ratio
0,2021,2,,,0.0,0.0,0.0,1.0,,,0.0,0.0,
1,2021,3,,,0.0,0.0,0.0,1.0,,,0.0,0.0,
2,2021,4,,,0.013896,0.013829,0.0,1.0,0.466257,0.451909,0.00625,0.00625,0.238122
3,2021,5,,,0.021473,0.021369,0.0,1.0,0.77206,0.235468,0.005032,0.005032,5.280946
4,2021,6,,,0.024723,0.023646,0.0,1.0,0.900009,0.628072,0.014851,0.014851,0.0
5,2021,7,,,0.026681,0.024761,0.0,1.0,0.821619,0.695151,0.017213,0.017213,0.0
6,2021,8,,,0.017992,0.017582,0.0,1.0,0.515286,0.274513,0.004826,0.004826,13.366186
7,2021,9,-15.6063,25.3937,0.023036,0.022055,-0.002729,0.997279,0.772362,0.562613,0.012409,0.012409,2.725921
8,2021,10,-23.426235,24.809059,0.019835,0.018775,-0.00228,0.997725,0.891988,0.752372,0.014126,0.014126,0.66654
9,2021,11,-20.937929,8.347786,0.030842,0.028477,-0.003663,0.99635,0.804109,0.536661,0.015282,0.015282,0.374949


In [49]:
ratio_dates = []
for i, row in ratio_data_4425.iterrows():    
    y = int(row['Year'])
    m = int(row['Month'])
    p = pd.Period(f'{y}-{m}-1')
    last_day = p.days_in_month
    ratio_dates.append(dt.date(y, m, last_day))

ratio_data_4425['date'] = ratio_dates

In [50]:
ratio_data_4425

Unnamed: 0,Year,Month,Current Ratio,Quick Ratio,FATO,TATO,Debt Ratio,Equity Multiplier,Gross Margin,Profit Margin,Return on Asset,Return on Equity,Withdrawal Ratio,date
0,2021,2,,,0.0,0.0,0.0,1.0,,,0.0,0.0,,2021-02-28
1,2021,3,,,0.0,0.0,0.0,1.0,,,0.0,0.0,,2021-03-31
2,2021,4,,,0.013896,0.013829,0.0,1.0,0.466257,0.451909,0.00625,0.00625,0.238122,2021-04-30
3,2021,5,,,0.021473,0.021369,0.0,1.0,0.77206,0.235468,0.005032,0.005032,5.280946,2021-05-31
4,2021,6,,,0.024723,0.023646,0.0,1.0,0.900009,0.628072,0.014851,0.014851,0.0,2021-06-30
5,2021,7,,,0.026681,0.024761,0.0,1.0,0.821619,0.695151,0.017213,0.017213,0.0,2021-07-31
6,2021,8,,,0.017992,0.017582,0.0,1.0,0.515286,0.274513,0.004826,0.004826,13.366186,2021-08-31
7,2021,9,-15.6063,25.3937,0.023036,0.022055,-0.002729,0.997279,0.772362,0.562613,0.012409,0.012409,2.725921,2021-09-30
8,2021,10,-23.426235,24.809059,0.019835,0.018775,-0.00228,0.997725,0.891988,0.752372,0.014126,0.014126,0.66654,2021-10-31
9,2021,11,-20.937929,8.347786,0.030842,0.028477,-0.003663,0.99635,0.804109,0.536661,0.015282,0.015282,0.374949,2021-11-30


In [51]:
import plotly.express as px

In [52]:
px.line(ratio_data_4425, x='date', y=['Current Ratio', 'Quick Ratio'], title='Liquidity ratios')

In [53]:
px.line(ratio_data_4425, x='date', y=['FATO', 'TATO'], title='Asset managment ratios')

In [54]:
px.line(ratio_data_4425, x='date', y=['Debt Ratio', 'Equity Multiplier'], title='Debt managment ratios')

In [55]:
px.line(ratio_data_4425, x='date', y=['Gross Margin', 'Profit Margin', 'Return on Asset', 'Return on Equity'], title='Profitability Ratios')

In [56]:
px.line(ratio_data_4425, x='date', y=['Withdrawal Ratio'], title='Withdrawal ratio (based on net income)')

In [57]:
statements

[                            Account Balance
 0                             Sales        
 1                      Total income        
 2                         Purchases        
 3                    Salary expense        
 4                   Utility expense        
 5             Miscellaneous expense        
 6                      Rent expense        
 7                 Professional fees        
 8                 Transport expense        
 9                       Tax expense        
 10                  Loss from theft        
 11                         Bank fee        
 12                    Total expense        
 13                      Gifts given        
 14  Total other income and expenses        
 15                       Net income        
 16                       Withdrawal        
 17                Retained earnings        ,
                  Account      Balance
 0                   Cash  (200000.00)
 1                     Ar             
 2              Equipment  

In [58]:
def filter_by_respondent(df: pd.DataFrame, res_id):
    return df[df['Respondent ID'] == res_id].reset_index().drop(columns='index')


In [59]:
def get_monthly_financial_statements(df: pd.DataFrame, respondent_id:int):
    income_stats = []
    bal_sheets = []
    years = []
    months = []
    respondent_filtered = df[df['Respondent ID'] == respondent_id].reset_index().drop(columns='index')
    for y in get_years_list(respondent_filtered, 'date'):
        for m in get_months_list(respondent_filtered, y, 'date'):
            monthly_data = filter_by_year_month(respondent_filtered, 'date', y, m)
            data_up_to = filter_up_to(respondent_filtered, 'date', y, m)

            temp_inc_stat = income_statement_and_RE(respondent_id, monthly_data)
            temp_bal_sheet = balance_sheet(respondent_id, data_up_to)
            
            years.append(y)
            months.append(m)
            
            income_stats.append(temp_inc_stat)
            bal_sheets.append(temp_bal_sheet)
    
    return years, months, income_stats, bal_sheets

monthly_financials = get_monthly_financial_statements(data_combined3, 4425)

In [60]:
inc_variables = set()
bal_variables = set()

for inc in monthly_financials[2]:
    for item in inc['Account'].unique():
        inc_variables.add(item)

for bal in monthly_financials[3]:
    for item in bal['Account'].unique():
        bal_variables.add(item)

In [61]:
inc_variables

{'Bank fee',
 'Gifts given',
 'Loss from theft',
 'Miscellaneous expense',
 'Net income',
 'Professional fees',
 'Purchases',
 'Rent expense',
 'Retained earnings',
 'Salary expense',
 'Sales',
 'Tax expense',
 'Total expense',
 'Total income',
 'Total other income and expenses',
 'Transport expense',
 'Utility expense',
 'Withdrawal'}

In [62]:
bal_variables

{'Ap',
 'Ar',
 'Building',
 'Cash',
 'Equipment',
 'Equity',
 'Informal loan',
 'Informal loans given',
 'Land',
 'Live stock',
 'Loans to employees',
 'Long term loan',
 'Short term loan',
 'Total assets',
 'Total liabilities'}

In [63]:
monthly_financials[3][8]

Unnamed: 0,Account,Balance
0,Cash,(112042.50)
1,Ar,6604.00
2,Equipment,1764400.00
3,Building,
4,Live stock,
5,Land,
6,Informal loans given,205000.00
7,Loans to employees,
8,Total assets,1863961.50
9,Ap,750.00


In [64]:
cash_balances = []
for bal in monthly_financials[3]:
    cash_balances.append(unformat_num(bal[bal['Account'] == 'Cash']['Balance'][0]))

In [65]:
cash_balances

[-200000.0,
 -200000.0,
 -193281.0,
 -193177.0,
 -126689.0,
 -70549.0,
 -165702.0,
 -133572.5,
 -112042.5,
 -68426.5,
 -4693.5,
 50216.5,
 102509.5,
 285739.5,
 319499.5,
 321389.5,
 321389.5]

In [66]:
def get_monthly_cash_balances_for_res(df: pd.DataFrame, respondent_id: int) -> pd.DataFrame:
    monthly_statements = get_monthly_financial_statements(df, respondent_id)
    cash_balances = []
    change_in_cash = []
    years, months = monthly_statements[0], monthly_statements[1]
    
    current_cash = 0
    previous_cash = 0
    for bal in monthly_statements[3]:
        current_cash = unformat_num(bal[bal['Account'] == 'Cash']['Balance'][0])
        cash_balances.append(current_cash)
        if len(change_in_cash) == 0:
            change_in_cash.append(0)
        else:
            change_in_cash.append(current_cash - previous_cash)
        previous_cash = current_cash


    data = pd.DataFrame(
        {
            'Respodent': [respondent_id]*len(cash_balances),
            'Year': years,
            'Month': months,
            'Cash': cash_balances,
            'Cash Change': change_in_cash
        }
    )
    return data  

get_monthly_cash_balances_for_res(data_combined3, 4425)

Unnamed: 0,Respodent,Year,Month,Cash,Cash Change
0,4425,2021,2,-200000.0,0.0
1,4425,2021,3,-200000.0,0.0
2,4425,2021,4,-193281.0,6719.0
3,4425,2021,5,-193177.0,104.0
4,4425,2021,6,-126689.0,66488.0
5,4425,2021,7,-70549.0,56140.0
6,4425,2021,8,-165702.0,-95153.0
7,4425,2021,9,-133572.5,32129.5
8,4425,2021,10,-112042.5,21530.0
9,4425,2021,11,-68426.5,43616.0


In [67]:
def get_monthly_cash_balances_for_all(df: pd.DataFrame) -> pd.DataFrame:
    all_firms = df['Respondent ID'].unique()
    all_firm_data = []
    for res_id in all_firms:
        print(f'Getting cash balance for {res_id}')
        cash_data = get_monthly_cash_balances_for_res(df, res_id)
        all_firm_data.append(cash_data)
    
    return pd.concat(all_firm_data).reset_index().drop(columns='index')

monthly_cash_data = get_monthly_cash_balances_for_all(data_combined3)

Getting cash balance for 4425.0
Getting cash balance for 4429.0
Getting cash balance for 4431.0
Getting cash balance for 4432.0
Getting cash balance for 4433.0
Getting cash balance for 4435.0
Getting cash balance for 4436.0
Getting cash balance for 4437.0
Getting cash balance for 4438.0
Getting cash balance for 4440.0
Getting cash balance for 4442.0
Getting cash balance for 4443.0
Getting cash balance for 4444.0
Getting cash balance for 4445.0
Getting cash balance for 4446.0
Getting cash balance for 4448.0
Getting cash balance for 4450.0
Getting cash balance for 4451.0
Getting cash balance for 4452.0
Getting cash balance for 4453.0
Getting cash balance for 4456.0
Getting cash balance for 4457.0
Getting cash balance for 4458.0
Getting cash balance for 4459.0
Getting cash balance for 4460.0
Getting cash balance for 4462.0
Getting cash balance for 4463.0
Getting cash balance for 4464.0
Getting cash balance for 4465.0
Getting cash balance for 4466.0
Getting cash balance for 4467.0
Getting 

In [68]:
monthly_cash_data

Unnamed: 0,Respodent,Year,Month,Cash,Cash Change
0,4425.0,2021,2,-200000.0,0.0
1,4425.0,2021,3,-200000.0,0.0
2,4425.0,2021,4,-193281.0,6719.0
3,4425.0,2021,5,-193177.0,104.0
4,4425.0,2021,6,-126689.0,66488.0
...,...,...,...,...,...
1903,5883.0,2022,2,199013.0,26543.0
1904,5883.0,2022,3,216952.0,17939.0
1905,5883.0,2022,4,216952.0,0.0
1906,4759.0,2021,4,0.0,0.0


In [69]:
date_formated = []
for i, row in monthly_cash_data.iterrows():
    y = int(row['Year'])
    m = int(row['Month'])
    p = pd.Period(f'{y}-{m}-1')
    last_day = p.days_in_month
    date_formated.append(dt.date(y, m, last_day))

In [70]:
date_formated

[datetime.date(2021, 2, 28),
 datetime.date(2021, 3, 31),
 datetime.date(2021, 4, 30),
 datetime.date(2021, 5, 31),
 datetime.date(2021, 6, 30),
 datetime.date(2021, 7, 31),
 datetime.date(2021, 8, 31),
 datetime.date(2021, 9, 30),
 datetime.date(2021, 10, 31),
 datetime.date(2021, 11, 30),
 datetime.date(2021, 12, 31),
 datetime.date(2022, 1, 31),
 datetime.date(2022, 2, 28),
 datetime.date(2022, 3, 31),
 datetime.date(2022, 4, 30),
 datetime.date(2022, 5, 31),
 datetime.date(2022, 6, 30),
 datetime.date(2021, 1, 31),
 datetime.date(2021, 4, 30),
 datetime.date(2021, 5, 31),
 datetime.date(2021, 6, 30),
 datetime.date(2021, 7, 31),
 datetime.date(2021, 8, 31),
 datetime.date(2021, 9, 30),
 datetime.date(2021, 10, 31),
 datetime.date(2021, 11, 30),
 datetime.date(2021, 12, 31),
 datetime.date(2022, 1, 31),
 datetime.date(2022, 2, 28),
 datetime.date(2022, 3, 31),
 datetime.date(2022, 4, 30),
 datetime.date(2021, 3, 31),
 datetime.date(2021, 4, 30),
 datetime.date(2021, 5, 31),
 datetim

In [71]:
monthly_cash_data['date'] = date_formated

In [72]:
credit_scoring_survey = pd.read_excel('lift_data\credit scoring survey.xlsx')

In [73]:
demographic_data = pd.read_excel(r'lift_data\Income Report - Mon Feb 13 2023.xlsx', sheet_name='Demographics')

In [74]:
demographic_data.head()

Unnamed: 0,Respondent ID,Gender,Age,Number of children,Marital Status,Country of Residence,Citizenship,Firm ID,Sector type,Date firm established,Age of Firm,Number of Owners,Owner/s Gender,Number of Employees,Industry,Location in the Country,Activation Date
0,4425,Male,35,3,Married,Ethiopia,Ethiopia,DD-ET-1930,AD,10/ 30/2016,7 - 10 years,1,All men,4,Other,Diredawa,2021-03-29
1,4429,Male,32,0,Single,Ethiopia,Ethiopia,AA-KG-SA-1376,EE-01,dd/03/2015,5 - 6 years,2,All men,18,Light Manu,Addis Ababa,2021-03-29
2,4431,Male,27,0,Single,Ethiopia,Ethiopia,AA-KG-SA-1495,Garment production,2018/03/22,2 years,4 or more,Both men and women,25,Light Manu,Addis Ababa,2021-03-29
3,4432,Male,32,2,Married,Ethiopia,Ethiopia,DD-ET-2194,U-03,0,5 - 6 years,1,All men,5,Other,Diredawa,2021-03-29
4,4433,Female,61,1,Widowed,Ethiopia,Ethiopia,HR-DE-2171,X,08/07/2017,3 - 4 years,3,Both men and women,12,Other,Harar,2021-03-29


In [75]:
demographic_data[['Respondent ID', 'Firm ID']]

Unnamed: 0,Respondent ID,Firm ID
0,4425,DD-ET-1930
1,4429,AA-KG-SA-1376
2,4431,AA-KG-SA-1495
3,4432,DD-ET-2194
4,4433,HR-DE-2171
...,...,...
154,4864,AD-SO-KE-1646
155,5045,AA-KE-SA-1360
156,5104,AA-BA-HE-1111
157,5127,AA-KS-NA-1132


In [76]:
res_id_firm_id_map = dict()
for i, row in demographic_data[['Respondent ID', 'Firm ID']].iterrows():
    res_id = row['Respondent ID']
    firm_id = row['Firm ID']

    res_id_firm_id_map[res_id] = firm_id

res_id_firm_id_map

{4425: 'DD-ET-1930',
 4429: 'AA-KG-SA-1376',
 4431: 'AA-KG-SA-1495',
 4432: 'DD-ET-2194',
 4433: 'HR-DE-2171',
 4435: 'AA-KS-NA-1603',
 4436: 'DD-ET-1918',
 4437: 'AD-CW-CH-2195',
 4438: 'AA-KS-NA-1069',
 4440: 'AA-KG-SA-1317',
 4442: 'AA-ME-HI-1013',
 4443: 'AA-KS-NA-1561',
 4444: 'AA-KS-NA-1383',
 4445: 'AD-NO-TI-1757',
 4446: 'AA-ME-HI-1600',
 4448: 'HR-AL-2090',
 4450: 'HR-DE-2128',
 4451: 'AD-CN-KE-1684',
 4452: 'HR-DE-2127',
 4453: 'HR-AL-2007',
 4456: 'AD-CW-CH-1817',
 4457: 'AD-CN-KE-1878',
 4458: 'AA-FE-HI-2227',
 4459: 'HR-DE-2123',
 4460: 'AD-CN-KE-1877',
 4462: 'DD-ET-1949',
 4463: 'AA-FE-ED-1009',
 4464: 'AA-FE-HI-1051',
 4465: 'HR-AL-2115',
 4466: 'AD-CE-TI-2190',
 4467: 'HR-AL-2107',
 4468: 'AD-CE-TI-2234',
 4469: 'HR-AL-2169',
 4470: 'AA-KG-SA-1315',
 4471: 'AA-KG-SA-1020',
 4472: 'HR-AL-2132',
 4473: 'AA-KG-SA-1343',
 4474: 'AA-KG-SA-1336',
 4475: 'AA-KG-SA-1312',
 4477: 'AA-KG-SA-1342',
 4478: 'HR-AL-2135',
 4480: 'DD-ET-1967',
 4481: 'AA-KG-SA-1068',
 4482: 'HR-DE-20

In [77]:
monthly_cash_data['Firm ID'] = monthly_cash_data['Respodent'].apply(lambda x: res_id_firm_id_map[x])

In [78]:
monthly_cash_data

Unnamed: 0,Respodent,Year,Month,Cash,Cash Change,date,Firm ID
0,4425.0,2021,2,-200000.0,0.0,2021-02-28,DD-ET-1930
1,4425.0,2021,3,-200000.0,0.0,2021-03-31,DD-ET-1930
2,4425.0,2021,4,-193281.0,6719.0,2021-04-30,DD-ET-1930
3,4425.0,2021,5,-193177.0,104.0,2021-05-31,DD-ET-1930
4,4425.0,2021,6,-126689.0,66488.0,2021-06-30,DD-ET-1930
...,...,...,...,...,...,...,...
1903,5883.0,2022,2,199013.0,26543.0,2022-02-28,AA-YA-WN-2222
1904,5883.0,2022,3,216952.0,17939.0,2022-03-31,AA-YA-WN-2222
1905,5883.0,2022,4,216952.0,0.0,2022-04-30,AA-YA-WN-2222
1906,4759.0,2021,4,0.0,0.0,2021-04-30,AA-KE-SA-1331


In [79]:
credit_scoring_survey.head()

Unnamed: 0,firm_id,Score,Weak,Medium,Strong,MaxAmount
0,DD-ET-1930,14,NonDefault,NonDefault,NonDefault,50000
1,AA-KG-SA-1376,14,NonDefault,NonDefault,NonDefault,50000
2,AA-KG-SA-1495,0,Default,Default,Default,0
3,DD-ET-2194,0,Default,Default,Default,0
4,HR-DE-2171,9,NonDefault,NonDefault,Default,10000


In [80]:
class_data = []
for i, row in credit_scoring_survey.iterrows():
    weak = row['Weak']
    medium = row['Medium']
    strong = row['Strong']
    score = ''
    NON_DEFAULT = 'NonDefault'
    DEFAULT = 'Default'
    if strong == NON_DEFAULT:
        score = 'Strong'
    elif medium == NON_DEFAULT:
        score = 'Medium'
    elif weak == NON_DEFAULT:
        score = 'Weak'
    else:
        score = 'Poor'

    class_data.append(score)

credit_scoring_survey['Class'] = class_data
credit_scoring_survey.head() 

Unnamed: 0,firm_id,Score,Weak,Medium,Strong,MaxAmount,Class
0,DD-ET-1930,14,NonDefault,NonDefault,NonDefault,50000,Strong
1,AA-KG-SA-1376,14,NonDefault,NonDefault,NonDefault,50000,Strong
2,AA-KG-SA-1495,0,Default,Default,Default,0,Poor
3,DD-ET-2194,0,Default,Default,Default,0,Poor
4,HR-DE-2171,9,NonDefault,NonDefault,Default,10000,Medium


In [81]:
credit_scoring_survey.rename(columns={'firm_id':'Firm ID'}, inplace=True)
credit_scoring_survey.head()

Unnamed: 0,Firm ID,Score,Weak,Medium,Strong,MaxAmount,Class
0,DD-ET-1930,14,NonDefault,NonDefault,NonDefault,50000,Strong
1,AA-KG-SA-1376,14,NonDefault,NonDefault,NonDefault,50000,Strong
2,AA-KG-SA-1495,0,Default,Default,Default,0,Poor
3,DD-ET-2194,0,Default,Default,Default,0,Poor
4,HR-DE-2171,9,NonDefault,NonDefault,Default,10000,Medium


In [82]:
monthly_cash_data = monthly_cash_data.merge(credit_scoring_survey[['Firm ID', 'Class']], 'inner', 'Firm ID')

In [83]:
monthly_cash_data.to_csv('monthly_cash_balance.csv')

In [84]:
filtered_cash_data = monthly_cash_data[monthly_cash_data['Cash'] >= 0].reset_index().drop(columns='index')
filtered_cash_data.head()

Unnamed: 0,Respodent,Year,Month,Cash,Cash Change,date,Firm ID,Class
0,4425.0,2022,1,50216.5,54910.0,2022-01-31,DD-ET-1930,Strong
1,4425.0,2022,2,102509.5,52293.0,2022-02-28,DD-ET-1930,Strong
2,4425.0,2022,3,285739.5,183230.0,2022-03-31,DD-ET-1930,Strong
3,4425.0,2022,4,319499.5,33760.0,2022-04-30,DD-ET-1930,Strong
4,4425.0,2022,5,321389.5,1890.0,2022-05-31,DD-ET-1930,Strong


In [85]:
import classification_algorithms

In [86]:
rand_forest = classification_algorithms.RandForest(filtered_cash_data, ['Cash', 'Cash Change'], 'Class', 12)

In [87]:
rand_forest.summarize()

{'confusion_matrix': array([[90, 12, 56,  2],
        [29,  7, 18,  2],
        [53, 12, 67,  3],
        [15,  4,  3,  4]], dtype=int64),
 'accuracy': 0.44562334217506633}

### Looking at other financial figures

In [112]:
def all_monthly_figures_for_firm(df: pd.DataFrame, res_id: int) -> pd.DataFrame:
    monthly = get_monthly_financial_statements(df, res_id)

    var_balance_map:dict[str, list] = dict()
    years, months = monthly[0], monthly[1]

    
    for statements in [monthly[2], monthly[3]]:
        for statement in statements:
            for i, row in statement.iterrows():
                var = row['Account']
                balance = unformat_num(row['Balance'])
                if var not in var_balance_map:
                    var_balance_map[var] = [balance]
                else:
                    var_balance_map[var].append(balance)


    data = pd.DataFrame(
        {
            'Respondent': [res_id]*len(years),
            'Year': years,
            'Month': months,
            **var_balance_map
        }
    )
    return data  
    

monthly_financials_4425 = all_monthly_figures_for_firm(data_combined3, 4425)
monthly_financials_4425.head()

Unnamed: 0,Respondent,Year,Month,Sales,Total income,Purchases,Salary expense,Utility expense,Miscellaneous expense,Rent expense,...,Land,Informal loans given,Loans to employees,Total assets,Ap,Informal loan,Long term loan,Short term loan,Total liabilities,Equity
0,4425,2021,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,200000.0,0,1004400.0,0.0,0.0,0,0,0.0,1004400.0
1,4425,2021,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,200000.0,0,1404400.0,0.0,0.0,0,0,0.0,1404400.0
2,4425,2021,4,19515.0,19515.0,-10416.0,0.0,-280.0,0.0,0.0,...,0,200000.0,0,1411119.0,0.0,0.0,0,0,0.0,1411119.0
3,4425,2021,5,30157.0,30157.0,-6874.0,-2500.0,-480.0,-7202.0,-6000.0,...,0,200000.0,0,1411223.0,0.0,0.0,0,0,0.0,1411223.0
4,4425,2021,6,43374.0,43374.0,-4337.0,-4000.0,0.0,0.0,-6000.0,...,0,200000.0,0,1834315.0,0.0,0.0,0,0,0.0,1834315.0


In [114]:
def all_monthly_figures_for_all_firms(df: pd.DataFrame):
    all_firms = df['Respondent ID'].unique()
    all_firm_data = []
    for res_id in all_firms:
        print(f'Getting Monthly Figures for {res_id}')
        cash_data = all_monthly_figures_for_firm(df, res_id)
        all_firm_data.append(cash_data)
    
    return pd.concat(all_firm_data).reset_index().drop(columns='index')

monthly_financial_df = all_monthly_figures_for_all_firms(data_combined3)
monthly_financial_df.head()

Getting Monthly Figures for 4425.0
Getting Monthly Figures for 4429.0
Getting Monthly Figures for 4431.0
Getting Monthly Figures for 4432.0
Getting Monthly Figures for 4433.0
Getting Monthly Figures for 4435.0
Getting Monthly Figures for 4436.0
Getting Monthly Figures for 4437.0
Getting Monthly Figures for 4438.0
Getting Monthly Figures for 4440.0
Getting Monthly Figures for 4442.0
Getting Monthly Figures for 4443.0
Getting Monthly Figures for 4444.0
Getting Monthly Figures for 4445.0
Getting Monthly Figures for 4446.0
Getting Monthly Figures for 4448.0
Getting Monthly Figures for 4450.0
Getting Monthly Figures for 4451.0
Getting Monthly Figures for 4452.0
Getting Monthly Figures for 4453.0
Getting Monthly Figures for 4456.0
Getting Monthly Figures for 4457.0
Getting Monthly Figures for 4458.0
Getting Monthly Figures for 4459.0
Getting Monthly Figures for 4460.0
Getting Monthly Figures for 4462.0
Getting Monthly Figures for 4463.0
Getting Monthly Figures for 4464.0
Getting Monthly Figu

Unnamed: 0,Respondent,Year,Month,Sales,Total income,Purchases,Salary expense,Utility expense,Miscellaneous expense,Rent expense,...,Land,Informal loans given,Loans to employees,Total assets,Ap,Informal loan,Long term loan,Short term loan,Total liabilities,Equity
0,4425.0,2021,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,200000.0,0.0,1004400.0,0.0,0.0,0.0,0.0,0.0,1004400.0
1,4425.0,2021,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,200000.0,0.0,1404400.0,0.0,0.0,0.0,0.0,0.0,1404400.0
2,4425.0,2021,4,19515.0,19515.0,-10416.0,0.0,-280.0,0.0,0.0,...,0.0,200000.0,0.0,1411119.0,0.0,0.0,0.0,0.0,0.0,1411119.0
3,4425.0,2021,5,30157.0,30157.0,-6874.0,-2500.0,-480.0,-7202.0,-6000.0,...,0.0,200000.0,0.0,1411223.0,0.0,0.0,0.0,0.0,0.0,1411223.0
4,4425.0,2021,6,43374.0,43374.0,-4337.0,-4000.0,0.0,0.0,-6000.0,...,0.0,200000.0,0.0,1834315.0,0.0,0.0,0.0,0.0,0.0,1834315.0


In [115]:
monthly_financial_df.to_csv('all_monthly_financials.csv')