## Imports

In [209]:
import pandas as pd
import numpy as np
import tabula
import configparser
import os


In [210]:
# Working directory of the running kernel; the config file and the paystub
# folder are both resolved relative to it.
cwd = os.getcwd()

In [211]:
# Remove the row cap so displayed DataFrames are shown in full.
pd.set_option('display.max_rows', None)

## Config

In [212]:
# Load the project configuration from ../docs/config.ini, resolved relative to
# the current working directory.
config_path = os.path.join(cwd, '../docs/config.ini')
config = configparser.ConfigParser()
# NOTE: ConfigParser.read() skips files it cannot open and returns the list of
# files it actually parsed, so an empty list here means the config was not
# found and the later config.get() calls will fail.
config.read(config_path)

['c:\\Users\\tasco\\OneDrive\\Python\\Projects\\financialstatements\\financialstatements\\notebooks\\../docs/config.ini']

## Reading Data

### Paystubs

In [213]:
# Folder holding the paystub files: the PAYSTUB_DIRECTORY value of the
# [data_inputs_directory] config section, joined onto the working directory.
PAYSTUB_DIRECTORY = os.path.join(cwd, config.get("data_inputs_directory", "PAYSTUB_DIRECTORY"))

In [214]:
# Bare names (not full paths) of every entry in the paystub folder.
paystubs_files = os.listdir(PAYSTUB_DIRECTORY)

In [215]:
# Build the full path of every paystub PDF in the folder.
# Entries that are not PDFs (sub-folders, OS or sync-tool metadata files, ...)
# are skipped because the PDF table reader cannot parse them, and the names
# are sorted so the processing order does not depend on the file system.
paystub_paths = [
    os.path.join(PAYSTUB_DIRECTORY, file_name)
    for file_name in sorted(paystubs_files)
    if file_name.lower().endswith('.pdf')
]

In [216]:
# Extract the table region from every paystub PDF and tag its rows with the
# date string taken from the file path.
dfs = []
for path in paystub_paths:
    # Fixed page region with table auto-detection switched off
    # (tabula-py documents `area` as top, left, bottom, right).
    tables = tabula.read_pdf(path, pages='all', area=[396, 36, 756, 612], guess=False)
    # path[-14:-4] is the ten characters before a four-character suffix such
    # as ".pdf"; presumably a YYYY-MM-DD date embedded in the file name.
    stub_df = pd.concat(tables).reset_index(drop=True).assign(Date=path[-14:-4])
    dfs.append(stub_df)


In [217]:
# Stack the per-paystub frames into one frame with a fresh 0..n-1 index.
df = pd.concat(dfs, ignore_index=True)

### Table Data

In [218]:
# Paths of the two lookup workbooks, read from the [table_files] config section.
COA_DATA = config.get('table_files', 'COA_DATA')
MONTH_DATA = config.get('table_files', 'MONTH_DATA')

In [219]:
# Link table from the 'coa_paystub_link_table' sheet; it is joined to the
# paystub lines on the 'Item' column further down.
coa_purch_df = pd.read_excel(COA_DATA, sheet_name='coa_paystub_link_table')

In [220]:
# Month lookup table (read_excel loads the first sheet by default); it is
# joined on 'Month_Num' further down and is expected to supply a 'Month' column.
month_df = pd.read_excel(MONTH_DATA)

## ETL

In [221]:
def find_deductions(df):
    """Return the deduction line items of the combined paystub table.

    Takes the 'Unnamed: 4' / 'Unnamed: 5' columns (renamed to 'Item' and
    'Amount') together with 'Date', discards the first row of the frame and
    every row with a missing value, renumbers the index, and finally removes
    the rows whose Item is 'Total', 'DEDUCTIONS' or 'CURRENT'.

    Args:
        df: Frame containing at least the columns 'Unnamed: 4',
            'Unnamed: 5' and 'Date' (presumably the raw tabula output).

    Returns:
        A DataFrame with columns 'Item', 'Amount' and 'Date'. The index is
        the one assigned before the label rows were filtered out, so it may
        contain gaps.
    """
    renamed_columns = {'Unnamed: 4': 'Item', 'Unnamed: 5': 'Amount'}
    label_rows = ['Total', 'DEDUCTIONS', 'CURRENT']

    items = (
        df.loc[:, ['Unnamed: 4', 'Unnamed: 5', 'Date']]
        .rename(columns=renamed_columns)
        .iloc[1:]  # the first row of the frame is always discarded
        .dropna()
        .reset_index(drop=True)
    )
    # Keep everything that is not one of the header/total labels.
    return items[~items['Item'].isin(label_rows)]

In [222]:
def find_earnings(df):
    """Return the earnings line items of the combined paystub table.

    Takes the 'Unnamed: 0' / 'Unnamed: 3' columns (renamed to 'Item' and
    'Amount') together with 'Date', discards the first row of the frame and
    every row with a missing value, renumbers the index, and finally removes
    the rows whose Item is 'Total', 'TAX' or 'EARNINGS'.

    Args:
        df: Frame containing at least the columns 'Unnamed: 0',
            'Unnamed: 3' and 'Date' (presumably the raw tabula output).

    Returns:
        A DataFrame with columns 'Item', 'Amount' and 'Date'. The index is
        the one assigned before the label rows were filtered out, so it may
        contain gaps.
    """
    selected = df.loc[:, ['Unnamed: 0', 'Unnamed: 3', 'Date']]
    selected = selected.rename(columns={'Unnamed: 0': 'Item', 'Unnamed: 3': 'Amount'})

    # Discard the first row of the frame, then any incomplete rows.
    complete = selected.iloc[1:].dropna().reset_index(drop=True)

    # Header and total labels are not earnings lines.
    is_label = complete['Item'].isin(['Total', 'TAX', 'EARNINGS'])
    return complete[~is_label]

In [223]:
# Deduction lines (Item / Amount / Date) pulled from the combined paystub frame.
deductions_df = find_deductions(df)

In [224]:
# Earnings lines (Item / Amount / Date) pulled from the combined paystub frame.
earnings_df = find_earnings(df)

In [225]:
# From here on `df` holds the cleaned line items: deductions first, then
# earnings, renumbered 0..n-1.
df = pd.concat([deductions_df, earnings_df], ignore_index=True)

In [226]:
# Attach the link-table columns to each line item by its 'Item' label.
# A left join keeps line items that have no match in the link table.
df = df.merge(coa_purch_df, how='left', on='Item')

In [227]:
# The paystub line label is called 'Description' from here on.
df = df.rename(columns = {'Item': 'Description'})

In [228]:
# Keep only these columns, in this order. The selection raises KeyError if the
# merge above did not supply GL_Code, Account, Category, Account_Type or
# Order_Col.
df = df[['Date', 'GL_Code', 'Account', 'Description', 'Amount', 'Category', 'Account_Type', 'Order_Col']]

In [229]:
# Parse the date strings taken from the file names into datetimes.
df['Date'] = pd.to_datetime(df['Date'])
# Strip the currency symbol. regex=False makes '$' an explicit literal: read
# as a regular expression it is the end-of-string anchor, and the default
# value of `regex` differs between pandas versions (pandas 1.x emits a
# FutureWarning when it is left unspecified for such a pattern).
df['Amount'] = df['Amount'].str.replace('$', '', regex=False)

  df['Amount'] = df['Amount'].str.replace('$', '')


In [230]:
# Order the line items chronologically and renumber the index 0..n-1.
df = df.sort_values('Date', ignore_index=True)

In [231]:
# Split Amount into DEBIT and CREDIT by account type: 'Asset' and 'Deduction'
# rows are debits, 'Revenue' rows are credits; the other side stays NaN.
debit_rows = df['Account_Type'].isin(['Asset', 'Deduction'])
credit_rows = df['Account_Type'] == 'Revenue'
df['DEBIT'] = np.where(debit_rows, df['Amount'], np.nan)
df['CREDIT'] = np.where(credit_rows, df['Amount'], np.nan)

In [232]:
# Drop the thousands separators and convert both amount columns to float.
for side in ('DEBIT', 'CREDIT'):
    df[side] = df[side].str.replace(',', '').astype('float')

In [233]:
# Build one balancing cash entry per pay date: total credits minus total
# debits of that date, booked as a debit to the checking account.
df_grouped = df.groupby('Date', as_index=False)[['DEBIT', 'CREDIT']].sum()
net_cash = df_grouped['CREDIT'] - df_grouped['DEBIT']
df_grouped = df_grouped.assign(
    DEBIT=net_cash,
    CREDIT=np.nan,  # the cash entry has no credit side
    GL_Code=100101,
    Account='Free Checking Bank OZK',
    Description='Cash from paystub',
    Amount=net_cash,
    Category='Cash',
    Account_Type='Asset',
    Order_Col=1,
)

In [234]:
# Put the cash entries into the same column order as the line-item frame,
# followed by the DEBIT and CREDIT columns.
cash_entry_columns = [
    'Date', 'GL_Code', 'Account', 'Description',
    'Amount', 'Category', 'Account_Type', 'Order_Col',
    'DEBIT', 'CREDIT',
]
df_grouped = df_grouped[cash_entry_columns]

In [235]:
# Append the cash entries to the line items and renumber the index 0..n-1.
df = pd.concat([df, df_grouped], ignore_index=True)

In [236]:
# Re-derive DEBIT/CREDIT for the combined frame from a numeric view of Amount.
# Amount is still comma-formatted text on the paystub rows and a float on the
# appended cash rows; copying it as-is would overwrite the float DEBIT/CREDIT
# columns with a mix of strings and floats, which then ends up in the exported
# CSV files. Converting first keeps both columns float throughout.
amount_numeric = (
    df['Amount']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype('float')
)
df['DEBIT'] = np.where(df['Account_Type'].isin(['Asset', 'Deduction']), amount_numeric, np.nan)
df['CREDIT'] = np.where(df['Account_Type'] == 'Revenue', amount_numeric, np.nan)

In [237]:
# Order by date and, within a date, by Order_Col; renumber the index 0..n-1.
df = df.sort_values(['Date', 'Order_Col'], ignore_index=True)

In [238]:
# Calendar month (1-12) of each entry; used as the join key for the month table.
df['Month_Num'] = df['Date'].dt.month

In [239]:
# ID of the form 'ps-<month number>-<n>', where n is the frame's index value
# plus one (a running number over the whole frame, not restarted per month).
df['Transaction_ID'] = 'ps-' + df['Month_Num'].astype("str") + '-' + (df.index + 1).astype("str")

In [240]:
# Attach the month lookup columns by month number. A left join keeps entries
# whose month number is missing from the lookup table.
df = df.merge(month_df, how='left', on='Month_Num')

In [241]:
# Distinct 'Month' values in order of first appearance in df.
months = df['Month'].unique()

In [244]:
# Write one journal-entry CSV per month value into the configured output folder.
# NOTE(review): the numeric file-name prefix is a running counter over `months`
# (starting at 1, in order of first appearance), so it matches the calendar
# month number only when the data starts in January without gaps - confirm
# that this is the intended numbering.
file_path = config.get("data_outputs_directory", "JOURNAL_ENTRIES")
export_columns = ['Transaction_ID', 'Date', 'GL_Code', 'Account', 'Description', 'DEBIT', 'CREDIT']
for month_num, month in enumerate(months, start=1):
    file_name = f'{month_num}_paystub_{month}_entries.csv'
    file_df = df.loc[df['Month'] == month, export_columns]
    file_df.to_csv(os.path.join(file_path, file_name), index=False)