## Imports

In [180]:
import pandas as pd
import numpy as np
import configparser
import os

In [181]:
# Working directory of the kernel; the config file and the credit card input
# folder are located relative to it further down.
cwd = os.getcwd()

## Config

In [182]:
# Creating configuration
config_path = os.path.join(cwd, '../docs/config.ini')
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns only the
# paths it actually parsed, so an empty list means the config was not loaded.
loaded_config_files = config.read(config_path)
if not loaded_config_files:
    raise FileNotFoundError(f"Configuration file not found: {config_path}")
# Keep the list of parsed files as the cell's displayed result.
loaded_config_files

['c:\\Users\\tasco\\OneDrive\\Python\\Projects\\financialstatements\\financialstatements\\notebooks\\../docs/config.ini']

## Utility Functions

In [183]:
def creating_input_paths(directory, extension=None):
    """Return the sorted full paths of the files directly inside ``directory``.

    Parameters
    ----------
    directory : str
        Folder to scan. Sub-folders are not descended into and are left out
        of the result.
    extension : str, optional
        When given (for example ``'.csv'``), only files whose name ends with
        this suffix (compared case-insensitively) are returned. The default
        ``None`` keeps every regular file.

    Returns
    -------
    list of str
        ``directory`` joined with each kept file name, in sorted name order.
    """
    paths = []
    # os.listdir gives names in arbitrary order; sorting makes the load order
    # (and therefore the row order downstream) reproducible between runs.
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if not os.path.isfile(path):
            # A sub-folder cannot be read as a data file.
            continue
        if extension is not None and not name.lower().endswith(extension.lower()):
            continue
        paths.append(path)
    return paths

In [184]:
def missing_gl_check(df):
    """Count the rows of ``df`` whose ``GL_Code`` value is null."""
    gl_is_missing = df['GL_Code'].isna()
    return gl_is_missing.sum()

In [185]:
def missing_mcc(df):
    """Print the rows of ``df`` that have no ``GL_Code``; returns ``None``."""
    unmatched_rows = df.loc[df['GL_Code'].isnull()]
    print(unmatched_rows)
    return None

In [186]:
def debit_credit_check(df, tolerance=0.005):
    """Tell whether the ``DEBIT`` and ``CREDIT`` columns of ``df`` balance.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric ``DEBIT`` and ``CREDIT`` columns; nulls are skipped
        by the sums.
    tolerance : float, optional
        Largest absolute difference between the two totals that still counts
        as balanced. The default of half a cent absorbs floating-point noise
        from summing amounts rounded to two decimals.

    Returns
    -------
    bool
        ``True`` when the totals differ by less than ``tolerance``.
    """
    debit_total = df['DEBIT'].sum()
    credit_total = df['CREDIT'].sum()
    # Exact equality on float sums is unreliable (0.1 + 0.2 != 0.3), so the
    # totals are compared within a tolerance instead.
    return bool(abs(debit_total - credit_total) < tolerance)

In [187]:
def creating_output(df, month_df, type, config):
    """Write the journal entries in ``df`` to one CSV file per month.

    Parameters
    ----------
    df : pandas.DataFrame
        Journal lines with a datetime ``Date`` column plus ``GL_Code``,
        ``Account``, ``Description``, ``DEBIT`` and ``CREDIT``. The frame is
        not modified.
    month_df : pandas.DataFrame
        Lookup with a ``Month_Num`` column to join on and a ``Month`` column
        used in the file names.
    type : str
        Label placed at the start of every ``Transaction_ID`` and inside each
        file name.
    config : configparser.ConfigParser
        Must provide ``JOURNAL_ENTRIES`` in section
        ``data_outputs_directory``; that folder receives the files and is
        created when it does not exist yet.

    Returns
    -------
    None
        Files are named ``{n}_{type}_{month}_entries.csv`` where ``n`` counts
        the months in order of first appearance in ``df``, starting at 1.
    """
    # Work on a copy so the caller's frame does not gain the helper columns.
    entries = df.copy()
    entries['Month_Num'] = entries['Date'].dt.month
    # ID = label + month number + '-' + (row index + 1).
    entries['Transaction_ID'] = (
        type
        + entries['Month_Num'].astype("str")
        + '-'
        + (entries.index + 1).astype("str")
    )
    entries = pd.merge(entries, month_df, on='Month_Num', how='left')

    file_path = config.get("data_outputs_directory", "JOURNAL_ENTRIES")
    if file_path:
        # to_csv does not create missing folders.
        os.makedirs(file_path, exist_ok=True)

    output_columns = ['Transaction_ID', 'Date', 'GL_Code', 'Account',
                      'Description', 'DEBIT', 'CREDIT']
    for month_num, month in enumerate(entries['Month'].unique(), start=1):
        file_df = entries.loc[entries['Month'] == month, output_columns]
        file_name = f'{month_num}_{type}_{month}_entries.csv'
        file_df.to_csv(os.path.join(file_path, file_name), index=False)

## Credit Card ETL Functions

In [188]:
def creating_df(paths):
    """Read every CSV listed in ``paths`` and stack them into a single frame.

    The combined frame gets a fresh 0..n-1 index.
    """
    frames = [pd.read_csv(csv_path) for csv_path in paths]
    combined = pd.concat(frames)
    return combined.reset_index(drop=True)

In [189]:
def joining_gl_codes(df, coa_mcc_df):
    """Pivot raw card transactions and attach the link-table columns by MCC.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw transactions with ``Date``, ``Name``, ``Memo``, ``Transaction``
        and ``Amount`` columns. ``Memo`` must split into exactly three
        ``;``-separated parts, and the last four characters of the second
        part must be the numeric MCC. The frame is not modified.
    coa_mcc_df : pandas.DataFrame
        Link table with an integer ``MCC`` column to join on.

    Returns
    -------
    pandas.DataFrame
        One row per ``Date``/``Name``/``Memo`` with one amount column per
        distinct ``Transaction`` value, the split ``Memo``/``MCC``/``Blank``
        columns, and the link-table columns (null where the MCC has no match).
    """
    # assign() builds a new frame, so the caller's Amount column keeps its signs.
    transactions = df.assign(Amount=df['Amount'].abs())
    # pivot_table averages by default, which would collapse identical
    # transactions (same Date/Name/Memo) into one amount and understate the
    # total; summing keeps the full value. Unique rows are unaffected.
    transactions = transactions.pivot_table(
        index=['Date', 'Name', 'Memo'],
        columns='Transaction',
        values='Amount',
        aggfunc='sum',
    )
    transactions = transactions.reset_index()
    transactions[['Memo', 'MCC', 'Blank']] = transactions['Memo'].str.split(';', expand=True)
    # Only the trailing four characters of the second memo field are the code.
    transactions['MCC'] = transactions['MCC'].str[-4:].astype('int')
    return pd.merge(transactions, coa_mcc_df, how='left', on='MCC')

In [190]:
def processing_df(df):
    """Shape the joined transactions into date-ordered journal lines.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``Date``, ``Name``, ``MCC_Description``, ``GL_Code`` and
        ``Account`` plus ``DEBIT`` and/or ``CREDIT``. The frame is not
        modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` (datetime), ``GL_Code``, ``Account``,
        ``Description`` (``MCC_Description: Name``), ``DEBIT``, ``CREDIT``
        (numeric columns rounded to 2 decimals), ``Order_Col`` (1-based
        position after sorting by date) and ``Sub_Order_Col`` (1 when
        ``DEBIT`` is present, 2 when it is null).
    """
    # assign() returns a new frame, leaving the caller's columns untouched.
    prepared = df.assign(
        Date=pd.to_datetime(df['Date']),
        Description=df['MCC_Description'] + ': ' + df['Name'],
    )
    # When the input contains only one kind of transaction the other amount
    # column is absent; add it as all-null so the selection below succeeds.
    for side in ('DEBIT', 'CREDIT'):
        if side not in prepared.columns:
            prepared[side] = np.nan
    prepared = prepared[['Date', 'GL_Code', 'Account', 'Description', 'DEBIT', 'CREDIT']]
    prepared = prepared.round(2)
    # A stable sort keeps same-day rows in their incoming order, so Order_Col
    # is reproducible between runs.
    prepared = prepared.sort_values(by='Date', kind='stable').reset_index(drop=True)
    prepared['Order_Col'] = prepared.index + 1
    prepared['Sub_Order_Col'] = np.where(prepared['DEBIT'].isnull(), 2, 1)
    return prepared

In [191]:
def creating_credit_entries(df, gl_code=200101, account='EdwardJones MasterCard'):
    """Build the offsetting credit line for every row without a ``CREDIT`` amount.

    Parameters
    ----------
    df : pandas.DataFrame
        Journal lines with ``Date``, ``GL_Code``, ``Account``,
        ``Description``, ``DEBIT``, ``CREDIT``, ``Order_Col`` and
        ``Sub_Order_Col``. The frame is not modified.
    gl_code : int, optional
        GL code written on the offsetting lines.
    account : str, optional
        Account name written on the offsetting lines.

    Returns
    -------
    pandas.DataFrame
        One row per input row whose ``CREDIT`` is null, with the ``DEBIT`` and
        ``CREDIT`` columns swapped, ``GL_Code``/``Account`` replaced by the
        given values and ``Sub_Order_Col`` set to 2. ``Order_Col`` is kept.
    """
    offsets = df.loc[df['CREDIT'].isnull()].reset_index(drop=True)
    offsets = offsets.assign(GL_Code=gl_code, Account=account, Sub_Order_Col=2)
    # Swapping the column labels moves each amount to the opposite side.
    offsets = offsets.rename(columns={'DEBIT': 'CREDIT', 'CREDIT': 'DEBIT'})
    return offsets[['Date', 'GL_Code', 'Account', 'Description',
                    'DEBIT', 'CREDIT', 'Order_Col', 'Sub_Order_Col']]

In [192]:
def creating_debit_entries(df, gl_code=200101, account='EdwardJones MasterCard'):
    """Build the offsetting debit line for every row without a ``DEBIT`` amount.

    Parameters
    ----------
    df : pandas.DataFrame
        Journal lines with ``Date``, ``GL_Code``, ``Account``,
        ``Description``, ``DEBIT``, ``CREDIT``, ``Order_Col`` and
        ``Sub_Order_Col``. The frame is not modified.
    gl_code : int, optional
        GL code written on the offsetting lines.
    account : str, optional
        Account name written on the offsetting lines.

    Returns
    -------
    pandas.DataFrame
        One row per input row whose ``DEBIT`` is null, with the ``DEBIT`` and
        ``CREDIT`` columns swapped, ``GL_Code``/``Account`` replaced by the
        given values and ``Sub_Order_Col`` set to 1. ``Order_Col`` is kept.
    """
    offsets = df.loc[df['DEBIT'].isnull()].reset_index(drop=True)
    offsets = offsets.assign(GL_Code=gl_code, Account=account, Sub_Order_Col=1)
    # Swapping the column labels moves each amount to the opposite side.
    offsets = offsets.rename(columns={'DEBIT': 'CREDIT', 'CREDIT': 'DEBIT'})
    return offsets[['Date', 'GL_Code', 'Account', 'Description',
                    'DEBIT', 'CREDIT', 'Order_Col', 'Sub_Order_Col']]

## Reading Data

### Credit Card Data

In [193]:
# Folder holding the credit card data: the CREDITCARD_DIRECTORY value from the
# [data_inputs_directory] config section, joined onto the working directory.
CREDITCARD_DIRECTORY = os.path.join(cwd,config.get("data_inputs_directory", "CREDITCARD_DIRECTORY"))

In [194]:
# Full path of every entry found in the credit card folder.
paths = creating_input_paths(CREDITCARD_DIRECTORY)

In [195]:
# Read each listed file as CSV and stack them into one frame.
df = creating_df(paths)

### Table Data

In [196]:
# Locations of the two lookup workbooks, taken from the [table_files] config section.
COA_DATA = config.get('table_files', 'COA_DATA')
MONTH_DATA = config.get('table_files', 'MONTH_DATA')

In [197]:
# Link table that joining_gl_codes merges onto the transactions by MCC.
coa_mcc_df = pd.read_excel(COA_DATA, sheet_name='coa_mcc_link_table')

In [198]:
# Month lookup: creating_output joins it on Month_Num and uses its Month column in file names.
month_df = pd.read_excel(MONTH_DATA)

## ETL

In [199]:
# Pivot the transactions by Transaction value and left-join coa_mcc_df on MCC.
df = joining_gl_codes(df, coa_mcc_df)

In [200]:
# Logic gate for checking nulls in the GL_Code column. Rows without a GL_Code
# cannot be posted, so the run is stopped here rather than only printing a
# message and letting "Run All" continue into the output step.
missing_gl_count = missing_gl_check(df)
if missing_gl_count != 0:
    print("GL_Codes are missing, need to update the coa and mcc link table")
    missing_mcc(df)
    raise ValueError(
        f"{missing_gl_count} transaction(s) have no GL_Code; "
        "update the coa and mcc link table and re-run"
    )
print("No GL_Codes missing")

No GL_Codes missing


In [201]:
# Parse dates, build descriptions, keep the journal columns and number the rows in date order.
df = processing_df(df)

In [202]:
# Offsetting card-account credit line for each row that has no CREDIT amount.
credit_entries_df = creating_credit_entries(df)

In [203]:
# Offsetting card-account debit line for each row that has no DEBIT amount.
debit_entries_df = creating_debit_entries(df)

In [204]:
# Stack the original lines with their offsetting lines.
# NOTE(review): df is reassigned, so re-running only this cell appends the
# offsetting lines a second time; re-run from the processing step instead.
df = pd.concat([df, credit_entries_df, debit_entries_df]).reset_index(drop=True)

In [205]:
# Order by Order_Col, then Sub_Order_Col (1 before 2), so the lines that share
# an Order_Col sit next to each other.
df = df.sort_values(by=['Order_Col','Sub_Order_Col']).reset_index(drop=True)

In [206]:
# Balance gate: the journal must balance before any file is written, so an
# imbalance stops the run instead of only printing a message.
if debit_credit_check(df):
    print("Debits equal credits. Proceed to next phase")
else:
    raise ValueError("Something went wrong: total debits do not equal total credits")

Debits equal credits. Proceed to next phase


In [207]:
# Label passed to creating_output, which uses it in transaction IDs and file names.
# NOTE(review): this rebinds the built-in name `type` for the rest of the
# kernel session; renaming it also requires updating the creating_output call below.
type = 'creditcard'

In [208]:
# Write one CSV of journal entries per month into the configured JOURNAL_ENTRIES folder.
creating_output(df, month_df, type, config)