## Imports

In [1118]:
import pandas as pd
import openpyxl
import configparser
import os

In [1119]:
# Remember the working directory; later cells build their paths relative to it.
cwd = os.getcwd()
# Setting these options to None lifts pandas' display truncation for columns and rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

## Config

In [1120]:
config_path = os.path.join(cwd, '../docs/config.ini')
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns only the
# paths it actually parsed, so an empty result means nothing was loaded.
# Fail here rather than with an obscure NoSectionError in a later cell.
loaded_config_files = config.read(config_path)
if not loaded_config_files:
    raise FileNotFoundError(f"Could not read configuration file: {config_path}")
# Keep the list of parsed files as the cell's displayed output.
loaded_config_files

['c:\\Users\\tasco\\OneDrive\\Python\\Projects\\financialstatements\\financialstatements\\notebooks\\../docs/config.ini']

## Reading Data

### Credit Card Data

In [1121]:
# Folder of credit card exports, taken from the config and joined onto the working directory.
CREDITCARD_DIRECTORY = os.path.join(
    cwd,
    config.get(section="data_inputs_directory", option="CREDITCARD_DIRECTORY"),
)

In [1122]:
# Every entry name in the credit card folder (os.listdir does not filter by type or extension).
creditcard_files = os.listdir(path=CREDITCARD_DIRECTORY)

In [1123]:
# Turn each entry name into a path inside the credit card folder.
creditcard_paths = [
    os.path.join(CREDITCARD_DIRECTORY, file_name)
    for file_name in creditcard_files
]

In [1124]:
# Load every credit card file as its own DataFrame.
dfs = [pd.read_csv(csv_path) for csv_path in creditcard_paths]

In [1125]:
# Stack the per-file frames and renumber the rows 0..n-1.
creditcard_df = pd.concat(dfs, ignore_index=True)

### Output Data

In [1126]:
# Output folder (joined onto the working directory) and the GL_DATA file name, both from the config.
OUTPUT_DIRECTORY = os.path.join(
    cwd,
    config.get(section='data_outputs_directory', option='OUTPUT_DIRECTORY'),
)
GL_DATA = config.get(section='output_files', option='GL_DATA')

In [1127]:
# Load the GL_DATA workbook from the output folder (read_excel reads the first sheet by default).
gl_df = pd.read_excel(io=os.path.join(OUTPUT_DIRECTORY, GL_DATA))

### Table Data

In [1128]:
# Tables folder (joined onto the working directory) and the COA_DATA file name, both from the config.
TABLES_DIRECTORY = os.path.join(
    cwd,
    config.get(section='data_tables_directory', option='TABLES_DIRECTORY'),
)
COA_DATA = config.get(section='table_files', option='COA_DATA')

In [1129]:
# Open the COA_DATA workbook with openpyxl to get its sheet names in workbook order.
workbook = openpyxl.load_workbook(filename=os.path.join(TABLES_DIRECTORY, COA_DATA))
sheet_names = workbook.sheetnames

In [1130]:
# Read the first three sheets, in workbook order, into coa_df, mcc_df and coa_mcc_df.
# NOTE(review): the sheets are picked by position, so reordering them in the
# workbook changes which table lands in which variable.
coa_df, mcc_df, coa_mcc_df = (
    pd.read_excel(os.path.join(TABLES_DIRECTORY, COA_DATA), sheet_name=sheet_names[position])
    for position in range(3)
)

## ETL

In [1131]:
# Parse the 'Date' column into pandas datetimes.
creditcard_df = creditcard_df.assign(Date=pd.to_datetime(creditcard_df['Date']))

In [1132]:
# Drop the sign from 'Amount'; the 'Transaction' column is pivoted into separate columns in the next cell.
creditcard_df = creditcard_df.assign(Amount=creditcard_df['Amount'].abs())

In [1133]:
# Spread 'Amount' into one column per distinct 'Transaction' value (later cells
# select 'DEBIT' and 'CREDIT'). pivot_table aggregates rows that share the same
# (Date, Name, Memo, Transaction) key, and its default aggfunc is the mean.
# That would collapse e.g. two identical same-day purchases into one entry of
# a single purchase's amount. Summing keeps the full amount in the ledger.
# Combinations that never occur are still left as NaN, which the later
# isnull() filters rely on.
creditcard_df = creditcard_df.pivot_table(
    index=['Date', 'Name', 'Memo'],
    columns='Transaction',
    values='Amount',
    aggfunc='sum',
)
# Turn the (Date, Name, Memo) index back into ordinary columns.
creditcard_df = creditcard_df.reset_index()

In [1134]:
# Break 'Memo' apart on ';' into three columns: Memo, MCC and Blank.
# NOTE(review): the assignment needs the split to yield exactly three columns —
# confirm every memo has exactly two ';' separators.
creditcard_df[['Memo', 'MCC', 'Blank']] = creditcard_df['Memo'].str.split(pat=';', expand=True)

In [1135]:
# Keep only the last four characters of the MCC segment.
creditcard_df['MCC'] = creditcard_df['MCC'].str.slice(start=-4)

In [1136]:
# Cast the MCC text to integers before it is used as the merge key.
creditcard_df['MCC'] = creditcard_df['MCC'].astype(int)

In [1137]:
# Left-join the MCC link table onto the transactions so every transaction row
# is kept. The link table is expected to hold at most one row per MCC;
# validate='many_to_one' makes pandas raise a MergeError instead of silently
# duplicating transactions (and their amounts) if an MCC appears twice.
creditcard_df = pd.merge(creditcard_df,
                        coa_mcc_df,
                        how='left',
                        on='MCC',
                        validate='many_to_one')

In [1138]:
# Creating function for counting number of nulls
def missing_gl_check(df):
    """Return how many rows of ``df`` have a null 'GL_Code'."""
    return df['GL_Code'].isna().sum()

In [1139]:
# Creating function for identifying rows with null GL_Codes
def missing_mcc(df):
    """Print the rows of ``df`` whose 'GL_Code' is null.

    Returns None; the rows are only written to standard output.
    """
    unmapped_rows = df.loc[df['GL_Code'].isna()]
    print(unmapped_rows)
    return None

In [1140]:
# Report whether any transaction failed to pick up a GL_Code in the merge,
# and print the affected rows when some did.
if missing_gl_check(creditcard_df) == 0:
    print("No GL_Codes missing")
else:
    print("GL_Codes are missing, need to update the coa and mcc link table")
    missing_mcc(creditcard_df)

No GL_Codes missing


In [1141]:
# Build the 'Description' text as "<MCC_Description>: <Name>".
creditcard_df['Description'] = (
    creditcard_df['MCC_Description']
    .add(': ')
    .add(creditcard_df['Name'])
)

In [1142]:
# Keep only these six columns, in this order.
creditcard_df = creditcard_df.loc[
    :, ['Date', 'GL_Code', 'Account', 'Description', 'DEBIT', 'CREDIT']
]

In [1143]:
# Round the numeric columns to two decimal places.
creditcard_df = creditcard_df.round(decimals=2)

In [1144]:
# Copy the rows that have no CREDIT amount; the following cells turn them into offsetting entries.
credit_entries = (
    creditcard_df
    .loc[creditcard_df['CREDIT'].isna()]
    .reset_index(drop=True)
    .copy()
)

In [1145]:
# Point the copied rows at the credit card account and swap the DEBIT and
# CREDIT columns, so each amount lands on the opposite side.
credit_entries = (
    credit_entries
    .assign(GL_Code=200101, Account='EdwardJones MasterCard')
    .rename(columns={'DEBIT': 'CREDIT', 'CREDIT': 'DEBIT'})
)

In [1146]:
# Put the columns back in ledger order after the DEBIT/CREDIT rename.
credit_entries = credit_entries.loc[
    :, ['Date', 'GL_Code', 'Account', 'Description', 'DEBIT', 'CREDIT']
]

In [1147]:
# Copy the rows that have no DEBIT amount; the following cells turn them into offsetting entries.
debit_entries = (
    creditcard_df
    .loc[creditcard_df['DEBIT'].isna()]
    .reset_index(drop=True)
    .copy()
)

In [1148]:
# Point the copied rows at the credit card account and swap the DEBIT and
# CREDIT columns, so each amount lands on the opposite side.
debit_entries = (
    debit_entries
    .assign(GL_Code=200101, Account='EdwardJones MasterCard')
    .rename(columns={'DEBIT': 'CREDIT', 'CREDIT': 'DEBIT'})
)

In [1149]:
# Put the columns back in ledger order after the DEBIT/CREDIT rename.
debit_entries = debit_entries.loc[
    :, ['Date', 'GL_Code', 'Account', 'Description', 'DEBIT', 'CREDIT']
]

In [1150]:
# Append both sets of offsetting entries to the original rows and renumber 0..n-1.
creditcard_df = pd.concat(
    [creditcard_df, credit_entries, debit_entries],
    ignore_index=True,
)

In [1151]:
def debit_credit_check(df, tolerance=0.005):
    """Return True when the 'DEBIT' and 'CREDIT' columns of ``df`` sum to the same total.

    The totals are floating-point sums, so an exact ``==`` can report balanced
    books as unbalanced (0.1 + 0.2 != 0.3 in binary floating point). The totals
    are therefore compared within ``tolerance``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric 'DEBIT' and 'CREDIT' columns; NaN cells are skipped
        by ``Series.sum``.
    tolerance : float, optional
        Largest absolute difference still treated as balanced. The default of
        half a cent suits amounts rounded to two decimals, where a real
        imbalance is at least 0.01.

    Returns
    -------
    bool
        True if the totals differ by less than ``tolerance``.
    """
    debit_total = df['DEBIT'].sum()
    credit_total = df['CREDIT'].sum()
    return bool(abs(debit_total - credit_total) < tolerance)

In [1152]:
# Print the journal entries only when total debits match total credits.
if not debit_credit_check(creditcard_df):
    print("Something went wrong")
else:
    print("Debits equal credits. Proceed to next phase")
    print(creditcard_df)

Debits equal credits. Proceed to next phase
          Date  GL_Code                                            Account  \
0   2023-01-03   600602                                Hobbies - Jiu Jitsu   
1   2023-01-03   600301                                          Utilities   
2   2023-01-03   600802                 Car Expense - Professional Service   
3   2023-01-03   600601               Hobbies - hobby, toy or games stores   
4   2023-01-04   600601               Hobbies - hobby, toy or games stores   
5   2023-01-09   600201                    Gas - AUTOMATED FUEL DISPENSERS   
6   2023-01-09   600501                       Food - Fast Food Restaurants   
7   2023-01-10   600501                       Food - Fast Food Restaurants   
8   2023-01-10   600501                       Food - Fast Food Restaurants   
9   2023-01-10   601003                              Misc - Discount Store   
10  2023-01-10   600201                    Gas - AUTOMATED FUEL DISPENSERS   
11  2023-01-10   600