## Managerial Accounting ETL
This notebook contains the code to transform the [Managerial Accounting](https://www.kaggle.com/datasets/jazidesigns/managerial-accounting) dataset into a double-entry compliant bookkeeping format.

In [None]:
import pandas as pd

In [None]:
# Load the raw transactions, expose the transaction date under the shorter
# column name 'Date', and parse that column into pandas datetimes.
df = (
    pd.read_csv('../data/managerial_accounting.csv')
    .rename(columns={'Transaction Date': 'Date'})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
)

In [None]:
# Preview the first rows of the loaded data.
df.head()

In [None]:
# Frequency of each distinct 'Reference Number' value.
df['Reference Number'].value_counts()

In [None]:
# Frequency of each distinct 'Note' value.
df['Note'].value_counts()

In [None]:
# Remove the 'Note' column; it is not carried into the double-entry output.
df = df.drop(columns=['Note'])

In [None]:
# Print the distinct values of each categorical column, one array per column.
for column_name in ('Transaction Type', 'Category', 'Department'):
    print(df[column_name].unique())

In [None]:
# Optional: uncomment the next line to downsample df to 20% of its rows for quicker test runs
#df = df.sample(frac=0.2, random_state=1)

In [None]:
# Empty accumulator for the double-entry rows; it starts with the same column
# labels as df and gains further columns as entries are added.
new_df = pd.DataFrame(columns=df.columns)

## Transformation 1: Expense

In [None]:
# Distinct categories that occur among 'Expense' transactions.
print(df.loc[df['Transaction Type'] == 'Expense', 'Category'].unique())

In [None]:
def categorize_transaction(category):
    """Map a transaction category to the name of its offsetting account.

    The category is checked against each group below in order, and the first
    group that contains it decides the result. A category found in none of
    the groups yields the string 'Error' instead of raising.
    """
    account_groups = (
        ('Cash', ['Utilities', 'Supplies', 'Salaries', 'Rent']),  # assume cash
        ('Accounts Payable', ['Loans']),
        ('Asset Account', ['Assets']),
        ('Revenue Account', ['Service Revenue', 'Royalties', 'Product Sales']),
    )
    for account_name, member_categories in account_groups:
        if category in member_categories:
            return account_name
    return 'Error'
    
# Expense / Utilities: each source row becomes a debit to the expense account
# followed by a matching credit to the account chosen by categorize_transaction.
# The entries are collected in a list and attached to new_df in one step,
# because DataFrame.append was removed in pandas 2.0 and, in older versions,
# re-copied the whole frame on every call.
utilities_expenses = df[(df['Transaction Type'] == 'Expense') & (df['Category'] == 'Utilities')]
ledger_entries = []
for _, row in utilities_expenses.iterrows():
    source_fields = row.to_dict()
    amount = row['Transaction Amount']
    #Debit Entry
    ledger_entries.append({**source_fields, 'Account': "Expense Account", 'Debit': amount, 'Credit': 0.0})
    #Credit Entry
    ledger_entries.append({**source_fields, 'Account': categorize_transaction(row['Category']), 'Debit': 0.0, 'Credit': amount})

# Nothing to add when no row matched the filter.
if ledger_entries:
    entries_df = pd.DataFrame(ledger_entries)
    # While new_df is still empty, take the new rows as-is so its untyped
    # placeholder columns do not influence the resulting dtypes.
    new_df = entries_df if new_df.empty else pd.concat([new_df, entries_df], ignore_index=True)

In [None]:
# Display-only: rows that are not Expense/Utilities. The result is not
# assigned, so df itself is left unchanged.
df[~((df['Transaction Type'] == 'Expense') & (df['Category'] == 'Utilities'))]

In [None]:
# Expense / Supplies: each source row becomes a debit to the expense account
# followed by a matching credit to the account chosen by categorize_transaction.
# The entries are collected in a list and attached to new_df in one step,
# because DataFrame.append was removed in pandas 2.0 and, in older versions,
# re-copied the whole frame on every call.
supplies_expenses = df[(df['Transaction Type'] == 'Expense') & (df['Category'] == 'Supplies')]
ledger_entries = []
for _, row in supplies_expenses.iterrows():
    source_fields = row.to_dict()
    amount = row['Transaction Amount']
    #Debit Entry
    ledger_entries.append({**source_fields, 'Account': "Expense Account", 'Debit': amount, 'Credit': 0.0})
    #Credit Entry
    ledger_entries.append({**source_fields, 'Account': categorize_transaction(row['Category']), 'Debit': 0.0, 'Credit': amount})

# Nothing to add when no row matched the filter.
if ledger_entries:
    entries_df = pd.DataFrame(ledger_entries)
    # While new_df is still empty, take the new rows as-is so its untyped
    # placeholder columns do not influence the resulting dtypes.
    new_df = entries_df if new_df.empty else pd.concat([new_df, entries_df], ignore_index=True)

In [None]:
# Display-only: rows that are not Expense/Supplies. The result is not
# assigned, so df itself is left unchanged.
df[~((df['Transaction Type'] == 'Expense') & (df['Category'] == 'Supplies'))]

In [None]:
# Expense / Salaries: each source row becomes a debit to the expense account
# followed by a matching credit to the account chosen by categorize_transaction.
# The entries are collected in a list and attached to new_df in one step,
# because DataFrame.append was removed in pandas 2.0 and, in older versions,
# re-copied the whole frame on every call.
salaries_expenses = df[(df['Transaction Type'] == 'Expense') & (df['Category'] == 'Salaries')]
ledger_entries = []
for _, row in salaries_expenses.iterrows():
    source_fields = row.to_dict()
    amount = row['Transaction Amount']
    #Debit Entry
    ledger_entries.append({**source_fields, 'Account': "Expense Account", 'Debit': amount, 'Credit': 0.0})
    #Credit Entry
    ledger_entries.append({**source_fields, 'Account': categorize_transaction(row['Category']), 'Debit': 0.0, 'Credit': amount})

# Nothing to add when no row matched the filter.
if ledger_entries:
    entries_df = pd.DataFrame(ledger_entries)
    # While new_df is still empty, take the new rows as-is so its untyped
    # placeholder columns do not influence the resulting dtypes.
    new_df = entries_df if new_df.empty else pd.concat([new_df, entries_df], ignore_index=True)

In [None]:
# Display-only: rows that are not Expense/Salaries. The result is not
# assigned, so df itself is left unchanged.
df[~((df['Transaction Type'] == 'Expense') & (df['Category'] == 'Salaries'))]

In [None]:
# Expense / Rent: each source row becomes a debit to the expense account
# followed by a matching credit to the account chosen by categorize_transaction.
# The entries are collected in a list and attached to new_df in one step,
# because DataFrame.append was removed in pandas 2.0 and, in older versions,
# re-copied the whole frame on every call.
rent_expenses = df[(df['Transaction Type'] == 'Expense') & (df['Category'] == 'Rent')]
ledger_entries = []
for _, row in rent_expenses.iterrows():
    source_fields = row.to_dict()
    amount = row['Transaction Amount']
    #Debit Entry
    ledger_entries.append({**source_fields, 'Account': "Expense Account", 'Debit': amount, 'Credit': 0.0})
    #Credit Entry
    ledger_entries.append({**source_fields, 'Account': categorize_transaction(row['Category']), 'Debit': 0.0, 'Credit': amount})

# Nothing to add when no row matched the filter.
if ledger_entries:
    entries_df = pd.DataFrame(ledger_entries)
    # While new_df is still empty, take the new rows as-is so its untyped
    # placeholder columns do not influence the resulting dtypes.
    new_df = entries_df if new_df.empty else pd.concat([new_df, entries_df], ignore_index=True)

In [None]:
# Display-only: rows that are not Expense/Rent. The result is not
# assigned, so df itself is left unchanged.
df[~((df['Transaction Type'] == 'Expense') & (df['Category'] == 'Rent'))]

In [None]:
# Preview the first double-entry rows produced so far.
new_df.head()

In [None]:
# Keep only the rows whose transaction type is not 'Expense'.
df = df.loc[df['Transaction Type'] != 'Expense']

## Transformation 2: Liability Payment

In [None]:
# Distinct categories that occur among 'Liability Payment' transactions.
print(df.loc[df['Transaction Type'] == 'Liability Payment', 'Category'].unique())

In [None]:
def categorize_liability_payment(category):
    """Map a liability-payment category to an account name.

    Groups are checked in order and the first one containing the category
    decides the result. A category found in none of them yields the string
    "Error" instead of raising.
    """
    account_groups = (
        ("Expense Account", ['Utilities', 'Supplies', 'Salaries', 'Rent']),
        ("Asset Account", ['Assets', 'Loans']),
        ("Revenue Account", ['Royalties', 'Service Revenue', 'Product Sales']),
    )
    for account_name, member_categories in account_groups:
        if category in member_categories:
            return account_name
    return "Error"

# Liability payments: each source row becomes a debit to the account derived
# from its Category, followed by a credit to Cash for the same amount.
# The entries are collected in a list and attached to new_df in one step,
# because DataFrame.append was removed in pandas 2.0 and, in older versions,
# re-copied the whole frame on every call.
liability_payments = df[df['Transaction Type'] == 'Liability Payment']
ledger_entries = []
for _, row in liability_payments.iterrows():
    source_fields = row.to_dict()
    amount = row['Transaction Amount']
    #Debit Entry
    ledger_entries.append({**source_fields, 'Account': categorize_liability_payment(row['Category']), 'Debit': amount, 'Credit': 0.0})
    #Credit Entry (Cash)
    ledger_entries.append({**source_fields, 'Account': 'Cash', 'Debit': 0.0, 'Credit': amount})

# Nothing to add when no row matched the filter.
if ledger_entries:
    entries_df = pd.DataFrame(ledger_entries)
    # While new_df is still empty, take the new rows as-is so its untyped
    # placeholder columns do not influence the resulting dtypes.
    new_df = entries_df if new_df.empty else pd.concat([new_df, entries_df], ignore_index=True)

In [None]:
# Keep only the rows whose transaction type is not 'Liability Payment'.
df = df.loc[df['Transaction Type'] != 'Liability Payment']

## Transformation 3: Revenue

In [None]:
# Distinct categories that occur among 'Revenue' transactions.
print(df.loc[df['Transaction Type'] == 'Revenue', 'Category'].unique())

In [None]:
def categorize_revenue(category):
    """Return "Revenue Account" for a recognised revenue category.

    Any category other than 'Royalties', 'Service Revenue' or 'Product Sales'
    yields the string "Error" instead of raising.
    """
    recognised_categories = ('Royalties', 'Service Revenue', 'Product Sales')
    return "Revenue Account" if category in recognised_categories else "Error"

# Revenue: each source row becomes a credit to the revenue account derived
# from its Category, followed by a debit to Cash for the same amount.
# The entries are collected in a list and attached to new_df in one step,
# because DataFrame.append was removed in pandas 2.0 and, in older versions,
# re-copied the whole frame on every call.
revenue_transactions = df[df['Transaction Type'] == 'Revenue']
ledger_entries = []
for _, row in revenue_transactions.iterrows():
    source_fields = row.to_dict()
    amount = row['Transaction Amount']
    #Credit Entry (Increase in Revenue)
    ledger_entries.append({**source_fields, 'Account': categorize_revenue(row['Category']), 'Debit': 0.0, 'Credit': amount})
    #Debit Entry (Increase in Asset)
    ledger_entries.append({**source_fields, 'Account': 'Cash', 'Debit': amount, 'Credit': 0.0})

# Nothing to add when no row matched the filter.
if ledger_entries:
    entries_df = pd.DataFrame(ledger_entries)
    # While new_df is still empty, take the new rows as-is so its untyped
    # placeholder columns do not influence the resulting dtypes.
    new_df = entries_df if new_df.empty else pd.concat([new_df, entries_df], ignore_index=True)

In [None]:
# Keep only the rows whose transaction type is not 'Revenue'.
df = df.loc[df['Transaction Type'] != 'Revenue']

## Transformation 4: Asset Purchase

In [None]:
# Distinct categories that occur among 'Asset Purchase' transactions.
print(df.loc[df['Transaction Type'] == 'Asset Purchase', 'Category'].unique())

In [None]:
def categorize_asset_purchase(category):
    """Return "Asset Account" for the categories 'Loans' and 'Assets'.

    Every other category yields the string "Error" instead of raising.
    """
    if category not in ('Loans', 'Assets'):
        return "Error"
    return "Asset Account"

def is_cash_equivalent(payment_method):
    """Tell whether payment_method is exactly "Cash" or "Bank Transfer".

    NOTE(review): this helper is not called in the visible cells of the
    notebook - confirm whether it is still needed.
    """
    cash_like_methods = ("Cash", "Bank Transfer")
    return payment_method in cash_like_methods

# Asset purchases: each source row becomes a debit to the account derived from
# its Category, followed by a credit to Cash for the same amount.
# NOTE(review): is_cash_equivalent is not consulted here, so every purchase is
# credited to Cash - confirm this is intended.
# The entries are collected in a list and attached to new_df in one step,
# because DataFrame.append was removed in pandas 2.0 and, in older versions,
# re-copied the whole frame on every call.
asset_purchases = df[df['Transaction Type'] == 'Asset Purchase']
ledger_entries = []
for _, row in asset_purchases.iterrows():
    source_fields = row.to_dict()
    amount = row['Transaction Amount']
    #Debit Entry (Increase in Asset)
    ledger_entries.append({**source_fields, 'Account': categorize_asset_purchase(row['Category']), 'Debit': amount, 'Credit': 0.0})
    #Credit Entry (Decrease in Cash)
    ledger_entries.append({**source_fields, 'Account': 'Cash', 'Debit': 0.0, 'Credit': amount})

# Nothing to add when no row matched the filter.
if ledger_entries:
    entries_df = pd.DataFrame(ledger_entries)
    # While new_df is still empty, take the new rows as-is so its untyped
    # placeholder columns do not influence the resulting dtypes.
    new_df = entries_df if new_df.empty else pd.concat([new_df, entries_df], ignore_index=True)


In [None]:
# Keep only the rows whose transaction type is not 'Asset Purchase'.
df = df.loc[df['Transaction Type'] != 'Asset Purchase']

In [None]:
# Sanity check: once every handled transaction type has been filtered out, the
# original dataframe should be empty. Any rows still displayed here were not
# converted into double-entry rows.
df

In [None]:
# 'Transaction Amount' is now redundant: its value has been split into the
# Debit and Credit columns of each entry.
new_df = new_df.drop(columns=['Transaction Amount'])

In [None]:
# Write the double-entry rows to CSV; index=False leaves the row index out of
# the file.
new_df.to_csv('../data/double_entry_managerial_accounting.csv', index=False)

In [None]:
# (rows, columns) of the final double-entry table.
new_df.shape