# Final ProfitAndLoss Report Parsing Scripts   
*Edited 7 July 2021*   
*Paul Cassidy*

## Import libraries and raw data file

In [4]:
import pandas as pd
import json
import re

# Raw report exports. Forward slashes are accepted by open() on Windows and
# POSIX alike, whereas a literal backslash separator only resolves on Windows.
transaction_list_path = "RawData/TransactionListReal.json"
general_ledger_path   = "RawData/GeneralLedgerReal.json"
balance_sheet_path    = "RawData/BalanceSheetReal.json"
profit_and_loss_path  = "RawData/ProfitAndLossReal.json"


def _load_json(path):
    """Return the parsed contents of the JSON file at *path*.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default text encoding.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)


transaction_list = _load_json(transaction_list_path)
general_ledger   = _load_json(general_ledger_path)
balance_sheet    = _load_json(balance_sheet_path)
profit_and_loss  = _load_json(profit_and_loss_path)

## Define parse_record functions to extract individual account Name, ID, and Value

### *Static function, only for reports like ProfitAndLoss with one account per 'ColData' object*

In [5]:
def parse_record_static(data):
    """Build one account record from a normalized 'ColData' table.

    Args:
        data: Table indexable as ``data['value'][i]`` and ``data['id'][i]``
            (json_crawler passes the result of ``pd.json_normalize``).
            Row 0 supplies the account name and ID, row 1 the account value.

    Returns:
        dict with keys 'AccountName', 'AccountID' and 'AccountValue'. A falsy
        value (e.g. an empty string) is reported as the string "NA".

    Note:
        Lookups are not guarded: a missing 'value'/'id' column or a missing
        row label propagates the lookup error to the caller.
    """
    values = data['value']
    return {
        'AccountName': values[0],
        'AccountID': data['id'][0],
        # Falsy values (such as "") are replaced by the "NA" placeholder.
        'AccountValue': values[1] or "NA",
    }

### *Dynamic function, for reports like TransactionList with multiple accounts/transactions per 'ColData' object*

In [6]:
def parse_record_dyno(data):
    """Build an account record from the first rows of a normalized table.

    Args:
        data: Table exposing ``shape`` and indexable as ``data['value'][i]``
            and ``data['id'][i]``.

    Returns:
        ``None`` when *data* has no rows; otherwise a dict with keys
        'AccountName' and 'AccountID' (taken from row 0) and 'AccountValue'
        (taken from row 1, or the string "NA" when that value is falsy).

    Note:
        Lookups are not guarded: a missing column or row label propagates
        the lookup error to the caller.
    """
    # NOTE(review): only the first record is ever produced; rows beyond
    # index 1 are ignored. Presumably this was meant to yield one record per
    # account/transaction — confirm the intended output shape before relying
    # on it for multi-record reports.
    if data.shape[0] < 1:
        return None

    values = data['value']
    return {
        'AccountName': values[0],
        'AccountID': data['id'][0],
        # Falsy values (such as "") are replaced by the "NA" placeholder.
        'AccountValue': values[1] or "NA",
    }

## Define JSON file crawler

In [7]:
def _try_parse_static(data, column, row):
    """Return the record parsed from ``data[column][row]``, or None if it cannot be parsed."""
    try:
        return parse_record_static(pd.json_normalize(data[column][row]))
    except Exception:
        # Best-effort by design: the column may be absent at this nesting
        # level, or the cell may lack the fields parse_record_static looks up.
        # Unlike a bare ``except``, this still lets KeyboardInterrupt and
        # SystemExit propagate.
        return None


def json_crawler(data):
    """Recursively collect account records from a nested report.

    Args:
        data: Either the dict returned by ``json.load()`` or a DataFrame
            produced by ``pd.json_normalize`` at some nesting level.

    Returns:
        list of dicts as produced by ``parse_record_static``. For each row the
        order is: the 'ColData' record, the 'Header.ColData' record, then
        every record found beneath 'Rows.Row'.
    """
    # A dict from json.load() is first flattened to a data frame, then crawled.
    if isinstance(data, dict):
        return json_crawler(pd.json_normalize(data))

    records = []
    for row in range(data.shape[0]):
        # Account data held directly on this row, or on its section header.
        for column in ('ColData', 'Header.ColData'):
            record = _try_parse_static(data, column, row)
            if record is not None:
                records.append(record)

        # Nested rows: normalize them and crawl one level deeper.
        try:
            row_data = pd.json_normalize(data['Rows.Row'][row])
        except Exception:
            # No (usable) nested rows under this row; nothing to descend into.
            continue
        records.extend(json_crawler(row_data))
    return records

## Define function to export to csv

In [8]:
def list_df_csv(lst, prefix, filename):
    """Write a list of records to ``{prefix}/{filename}.csv``.

    Args:
        lst: Records accepted by ``pd.DataFrame`` (json_crawler supplies a
            list of dicts); each record becomes one CSV row.
        prefix: Output directory. It is created if it does not exist yet.
        filename: File name without the ``.csv`` extension.

    The data frame index is not written to the file.
    """
    import os

    # Writing into a directory that does not exist fails, so create it first.
    # An empty prefix is left alone because makedirs("") is itself an error.
    if prefix:
        os.makedirs(prefix, exist_ok=True)

    df = pd.DataFrame(lst)
    df.to_csv(f"{prefix}/{filename}.csv", index=False)

## Test

In [9]:
# Crawl the ProfitAndLoss report and export the parsed records to a CSV file.
list_df_csv(
    lst=json_crawler(profit_and_loss),
    prefix="Output",
    filename="palrealtest",
)