In [7]:
import pandas as pd
import json
import re

# Locations of the raw report exports, relative to the notebook's working
# directory. Forward slashes are used because they are accepted on Windows as
# well as on POSIX systems, and they match the "../Output/" paths used when
# the CSV files are written.
transaction_list_path = "../RawData/TransactionListReal.json"
general_ledger_path   = "../RawData/GeneralLedgerReal.json"
balance_sheet_path    = "../RawData/BalanceSheetReal.json"
profit_and_loss_path  = "../RawData/ProfitAndLossReal.json"

# The files are opened in binary mode so that json.load() detects the JSON
# text encoding (UTF-8/16/32) itself instead of relying on the platform's
# locale default, which is not UTF-8 on every system.
with open(transaction_list_path, "rb") as f:
    transaction_list = json.load(f)

with open(general_ledger_path, "rb") as f:
    general_ledger = json.load(f)

with open(balance_sheet_path, "rb") as f:
    balance_sheet = json.load(f)

with open(profit_and_loss_path, "rb") as f:
    profit_and_loss = json.load(f)

In [5]:
import pandas as pd
import json

def parse_record_static(data):
    """Build an account record from a two-cell column-data frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``value`` and an ``id`` column. Row 0 supplies the
        account name and id, row 1 supplies the account value.

    Returns
    -------
    dict
        ``AccountName``, ``AccountID`` and ``AccountValue``. A falsy account
        value (for example an empty string) is reported as ``"NA"``.

    Raises
    ------
    KeyError
        If the ``value`` or ``id`` column, or row 0 / row 1, is absent.
    """
    name_cell = data.loc[0, 'value']
    id_cell = data.loc[0, 'id']
    amount_cell = data.loc[1, 'value']

    return {
        'AccountName': name_cell,
        'AccountID': id_cell,
        # Falsy amounts (e.g. "") are written out as the "NA" placeholder.
        'AccountValue': amount_cell if amount_cell else "NA",
    }

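# TODO: unfinished draft of parse_record_dyno, kept for reference only. It is not
# valid Python as written (empty subscripts), and json_crawler_dyno refers to
# this name, so the draft must be completed before that crawler can be used.
# def parse_record_dyno(data):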
#     for i in range(data.shape[0]):
#         for k, v in data[i].items():
#             var = data[]

#             acc_name = data['value'][0]
#             acc_id   = data['id'][]
#             acc_val  = data['value'][]
#             if not acc_val:
#                 acc_val = "NA"
#     return {'AccountName'  : acc_name,
#             'AccountID'    : acc_id,
#             'AccountValue' : acc_val}

def json_crawler(data, parse_record=None):
    """Walk a nested report and collect one parsed record per column-data cell.

    Parameters
    ----------
    data : dict or pandas.DataFrame
        Either the dictionary returned by ``json.load()`` or, during
        recursion, a frame produced by ``pd.json_normalize``.
    parse_record : callable, optional
        Called with the normalized frame of every ``ColData`` and
        ``Header.ColData`` cell; whatever it returns is appended to the
        result. Defaults to ``parse_record_static``.

    Returns
    -------
    list
        Parsed records in traversal order. For each row the ``ColData``
        record comes first, then the ``Header.ColData`` record, then the
        records found under ``Rows.Row``.

    Notes
    -----
    The crawl is best-effort: a cell that is missing, cannot be normalized or
    cannot be parsed is skipped. Only ``Exception`` subclasses are suppressed,
    so ``KeyboardInterrupt`` and ``SystemExit`` still stop the crawl.
    """
    if parse_record is None:
        parse_record = parse_record_static

    # json.load() hands back a dictionary; flatten it to a one-row frame first.
    if isinstance(data, dict):
        return json_crawler(pd.json_normalize(data), parse_record)

    records = []
    for row in range(data.shape[0]):
        # Cells that directly hold the column data of a record.
        for column in ('ColData', 'Header.ColData'):
            try:
                record_data = pd.json_normalize(data[column][row])
                records.append(parse_record(record_data))
            except Exception:
                # Column absent, empty cell, or record without the expected fields.
                pass

        # Nested rows are crawled recursively and appended after this row's records.
        try:
            row_data = pd.json_normalize(data['Rows.Row'][row])
            records.extend(json_crawler(row_data, parse_record))
        except Exception:
            pass

    return records

def json_crawler_dyno(data, parse_record=None):
    """Walk a nested report and collect one parsed record per column-data cell.

    Same traversal as ``json_crawler`` but intended for the "dynamic" record
    parser.

    Parameters
    ----------
    data : dict or pandas.DataFrame
        Either the dictionary returned by ``json.load()`` or, during
        recursion, a frame produced by ``pd.json_normalize``.
    parse_record : callable, optional
        Called with the normalized frame of every ``ColData`` and
        ``Header.ColData`` cell; whatever it returns is appended to the
        result. Defaults to a module-level ``parse_record_dyno``.

    Returns
    -------
    list
        Parsed records in traversal order. For each row the ``ColData``
        record comes first, then the ``Header.ColData`` record, then the
        records found under ``Rows.Row``.

    Raises
    ------
    NameError
        If no ``parse_record`` is given and ``parse_record_dyno`` is not
        defined. The lookup happens before the crawl so that a missing parser
        is reported instead of being mistaken for "no records found".

    Notes
    -----
    The crawl is best-effort: a cell that is missing, cannot be normalized or
    cannot be parsed is skipped. Only ``Exception`` subclasses are suppressed,
    so ``KeyboardInterrupt`` and ``SystemExit`` still stop the crawl.
    """
    if parse_record is None:
        # Resolve the default outside the per-cell try blocks; otherwise the
        # NameError would be swallowed and every crawl would return [].
        try:
            parse_record = parse_record_dyno
        except NameError:
            raise NameError(
                "json_crawler_dyno needs a record parser: define "
                "parse_record_dyno or pass parse_record=..."
            ) from None

    # json.load() hands back a dictionary; flatten it to a one-row frame first.
    if isinstance(data, dict):
        return json_crawler_dyno(pd.json_normalize(data), parse_record)

    records = []
    for row in range(data.shape[0]):
        # Cells that directly hold the column data of a record.
        for column in ('ColData', 'Header.ColData'):
            try:
                record_data = pd.json_normalize(data[column][row])
                records.append(parse_record(record_data))
            except Exception:
                # Column absent, empty cell, or record without the expected fields.
                pass

        # Nested rows are crawled recursively and appended after this row's records.
        try:
            row_data = pd.json_normalize(data['Rows.Row'][row])
            records.extend(json_crawler_dyno(row_data, parse_record))
        except Exception:
            pass

    return records

def json_to_csv(lst, prefix, filename):
    """Write a list of records to ``<prefix>/<filename>.csv`` without the index.

    Parameters
    ----------
    lst : list
        Records accepted by ``pd.DataFrame`` (for example a list of dicts).
    prefix : str
        Output directory, with or without a trailing separator. An empty
        string means the current working directory. The directory is created
        if it does not exist yet.
    filename : str
        File name without the ``.csv`` extension.
    """
    import os  # local import: the notebook's import cells do not bring in os

    # os.path.join avoids the doubled separator that plain string formatting
    # produced for prefixes such as "../Output/", and keeps an empty prefix
    # relative instead of pointing at the filesystem root.
    target = os.path.join(prefix, f"{filename}.csv")
    if prefix:
        os.makedirs(prefix, exist_ok=True)

    df = pd.DataFrame(lst)
    df.to_csv(target, index=False)

In [9]:
# Crawl the profit-and-loss report and write the records to palrealtest.csv under ../Output/.
json_to_csv(
    lst=json_crawler(profit_and_loss),
    prefix="../Output/",
    filename="palrealtest",
)

In [10]:
# Crawl the balance-sheet report and write the records to bsrealtest.csv under ../Output/.
json_to_csv(
    lst=json_crawler(balance_sheet),
    prefix="../Output/",
    filename="bsrealtest",
)

In [13]:
def parse_record_tl(data):
    """Parse one transaction-list record from a normalized column-data frame.

    The frame is treated as a transaction detail record when the first
    ``value`` cell is a ``YYYY-MM-DD`` date; any other frame is not parsed.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``value`` and an ``id`` column, one row per report
        column. Rows 0-7 are read.

    Returns
    -------
    dict or None
        The transaction fields, with missing cells (empty string, NaN, None)
        replaced by ``"NA"``; ``None`` when the frame is empty or does not
        start with a date.

    Raises
    ------
    KeyError
        If the frame starts with a date but lacks the ``id`` column or one of
        the rows 1-7, or if a non-empty frame has no ``value`` column.
    TypeError
        If the first ``value`` cell is not a string.
    """
    # regular expression matching date format
    rex = re.compile("^[0-9]{4}-[0-9]{2}-[0-9]{2}$")

    # Output field -> (column, row) of the cell it is read from.
    layout = (
        ('TransactionDate',   'value', 0),
        ('TransactionType',   'value', 1),
        ('TransactionTypeID', 'id',    1),
        ('DocumentNumber',    'value', 2),
        ('Posting',           'value', 3),
        ('VendorName',        'value', 4),
        ('VendorID',          'id',    4),
        ('Memo',              'value', 5),
        ('Split',             'value', 6),
        ('SplitID',           'id',    6),
        ('TransactionAmount', 'value', 7),
    )

    # Nothing to inspect in an empty frame.
    if data.shape[0] == 0:
        return None

    # Only frames whose first cell is a date are transaction detail records.
    if not rex.search(data['value'][0]):
        return None

    vals = {}
    for field, column, position in layout:
        cell = data[column][position]
        # Cells without an id come through as NaN, which is truthy, so it has
        # to be tested explicitly alongside the empty-string case.
        vals[field] = "NA" if (pd.isna(cell) or not cell) else cell

    return vals

def json_crawler_tl(data, parse_record=None):
    """Walk a nested transaction-list report and collect the parsed transactions.

    Parameters
    ----------
    data : dict or pandas.DataFrame
        Either the dictionary returned by ``json.load()`` or, during
        recursion, a frame produced by ``pd.json_normalize``.
    parse_record : callable, optional
        Called with the normalized frame of every ``ColData`` and
        ``Header.ColData`` cell. A return value of ``None`` means "not a
        record" and is dropped. Defaults to ``parse_record_tl``.

    Returns
    -------
    list
        Parsed records in traversal order. For each row the ``ColData``
        record comes first, then the ``Header.ColData`` record, then the
        records found under ``Rows.Row``. The list never contains ``None``,
        so it can be passed to ``pd.DataFrame`` as a list of records.

    Notes
    -----
    The crawl is best-effort: a cell that is missing, cannot be normalized or
    cannot be parsed is skipped. Only ``Exception`` subclasses are suppressed,
    so ``KeyboardInterrupt`` and ``SystemExit`` still stop the crawl.
    """
    if parse_record is None:
        parse_record = parse_record_tl

    # json.load() hands back a dictionary; flatten it to a one-row frame first.
    if isinstance(data, dict):
        return json_crawler_tl(pd.json_normalize(data), parse_record)

    records = []
    for row in range(data.shape[0]):
        # Cells that directly hold the column data of a record.
        for column in ('ColData', 'Header.ColData'):
            try:
                record = parse_record(pd.json_normalize(data[column][row]))
            except Exception:
                # Column absent, empty cell, or record without the expected fields.
                continue
            # The parser answers None for frames that are not transactions
            # (e.g. group headers); keeping those would corrupt the table
            # built from this list.
            if record is not None:
                records.append(record)

        # Nested rows are crawled recursively and appended after this row's records.
        try:
            row_data = pd.json_normalize(data['Rows.Row'][row])
            records.extend(json_crawler_tl(row_data, parse_record))
        except Exception:
            pass

    return records


In [15]:
# Crawl the transaction-list report and write the records to tlrealtest.csv under ../Output/.
json_to_csv(
    lst=json_crawler_tl(transaction_list),
    prefix="../Output/",
    filename="tlrealtest",
)