In [1]:
import pandas as pd
import json
import re
import sys
import os
sys.path.append(os.path.abspath("../final/final"))
import final

# Build the path from its components so it resolves on any OS; the previous
# literal used "\\" separators, which only form a path on Windows.
general_ledger_path   = os.path.join("..", "RawData", "GeneralLedgerReal.json")
with open(general_ledger_path) as f:
    # Parsed report object; json_crawler_gl() accepts this dict directly.
    general_ledger = json.load(f)

# Sequencing
1. Every time `json_crawler_gl()` successfully runs `parse_record_gl()` in the `Header.ColData` block, save the returned dictionary.
    1. Don't append it to `lst` yet.

In [106]:
def parse_record_gl(data):
    """
    Classify one normalized ColData / Header.ColData frame and parse it.

    The record kind is decided by the first entry of the 'value' column:

    * a ``YYYY-MM-DD`` date        -> transaction detail record
    * the text "Beginning Balance" -> beginning balance record
    * anything else                -> account name record

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'value' column and, optionally, an 'id' column. Entries
        are read by position: 0-8 for a transaction, 0 and 8 for a beginning
        balance, 0 for an account name.

    Returns
    -------
    dict or None
        * transaction: all fifteen output keys; the three account keys and
          every missing value (empty, falsy or NaN) are set to "NULL".
        * beginning balance: ``{'BeginningBalance': <value at position 8>}``.
        * account name: all fifteen keys, 'AccountName' / 'AccountID' filled
          and the rest left as ""; 'AccountID' is "NULL" when no id exists.
        * ``None`` when the frame has no rows.

    Raises
    ------
    KeyError
        If the 'value' column is missing or shorter than the record kind needs.
    TypeError
        If the first value is not a string (the date pattern cannot be applied).
    """
    # An empty frame carries no record to classify; callers receive None.
    if data.shape[0] == 0:
        return None

    def is_nan(cell):
        # json_normalize leaves NaN where an entry had no such key. NaN is
        # truthy, so a plain falsiness test would not catch it.
        return isinstance(cell, float) and cell != cell

    def id_at(position):
        # Not every entry carries an 'id'; the column itself can be absent.
        if 'id' not in data:
            return "NULL"
        cell = data['id'][position]
        return "NULL" if is_nan(cell) else cell

    values = data['value']
    first = values[0]

    # transaction detail data frame
    if re.search("^[0-9]{4}-[0-9]{2}-[0-9]{2}$", first):
        record = {
            'AccountName' : "",
            'AccountID' : "",
            'BeginningBalance' : "",
            'TransactionDate' : first,
            'TransactionType' : values[1],
            'TransactionTypeID' : id_at(1),
            'DocumentNumber' : values[2],
            'Adjustment' : values[3],
            'VendorName' : values[4],
            'VendorID' : id_at(4),
            'Memo' : values[5],
            'Split' : values[6],
            'SplitID' : id_at(6),
            'TransactionAmount' : values[7],
            'NewAccountBalance' : values[8]
        }

        # replace missing values (empty / falsy / NaN) with the "NULL" marker
        return {
            key: "NULL" if is_nan(cell) or not cell else cell
            for key, cell in record.items()
        }

    # beginning balance data frame
    if first == "Beginning Balance":
        return {
            'BeginningBalance' : values[8]
        }

    # name data frame
    return {
        'AccountName' : first,
        'AccountID' : id_at(0),
        'BeginningBalance' : "",
        'TransactionDate' : "",
        'TransactionType' : "",
        'TransactionTypeID' : "",
        'DocumentNumber' : "",
        'Adjustment' : "",
        'VendorName' : "",
        'VendorID' : "",
        'Memo' : "",
        'Split' : "",
        'SplitID' : "",
        'TransactionAmount' : "",
        'NewAccountBalance' : ""
    }

def assign_col_data(coldata, end):
    """
    Copy fields from a parsed record (`coldata`) into an output row (`end`).

    When `coldata['TransactionDate']` is the empty string only
    'BeginningBalance' is copied; otherwise the twelve transaction fields are
    copied. `end` is modified in place and the same object is returned.
    """
    # No transaction date: only the balance is taken from this record.
    if coldata['TransactionDate'] == "":
        end['BeginningBalance'] = coldata['BeginningBalance']
        return end

    transaction_fields = (
        'TransactionDate',
        'TransactionType',
        'TransactionTypeID',
        'DocumentNumber',
        'Adjustment',
        'VendorName',
        'VendorID',
        'Memo',
        'Split',
        'SplitID',
        'TransactionAmount',
        'NewAccountBalance',
    )
    for field in transaction_fields:
        end[field] = coldata[field]

    return end

def impute_null(dct):
    """
    Replace every missing value in `dct` with the string "NULL", in place.

    A value counts as missing when it is falsy ("", None, 0, empty container)
    or a float NaN. NaN needs its own check because it is truthy, and it is
    what pandas leaves behind for a key that was absent from a record.

    Returns the same dict object that was passed in.
    """
    # Only existing keys are reassigned, so iterating over items() is safe.
    for key, value in dct.items():
        is_nan = isinstance(value, float) and value != value
        if is_nan or not value:
            dct[key] = "NULL"
    return dct

# Shared account context for json_crawler_gl(): the crawler declares this name
# `global` and rebinds it each time it parses a 'Header.ColData' record.
row_dict = {}
# NOTE(review): `index` is not referenced by any code visible in this notebook;
# confirm it is still needed.
index    = 0

def json_crawler_gl(data):
    """
    Recursively flatten a general-ledger report into a list of row dicts.

    Parameters
    ----------
    data : dict or pandas.DataFrame
        A dict (as returned by json.load()) is normalized into a frame first.
        Recursive calls receive frames built from 'Rows.Row' cells.

    Returns
    -------
    list of dict
        In encounter order: one dict per parsed 'Header.ColData' record and
        one per parsed 'ColData' record that is not consumed as the beginning
        balance of the current account.

    Notes
    -----
    * The module-level `row_dict` holds the most recently parsed header
      record. Records found under 'ColData' are attributed to it.
    * A frame that lacks one of the three columns, or a cell that cannot be
      normalized or parsed, is skipped silently. Only `Exception` subclasses
      are suppressed, so KeyboardInterrupt / SystemExit still propagate.
    """
    global row_dict

    # A dict is the raw report: flatten it to a one-row frame and crawl that.
    if isinstance(data, dict):
        # Drop any account context left behind by a previous top-level run.
        row_dict = {}
        return json_crawler_gl(pd.json_normalize(data))

    # Rows collected at this level, including those returned by recursion.
    lst = []

    for row in range(data.shape[0]):
        # 'Header.ColData': the record becomes the current account context.
        try:
            header_frame = pd.json_normalize(data['Header.ColData'][row])
            row_dict = impute_null(parse_record_gl(header_frame))
            # The very same object is appended, so a beginning balance
            # assigned to row_dict later also shows up in this output row.
            lst.append(row_dict)
        except Exception:
            pass

        # 'ColData': a beginning balance or a transaction of the current account.
        try:
            record_frame = pd.json_normalize(data['ColData'][row])
            parsed = parse_record_gl(record_frame)
            if row == 0 and not parsed.get('TransactionDate'):
                # First record without a transaction date: take its balance.
                row_dict['BeginningBalance'] = parsed['BeginningBalance']
            else:
                # Fill a copy of the account context. Filling row_dict itself
                # would overwrite the header row already stored in the output
                # with this transaction's fields. A first-position record
                # that is a transaction also lands here instead of being lost.
                lst.append(assign_col_data(parsed, dict(row_dict)))
        except Exception:
            pass

        # 'Rows.Row': nested rows, crawled recursively.
        try:
            child_frame = pd.json_normalize(data['Rows.Row'][row])
            lst.extend(json_crawler_gl(child_frame))
        except Exception:
            pass

    return lst

In [107]:
# Output directory is built with os.path.join so the separator matches the
# host OS; on Windows this yields the same "..\\Output" string as before.
final.json_to_csv(json_crawler_gl(general_ledger), os.path.join("..", "Output"), "glrealtest")

In [90]:
 # Scratch check: index -1 reaches the last dict in the list, so its
 # 'BeginningBalance' entry can be read back directly.
 lst = []
 lst.append({'BeginningBalance' : "489032.20"})
 
 lst[-1]['BeginningBalance']

'489032.20'