In [2]:
import pandas as pd
import json
import re

# NOTE(review): these are absolute, user-specific locations; the notebook only
# runs on a machine where exactly these files exist. Consider moving them to a
# configurable data directory.
transaction_list_path = "C:\\Users\\paulcassidy\\OneDrive - Archdiocese of Chicago\\Documents\\JSON_Parsing\\RawData\\TransactionList.json"
general_ledger_path   = "C:\\Users\\paulcassidy\\OneDrive - Archdiocese of Chicago\\Documents\\JSON_Parsing\\RawData\\GeneralLedger.json"


def _load_json(path):
    """Open the file at `path`, parse it as JSON and return the parsed object."""
    # Without an explicit encoding, open() falls back to the locale's preferred
    # encoding, which can mis-decode non-ASCII characters without any error.
    # "utf-8-sig" reads UTF-8 and additionally skips a leading byte-order mark
    # if one is present.
    with open(path, encoding="utf-8-sig") as f:
        return json.load(f)


# Parsed contents of the two raw report files.
transaction_list = _load_json(transaction_list_path)
general_ledger = _load_json(general_ledger_path)

In [2]:
# Matches a date written as YYYY-MM-DD; compiled once instead of on every call.
_DATE_REX = re.compile("^[0-9]{4}-[0-9]{2}-[0-9]{2}$")


def _is_nan(value):
    """Return True when `value` is a float NaN."""
    # NaN is the only float that compares unequal to itself.
    return isinstance(value, float) and value != value


def parse_record_gl(data):
    """
    Parse one normalized ColData / Header.ColData data frame into a flat dict.

    The kind of record is decided by the first entry of the 'value' column:

    * a YYYY-MM-DD date  -> transaction detail record (11 keys, see below);
    * "Beginning Balance" -> {'BeginningBalance': <'value' at position 7>};
    * anything else       -> {'AccountName': ..., 'AccountID': ...}.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'value' column and, optionally, an 'id' column, indexed
        0..n-1. Transaction detail and beginning balance frames must have at
        least 8 rows.

    Returns
    -------
    dict or None
        The parsed record, or None when `data` has no rows. Transaction detail
        fields that are empty/falsy or NaN are replaced by the string "NA";
        every ID is "NA" when the frame has no 'id' column. A NaN AccountID is
        likewise replaced by "NA".

    Raises
    ------
    KeyError
        If 'value' is missing or the frame has fewer rows than the record
        kind requires.
    TypeError
        If the first 'value' entry is not a string.
    """
    # Nothing to classify in an empty frame.
    if data.shape[0] == 0:
        return None

    values = data['value']
    # The 'id' column only exists when at least one source entry carried an id.
    ids = data['id'] if 'id' in data else None

    def id_at(position):
        # Fall back to "NA" when the frame has no 'id' column at all.
        return "NA" if ids is None else ids[position]

    first_value = values[0]

    # transaction detail data frame
    if re.search(_DATE_REX, first_value):
        vals = {
            'TransactionDate' : first_value,
            'TransactionType' : values[1],
            'TransactionTypeID' : id_at(1),
            'DocumentNumber' : values[2],
            'VendorName' : values[3],
            'VendorID' : id_at(3),
            'Memo' : values[4],
            'Split' : values[5],
            'SplitID' : id_at(5),
            'TransactionAmount' : values[6],
            'NewAccountBalance' : values[7]
        }

        # Replace missing values. NaN must be tested explicitly because it is
        # truthy, so `not value` alone would let it through.
        return {
            key: "NA" if (_is_nan(value) or not value) else value
            for key, value in vals.items()
        }

    # beginning balance data frame
    if first_value == "Beginning Balance":
        return {
            'BeginningBalance' : values[7]
        }

    # name data frame
    account_id = id_at(0)
    if _is_nan(account_id):
        # The column exists but this entry had no id of its own.
        account_id = "NA"

    return {
        'AccountName' : first_value,
        'AccountID' : account_id
    }

# Result of the most recently parsed 'Header.ColData' record; json_crawler_gl
# overwrites it every time it successfully parses a header.
acc_name_id = {}

def json_crawler_gl(data):
    """
    Recursively walk a general ledger structure and collect parsed records.

    Parameters
    ----------
    data : dict or pandas.DataFrame
        A dict (e.g. the object returned by json.load()) is first flattened
        with pd.json_normalize and then crawled. A data frame is crawled row
        by row, looking at the 'Header.ColData', 'ColData' and 'Rows.Row'
        columns, in that order.

    Returns
    -------
    list
        One parse_record_gl() result per 'Header.ColData' / 'ColData' cell
        that could be parsed, in crawl order, with the records found under
        each 'Rows.Row' cell inserted after that row's own records.

    Side effects
    ------------
    Rebinds the module-level `acc_name_id` to a copy of the latest parsed
    header record.

    Notes
    -----
    The crawl is best-effort: a row that lacks one of the columns, or whose
    cell cannot be normalized or parsed, is skipped for that column. Only
    `Exception` subclasses are suppressed, so KeyboardInterrupt and SystemExit
    still stop the crawl.
    """
    global acc_name_id

    # If data is the dictionary returned by json.load(), flatten it to the
    # initial pandas data frame and crawl that instead.
    if isinstance(data, dict):
        return json_crawler_gl(pd.json_normalize(data))

    # Holds the parsed record objects found at this level and below.
    lst = []

    for row in range(data.shape[0]):
        # Normalize by 'Header.ColData'
        try:
            header = parse_record_gl(pd.json_normalize(data['Header.ColData'][row]))
        except Exception:
            pass
        else:
            lst.append(header)
            # Parse once, but keep the global independent of the list entry.
            acc_name_id = dict(header) if isinstance(header, dict) else header

        # Normalize by 'ColData'
        try:
            record = parse_record_gl(pd.json_normalize(data['ColData'][row]))
        except Exception:
            pass
        else:
            lst.append(record)

        # Normalize by 'Rows.Row' and descend into the nested rows
        try:
            nested = json_crawler_gl(pd.json_normalize(data['Rows.Row'][row]))
        except Exception:
            pass
        else:
            lst.extend(nested)

    return lst

In [6]:
# Column-oriented input: every key names a column and maps to the list of
# values in that column.
data = {'Col1' : ['fizz', 'buzz', 'bang'], 'Col2' : ['foo', 'bar', 'baz']}

pd.DataFrame(data)

Unnamed: 0,Col1,Col2
0,fizz,foo
1,buzz,bar
2,bang,baz


In [8]:
# Record-oriented input: a list with one dict per row, keyed by column name.
data = [{'Col1' : 'fizz', 'Col2' : 'foo'}]

pd.DataFrame(data)

Unnamed: 0,Col1,Col2
0,fizz,foo
