In [6]:
import pandas as pd
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

def flatten_json(y):
    """Flatten nested JSON-like data into a single-level dictionary.

    Keys along the path to each leaf are joined with underscores, and list
    elements contribute their zero-based position. For example,
    ``{"a": {"b": [10, 20]}}`` becomes ``{"a_b_0": 10, "a_b_1": 20}``.

    Args:
        y: A decoded JSON value, typically a dict. Dicts and lists, including
            their subclasses (e.g. ``collections.OrderedDict``), are descended
            into; every other value is treated as a leaf.

    Returns:
        dict: Flattened key -> leaf value, in depth-first traversal order.
        Empty dicts and empty lists contribute no entries. A bare scalar
        passed as ``y`` is stored under the empty-string key.
    """
    out = {}

    def flatten(x, name=''):
        # isinstance (rather than an exact type check) so that dict/list
        # subclasses are flattened instead of being stored as opaque leaves.
        if isinstance(x, dict):
            for key, value in x.items():
                flatten(value, f'{name}{key}_')
        elif isinstance(x, list):
            for index, item in enumerate(x):
                flatten(item, f'{name}{index}_')
        else:
            # `name` ends with the '_' separator appended by the caller (or is
            # empty at the root); drop that trailing separator for the key.
            out[name[:-1]] = x

    flatten(y)
    return out

def process_line(line):
    """Turn one line of NDJSON into a single-row, flattened DataFrame.

    Args:
        line: A string holding one serialized JSON document.

    Returns:
        pandas.DataFrame: A one-row frame whose columns are the flattened
        keys produced by ``flatten_json``.
    """
    # Parse the JSON text, then collapse the nested structure into flat keys.
    record = flatten_json(json.loads(line))

    # Wrap the single record in a list so it becomes exactly one row.
    return pd.DataFrame([record])

def read_and_flatten_ndjson(file_path):
    """Read an NDJSON file and return one flattened DataFrame row per record.

    Every non-blank line is handed to ``process_line`` on a thread pool and
    the resulting one-row frames are concatenated in file order.

    Args:
        file_path: Path to a newline-delimited JSON file, read as UTF-8.

    Returns:
        pandas.DataFrame: One row per JSON record, in the same order as the
        lines of the file, with a fresh 0..n-1 index. An empty DataFrame is
        returned when the file contains no records.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Skip blank lines (e.g. a trailing newline at end of file); they are not
    # records and would make json.loads fail.
    with open(file_path, 'r', encoding='utf-8') as file:
        lines = [line for line in file if line.strip()]

    # pd.concat raises ValueError on an empty list, so handle "no records"
    # explicitly.
    if not lines:
        return pd.DataFrame()

    # executor.map yields results in submission order, so rows follow the
    # file's line order regardless of which worker finishes first.
    with ThreadPoolExecutor() as executor:
        frames = list(executor.map(process_line, lines))

    # Concatenate all the per-line frames into a single DataFrame.
    return pd.concat(frames, ignore_index=True)

# Absolute path to the Claim NDJSON input (one JSON document per line)
file_path = '/workspaces/synthea_dw/data/fhir/Claim.ndjson'

# Parse every line, flatten its nested JSON, and combine the rows into one
# DataFrame; `flattened_df` is reused by the later cell that writes the CSV
flattened_df = read_and_flatten_ndjson(file_path)

# Last expression of the cell, so the notebook renders the first rows as output
flattened_df.head()

Unnamed: 0,resourceType,id,status,type_coding_0_system,type_coding_0_code,use,patient_reference,billablePeriod_start,billablePeriod_end,created,...,item_59_productOrService_coding_0_system,item_59_productOrService_coding_0_code,item_59_productOrService_coding_0_display,item_59_productOrService_text,item_59_net_value,item_59_net_currency,item_11_diagnosisSequence_0,item_19_informationSequence_0,item_20_informationSequence_0,item_20_diagnosisSequence_0
0,Claim,1e2ea6b2-e4a9-fe03-18a1-ee5751ac9387,active,http://terminology.hl7.org/CodeSystem/claim-type,pharmacy,claim,Patient/e482bf0c-931b-f8d9-7a0f-cd15fead9ed2,1997-04-24T00:52:02+00:00,1997-04-24T01:07:02+00:00,1997-04-24T01:07:02+00:00,...,,,,,,,,,,
1,Claim,53852cce-33e6-283d-d470-a11dc28ebfd7,active,http://terminology.hl7.org/CodeSystem/claim-type,professional,claim,Patient/e482bf0c-931b-f8d9-7a0f-cd15fead9ed2,1997-05-16T20:07:29+00:00,1997-05-17T01:28:08+00:00,1997-05-17T01:28:08+00:00,...,,,,,,,,,,
2,Claim,298042ae-8403-81d0-93ac-253327de0ef1,active,http://terminology.hl7.org/CodeSystem/claim-type,professional,claim,Patient/e482bf0c-931b-f8d9-7a0f-cd15fead9ed2,1959-04-30T00:52:02+00:00,1959-04-30T01:33:11+00:00,1959-04-30T01:33:11+00:00,...,,,,,,,,,,
3,Claim,1cd31d5e-ed58-904a-a35c-19d6cc5c96e3,active,http://terminology.hl7.org/CodeSystem/claim-type,professional,claim,Patient/452e7848-c7b4-f5d5-f4f1-6ff142c5e16b,1996-03-29T06:41:13+00:00,1996-03-29T09:20:13+00:00,1996-03-29T09:20:13+00:00,...,,,,,,,,,,
4,Claim,6f5f4f12-64ea-6f97-2950-cecc47632c7d,active,http://terminology.hl7.org/CodeSystem/claim-type,professional,claim,Patient/e482bf0c-931b-f8d9-7a0f-cd15fead9ed2,1979-08-09T00:52:02+00:00,1979-08-09T01:33:28+00:00,1979-08-09T01:33:28+00:00,...,,,,,,,,,,


In [8]:
# Write the flattened table to a CSV in the same directory as the source NDJSON;
# index=False leaves the DataFrame's row index out of the file.
flattened_df.to_csv('/workspaces/synthea_dw/data/fhir/Claim.csv', index=False)