In [34]:
import json
import pandas as pd

In [35]:
# Location of the JSON log export to parse.
# The value below is a placeholder; point it at a real file before running.
file_path = "<path to log file>"

In [36]:
# Read the whole log file into memory as parsed JSON.
# The encoding is given explicitly so decoding does not depend on the platform's
# default locale; 'utf-8-sig' reads plain UTF-8 and also strips a leading BOM,
# which json.load would otherwise reject.
with open(file_path, 'r', encoding='utf-8-sig') as file:
    log_data = json.load(file)

In [37]:
# Columns to pull out of each log event, grouped by the node they are read from.

# Read directly from the event itself.
main_fields_to_extract = [
    'eventId',
    'category',
    'service',
]

# Read from event['payload']['context'].
context_fields_to_extract = [
    'dataStore',
    'numberOfStudies',
    'numberOfSeries',
    'numberOfInstances',
    'numberOfFrames',
]

# Read from event['payload']['context']['steps']['query'].
query_fields_to_extract = [
    'startTime',
    'endTime',
    'dicomDataRequestTimeMs',
    'ndFramesRequestTimeMs',
    'backendTimeMs',
    'frontendTimeMs',
    'totalTimeMs',
]

# Read from the JSON document stored in event['correlationId'].
correlation_fields_to_extract = [
    'requestT0',
    'sessionId',
    'actionId',
    'customId',
]

In [38]:
# Extract the specified fields from IF_DATASETLOADER_PERF_01 log events.
#
# Every output row carries every requested column (None when the source node is
# missing or malformed), so all rows have the same keys no matter which nodes a
# given event happens to contain.

def _as_dict(node):
    """Return ``node`` unchanged when it is a dict, otherwise an empty dict.

    Lets nested lookups treat a missing, null or non-object node the same way
    instead of raising on ``in`` / ``.get``.
    """
    return node if isinstance(node, dict) else {}


def _parse_correlation_id(raw):
    """Decode an event's ``correlationId`` value into a dict.

    Args:
        raw: The value stored under ``correlationId``; normally a JSON string.

    Returns:
        The decoded object, or an empty dict when the value is absent, is not
        valid JSON, or decodes to something other than a JSON object.
    """
    if isinstance(raw, dict):
        # Already an object rather than a JSON string; use it as-is.
        return raw
    if not isinstance(raw, (str, bytes, bytearray)):
        # None, numbers, lists, ...: json.loads would raise TypeError on these.
        return {}
    try:
        return _as_dict(json.loads(raw))
    except ValueError:
        # json.JSONDecodeError (and UnicodeDecodeError for bytes) are ValueErrors.
        return {}


def _as_text(value):
    """Return ``str(value)``, keeping None as None.

    Applied to startTime, endTime and requestT0, which are stored as strings
    to prevent exponential form in the output.
    """
    return None if value is None else str(value)


def _extract_perf_event(event):
    """Flatten one log event into a single-level dict of the requested fields.

    Args:
        event: A log event dict. Fields are read from the event itself, from
            ``payload.context``, from ``payload.context.steps.query`` and from
            the JSON held in ``correlationId``.

    Returns:
        A dict with one key per name in the four ``*_fields_to_extract`` lists;
        values that cannot be found are None.
    """
    row = {field: event.get(field) for field in main_fields_to_extract}

    context = _as_dict(_as_dict(event.get('payload')).get('context'))
    for field in context_fields_to_extract:
        row[field] = context.get(field)

    query = _as_dict(_as_dict(context.get('steps')).get('query'))
    for field in query_fields_to_extract:
        value = query.get(field)
        row[field] = _as_text(value) if field in ('startTime', 'endTime') else value

    correlation_id = _parse_correlation_id(event.get('correlationId'))
    for field in correlation_fields_to_extract:
        value = correlation_id.get(field)
        row[field] = _as_text(value) if field == 'requestT0' else value

    return row


extracted_data = []
for event in log_data:
    # Skip entries that are not JSON objects; they cannot carry an eventId.
    if not isinstance(event, dict):
        continue
    if event.get('eventId') == 'IF_DATASETLOADER_PERF_01':
        extracted_data.append(_extract_perf_event(event))

In [39]:
# Convert the extracted data into a pandas DataFrame.
# The column list is passed explicitly so the table always has the same columns
# in the same order (main, context, query, correlation), even when no event
# matched or when the first matching event lacked some nodes.
column_order = (
    main_fields_to_extract
    + context_fields_to_extract
    + query_fields_to_extract
    + correlation_fields_to_extract
)
df = pd.DataFrame(extracted_data, columns=column_order)

In [40]:
# Write the table to an Excel workbook, leaving out the DataFrame index column.
# The directory part of the path is a placeholder to fill in before running.
output_file_path = "<path>/metadata-performance.xlsx"
df.to_excel(excel_writer=output_file_path, index=False)

In [41]:
# Report where the workbook was written.
print(
    f"Extracted data has been saved to {output_file_path}"
)

Extracted data has been saved to /Users/sayoojcyriac/tech-career/github/log-service/log-parser/metadata-performance.xlsx
