In [2]:
import pandas as pd
import json

In [3]:
# JSON text is UTF-8 by default; pin the encoding so the read does not
# depend on the platform's locale encoding.
with open('patientA.json', 'r', encoding='utf-8') as file:
    patient_data = json.load(file)

#### Print JSON Data

In [14]:
patient_data

{'resourceType': 'Patient',
 'id': 'example-patient-1',
 'meta': {'versionId': '1', 'lastUpdated': '2025-08-05T15:00:00Z'},
 'identifier': [{'use': 'usual',
   'type': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/v2-0203',
      'code': 'MR'}],
    'text': 'Medical Record Number'},
   'system': 'urn:oid:1.2.36.146.595.217.0.1',
   'value': '123456'}],
 'active': True,
 'name': [{'use': 'official', 'family': 'Smith', 'given': ['John']}],
 'gender': 'male',
 'birthDate': '1985-05-15',
 'telecom': [{'system': 'phone', 'value': '555-555-5555', 'use': 'home'}],
 'address': [{'use': 'home',
   'line': ['123 Main St'],
   'city': 'Anytown',
   'state': 'CA',
   'postalCode': '90210',
   'country': 'USA'}]}

#### Extracting MRN

In [20]:
identifiers = patient_data.get("identifier", [])
identifiers

[{'use': 'usual',
  'type': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/v2-0203',
     'code': 'MR'}],
   'text': 'Medical Record Number'},
  'system': 'urn:oid:1.2.36.146.595.217.0.1',
  'value': '123456'}]

In [30]:
identifier_first = identifiers[0]
identifier_first 

{'use': 'usual',
 'type': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/v2-0203',
    'code': 'MR'}],
  'text': 'Medical Record Number'},
 'system': 'urn:oid:1.2.36.146.595.217.0.1',
 'value': '123456'}

In [35]:
identifier_type = identifier_first.get("type", {})
identifier_type

{'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/v2-0203',
   'code': 'MR'}],
 'text': 'Medical Record Number'}

In [36]:
identifier_text = identifier_type.get("text", "")
identifier_text


'Medical Record Number'

In [37]:
# "value" is a string in the sample record, so fall back to None (not an
# empty dict) when the key is absent.
identifier_value = identifier_first.get("value")
identifier_value

'123456'

In [42]:
patient_name = patient_data.get("name", [])
patient_name

[{'use': 'official', 'family': 'Smith', 'given': ['John']}]

#### Patient Name Extraction 

In [46]:
# Keep the list and the selected string under separate names, and guard the
# index so a missing, None or empty "given" list yields None, not IndexError.
pt_given_names = patient_name[0].get("given") or []
pt_given_name = pt_given_names[0] if pt_given_names else None
pt_given_name

'John'

In [54]:
def pt_given_name_func(patient_data):
    """Return the first given name from a FHIR-style Patient dict.

    Args:
        patient_data: Mapping with an optional "name" list whose entries may
            carry a "given" list of strings.

    Returns:
        The first element of the first name entry's "given" list, or the
        string "No given name found" when "name" or "given" is missing,
        None or empty.
    """
    # `or []` also covers an explicit None stored under the key.
    patient_names = patient_data.get("name") or []
    if not patient_names:
        return "No given name found"

    given_names = patient_names[0].get("given") or []
    if not given_names:
        return "No given name found"

    return given_names[0]

In [55]:
given_name = pt_given_name_func(patient_data)
given_name

'John'

In [None]:
def pt_given_name_func(patient_data):
    """Return the first given name from a FHIR-style Patient dict.

    NOTE: this cell redefines pt_given_name_func from an earlier cell and
    shadows that definition once it is run.

    Args:
        patient_data: Mapping with an optional "name" list whose entries may
            carry a "given" list of strings. The function reads this
            argument only, never a notebook-level variable.

    Returns:
        The first element of the first name entry's "given" list, or the
        string "No given name found" when "name" or "given" is missing,
        None or empty.
    """
    # Guard the "name" list itself before indexing its first entry.
    patient_names = patient_data.get("name") or []
    if not patient_names:
        return "No given name found"

    pt_given_name_list = patient_names[0].get("given") or []
    if pt_given_name_list:
        return pt_given_name_list[0]
    return "No given name found"

In [48]:
# "family" is a single string in the sample record, so fall back to None
# (not an empty list) when the key is absent.
pt_family_name = patient_name[0].get("family")
pt_family_name

'Smith'

In [None]:
# After confirming the logic works, we can put it into a function

def mrn_extract(patient_data):
    """Return the medical record number (MRN) from a FHIR-style Patient dict.

    Every entry of the "identifier" list is inspected, not only the first,
    so an MRN that follows another identifier is still found. An entry
    counts as the MRN when either:
      * its type text is "MRN" or "Medical Record Number", or
      * its type coding contains code "MR" in the HL7 v2-0203 code system.

    Args:
        patient_data: Mapping with an optional "identifier" list of dicts,
            each optionally carrying "type" and "value".

    Returns:
        The "value" of the first matching identifier (None if that entry has
        no "value"), or None when no identifier matches.
    """
    mrn_texts = ("MRN", "Medical Record Number")
    mrn_system = "http://terminology.hl7.org/CodeSystem/v2-0203"
    mrn_code = "MR"

    # `or []` / `or {}` also cover keys that are present but set to None.
    for identifier in patient_data.get("identifier") or []:
        identifier_type = identifier.get("type") or {}

        text_matches = identifier_type.get("text", "") in mrn_texts
        code_matches = any(
            coding.get("system") == mrn_system and coding.get("code") == mrn_code
            for coding in identifier_type.get("coding") or []
        )

        if text_matches or code_matches:
            return identifier.get("value")

    return None

In [39]:
mrn = mrn_extract(patient_data)
mrn

'123456'

#### Add the value to the dictionary map

In [None]:
# Create a new, flattened dictionary to hold the extracted data
transformed_data = {
    "patient_id": patient_data.get("id"),
    "mrn": mrn,  # result of mrn_extract(patient_data) from an earlier cell
    "first_name": given_name,  # result of pt_given_name_func(patient_data)
    "last_name": pt_family_name,  # "family" of the first "name" entry
    "gender": patient_data.get("gender"),
    "birth_date": patient_data.get("birthDate"),
    # Not extracted yet: kept as None placeholders so the columns exist.
    "phone_number": None,
    "address_line": None,
    "city": None,
    "state": None,
    "postal_code": None
}

#### Convert dictionary to DataFrame

In [None]:
df = pd.DataFrame([transformed_data])

In [None]:
df