In [1]:
import pandas as  pd
import glob

In [2]:
# Load every JSON file in the dataset folder and flatten its 'entry' records.
# sorted(): glob returns paths in a filesystem-dependent order; sorting keeps the
# row order (and therefore every output below) reproducible across machines.
file_paths = sorted(glob.glob("./health_data/healthcare-datasets/*.json"))
dfs = [pd.read_json(file_path) for file_path in file_paths]
combined_df = pd.concat(dfs, ignore_index=True)
# One row per entry; nested keys become dotted column names such as 'resource.resourceType'.
entries = pd.json_normalize(combined_df['entry'])

In [3]:
# List the distinct resource types present across all loaded entries.
entries['resource.resourceType'].unique()

array(['Patient', 'Encounter', 'Condition', 'Observation',
       'MedicationRequest', 'Immunization', 'Procedure', 'CarePlan',
       'AllergyIntolerance', 'DiagnosticReport'], dtype=object)

In [4]:
# Keep only the CarePlan rows and renumber them 0..n-1.
# The reset is chained (not inplace) so the cell is idempotent and never mutates
# a filtered slice of `entries`.
care_plan_data = (
    entries[entries['resource.resourceType'] == 'CarePlan']
    .reset_index(drop=True)
)
# The frame still carries the columns of every resource type, not just CarePlan ones.
care_plan_data.columns

Index(['fullUrl', 'resource.id', 'resource.text.status', 'resource.text.div',
       'resource.extension', 'resource.identifier', 'resource.name',
       'resource.telecom', 'resource.gender', 'resource.birthDate',
       'resource.address', 'resource.multipleBirthBoolean', 'resource.photo',
       'resource.resourceType', 'resource.status', 'resource.class.code',
       'resource.type', 'resource.patient.reference', 'resource.period.start',
       'resource.period.end', 'resource.reason.coding',
       'resource.clinicalStatus', 'resource.verificationStatus',
       'resource.code.coding', 'resource.subject.reference',
       'resource.context.reference', 'resource.onsetDateTime',
       'resource.abatementDateTime', 'resource.encounter.reference',
       'resource.effectiveDateTime', 'resource.valueQuantity.value',
       'resource.valueQuantity.unit', 'resource.valueQuantity.system',
       'resource.valueQuantity.code', 'resource.stage.coding',
       'resource.medicationCodeableCo

In [5]:
# Main care plan code: the 'code' of the codings under the first category of each plan.
care_plan_category = care_plan_data.get('resource.category')
if care_plan_category is not None:
    # record_path [0, 'coding'] emits one row per coding of the first category,
    # so the result only lines up with care_plan_data when every plan has
    # exactly one such coding.
    category_codings = pd.json_normalize(care_plan_category, [0, 'coding'])
    assert len(category_codings) == len(care_plan_data), (
        "expected exactly one coding in the first category of every care plan"
    )
    care_plan_code = category_codings.get('code')
else:
    care_plan_code = None
care_plan_code

0            53950000
1           134435003
2     326051000000105
3     326051000000105
4     872781000000100
           ...       
73          412776001
74          182964004
75          385691007
76           47387005
77          385691007
Name: code, Length: 78, dtype: object

In [6]:
# Strip the 'urn:uuid:' marker from the subject reference to get the bare patient id.
# regex=False: the pattern is a literal; being explicit avoids depending on the
# pandas-version-specific default of `regex`.
care_plan_patient_id = care_plan_data['resource.subject.reference'].str.replace(
    'urn:uuid:', '', regex=False
)
care_plan_patient_id

0     65d12976-9588-4cfa-a795-216302a2ece9
1     65d12976-9588-4cfa-a795-216302a2ece9
2     a195633e-d36d-4abe-92bb-02994e9cb348
3     a195633e-d36d-4abe-92bb-02994e9cb348
4     a195633e-d36d-4abe-92bb-02994e9cb348
                      ...                 
73    1bd855fa-45c4-4c23-a608-fdeeb3bc2d8b
74    1bd855fa-45c4-4c23-a608-fdeeb3bc2d8b
75    00d869e2-6793-4ebf-9340-38bf18d223c4
76    a5399e95-981c-4a78-8019-873662fc7901
77    a5399e95-981c-4a78-8019-873662fc7901
Name: resource.subject.reference, Length: 78, dtype: object

In [7]:
# Strip the 'urn:uuid:' marker from the context reference to get the bare encounter id.
# regex=False: the pattern is a literal; being explicit avoids depending on the
# pandas-version-specific default of `regex`.
care_plan_encounter_id = care_plan_data['resource.context.reference'].str.replace(
    'urn:uuid:', '', regex=False
)
care_plan_encounter_id

0     ddc2fb3c-6ea7-4b66-931b-0a0b4bfd3a75
1     80fd8fc5-4770-4e35-b31c-4d7b1be19c2f
2     fe054716-3d1d-4652-8564-01a86d58cd87
3     fe054716-3d1d-4652-8564-01a86d58cd87
4     9b133606-85fd-4199-adf4-2fdf90027fed
                      ...                 
73    dc68d226-b62b-45b8-baab-a1cc6d26427f
74    dc68d226-b62b-45b8-baab-a1cc6d26427f
75    8625df2e-3c5d-4dc2-83c7-714a126d407e
76    14e39e12-688a-4a2d-942d-d761cff4123e
77    7f88e6e7-b838-497a-86d6-aaf35b8cdf29
Name: resource.context.reference, Length: 78, dtype: object

In [8]:
# Raw activity column: each cell holds the list of activity dicts of one care plan.
care_plan_activity = care_plan_data.loc[:, 'resource.activity']
care_plan_activity

0     [{'detail': {'code': {'coding': [{'system': 'h...
1     [{'detail': {'code': {'coding': [{'system': 'h...
2     [{'detail': {'code': {'coding': [{'system': 'h...
3     [{'detail': {'code': {'coding': [{'system': 'h...
4     [{'detail': {'code': {'coding': [{'system': 'h...
                            ...                        
73    [{'detail': {'code': {'coding': [{'system': 'h...
74    [{'detail': {'code': {'coding': [{'system': 'h...
75    [{'detail': {'code': {'coding': [{'system': 'h...
76    [{'detail': {'code': {'coding': [{'system': 'h...
77    [{'detail': {'code': {'coding': [{'system': 'h...
Name: resource.activity, Length: 78, dtype: object

In [9]:
# Assemble the per-care-plan columns extracted above into one frame
# (one row per care plan, activities still nested as lists).
intermediate_columns = {
    'Care Plan Code': care_plan_code,
    'Patient ID': care_plan_patient_id,
    'Encounter ID': care_plan_encounter_id,
    'care plan activity': care_plan_activity,
}
intermediate_df = pd.DataFrame(intermediate_columns)
intermediate_df.head()

Unnamed: 0,Care Plan Code,Patient ID,Encounter ID,care plan activity
0,53950000,65d12976-9588-4cfa-a795-216302a2ece9,ddc2fb3c-6ea7-4b66-931b-0a0b4bfd3a75,[{'detail': {'code': {'coding': [{'system': 'h...
1,134435003,65d12976-9588-4cfa-a795-216302a2ece9,80fd8fc5-4770-4e35-b31c-4d7b1be19c2f,[{'detail': {'code': {'coding': [{'system': 'h...
2,326051000000105,a195633e-d36d-4abe-92bb-02994e9cb348,fe054716-3d1d-4652-8564-01a86d58cd87,[{'detail': {'code': {'coding': [{'system': 'h...
3,326051000000105,a195633e-d36d-4abe-92bb-02994e9cb348,fe054716-3d1d-4652-8564-01a86d58cd87,[{'detail': {'code': {'coding': [{'system': 'h...
4,872781000000100,a195633e-d36d-4abe-92bb-02994e9cb348,9b133606-85fd-4199-adf4-2fdf90027fed,[{'detail': {'code': {'coding': [{'system': 'h...


## Care plan activities

In [10]:
# Show cell contents untruncated from here on, then inspect the activity list
# of the first care plan (row label 0).
pd.options.display.max_colwidth = None
intermediate_df.loc[0, 'care plan activity']

[{'detail': {'code': {'coding': [{'system': 'http://snomed.info/sct',
      'code': '304510005',
      'display': 'Recommendation to avoid exercise'}]},
   'status': 'completed'}},
 {'detail': {'code': {'coding': [{'system': 'http://snomed.info/sct',
      'code': '371605008',
      'display': 'Deep breathing and coughing exercises'}]},
   'status': 'completed'}}]

In [11]:
# One row per activity: each element of the activity list gets its own row.
test = intermediate_df.explode(column='care plan activity')

In [14]:
# Pull out just the exploded activity dicts.
test_care_plan_activity = test.loc[:, 'care plan activity']

In [15]:
# Preview the activities with a fresh 0..n-1 index. The result is only displayed,
# not assigned, so test_care_plan_activity itself keeps its original index.
test_care_plan_activity.reset_index(drop=True)

0             {'detail': {'code': {'coding': [{'system': 'http://snomed.info/sct', 'code': '304510005', 'display': 'Recommendation to avoid exercise'}]}, 'status': 'completed'}}
1        {'detail': {'code': {'coding': [{'system': 'http://snomed.info/sct', 'code': '371605008', 'display': 'Deep breathing and coughing exercises'}]}, 'status': 'completed'}}
2                          {'detail': {'code': {'coding': [{'system': 'http://snomed.info/sct', 'code': '135892000', 'display': 'Antenatal education'}]}, 'status': 'completed'}}
3                            {'detail': {'code': {'coding': [{'system': 'http://snomed.info/sct', 'code': '226060000', 'display': 'Stress management'}]}, 'status': 'completed'}}
4                     {'detail': {'code': {'coding': [{'system': 'http://snomed.info/sct', 'code': '171054004', 'display': 'Pregnancy diet education'}]}, 'status': 'completed'}}
                                                                                          ...                 

In [24]:
# Display the Series as-is: index labels repeat because every exploded activity
# keeps the row label of the care plan it came from.
test_care_plan_activity

0            {'detail': {'code': {'coding': [{'system': 'http://snomed.info/sct', 'code': '304510005', 'display': 'Recommendation to avoid exercise'}]}, 'status': 'completed'}}
0       {'detail': {'code': {'coding': [{'system': 'http://snomed.info/sct', 'code': '371605008', 'display': 'Deep breathing and coughing exercises'}]}, 'status': 'completed'}}
1                         {'detail': {'code': {'coding': [{'system': 'http://snomed.info/sct', 'code': '135892000', 'display': 'Antenatal education'}]}, 'status': 'completed'}}
1                           {'detail': {'code': {'coding': [{'system': 'http://snomed.info/sct', 'code': '226060000', 'display': 'Stress management'}]}, 'status': 'completed'}}
1                    {'detail': {'code': {'coding': [{'system': 'http://snomed.info/sct', 'code': '171054004', 'display': 'Pregnancy diet education'}]}, 'status': 'completed'}}
                                                                                         ...                       

In [43]:
# Flatten each activity dict into dotted columns and keep its status.
activity_details = pd.json_normalize(test_care_plan_activity)
care_plan_activity_status = activity_details['detail.status']
care_plan_activity_status

0      completed
1      completed
2      completed
3      completed
4      completed
         ...    
166    completed
167    completed
168    completed
169    completed
170    completed
Name: detail.status, Length: 171, dtype: object

In [47]:
# One row per coding found under detail -> code -> coding of each activity.
care_plan_activity_code_name = pd.json_normalize(
    test_care_plan_activity,
    record_path=['detail', 'code', 'coding'],
)

In [48]:
# Code of each activity coding.
care_plan_activity_code = care_plan_activity_code_name.loc[:, 'code']
care_plan_activity_code

0      304510005
1      371605008
2      135892000
3      226060000
4      171054004
         ...    
166    408580007
167    183051005
168    226138001
169    183051005
170    408580007
Name: code, Length: 171, dtype: object

In [49]:
# Human-readable display text of each activity coding.
care_plan_activity_name = care_plan_activity_code_name.loc[:, 'display']
care_plan_activity_name

0             Recommendation to avoid exercise
1        Deep breathing and coughing exercises
2                          Antenatal education
3                            Stress management
4                     Pregnancy diet education
                        ...                   
166    Physical activity target light exercise
167                     Recommendation to rest
168                          Alcohol-free diet
169                     Recommendation to rest
170    Physical activity target light exercise
Name: display, Length: 171, dtype: object

## Extract care plan activity data

In [3]:
import pandas as pd
import glob

def _first_coding(codeable_concept):
    """Return the first dict in ``codeable_concept['coding']``, or ``{}`` if there is none."""
    if not isinstance(codeable_concept, dict):
        return {}
    codings = codeable_concept.get('coding')
    if isinstance(codings, list) and codings and isinstance(codings[0], dict):
        return codings[0]
    return {}


def _first_category_code(categories):
    """Return the code of the first coding of the first category, or ``None``."""
    if isinstance(categories, list) and categories:
        return _first_coding(categories[0]).get('code')
    return None


def _activity_fields(activity):
    """Return ``(code, display, status)`` for one activity dict; ``None``s when data is missing."""
    detail = activity.get('detail') if isinstance(activity, dict) else None
    if not isinstance(detail, dict):
        return (None, None, None)
    coding = _first_coding(detail.get('code'))
    return (coding.get('code'), coding.get('display'), detail.get('status'))


def extract_care_plan_activity_data(root_directory_path):
    """Extract one row per care plan activity and write it to a CSV file.

    Reads every ``*.json`` file directly inside ``root_directory_path``, keeps the
    entries whose ``resource.resourceType`` is ``'CarePlan'`` and writes
    ``./output_csv/careplanactivity_data.csv`` with the columns
    ``Care Plan Code``, ``Patient ID``, ``Encounter ID``,
    ``Care Plan Activity Code``, ``Care Plan Activity Name`` and ``Activity Status``.

    Every field is read per row (first coding only), so the output always has
    exactly one row per activity and values cannot shift between rows when an
    activity or category carries several codings. A care plan without
    activities yields a single row with empty activity columns.

    Parameters
    ----------
    root_directory_path : str
        Directory holding the JSON files, with or without a trailing separator.

    Returns
    -------
    None
        The result is written to disk only.

    Raises
    ------
    ValueError
        If no ``.json`` file is found in ``root_directory_path``.
    """
    import os  # function-scope: the notebook's import cell only provides pandas and glob

    # os.path.join works with or without a trailing separator on the root path;
    # sorted() makes the row order independent of the filesystem's listing order.
    file_paths = sorted(glob.glob(os.path.join(root_directory_path, "*.json")))
    if not file_paths:
        raise ValueError(f"No .json files found in {root_directory_path!r}")
    dfs = [pd.read_json(file_path) for file_path in file_paths]
    combined_df = pd.concat(dfs, ignore_index=True)
    entries = pd.json_normalize(combined_df['entry'])

    is_care_plan = entries['resource.resourceType'] == 'CarePlan'
    care_plan_data = entries[is_care_plan].reset_index(drop=True)

    # main care plan code (None when the data has no category column at all)
    care_plan_category = care_plan_data.get('resource.category')
    if care_plan_category is not None:
        care_plan_code = care_plan_category.map(_first_category_code)
    else:
        care_plan_code = None

    # regex=False: 'urn:uuid:' is a literal marker, not a pattern
    care_plan_patient_id = care_plan_data['resource.subject.reference'].str.replace(
        'urn:uuid:', '', regex=False
    )
    care_plan_encounter_id = care_plan_data['resource.context.reference'].str.replace(
        'urn:uuid:', '', regex=False
    )

    intermediate_df = pd.DataFrame({
        'Care Plan Code': care_plan_code,
        'Patient ID': care_plan_patient_id,
        'Encounter ID': care_plan_encounter_id,
        'care plan activity': care_plan_data['resource.activity'],
    })

    # one row per activity
    exploded_df = intermediate_df.explode('care plan activity').reset_index(drop=True)

    # code, name and status are extracted row by row so they stay aligned with exploded_df
    activity_df = pd.DataFrame(
        [_activity_fields(activity) for activity in exploded_df['care plan activity']],
        columns=['Care Plan Activity Code', 'Care Plan Activity Name', 'Activity Status'],
        index=exploded_df.index,
    )
    result_df = pd.concat(
        [exploded_df.drop(columns=['care plan activity']), activity_df],
        axis=1,
    )

    output_path = './output_csv/careplanactivity_data.csv'
    # to_csv does not create missing directories
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    result_df.to_csv(output_path, index=False)


In [4]:
# Run the extraction over the dataset folder; the function writes
# ./output_csv/careplanactivity_data.csv.
extract_care_plan_activity_data('./health_data/healthcare-datasets/')

## care plan activity process

In [5]:
import pandas as pd
# Reload the extracted activity table from disk (column dtypes are inferred by read_csv).
care_plan_activity = pd.read_csv(
    filepath_or_buffer='./output_csv/careplanactivity_data.csv'
)

In [6]:
# Peek at the first rows of the reloaded activity table.
care_plan_activity.head()

Unnamed: 0,Care Plan Code,Patient ID,Encounter ID,Care Plan Activity Code,Care Plan Activity Name,Activity Status
0,53950000,65d12976-9588-4cfa-a795-216302a2ece9,ddc2fb3c-6ea7-4b66-931b-0a0b4bfd3a75,304510005,Recommendation to avoid exercise,completed
1,53950000,65d12976-9588-4cfa-a795-216302a2ece9,ddc2fb3c-6ea7-4b66-931b-0a0b4bfd3a75,371605008,Deep breathing and coughing exercises,completed
2,134435003,65d12976-9588-4cfa-a795-216302a2ece9,80fd8fc5-4770-4e35-b31c-4d7b1be19c2f,135892000,Antenatal education,completed
3,134435003,65d12976-9588-4cfa-a795-216302a2ece9,80fd8fc5-4770-4e35-b31c-4d7b1be19c2f,226060000,Stress management,completed
4,134435003,65d12976-9588-4cfa-a795-216302a2ece9,80fd8fc5-4770-4e35-b31c-4d7b1be19c2f,171054004,Pregnancy diet education,completed


In [9]:
# Per care plan code: the distinct activity codes seen (in order of first
# appearance) and how many there are.
activity_codes_by_plan = care_plan_activity.groupby('Care Plan Code')['Care Plan Activity Code']
test = activity_codes_by_plan.agg(lambda codes: list(codes.unique())).to_frame()
test['Care Plan Activity Count'] = activity_codes_by_plan.nunique()
test

Unnamed: 0_level_0,Care Plan Activity Code,Care Plan Activity Count
Care Plan Code,Unnamed: 1_level_1,Unnamed: 2_level_1
47387005,"[183051005, 226138001]",2
53950000,"[304510005, 371605008, 710081004]",3
91251008,"[229586001, 229070002]",2
134435003,"[135892000, 226060000, 171054004]",3
182964004,"[133918004, 408957008, 243072006]",3
225358003,"[385949008, 439830001]",2
385691007,"[183051005, 408580007]",2
386257007,"[850261000000100, 710125008, 315043002]",3
412776001,"[710081004, 426990007, 229065009, 15081005]",4
698358001,"[229065009, 226060000, 710081004, 226234005]",4
