In [64]:
import requests
import zipfile
import os
import io
import pandas as pd

# 1. Download the archive.
#    A timeout is passed so that a stalled connection raises an exception
#    instead of blocking the kernel forever (requests applies no timeout
#    unless one is given).
url = "https://synthetichealth.github.io/synthea-sample-data/downloads/latest/synthea_sample_data_csv_latest.zip"
response = requests.get(url, timeout=60)
response.raise_for_status()  # Stop here on any HTTP error status

# Tables that are turned into DataFrames
csv_files = [
    "careplans.csv",
    "conditions.csv",
    "encounters.csv",
    "medications.csv",
    "observations.csv",
    "patients.csv",
    "procedures.csv",
    "payers.csv",
    "claims.csv",
    'organizations.csv',
    'providers.csv',
]

# Dictionary to store DataFrames, keyed by CSV file name
dataframes_100 = {}

# Directory the archive is unpacked into
extracted_path = '/tmp/synthea_sample_data'

with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
    # 2. Extract the ZIP file so the raw CSVs are also available on disk
    os.makedirs(extracted_path, exist_ok=True)
    zip_ref.extractall(extracted_path)
    print(f"Extracted to {extracted_path}")

    # 3. List the directory contents.
    #    This is a listing of the directory, not of the archive: the directory
    #    is never cleared, so it can also show files left by earlier extractions.
    extracted_files = os.listdir(extracted_path)
    print("Extracted files:", extracted_files)

    # 4. Create a DataFrame for each requested CSV.
    #    Each table is read from the archive that was just downloaded rather
    #    than by probing the shared directory. That way a stale file from a
    #    different archive can never be loaded by mistake, and CSVs stored in
    #    a sub-folder of the archive are still found (matched by base name;
    #    the first member with a given base name wins).
    archive_members = {}
    for member in zip_ref.namelist():
        if member.endswith('/'):
            continue  # directory entry, not a file
        archive_members.setdefault(member.rsplit('/', 1)[-1], member)

    for csv_file in csv_files:
        member = archive_members.get(csv_file)
        if member is None:
            print(f"{csv_file} not found in the extracted files.")
            continue
        with zip_ref.open(member) as csv_handle:
            dataframes_100[csv_file] = pd.read_csv(csv_handle)
        print(f"Loaded {csv_file}")

Extracted to /tmp/synthea_sample_data
Extracted files: ['allergies.csv', 'careplans.csv', 'claims.csv', 'claims_transactions.csv', 'conditions.csv', 'csv', 'devices.csv', 'encounters.csv', 'imaging_studies.csv', 'immunizations.csv', 'medications.csv', 'observations.csv', 'organizations.csv', 'patients.csv', 'payers.csv', 'payer_transitions.csv', 'procedures.csv', 'providers.csv', 'supplies.csv']
Loaded careplans.csv
Loaded conditions.csv
Loaded encounters.csv
Loaded medications.csv
Loaded observations.csv
Loaded patients.csv
Loaded procedures.csv
Loaded payers.csv
Loaded claims.csv
Loaded organizations.csv
Loaded providers.csv


In [65]:
# Give every loaded table its own variable. dict.get() is used, so a table
# that was not loaded by the previous cell ends up as None here rather than
# raising a KeyError.

# Clinical event tables
careplans_df_100 = dataframes_100.get('careplans.csv')
conditions_df_100 = dataframes_100.get('conditions.csv')
medications_df_100 = dataframes_100.get('medications.csv')
observations_df_100 = dataframes_100.get('observations.csv')
procedures_df_100 = dataframes_100.get('procedures.csv')

# Encounters and the parties they reference
encounters_df_100 = dataframes_100.get('encounters.csv')
patients_df_100 = dataframes_100.get('patients.csv')
providers_df_100 = dataframes_100.get('providers.csv')
organizations_df_100 = dataframes_100.get('organizations.csv')

# Payers and claims
payers_df_100 = dataframes_100.get('payers.csv')
claims_df_100 = dataframes_100.get('claims.csv')

In [93]:

# Give the careplans key columns the snake_case names used for joins:
#   Id -> careplan_id, PATIENT -> patient_id, ENCOUNTER -> encounter_id
# The rename stays in place so the frame held in dataframes_100 (the same
# object) is renamed as well.
careplans_df_100.rename(
    columns={
        'Id': 'careplan_id',
        'PATIENT': 'patient_id',
        'ENCOUNTER': 'encounter_id',
    },
    inplace=True,
)
careplans_df_100

Unnamed: 0,careplan_id,START,STOP,patient_id,encounter_id,CODE,DESCRIPTION,REASONCODE,REASONDESCRIPTION
0,a13c3a76-fd15-e00d-a6f7-2c3735fce001,2006-02-07,,8313967b-6432-89aa-090f-4d2b81558360,359aaf0b-6ce5-8a6d-ce7d-7dca74ab8a6c,718361005,Weight management program (regime/therapy),,
1,d7252d0d-0699-c4db-6978-b92fc97ccd13,2016-10-25,2017-05-30,8313967b-6432-89aa-090f-4d2b81558360,0ab8c1de-e107-474e-8c68-6fe49a046020,134435003,Routine antenatal care (regime/therapy),,
2,00fe4941-cebd-bbdc-c423-0f527155221d,2020-08-31,2020-10-11,8313967b-6432-89aa-090f-4d2b81558360,a7e46c3f-ee94-63bb-8e5d-d4f41233ab38,133901003,Burn care (regime/therapy),4.031900e+08,Epidermal burn of skin (disorder)
3,cf240348-ca10-2966-b25a-f1aad69c3731,2021-10-12,2022-05-24,8313967b-6432-89aa-090f-4d2b81558360,14813d1d-9f54-11d1-bb79-5b9201e9cc60,134435003,Routine antenatal care (regime/therapy),,
4,01bf593e-f898-3b0f-4d89-1403492059ea,2018-12-19,2019-01-28,c525e0a9-c37c-419c-db08-86080b4b774d,20b1f81d-9450-c725-8b7d-192dad4e02ea,773513001,Physiotherapy care plan (record artifact),4.446501e+07,Sprain of ankle (disorder)
...,...,...,...,...,...,...,...,...,...
420,cc947a1e-b3dc-0bda-1f12-b5c3ae331060,1966-03-25,,59be6f33-cd77-774a-8e0f-3286eff96d1c,46349625-c3f9-b5da-38e3-93a434e849b9,735985000,Diabetes self management plan (record artifact),7.146280e+08,Prediabetes (finding)
421,5461ce81-0b79-1273-6e79-9798b7c06809,1994-04-17,,59be6f33-cd77-774a-8e0f-3286eff96d1c,8376d569-12d3-1ce4-b1a5-9ae58d4f87a1,734163000,Care plan (record artifact),6.057300e+07,Aortic valve stenosis (disorder)
422,b3f6a258-501a-ec27-1986-0fb1ba009af2,1994-05-26,,59be6f33-cd77-774a-8e0f-3286eff96d1c,debd253d-6a5b-43f6-1b98-c439edf86c3a,736372004,Discharge care plan (record artifact),1.231000e+12,History of aortic valve replacement (situation)
423,08e1c507-ffca-ee00-3f5b-8633509e6428,2000-04-14,,59be6f33-cd77-774a-8e0f-3286eff96d1c,6fee159b-1494-9023-49bf-62748ff4e3c4,736285004,Hyperlipidemia clinical management plan (recor...,5.582200e+07,Hyperlipidemia (disorder)


In [67]:
# The patients table's own 'Id' becomes 'patient_id', the column name the
# later merges join on.
patients_df_100.rename(
    columns={'Id': 'patient_id'},
    inplace=True,
)

patients_df_100.head()

Unnamed: 0,patient_id,BIRTHDATE,DEATHDATE,SSN,DRIVERS,PASSPORT,PREFIX,FIRST,MIDDLE,LAST,...,CITY,STATE,COUNTY,FIPS,ZIP,LAT,LON,HEALTHCARE_EXPENSES,HEALTHCARE_COVERAGE,INCOME
0,dbaa48b1-720a-f5f2-9360-ae6bb9321037,2022-04-19,,999-46-8625,,,,Georgine810,Caroyln232,Jacobs452,...,Marlborough,Massachusetts,Middlesex County,25017.0,1752,42.30859,-71.567107,6002.42,0.0,60469
1,8313967b-6432-89aa-090f-4d2b81558360,1989-12-26,,999-31-6504,S99917070,X12534338X,Mrs.,Suzanna632,Karissa612,Fay398,...,Leominster,Massachusetts,Worcester County,25027.0,1420,42.615008,-71.775108,257357.78,523549.41,43633
2,c525e0a9-c37c-419c-db08-86080b4b774d,1988-07-06,,999-84-5991,S99966682,X57704278X,Mr.,Johnson679,Warren653,Mann644,...,Plymouth,Massachusetts,Plymouth County,25023.0,2360,41.896981,-70.66451,7576.94,161526.02,12746
3,ecb49d9e-4eb1-4743-237d-d8020ecd4f86,1943-10-22,,999-81-8785,S99942513,X26350147X,Mr.,Freeman822,Lamont867,Schowalter414,...,Lawrence,Massachusetts,Essex County,25009.0,1843,42.721493,-71.165165,22735.16,527081.23,14577
4,8bab136f-0ae3-e4be-3ef0-62d007e0d267,2004-02-29,,999-93-6499,S99940744,X71774583X,Mr.,Barrett790,Alfredo17,McDermott739,...,Boston,Massachusetts,Suffolk County,25025.0,2111,42.3117,-71.107352,3621.16,37226.18,24550


In [68]:
# Rename the encounter key and its foreign-key columns in one pass:
#   Id -> encounter_id, PATIENT -> patient_id, ORGANIZATION -> organization_id,
#   PAYER -> payer_id, PROVIDER -> provider_id
encounters_df_100.rename(
    columns={
        'Id': 'encounter_id',
        'PATIENT': 'patient_id',
        'ORGANIZATION': 'organization_id',
        'PAYER': 'payer_id',
        'PROVIDER': 'provider_id',
    },
    inplace=True,
)

encounters_df_100.head()


Unnamed: 0,encounter_id,START,STOP,patient_id,organization_id,provider_id,payer_id,ENCOUNTERCLASS,CODE,DESCRIPTION,BASE_ENCOUNTER_COST,TOTAL_CLAIM_COST,PAYER_COVERAGE,REASONCODE,REASONDESCRIPTION
0,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,2022-04-19T00:36:40Z,2022-04-19T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,907b1d2f-2bf8-31b5-abc2-14d6b75faa96,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),136.8,347.38,0.0,,
1,1f391129-92f6-090b-d27e-48cdb42824c2,2022-05-24T00:36:40Z,2022-05-24T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,907b1d2f-2bf8-31b5-abc2-14d6b75faa96,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),136.8,272.8,0.0,,
2,e6f28acc-e581-ab40-7f30-bd9e72a18e61,2022-07-26T00:36:40Z,2022-07-26T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,907b1d2f-2bf8-31b5-abc2-14d6b75faa96,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),136.8,816.8,0.0,,
3,1b1ff37c-ddf8-96c0-4aef-7b0c01b2c845,2022-09-27T00:36:40Z,2022-09-27T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,907b1d2f-2bf8-31b5-abc2-14d6b75faa96,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),136.8,816.8,0.0,,
4,8538767f-d67e-976f-ec4d-5f9ade23c2c6,2022-12-27T00:36:40Z,2022-12-27T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,907b1d2f-2bf8-31b5-abc2-14d6b75faa96,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),136.8,816.8,0.0,,


In [69]:
# The organizations table's 'Id' becomes 'organization_id', matching the
# column the providers/organizations merge joins on.
organizations_df_100.rename(
    columns={'Id': 'organization_id'},
    inplace=True,
)
organizations_df_100.head()

Unnamed: 0,organization_id,NAME,ADDRESS,CITY,STATE,ZIP,LAT,LON,PHONE,REVENUE,UTILIZATION
0,74ab949d-17ac-3309-83a0-13b4405c66aa,Fitchburg Outpatient Clinic,881 Main Street,Fitchburg,MA,1420,42.586487,-71.80521,978-342-9781 Or 978-342-9781,0.0,372
1,588f6ce6-b8db-3588-8189-29db2680a313,BOSTON HEALTH CARE FOR THE HOMELESS PROGRAM INC,461 WALNUT AVE,JAMAICA PLAIN,MA,21302331,42.311588,-71.098001,8576541550,0.0,28
2,faffaf6a-ee1a-3673-b0b0-421a9c249244,ACTIVATED BY WELLNESS LLC,66 WASHINGTON ST,STOUGHTON,MA,20722571,42.144158,-71.103783,6178719807,0.0,46
3,e09d4c49-c2ef-3b0f-9a46-3719d9219306,UMASS MEMORIAL HEALTHALLIANCE CLINTON HOSPITAL...,60 HOSPITAL RD,LEOMINSTER,MA,14533290,42.540319,-71.76313,9784662000,0.0,263
4,77645e49-3f69-3d1d-bb93-dc65210e2fac,SOUTHCOAST HOSPITALS GROUP INC,363 HIGHLAND AVE,FALL RIVER,MA,27205246,41.709378,-71.146078,5086793131,0.0,134


In [70]:
# Rename the medications foreign-key columns in one pass:
#   ENCOUNTER -> encounter_id, PATIENT -> patient_id, PAYER -> payer_id
medications_df_100.rename(
    columns={
        'ENCOUNTER': 'encounter_id',
        'PATIENT': 'patient_id',
        'PAYER': 'payer_id',
    },
    inplace=True,
)
medications_df_100.head()

Unnamed: 0,START,STOP,patient_id,payer_id,encounter_id,CODE,DESCRIPTION,BASE_COST,PAYER_COVERAGE,DISPENSES,TOTALCOST,REASONCODE,REASONDESCRIPTION
0,2014-05-30T18:43:10Z,2015-05-25T18:43:10Z,8313967b-6432-89aa-090f-4d2b81558360,734afbd6-4794-363b-9bc0-6a3981533ed5,7d65425b-8443-cf6b-b2d7-f1422b27a28c,831533,Errin 28 Day Pack,346.67,346.67,12,4160.04,,
1,2015-05-25T18:43:10Z,2016-05-19T18:43:10Z,8313967b-6432-89aa-090f-4d2b81558360,734afbd6-4794-363b-9bc0-6a3981533ed5,c33709c3-0608-d708-3fd5-d0b60a5c6e55,757594,Jolivette 28 Day Pack,400.9,56.19,12,4810.8,,
2,2018-03-20T22:58:36Z,2018-03-20T22:58:36Z,8313967b-6432-89aa-090f-4d2b81558360,734afbd6-4794-363b-9bc0-6a3981533ed5,58e1911f-c777-7d1f-6e69-22e2f7eaf526,1535362,sodium fluoride 0.0272 MG/MG Oral Gel,129.94,0.0,1,129.94,66383009.0,Gingivitis (disorder)
3,2020-08-31T19:05:17Z,2020-10-11T19:05:17Z,8313967b-6432-89aa-090f-4d2b81558360,734afbd6-4794-363b-9bc0-6a3981533ed5,a7e46c3f-ee94-63bb-8e5d-d4f41233ab38,849574,Naproxen sodium 220 MG Oral Tablet,168.16,0.0,1,168.16,,
4,2023-07-07T18:43:10Z,2024-07-01T19:04:15Z,8313967b-6432-89aa-090f-4d2b81558360,734afbd6-4794-363b-9bc0-6a3981533ed5,c58138d0-0e9a-00c9-ee85-9cb789e74191,389221,Etonogestrel 68 MG Drug Implant,13.12,0.0,12,157.44,,


In [71]:
# Rename the observations foreign-key columns in one pass:
#   PATIENT -> patient_id, ENCOUNTER -> encounter_id
observations_df_100.rename(
    columns={
        'PATIENT': 'patient_id',
        'ENCOUNTER': 'encounter_id',
    },
    inplace=True,
)
observations_df_100.head()

Unnamed: 0,DATE,patient_id,encounter_id,CATEGORY,CODE,DESCRIPTION,VALUE,UNITS,TYPE
0,2022-04-19T00:36:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,vital-signs,8302-2,Body Height,50.5,cm,numeric
1,2022-04-19T00:36:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,vital-signs,72514-3,Pain severity - 0-10 verbal numeric rating [Sc...,0.0,{score},numeric
2,2022-04-19T00:36:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,vital-signs,29463-7,Body Weight,3.6,kg,numeric
3,2022-04-19T00:36:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,vital-signs,77606-2,Weight-for-length Per age and sex,36.8,%,numeric
4,2022-04-19T00:36:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,vital-signs,8289-1,Head Occipital-frontal circumference Percentile,33.7,%,numeric


In [72]:
# Rename the conditions foreign-key columns in one pass:
#   PATIENT -> patient_id, ENCOUNTER -> encounter_id
conditions_df_100.rename(
    columns={
        'PATIENT': 'patient_id',
        'ENCOUNTER': 'encounter_id',
    },
    inplace=True,
)
conditions_df_100.head()

Unnamed: 0,START,STOP,patient_id,encounter_id,SYSTEM,CODE,DESCRIPTION
0,2022-04-19,2023-09-26,dbaa48b1-720a-f5f2-9360-ae6bb9321037,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,http://snomed.info/sct,314529007,Medication review due (situation)
1,2024-03-26,2024-03-26,dbaa48b1-720a-f5f2-9360-ae6bb9321037,7fe8c1c8-f019-976e-4a70-e49dbaec8cd5,http://snomed.info/sct,314529007,Medication review due (situation)
2,2024-09-24,2024-09-24,dbaa48b1-720a-f5f2-9360-ae6bb9321037,1f72daa0-6d9d-4e3c-d317-031ab62f0431,http://snomed.info/sct,314529007,Medication review due (situation)
3,2025-03-25,2025-03-25,dbaa48b1-720a-f5f2-9360-ae6bb9321037,12f7aeb1-70ca-ed6f-2c61-2fbb7ac53d33,http://snomed.info/sct,314529007,Medication review due (situation)
4,2005-05-28,,8313967b-6432-89aa-090f-4d2b81558360,abf41c45-cb23-5525-633b-ba6929ef0ff6,http://snomed.info/sct,197927001,Recurrent urinary tract infection (disorder)


In [73]:
# Rename the procedures foreign-key columns in one pass:
#   PATIENT -> patient_id, ENCOUNTER -> encounter_id
procedures_df_100.rename(
    columns={
        'PATIENT': 'patient_id',
        'ENCOUNTER': 'encounter_id',
    },
    inplace=True,
)
procedures_df_100.head()

Unnamed: 0,START,STOP,patient_id,encounter_id,SYSTEM,CODE,DESCRIPTION,BASE_COST,REASONCODE,REASONDESCRIPTION
0,2023-09-26T00:36:40Z,2023-09-26T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,8be4f777-e319-7fa6-a795-808d2cdbba0f,http://snomed.info/sct,430193006,Medication reconciliation (procedure),239.94,,
1,2024-03-26T00:36:40Z,2024-03-26T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,7fe8c1c8-f019-976e-4a70-e49dbaec8cd5,http://snomed.info/sct,430193006,Medication reconciliation (procedure),215.7,,
2,2024-09-24T00:36:40Z,2024-09-24T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,1f72daa0-6d9d-4e3c-d317-031ab62f0431,http://snomed.info/sct,430193006,Medication reconciliation (procedure),215.7,,
3,2025-03-25T00:36:40Z,2025-03-25T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,12f7aeb1-70ca-ed6f-2c61-2fbb7ac53d33,http://snomed.info/sct,430193006,Medication reconciliation (procedure),215.7,,
4,2016-10-25T18:43:10Z,2016-10-25T18:58:10Z,8313967b-6432-89aa-090f-4d2b81558360,0ab8c1de-e107-474e-8c68-6fe49a046020,http://snomed.info/sct,252160004,Standard pregnancy test (procedure),4592.67,72892002.0,Normal pregnancy (finding)


In [74]:
# The payers table's 'Id' becomes 'payer_id', the column the claims/payers
# merge joins on.
payers_df_100.rename(
    columns={'Id': 'payer_id'},
    inplace=True,
)
payers_df_100.head()

Unnamed: 0,payer_id,NAME,OWNERSHIP,ADDRESS,CITY,STATE_HEADQUARTERED,ZIP,PHONE,AMOUNT_COVERED,AMOUNT_UNCOVERED,...,UNCOVERED_ENCOUNTERS,COVERED_MEDICATIONS,UNCOVERED_MEDICATIONS,COVERED_PROCEDURES,UNCOVERED_PROCEDURES,COVERED_IMMUNIZATIONS,UNCOVERED_IMMUNIZATIONS,UNIQUE_CUSTOMERS,QOLS_AVG,MEMBER_MONTHS
0,a735bf55-83e9-331a-899d-a82a60b9f60c,Medicare,GOVERNMENT,,,,,,15317521.31,587287.35,...,0,13791,0,19622,0,1132,0,55,0.667311,13512
1,df166300-5a78-3502-a46a-832842197811,Medicaid,GOVERNMENT,,,,,,14813661.5,360339.18,...,0,2153,0,9200,0,1212,0,57,0.934654,11100
2,d18ef2e6-ef40-324c-be54-34a5ee865625,Dual Eligible,GOVERNMENT,,,,,,884286.49,14352.1,...,0,337,0,791,0,83,0,5,0.794433,780
3,26aab0cd-6aba-3e1b-ac5b-05c8867e762c,Humana,PRIVATE,,,,,,4121320.61,4293247.29,...,0,2893,0,8521,0,1129,0,35,0.963516,15708
4,b046940f-1664-3047-bca7-dfa76be352a4,Blue Cross Blue Shield,PRIVATE,,,,,,5521736.78,669177.02,...,0,271,0,1432,0,196,0,55,0.145391,14052


In [87]:
# Rename the claims key columns in one pass:
#   Id -> claim_id, PATIENTID -> patient_id, PROVIDERID -> provider_id,
#   PRIMARYPATIENTINSURANCEID -> payer_id
# Only the primary insurance column becomes 'payer_id';
# SECONDARYPATIENTINSURANCEID keeps its original name.
claims_df_100.rename(
    columns={
        'Id': 'claim_id',
        'PATIENTID': 'patient_id',
        'PROVIDERID': 'provider_id',
        'PRIMARYPATIENTINSURANCEID': 'payer_id',
    },
    inplace=True,
)
claims_df_100.head()

Unnamed: 0,claim_id,patient_id,provider_id,payer_id,SECONDARYPATIENTINSURANCEID,DEPARTMENTID,PATIENTDEPARTMENTID,DIAGNOSIS1,DIAGNOSIS2,DIAGNOSIS3,...,STATUS2,STATUSP,OUTSTANDING1,OUTSTANDING2,OUTSTANDINGP,LASTBILLEDDATE1,LASTBILLEDDATE2,LASTBILLEDDATEP,HEALTHCARECLAIMTYPEID1,HEALTHCARECLAIMTYPEID2
0,833cc4c6-f121-b250-c1e2-28e3d17ca563,dbaa48b1-720a-f5f2-9360-ae6bb9321037,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,,10,10,314529007,,,...,CLOSED,CLOSED,0.0,0.0,0.0,2022-04-19T00:51:40Z,2022-04-19T00:51:40Z,2022-04-19T00:51:40Z,1,1
1,87df6cfe-d3c0-ac92-7ff2-91f8b3cac575,dbaa48b1-720a-f5f2-9360-ae6bb9321037,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,,10,10,410620009,,,...,CLOSED,CLOSED,0.0,0.0,0.0,2022-05-24T00:51:40Z,2022-05-24T00:51:40Z,2022-05-24T00:51:40Z,1,1
2,e7030690-90ce-02c1-665b-45ddac472c31,dbaa48b1-720a-f5f2-9360-ae6bb9321037,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,,10,10,410620009,,,...,CLOSED,CLOSED,0.0,0.0,0.0,2022-07-26T00:51:40Z,2022-07-26T00:51:40Z,2022-07-26T00:51:40Z,1,1
3,0964634e-4e73-df43-af11-e9d086d88e3c,dbaa48b1-720a-f5f2-9360-ae6bb9321037,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,,10,10,410620009,,,...,CLOSED,CLOSED,0.0,0.0,0.0,2022-09-27T00:51:40Z,2022-09-27T00:51:40Z,2022-09-27T00:51:40Z,1,1
4,70baec82-5d92-fdd0-b751-409de991e717,dbaa48b1-720a-f5f2-9360-ae6bb9321037,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,,10,10,410620009,,,...,CLOSED,CLOSED,0.0,0.0,0.0,2022-12-27T00:51:40Z,2022-12-27T00:51:40Z,2022-12-27T00:51:40Z,1,1


In [76]:
# Rename the providers key columns in one pass:
#   Id -> provider_id, ORGANIZATION -> organization_id
providers_df_100.rename(
    columns={
        'Id': 'provider_id',
        'ORGANIZATION': 'organization_id',
    },
    inplace=True,
)
providers_df_100.head()

Unnamed: 0,provider_id,organization_id,NAME,GENDER,SPECIALITY,ADDRESS,CITY,STATE,ZIP,LAT,LON,ENCOUNTERS,PROCEDURES
0,3009ebde-ee83-3d84-ba8d-3b78656e3e58,74ab949d-17ac-3309-83a0-13b4405c66aa,Ted955 Reilly981,M,GENERAL PRACTICE,881 Main Street,Fitchburg,MA,1420,42.586487,-71.80521,372,0
1,b47eca62-7dc7-3002-8612-8a2f878438c8,588f6ce6-b8db-3588-8189-29db2680a313,Terence292 Brakus656,M,GENERAL PRACTICE,461 WALNUT AVE,JAMAICA PLAIN,MA,21302331,42.311588,-71.098001,28,0
2,59b4ba29-558d-3529-9c77-67ddee65a64f,faffaf6a-ee1a-3673-b0b0-421a9c249244,Jaunita130 Armstrong51,F,GENERAL PRACTICE,66 WASHINGTON ST,STOUGHTON,MA,20722571,42.144158,-71.103783,46,0
3,5fc4f4f2-fe7b-348a-9e8f-b1c25bf637ba,e09d4c49-c2ef-3b0f-9a46-3719d9219306,Jules135 Emard19,M,GENERAL PRACTICE,60 HOSPITAL RD,LEOMINSTER,MA,14533290,42.540319,-71.76313,263,0
4,41e6ebba-c2e9-3117-b9db-69c2036a7afa,77645e49-3f69-3d1d-bb93-dc65210e2fac,Vella930 Ankunding277,F,GENERAL PRACTICE,363 HIGHLAND AVE,FALL RIVER,MA,27205246,41.709378,-71.146078,134,0


In [91]:
# All joins below are inner joins on a single key column. Columns that exist
# in both inputs (other than the key) keep pandas' default '_x' (left) and
# '_y' (right) suffixes.

# --- Joins to patients on 'patient_id' ---
# The careplans/patients join is currently disabled:
# careplans_with_patients = careplans_df_100.merge(patients_df_100, on='patient_id', how='inner')
encounters_with_patients = pd.merge(
    encounters_df_100, patients_df_100, how='inner', on='patient_id'
)
observations_with_patients = pd.merge(
    observations_df_100, patients_df_100, how='inner', on='patient_id'
)

# --- Joins to encounters on 'encounter_id' ---
procedures_with_encounters = pd.merge(
    procedures_df_100, encounters_df_100, how='inner', on='encounter_id'
)
medications_with_encounters = pd.merge(
    medications_df_100, encounters_df_100, how='inner', on='encounter_id'
)

# --- Claims joined to payers on 'payer_id' ---
claims_with_payers = pd.merge(
    claims_df_100, payers_df_100, how='inner', on='payer_id'
)

# --- Providers joined to organizations on 'organization_id' ---
providers_with_organizations = pd.merge(
    providers_df_100, organizations_df_100, how='inner', on='organization_id'
)

In [92]:
# Preview the providers/organizations join. Columns present in both inputs
# (NAME, ADDRESS, CITY, STATE, ZIP, LAT, LON) appear twice: '_x' from
# providers, '_y' from organizations.
providers_with_organizations.head()

Unnamed: 0,provider_id,organization_id,NAME_x,GENDER,SPECIALITY,ADDRESS_x,CITY_x,STATE_x,ZIP_x,LAT_x,...,NAME_y,ADDRESS_y,CITY_y,STATE_y,ZIP_y,LAT_y,LON_y,PHONE,REVENUE,UTILIZATION
0,3009ebde-ee83-3d84-ba8d-3b78656e3e58,74ab949d-17ac-3309-83a0-13b4405c66aa,Ted955 Reilly981,M,GENERAL PRACTICE,881 Main Street,Fitchburg,MA,1420,42.586487,...,Fitchburg Outpatient Clinic,881 Main Street,Fitchburg,MA,1420,42.586487,-71.80521,978-342-9781 Or 978-342-9781,0.0,372
1,b47eca62-7dc7-3002-8612-8a2f878438c8,588f6ce6-b8db-3588-8189-29db2680a313,Terence292 Brakus656,M,GENERAL PRACTICE,461 WALNUT AVE,JAMAICA PLAIN,MA,21302331,42.311588,...,BOSTON HEALTH CARE FOR THE HOMELESS PROGRAM INC,461 WALNUT AVE,JAMAICA PLAIN,MA,21302331,42.311588,-71.098001,8576541550,0.0,28
2,59b4ba29-558d-3529-9c77-67ddee65a64f,faffaf6a-ee1a-3673-b0b0-421a9c249244,Jaunita130 Armstrong51,F,GENERAL PRACTICE,66 WASHINGTON ST,STOUGHTON,MA,20722571,42.144158,...,ACTIVATED BY WELLNESS LLC,66 WASHINGTON ST,STOUGHTON,MA,20722571,42.144158,-71.103783,6178719807,0.0,46
3,5fc4f4f2-fe7b-348a-9e8f-b1c25bf637ba,e09d4c49-c2ef-3b0f-9a46-3719d9219306,Jules135 Emard19,M,GENERAL PRACTICE,60 HOSPITAL RD,LEOMINSTER,MA,14533290,42.540319,...,UMASS MEMORIAL HEALTHALLIANCE CLINTON HOSPITAL...,60 HOSPITAL RD,LEOMINSTER,MA,14533290,42.540319,-71.76313,9784662000,0.0,263
4,41e6ebba-c2e9-3117-b9db-69c2036a7afa,77645e49-3f69-3d1d-bb93-dc65210e2fac,Vella930 Ankunding277,F,GENERAL PRACTICE,363 HIGHLAND AVE,FALL RIVER,MA,27205246,41.709378,...,SOUTHCOAST HOSPITALS GROUP INC,363 HIGHLAND AVE,FALL RIVER,MA,27205246,41.709378,-71.146078,5086793131,0.0,134


In [90]:
# Display the claims/payers join. Without .head() the whole frame is handed
# to the renderer; pandas elides the middle rows and columns in the output.
claims_with_payers

Unnamed: 0,claim_id,patient_id,provider_id,payer_id,SECONDARYPATIENTINSURANCEID,DEPARTMENTID,PATIENTDEPARTMENTID,DIAGNOSIS1,DIAGNOSIS2,DIAGNOSIS3,...,UNCOVERED_ENCOUNTERS,COVERED_MEDICATIONS,UNCOVERED_MEDICATIONS,COVERED_PROCEDURES,UNCOVERED_PROCEDURES,COVERED_IMMUNIZATIONS,UNCOVERED_IMMUNIZATIONS,UNIQUE_CUSTOMERS,QOLS_AVG,MEMBER_MONTHS
0,833cc4c6-f121-b250-c1e2-28e3d17ca563,dbaa48b1-720a-f5f2-9360-ae6bb9321037,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,,10,10,314529007,,,...,0,358,0,2047,0,321,0,24,0.914600,2808
1,87df6cfe-d3c0-ac92-7ff2-91f8b3cac575,dbaa48b1-720a-f5f2-9360-ae6bb9321037,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,,10,10,410620009,,,...,0,358,0,2047,0,321,0,24,0.914600,2808
2,e7030690-90ce-02c1-665b-45ddac472c31,dbaa48b1-720a-f5f2-9360-ae6bb9321037,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,,10,10,410620009,,,...,0,358,0,2047,0,321,0,24,0.914600,2808
3,0964634e-4e73-df43-af11-e9d086d88e3c,dbaa48b1-720a-f5f2-9360-ae6bb9321037,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,,10,10,410620009,,,...,0,358,0,2047,0,321,0,24,0.914600,2808
4,70baec82-5d92-fdd0-b751-409de991e717,dbaa48b1-720a-f5f2-9360-ae6bb9321037,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,,10,10,410620009,,,...,0,358,0,2047,0,321,0,24,0.914600,2808
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22258,d5b7c13c-6a9d-46ef-ea06-13909a5481ed,59be6f33-cd77-774a-8e0f-3286eff96d1c,45b68013-84b2-3345-8c37-2a459f531e8d,a735bf55-83e9-331a-899d-a82a60b9f60c,b046940f-1664-3047-bca7-dfa76be352a4,20,20,66383009,,,...,0,13791,0,19622,0,1132,0,55,0.667311,13512
22259,84831093-431b-5eaa-aceb-331b59f5db14,59be6f33-cd77-774a-8e0f-3286eff96d1c,45b68013-84b2-3345-8c37-2a459f531e8d,a735bf55-83e9-331a-899d-a82a60b9f60c,b046940f-1664-3047-bca7-dfa76be352a4,20,20,66383009,,,...,0,13791,0,19622,0,1132,0,55,0.667311,13512
22260,27451ec0-3ab5-4c66-1f0b-3eeba6e3b18b,59be6f33-cd77-774a-8e0f-3286eff96d1c,60ce9967-05b7-382c-bcb8-680744263e28,a735bf55-83e9-331a-899d-a82a60b9f60c,b046940f-1664-3047-bca7-dfa76be352a4,1,1,55822004,,,...,0,13791,0,19622,0,1132,0,55,0.667311,13512
22261,5f3aebc5-ca32-684f-7753-cf05aae74dea,59be6f33-cd77-774a-8e0f-3286eff96d1c,60ce9967-05b7-382c-bcb8-680744263e28,a735bf55-83e9-331a-899d-a82a60b9f60c,b046940f-1664-3047-bca7-dfa76be352a4,1,1,55822004,,,...,0,13791,0,19622,0,1132,0,55,0.667311,13512


In [82]:
# Preview the medications/encounters join. Both inputs carry patient_id,
# payer_id, START, STOP, CODE, etc., so those come back as '_x' (medications)
# and '_y' (encounters) pairs; only encounter_id is shared.
medications_with_encounters.head()

Unnamed: 0,START_x,STOP_x,patient_id_x,payer_id_x,encounter_id,CODE_x,DESCRIPTION_x,BASE_COST,PAYER_COVERAGE_x,DISPENSES,...,provider_id,payer_id_y,ENCOUNTERCLASS,CODE_y,DESCRIPTION_y,BASE_ENCOUNTER_COST,TOTAL_CLAIM_COST,PAYER_COVERAGE_y,REASONCODE_y,REASONDESCRIPTION_y
0,2014-05-30T18:43:10Z,2015-05-25T18:43:10Z,8313967b-6432-89aa-090f-4d2b81558360,734afbd6-4794-363b-9bc0-6a3981533ed5,7d65425b-8443-cf6b-b2d7-f1422b27a28c,831533,Errin 28 Day Pack,346.67,346.67,12,...,5fc4f4f2-fe7b-348a-9e8f-b1c25bf637ba,734afbd6-4794-363b-9bc0-6a3981533ed5,outpatient,698314001,Consultation for treatment (procedure),142.58,142.58,142.58,389095005.0,Contraception care (regime/therapy)
1,2015-05-25T18:43:10Z,2016-05-19T18:43:10Z,8313967b-6432-89aa-090f-4d2b81558360,734afbd6-4794-363b-9bc0-6a3981533ed5,c33709c3-0608-d708-3fd5-d0b60a5c6e55,757594,Jolivette 28 Day Pack,400.9,56.19,12,...,5fc4f4f2-fe7b-348a-9e8f-b1c25bf637ba,734afbd6-4794-363b-9bc0-6a3981533ed5,outpatient,698314001,Consultation for treatment (procedure),142.58,142.58,114.06,389095005.0,Contraception care (regime/therapy)
2,2018-03-20T22:58:36Z,2018-03-20T22:58:36Z,8313967b-6432-89aa-090f-4d2b81558360,734afbd6-4794-363b-9bc0-6a3981533ed5,58e1911f-c777-7d1f-6e69-22e2f7eaf526,1535362,sodium fluoride 0.0272 MG/MG Oral Gel,129.94,0.0,1,...,5fc4f4f2-fe7b-348a-9e8f-b1c25bf637ba,734afbd6-4794-363b-9bc0-6a3981533ed5,ambulatory,185349003,Encounter for check up (procedure),85.55,3968.15,2735.1,66383009.0,Gingivitis (disorder)
3,2020-08-31T19:05:17Z,2020-10-11T19:05:17Z,8313967b-6432-89aa-090f-4d2b81558360,734afbd6-4794-363b-9bc0-6a3981533ed5,a7e46c3f-ee94-63bb-8e5d-d4f41233ab38,849574,Naproxen sodium 220 MG Oral Tablet,168.16,0.0,1,...,a3c5c0bb-9bfc-36ca-b86c-daf8633f8f9a,734afbd6-4794-363b-9bc0-6a3981533ed5,emergency,50849002,Emergency room admission (procedure),146.18,146.18,0.0,48333001.0,Burn injury (morphologic abnormality)
4,2023-07-07T18:43:10Z,2024-07-01T19:04:15Z,8313967b-6432-89aa-090f-4d2b81558360,734afbd6-4794-363b-9bc0-6a3981533ed5,c58138d0-0e9a-00c9-ee85-9cb789e74191,389221,Etonogestrel 68 MG Drug Implant,13.12,0.0,12,...,5fc4f4f2-fe7b-348a-9e8f-b1c25bf637ba,734afbd6-4794-363b-9bc0-6a3981533ed5,outpatient,698314001,Consultation for treatment (procedure),142.58,15783.94,11608.13,389095005.0,Contraception care (regime/therapy)


In [78]:
# Preview the procedures/encounters join. Overlapping columns such as START,
# CODE and REASONCODE are suffixed '_x' (procedures) and '_y' (encounters).
procedures_with_encounters.head()

Unnamed: 0,START_x,STOP_x,patient_id_x,encounter_id,SYSTEM,CODE_x,DESCRIPTION_x,BASE_COST,REASONCODE_x,REASONDESCRIPTION_x,...,provider_id,payer_id,ENCOUNTERCLASS,CODE_y,DESCRIPTION_y,BASE_ENCOUNTER_COST,TOTAL_CLAIM_COST,PAYER_COVERAGE,REASONCODE_y,REASONDESCRIPTION_y
0,2023-09-26T00:36:40Z,2023-09-26T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,8be4f777-e319-7fa6-a795-808d2cdbba0f,http://snomed.info/sct,430193006,Medication reconciliation (procedure),239.94,,,...,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),136.8,512.74,0.0,,
1,2024-03-26T00:36:40Z,2024-03-26T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,7fe8c1c8-f019-976e-4a70-e49dbaec8cd5,http://snomed.info/sct,430193006,Medication reconciliation (procedure),215.7,,,...,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),136.8,488.5,0.0,,
2,2024-09-24T00:36:40Z,2024-09-24T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,1f72daa0-6d9d-4e3c-d317-031ab62f0431,http://snomed.info/sct,430193006,Medication reconciliation (procedure),215.7,,,...,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),136.8,488.5,0.0,,
3,2025-03-25T00:36:40Z,2025-03-25T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,12f7aeb1-70ca-ed6f-2c61-2fbb7ac53d33,http://snomed.info/sct,430193006,Medication reconciliation (procedure),215.7,,,...,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),136.8,488.5,0.0,,
4,2016-10-25T18:43:10Z,2016-10-25T18:58:10Z,8313967b-6432-89aa-090f-4d2b81558360,0ab8c1de-e107-474e-8c68-6fe49a046020,http://snomed.info/sct,252160004,Standard pregnancy test (procedure),4592.67,72892002.0,Normal pregnancy (finding),...,5fc4f4f2-fe7b-348a-9e8f-b1c25bf637ba,8fa6c185-e44e-3e34-8bd8-39be8694f4ce,ambulatory,424441002,Prenatal initial visit (regime/therapy),142.58,38054.63,23488.23,72892002.0,Normal pregnancy (finding)


In [61]:
# Preview the observations/patients join (one row per observation, with the
# patient's columns appended).
# NOTE(review): this cell's execution count is lower than the merge cell's,
# so the saved output may predate the latest merge — re-run to confirm.
observations_with_patients.head()

Unnamed: 0,DATE,patient_id,encounter_id,CATEGORY,CODE,DESCRIPTION,VALUE,UNITS,TYPE,BIRTHDATE,...,CITY,STATE,COUNTY,FIPS,ZIP,LAT,LON,HEALTHCARE_EXPENSES,HEALTHCARE_COVERAGE,INCOME
0,2022-04-19T00:36:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,vital-signs,8302-2,Body Height,50.5,cm,numeric,2022-04-19,...,Marlborough,Massachusetts,Middlesex County,25017.0,1752,42.30859,-71.567107,6002.42,0.0,60469
1,2022-04-19T00:36:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,vital-signs,72514-3,Pain severity - 0-10 verbal numeric rating [Sc...,0.0,{score},numeric,2022-04-19,...,Marlborough,Massachusetts,Middlesex County,25017.0,1752,42.30859,-71.567107,6002.42,0.0,60469
2,2022-04-19T00:36:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,vital-signs,29463-7,Body Weight,3.6,kg,numeric,2022-04-19,...,Marlborough,Massachusetts,Middlesex County,25017.0,1752,42.30859,-71.567107,6002.42,0.0,60469
3,2022-04-19T00:36:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,vital-signs,77606-2,Weight-for-length Per age and sex,36.8,%,numeric,2022-04-19,...,Marlborough,Massachusetts,Middlesex County,25017.0,1752,42.30859,-71.567107,6002.42,0.0,60469
4,2022-04-19T00:36:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,vital-signs,8289-1,Head Occipital-frontal circumference Percentile,33.7,%,numeric,2022-04-19,...,Marlborough,Massachusetts,Middlesex County,25017.0,1752,42.30859,-71.567107,6002.42,0.0,60469


In [59]:
# Preview the encounters/patients join (one row per encounter, with the
# patient's columns appended).
# NOTE(review): this cell's execution count is lower than the merge cell's,
# so the saved output may predate the latest merge — re-run to confirm.
encounters_with_patients.head()

Unnamed: 0,encounter_id,START,STOP,patient_id,organization_id,provider_id,payer_id,ENCOUNTERCLASS,CODE,DESCRIPTION,...,CITY,STATE,COUNTY,FIPS,ZIP,LAT,LON,HEALTHCARE_EXPENSES,HEALTHCARE_COVERAGE,INCOME
0,be04e13d-a16e-c330-3a9e-10f8db9a8aa1,2022-04-19T00:36:40Z,2022-04-19T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,907b1d2f-2bf8-31b5-abc2-14d6b75faa96,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),...,Marlborough,Massachusetts,Middlesex County,25017.0,1752,42.30859,-71.567107,6002.42,0.0,60469
1,1f391129-92f6-090b-d27e-48cdb42824c2,2022-05-24T00:36:40Z,2022-05-24T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,907b1d2f-2bf8-31b5-abc2-14d6b75faa96,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),...,Marlborough,Massachusetts,Middlesex County,25017.0,1752,42.30859,-71.567107,6002.42,0.0,60469
2,e6f28acc-e581-ab40-7f30-bd9e72a18e61,2022-07-26T00:36:40Z,2022-07-26T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,907b1d2f-2bf8-31b5-abc2-14d6b75faa96,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),...,Marlborough,Massachusetts,Middlesex County,25017.0,1752,42.30859,-71.567107,6002.42,0.0,60469
3,1b1ff37c-ddf8-96c0-4aef-7b0c01b2c845,2022-09-27T00:36:40Z,2022-09-27T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,907b1d2f-2bf8-31b5-abc2-14d6b75faa96,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),...,Marlborough,Massachusetts,Middlesex County,25017.0,1752,42.30859,-71.567107,6002.42,0.0,60469
4,8538767f-d67e-976f-ec4d-5f9ade23c2c6,2022-12-27T00:36:40Z,2022-12-27T00:51:40Z,dbaa48b1-720a-f5f2-9360-ae6bb9321037,907b1d2f-2bf8-31b5-abc2-14d6b75faa96,22ae92ae-29c7-3d31-b626-84f83995331e,0133f751-9229-3cfd-815f-b6d4979bdd6a,wellness,410620009,Well child visit (procedure),...,Marlborough,Massachusetts,Middlesex County,25017.0,1752,42.30859,-71.567107,6002.42,0.0,60469


In [5]:
import requests
import zipfile
import os
import io
import pandas as pd

# 1. Download the archive.
#    A timeout is passed so that a stalled connection raises an exception
#    instead of blocking the kernel forever (requests applies no timeout
#    unless one is given).
url = "https://mitre.box.com/shared/static/aw9po06ypfb9hrau4jamtvtz0e5ziucz.zip"
response = requests.get(url, timeout=60)
response.raise_for_status()  # Stop here on any HTTP error status

# Tables that are turned into DataFrames
csv_files = [
    "careplans.csv",
    "conditions.csv",
    "encounters.csv",
    "medications.csv",
    "observations.csv",
    "patients.csv",
    "procedures.csv",
    "payers.csv",
    "claims.csv"
]

# Dictionary to store DataFrames, keyed by CSV file name
dataframes_1000 = {}

# Directory the archive is unpacked into
extracted_path = '/tmp/synthea_sample_data'

with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
    # 2. Extract the ZIP file so the raw CSVs are also available on disk
    os.makedirs(extracted_path, exist_ok=True)
    zip_ref.extractall(extracted_path)
    print(f"Extracted to {extracted_path}")

    # 3. List the directory contents.
    #    This is a listing of the directory, not of the archive: the directory
    #    is never cleared, so it can also show files left by earlier extractions.
    extracted_files = os.listdir(extracted_path)
    print("Extracted files:", extracted_files)

    # 4. Create a DataFrame for each requested CSV.
    #    Each table is read from the archive that was just downloaded rather
    #    than by probing the shared directory. That way a stale file from a
    #    different archive can never be loaded by mistake, and CSVs stored in
    #    a sub-folder of the archive are still found (matched by base name;
    #    the first member with a given base name wins).
    archive_members = {}
    for member in zip_ref.namelist():
        if member.endswith('/'):
            continue  # directory entry, not a file
        archive_members.setdefault(member.rsplit('/', 1)[-1], member)

    for csv_file in csv_files:
        member = archive_members.get(csv_file)
        if member is None:
            print(f"{csv_file} not found in the extracted files.")
            continue
        with zip_ref.open(member) as csv_handle:
            dataframes_1000[csv_file] = pd.read_csv(csv_handle)
        print(f"Loaded {csv_file}")

Extracted to /tmp/synthea_sample_data
Extracted files: ['allergies.csv', 'careplans.csv', 'claims.csv', 'claims_transactions.csv', 'conditions.csv', 'csv', 'devices.csv', 'encounters.csv', 'imaging_studies.csv', 'immunizations.csv', 'medications.csv', 'observations.csv', 'organizations.csv', 'patients.csv', 'payers.csv', 'payer_transitions.csv', 'procedures.csv', 'providers.csv', 'supplies.csv']
Loaded careplans.csv
Loaded conditions.csv
Loaded encounters.csv
Loaded medications.csv
Loaded observations.csv
Loaded patients.csv
Loaded procedures.csv
Loaded payers.csv
Loaded claims.csv


In [6]:
# Give every loaded table its own variable. dict.get() is used, so a table
# that was not loaded by the previous cell ends up as None here rather than
# raising a KeyError.

# Clinical event tables
careplans_df_1000 = dataframes_1000.get('careplans.csv')
conditions_df_1000 = dataframes_1000.get('conditions.csv')
medications_df_1000 = dataframes_1000.get('medications.csv')
observations_df_1000 = dataframes_1000.get('observations.csv')
procedures_df_1000 = dataframes_1000.get('procedures.csv')

# Encounters and patients
encounters_df_1000 = dataframes_1000.get('encounters.csv')
patients_df_1000 = dataframes_1000.get('patients.csv')

# Payers and claims
payers_df_1000 = dataframes_1000.get('payers.csv')
claims_df_1000 = dataframes_1000.get('claims.csv')