In [1]:
import anndata as ad
import duckdb
# import ehrapy as ep
import ehrdata
import numpy as np

In [2]:
con = duckdb.connect()

In [3]:
ehrdata.dt.gibleed_omop(backend_handle=con)

Downloading data...


BadZipFile: File is not a zip file

In [4]:
ehrdata.dt.mimic_iv_omop(backend_handle=con)


Downloading data...
Download successful. ZIP file downloaded and extracted successfully to ehrapy_data/mimic-iv-demo-data-in-the-omop-common-data-model-0.9.
missing tables:  [['concept'], ['vocabulary'], ['domain'], ['concept_class'], ['concept_relationship'], ['relationship'], ['concept_synonym'], ['concept_ancestor'], ['source_to_concept_map'], ['drug_strength']]


In [5]:
obs = ehrdata.io.omop.extract_person(con)

In [6]:
obs

Unnamed: 0,person_id,gender_concept_id,year_of_birth,month_of_birth,day_of_birth,birth_datetime,race_concept_id,ethnicity_concept_id,location_id,provider_id,care_site_id,person_source_value,gender_source_value,gender_source_concept_id,race_source_value,race_source_concept_id,ethnicity_source_value,ethnicity_source_concept_id
0,3589912774911670296,8507,2095,,,,0,38003563,,,,10009628,M,0,,0,HISPANIC/LATINO,2000001408
1,-3210373572193940939,8507,2079,,,,0,38003563,,,,10011398,M,0,,0,HISPANIC/LATINO,2000001408
2,-775517641933593374,8507,2149,,,,8516,0,,,,10004235,M,0,BLACK/AFRICAN AMERICAN,2000001406,,0
3,-2575767131279873665,8507,2050,,,,8516,0,,,,10024043,M,0,BLACK/AFRICAN AMERICAN,2000001406,,0
4,-8970844422700220177,8507,2114,,,,8527,0,,,,10038933,M,0,WHITE,2000001404,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,-7671795861352464589,8532,2052,,,,2000001401,0,,,,10038081,F,0,UNKNOWN,2000001401,,0
96,5734523979606454056,8532,2069,,,,2000001401,0,,,,10036156,F,0,UNKNOWN,2000001401,,0
97,1532249960797525190,8532,2106,,,,2000001405,0,,,,10014078,F,0,UNABLE TO OBTAIN,2000001405,,0
98,5894416985828315484,8532,2055,,,,2000001405,0,,,,10019172,F,0,UNABLE TO OBTAIN,2000001405,,0


In [5]:
def _get_table_list() -> list:
flat_table_list = []
for _, value_list in get_table_catalog_dict().items():
for value in value_list:
flat_table_list.append(value)
return flat_table_list


def _set_up_duckdb(path: Path, backend_handle: DuckDBPyConnection) -> None:
tables = _get_table_list()

missing_tables = []
for table in tables:
# if path exists lowercse, uppercase, capitalized:
table_path = f"{path}/{table}.csv"
if os.path.exists(table_path):
if table == "measurement":
backend_handle.register(
table, backend_handle.read_csv(f"{path}/{table}.csv", dtype={"measurement_source_value": str})
)
else:
backend_handle.register(table, backend_handle.read_csv(f"{path}/{table}.csv"))
else:
missing_tables.append([table])
print("missing tables: ", missing_tables)


def mimic_iv_omop(backend_handle: DuckDBPyConnection, data_path: Path | None = None) -> None:
"""Loads the MIMIC-IV demo data in the OMOP Common Data model.

More details: https://physionet.org/content/mimic-iv-demo-omop/0.9/#files-panel.

Parameters
----------
backend_handle
A handle to the backend which shall be used. Only duckdb connection supported at the moment.
data_path
Path to the tables. If the path exists, the data is loaded from there. Else, the data is downloaded.

Returns
-------
Returns nothing, but adds the tables to the backend via the handle.

Examples
--------
>>> import ehrapy as ep
>>> import ehrdata as ed
>>> import duckdb
>>> con = duckdb.connect()
>>> ed.dt.mimic_iv_omop(backend_handle=con)
>>> con.execute("SHOW TABLES;").fetchall()
"""
if data_path is None:
data_path = "ehrapy_data/mimic-iv-demo-data-in-the-omop-common-data-model-0.9"

if os.path.exists(data_path):
print(f"Path to data exists, load tables from there: {data_path}")
else:
print("Downloading data...")
URL = "https://physionet.org/static/published-projects/mimic-iv-demo-omop/mimic-iv-demo-data-in-the-omop-common-data-model-0.9.zip"
response = requests.get(URL)

if response.status_code == 200:
# Step 2: Use zipfile and io to open the ZIP file in memory
with zipfile.ZipFile(io.BytesIO(response.content)) as z:
# Extract all contents of the ZIP file
z.extractall("ehrapy_data")  # Specify the folder where files will be extracted
print(f"Download successful. ZIP file downloaded and extracted successfully to {data_path}.")
else:
print(f"Failed to download the file. Status code: {response.status_code}")
return

return _set_up_duckdb(data_path + "/1_omop_data_csv", backend_handle)


def gibleed_omop(backend_handle: DuckDBPyConnection, data_path: Path | None = None) -> None:
"""Loads the GIBleed dataset.

More details: https://github.com/OHDSI/EunomiaDatasets.

Parameters
----------
backend_handle
A handle to the backend which shall be used. Only duckdb connection supported at the moment.
data_path
Path to the tables. If the path exists, the data is loaded from there. Else, the data is downloaded.

Returns
-------
Returns nothing, but adds the tables to the backend via the handle.

Examples
--------
>>> import ehrapy as ep
>>> import ehrdata as ed
>>> import duckdb
>>> con = duckdb.connect()
>>> ed.dt.gibleed_omop(backend_handle=con)
>>> con.execute("SHOW TABLES;").fetchall()
"""

if data_path is None:
data_path = "ehrapy_data/GIBleed_dataset"

if os.path.exists(data_path):
print(f"Path to data exists, load tables from there: {data_path}")
else:
print("Downloading data...")
URL = "https://github.com/OHDSI/EunomiaDatasets/blob/main/datasets/GiBleed/GiBleed_5.3.zip"
response = requests.get(URL)

if response.status_code == 200:
# Step 2: Use zipfile and io to open the ZIP file in memory
with zipfile.ZipFile(io.BytesIO(response.content)) as z:
# Extract all contents of the ZIP file
z.extractall("ehrapy_data")  # Specify the folder where files will be extracted
print(f"Download successful. ZIP file downloaded and extracted successfully to {data_path}.")
else:
print(f"Failed to download the file. Status code: {response.status_code}")
return

return _set_up_duckdb(data_path + "/gibleed_data_csv", backend_handle)

IndentationError: expected an indented block after function definition on line 9 (1137876071.py, line 10)