# Pipeline for OMOP-CDM migration

Set the appropriate configuration values in the file `EHR-QC-Standardise/ehrqc/standardise/Config.py` before running the steps below.

## Establish DB connection

In [9]:
import os
import psycopg2

def getConnection():
    """Open a new psycopg2 connection to the pipeline's Postgres database.

    All connection settings are read from environment variables:
    POSTGRES_DB_NAME, POSTGRES_USER_NAME, POSTGRES_HOSTNAME,
    POSTGRES_PORT_NUMBER and POSTGRES_PASSWORD. A missing variable raises
    KeyError from the `os.environ` lookup.

    Returns:
        The connection object returned by `psycopg2.connect`. It is handed
        back open; closing it is left to the caller.
    """
    connect = psycopg2.connect
    env = os.environ

    # Keys are the keyword arguments psycopg2.connect is called with.
    settings = {
        'dbname': env['POSTGRES_DB_NAME'],
        'user': env['POSTGRES_USER_NAME'],
        'host': env['POSTGRES_HOSTNAME'],
        'port': env['POSTGRES_PORT_NUMBER'],
        'password': env['POSTGRES_PASSWORD'],
    }

    return connect(**settings)

## Import Standard Vocabulary

In [5]:
import os
import subprocess


# Import the standard (Athena) vocabulary by running the EHR-QC `Run` module
# with the `-l` flag from inside the EHR-QC checkout.
#
# An argument list plus `cwd=` is used instead of a concatenated
# `cd <base>;<command>` shell string: a base path containing spaces or shell
# metacharacters is passed through untouched, and the command can no longer
# run in the wrong directory when the `cd` fails.
ehrQcBase = os.path.abspath(os.environ['EHR_QC_STANDARDISE_BASE'])

subprocess.run(
    [os.path.join(ehrQcBase, '.venv', 'bin', 'python'), '-m', 'ehrqc.standardise.Run', '-l'],
    cwd=ehrQcBase,
    check=True,  # raise CalledProcessError on a non-zero exit so later cells do not run on a failed step
).returncode  # displayed as the cell result (0 on success)


2023-11-06 12:18:19,177 - EHR-QC - INFO - Parsing command line arguments
2023-11-06 12:18:19,180 - EHR-QC - INFO - Start!!
2023-11-06 12:18:19,190 - EHR-QC - INFO - Creating schema: eicu_etl_20231106
2023-11-06 12:18:19,190 - EHR-QC - INFO - Creating schema: eicu_etl_20231106
2023-11-06 12:18:19,190 - EHR-QC - INFO - Importing Standard Vocabulary (Athena) from CSV files
2023-11-06 12:18:19,190 - EHR-QC - INFO - Creating schema: eicu_etl_20231106
2023-11-06 12:18:19,191 - EHR-QC - INFO - Creating table: eicu_etl_20231106.voc_concept


  df = pd.read_csv(filePath, sep='\t')


2023-11-06 12:18:36,040 - EHR-QC - INFO - Importing data to table: eicu_etl_20231106.voc_concept
2023-11-06 12:27:14,774 - EHR-QC - INFO - Creating table: eicu_etl_20231106.voc_vocabulary
2023-11-06 12:27:14,809 - EHR-QC - INFO - Importing data to table: eicu_etl_20231106.voc_vocabulary
2023-11-06 12:27:14,815 - EHR-QC - INFO - Creating table: eicu_etl_20231106.voc_domain
2023-11-06 12:27:14,830 - EHR-QC - INFO - Importing data to table: eicu_etl_20231106.voc_domain
2023-11-06 12:27:14,832 - EHR-QC - INFO - Creating table: eicu_etl_20231106.voc_concept_class
2023-11-06 12:27:14,852 - EHR-QC - INFO - Importing data to table: eicu_etl_20231106.voc_concept_class
2023-11-06 12:27:14,862 - EHR-QC - INFO - Creating table: eicu_etl_20231106.voc_concept_relationship
2023-11-06 12:28:24,363 - EHR-QC - INFO - Importing data to table: eicu_etl_20231106.voc_concept_relationship
2023-11-06 13:12:20,817 - EHR-QC - INFO - Creating table: eicu_etl_20231106.voc_relationship
2023-11-06 13:12:20,973 - EH

## Import EHR

In [11]:
import os
import subprocess


# Import the EHR data from CSV files by running the EHR-QC `Run` module with
# the `-f` flag from inside the EHR-QC checkout.
#
# An argument list plus `cwd=` is used instead of a concatenated
# `cd <base>;<command>` shell string: a base path containing spaces or shell
# metacharacters is passed through untouched, and the command can no longer
# run in the wrong directory when the `cd` fails.
ehrQcBase = os.path.abspath(os.environ['EHR_QC_STANDARDISE_BASE'])

subprocess.run(
    [os.path.join(ehrQcBase, '.venv', 'bin', 'python'), '-m', 'ehrqc.standardise.Run', '-f'],
    cwd=ehrQcBase,
    check=True,  # raise CalledProcessError on a non-zero exit so later cells do not run on a failed step
).returncode  # displayed as the cell result (0 on success)

2023-11-07 19:22:59,681 - EHR-QC - INFO - Parsing command line arguments
2023-11-07 19:22:59,685 - EHR-QC - INFO - Start!!
2023-11-07 19:22:59,695 - EHR-QC - INFO - Creating schema: eicu_etl_20231106
2023-11-07 19:22:59,695 - EHR-QC - INFO - Creating schema: eicu_etl_20231106
2023-11-07 19:22:59,696 - EHR-QC - INFO - Importing EHR data from CSV files
2023-11-07 19:22:59,696 - EHR-QC - INFO - Creating table: eicu_etl_20231106.admissions
2023-11-07 19:23:01,164 - EHR-QC - INFO - Importing data to table: eicu_etl_20231106.admissions
2023-11-07 19:23:12,144 - EHR-QC - INFO - End!!


0

## Create a Cohort

### Create a table of the episodes in the cohort

In [8]:
# Build the cohort table: the distinct episode ids whose diagnosis text contains
# one of three codes (presumably ICD-9 codes for sepsis, severe sepsis and
# septic shock -- TODO confirm against the study protocol).
#
# NOTE: getConnection() is defined in the "Establish DB connection" cell, which
# must be executed first; running this cell without it raises NameError.
con = getConnection()

# Dropping first keeps the cell re-runnable: a bare `create table` fails as soon
# as the cohort table already exists from a previous run.
dropCohortQuery = 'drop table if exists eicu_etl_20231106.cohort;'

cohortQuery = '''
    create table eicu_etl_20231106.cohort as
    select
    distinct episode_id
    from
    eicu_etl_20231106.diagnosis
    where
    diagnosis like '%995.91%'
    or diagnosis like '%995.92%'
    or diagnosis like '%785.52%'
    ;
'''
try:
    # `with con` commits on success and rolls back on an exception, but it does
    # NOT close the psycopg2 connection -- hence the explicit close() below.
    with con:
        with con.cursor() as cursor:
            cursor.execute(dropCohortQuery)
            cursor.execute(cohortQuery)
finally:
    con.close()


NameError: name 'getConnection' is not defined

## Stage imported EHR data

In [13]:
import os
import subprocess


# Stage the imported EHR data by running the EHR-QC `Run` module with the `-s`
# flag from inside the EHR-QC checkout.
#
# An argument list plus `cwd=` is used instead of a concatenated
# `cd <base>;<command>` shell string: a base path containing spaces or shell
# metacharacters is passed through untouched, and the command can no longer
# run in the wrong directory when the `cd` fails.
ehrQcBase = os.path.abspath(os.environ['EHR_QC_STANDARDISE_BASE'])

subprocess.run(
    [os.path.join(ehrQcBase, '.venv', 'bin', 'python'), '-m', 'ehrqc.standardise.Run', '-s'],
    cwd=ehrQcBase,
    check=True,  # raise CalledProcessError on a non-zero exit so later cells do not run on a failed step
).returncode  # displayed as the cell result (0 on success)


2023-11-07 21:56:10,681 - EHR-QC - INFO - Parsing command line arguments
2023-11-07 21:56:10,685 - EHR-QC - INFO - Start!!
2023-11-07 21:56:10,695 - EHR-QC - INFO - Creating schema: eicu_etl_20231106
2023-11-07 21:56:10,696 - EHR-QC - INFO - Staging EHR data
2023-11-07 21:56:10,696 - EHR-QC - INFO - Creating staging table: eicu_etl_20231106.src_labevents


## Perform concept mapping

### Labs

#### Get the concepts to map

In [31]:
import os

import pandas as pd


# Export the distinct `itemid` values of the staged lab events (as a single
# `concept` column) to concepts_labs.csv, the input of the concept-mapping step.
#
# NOTE: getConnection() comes from the "Establish DB connection" cell, which
# must be executed first.
con = getConnection()
try:
    labsConceptsDf = pd.read_sql_query(
        "select distinct itemid as concept from eicu_etl_20231106.src_labevents",
        con,
    )
finally:
    # The connection is only needed for this one query; close it rather than
    # leaving it open for the lifetime of the kernel.
    con.close()

labsConceptsDf.to_csv(
    os.path.join(os.environ['EICU_EHR_PIPELINE_BASE'], 'data', 'concept_mapping', 'concepts_labs.csv'),
    index=False,
)


  labsConceptsDf = pd.read_sql_query("select distinct itemid as concept from eicu_etl_20231106.src_labevents", con)


#### Perform the concept mapping

In [1]:
import os
import subprocess


# Map the exported lab concepts by running the EHR-QC `MapConcepts` module from
# inside the EHR-QC checkout. The mapped concepts are written to
# concepts_labs_to_review.csv.
#
# Arguments are passed as a list (no shell), so the values that contain spaces
# ("Observable Entity") and the environment-derived paths need no hand-written
# quoting, and `cwd=` replaces the former `cd <base>;` prefix.
ehrQcBase = os.path.abspath(os.environ['EHR_QC_STANDARDISE_BASE'])
pipelineBase = os.environ['EICU_EHR_PIPELINE_BASE']

subprocess.run(
    [
        os.path.join(ehrQcBase, '.venv', 'bin', 'python'),
        '-m', 'ehrqc.standardise.MapConcepts',
        'Measurement',        # domain_id
        'SNOMED',             # vocabulary_id
        'Observable Entity',  # concept_class_id
        pipelineBase + '/data/concept_mapping/concepts_labs.csv',            # concepts_path
        'concept',            # concept_name_row
        pipelineBase + '/data/concept_mapping/concepts_labs_to_review.csv',  # mapped_concepts_save_path
        '--model_pack_path=' + pipelineBase + '/data/medcat/mc_modelpack_snomed_int_16_mar_2022_25be3857ba34bdd5.zip',
    ],
    cwd=ehrQcBase,
    check=True,  # raise CalledProcessError on a non-zero exit so later cells do not run on a failed step
).returncode  # displayed as the cell result (0 on success)




Parsing command line arguments
2023-11-07 15:57:10,286 - EHR-QC - INFO - domain_id: Measurement
2023-11-07 15:57:10,286 - EHR-QC - INFO - vocabulary_id: SNOMED
2023-11-07 15:57:10,286 - EHR-QC - INFO - concept_class_id: Observable Entity
2023-11-07 15:57:10,287 - EHR-QC - INFO - concepts_path: /home/yram0006/phd/chapter_2/workspace/eICU-EHR-Pipeline/data/concept_mapping/concepts_labs.csv
2023-11-07 15:57:10,287 - EHR-QC - INFO - concept_name_row: concept
2023-11-07 15:57:10,287 - EHR-QC - INFO - mapped_concepts_save_path: /home/yram0006/phd/chapter_2/workspace/eICU-EHR-Pipeline/data/concept_mapping/concepts_labs_to_review.csv
2023-11-07 15:57:10,287 - EHR-QC - INFO - model_pack_path: /home/yram0006/phd/chapter_2/workspace/eICU-EHR-Pipeline/data/medcat/mc_modelpack_snomed_int_16_mar_2022_25be3857ba34bdd5.zip
2023-11-07 15:57:10,640 - EHR-QC - INFO - Getting connection
2023-11-07 15:57:10,651 - EHR-QC - INFO - Reading concepts


  standardConceptsDf = pd.read_sql_query(standardConceptsQuery, con)


2023-11-07 15:57:11,864 - EHR-QC - INFO - Creating reverse index
2023-11-07 15:57:12,653 - EHR-QC - INFO - Initializing Medcat


  _C._set_default_tensor_type(t)
100%|██████████| 155/155 [00:54<00:00,  2.84it/s]


0

### Vitals

*Note: in this dataset the vitals are already standardised and aggregated, so no concept mapping is performed for them.*

## Import Mapped Concepts

In [3]:
import os
import subprocess


# Import the concept mappings by running the EHR-QC `Run` module with the `-c`
# flag from inside the EHR-QC checkout.
#
# An argument list plus `cwd=` is used instead of a concatenated
# `cd <base>;<command>` shell string: a base path containing spaces or shell
# metacharacters is passed through untouched, and the command can no longer
# run in the wrong directory when the `cd` fails.
ehrQcBase = os.path.abspath(os.environ['EHR_QC_STANDARDISE_BASE'])

subprocess.run(
    [os.path.join(ehrQcBase, '.venv', 'bin', 'python'), '-m', 'ehrqc.standardise.Run', '-c'],
    cwd=ehrQcBase,
    check=True,  # raise CalledProcessError on a non-zero exit so later cells do not run on a failed step
).returncode  # displayed as the cell result (0 on success)


2023-11-07 19:02:25,551 - EHR-QC - INFO - Parsing command line arguments
2023-11-07 19:02:25,554 - EHR-QC - INFO - Start!!
2023-11-07 19:02:25,563 - EHR-QC - INFO - Creating schema: eicu_etl_20231106
2023-11-07 19:02:25,564 - EHR-QC - INFO - Import Concept Mappings
2023-11-07 19:02:25,564 - EHR-QC - INFO - Creating table: eicu_etl_20231106.concept_mapping
2023-11-07 19:02:25,954 - EHR-QC - INFO - Importing data to table: eicu_etl_20231106.concept_mapping
2023-11-07 19:02:25,960 - EHR-QC - INFO - End!!


0

## Perform ETL

In [12]:
import os
import subprocess


# Perform the ETL by running the EHR-QC `Run` module with the `-e` flag from
# inside the EHR-QC checkout.
#
# An argument list plus `cwd=` is used instead of a concatenated
# `cd <base>;<command>` shell string: a base path containing spaces or shell
# metacharacters is passed through untouched, and the command can no longer
# run in the wrong directory when the `cd` fails.
ehrQcBase = os.path.abspath(os.environ['EHR_QC_STANDARDISE_BASE'])

subprocess.run(
    [os.path.join(ehrQcBase, '.venv', 'bin', 'python'), '-m', 'ehrqc.standardise.Run', '-e'],
    cwd=ehrQcBase,
    check=True,  # raise CalledProcessError on a non-zero exit so later cells do not run on a failed step
).returncode  # displayed as the cell result (0 on success)


2023-11-07 21:34:17,165 - EHR-QC - INFO - Parsing command line arguments
2023-11-07 21:34:17,168 - EHR-QC - INFO - Start!!
2023-11-07 21:34:17,177 - EHR-QC - INFO - Creating schema: eicu_etl_20231106
2023-11-07 21:34:17,178 - EHR-QC - INFO - Performing ETL
2023-11-07 21:34:17,178 - EHR-QC - INFO - ETL for the entity: Visit Occurrence
2023-11-07 21:34:17,178 - EHR-QC - INFO - Creating table: eicu_etl_20231106.cdm_visit_occurrence
2023-11-07 21:34:17,178 - EHR-QC - INFO - Dropping table if exists: eicu_etl_20231106.cdm_visit_occurrence
2023-11-07 21:34:17,178 - EHR-QC - INFO - Creating table: eicu_etl_20231106.cdm_visit_occurrence
2023-11-07 21:34:17,183 - EHR-QC - INFO - Loading table: eicu_etl_20231106.cdm_visit_occurrence
2023-11-07 21:34:17,289 - EHR-QC - INFO - End!!


0