# Standardise Sepsis ICD Cohort data

## Verify EHR-QC Docker Installation

In [1]:
import os

# Ask the EHRQC package for its version from inside the workspace checkout;
# os.system's return value (the shell exit status) is shown as the cell result.
os.system(
    'cd /superbugai-data/yash/chapter_1/workspace/EHRQC;'
    'python -m EHRQC -v'
)

EHRQC 1.0


0

## Utility Functions

In [2]:
def getConnection():
    """Open a psycopg2 connection to the local ``mimic4`` PostgreSQL database.

    The password is read from the ``PGPASSWORD`` environment variable when it
    is set to a non-empty value; otherwise the previous hard-coded default is
    used, so existing setups keep working unchanged.

    Returns:
        The connection object returned by ``psycopg2.connect``.
    """
    import os

    import psycopg2

    # Prefer a password supplied through the environment over the baked-in default.
    password = os.environ.get('PGPASSWORD') or 'mysecretpassword'

    # Connect to the local postgres instance that holds the 'mimic4' database
    con = psycopg2.connect(
        dbname='mimic4',
        user='postgres',
        host='localhost',
        port=5434,
        password=password
        )

    return con

In [3]:
import os

def generateCsv(query, filePath):
    """Export the result of a SQL query to a CSV file using ``psql``.

    The query is wrapped in psql's client-side copy meta-command (CSV with a
    header row, comma delimiter) and psql's standard output is written to
    ``filePath``.

    Args:
        query: SQL ``select`` statement to export. It is passed to psql as a
            single argument, so quotes, ``$`` and backticks in the SQL are not
            interpreted by a shell.
        filePath: Destination CSV path; created or overwritten.

    Raises:
        subprocess.CalledProcessError: if psql exits with a non-zero status.
        OSError: if ``filePath`` cannot be opened for writing.
    """
    import subprocess

    # Backslash is doubled so Python does not see the invalid escape "\c";
    # psql still receives the meta-command "\copy".
    copyCommand = "\\copy (" + query + ") To STDOUT With CSV HEADER DELIMITER ',';"
    psqlArgs = [
        'psql',
        '-h', 'localhost',
        '-p', '5434',
        '-d', 'mimic4',
        '-U', 'postgres',
        '-c', copyCommand,
    ]

    # Hand the password to psql through its environment instead of a shell
    # "export"; an existing non-empty PGPASSWORD takes precedence over the default.
    env = dict(os.environ)
    env['PGPASSWORD'] = os.environ.get('PGPASSWORD') or 'mysecretpassword'

    with open(filePath, 'wb') as csvFile:
        # check=True surfaces a failed export instead of silently leaving an
        # empty or truncated CSV behind.
        subprocess.run(psqlArgs, stdout=csvFile, env=env, check=True)

In [4]:
# Open the shared database connection; the cohort-creation cell below runs its
# statements through `con`.
con = getConnection()

## Create Sepsis ICD Cohort csv files

### Create Cohort

In [5]:
# The cohort table lives in the migration source schema. Create that schema if
# it is missing so this cell also runs on a database where it does not exist yet.
createSchemaQuery = """create schema if not exists omop_migration_source_20230809"""
dropCohortQuery = """drop table if exists omop_migration_source_20230809.cohort cascade"""
# Cohort = distinct (subject_id, hadm_id) pairs from mimiciv.diagnoses_icd that
# carry one of the ICD-9 or ICD-10 codes listed below.
createCohortQuery = """create table omop_migration_source_20230809.cohort as
(
	select
	distinct subject_id, hadm_id
	from
	mimiciv.diagnoses_icd
	where
	(icd_code in ('99591', '99592', '78552') and icd_version = 9)
	or
	(icd_code in ('A419', 'R6520', 'R6521') and icd_version = 10)
)
;
"""

# Run the three statements in order on the shared connection: ensure the schema,
# drop any previous cohort table, then rebuild it.
with con:
    with con.cursor() as cursor:
        cursor.execute(createSchemaQuery)
        cursor.execute(dropCohortQuery)
        cursor.execute(createCohortQuery)


### Save Cohort

In [8]:
# Dump the whole cohort table (its subject_id / hadm_id pairs) to cohort.csv.
query = "select * from omop_migration_source_20230809.cohort"
filePath = "/superbugai-data/mimiciv/sepsis_icd/cohort.csv"

generateCsv(query, filePath)

### Save Patients

In [10]:
# Export the mimiciv.patients rows of every subject that appears in the cohort.
query = """select * from
	mimiciv.patients
	where subject_id in (
		select subject_id from omop_migration_source_20230809.cohort
	)
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/patients.csv"

generateCsv(query, filePath)

### Save Admissions

In [11]:
# Export the mimiciv.admissions rows whose (subject_id, hadm_id) pair is in the cohort.
query = """select * from
    mimiciv.admissions
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/admissions.csv"

generateCsv(query, filePath)

### Save Transfers

In [12]:
# Export the mimiciv.transfers rows whose (subject_id, hadm_id) pair is in the cohort.
query = """select * from
    mimiciv.transfers
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/transfers.csv"

generateCsv(query, filePath)

### Diagnoses ICD

In [13]:
# Export every mimiciv.diagnoses_icd row of the cohort admissions (all codes
# recorded for them, not only the ones used to select the cohort).
query = """select * from
    mimiciv.diagnoses_icd
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/diagnoses_icd.csv"

generateCsv(query, filePath)

### Services

In [14]:
# Export the mimiciv.services rows whose (subject_id, hadm_id) pair is in the cohort.
query = """select * from
    mimiciv.services
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/services.csv"

generateCsv(query, filePath)

### Labevents

In [15]:
# Export the mimiciv.labevents rows whose (subject_id, hadm_id) pair is in the cohort.
query = """select * from
    mimiciv.labevents
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/labevents.csv"

generateCsv(query, filePath)

### Procedures ICD

In [16]:
# Export the mimiciv.procedures_icd rows whose (subject_id, hadm_id) pair is in the cohort.
query = """select * from
    mimiciv.procedures_icd
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/procedures_icd.csv"

generateCsv(query, filePath)

### Prescriptions

In [17]:
# Export the mimiciv.prescriptions rows whose (subject_id, hadm_id) pair is in the cohort.
query = """select * from
    mimiciv.prescriptions
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/prescriptions.csv"

generateCsv(query, filePath)

### Procedureevents

In [18]:
# Export the mimiciv.procedureevents rows whose (subject_id, hadm_id) pair is in the cohort.
query = """select * from
    mimiciv.procedureevents
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/procedureevents.csv"

generateCsv(query, filePath)

### Date Time Events

In [19]:
# Export the mimiciv.datetimeevents rows whose (subject_id, hadm_id) pair is in the cohort.
query = """
    select * from
    mimiciv.datetimeevents
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/datetimeevents.csv"

generateCsv(query, filePath)

### HCPCS Events

In [20]:
# Export the mimiciv.hcpcsevents rows whose (subject_id, hadm_id) pair is in the cohort.
query = """select * from
mimiciv.hcpcsevents
where (subject_id, hadm_id) in (
	select subject_id, hadm_id from omop_migration_source_20230809.cohort
)
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/hcpcsevents.csv"

generateCsv(query, filePath)

### Drgcodes

In [21]:
# Export the mimiciv.drgcodes rows whose (subject_id, hadm_id) pair is in the cohort.
query = """select * from
    mimiciv.drgcodes
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/drgcodes.csv"

generateCsv(query, filePath)

### Microbiologyevents

In [22]:
# Export the mimiciv.microbiologyevents rows whose (subject_id, hadm_id) pair is in the cohort.
query = """select * from
    mimiciv.microbiologyevents
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/microbiologyevents.csv"

generateCsv(query, filePath)

### Pharmacy

In [23]:
# Export the mimiciv.pharmacy rows whose (subject_id, hadm_id) pair is in the cohort.
query = """select * from
    mimiciv.pharmacy
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/pharmacy.csv"

generateCsv(query, filePath)

### Chartevents

In [24]:
# Export the mimiciv.chartevents rows whose (subject_id, hadm_id) pair is in the cohort.
query = """select * from
    mimiciv.chartevents
    where (subject_id, hadm_id) in (
        select subject_id, hadm_id from omop_migration_source_20230809.cohort
    )
"""
filePath = "/superbugai-data/mimiciv/sepsis_icd/chartevents.csv"

generateCsv(query, filePath)

## Standardise

### Get help menu

In [25]:
import os

# Print the command-line help of the OMOP migration runner (-h), using the
# workspace's own virtualenv interpreter.
os.system(
    'cd /superbugai-data/yash/chapter_1/workspace/EHRQC;'
    '.venv/bin/python -m ehrqc.standardise.migrate_omop.Run -h'
)

2023-08-10 10:25:00,815 - Standardise - INFO - Parsing command line arguments
usage: Run.py [-h] [-l] [-f] [-s] [-m] [-c] [-e] [-u]

Migrate EHR to OMOP-CDM

optional arguments:
  -h, --help            show this help message and exit
  -l, --create_lookup   Create lookup by importing Athena vocabulary and
                        custom mapping
  -f, --import_file     Import EHR from a csv files
  -s, --stage           Stage the data on the ETL schema
  -m, --generate_mapping
                        Generate custom mapping of concepts from the data
  -c, --import_custom_mapping
                        Import custom mapping file
  -e, --perform_etl     Perform migration Extract-Transform-Load (ETL)
                        operations
  -u, --unload          Unload data to CDM schema


0

### Create lookup by importing Athena vocabulary and custom mapping

In [26]:
import os

# -l: create the lookup tables by importing the Athena vocabulary and custom mapping.
os.system(
    'cd /superbugai-data/yash/chapter_1/workspace/EHRQC;'
    '.venv/bin/python -m ehrqc.standardise.migrate_omop.Run -l'
)

2023-08-10 10:27:18,270 - Standardise - INFO - Parsing command line arguments
2023-08-10 10:27:18,272 - Standardise - INFO - Start!!
2023-08-10 10:27:18,299 - Standardise - INFO - Creating schema: vocabulary_test_20230809
2023-08-10 10:27:18,336 - Standardise - INFO - Creating schema: omop_migration_etl_20230809
2023-08-10 10:27:18,337 - Standardise - INFO - Creating Lookups
2023-08-10 10:27:18,338 - Standardise - INFO - Creating table: omop_migration_etl_20230809.voc_concept


  df = pd.read_csv(filePath, sep='\t')


2023-08-10 10:28:00,069 - Standardise - INFO - Importing data to table: omop_migration_etl_20230809.voc_concept
2023-08-10 10:36:36,028 - Standardise - INFO - Creating table: omop_migration_etl_20230809.voc_vocabulary
2023-08-10 10:36:36,262 - Standardise - INFO - Importing data to table: omop_migration_etl_20230809.voc_vocabulary
2023-08-10 10:36:36,297 - Standardise - INFO - Creating table: omop_migration_etl_20230809.voc_domain
2023-08-10 10:36:36,348 - Standardise - INFO - Importing data to table: omop_migration_etl_20230809.voc_domain
2023-08-10 10:36:36,351 - Standardise - INFO - Creating table: omop_migration_etl_20230809.voc_concept_class
2023-08-10 10:36:36,371 - Standardise - INFO - Importing data to table: omop_migration_etl_20230809.voc_concept_class
2023-08-10 10:36:36,382 - Standardise - INFO - Creating table: omop_migration_etl_20230809.voc_concept_relationship
2023-08-10 10:37:58,691 - Standardise - INFO - Importing data to table: omop_migration_etl_20230809.voc_concept

### Import EHR from CSV files

In [2]:
import os

# -f: import the EHR data from the CSV files into the migration source schema.
os.system(
    'cd /superbugai-data/yash/chapter_1/workspace/EHRQC;'
    '.venv/bin/python -m ehrqc.standardise.migrate_omop.Run -f'
)

2023-08-10 12:28:51,357 - Standardise - INFO - Parsing command line arguments
2023-08-10 12:28:51,358 - Standardise - INFO - Start!!
2023-08-10 12:28:51,376 - Standardise - INFO - Creating schema: omop_migration_source_20230809
2023-08-10 12:28:51,377 - Standardise - INFO - Creating schema: omop_migration_etl_20230809
2023-08-10 12:28:51,378 - Standardise - INFO - Importing EHR data from CSV files
2023-08-10 12:28:51,378 - Standardise - INFO - Creating table: omop_migration_source_20230809.D_LABITEMS
2023-08-10 12:28:51,497 - Standardise - INFO - Importing data to table: omop_migration_source_20230809.D_LABITEMS
2023-08-10 12:28:51,562 - Standardise - INFO - Creating table: omop_migration_source_20230809.PROCEDURES_ICD
2023-08-10 12:28:51,677 - Standardise - INFO - Importing data to table: omop_migration_source_20230809.PROCEDURES_ICD
2023-08-10 12:28:53,475 - Standardise - INFO - Creating table: omop_migration_source_20230809.HCPCSEVENTS
2023-08-10 12:28:53,481 - Standardise - INFO - 

  df = pd.read_csv(filePath, sep=fileSeparator)


2023-08-10 12:29:00,723 - Standardise - INFO - Importing data to table: omop_migration_source_20230809.PRESCRIPTIONS
2023-08-10 12:31:16,711 - Standardise - INFO - Creating table: omop_migration_source_20230809.MICROBIOLOGYEVENTS
2023-08-10 12:31:18,493 - Standardise - INFO - Importing data to table: omop_migration_source_20230809.MICROBIOLOGYEVENTS
2023-08-10 12:31:49,696 - Standardise - INFO - Creating table: omop_migration_source_20230809.PHARMACY


  df = pd.read_csv(filePath, sep=fileSeparator)


2023-08-10 12:32:00,251 - Standardise - INFO - Importing data to table: omop_migration_source_20230809.PHARMACY
2023-08-10 12:34:55,628 - Standardise - INFO - Creating table: omop_migration_source_20230809.PROCEDUREEVENTS
2023-08-10 12:34:56,807 - Standardise - INFO - Importing data to table: omop_migration_source_20230809.PROCEDUREEVENTS
2023-08-10 12:35:14,815 - Standardise - INFO - Creating table: omop_migration_source_20230809.D_ITEMS
2023-08-10 12:35:14,867 - Standardise - INFO - Importing data to table: omop_migration_source_20230809.D_ITEMS
2023-08-10 12:35:15,086 - Standardise - INFO - Creating table: omop_migration_source_20230809.DATETIMEEVENTS
2023-08-10 12:35:19,949 - Standardise - INFO - Importing data to table: omop_migration_source_20230809.DATETIMEEVENTS
2023-08-10 12:36:54,870 - Standardise - INFO - Creating table: omop_migration_source_20230809.CHARTEVENTS
2023-08-10 12:36:54,887 - Standardise - INFO - Creating child table: omop_migration_source_20230809.CHARTEVENTS_1

  df = pd.read_csv(filePath, sep=fileSeparator)


2023-08-10 12:41:41,095 - Standardise - INFO - Importing data to table: omop_migration_source_20230809.CHARTEVENTS
2023-08-10 14:19:34,848 - Standardise - INFO - End!!


0

### Stage the data on the ETL schema

In [3]:
import os

# -s: stage the imported data on the ETL schema.
os.system(
    'cd /superbugai-data/yash/chapter_1/workspace/EHRQC;'
    '.venv/bin/python -m ehrqc.standardise.migrate_omop.Run -s'
)

2023-08-10 14:24:14,077 - Standardise - INFO - Parsing command line arguments
2023-08-10 14:24:14,078 - Standardise - INFO - Start!!
2023-08-10 14:24:14,156 - Standardise - INFO - Creating schema: omop_migration_etl_20230809
2023-08-10 14:24:14,164 - Standardise - INFO - Staging EHR data
2023-08-10 14:24:14,164 - Standardise - INFO - Creating staging table: omop_migration_etl_20230809.src_patients
2023-08-10 14:24:14,865 - Standardise - INFO - Creating staging table: omop_migration_etl_20230809.src_admissions
2023-08-10 14:24:15,084 - Standardise - INFO - Creating staging table: omop_migration_etl_20230809.src_transfers
2023-08-10 14:24:15,715 - Standardise - INFO - Creating staging table: omop_migration_etl_20230809.src_diagnoses_icd
2023-08-10 14:24:17,887 - Standardise - INFO - Creating staging table: omop_migration_etl_20230809.src_services
2023-08-10 14:24:18,155 - Standardise - INFO - Creating staging table: omop_migration_etl_20230809.src_labevents
2023-08-10 14:25:27,470 - Stan

0

### Import custom mapping file

In [4]:
import os

# -c: import the custom concept mapping file.
os.system(
    'cd /superbugai-data/yash/chapter_1/workspace/EHRQC;'
    '.venv/bin/python -m ehrqc.standardise.migrate_omop.Run -c'
)

2023-08-10 15:03:36,337 - Standardise - INFO - Parsing command line arguments
2023-08-10 15:03:36,338 - Standardise - INFO - Start!!
2023-08-10 15:03:36,356 - Standardise - INFO - Creating schema: omop_migration_etl_20230809
2023-08-10 15:03:36,356 - Standardise - INFO - Import Custom Lookups
2023-08-10 15:03:36,356 - Standardise - INFO - Creating table: omop_migration_etl_20230809.tmp_custom_mapping
2023-08-10 15:03:36,416 - Standardise - INFO - Loading table: omop_migration_etl_20230809.tmp_custom_mapping
2023-08-10 15:03:36,533 - Standardise - INFO - Importing data to table: omop_migration_etl_20230809.tmp_custom_mapping
2023-08-10 15:03:36,838 - Standardise - INFO - Process Custom Lookups
2023-08-10 15:03:36,839 - Standardise - INFO - Creating table: vocabulary_test_20230809.tmp_custom_concept
2023-08-10 15:03:36,899 - Standardise - INFO - Creating table: vocabulary_test_20230809.tmp_custom_concept_relationship
2023-08-10 15:03:36,923 - Standardise - INFO - Creating table: vocabula

0

### Perform migration Extract-Transform-Load (ETL) operations

In [5]:
import os

# -e: perform the migration Extract-Transform-Load (ETL) operations.
os.system(
    'cd /superbugai-data/yash/chapter_1/workspace/EHRQC;'
    '.venv/bin/python -m ehrqc.standardise.migrate_omop.Run -e'
)

2023-08-10 15:13:34,646 - Standardise - INFO - Parsing command line arguments
2023-08-10 15:13:34,647 - Standardise - INFO - Start!!
2023-08-10 15:13:34,664 - Standardise - INFO - Creating schema: omop_migration_etl_20230809
2023-08-10 15:13:34,665 - Standardise - INFO - Performing ETL
2023-08-10 15:13:34,665 - Standardise - INFO - Creating table: omop_migration_etl_20230809.cdm_location
2023-08-10 15:13:34,673 - Standardise - INFO - Creating table: omop_migration_etl_20230809.tmp_subject_ethnicity
2023-08-10 15:13:34,877 - Standardise - INFO - Creating table: omop_migration_etl_20230809.lk_pat_ethnicity_concept
2023-08-10 15:14:37,181 - Standardise - INFO - Creating table: omop_migration_etl_20230809.tmp_subject_ethnicity
2023-08-10 15:14:37,186 - Standardise - INFO - Creating table: omop_migration_etl_20230809.lk_death_adm_mapped
2023-08-10 15:14:37,284 - Standardise - INFO - Creating table: omop_migration_etl_20230809.cdm_death
2023-08-10 15:14:37,387 - Standardise - INFO - Creating

0

### Unload data to CDM schema

In [6]:
import os

# -u: unload the migrated data to the CDM schema.
os.system(
    'cd /superbugai-data/yash/chapter_1/workspace/EHRQC;'
    '.venv/bin/python -m ehrqc.standardise.migrate_omop.Run -u'
)

2023-08-10 18:38:30,638 - Standardise - INFO - Parsing command line arguments
2023-08-10 18:38:30,639 - Standardise - INFO - Start!!
2023-08-10 18:38:30,742 - Standardise - INFO - Creating schema: omop_test_20230809
2023-08-10 18:38:30,801 - Standardise - INFO - Unloading migrated data to CDM schema
2023-08-10 18:38:30,801 - Standardise - INFO - Unloading table: vocabulary_test_20230809.concept
2023-08-10 18:39:22,753 - Standardise - INFO - Unloading table: vocabulary_test_20230809.vocabulary
2023-08-10 18:39:22,843 - Standardise - INFO - Unloading table: vocabulary_test_20230809.domain
2023-08-10 18:39:22,927 - Standardise - INFO - Unloading table: vocabulary_test_20230809.concept_class
2023-08-10 18:39:22,961 - Standardise - INFO - Unloading table: vocabulary_test_20230809.concept_relationship
2023-08-10 18:42:18,246 - Standardise - INFO - Unloading table: vocabulary_test_20230809.relationship
2023-08-10 18:42:18,483 - Standardise - INFO - Unloading table: vocabulary_test_20230809.co

0