# Analysing the coverage of individual attributes

## A function to establish the DB connection

In [1]:
import os

import psycopg2


def getConnection():
    """Open a new psycopg2 connection to the 'omop_migration_etl_20250416' database.

    The user name, host, port and password are taken from the
    POSTGRES_USER_NAME, POSTGRES_HOSTNAME, POSTGRES_PORT_NUMBER and
    POSTGRES_PASSWORD environment variables respectively.

    Returns:
        The connection object returned by ``psycopg2.connect``. This
        function never closes it; that is left to the caller.

    Raises:
        KeyError: if any of the four environment variables is not set.
    """
    env = os.environ
    # The database name is fixed; everything else is supplied by the environment.
    connectionParams = {
        'dbname': 'omop_migration_etl_20250416',
        'user': env['POSTGRES_USER_NAME'],
        'host': env['POSTGRES_HOSTNAME'],
        'port': env['POSTGRES_PORT_NUMBER'],
        'password': env['POSTGRES_PASSWORD'],
    }
    return psycopg2.connect(**connectionParams)


## Coverage analysis for vitals

In [4]:
import pandas as pd


# Open a dedicated connection for this cell; it is closed in the `finally` below.
con = getConnection()

# For every concept joined to measurements with unit_id = 'chartevents', compute:
#   * person_level_coverage  - distinct persons having that concept, as a
#     percentage of the distinct persons in the whole cdm_measurement table
#   * episode_level_coverage - distinct visit occurrences having that concept, as a
#     percentage of the distinct visit occurrences in the whole cdm_measurement table
# Note that both denominators are taken over the entire table, not just the
# 'chartevents' rows. Results are sorted by episode-level coverage, highest first.
coverageQuery = '''
    select
        con.concept_name,
        con.concept_code,
        (count(distinct mmt.person_id)::float * 100)/(select count(distinct(person_id)) from omop_migration_etl_20250416.cdm_measurement) as person_level_coverage,
        (count(distinct mmt.visit_occurrence_id)::float * 100)/(select count(distinct(visit_occurrence_id)) from omop_migration_etl_20250416.cdm_measurement) as episode_level_coverage
    from
        omop_migration_etl_20250416.cdm_measurement mmt
    inner join omop_migration_etl_20250416.concept con
        on con.concept_code = mmt.measurement_concept_id
    where
        mmt.unit_id = 'chartevents'
    group by
        con.concept_name, con.concept_code
    order by
        episode_level_coverage desc
    ;
'''

try:
    coverageDf = pd.read_sql_query(coverageQuery, con)
finally:
    # Release the server-side session even if the query fails. An explicit
    # close() is used because a psycopg2 connection used as a context manager
    # only ends the transaction and leaves the connection open.
    con.close()

coverageDf

  coverageDf = pd.read_sql_query(coverageQuery, con)


Unnamed: 0,concept_name,concept_code,person_level_coverage,episode_level_coverage
0,Systolic blood pressure,271649006,79.559387,46.205586
1,Diastolic blood pressure,271650006,79.482759,46.148807
2,Peripheral oxygen saturation,431314004,80.498084,45.738246
3,Temperature,246508008,78.927203,43.665786
4,Respiratory rate,86290005,78.850575,43.602455
5,Mean blood pressure,6797001,77.796935,37.786901
6,Heart rate,364075005,67.605364,36.070407
7,Peripheral pulse,54718008,78.237548,29.263392
8,Body weight,27113001,68.850575,24.109541
9,Oxygen,24099007,58.888889,16.267389


## Coverage analysis for labs

In [5]:
import pandas as pd


# Open a dedicated connection for this cell; it is closed in the `finally` below.
con = getConnection()

# For every concept joined to measurements with unit_id = 'labevents', compute:
#   * person_level_coverage  - distinct persons having that concept, as a
#     percentage of the distinct persons in the whole cdm_measurement table
#   * episode_level_coverage - distinct visit occurrences having that concept, as a
#     percentage of the distinct visit occurrences in the whole cdm_measurement table
# Note that both denominators are taken over the entire table, not just the
# 'labevents' rows. Results are sorted by episode-level coverage, highest first.
coverageQuery = '''
    select
        con.concept_name,
        con.concept_code,
        (count(distinct mmt.person_id)::float * 100)/(select count(distinct(person_id)) from omop_migration_etl_20250416.cdm_measurement) as person_level_coverage,
        (count(distinct mmt.visit_occurrence_id)::float * 100)/(select count(distinct(visit_occurrence_id)) from omop_migration_etl_20250416.cdm_measurement) as episode_level_coverage
    from
        omop_migration_etl_20250416.cdm_measurement mmt
    inner join omop_migration_etl_20250416.concept con
        on con.concept_code = mmt.measurement_concept_id
    where
        mmt.unit_id = 'labevents'
    group by
        con.concept_name, con.concept_code
    order by
        episode_level_coverage desc
    ;
'''

try:
    coverageDf = pd.read_sql_query(coverageQuery, con)
finally:
    # Release the server-side session even if the query fails. An explicit
    # close() is used because a psycopg2 connection used as a context manager
    # only ends the transaction and leaves the connection open.
    con.close()

coverageDf

  coverageDf = pd.read_sql_query(coverageQuery, con)


Unnamed: 0,concept_name,concept_code,person_level_coverage,episode_level_coverage
0,Haemoglobin estimation,1022431000000105,99.923372,79.568037
1,Eosinophil count,71960002,99.885057,78.642091
2,Monocyte,55918008,99.885057,78.609334
3,Lymphocyte,56972008,99.885057,78.602782
4,Red blood cell count,14089001,99.904215,78.423708
5,Hematocrit,165418001,99.904215,78.423708
6,MCHC - Mean corpuscular haemoglobin concentration,1022481000000109,99.904215,78.421524
7,MCH - Mean corpuscular haemoglobin,1022471000000107,99.904215,78.421524
8,MCV - Mean corpuscular volume,1022491000000106,99.904215,78.417156
9,Red blood cell distribution width,993501000000105,99.904215,78.366928
