# Analysing the coverage of individual attributes

## Get DB Connection

In [1]:
import os
import psycopg2


def getConnection():
    """Open a new psycopg2 connection to the postgres copy of MIMIC-III.

    All connection settings are read from environment variables:
    POSTGRES_DB_NAME, POSTGRES_USER_NAME, POSTGRES_HOSTNAME,
    POSTGRES_PORT_NUMBER and POSTGRES_PASSWORD.

    Returns:
        The connection object returned by ``psycopg2.connect``.

    Raises:
        KeyError: if any of the environment variables above is not set.
    """
    env = os.environ
    return psycopg2.connect(
        dbname=env['POSTGRES_DB_NAME'],
        user=env['POSTGRES_USER_NAME'],
        host=env['POSTGRES_HOSTNAME'],
        port=env['POSTGRES_PORT_NUMBER'],
        password=env['POSTGRES_PASSWORD'],
    )

## Concept Coverage Analysis

### Labs

In [2]:
import pandas as pd


# Coverage of each lab concept within the cohort, expressed in percent:
#   - person_level_coverage: distinct persons with at least one matching
#     measurement row, divided by the distinct persons in cohort_micro.
#   - episode_level_coverage: the same ratio over visit_occurrence_id.
# Only measurement rows whose unit_id contains 'labevents' are counted, and
# the inner join on cohort_micro keeps cohort members only.
coverageQuery = '''
    select
    con.concept_name,
    con.concept_id,
    (count(distinct mmt.person_id)::float * 100)/(select count(distinct person_id) from omop_test_20220817.cohort_micro) as person_level_coverage,
    (count(distinct mmt.visit_occurrence_id)::float * 100)/(select count(distinct visit_occurrence_id) from omop_test_20220817.cohort_micro) as episode_level_coverage
    from
    omop_migration_etl_20220817.cdm_measurement mmt
    inner join omop_migration_etl_20220817.voc_concept con
    on con.concept_id = mmt.measurement_concept_id
    inner join omop_test_20220817.cohort_micro coh
    on coh.person_id = mmt.person_id and coh.visit_occurrence_id = mmt.visit_occurrence_id
    where mmt.unit_id like '%labevents%'
    group by con.concept_name, con.concept_id order by person_level_coverage desc;
'''

# Open a dedicated connection for this query and always release it, even if
# the query fails; otherwise every run of this cell leaks a server session.
# (Using the psycopg2 connection as a context manager would only end the
# transaction, not close the connection, hence the explicit close().)
con = getConnection()
try:
    coverageDf = pd.read_sql_query(coverageQuery, con)
finally:
    con.close()

coverageDf

  coverageDf = pd.read_sql_query(coverageQuery, con)


Unnamed: 0,concept_name,concept_id,person_level_coverage,episode_level_coverage
0,MCHC [Mass/volume] by Automated count,3009744,99.944176,99.950739
1,Leukocytes [#/volume] in Blood by Manual count,3003282,99.944176,99.950739
2,Platelets [#/volume] in Blood by Automated count,3024929,99.944176,99.950739
3,MCV [Entitic volume] by Automated count,3023599,99.944176,99.950739
4,MCH [Entitic mass] by Automated count,3012030,99.944176,99.950739
...,...,...,...,...
210,Crystals [type] in Urine sediment by Light mic...,3025590,0.018608,0.016420
211,N-acetylprocainamide [Mass/volume] in Serum or...,3001706,0.018608,0.016420
212,Leucine crystals [Presence] in Urine sediment ...,3019169,0.018608,0.016420
213,Coagulation factor XIII coagulum dissolution [...,3019757,0.018608,0.016420


In [3]:
# Lift the pandas limits on displayed rows and on column width so the
# coverage table below is shown in full, with untruncated concept names.
# These are global settings and stay in effect for later cells.
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

coverageDf

Unnamed: 0,concept_name,concept_id,person_level_coverage,episode_level_coverage
0,MCHC [Mass/volume] by Automated count,3009744,99.944176,99.950739
1,Leukocytes [#/volume] in Blood by Manual count,3003282,99.944176,99.950739
2,Platelets [#/volume] in Blood by Automated count,3024929,99.944176,99.950739
3,MCV [Entitic volume] by Automated count,3023599,99.944176,99.950739
4,MCH [Entitic mass] by Automated count,3012030,99.944176,99.950739
5,Erythrocyte distribution width [Ratio] by Automated count,3019897,99.944176,99.950739
6,Erythrocytes [#/volume] in Blood by Automated count,3020416,99.944176,99.950739
7,Hematocrit [Volume Fraction] of Blood by Automated count,3023314,99.944176,99.950739
8,Hemoglobin [Mass/volume] in Blood,3000963,99.944176,99.950739
9,Creatinine [Mass/volume] in Serum or Plasma,3016723,99.851135,99.868637


### Vitals

In [4]:
import pandas as pd


# Coverage of each charted concept within the cohort, expressed in percent
# and broken down by (concept, measurement_source_value):
#   - person_level_coverage: distinct persons with at least one matching
#     measurement row, divided by the distinct persons in cohort_micro.
#   - episode_level_coverage: the same ratio over visit_occurrence_id.
# Only measurement rows whose unit_id contains 'chartevents' are counted, and
# the inner join on cohort_micro keeps cohort members only.
coverageQuery = '''
    select
    con.concept_name,
    con.concept_id,
    mmt.measurement_source_value,
    (count(distinct mmt.person_id)::float * 100)/(select count(distinct person_id) from omop_test_20220817.cohort_micro) as person_level_coverage,
    (count(distinct mmt.visit_occurrence_id)::float * 100)/(select count(distinct visit_occurrence_id) from omop_test_20220817.cohort_micro) as episode_level_coverage
    from
    omop_migration_etl_20220817.cdm_measurement mmt
    inner join omop_migration_etl_20220817.voc_concept con
    on con.concept_id = mmt.measurement_concept_id
    inner join omop_test_20220817.cohort_micro coh
    on coh.person_id = mmt.person_id and coh.visit_occurrence_id = mmt.visit_occurrence_id
    where mmt.unit_id like '%chartevents%'
    group by con.concept_name, con.concept_id, mmt.measurement_source_value order by person_level_coverage desc;
'''

# Open a dedicated connection for this query and always release it, even if
# the query fails; otherwise every run of this cell leaks a server session.
# (Using the psycopg2 connection as a context manager would only end the
# transaction, not close the connection, hence the explicit close().)
con = getConnection()
try:
    coverageDf = pd.read_sql_query(coverageQuery, con)
finally:
    con.close()

coverageDf

  coverageDf = pd.read_sql_query(coverageQuery, con)


Unnamed: 0,concept_name,concept_id,measurement_source_value,person_level_coverage,episode_level_coverage
0,Heart rate,3027018,220045,58.113137,54.220033
1,Body weight,3025315,226512,58.075921,54.187192
2,Oxygen saturation in Arterial blood by Pulse oximetry,40762499,220277,58.057313,54.154351
3,Respiratory rate,3024171,220210,58.057313,54.170772
4,Heart rate rhythm,3022318,220048,57.927056,54.022989
5,Glasgow coma score verbal,3009094,223900,57.927056,54.055829
6,Glasgow coma score motor,3008223,223901,57.927056,54.055829
7,Glasgow coma score eye opening,3016335,220739,57.927056,54.055829
8,Skin integrity,21492838,224026,57.796799,53.940887
9,Color of Skin,3043970,224028,57.778191,53.908046


In [5]:
# Lift the pandas limits on displayed rows and on column width so the
# coverage table below is shown in full, with untruncated concept names.
# These are global settings and stay in effect for later cells.
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

coverageDf

Unnamed: 0,concept_name,concept_id,measurement_source_value,person_level_coverage,episode_level_coverage
0,Heart rate,3027018,220045,58.113137,54.220033
1,Body weight,3025315,226512,58.075921,54.187192
2,Oxygen saturation in Arterial blood by Pulse oximetry,40762499,220277,58.057313,54.154351
3,Respiratory rate,3024171,220210,58.057313,54.170772
4,Heart rate rhythm,3022318,220048,57.927056,54.022989
5,Glasgow coma score verbal,3009094,223900,57.927056,54.055829
6,Glasgow coma score motor,3008223,223901,57.927056,54.055829
7,Glasgow coma score eye opening,3016335,220739,57.927056,54.055829
8,Skin integrity,21492838,224026,57.796799,53.940887
9,Color of Skin,3043970,224028,57.778191,53.908046
