# Analysing the coverage of individual attributes

## Get DB Connection

In [2]:
import os
import psycopg2


def getConnection():
    """Return a new psycopg2 connection to the Postgres copy of MIMIC-III.

    Every connection setting is read from a ``POSTGRES_*`` environment
    variable; a missing variable raises ``KeyError``.
    """
    env = os.environ
    settings = {
        'dbname': env['POSTGRES_DB_NAME'],
        'user': env['POSTGRES_USER_NAME'],
        'host': env['POSTGRES_HOSTNAME'],
        'port': env['POSTGRES_PORT_NUMBER'],
        'password': env['POSTGRES_PASSWORD'],
    }
    return psycopg2.connect(**settings)

## Concept Coverage Analysis

### Labs

In [9]:
import pandas as pd


# For every measurement concept whose unit_id contains 'labevents', compute the
# percentage of distinct cohort persons (person_level_coverage) and distinct
# cohort visits (episode_level_coverage) that have at least one such
# measurement. Denominators are the distinct counts in cohort_icd; rows are
# ordered from highest to lowest person-level coverage.
coverageQuery = '''
    select
    con.concept_name,
    con.concept_id,
    (count(distinct mmt.person_id)::float * 100)/(select count(distinct person_id) from omop_test_20220817.cohort_icd) as person_level_coverage,
    (count(distinct mmt.visit_occurrence_id)::float * 100)/(select count(distinct visit_occurrence_id) from omop_test_20220817.cohort_icd) as episode_level_coverage
    from
    omop_migration_etl_20220817.cdm_measurement mmt
    inner join omop_migration_etl_20220817.voc_concept con
    on con.concept_id = mmt.measurement_concept_id
    inner join omop_test_20220817.cohort_icd coh
    on coh.person_id = mmt.person_id and coh.visit_occurrence_id = mmt.visit_occurrence_id
    where mmt.unit_id like '%labevents%'
    group by con.concept_name, con.concept_id order by person_level_coverage desc;
'''

con = getConnection()
try:
    coverageDf = pd.read_sql_query(coverageQuery, con)
finally:
    # Each query cell opens its own connection, so release it explicitly;
    # psycopg2's `with con:` would only end the transaction, not close it.
    con.close()

coverageDf

  coverageDf = pd.read_sql_query(coverageQuery, con)


Unnamed: 0,concept_name,concept_id,person_level_coverage,episode_level_coverage
0,No matching concept,0,99.722222,99.722222
1,Leukocytes [#/volume] in Blood by Manual count,3003282,99.705882,99.705882
2,Hematocrit [Volume Fraction] of Blood by Autom...,3023314,99.697712,99.697712
3,MCHC [Mass/volume] by Automated count,3009744,99.697712,99.697712
4,MCH [Entitic mass] by Automated count,3012030,99.697712,99.697712
...,...,...,...,...
210,Coagulation factor XII activity actual/normal ...,3002348,0.032680,0.032680
211,Calcium carbonate crystals [Presence] in Urine...,3007501,0.024510,0.024510
212,Leucine crystals [Presence] in Urine sediment ...,3019169,0.016340,0.016340
213,Coagulation factor XIII coagulum dissolution [...,3019757,0.008170,0.008170


In [13]:
# Remove pandas' row and column-width display limits so the complete coverage
# table and the untruncated concept names are rendered.
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

coverageDf

Unnamed: 0,concept_name,concept_id,person_level_coverage,episode_level_coverage
0,No matching concept,0,99.722222,99.722222
1,Leukocytes [#/volume] in Blood by Manual count,3003282,99.705882,99.705882
2,Hematocrit [Volume Fraction] of Blood by Automated count,3023314,99.697712,99.697712
3,MCHC [Mass/volume] by Automated count,3009744,99.697712,99.697712
4,MCH [Entitic mass] by Automated count,3012030,99.697712,99.697712
5,Erythrocyte distribution width [Ratio] by Automated count,3019897,99.697712,99.697712
6,MCV [Entitic volume] by Automated count,3023599,99.697712,99.697712
7,Erythrocytes [#/volume] in Blood by Automated count,3020416,99.697712,99.697712
8,Platelets [#/volume] in Blood by Automated count,3024929,99.697712,99.697712
9,Hemoglobin [Mass/volume] in Blood,3000963,99.697712,99.697712


### Vitals

In [9]:
import pandas as pd


# Same coverage calculation as for labs, but restricted to measurements whose
# unit_id contains 'chartevents' and additionally broken down by
# measurement_source_value. Percentages are relative to the distinct persons /
# visits in cohort_icd; rows are ordered by person-level coverage, descending.
coverageQuery = '''
    select
    con.concept_name,
    con.concept_id,
    mmt.measurement_source_value,
    (count(distinct mmt.person_id)::float * 100)/(select count(distinct person_id) from omop_test_20220817.cohort_icd) as person_level_coverage,
    (count(distinct mmt.visit_occurrence_id)::float * 100)/(select count(distinct visit_occurrence_id) from omop_test_20220817.cohort_icd) as episode_level_coverage
    from
    omop_migration_etl_20220817.cdm_measurement mmt
    inner join omop_migration_etl_20220817.voc_concept con
    on con.concept_id = mmt.measurement_concept_id
    inner join omop_test_20220817.cohort_icd coh
    on coh.person_id = mmt.person_id and coh.visit_occurrence_id = mmt.visit_occurrence_id
    where mmt.unit_id like '%chartevents%'
    group by con.concept_name, con.concept_id, mmt.measurement_source_value order by person_level_coverage desc;
'''

con = getConnection()
try:
    coverageDf = pd.read_sql_query(coverageQuery, con)
finally:
    # Each query cell opens its own connection, so release it explicitly;
    # psycopg2's `with con:` would only end the transaction, not close it.
    con.close()

coverageDf

  coverageDf = pd.read_sql_query(coverageQuery, con)


Unnamed: 0,concept_name,concept_id,measurement_source_value,person_level_coverage,episode_level_coverage
0,Respiratory rate,3024171,220210,69.550654,69.550654
1,Heart rate,3027018,220045,69.550654,69.550654
2,Body weight,3025315,226512,69.526144,69.526144
3,Oxygen saturation in Arterial blood by Pulse oximetry,40762499,220277,69.468954,69.468954
4,Glasgow coma score eye opening,3016335,220739,69.444444,69.444444
5,Glasgow coma score verbal,3009094,223900,69.419935,69.419935
6,Glasgow coma score motor,3008223,223901,69.403595,69.403595
7,Heart rate rhythm,3022318,220048,69.370915,69.370915
8,Physical findings of Abdomen by Palpation,21494966,224003,69.330065,69.330065
9,Skin integrity,21492838,224026,69.240196,69.240196


In [4]:
# Show every row and the full text of each column when rendering the
# coverage table below (None disables the respective pandas display limit).
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

coverageDf

Unnamed: 0,concept_name,concept_id,person_level_coverage,episode_level_coverage
0,Heart rate,3027018,69.550654,69.550654
1,Respiratory rate,3024171,69.550654,69.550654
2,Body weight,3025315,69.526144,69.526144
3,Oxygen saturation in Arterial blood by Pulse oximetry,40762499,69.468954,69.468954
4,Glasgow coma score eye opening,3016335,69.444444,69.444444
5,Glasgow coma score verbal,3009094,69.419935,69.419935
6,Glasgow coma score motor,3008223,69.403595,69.403595
7,Heart rate rhythm,3022318,69.370915,69.370915
8,Physical findings of Abdomen by Palpation,21494966,69.330065,69.330065
9,Body temperature,3020891,69.281046,69.281046
