# Intialisation Steps
Connecting to Snowflake

## Importing required modules

In [2]:
import pandas as pd
from drg_connect import Snowflake
import qgrid 
from datetime import timedelta, datetime
import math

import warnings
warnings.filterwarnings('ignore')

## Snowflake connection parameter

In [3]:
##defining parameters of snowflake
snow = Snowflake(role = 'RWD_ANALYTICS_RW',database='SANDBOX_ANALYTICS',schema = 'SANDBOX')
engine = snow.engine

%reload_ext sql_magic
%config SQL.output_result = True  #Enable output to std out
%config SQL.notify_result = False #disable browser notifications
%config SQL.conn_name = 'engine'  #Set the sql_magic connection engine

# Queries from here

# Creating reference table for ICD+SNOMED codes

## Category 1: Atherosclerotic CVD

### Relevant ICD codes

In [5]:
%%read_sql

select
    *
from
    RWD_DB.RWD.ICD_GROUPER
where
    level_4_short_description_icd10 ilike '%atherosclerotic%'
    or level_4_short_description_icd10 ilike '%coronary%'
    or level_4_short_description_icd10 ilike '%myocardial%'

Query started at 01:27:54 PM India Standard Time; Query executed in 0.06 m

Unnamed: 0,id,level_1,level_1_description,level_2,level_2_description,level_3,level_3_description,level_4,icd9_mapped_codes,icd9_description,level_4_short_description_icd10,level_4_long_description_icd10,create_ts,update_ts
0,10867,I00 - I999,Diseases of the circulatory system,I20-I25,Ischemic heart diseases,I21-I219,Acute myocardial infarction,I21,No map,No map,Acute myocardial infarction,Acute myocardial infarction,2017-11-02,2017-11-02
1,10868,I00 - I999,Diseases of the circulatory system,I20-I25,Ischemic heart diseases,I21-I219,Acute myocardial infarction,I210,No map,No map,ST elevation (STEMI) myocardial infarction of ...,ST elevation (STEMI) myocardial infarction of ...,2017-11-02,2017-11-02
2,10869,I00 - I999,Diseases of the circulatory system,I20-I25,Ischemic heart diseases,I21-I219,Acute myocardial infarction,I2101,No map,No map,STEMI involving left main coronary artery,ST elevation (STEMI) myocardial infarction inv...,2017-11-02,2017-11-02
3,10870,I00 - I999,Diseases of the circulatory system,I20-I25,Ischemic heart diseases,I21-I219,Acute myocardial infarction,I2102,No map,No map,STEMI involving left anterior descending coron...,ST elevation (STEMI) myocardial infarction inv...,2017-11-02,2017-11-02
4,10871,I00 - I999,Diseases of the circulatory system,I20-I25,Ischemic heart diseases,I21-I219,Acute myocardial infarction,I2109,41000,Acute myocardial infarction of anterolateral w...,STEMI involving oth coronary artery of anterio...,ST elevation (STEMI) myocardial infarction inv...,2017-11-02,2017-11-02
5,10872,I00 - I999,Diseases of the circulatory system,I20-I25,Ischemic heart diseases,I21-I219,Acute myocardial infarction,I211,No map,No map,ST elevation (STEMI) myocardial infarction of ...,ST elevation (STEMI) myocardial infarction of ...,2017-11-02,2017-11-02
6,10873,I00 - I999,Diseases of the circulatory system,I20-I25,Ischemic heart diseases,I21-I219,Acute myocardial infarction,I2111,41030,Acute myocardial infarction of inferoposterior...,STEMI involving right coronary artery,ST elevation (STEMI) myocardial infarction inv...,2017-11-02,2017-11-02
7,10874,I00 - I999,Diseases of the circulatory system,I20-I25,Ischemic heart diseases,I21-I219,Acute myocardial infarction,I2119,41020,Acute myocardial infarction of inferolateral w...,STEMI involving oth coronary artery of inferio...,ST elevation (STEMI) myocardial infarction inv...,2017-11-02,2017-11-02
8,10875,I00 - I999,Diseases of the circulatory system,I20-I25,Ischemic heart diseases,I21-I219,Acute myocardial infarction,I212,No map,No map,ST elevation (STEMI) myocardial infarction of ...,ST elevation (STEMI) myocardial infarction of ...,2017-11-02,2017-11-02
9,10876,I00 - I999,Diseases of the circulatory system,I20-I25,Ischemic heart diseases,I21-I219,Acute myocardial infarction,I2121,No map,No map,STEMI involving left circumflex coronary artery,ST elevation (STEMI) myocardial infarction inv...,2017-11-02,2017-11-02


In [108]:
%%read_sql

create or replace temporary table st_ath_ref1 as

select
    'dx' as cat1,
    'cvd' cat2,
    'ath' as cat3,
    null as cat4,
    null as cat5,
    null as cat6,
    'icd_10' as type,
    cast(level_4 as varchar(10))value,
    level_4_long_description_icd10 as description,
    current_date() as active_date,
    'Swar' as active_reason,
    null as deactive_date,
    null as deactive_reason,
    'RWD_DB.RWD.ICD_GROUPER' as source
from 
    RWD_DB.RWD.ICD_GROUPER
where
    level_4_short_description_icd10 ilike '%atherosclerotic%'
    or level_4_short_description_icd10 ilike '%coronary%'
    or level_4_short_description_icd10 ilike '%myocardial%'
    or (level_4_short_description_icd10 ilike '%peripheral%'
        and level_4_short_description_icd10 ilike '%vascular%')
    or (level_4_short_description_icd10 ilike '%peripheral%'
        and level_4_short_description_icd10 ilike '%arterial%')
    or (level_4_short_description_icd10 ilike '%ischemic%'
        and level_4_short_description_icd10 ilike '%stroke%')
    or (level_4_short_description_icd10 ilike '%carotid%'
        and level_4_short_description_icd10 ilike '%endarterectomy%')
    or (level_4_short_description_icd10 ilike '%carotid%'
        and level_4_short_description_icd10 ilike '%stenting%')

Query started at 10:15:17 PM India Standard Time; Query executed in 0.14 m

Unnamed: 0,status
0,Table ST_ATH_REF1 successfully created.


### Relevant SNOMED codes

In [8]:
%%read_sql

select * from 
    
    RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT 

where 
    fully_specified_name ilike '%atherosclerotic%'
    or fully_specified_name ilike '%ischemic stroke%'
    or fully_specified_name ilike '%abdominal aortic aneurysm%'
    or fully_specified_name ilike '%percutaneous coronary intervention%'
    or fully_specified_name ilike '%coronary artery bypass%'
    or fully_specified_name ilike '%coronary%'
    or fully_specified_name ilike '%myocardial%'
    or (fully_specified_name ilike '%peripheral%'
        and fully_specified_name ilike '%vascular%')
    or (fully_specified_name ilike '%peripheral%'
        and fully_specified_name ilike '%arterial%')
    or (fully_specified_name ilike '%ischemic%'
        and fully_specified_name ilike '%stroke%')
    or (fully_specified_name ilike '%carotid%'
        and fully_specified_name ilike '%endarterectomy%')
    or (fully_specified_name ilike '%carotid%'
        and fully_specified_name ilike '%stenting%')

Query started at 02:05:48 PM India Standard Time; Query executed in 0.15 m

Unnamed: 0,concept_id,concept_status,fully_specified_name,ctvc_id,snomed_id,is_primitive
0,194815007,2,Aborted myocardial infarction (disorder),G3110,D3-13011,1
1,244256001,0,Entire left posterior descending branch of cir...,X74eN,T-4313B,1
2,448652001,0,Percutaneous transluminal injection of therape...,XUkou,P1-33503,1
3,233998004,2,Coronary artery dissection (disorder),X204f,D3-13004,1
4,194641003,0,Entire atrioventricular node branch of right c...,XS0hI,T-4321A,1
5,194821006,0,Coronary thrombosis not resulting in myocardia...,G312.,D3-13015,1
6,433448008,0,Family history of carotid endarterectomy (situ...,XUhDg,F-05020,0
7,204492002,6,Peripheral vascular system anomaly NOS (disorder),P76z.,D4-3002A,1
8,194366007,0,Entire atrioventricular branch of circumflex b...,XS0h8,T-43135,1
9,44927006,0,Measurement of coronary blood flow (regime/the...,XUEJm,P2-36174,1


In [109]:
%%read_sql

create or replace temporary table st_ath_ref2 as

select
    'dx' as cat1,
    'cvd' cat2,
    'ath' as cat3,
    null as cat4,
    null as cat5,
    null as cat6,
    'snomed' as type,
    cast(concept_id as varchar(30))value,
    fully_specified_name as description,
    current_date() as active_date,
    'Swar' as active_reason,
    null as deactive_date,
    null as deactive_reason,
    'RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT' as source
from 
    RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
where
    fully_specified_name ilike '%atherosclerotic%'
    or fully_specified_name ilike '%ischemic stroke%'
    or fully_specified_name ilike '%abdominal aortic aneurysm%'
    or fully_specified_name ilike '%percutaneous coronary intervention%'
    or fully_specified_name ilike '%coronary artery bypass%'
    or fully_specified_name ilike '%coronary%'
    or fully_specified_name ilike '%myocardial%'
    or (fully_specified_name ilike '%peripheral%'
        and fully_specified_name ilike '%vascular%')
    or (fully_specified_name ilike '%peripheral%'
        and fully_specified_name ilike '%arterial%')
    or (fully_specified_name ilike '%ischemic%'
        and fully_specified_name ilike '%stroke%')
    or (fully_specified_name ilike '%carotid%'
        and fully_specified_name ilike '%endarterectomy%')
    or (fully_specified_name ilike '%carotid%'
        and fully_specified_name ilike '%stenting%')

Query started at 10:15:28 PM India Standard Time; Query executed in 0.17 m

Unnamed: 0,status
0,Table ST_ATH_REF2 successfully created.


## Category 2: Diabetes no atherosclerotic CVD

### ICD codes

In [12]:
%%read_sql

select
    *
from
    RWD_DB.RWD.ICD_GROUPER
where
    level_4_short_description_icd10 ilike '%diabetes%'
    and not (level_4_long_description_icd10 ilike '%gestational%'
             and level_4_long_description_icd10 ilike '%diabetes%')

Query started at 02:45:50 PM India Standard Time; Query executed in 0.12 m

Unnamed: 0,id,level_1,level_1_description,level_2,level_2_description,level_3,level_3_description,level_4,icd9_mapped_codes,icd9_description,level_4_short_description_icd10,level_4_long_description_icd10,create_ts,update_ts
0,3714,E00 - E8989,Endocrine nutritional and metabolic diseases,E08-E13,Diabetes mellitus,E08-E089,Diabetes mellitus due to underlying condition,E08,No map,No map,Diabetes mellitus due to underlying condition,Diabetes mellitus due to underlying condition,2017-11-02,2017-11-02
1,3715,E00 - E8989,Endocrine nutritional and metabolic diseases,E08-E13,Diabetes mellitus,E08-E089,Diabetes mellitus due to underlying condition,E080,No map,No map,Diabetes due to underlying condition w hyperos...,Diabetes mellitus due to underlying condition ...,2017-11-02,2017-11-02
2,3717,E00 - E8989,Endocrine nutritional and metabolic diseases,E08-E13,Diabetes mellitus,E08-E089,Diabetes mellitus due to underlying condition,E0801,24920,Secondary diabetes mellitus with hyperosmolari...,Diabetes due to underlying condition w hyprosm...,Diabetes mellitus due to underlying condition ...,2017-11-02,2017-11-02
3,3718,E00 - E8989,Endocrine nutritional and metabolic diseases,E08-E13,Diabetes mellitus,E08-E089,Diabetes mellitus due to underlying condition,E081,No map,No map,Diabetes mellitus due to underlying condition ...,Diabetes mellitus due to underlying condition ...,2017-11-02,2017-11-02
4,3719,E00 - E8989,Endocrine nutritional and metabolic diseases,E08-E13,Diabetes mellitus,E08-E089,Diabetes mellitus due to underlying condition,E0810,24910,Secondary diabetes mellitus with ketoacidosis ...,Diabetes due to underlying condition w ketoaci...,Diabetes mellitus due to underlying condition ...,2017-11-02,2017-11-02
5,3720,E00 - E8989,Endocrine nutritional and metabolic diseases,E08-E13,Diabetes mellitus,E08-E089,Diabetes mellitus due to underlying condition,E0811,24930,Secondary diabetes mellitus with other coma no...,Diabetes due to underlying condition w ketoaci...,Diabetes mellitus due to underlying condition ...,2017-11-02,2017-11-02
6,3721,E00 - E8989,Endocrine nutritional and metabolic diseases,E08-E13,Diabetes mellitus,E08-E089,Diabetes mellitus due to underlying condition,E082,No map,No map,Diabetes due to underlying condition w kidney ...,Diabetes mellitus due to underlying condition ...,2017-11-02,2017-11-02
7,3722,E00 - E8989,Endocrine nutritional and metabolic diseases,E08-E13,Diabetes mellitus,E08-E089,Diabetes mellitus due to underlying condition,E0821,24940,Secondary diabetes mellitus with renal manifes...,Diabetes due to underlying condition w diabeti...,Diabetes mellitus due to underlying condition ...,2017-11-02,2017-11-02
8,3723,E00 - E8989,Endocrine nutritional and metabolic diseases,E08-E13,Diabetes mellitus,E08-E089,Diabetes mellitus due to underlying condition,E0822,No map,No map,Diabetes due to undrl cond w diabetic chronic ...,Diabetes mellitus due to underlying condition ...,2017-11-02,2017-11-02
9,3724,E00 - E8989,Endocrine nutritional and metabolic diseases,E08-E13,Diabetes mellitus,E08-E089,Diabetes mellitus due to underlying condition,E0829,No map,No map,Diabetes due to undrl condition w oth diabetic...,Diabetes mellitus due to underlying condition ...,2017-11-02,2017-11-02


In [110]:
%%read_sql

create or replace temporary table st_ath_ref3 as

select
    'dx' as cat1,
    'no_cvd' cat2,
    'dia' as cat3,
    null as cat4,
    null as cat5,
    null as cat6,
    'icd_10' as type,
    cast(level_4 as varchar(10))value, 
    level_4_long_description_icd10 as description,
    current_date() as active_date,
    'Swar' as active_reason,
    null as deactive_date,
    null as deactive_reason,
    'RWD_DB.RWD.ICD_GROUPER' as source
from 
    RWD_DB.RWD.ICD_GROUPER
where
    level_4_short_description_icd10 ilike '%diabetes%'
    and not (level_4_long_description_icd10 ilike '%gestational%'
             and level_4_long_description_icd10 ilike '%diabetes%')

Query started at 10:15:42 PM India Standard Time; Query executed in 0.12 m

Unnamed: 0,status
0,Table ST_ATH_REF3 successfully created.


### SNOMED codes

In [111]:
%%read_sql

create or replace temporary table st_ath_ref8 as

select
    'dx' as cat1,
    'no_cvd' cat2,
    'dia' as cat3,
    null as cat4,
    null as cat5,
    null as cat6,
    'snomed' as type,
    cast(concept_id as varchar(30))value,
    fully_specified_name as description,
    current_date() as active_date,
    'Swar' as active_reason,
    null as deactive_date,
    null as deactive_reason,
    'RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT' as source
from 
    RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
where
    fully_specified_name ilike '%diabetes mellitus%'

Query started at 10:16:01 PM India Standard Time; Query executed in 0.08 m

Unnamed: 0,status
0,Table ST_ATH_REF8 successfully created.


In [4]:
%%read_sql

select * from RWD_DB.RWD.PELICAN_SMOKE

Query started at 12:49:37 PM India Standard Time; Query executed in 0.07 m

Unnamed: 0,smoke_id,concept_id,description
0,1,266919005,0 cigarettes per day (non-smoker or less than ...
1,7,77176002,Current status unknown
2,14,428071000124103,Heavy tobacco smoker
3,16,77176002,"Smoker, current status unknown"
4,31,228494002,Snuff user
5,22,405746006,Current non smoker but past smoking history un...
6,17,428041000124106,Current some day smoker
7,20,8517006,Ex-smoker
8,11,266919005,Never smoker
9,29,81703003,Chews tobacco


In [112]:
%%read_sql

create or replace temporary table st_ath_ref4 as

select
    'dx' as cat1,
    'no_cvd' cat2,
    'smoke' as cat3,
    null as cat4,
    null as cat5,
    null as cat6,
    'snomed' as type,
    cast(smoke_id as varchar(30))value,
    description as description,
    current_date() as active_date,
    'Swar' as active_reason,
    null as deactive_date,
    null as deactive_reason,
    'RWD_DB.RWD.PELICAN_SMOKE' as source
from 
    RWD_DB.RWD.PELICAN_SMOKE
where
  description not ilike '%snuff%'
    or description not ilike '%former smoker%'
    or description not ilike '%ex-smoker%'
    or description not ilike '%non-smoker%'

Query started at 10:16:27 PM India Standard Time; Query executed in 0.08 m

Unnamed: 0,status
0,Table ST_ATH_REF4 successfully created.


### Use transcript table for blood pressure values

In [6]:
%%read_sql

select * from RWD_DB.RWD.PELICAN_TRANSCRIPT limit 3

Query started at 12:59:39 PM India Standard Time; Query executed in 0.09 m

Unnamed: 0,transcript_id,patient_id,provider_id,signed_by_provider_id,signed_by_time,dos,weight,height,c_bmi,systolic_bp,diastolic_bp,pulse,resp_rate,note_type,last_modified
0,860388328923430349,36A6544A-E215-7D5A-D703-38A3F395FF9E,F050C815-53B5-068A-2C9B-ED64F30E7BF3,EF4FD2F0-EA17-228D-EB2A-37E32DDF5FC9,2015-09-26,2015-08-14,,,,,,,,SOAP Note,2015-09-26
1,5429098221239066103,36A6544A-E215-7D5A-D703-38A3F395FF9E,F050C815-53B5-068A-2C9B-ED64F30E7BF3,EF4FD2F0-EA17-228D-EB2A-37E32DDF5FC9,2016-02-25,2016-01-21,,,,,,,,SOAP Note,2016-02-25
2,1518559993145257351,22CA50D8-27FC-E4C4-3F25-1FDAAE139DCF,7F9AAD01-8951-9CFF-84D7-6D93523CB40C,7F9AAD01-8951-9CFF-84D7-6D93523CB40C,2012-09-25,2012-09-25,215.0,66.0,35.0,130.0,90.0,79.0,20.0,SOAP Note,2012-09-25


### Use loinc codes for albuminuria and hs-CRP

In [9]:
%%read_sql
    select
        loinc_num,
        component

    from RWD_DB.RWD.PELICAN_E_LOINC
    
    where component ilike '%C reactive protein%'
        or component ilike '%albumin/creatinine%'

Query started at 03:57:49 PM India Standard Time; Query executed in 0.04 m

Unnamed: 0,loinc_num,component
0,16503-5,C reactive protein
1,35648-5,C reactive protein
2,20621-9,Albumin/Creatinine
3,13705-9,Albumin/Creatinine
4,59182-6,C reactive protein
5,44707-8,Albumin/Creatinine
6,1988-5,C reactive protein
7,45062-7,C reactive protein
8,34535-5,Microalbumin/Creatinine ratio panel
9,71426-1,C reactive protein


In [113]:
%%read_sql

create or replace temporary table st_ath_ref5 as

select
    'dx' as cat1,
    'no_cvd' cat2,
    'lab_val' as cat3,
    null as cat4,
    null as cat5,
    null as cat6,
    'loinc' as type,
    cast(loinc_num as varchar(20))value,
    RELATEDNAMES2 as description,
    current_date() as active_date,
    'Swar' as active_reason,
    null as deactive_date,
    null as deactive_reason,
    'RWD_DB.RWD.PELICAN_E_LOINC' as source
from 
    RWD_DB.RWD.PELICAN_E_LOINC
where
  component ilike '%C reactive protein%'
    or component ilike '%albumin/creatinine%'

Query started at 10:16:41 PM India Standard Time; Query executed in 0.11 m

Unnamed: 0,status
0,Table ST_ATH_REF5 successfully created.


## Category 3: High CVD risk

### SNOMED: Smoking codes

In [118]:
%%read_sql

create or replace temporary table st_ath_ref6 as

select
    'dx' as cat1,
    'high_cvd' cat2,
    'smoke' as cat3,
    null as cat4,
    null as cat5,
    null as cat6,
    'snomed' as type,
    cast(smoke_id as varchar(30))value, 
    description as description,
    current_date() as active_date,
    'Swar' as active_reason,
    null as deactive_date,
    null as deactive_reason,
    'RWD_DB.RWD.PELICAN_SMOKE' as source
from 
    RWD_DB.RWD.PELICAN_SMOKE
where
  description not ilike '%snuff%'
    or description not ilike '%former smoker%'
    or description not ilike '%ex-smoker%'
    or description not ilike '%non-smoker%'

Query started at 10:19:45 PM India Standard Time; Query executed in 0.09 m

Unnamed: 0,status
0,Table ST_ATH_REF6 successfully created.


### Loinc codes
- hs-CRP >2.0
- Impaired renal function
- Coronary calcium score >300 Agatston units (AU)

In [21]:
%%read_sql
    select
        loinc_num,
        component

    from RWD_DB.RWD.PELICAN_E_LOINC
    
    where component ilike '%Glomerular filtration rate/1.73 sq M%'
        --or component ilike '%coronary%'
       -- or loinc_num in '%83289%'

Query started at 05:44:15 PM India Standard Time; Query executed in 0.04 m

Unnamed: 0,loinc_num,component
0,50210-4,Glomerular filtration rate/1.73 sq M.predicted
1,65862-5,Recurring coronary heart disease 2Y risk
2,50384-7,Glomerular filtration rate/1.73 sq M.predicted
3,58331-0,Decedent's cause of death due to coronary hear...
4,65850-0,Hard coronary heart disease 10Y risk
5,67429-1,Have you ever had coronary artery catheterization
6,33914-3,Glomerular filtration rate/1.73 sq M.predicted
7,65858-3,Coronary event 10Y risk
8,58322-9,Coronary insufficiency in interim
9,69405-9,Glomerular filtration rate/1.73 sq M.predicted


In [115]:
%%read_sql

create or replace temporary table st_ath_ref7 as

select
    'dx' as cat1,
    'high_cvd' cat2,
    'lab_val' as cat3,
    null as cat4,
    null as cat5,
    null as cat6,
    'loinc' as type,
    cast(loinc_num as varchar(20))value,
    RELATEDNAMES2 as description,
    current_date() as active_date,
    'Swar' as active_reason,
    null as deactive_date,
    null as deactive_reason,
    'RWD_DB.RWD.PELICAN_E_LOINC' as source
from 
    RWD_DB.RWD.PELICAN_E_LOINC
where
  component ilike '%C reactive protein%'
    or component ilike '%Glomerular filtration rate/1.73 sq M%'

Query started at 10:17:21 PM India Standard Time; Query executed in 0.08 m

Unnamed: 0,status
0,Table ST_ATH_REF7 successfully created.


## Merging all the tables as reference table

In [119]:
%%read_sql df

select * from st_ath_ref1
union
select * from st_ath_ref2
union
select * from st_ath_ref3
union
select * from st_ath_ref4
union
select * from st_ath_ref5
union
select * from st_ath_ref6
union
select * from st_ath_ref7
union
select * from st_ath_ref8

Query started at 10:20:01 PM India Standard Time; Query executed in 0.14 m

Unnamed: 0,cat1,cat2,cat3,cat4,cat5,cat6,type,value,description,active_date,active_reason,deactive_date,deactive_reason,source
0,dx,cvd,ath,,,,snomed,253728007,Right dominant coronary system (disorder),2019-01-24,Swar,,,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
1,dx,cvd,ath,,,,snomed,232736004,Open operation for coronary artery fistula (pr...,2019-01-24,Swar,,,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
2,dx,cvd,ath,,,,snomed,399957001,Peripheral arterial occlusive disease (disorder),2019-01-24,Swar,,,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
3,dx,cvd,ath,,,,snomed,277198009,Atherosclerotic aneurysm (disorder),2019-01-24,Swar,,,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
4,dx,cvd,ath,,,,snomed,698357006,Abdominal aortic aneurysm screening declined (...,2019-01-24,Swar,,,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
5,dx,cvd,ath,,,,snomed,175012009,Other specified saphenous vein graft replaceme...,2019-01-24,Swar,,,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
6,dx,cvd,ath,,,,snomed,232737008,Embolization of coronary artery fistula (proce...,2019-01-24,Swar,,,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
7,dx,cvd,ath,,,,snomed,266260007,Other peripheral vascular disease (disorder),2019-01-24,Swar,,,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
8,dx,cvd,ath,,,,snomed,175030002,Prosthetic replacement of one coronary artery ...,2019-01-24,Swar,,,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
9,dx,cvd,ath,,,,snomed,88596007,Coronary artery arising from aorta (disorder),2019-01-24,Swar,,,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT


In [120]:
# Export to check, and then will inactivate codes that are not relevant and re-upload as final reference table
df.to_excel("out/allcodes_check.xlsx", index=False)

## Upload into snowflake

In [4]:
cvd_ref = pd.read_excel("in/allcodes_ref.xlsx")

In [5]:
snow.drop_table("st_codes_ref")

DROP TABLE IF EXISTS sandbox_analytics.sandbox.st_codes_ref;
Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
Table sandbox_analytics.sandbox.st_codes_ref dropped! (╯°□°）╯︵ ┻━┻


In [6]:
snow.upload_dataframe(cvd_ref, "st_codes_ref")

Upload into sandbox_analytics.sandbox.st_codes_ref successful! ┬──┬◡ﾉ(°-°ﾉ)


In [7]:
%%read_sql

select *
from st_codes_ref
where deactive_date is not null

Query started at 09:16:10 AM India Standard Time; Query executed in 0.08 m

Unnamed: 0,cat1,cat2,cat3,cat4,cat5,cat6,type,value,description,active_date,active_reason,deactive_date,deactive_reason,source
0,dx,no_cvd,dia,,,,icd_10,O24819,Other pre-existing diabetes mellitus in pregna...,2019-01-24,Swar,2019-01-24,not applicable,RWD_DB.RWD.ICD_GROUPER
1,dx,no_cvd,dia,,,,snomed,472969004,History of diabetes mellitus type 2 (situation),2019-01-24,Swar,2019-01-24,not applicable,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
2,dx,no_cvd,dia,,,,snomed,430679000,Family history of diabetes mellitus type 2 (si...,2019-01-24,Swar,2019-01-24,not applicable,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
3,dx,no_cvd,dia,,,,snomed,420662003,Coma associated with diabetes mellitus (disorder),2019-01-24,Swar,2019-01-24,not applicable,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
4,dx,no_cvd,dia,,,,snomed,199225007,Diabetes mellitus during pregnancy - baby deli...,2019-01-24,Swar,2019-01-24,not applicable,RWD_DB.RWD.PELICAN_E_SNOMED_CONCEPT
5,dx,no_cvd,dia,,,,icd_10,O243,Unspecified pre-existing diabetes mellitus in ...,2019-01-24,Swar,2019-01-24,not applicable,RWD_DB.RWD.ICD_GROUPER
6,dx,no_cvd,dia,,,,icd_10,O240,Pre-existing type 1 diabetes mellitus in pregn...,2019-01-24,Swar,2019-01-24,not applicable,RWD_DB.RWD.ICD_GROUPER
7,dx,cvd,ath,,,,icd_10,T463X5S,Adverse effect of coronary vasodilators sequela,2019-01-24,Swar,2019-01-24,not applicable,RWD_DB.RWD.ICD_GROUPER
8,dx,no_cvd,dia,,,,icd_10,E09618,Drug or chemical induced diabetes mellitus wit...,2019-01-24,Swar,2019-01-24,not applicable,RWD_DB.RWD.ICD_GROUPER
9,dx,no_cvd,dia,,,,icd_10,E09630,Drug or chemical induced diabetes mellitus wit...,2019-01-24,Swar,2019-01-24,not applicable,RWD_DB.RWD.ICD_GROUPER


# Cohorts with CVD categories

## Cat 1: ath CVD

### ICD 10

In [7]:
%%read_sql

select * from RWD_DB.RWD.PELICAN_DIAGNOSIS_ICD10 limit 3

Query started at 02:10:09 PM India Standard Time; Query executed in 0.13 m

Unnamed: 0,diagnosis_id,icd10,etl_icd10
0,8315548696366771885,Z00.129,Z00129
1,544951539877855949,K21.9,K219
2,-2694686121832699510,Z00.121,Z00121


In [15]:
%%read_sql

create or replace table st_cvd_diag1 as

select *
from
    RWD_DB.RWD.PELICAN_DIAGNOSIS_ICD10
where
    icd10 in (select cast(value as varchar(30))value from st_codes_ref where cat2 = 'cvd' and deactive_date is null)

Query started at 02:30:14 PM India Standard Time; Query executed in 0.09 m

Unnamed: 0,status
0,Table ST_CVD_DIAG1 successfully created.


In [16]:
%%read_sql

create or replace table st_cvd_diag2 as

select *
from
    RWD_DB.RWD.PELICAN_DIAGNOSIS
where
    diagnosis_id in (select diagnosis_id from st_cvd_diag1)

Query started at 02:30:23 PM India Standard Time; Query executed in 0.07 m

Unnamed: 0,status
0,Table ST_CVD_DIAG2 successfully created.


In [17]:
snow.select("select count(distinct patient_id) from st_cvd_diag2")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,0


In [11]:
%%read_sql

create or replace table st_cvd_diag3 as

select *
from
    RWD_DB.RWD.PELICAN_TRANSCRIPT_DIAGNOSIS
where
    diagnosis_id in (select diagnosis_id from st_cvd_diag1)

Query started at 02:26:16 PM India Standard Time; Query executed in 0.07 m

Unnamed: 0,status
0,Table ST_CVD_DIAG3 successfully created.


In [12]:
%%read_sql

create or replace table st_cvd_diag4 as

select *
from
    RWD_DB.RWD.PELICAN_TRANSCRIPT
where
    transcript_id in (select transcript_id from st_cvd_diag3)

Query started at 02:27:56 PM India Standard Time; Query executed in 0.07 m

Unnamed: 0,status
0,Table ST_CVD_DIAG4 successfully created.


In [13]:
snow.select("select count(distinct patient_id) from st_cvd_diag4")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,0


### SNOMED

In [18]:
%%read_sql

select * from RWD_DB.RWD.PELICAN_DIAGNOSIS_SNOMED limit 3

Query started at 02:33:42 PM India Standard Time; Query executed in 0.12 m

Unnamed: 0,diagnosis_id,concept_id,source
0,-1647543161824374421,81546003,original
1,-1647543161824374421,8771003,original
2,4529733700115316151,249489001,original


In [125]:
%%read_sql

create or replace table st_cvd_diag1 as

select diagnosis_id,
       cast(concept_id as varchar(30))concept_id
from
    RWD_DB.RWD.PELICAN_DIAGNOSIS_SNOMED
where
    cast(concept_id as varchar(30)) in (select cast(value as varchar(30)) from st_codes_ref where 
                                        cat2 = 'cvd' and deactive_date is null)

Query started at 11:57:57 PM India Standard Time; Query executed in 0.16 m

Unnamed: 0,status
0,Table ST_CVD_DIAG1 successfully created.


In [30]:
snow.select("select * from st_cvd_diag1")

Unnamed: 0,diagnosis_id,concept_id
0,-7574159237769430888,443502000
1,3921892902994187507,53741008
2,-1011263075015495000,53741008
3,-3391909984665047065,401314000
4,-6055355806616496491,400047006
5,-3587195722699523912,422504002
6,-2753078344199855221,53741008
7,-4410432161346474011,442421004
8,-4225113749115039237,400047006
9,-7487005630462581621,1755008


In [126]:
%%read_sql

create or replace table st_cvd_diag2 as

select *
from
    RWD_DB.RWD.PELICAN_DIAGNOSIS
where
    diagnosis_id in (select diagnosis_id from st_cvd_diag1)

Query started at 11:58:23 PM India Standard Time; Query executed in 0.18 m

Unnamed: 0,status
0,Table ST_CVD_DIAG2 successfully created.


In [127]:
snow.select("select count(distinct patient_id) from st_cvd_diag2")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,1714816


In [37]:
snow.select("select * from st_cvd_diag2 limit 3")

Unnamed: 0,diagnosis_id,patient_id,provider_id,icd9,is_active,start_date,start_date_source,stop_date,last_modified,created_at,etl_icd9
0,-2970930843487979504,F348B95E-9B9E-3F4A-12B2-9E81E4DD46C5,16DF30D3-3ADA-9E0D-EB48-F40B2EDAC1B9,414.00,True,2016-03-23,original,2016-03-23,2016-03-23,2016-03-23,41400
1,15860238010725301,EA403ACC-D11D-9748-705F-804DB548F939,D6769763-4176-190A-A31C-9FFA52E3B715,443.89,True,2015-11-16,original,2015-11-16,2015-11-17,2015-11-17,44389
2,-8471554834044165081,5B311FAA-35D6-E7E9-F90F-AEFC77989077,CCB09F93-1BC0-C3A0-9500-0A588A12A38F,V45.82,True,2012-06-09,transcript,,2012-06-10,2012-06-10,V4582


In [128]:
%%read_sql

create or replace table st_cvd_diag3 as

select *
from
    RWD_DB.RWD.PELICAN_TRANSCRIPT_DIAGNOSIS
where
    diagnosis_id in (select diagnosis_id from st_cvd_diag1)

Query started at 11:58:47 PM India Standard Time; Query executed in 0.22 m

Unnamed: 0,status
0,Table ST_CVD_DIAG3 successfully created.


In [129]:
%%read_sql

create or replace table st_cvd_diag4 as

select *
from
    RWD_DB.RWD.PELICAN_TRANSCRIPT
where
    transcript_id in (select transcript_id from st_cvd_diag3)

Query started at 11:59:05 PM India Standard Time; Query executed in 0.33 m

Unnamed: 0,status
0,Table ST_CVD_DIAG4 successfully created.


In [130]:
snow.select("select count(distinct patient_id) from st_cvd_diag4")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,1600421


In [36]:
snow.select("select * from st_cvd_diag4 limit 3")

Unnamed: 0,transcript_id,patient_id,provider_id,signed_by_provider_id,signed_by_time,dos,weight,height,c_bmi,systolic_bp,diastolic_bp,pulse,resp_rate,note_type,last_modified
0,1531009268484430831,776F0B2D-7EA5-0457-AA14-3E9A3F657534,451BBD5D-4944-2F2C-9F87-8919628ADEFF,451BBD5D-4944-2F2C-9F87-8919628ADEFF,2018-06-21,2018-06-21,170.0,64.0,29,110,80,73.0,,SOAP Note,2018-06-21
1,-8481172094830778118,9AE9F690-1EBC-8E4A-D97F-A50A25D9B9F9,12DE6A8C-0CA5-6A4A-6ADB-0BEFBB2FEA56,12DE6A8C-0CA5-6A4A-6ADB-0BEFBB2FEA56,2013-12-24,2013-12-24,190.0,66.0,31,140,70,76.0,0.0,SOAP Note,2013-12-24
2,-1685019848704111383,B4E46E4B-B454-1294-FEE3-75C922DCDA79,13DC8834-BA9E-F4B8-25EA-6A3DAFCC1F6C,13DC8834-BA9E-F4B8-25EA-6A3DAFCC1F6C,2013-01-19,2013-01-19,170.0,64.0,29,130,78,66.0,12.0,SOAP Note,2013-01-19


### Observational cohort

In [131]:
%%read_sql

create or replace table st_cvd_cohort as

select diagnosis_id,
       patient_id
    from st_cvd_diag2
    
union

select transcript_id as diagnosis_id,
        patient_id
    from st_cvd_diag4

Query started at 11:59:39 PM India Standard Time; Query executed in 0.15 m

Unnamed: 0,status
0,Table ST_CVD_COHORT successfully created.


In [71]:
snow.select("select count(distinct patient_id) from st_cvd_cohort")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,1714817


## Cat 2: Diabetes, no ath CVD

### Diabetes cohort based on ICD codes

In [39]:
%%read_sql

create or replace table st_dia_diag1 as

select *
from
    RWD_DB.RWD.PELICAN_DIAGNOSIS_ICD10
where
    cast(icd10 as varchar(30)) in (select cast(value as varchar(30))value from st_codes_ref 
                                   where cat3 = 'dia' and deactive_date is null)

Query started at 04:56:40 PM India Standard Time; Query executed in 0.19 m

Unnamed: 0,status
0,Table ST_DIA_DIAG1 successfully created.


In [40]:
%%read_sql

create or replace table st_dia_diag2 as

select *
from
    RWD_DB.RWD.PELICAN_DIAGNOSIS
where
    diagnosis_id in (select diagnosis_id from st_dia_diag1)

Query started at 05:01:26 PM India Standard Time; Query executed in 0.22 m

Unnamed: 0,status
0,Table ST_DIA_DIAG2 successfully created.


In [41]:
snow.select("select count(distinct patient_id) from st_dia_diag2")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,25


In [42]:
%%read_sql

create or replace table st_dia_diag3 as

select *
from
    RWD_DB.RWD.PELICAN_TRANSCRIPT_DIAGNOSIS
where
    diagnosis_id in (select diagnosis_id from st_dia_diag1)

Query started at 05:03:21 PM India Standard Time; Query executed in 0.19 m

Unnamed: 0,status
0,Table ST_DIA_DIAG3 successfully created.


In [43]:
%%read_sql

create or replace table st_dia_diag4 as

select *
from
    RWD_DB.RWD.PELICAN_TRANSCRIPT
where
    transcript_id in (select transcript_id from st_dia_diag3)

Query started at 05:04:36 PM India Standard Time; Query executed in 0.18 m

Unnamed: 0,status
0,Table ST_DIA_DIAG4 successfully created.


In [44]:
snow.select("select count(distinct patient_id) from st_dia_diag4")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,23


### Diabetes cohort based on SNOMED codes

In [61]:
%%read_sql

create or replace table st_dia_diag1 as

select diagnosis_id,
       cast(concept_id as varchar(30))concept_id
from
    RWD_DB.RWD.PELICAN_DIAGNOSIS_SNOMED
where
    cast(concept_id as varchar(30)) in (select cast(value as varchar(30)) from st_codes_ref where 
                                        cat2 = 'no_cvd' and cat3 = 'dia' and deactive_date is null)

Query started at 06:13:45 PM India Standard Time; Query executed in 0.19 m

Unnamed: 0,status
0,Table ST_DIA_DIAG1 successfully created.


In [63]:
snow.select("select * from st_dia_diag1 limit 20")

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...


Unnamed: 0,diagnosis_id,concept_id
0,3460183952550197985,421326000
1,2750833049353030557,313436004
2,1337704167552891567,73211009
3,5848262600154338474,443694000
4,5848262600154338474,44054006
5,-7128667845831263408,313436004
6,-2862032206313971798,313436004
7,7585664655965883130,313436004
8,-6820747276203137593,313436004
9,8769031058684315031,313436004


In [64]:
%%read_sql

create or replace table st_dia_diag2 as

select *
from
    RWD_DB.RWD.PELICAN_DIAGNOSIS
where
    diagnosis_id in (select diagnosis_id from st_dia_diag1)

Query started at 06:18:27 PM India Standard Time; Query executed in 0.15 m

Unnamed: 0,status
0,Table ST_DIA_DIAG2 successfully created.


In [65]:
snow.select("select count(distinct patient_id) from st_dia_diag2")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,4205579


In [66]:
%%read_sql

create or replace table st_dia_diag3 as

select *
from
    RWD_DB.RWD.PELICAN_TRANSCRIPT_DIAGNOSIS
where
    diagnosis_id in (select diagnosis_id from st_dia_diag1)

Query started at 06:25:14 PM India Standard Time; Query executed in 0.16 m

Unnamed: 0,status
0,Table ST_DIA_DIAG3 successfully created.


In [14]:
%%read_sql

create or replace table st_dia_diag4 as

select *
from
    RWD_DB.RWD.PELICAN_TRANSCRIPT
where
    transcript_id in (select transcript_id from st_dia_diag3)
    --and 
    --(systolic_bp > 140 or diastolic_bp > 90)

Query started at 10:01:45 AM India Standard Time; Query executed in 0.45 m

Unnamed: 0,status
0,Table ST_DIA_DIAG4 successfully created.


In [15]:
snow.select("select count(distinct patient_id) from st_dia_diag4")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,3966086


### Sub-cohort 1 (diabetes + blood pressure)

In [52]:
%%read_sql

create or replace table st_dia_cohort1 as

select *
from
    RWD_DB.RWD.PELICAN_TRANSCRIPT
where
    transcript_id in (select transcript_id from st_dia_diag3)
    and 
    (systolic_bp > 140 or diastolic_bp > 90)
    and
    (patient_id not in (select patient_id from st_cvd_cohort))

Query started at 12:51:40 PM India Standard Time; Query executed in 0.54 m

Unnamed: 0,status
0,Table ST_DIA_COHORT1 successfully created.


### Loinc codes

In [41]:
%%read_sql

create or replace table st_dia_loinc as

select
    patient_id,
    loinc_num,
    result_status,
    report_date,
    obs_quan,
    obs_qual,
    unit

from
    RWD_DB.RWD.PELICAN_LABORDER
where
    cast(loinc_num as varchar(30)) in (select cast(value as varchar(30)) from st_codes_ref where 
                                        cat2 = 'no_cvd' and cat3 = 'lab_val' and deactive_date is null)
    and ((obs_quan > 0.2 and unit ilike 'mg/dL')
        or (obs_quan > 0.2 and unit ilike 'mg/L')
        or (obs_quan > 19.05 and unit ilike 'nmol/L')
        or (obs_quan > 30 and unit ilike 'mg/g')
        or (obs_quan > 30 and unit ilike 'mcg/mg'))

Query started at 11:59:51 AM India Standard Time; Query executed in 0.14 m

Unnamed: 0,status
0,Table ST_DIA_LOINC successfully created.


In [81]:
snow.select("select count(distinct patient_id) from st_dia_loinc")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,826815


In [79]:
%%read_sql

select * from st_dia_loinc limit 20

Query started at 07:58:33 PM India Standard Time; Query executed in 0.05 m

Unnamed: 0,patient_id,loinc_num,result_status,report_date,obs_quan,obs_qual,unit
0,5ED9D18E-E198-7EA2-377B-CEA716C3707E,9318-7,Final,2017-06-19,14.0,,mcg/mg creat
1,5ED9D18E-E198-7EA2-377B-CEA716C3707E,9318-7,Final,2018-01-08,9.0,,mcg/mg creat
2,92EAFCD3-900A-FBB5-3988-18F3724AA6A7,1988-5,Final,2018-06-07,3.6,,mg/L
3,92EAFCD3-900A-FBB5-3988-18F3724AA6A7,1988-5,Final,2017-09-09,0.3,,mg/dL
4,92EAFCD3-900A-FBB5-3988-18F3724AA6A7,1988-5,Final,2017-02-18,0.25,,mg/dL
5,92EAFCD3-900A-FBB5-3988-18F3724AA6A7,1988-5,Final,2016-12-16,0.38,,mg/dL
6,92EAFCD3-900A-FBB5-3988-18F3724AA6A7,1988-5,Final,2017-07-11,0.2,,mg/dL
7,FF57747B-CB62-8E60-B501-7BFCCBE17CE3,30522-7,Final,2014-04-20,0.7,,mg/L
8,A1A310BE-8716-B099-636F-20D04F4297CC,1988-5,Final,2015-06-27,2.1,,mg/L
9,4457AF5D-D9C0-B9A3-10F7-9F1DDE75B47D,9318-7,Final,2016-02-25,23.0,,mcg/mg creat


### Sub-cohort 2: Diabetes + lab value

In [53]:
%%read_sql

create or replace table st_dia_cohort2 as

select * from
    st_dia_diag2
where
    patient_id in (select patient_id from st_dia_loinc)
    and (patient_id not in (select patient_id from st_cvd_cohort))

Query started at 12:53:03 PM India Standard Time; Query executed in 0.12 m

Unnamed: 0,status
0,Table ST_DIA_COHORT2 successfully created.


In [19]:
snow.select("select count(distinct patient_id) from st_dia_cohort2")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,107590


In [27]:
%%read_sql

select * from st_dia_cohort1 limit 3

Query started at 11:10:38 AM India Standard Time; Query executed in 0.19 m

Unnamed: 0,transcript_id,patient_id,provider_id,signed_by_provider_id,signed_by_time,dos,weight,height,c_bmi,systolic_bp,diastolic_bp,pulse,resp_rate,note_type,last_modified
0,-4864588877273464414,86E10B40-39E4-619E-329A-E0A0236883EE,8FB3D5A1-B3F6-55EF-D900-B77E1D79F60B,36EECDFF-5CDA-AE62-CC28-87D430A108CE,2013-12-20,2013-11-18,195.0,66.0,31.0,122,106,0.0,0.0,SOAP Note,2013-12-20
1,-1197041853241163279,57D30B50-BF75-BC0F-AE3B-99B3C79BCE3C,D751ECA1-93B7-ED93-8410-7639EF3F284D,,,2017-01-15,,,,158,75,72.0,17.0,SOAP Note,2017-01-22
2,6669040148222545556,57D30B50-BF75-BC0F-AE3B-99B3C79BCE3C,D751ECA1-93B7-ED93-8410-7639EF3F284D,,,2018-03-27,,66.0,,193,101,109.0,18.0,SOAP Note,2018-03-27


In [28]:
%%read_sql

select * from st_dia_cohort2 limit 3

Query started at 11:11:37 AM India Standard Time; Query executed in 0.11 m

Unnamed: 0,diagnosis_id,patient_id,provider_id,icd9,is_active,start_date,start_date_source,stop_date,last_modified,created_at,etl_icd9
0,-6624373114744605041,E30E4C75-8015-68DD-E7CD-8A62D0F437EF,041B7F54-AEB1-6FDC-0A5A-E46D0F669E28,250.8,True,2013-05-28,transcript,,2013-05-28,2013-05-28,25080
1,5486820691430820559,6027EF8E-02A0-053D-8B68-BCAAE5DBC4C2,F426FF34-DDAD-210B-CD7B-AF0025533BB7,250.0,True,2011-09-08,transcript,,2016-01-23,2011-09-08,25000
2,-2655319020650824106,6027EF8E-02A0-053D-8B68-BCAAE5DBC4C2,F426FF34-DDAD-210B-CD7B-AF0025533BB7,250.8,True,2016-01-23,transcript,,2016-01-23,2016-01-23,25080


In [54]:
%%read_sql

create or replace table st_dia_cohort3 as

select * from
    st_dia_diag4
where
    patient_id in (select patient_id from st_dia_loinc)
    and (patient_id not in (select patient_id from st_cvd_cohort))

Query started at 12:54:04 PM India Standard Time; Query executed in 0.12 m

Unnamed: 0,status
0,Table ST_DIA_COHORT3 successfully created.


In [55]:
snow.select("select count(distinct patient_id) from st_dia_cohort3")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,83986


In [29]:
%%read_sql

select * from st_dia_cohort3 limit 3

Query started at 11:16:14 AM India Standard Time; Query executed in 0.13 m

Unnamed: 0,transcript_id,patient_id,provider_id,signed_by_provider_id,signed_by_time,dos,weight,height,c_bmi,systolic_bp,diastolic_bp,pulse,resp_rate,note_type,last_modified
0,-4498820566562074959,9BCBE199-6BB4-3AF1-74A4-2F8FFBEBB28F,6E258E21-319F-6F6E-C34F-17720570402E,6E258E21-319F-6F6E-C34F-17720570402E,2017-05-20,2017-05-20,,,,,,,,SOAP Note,2017-05-20
1,-2631246346071385496,9BCBE199-6BB4-3AF1-74A4-2F8FFBEBB28F,6B6DA081-EC26-BFB3-124A-D2BD19F3F1A3,6B6DA081-EC26-BFB3-124A-D2BD19F3F1A3,2017-11-16,2017-11-16,,,,,,,,SOAP Note,2017-11-16
2,-4812418967974932013,C73EF1D2-1CC9-5392-3759-1794AF31380D,424D4EDE-4660-78BF-6EA1-6BE65CE1748E,424D4EDE-4660-78BF-6EA1-6BE65CE1748E,2016-12-12,2016-12-09,215.0,62.0,39.0,148.0,84.0,85.0,,SOAP Note,2016-12-12


### Smoking table

In [82]:
%%read_sql

select * from RWD_DB.RWD.PELICAN_SMOKE

Query started at 08:19:03 PM India Standard Time; Query executed in 0.07 m

Unnamed: 0,smoke_id,concept_id,description
0,16,77176002,"Smoker, current status unknown"
1,4,428071000124103,Up to 1 pack per day
2,3,449868002,Few (1-3) cigarettes per day
3,18,428061000124105,Light tobacco smoker
4,26,160606002,Very heavy cigarette smoker (40+ cigs/day)
5,7,77176002,Current status unknown
6,2,8517006,0 cigarettes per day (previous smoker)
7,21,228503001,Ex-user of moist powdered tobacco
8,27,59978006,Cigar smoker
9,9,77176002,Current tobacco user


In [56]:
%%read_sql

create or replace table st_dia_smoke as

select patient_smoke_id,
       patient_id,
       smoke_id

from
    RWD_DB.RWD.PELICAN_PATIENT_SMOKE
where
    cast(smoke_id as varchar(30)) in (select cast(value as varchar(30)) from st_codes_ref where 
                                        cat2 = 'no_cvd' and cat3 = 'smoke' 
                                          and deactive_date is null)
    and (patient_id not in (select patient_id from st_cvd_cohort))

Query started at 12:55:41 PM India Standard Time; Query executed in 0.11 m

Unnamed: 0,status
0,Table ST_DIA_SMOKE successfully created.


In [57]:
snow.select("select count(distinct patient_id) from st_dia_smoke")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,3961863


In [58]:
%%read_sql

create or replace table st_dia_smoke1 as

select * from st_dia_diag2

where
    patient_id in (select patient_id from st_dia_smoke)

Query started at 12:56:02 PM India Standard Time; Query executed in 0.10 m

Unnamed: 0,status
0,Table ST_DIA_SMOKE1 successfully created.


In [59]:
snow.select("select count(distinct patient_id) from st_dia_smoke1")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,338968


In [24]:
%%read_sql
select * from st_dia_smoke1 limit 3

Query started at 11:04:30 AM India Standard Time; Query executed in 0.12 m

Unnamed: 0,diagnosis_id,patient_id,provider_id,icd9,is_active,start_date,start_date_source,stop_date,last_modified,created_at,etl_icd9
0,-1576285550060578045,5C0EA088-6A2B-24BC-C1E4-9E2567488801,14C6C22E-F049-34FF-5900-F6B4B2FA872C,250.0,True,2012-10-12,transcript,,2012-10-12,2012-10-12,25000
1,288664481700599175,187D6E1D-A7A5-141F-45EF-75522CA1862D,A20BD26A-BF9B-D526-6E4D-8A1A268D412D,250.0,True,2012-10-22,transcript,,2016-04-01,2012-10-22,25000
2,-7928416350782677876,D3D268D1-7E46-34A5-43F1-9DE3ED6011AF,E070229E-81C6-1D7E-647E-BA98DCBD0C6A,250.0,True,2018-07-25,original,2018-07-25,2018-07-25,2018-07-25,25000


In [60]:
%%read_sql

create or replace table st_dia_smoke2 as

select * from st_dia_diag4

where
    patient_id in (select patient_id from st_dia_smoke)

Query started at 12:56:21 PM India Standard Time; Query executed in 0.10 m

Unnamed: 0,status
0,Table ST_DIA_SMOKE2 successfully created.


In [61]:
snow.select("select count(distinct patient_id) from st_dia_smoke2")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,325926


In [25]:
%%read_sql
select * from st_dia_smoke2 limit 3

Query started at 11:05:11 AM India Standard Time; Query executed in 0.11 m

Unnamed: 0,transcript_id,patient_id,provider_id,signed_by_provider_id,signed_by_time,dos,weight,height,c_bmi,systolic_bp,diastolic_bp,pulse,resp_rate,note_type,last_modified
0,-5014413004344582548,3179BB03-BF5A-0F24-C4E5-26C42EEC5531,80EF63AF-4863-C554-1142-AFFFE528DE38,80EF63AF-4863-C554-1142-AFFFE528DE38,2016-10-26,2016-10-26,230.0,70.0,33,158,86,,,SOAP Note,2016-10-26
1,2023615745903531501,6137B4DB-E549-08A8-EB22-B49365077A42,FE06A28B-C449-7478-6E38-2E12B6D5A6C3,FE06A28B-C449-7478-6E38-2E12B6D5A6C3,2013-12-31,2013-12-31,205.0,64.0,35,148,76,58.0,18.0,SOAP Note,2013-12-31
2,-283790780855990229,30EFDC99-C634-06E5-9811-527024D8B9AA,6EF5CD8E-353F-F031-4CC1-97A1C06CCB1D,6EF5CD8E-353F-F031-4CC1-97A1C06CCB1D,2016-06-21,2016-06-19,220.0,70.0,32,149,91,90.0,20.0,SOAP Note,2016-06-21


#### Final dia-smoke cohort

In [62]:
%%read_sql

create or replace table st_dia_cohort4 as
 
 select patient_id, diagnosis_id from st_dia_smoke1
 union
 select patient_id, transcript_id as diagnosis_id from st_dia_smoke2;

Query started at 12:57:08 PM India Standard Time; Query executed in 0.12 m

Unnamed: 0,status
0,Table ST_DIA_COHORT4 successfully created.


### Observational cohort

Cohort 1: st_dia_diag4 which includes patients with diabetes diagnosis and blood pressure levels
Cohort 2: all diagnosed diabetes patients with any one condition


In [63]:
%%read_sql

create or replace table st_dia_cohort as

select transcript_id as diagnosis_id, 
        patient_id
    from st_dia_cohort1
    
union

select diagnosis_id,
        patient_id
    from st_dia_cohort2

union

select transcript_id as diagnosis_id, 
        patient_id
    from st_dia_cohort3
    
union

select diagnosis_id, 
        patient_id
    from st_dia_cohort4

Query started at 12:57:22 PM India Standard Time; Query executed in 0.18 m

Unnamed: 0,status
0,Table ST_DIA_COHORT successfully created.


In [64]:
snow.select("select count(distinct patient_id) from st_dia_cohort")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,1461989


## Cat 3: High CVD

### Loinc

In [43]:
%%read_sql

create or replace table st_high_cvd_loinc as

select
    patient_id,
    loinc_num,
    result_status,
    report_date,
    obs_quan,
    obs_qual,
    unit

from
    RWD_DB.RWD.PELICAN_LABORDER
where
    cast(loinc_num as varchar(30)) in (select cast(value as varchar(30)) from st_codes_ref where 
                                        cat2 = 'high_cvd' and cat3 = 'lab_val' and deactive_date is null)
    and ((obs_quan > 0.2 and unit ilike 'mg/dL')
        or (obs_quan > 0.2 and unit ilike 'mg/L')
        or (obs_quan > 19.05 and unit ilike 'nmol/L')
        or (obs_quan < 45 and unit ilike 'mL/min per 1.73 m2'))

Query started at 12:09:32 PM India Standard Time; Query executed in 0.20 m

Unnamed: 0,status
0,Table ST_HIGH_CVD_LOINC successfully created.


In [65]:
%%read_sql

create or replace table st_high_cvd_cohort1 as

select * from st_high_cvd_loinc

where
    patient_id not in (select patient_id from st_dia_cohort)
    or
    patient_id not in (select patient_id from st_cvd_cohort)

Query started at 12:58:09 PM India Standard Time; Query executed in 0.10 m

Unnamed: 0,status
0,Table ST_HIGH_CVD_COHORT1 successfully created.


In [66]:
snow.select("select count(distinct patient_id) from st_high_cvd_cohort1")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,812279


### Smoke table

In [67]:
%%read_sql

create or replace table st_high_cvd_smoke as

select patient_id,
       smoke_id

from
    RWD_DB.RWD.PELICAN_PATIENT_SMOKE
where
    cast(smoke_id as varchar(30)) in (select cast(value as varchar(30)) from st_codes_ref where 
                                        cat2 = 'high_cvd' and cat3 = 'smoke' 
                                          and deactive_date is null)
    and 
    (patient_id not in (select patient_id from st_dia_cohort)
    or
    patient_id not in (select patient_id from st_cvd_cohort))

Query started at 12:58:43 PM India Standard Time; Query executed in 0.14 m

Unnamed: 0,status
0,Table ST_HIGH_CVD_SMOKE successfully created.


In [68]:
snow.select("select count(distinct patient_id) from st_high_cvd_smoke")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,4146428


### Final cohort

In [69]:
%%read_sql

create or replace table st_high_cvd_cohort as

select cast(loinc_num as varchar(30)) as diagnosis_id, 
        patient_id
    from st_high_cvd_cohort1
    
union

select cast(smoke_id as varchar(30)) as diagnosis_id,
        patient_id
    from st_high_cvd_smoke;

Query started at 01:00:27 PM India Standard Time; Query executed in 0.12 m

Unnamed: 0,status
0,Table ST_HIGH_CVD_COHORT successfully created.


In [70]:
snow.select("select count(distinct patient_id) from st_high_cvd_cohort")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,4889685


# Tg_HDL levels with CVD groupings

## Category 1: ath CVD

### Tg levels </> 150

In [73]:
%%read_sql

create or replace table st_tg_cvd as

select *

from
    st_tg_hdl_agg1
where
    patient_id in (select patient_id from st_cvd_cohort)

Query started at 02:23:10 PM India Standard Time; Query executed in 0.17 m

Unnamed: 0,status
0,Table ST_TG_CVD successfully created.


In [74]:
%%read_sql

select  tg_level,
        hdl_level,
        count(distinct patient_id)
        
        from st_tg_cvd
        
        --where unit ilike '%mg/dL%'
           --and hdl_unit ilike '%mg/dL%'
            
        group by tg_level, hdl_level
        --order by tg_level  

Query started at 02:23:55 PM India Standard Time; Query executed in 0.09 m

Unnamed: 0,tg_level,hdl_level,COUNT(DISTINCT PATIENT_ID)
0,,hdl_lt40,132
1,tg_gt150,hdl_gt40,117989
2,tg_lt150,hdl_lt40,78728
3,,hdl_gt40,289
4,tg_gt150,hdl_lt40,91434
5,tg_gt150,,186
6,tg_lt150,hdl_gt40,248175
7,tg_lt150,,234
8,,,3169


### Tg levels >180

In [75]:
%%read_sql

create or replace table st_tg_cvd1 as

select *

from
    st_tg_hdl_agg2
where
    patient_id in (select patient_id from st_cvd_cohort)

Query started at 02:31:09 PM India Standard Time; Query executed in 0.23 m

Unnamed: 0,status
0,Table ST_TG_CVD1 successfully created.


In [76]:
%%read_sql

select  tg_level,
        hdl_level,
        count(distinct patient_id)
        
        from st_tg_cvd1
        
        --where unit ilike '%mg/dL%'
           --and hdl_unit ilike '%mg/dL%'
            
        group by tg_level, hdl_level
        --order by tg_level  

Query started at 02:32:13 PM India Standard Time; Query executed in 0.10 m

Unnamed: 0,tg_level,hdl_level,COUNT(DISTINCT PATIENT_ID)
0,,hdl_lt40,97909
1,,hdl_gt40,271673
2,tg_gt180,,153
3,tg_gt180,hdl_gt40,77647
4,tg_gt180,hdl_lt40,72871
5,,,3295


### Tg levels >200

In [77]:
%%read_sql

create or replace table st_tg_cvd2 as

select *

from
    st_tg_hdl_agg3
where
    patient_id in (select patient_id from st_cvd_cohort)

Query started at 02:33:51 PM India Standard Time; Query executed in 0.16 m

Unnamed: 0,status
0,Table ST_TG_CVD2 successfully created.


In [78]:
%%read_sql

select  tg_level,
        hdl_level,
        count(distinct patient_id)
        
        from st_tg_cvd2
        
        --where unit ilike '%mg/dL%'
           --and hdl_unit ilike '%mg/dL%'
            
        group by tg_level, hdl_level
        --order by tg_level  

Query started at 02:34:07 PM India Standard Time; Query executed in 0.07 m

Unnamed: 0,tg_level,hdl_level,COUNT(DISTINCT PATIENT_ID)
0,,hdl_lt40,107221
1,tg_gt200,hdl_lt40,62126
2,,hdl_gt40,280940
3,tg_gt200,hdl_gt40,58367
4,tg_gt200,,137
5,,,3304


## Category 2: Dia, no ath CVD

### Tg levels </> 150

In [79]:
%%read_sql

create or replace table st_tg_dia1 as

select *

from
    st_tg_hdl_agg1
where
    patient_id in (select patient_id from st_dia_cohort)

Query started at 02:43:51 PM India Standard Time; Query executed in 0.18 m

Unnamed: 0,status
0,Table ST_TG_DIA1 successfully created.


In [80]:
%%read_sql

select  tg_level,
        hdl_level,
        count(distinct patient_id)
        
        from st_tg_dia1
        
        --where unit ilike '%mg/dL%'
           --and hdl_unit ilike '%mg/dL%'
            
        group by tg_level, hdl_level
        --order by tg_level 

Query started at 02:44:49 PM India Standard Time; Query executed in 0.12 m

Unnamed: 0,tg_level,hdl_level,COUNT(DISTINCT PATIENT_ID)
0,,hdl_lt40,204
1,tg_gt150,hdl_gt40,197688
2,tg_lt150,hdl_lt40,86069
3,,hdl_gt40,288
4,tg_gt150,hdl_lt40,151092
5,tg_gt150,,460
6,tg_lt150,hdl_gt40,288239
7,tg_lt150,,255
8,,,3932


### Tg levels >180

In [81]:
%%read_sql

create or replace table st_tg_dia2 as

select *

from
    st_tg_hdl_agg2
where
    patient_id in (select patient_id from st_dia_cohort)

Query started at 02:46:12 PM India Standard Time; Query executed in 0.21 m

Unnamed: 0,status
0,Table ST_TG_DIA2 successfully created.


In [82]:
%%read_sql

select  tg_level,
        hdl_level,
        count(distinct patient_id)
        
        from st_tg_dia2
        
        --where unit ilike '%mg/dL%'
           --and hdl_unit ilike '%mg/dL%'
            
        group by tg_level, hdl_level
        --order by tg_level 

Query started at 02:46:34 PM India Standard Time; Query executed in 0.11 m

Unnamed: 0,tg_level,hdl_level,COUNT(DISTINCT PATIENT_ID)
0,,hdl_lt40,114182
1,,hdl_gt40,327031
2,tg_gt180,,421
3,tg_gt180,hdl_gt40,141192
4,tg_gt180,hdl_lt40,128050
5,,,4066


### Tg levels >200

In [83]:
%%read_sql

create or replace table st_tg_dia3 as

select *

from
    st_tg_hdl_agg3
where
    patient_id in (select patient_id from st_dia_cohort)

Query started at 02:47:22 PM India Standard Time; Query executed in 0.14 m

Unnamed: 0,status
0,Table ST_TG_DIA3 successfully created.


In [84]:
%%read_sql

select  tg_level,
        hdl_level,
        count(distinct patient_id)
        
        from st_tg_dia3
        
        --where unit ilike '%mg/dL%'
           --and hdl_unit ilike '%mg/dL%'
            
        group by tg_level, hdl_level
        --order by tg_level 

Query started at 02:47:45 PM India Standard Time; Query executed in 0.07 m

Unnamed: 0,tg_level,hdl_level,COUNT(DISTINCT PATIENT_ID)
0,,hdl_lt40,129038
1,tg_gt200,hdl_lt40,113380
2,,hdl_gt40,343445
3,tg_gt200,hdl_gt40,111680
4,tg_gt200,,392
5,,,4078


## Category 3: High CVD

### Tg levels </> 150

In [85]:
%%read_sql

create or replace table st_tg_high_cvd1 as

select *

from
    st_tg_hdl_agg1
where
    patient_id in (select patient_id from st_high_cvd_cohort)

Query started at 02:50:06 PM India Standard Time; Query executed in 0.16 m

Unnamed: 0,status
0,Table ST_TG_HIGH_CVD1 successfully created.


In [86]:
%%read_sql

select  tg_level,
        hdl_level,
        count(distinct patient_id)
        
        from st_tg_high_cvd1
        
        --where unit ilike '%mg/dL%'
           --and hdl_unit ilike '%mg/dL%'
            
        group by tg_level, hdl_level
        --order by tg_level 

Query started at 02:50:41 PM India Standard Time; Query executed in 0.09 m

Unnamed: 0,tg_level,hdl_level,COUNT(DISTINCT PATIENT_ID)
0,,hdl_lt40,292
1,tg_gt150,hdl_gt40,328123
2,tg_lt150,hdl_lt40,156526
3,,hdl_gt40,783
4,tg_gt150,hdl_lt40,218180
5,tg_gt150,,589
6,tg_lt150,hdl_gt40,760192
7,tg_lt150,,824
8,,,7682


### Tg levels >180

In [87]:
%%read_sql

create or replace table st_tg_high_cvd2 as

select *

from
    st_tg_hdl_agg2
where
    patient_id in (select patient_id from st_high_cvd_cohort)

Query started at 02:51:26 PM India Standard Time; Query executed in 0.16 m

Unnamed: 0,status
0,Table ST_TG_HIGH_CVD2 successfully created.


In [88]:
%%read_sql

select  tg_level,
        hdl_level,
        count(distinct patient_id)
        
        from st_tg_high_cvd2
        
        --where unit ilike '%mg/dL%'
           --and hdl_unit ilike '%mg/dL%'
            
        group by tg_level, hdl_level
        --order by tg_level 

Query started at 02:51:45 PM India Standard Time; Query executed in 0.10 m

Unnamed: 0,tg_level,hdl_level,COUNT(DISTINCT PATIENT_ID)
0,,hdl_gt40,832941
1,,hdl_lt40,200542
2,tg_gt180,hdl_lt40,178707
3,tg_gt180,,504
4,tg_gt180,hdl_gt40,218825
5,,,8199


### Tg levels >200

In [89]:
%%read_sql

create or replace table st_tg_high_cvd3 as

select *

from
    st_tg_hdl_agg3
where
    patient_id in (select patient_id from st_high_cvd_cohort)

Query started at 02:52:20 PM India Standard Time; Query executed in 0.18 m

Unnamed: 0,status
0,Table ST_TG_HIGH_CVD3 successfully created.


In [90]:
%%read_sql

select  tg_level,
        hdl_level,
        count(distinct patient_id)
        
        from st_tg_high_cvd3
        
        --where unit ilike '%mg/dL%'
           --and hdl_unit ilike '%mg/dL%'
            
        group by tg_level, hdl_level
        --order by tg_level 

Query started at 02:52:49 PM India Standard Time; Query executed in 0.10 m

Unnamed: 0,tg_level,hdl_level,COUNT(DISTINCT PATIENT_ID)
0,,hdl_lt40,223670
1,tg_gt200,hdl_lt40,155156
2,,hdl_gt40,862539
3,tg_gt200,hdl_gt40,166929
4,tg_gt200,,463
5,,,8216


# Tg levels by drug categories

## Cat 1: ath CVD

### Tg levels </> 150

In [92]:
snow.select("select * from st_tg_cvd limit 3")

Unnamed: 0,patient_id,obs_quan,tg_level,unit,hdl_value,hdl_unit,hdl_level
0,2937AE58-5AC4-624A-2F2A-A358F3E9AF4B,64.0,tg_lt150,mg/dL,86.0,mg/dL,hdl_gt40
1,2937AE58-5AC4-624A-2F2A-A358F3E9AF4B,63.0,tg_lt150,mg/dL,80.0,mg/dL,hdl_gt40
2,2937AE58-5AC4-624A-2F2A-A358F3E9AF4B,70.0,tg_lt150,mg/dL,77.0,mg/dL,hdl_gt40


In [93]:
%%read_sql

create or replace table st_drugs_agg1 as 

select
    distinct a.patient_id,
    a.tg_level,
    a.hdl_level,
    1 as all_pts,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'statin' and deactive_date is null)
        then 1
        else 0
        end as statin,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'combination' and deactive_date is null)                   
           then 1
           else 0
           end as combination,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Gemfibrozil' and deactive_date is null)                   
           then 1
           else 0
           end as Gemfibrozil,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colestipol' and deactive_date is null)                  
           then 1
           else 0
           end as colestipol,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'cholestyramine' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as cholestyramine,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Fenofibric acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Fenofibric_acid,
     case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'ezetimibe' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as ezetimibe,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Clofibrate' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Clofibrate,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colesevelam' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as colesevelam,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Niacin' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Niacin,
    case
           when b.ndc_code in (select distinct value
                                from st_ndc_ref
                                where cat2 = 'Docosahexaenoic acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Docosahexaenoic_acid
    
from
    st_tg_cvd a
        join st_ndc b
            on a.patient_id = b.patient_id
--group by tg_level, hdl_level

Query started at 03:56:44 PM India Standard Time; Query executed in 0.43 m

Unnamed: 0,status
0,Table ST_DRUGS_AGG1 successfully created.


In [94]:
%%read_sql df

select
    tg_level,
    hdl_level,
    sum(all_pts) as all_pts,
    
    sum(statin) as statin,
    round(sum(statin)/sum(all_pts)*100, 2) as statin_perc,
    
    sum(combination) as combination,
    round(sum(combination)/sum(all_pts)*100, 2) as combination_perc,
    
    sum(gemfibrozil) as gemfibrozil,
    round(sum(gemfibrozil)/sum(all_pts)*100, 2) as gemfibrozil_perc,
    
    sum(colestipol) as colestipol,
    round(sum(colestipol)/sum(all_pts)*100, 2) as colestipol_perc,
    
    sum(cholestyramine) as cholestyramine,
    round(sum(cholestyramine)/sum(all_pts)*100, 2) as cholestyramine_perc,
    
    sum(fenofibric_acid) as fenofibric_acid,
    round(sum(fenofibric_acid)/sum(all_pts)*100, 2) as fenofibric_acid_perc,
    
    sum(ezetimibe) as ezetimibe,
    round(sum(ezetimibe)/sum(all_pts)*100, 2) as ezetimibe_perc,
    
    sum(clofibrate) as clofibrate,
    round(sum(clofibrate)/sum(all_pts)*100, 2) as clofibrate_perc,
    
    sum(colesevelam) as colesevelam,
    round(sum(colesevelam)/sum(all_pts)*100, 2) as colesevelam_perc,
    
    sum(niacin) as niacin,
    round(sum(niacin)/sum(all_pts)*100, 2) as niacin_perc,
    
    sum(docosahexaenoic_acid) as docosahexaenoic_acid,
    round(sum(docosahexaenoic_acid)/sum(all_pts)*100, 2) as docosahexaenoic_acid_perc

from
    st_drugs_agg1
    
group by tg_level, hdl_level
order by tg_level, hdl_level

Query started at 03:57:58 PM India Standard Time; Query executed in 0.09 m

Unnamed: 0,tg_level,hdl_level,all_pts,statin,statin_perc,combination,combination_perc,gemfibrozil,gemfibrozil_perc,colestipol,...,ezetimibe,ezetimibe_perc,clofibrate,clofibrate_perc,colesevelam,colesevelam_perc,niacin,niacin_perc,docosahexaenoic_acid,docosahexaenoic_acid_perc
0,tg_gt150,hdl_gt40,86238,70094,81.28,2066,2.4,2368,2.75,96,...,7932,9.2,0,0.0,1769,2.05,367,0.43,32,0.04
1,tg_gt150,hdl_lt40,67759,54205,80.0,1507,2.22,3090,4.56,71,...,5597,8.26,0,0.0,1265,1.87,454,0.67,42,0.06
2,tg_gt150,,177,113,63.84,6,3.39,18,10.17,0,...,25,14.12,0,0.0,2,1.13,3,1.69,0,0.0
3,tg_lt150,hdl_gt40,160008,135656,84.78,3664,2.29,2299,1.44,133,...,13390,8.37,0,0.0,2365,1.48,699,0.44,33,0.02
4,tg_lt150,hdl_lt40,55086,46388,84.21,1253,2.27,1269,2.3,56,...,4208,7.64,0,0.0,725,1.32,393,0.71,19,0.03
5,tg_lt150,,166,141,84.94,2,1.2,3,1.81,0,...,15,9.04,0,0.0,2,1.2,0,0.0,0,0.0
6,,hdl_gt40,196,167,85.2,5,2.55,4,2.04,0,...,14,7.14,0,0.0,1,0.51,0,0.0,0,0.0
7,,hdl_lt40,102,77,75.49,1,0.98,8,7.84,0,...,9,8.82,0,0.0,2,1.96,1,0.98,0,0.0
8,,,2139,1763,82.42,45,2.1,52,2.43,3,...,208,9.72,0,0.0,28,1.31,13,0.61,0,0.0


In [95]:
# Export to check, and then will inactivate codes that are not relevant and re-upload as final reference table
df.to_excel("out/CVD_cohort1.xlsx", index=False)

### Tg levels >180

In [96]:
snow.select("select * from st_tg_cvd1 limit 3")

Unnamed: 0,patient_id,obs_quan,tg_level,unit,hdl_value,hdl_unit,hdl_level
0,63CDB192-BB10-F209-B9C0-5407B8A6869F,69.0,,mg/dL,76.0,mg/dL,hdl_gt40
1,63CDB192-BB10-F209-B9C0-5407B8A6869F,43.0,,mg/dL,76.0,mg/dL,hdl_gt40
2,B9984358-3598-3BDE-81DE-26731CD4482F,94.0,,,55.0,mg/dL,hdl_gt40


In [97]:
%%read_sql

create or replace table st_drugs_agg2 as 

select
    distinct a.patient_id,
    a.tg_level,
    a.hdl_level,
    1 as all_pts,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'statin' and deactive_date is null)
        then 1
        else 0
        end as statin,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'combination' and deactive_date is null)                   
           then 1
           else 0
           end as combination,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Gemfibrozil' and deactive_date is null)                   
           then 1
           else 0
           end as Gemfibrozil,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colestipol' and deactive_date is null)                  
           then 1
           else 0
           end as colestipol,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'cholestyramine' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as cholestyramine,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Fenofibric acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Fenofibric_acid,
     case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'ezetimibe' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as ezetimibe,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Clofibrate' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Clofibrate,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colesevelam' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as colesevelam,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Niacin' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Niacin,
    case
           when b.ndc_code in (select distinct value
                                from st_ndc_ref
                                where cat2 = 'Docosahexaenoic acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Docosahexaenoic_acid
    
from
    st_tg_cvd1 a
        join st_ndc b
            on a.patient_id = b.patient_id
--group by tg_level, hdl_level

Query started at 04:01:24 PM India Standard Time; Query executed in 0.41 m

Unnamed: 0,status
0,Table ST_DRUGS_AGG2 successfully created.


In [98]:
%%read_sql df

select
    tg_level,
    hdl_level,
    sum(all_pts) as all_pts,
    
    sum(statin) as statin,
    round(sum(statin)/sum(all_pts)*100, 2) as statin_perc,
    
    sum(combination) as combination,
    round(sum(combination)/sum(all_pts)*100, 2) as combination_perc,
    
    sum(gemfibrozil) as gemfibrozil,
    round(sum(gemfibrozil)/sum(all_pts)*100, 2) as gemfibrozil_perc,
    
    sum(colestipol) as colestipol,
    round(sum(colestipol)/sum(all_pts)*100, 2) as colestipol_perc,
    
    sum(cholestyramine) as cholestyramine,
    round(sum(cholestyramine)/sum(all_pts)*100, 2) as cholestyramine_perc,
    
    sum(fenofibric_acid) as fenofibric_acid,
    round(sum(fenofibric_acid)/sum(all_pts)*100, 2) as fenofibric_acid_perc,
    
    sum(ezetimibe) as ezetimibe,
    round(sum(ezetimibe)/sum(all_pts)*100, 2) as ezetimibe_perc,
    
    sum(clofibrate) as clofibrate,
    round(sum(clofibrate)/sum(all_pts)*100, 2) as clofibrate_perc,
    
    sum(colesevelam) as colesevelam,
    round(sum(colesevelam)/sum(all_pts)*100, 2) as colesevelam_perc,
    
    sum(niacin) as niacin,
    round(sum(niacin)/sum(all_pts)*100, 2) as niacin_perc,
    
    sum(docosahexaenoic_acid) as docosahexaenoic_acid,
    round(sum(docosahexaenoic_acid)/sum(all_pts)*100, 2) as docosahexaenoic_acid_perc

from
    st_drugs_agg2
    
group by tg_level, hdl_level
order by tg_level, hdl_level

Query started at 04:01:58 PM India Standard Time; Query executed in 0.09 m

Unnamed: 0,tg_level,hdl_level,all_pts,statin,statin_perc,combination,combination_perc,gemfibrozil,gemfibrozil_perc,colestipol,...,ezetimibe,ezetimibe_perc,clofibrate,clofibrate_perc,colesevelam,colesevelam_perc,niacin,niacin_perc,docosahexaenoic_acid,docosahexaenoic_acid_perc
0,tg_gt180,hdl_gt40,58656,46932,80.01,1397,2.38,1925,3.28,63,...,5579,9.51,0,0.0,1310,2.23,263,0.45,25,0.04
1,tg_gt180,hdl_lt40,55238,43620,78.97,1228,2.22,2842,5.15,65,...,4634,8.39,0,0.0,1094,1.98,383,0.69,40,0.07
2,tg_gt180,,160,99,61.88,6,3.75,18,11.25,0,...,23,14.38,0,0.0,2,1.25,3,1.88,0,0.0
3,,hdl_gt40,175489,148367,84.54,4031,2.3,2701,1.54,148,...,14728,8.39,0,0.0,2684,1.53,760,0.43,37,0.02
4,,hdl_lt40,68649,57457,83.7,1565,2.28,1722,2.51,65,...,5313,7.74,0,0.0,960,1.4,474,0.69,23,0.03
5,,,2224,1835,82.51,45,2.02,52,2.34,3,...,219,9.85,0,0.0,29,1.3,13,0.58,0,0.0


In [99]:
# Export to check, and then will inactivate codes that are not relevant and re-upload as final reference table
df.to_excel("out/CVD_cohort2.xlsx", index=False)

### Tg levels >200

In [100]:
snow.select("select * from st_tg_cvd2 limit 3")

Unnamed: 0,patient_id,obs_quan,tg_level,unit,hdl_value,hdl_unit,hdl_level
0,63CDB192-BB10-F209-B9C0-5407B8A6869F,69.0,,mg/dL,76.0,mg/dL,hdl_gt40
1,63CDB192-BB10-F209-B9C0-5407B8A6869F,43.0,,mg/dL,76.0,mg/dL,hdl_gt40
2,B9984358-3598-3BDE-81DE-26731CD4482F,94.0,,,55.0,mg/dL,hdl_gt40


In [107]:
%%read_sql

create or replace table st_drugs_agg3 as 

select
    distinct a.patient_id,
    a.tg_level,
    a.hdl_level,
    1 as all_pts,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'statin' and deactive_date is null)
        then 1
        else 0
        end as statin,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'combination' and deactive_date is null)                   
           then 1
           else 0
           end as combination,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Gemfibrozil' and deactive_date is null)                   
           then 1
           else 0
           end as Gemfibrozil,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colestipol' and deactive_date is null)                  
           then 1
           else 0
           end as colestipol,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'cholestyramine' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as cholestyramine,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Fenofibric acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Fenofibric_acid,
     case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'ezetimibe' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as ezetimibe,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Clofibrate' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Clofibrate,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colesevelam' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as colesevelam,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Niacin' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Niacin,
    case
           when b.ndc_code in (select distinct value
                                from st_ndc_ref
                                where cat2 = 'Docosahexaenoic acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Docosahexaenoic_acid
    
from
    st_tg_cvd2 a
        join st_ndc b
            on a.patient_id = b.patient_id
--group by tg_level, hdl_level

Query started at 04:16:57 PM India Standard Time; Query executed in 0.22 m

Unnamed: 0,status
0,Table ST_DRUGS_AGG3 successfully created.


In [102]:
%%read_sql df

select
    tg_level,
    hdl_level,
    sum(all_pts) as all_pts,
    
    sum(statin) as statin,
    round(sum(statin)/sum(all_pts)*100, 2) as statin_perc,
    
    sum(combination) as combination,
    round(sum(combination)/sum(all_pts)*100, 2) as combination_perc,
    
    sum(gemfibrozil) as gemfibrozil,
    round(sum(gemfibrozil)/sum(all_pts)*100, 2) as gemfibrozil_perc,
    
    sum(colestipol) as colestipol,
    round(sum(colestipol)/sum(all_pts)*100, 2) as colestipol_perc,
    
    sum(cholestyramine) as cholestyramine,
    round(sum(cholestyramine)/sum(all_pts)*100, 2) as cholestyramine_perc,
    
    sum(fenofibric_acid) as fenofibric_acid,
    round(sum(fenofibric_acid)/sum(all_pts)*100, 2) as fenofibric_acid_perc,
    
    sum(ezetimibe) as ezetimibe,
    round(sum(ezetimibe)/sum(all_pts)*100, 2) as ezetimibe_perc,
    
    sum(clofibrate) as clofibrate,
    round(sum(clofibrate)/sum(all_pts)*100, 2) as clofibrate_perc,
    
    sum(colesevelam) as colesevelam,
    round(sum(colesevelam)/sum(all_pts)*100, 2) as colesevelam_perc,
    
    sum(niacin) as niacin,
    round(sum(niacin)/sum(all_pts)*100, 2) as niacin_perc,
    
    sum(docosahexaenoic_acid) as docosahexaenoic_acid,
    round(sum(docosahexaenoic_acid)/sum(all_pts)*100, 2) as docosahexaenoic_acid_perc

from
    st_drugs_agg3
    
group by tg_level, hdl_level
order by tg_level, hdl_level

Query started at 04:11:04 PM India Standard Time; Query executed in 0.06 m

Unnamed: 0,tg_level,hdl_level,all_pts,statin,statin_perc,combination,combination_perc,gemfibrozil,gemfibrozil_perc,colestipol,...,ezetimibe,ezetimibe_perc,clofibrate,clofibrate_perc,colesevelam,colesevelam_perc,niacin,niacin_perc,docosahexaenoic_acid,docosahexaenoic_acid_perc
0,tg_gt200,hdl_gt40,45077,35616,79.01,1081,2.4,1674,3.71,53,...,4375,9.71,0,0.0,1085,2.41,211,0.47,24,0.05
1,tg_gt200,hdl_lt40,47642,37256,78.2,1050,2.2,2684,5.63,56,...,4029,8.46,0,0.0,982,2.06,336,0.71,38,0.08
2,tg_gt200,,143,90,62.94,6,4.2,17,11.89,0,...,20,13.99,0,0.0,1,0.7,2,1.4,0,0.0
3,,hdl_gt40,181541,153271,84.43,4163,2.29,2882,1.59,154,...,15282,8.42,0,0.0,2811,1.55,787,0.43,41,0.02
4,,hdl_lt40,75213,62826,83.53,1677,2.23,1994,2.65,72,...,5830,7.75,0,0.0,1068,1.42,512,0.68,27,0.04
5,,,2231,1839,82.43,45,2.02,52,2.33,3,...,221,9.91,0,0.0,29,1.3,13,0.58,0,0.0


In [103]:
# Export to check, and then will inactivate codes that are not relevant and re-upload as final reference table
df.to_excel("out/CVD_cohort3.xlsx", index=False)

## Cat 2: Dia, no ath CVD

### Tg </> 150

In [104]:
snow.select("select * from st_tg_dia1 limit 3")

Unnamed: 0,patient_id,obs_quan,tg_level,unit,hdl_value,hdl_unit,hdl_level
0,0DF569FF-7689-2E3D-B99D-0254F2107662,174.0,tg_gt150,mg/dL,30.0,mg/dL,hdl_lt40
1,77954C8B-E43A-697A-2E87-5477BD013D93,96.0,tg_lt150,mg/dL,38.0,mg/dL,hdl_lt40
2,D7762739-40FA-8E9C-5DE2-56A34FBD13D3,234.0,tg_gt150,mg/dL,50.0,mg/dL,hdl_gt40


In [106]:
%%read_sql

create or replace table st_drugs_agg4 as 

select
    distinct a.patient_id,
    a.tg_level,
    a.hdl_level,
    1 as all_pts,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'statin' and deactive_date is null)
        then 1
        else 0
        end as statin,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'combination' and deactive_date is null)                   
           then 1
           else 0
           end as combination,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Gemfibrozil' and deactive_date is null)                   
           then 1
           else 0
           end as Gemfibrozil,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colestipol' and deactive_date is null)                  
           then 1
           else 0
           end as colestipol,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'cholestyramine' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as cholestyramine,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Fenofibric acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Fenofibric_acid,
     case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'ezetimibe' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as ezetimibe,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Clofibrate' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Clofibrate,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colesevelam' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as colesevelam,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Niacin' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Niacin,
    case
           when b.ndc_code in (select distinct value
                                from st_ndc_ref
                                where cat2 = 'Docosahexaenoic acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Docosahexaenoic_acid
    
from
    st_tg_dia1 a
        join st_ndc b
            on a.patient_id = b.patient_id
--group by tg_level, hdl_level

Query started at 04:16:36 PM India Standard Time; Query executed in 0.24 m

Unnamed: 0,status
0,Table ST_DRUGS_AGG4 successfully created.


In [108]:
%%read_sql df

select
    tg_level,
    hdl_level,
    sum(all_pts) as all_pts,
    
    sum(statin) as statin,
    round(sum(statin)/sum(all_pts)*100, 2) as statin_perc,
    
    sum(combination) as combination,
    round(sum(combination)/sum(all_pts)*100, 2) as combination_perc,
    
    sum(gemfibrozil) as gemfibrozil,
    round(sum(gemfibrozil)/sum(all_pts)*100, 2) as gemfibrozil_perc,
    
    sum(colestipol) as colestipol,
    round(sum(colestipol)/sum(all_pts)*100, 2) as colestipol_perc,
    
    sum(cholestyramine) as cholestyramine,
    round(sum(cholestyramine)/sum(all_pts)*100, 2) as cholestyramine_perc,
    
    sum(fenofibric_acid) as fenofibric_acid,
    round(sum(fenofibric_acid)/sum(all_pts)*100, 2) as fenofibric_acid_perc,
    
    sum(ezetimibe) as ezetimibe,
    round(sum(ezetimibe)/sum(all_pts)*100, 2) as ezetimibe_perc,
    
    sum(clofibrate) as clofibrate,
    round(sum(clofibrate)/sum(all_pts)*100, 2) as clofibrate_perc,
    
    sum(colesevelam) as colesevelam,
    round(sum(colesevelam)/sum(all_pts)*100, 2) as colesevelam_perc,
    
    sum(niacin) as niacin,
    round(sum(niacin)/sum(all_pts)*100, 2) as niacin_perc,
    
    sum(docosahexaenoic_acid) as docosahexaenoic_acid,
    round(sum(docosahexaenoic_acid)/sum(all_pts)*100, 2) as docosahexaenoic_acid_perc

from
    st_drugs_agg4
    
group by tg_level, hdl_level
order by tg_level, hdl_level

Query started at 04:17:18 PM India Standard Time; Query executed in 0.08 m

Unnamed: 0,tg_level,hdl_level,all_pts,statin,statin_perc,combination,combination_perc,gemfibrozil,gemfibrozil_perc,colestipol,...,ezetimibe,ezetimibe_perc,clofibrate,clofibrate_perc,colesevelam,colesevelam_perc,niacin,niacin_perc,docosahexaenoic_acid,docosahexaenoic_acid_perc
0,tg_gt150,hdl_gt40,119939,100320,83.64,2533,2.11,4442,3.7,145,...,7089,5.91,0,0.0,2758,2.3,375,0.31,48,0.04
1,tg_gt150,hdl_lt40,89260,72095,80.77,1708,1.91,5891,6.6,95,...,4762,5.34,0,0.0,1902,2.13,434,0.49,49,0.05
2,tg_gt150,,329,241,73.25,7,2.13,45,13.68,0,...,16,4.86,0,0.0,7,2.13,1,0.3,0,0.0
3,tg_lt150,hdl_gt40,154316,134725,87.3,3093,2.0,3106,2.01,159,...,8002,5.19,0,0.0,2846,1.84,421,0.27,25,0.02
4,tg_lt150,hdl_lt40,48524,41525,85.58,926,1.91,1700,3.5,55,...,2402,4.95,0,0.0,849,1.75,226,0.47,17,0.04
5,tg_lt150,,162,142,87.65,4,2.47,1,0.62,0,...,11,6.79,0,0.0,2,1.23,0,0.0,0,0.0
6,,hdl_gt40,183,152,83.06,3,1.64,7,3.83,0,...,18,9.84,0,0.0,0,0.0,1,0.55,0,0.0
7,,hdl_lt40,147,106,72.11,5,3.4,19,12.93,0,...,9,6.12,0,0.0,4,2.72,1,0.68,0,0.0
8,,,2066,1721,83.3,48,2.32,92,4.45,1,...,120,5.81,0,0.0,45,2.18,13,0.63,0,0.0


In [109]:
# Export to check, and then will inactivate codes that are not relevant and re-upload as final reference table
df.to_excel("out/Dia_cohort1.xlsx", index=False)

### Tg levels >180

In [110]:
snow.select("select * from st_tg_dia2 limit 3")

Unnamed: 0,patient_id,obs_quan,tg_level,unit,hdl_value,hdl_unit,hdl_level
0,BFD511A6-C722-5474-6533-72878DD027FD,107.0,,mg/dL,79.0,mg/dL,hdl_gt40
1,62DC1467-5595-2370-0C5B-36B4A796D4CD,94.0,,mg/dL,72.0,mg/dL,hdl_gt40
2,62DC1467-5595-2370-0C5B-36B4A796D4CD,101.0,,mg/dL,61.0,mg/dL,hdl_gt40


In [111]:
%%read_sql

create or replace table st_drugs_agg5 as 

select
    distinct a.patient_id,
    a.tg_level,
    a.hdl_level,
    1 as all_pts,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'statin' and deactive_date is null)
        then 1
        else 0
        end as statin,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'combination' and deactive_date is null)                   
           then 1
           else 0
           end as combination,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Gemfibrozil' and deactive_date is null)                   
           then 1
           else 0
           end as Gemfibrozil,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colestipol' and deactive_date is null)                  
           then 1
           else 0
           end as colestipol,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'cholestyramine' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as cholestyramine,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Fenofibric acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Fenofibric_acid,
     case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'ezetimibe' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as ezetimibe,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Clofibrate' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Clofibrate,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colesevelam' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as colesevelam,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Niacin' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Niacin,
    case
           when b.ndc_code in (select distinct value
                                from st_ndc_ref
                                where cat2 = 'Docosahexaenoic acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Docosahexaenoic_acid
    
from
    st_tg_dia2 a
        join st_ndc b
            on a.patient_id = b.patient_id
--group by tg_level, hdl_level

Query started at 04:20:26 PM India Standard Time; Query executed in 0.45 m

Unnamed: 0,status
0,Table ST_DRUGS_AGG5 successfully created.


In [112]:
%%read_sql df

select
    tg_level,
    hdl_level,
    sum(all_pts) as all_pts,
    
    sum(statin) as statin,
    round(sum(statin)/sum(all_pts)*100, 2) as statin_perc,
    
    sum(combination) as combination,
    round(sum(combination)/sum(all_pts)*100, 2) as combination_perc,
    
    sum(gemfibrozil) as gemfibrozil,
    round(sum(gemfibrozil)/sum(all_pts)*100, 2) as gemfibrozil_perc,
    
    sum(colestipol) as colestipol,
    round(sum(colestipol)/sum(all_pts)*100, 2) as colestipol_perc,
    
    sum(cholestyramine) as cholestyramine,
    round(sum(cholestyramine)/sum(all_pts)*100, 2) as cholestyramine_perc,
    
    sum(fenofibric_acid) as fenofibric_acid,
    round(sum(fenofibric_acid)/sum(all_pts)*100, 2) as fenofibric_acid_perc,
    
    sum(ezetimibe) as ezetimibe,
    round(sum(ezetimibe)/sum(all_pts)*100, 2) as ezetimibe_perc,
    
    sum(clofibrate) as clofibrate,
    round(sum(clofibrate)/sum(all_pts)*100, 2) as clofibrate_perc,
    
    sum(colesevelam) as colesevelam,
    round(sum(colesevelam)/sum(all_pts)*100, 2) as colesevelam_perc,
    
    sum(niacin) as niacin,
    round(sum(niacin)/sum(all_pts)*100, 2) as niacin_perc,
    
    sum(docosahexaenoic_acid) as docosahexaenoic_acid,
    round(sum(docosahexaenoic_acid)/sum(all_pts)*100, 2) as docosahexaenoic_acid_perc

from
    st_drugs_agg5
    
group by tg_level, hdl_level
order by tg_level, hdl_level

Query started at 04:22:30 PM India Standard Time; Query executed in 0.09 m

Unnamed: 0,tg_level,hdl_level,all_pts,statin,statin_perc,combination,combination_perc,gemfibrozil,gemfibrozil_perc,colestipol,...,ezetimibe,ezetimibe_perc,clofibrate,clofibrate_perc,colesevelam,colesevelam_perc,niacin,niacin_perc,docosahexaenoic_acid,docosahexaenoic_acid_perc
0,tg_gt180,hdl_gt40,88846,73275,82.47,1904,2.14,3775,4.25,115,...,5483,6.17,0,0.0,2140,2.41,287,0.32,45,0.05
1,tg_gt180,hdl_lt40,77194,61574,79.77,1498,1.94,5575,7.22,86,...,4159,5.39,0,0.0,1711,2.22,392,0.51,44,0.06
2,tg_gt180,,302,216,71.52,7,2.32,45,14.9,0,...,15,4.97,0,0.0,7,2.32,1,0.33,0,0.0
3,,hdl_gt40,175426,152544,86.96,3510,2.0,3878,2.21,173,...,9227,5.26,0,0.0,3262,1.86,482,0.27,37,0.02
4,,hdl_lt40,64592,54903,85.0,1240,1.92,2465,3.82,72,...,3260,5.05,0,0.0,1167,1.81,293,0.45,24,0.04
5,,,2152,1799,83.6,49,2.28,92,4.28,1,...,126,5.86,0,0.0,46,2.14,13,0.6,0,0.0


In [113]:
# Export to check, and then will inactivate codes that are not relevant and re-upload as final reference table
df.to_excel("out/Dia_cohort2.xlsx", index=False)

### Tg levels >200

In [114]:
snow.select("select * from st_tg_dia3 limit 3")

Unnamed: 0,patient_id,obs_quan,tg_level,unit,hdl_value,hdl_unit,hdl_level
0,99393675-0DD7-4A96-DA5E-F9809821C555,196.0,,mg/dL,45.0,mg/dL,hdl_gt40
1,99393675-0DD7-4A96-DA5E-F9809821C555,196.0,,mg/dL,47.0,mg/dL,hdl_gt40
2,26A26FA8-B9BA-7FCA-86DA-57977DB311FA,184.0,,mg/dL,38.0,mg/dL,hdl_lt40


In [115]:
%%read_sql

create or replace table st_drugs_agg6 as 

select
    distinct a.patient_id,
    a.tg_level,
    a.hdl_level,
    1 as all_pts,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'statin' and deactive_date is null)
        then 1
        else 0
        end as statin,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'combination' and deactive_date is null)                   
           then 1
           else 0
           end as combination,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Gemfibrozil' and deactive_date is null)                   
           then 1
           else 0
           end as Gemfibrozil,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colestipol' and deactive_date is null)                  
           then 1
           else 0
           end as colestipol,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'cholestyramine' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as cholestyramine,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Fenofibric acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Fenofibric_acid,
     case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'ezetimibe' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as ezetimibe,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Clofibrate' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Clofibrate,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colesevelam' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as colesevelam,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Niacin' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Niacin,
    case
           when b.ndc_code in (select distinct value
                                from st_ndc_ref
                                where cat2 = 'Docosahexaenoic acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Docosahexaenoic_acid
    
from
    st_tg_dia3 a
        join st_ndc b
            on a.patient_id = b.patient_id
--group by tg_level, hdl_level

Query started at 04:25:16 PM India Standard Time; Query executed in 0.38 m

Unnamed: 0,status
0,Table ST_DRUGS_AGG6 successfully created.


In [116]:
%%read_sql df

select
    tg_level,
    hdl_level,
    sum(all_pts) as all_pts,
    
    sum(statin) as statin,
    round(sum(statin)/sum(all_pts)*100, 2) as statin_perc,
    
    sum(combination) as combination,
    round(sum(combination)/sum(all_pts)*100, 2) as combination_perc,
    
    sum(gemfibrozil) as gemfibrozil,
    round(sum(gemfibrozil)/sum(all_pts)*100, 2) as gemfibrozil_perc,
    
    sum(colestipol) as colestipol,
    round(sum(colestipol)/sum(all_pts)*100, 2) as colestipol_perc,
    
    sum(cholestyramine) as cholestyramine,
    round(sum(cholestyramine)/sum(all_pts)*100, 2) as cholestyramine_perc,
    
    sum(fenofibric_acid) as fenofibric_acid,
    round(sum(fenofibric_acid)/sum(all_pts)*100, 2) as fenofibric_acid_perc,
    
    sum(ezetimibe) as ezetimibe,
    round(sum(ezetimibe)/sum(all_pts)*100, 2) as ezetimibe_perc,
    
    sum(clofibrate) as clofibrate,
    round(sum(clofibrate)/sum(all_pts)*100, 2) as clofibrate_perc,
    
    sum(colesevelam) as colesevelam,
    round(sum(colesevelam)/sum(all_pts)*100, 2) as colesevelam_perc,
    
    sum(niacin) as niacin,
    round(sum(niacin)/sum(all_pts)*100, 2) as niacin_perc,
    
    sum(docosahexaenoic_acid) as docosahexaenoic_acid,
    round(sum(docosahexaenoic_acid)/sum(all_pts)*100, 2) as docosahexaenoic_acid_perc

from
    st_drugs_agg6
    
group by tg_level, hdl_level
order by tg_level, hdl_level

Query started at 04:25:46 PM India Standard Time; Query executed in 0.07 m

Unnamed: 0,tg_level,hdl_level,all_pts,statin,statin_perc,combination,combination_perc,gemfibrozil,gemfibrozil_perc,colestipol,...,ezetimibe,ezetimibe_perc,clofibrate,clofibrate_perc,colesevelam,colesevelam_perc,niacin,niacin_perc,docosahexaenoic_acid,docosahexaenoic_acid_perc
0,tg_gt200,hdl_gt40,71740,58515,81.57,1578,2.2,3381,4.71,95,...,4521,6.3,0,0.0,1776,2.48,241,0.34,42,0.06
1,tg_gt200,hdl_lt40,69215,54735,79.08,1352,1.95,5286,7.64,79,...,3769,5.45,0,0.0,1560,2.25,364,0.53,43,0.06
2,tg_gt200,,286,203,70.98,7,2.45,43,15.04,0,...,14,4.9,0,0.0,7,2.45,1,0.35,0,0.0
3,,hdl_gt40,184465,160073,86.78,3682,2.0,4236,2.3,184,...,9761,5.29,0,0.0,3485,1.89,512,0.28,39,0.02
4,,hdl_lt40,73098,61936,84.73,1386,1.9,2924,4.0,81,...,3683,5.04,0,0.0,1362,1.86,328,0.45,28,0.04
5,,,2159,1805,83.6,49,2.27,93,4.31,1,...,126,5.84,0,0.0,46,2.13,13,0.6,0,0.0


In [117]:
# Export to check, and then will inactivate codes that are not relevant and re-upload as final reference table
df.to_excel("out/Dia_cohort3.xlsx", index=False)

## Cat 3: High CVD

### Tg levels </> 150

In [118]:
snow.select("select * from st_tg_high_cvd1 limit 3")

Unnamed: 0,patient_id,obs_quan,tg_level,unit,hdl_value,hdl_unit,hdl_level
0,1607E03C-B26C-1761-514A-EED7ADCACCB3,146.0,tg_lt150,mg/dL,54.0,mg/dL,hdl_gt40
1,1607E03C-B26C-1761-514A-EED7ADCACCB3,101.0,tg_lt150,mg/dL,47.0,mg/dL,hdl_gt40
2,1607E03C-B26C-1761-514A-EED7ADCACCB3,153.0,tg_gt150,mg/dL,46.0,mg/dL,hdl_gt40


In [119]:
%%read_sql

create or replace table st_drugs_agg7 as 

select
    distinct a.patient_id,
    a.tg_level,
    a.hdl_level,
    1 as all_pts,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'statin' and deactive_date is null)
        then 1
        else 0
        end as statin,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'combination' and deactive_date is null)                   
           then 1
           else 0
           end as combination,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Gemfibrozil' and deactive_date is null)                   
           then 1
           else 0
           end as Gemfibrozil,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colestipol' and deactive_date is null)                  
           then 1
           else 0
           end as colestipol,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'cholestyramine' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as cholestyramine,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Fenofibric acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Fenofibric_acid,
     case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'ezetimibe' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as ezetimibe,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Clofibrate' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Clofibrate,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colesevelam' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as colesevelam,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Niacin' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Niacin,
    case
           when b.ndc_code in (select distinct value
                                from st_ndc_ref
                                where cat2 = 'Docosahexaenoic acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Docosahexaenoic_acid
    
from
    st_tg_high_cvd1 a
        join st_ndc b
            on a.patient_id = b.patient_id
--group by tg_level, hdl_level

Query started at 04:41:09 PM India Standard Time; Query executed in 0.33 m

Unnamed: 0,status
0,Table ST_DRUGS_AGG7 successfully created.


In [120]:
%%read_sql df

select
    tg_level,
    hdl_level,
    sum(all_pts) as all_pts,
    
    sum(statin) as statin,
    round(sum(statin)/sum(all_pts)*100, 2) as statin_perc,
    
    sum(combination) as combination,
    round(sum(combination)/sum(all_pts)*100, 2) as combination_perc,
    
    sum(gemfibrozil) as gemfibrozil,
    round(sum(gemfibrozil)/sum(all_pts)*100, 2) as gemfibrozil_perc,
    
    sum(colestipol) as colestipol,
    round(sum(colestipol)/sum(all_pts)*100, 2) as colestipol_perc,
    
    sum(cholestyramine) as cholestyramine,
    round(sum(cholestyramine)/sum(all_pts)*100, 2) as cholestyramine_perc,
    
    sum(fenofibric_acid) as fenofibric_acid,
    round(sum(fenofibric_acid)/sum(all_pts)*100, 2) as fenofibric_acid_perc,
    
    sum(ezetimibe) as ezetimibe,
    round(sum(ezetimibe)/sum(all_pts)*100, 2) as ezetimibe_perc,
    
    sum(clofibrate) as clofibrate,
    round(sum(clofibrate)/sum(all_pts)*100, 2) as clofibrate_perc,
    
    sum(colesevelam) as colesevelam,
    round(sum(colesevelam)/sum(all_pts)*100, 2) as colesevelam_perc,
    
    sum(niacin) as niacin,
    round(sum(niacin)/sum(all_pts)*100, 2) as niacin_perc,
    
    sum(docosahexaenoic_acid) as docosahexaenoic_acid,
    round(sum(docosahexaenoic_acid)/sum(all_pts)*100, 2) as docosahexaenoic_acid_perc

from
    st_drugs_agg7
    
group by tg_level, hdl_level
order by tg_level, hdl_level

Query started at 04:45:36 PM India Standard Time; Query executed in 0.11 m

Unnamed: 0,tg_level,hdl_level,all_pts,statin,statin_perc,combination,combination_perc,gemfibrozil,gemfibrozil_perc,colestipol,...,ezetimibe,ezetimibe_perc,clofibrate,clofibrate_perc,colesevelam,colesevelam_perc,niacin,niacin_perc,docosahexaenoic_acid,docosahexaenoic_acid_perc
0,tg_gt150,hdl_gt40,126845,105097,82.85,2685,2.12,4005,3.16,180,...,9005,7.1,0,0.0,2714,2.14,534,0.42,80,0.06
1,tg_gt150,hdl_lt40,89079,71522,80.29,1704,1.91,5154,5.79,102,...,5832,6.55,0,0.0,1767,1.98,591,0.66,63,0.07
2,tg_gt150,,329,223,67.78,8,2.43,40,12.16,1,...,34,10.33,0,0.0,7,2.13,5,1.52,0,0.0
3,tg_lt150,hdl_gt40,184159,158685,86.17,3670,1.99,3032,1.65,206,...,11947,6.49,0,0.0,3154,1.71,733,0.4,67,0.04
4,tg_lt150,hdl_lt40,52962,44839,84.66,1022,1.93,1490,2.81,49,...,3372,6.37,0,0.0,829,1.57,374,0.71,22,0.04
5,tg_lt150,,261,226,86.59,5,1.92,4,1.53,1,...,19,7.28,0,0.0,4,1.53,1,0.38,0,0.0
6,,hdl_gt40,265,219,82.64,5,1.89,6,2.26,1,...,23,8.68,0,0.0,3,1.13,2,0.75,0,0.0
7,,hdl_lt40,160,121,75.63,3,1.88,16,10.0,1,...,8,5.0,0,0.0,4,2.5,1,0.63,0,0.0
8,,,2327,1941,83.41,38,1.63,98,4.21,4,...,153,6.58,0,0.0,49,2.11,14,0.6,1,0.04


In [121]:
# Export to check, and then will inactivate codes that are not relevant and re-upload as final reference table
df.to_excel("out/highcvd_cohort1.xlsx", index=False)

### Tg levels >180

In [122]:
snow.select("select * from st_tg_high_cvd2 limit 3")

Unnamed: 0,patient_id,obs_quan,tg_level,unit,hdl_value,hdl_unit,hdl_level
0,F244C92B-6A37-2536-D84A-36C958BE67B3,138.0,,mg/dL,73.0,mg/dL,hdl_gt40
1,768E7A3C-7E2A-5096-278E-FCD400F96B8F,130.0,,mg/dL,57.0,mg/dL,hdl_gt40
2,A7D1D314-B440-DF16-99D2-F9AFD9A668FD,187.0,tg_gt180,mg/dL,45.0,mg/dL,hdl_gt40


In [123]:
%%read_sql

create or replace table st_drugs_agg8 as 

select
    distinct a.patient_id,
    a.tg_level,
    a.hdl_level,
    1 as all_pts,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'statin' and deactive_date is null)
        then 1
        else 0
        end as statin,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'combination' and deactive_date is null)                   
           then 1
           else 0
           end as combination,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Gemfibrozil' and deactive_date is null)                   
           then 1
           else 0
           end as Gemfibrozil,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colestipol' and deactive_date is null)                  
           then 1
           else 0
           end as colestipol,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'cholestyramine' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as cholestyramine,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Fenofibric acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Fenofibric_acid,
     case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'ezetimibe' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as ezetimibe,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Clofibrate' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Clofibrate,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colesevelam' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as colesevelam,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Niacin' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Niacin,
    case
           when b.ndc_code in (select distinct value
                                from st_ndc_ref
                                where cat2 = 'Docosahexaenoic acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Docosahexaenoic_acid
    
from
    st_tg_high_cvd2 a
        join st_ndc b
            on a.patient_id = b.patient_id
--group by tg_level, hdl_level

Query started at 04:51:26 PM India Standard Time; Query executed in 0.31 m

Unnamed: 0,status
0,Table ST_DRUGS_AGG8 successfully created.


In [124]:
%%read_sql df

select
    tg_level,
    hdl_level,
    sum(all_pts) as all_pts,
    
    sum(statin) as statin,
    round(sum(statin)/sum(all_pts)*100, 2) as statin_perc,
    
    sum(combination) as combination,
    round(sum(combination)/sum(all_pts)*100, 2) as combination_perc,
    
    sum(gemfibrozil) as gemfibrozil,
    round(sum(gemfibrozil)/sum(all_pts)*100, 2) as gemfibrozil_perc,
    
    sum(colestipol) as colestipol,
    round(sum(colestipol)/sum(all_pts)*100, 2) as colestipol_perc,
    
    sum(cholestyramine) as cholestyramine,
    round(sum(cholestyramine)/sum(all_pts)*100, 2) as cholestyramine_perc,
    
    sum(fenofibric_acid) as fenofibric_acid,
    round(sum(fenofibric_acid)/sum(all_pts)*100, 2) as fenofibric_acid_perc,
    
    sum(ezetimibe) as ezetimibe,
    round(sum(ezetimibe)/sum(all_pts)*100, 2) as ezetimibe_perc,
    
    sum(clofibrate) as clofibrate,
    round(sum(clofibrate)/sum(all_pts)*100, 2) as clofibrate_perc,
    
    sum(colesevelam) as colesevelam,
    round(sum(colesevelam)/sum(all_pts)*100, 2) as colesevelam_perc,
    
    sum(niacin) as niacin,
    round(sum(niacin)/sum(all_pts)*100, 2) as niacin_perc,
    
    sum(docosahexaenoic_acid) as docosahexaenoic_acid,
    round(sum(docosahexaenoic_acid)/sum(all_pts)*100, 2) as docosahexaenoic_acid_perc

from
    st_drugs_agg8
    
group by tg_level, hdl_level
order by tg_level, hdl_level

Query started at 04:52:11 PM India Standard Time; Query executed in 0.16 m

Unnamed: 0,tg_level,hdl_level,all_pts,statin,statin_perc,combination,combination_perc,gemfibrozil,gemfibrozil_perc,colestipol,...,ezetimibe,ezetimibe_perc,clofibrate,clofibrate_perc,colesevelam,colesevelam_perc,niacin,niacin_perc,docosahexaenoic_acid,docosahexaenoic_acid_perc
0,tg_gt180,hdl_gt40,91433,74680,81.68,1968,2.15,3360,3.67,137,...,6681,7.31,0,0.0,2096,2.29,390,0.43,69,0.08
1,tg_gt180,hdl_lt40,76288,60512,79.32,1451,1.9,4875,6.39,90,...,5074,6.65,0,0.0,1566,2.05,519,0.68,58,0.08
2,tg_gt180,,299,195,65.22,8,2.68,40,13.38,1,...,33,11.04,0,0.0,7,2.34,5,1.67,0,0.0
3,,hdl_gt40,205783,176870,85.95,4087,1.99,3679,1.79,230,...,13375,6.5,0,0.0,3575,1.74,826,0.4,78,0.04
4,,hdl_lt40,69226,58245,84.14,1340,1.94,2159,3.12,69,...,4423,6.39,0,0.0,1143,1.65,461,0.67,28,0.04
5,,,2481,2076,83.68,41,1.65,99,3.99,4,...,163,6.57,0,0.0,52,2.1,14,0.56,1,0.04


In [125]:
# Export to check, and then will inactivate codes that are not relevant and re-upload as final reference table
df.to_excel("out/highcvd_cohort2.xlsx", index=False)

### Tg levels >200

In [126]:
%%read_sql

create or replace table st_drugs_agg9 as 

select
    distinct a.patient_id,
    a.tg_level,
    a.hdl_level,
    1 as all_pts,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'statin' and deactive_date is null)
        then 1
        else 0
        end as statin,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'combination' and deactive_date is null)                   
           then 1
           else 0
           end as combination,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Gemfibrozil' and deactive_date is null)                   
           then 1
           else 0
           end as Gemfibrozil,
    case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colestipol' and deactive_date is null)                  
           then 1
           else 0
           end as colestipol,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'cholestyramine' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as cholestyramine,
     case
        when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Fenofibric acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Fenofibric_acid,
     case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'ezetimibe' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as ezetimibe,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Clofibrate' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Clofibrate,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'colesevelam' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as colesevelam,
    case
           when b.ndc_code in (select distinct value from st_ndc_ref
                                where cat2 = 'Niacin' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Niacin,
    case
           when b.ndc_code in (select distinct value
                                from st_ndc_ref
                                where cat2 = 'Docosahexaenoic acid' and deactive_date is null)
           --and datediff(d, a.index_date, b.year_of_service) between -365 and 365                   
           then 1
           else 0
           end as Docosahexaenoic_acid
    
from
    st_tg_high_cvd3 a
        join st_ndc b
            on a.patient_id = b.patient_id
--group by tg_level, hdl_level

Query started at 04:56:21 PM India Standard Time; Query executed in 0.24 m

Unnamed: 0,status
0,Table ST_DRUGS_AGG9 successfully created.


In [127]:
%%read_sql df

select
    tg_level,
    hdl_level,
    sum(all_pts) as all_pts,
    
    sum(statin) as statin,
    round(sum(statin)/sum(all_pts)*100, 2) as statin_perc,
    
    sum(combination) as combination,
    round(sum(combination)/sum(all_pts)*100, 2) as combination_perc,
    
    sum(gemfibrozil) as gemfibrozil,
    round(sum(gemfibrozil)/sum(all_pts)*100, 2) as gemfibrozil_perc,
    
    sum(colestipol) as colestipol,
    round(sum(colestipol)/sum(all_pts)*100, 2) as colestipol_perc,
    
    sum(cholestyramine) as cholestyramine,
    round(sum(cholestyramine)/sum(all_pts)*100, 2) as cholestyramine_perc,
    
    sum(fenofibric_acid) as fenofibric_acid,
    round(sum(fenofibric_acid)/sum(all_pts)*100, 2) as fenofibric_acid_perc,
    
    sum(ezetimibe) as ezetimibe,
    round(sum(ezetimibe)/sum(all_pts)*100, 2) as ezetimibe_perc,
    
    sum(clofibrate) as clofibrate,
    round(sum(clofibrate)/sum(all_pts)*100, 2) as clofibrate_perc,
    
    sum(colesevelam) as colesevelam,
    round(sum(colesevelam)/sum(all_pts)*100, 2) as colesevelam_perc,
    
    sum(niacin) as niacin,
    round(sum(niacin)/sum(all_pts)*100, 2) as niacin_perc,
    
    sum(docosahexaenoic_acid) as docosahexaenoic_acid,
    round(sum(docosahexaenoic_acid)/sum(all_pts)*100, 2) as docosahexaenoic_acid_perc

from
    st_drugs_agg9
    
group by tg_level, hdl_level
order by tg_level, hdl_level

Query started at 04:56:49 PM India Standard Time; Query executed in 0.06 m

Unnamed: 0,tg_level,hdl_level,all_pts,statin,statin_perc,combination,combination_perc,gemfibrozil,gemfibrozil_perc,colestipol,...,ezetimibe,ezetimibe_perc,clofibrate,clofibrate_perc,colesevelam,colesevelam_perc,niacin,niacin_perc,docosahexaenoic_acid,docosahexaenoic_acid_perc
0,tg_gt200,hdl_gt40,72723,58774,80.82,1601,2.2,2989,4.11,117,...,5389,7.41,0,0.0,1735,2.39,322,0.44,62,0.09
1,tg_gt200,hdl_lt40,68036,53460,78.58,1306,1.92,4656,6.84,82,...,4557,6.7,0,0.0,1435,2.11,476,0.7,57,0.08
2,tg_gt200,,276,179,64.86,8,2.9,37,13.41,1,...,30,10.87,0,0.0,7,2.54,4,1.45,0,0.0
3,,hdl_gt40,214849,184386,85.82,4259,1.98,4003,1.86,244,...,13968,6.5,0,0.0,3767,1.75,863,0.4,84,0.04
4,,hdl_lt40,77790,65310,83.96,1487,1.91,2574,3.31,80,...,4936,6.35,0,0.0,1320,1.7,512,0.66,32,0.04
5,,,2486,2080,83.67,41,1.65,100,4.02,4,...,163,6.56,0,0.0,52,2.09,14,0.56,1,0.04


In [128]:
# Export to check, and then will inactivate codes that are not relevant and re-upload as final reference table
df.to_excel("out/highcvd_cohort3.xlsx", index=False)