# Intialisation Steps
Connecting to Snowflake

## Importing required modules

In [2]:
import pandas as pd
from drg_connect import Snowflake
import qgrid 
from datetime import timedelta, datetime
import math

import warnings
warnings.filterwarnings('ignore')

## Snowflake connection parameter

In [3]:
##defining parameters of snowflake
snow = Snowflake(role = 'RWD_ANALYTICS_RW',database='SANDBOX_ANALYTICS',schema = 'SANDBOX')
engine = snow.engine

%reload_ext sql_magic
%config SQL.output_result = True  #Enable output to std out
%config SQL.notify_result = False #disable browser notifications
%config SQL.conn_name = 'engine'  #Set the sql_magic connection engine

# ICD grouper table

## Swarali notes:
- Using level_4_short_description_icd10 variable because description includes retinal vein occlusion and macular edema

In [3]:
%%read_sql

create or replace temporary table st_rvo_me_ref1 as

select
    'dx' as cat1,
    'all_me' as cat2,
    case
        when 
            lower(level_4_long_description_icd10) like '%branch%' 
            then 'brvo'
        when 
            (lower(level_4_long_description_icd10) like '%retinal%' 
            and lower(level_4_long_description_icd10) like '%vein%'
            and lower(level_4_long_description_icd10) like '%occlusion%')
            then 'crvo'
        when
            (lower(level_4_long_description_icd10) like '%venous%'
            and lower(level_4_long_description_icd10) like '%engorgement%'
            and lower(level_4_long_description_icd10) like '%eye%')
            then 've'
        else 'other_me' 
        end as cat3,
    null as cat4,
    null as cat5,
    null as cat6,
    'icd_10' as type,
    level_4 as value,
    level_4_long_description_icd10 as description,
    current_date() as active_date,
    'Nish' as active_reason,
    null as deactive_date,
    null as deactive_reason,
    'RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER' as source
from 
    RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
where
    (lower(level_4_long_description_icd10) like '%retinal%'
    and lower(level_4_long_description_icd10) like '%vein%'
    and lower(level_4_long_description_icd10) like '%occlusion%')
        or
    (lower(level_4_long_description_icd10) like '%venous%'
    and lower(level_4_long_description_icd10) like '%engorgement%'
    and lower(level_4_long_description_icd10) like '%eye%')
        or
    (lower(level_4_long_description_icd10) like '%macular%'
    and lower(level_4_long_description_icd10) like '%edema%'
    and lower(level_4_long_description_icd10) not like '%without%')

Query started at 04:25:01 PM GMT Daylight TimeInitiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
; Query executed in 0.22 m

Unnamed: 0,status
0,Table ST_RVO_ME_REF1 successfully created.


In [4]:
%%read_sql

create or replace temporary table st_rvo_me_ref2 as

select
    'dx' as cat1,
    'all_me' as cat2,
    case
        when 
            lower(level_4_long_description_icd10) like '%branch%' 
            then 'brvo'
        when 
            (lower(level_4_long_description_icd10) like '%retinal%' 
            and lower(level_4_long_description_icd10) like '%vein%'
            and lower(level_4_long_description_icd10) like '%occlusion%')
            then 'crvo'
        when
            (lower(level_4_long_description_icd10) like '%venous%'
            and lower(level_4_long_description_icd10) like '%engorgement%'
            and lower(level_4_long_description_icd10) like '%eye%')
            then 've'
        else 'other_me' 
        end as cat3,
    null as cat4,
    null as cat5,
    null as cat6,
    'icd_9' as type,
    icd9_mapped_codes as value,
    level_4_long_description_icd10 as description,
    current_date() as active_date,
    'Nish' as active_reason,
    null as deactive_date,
    null as deactive_reason,
    'RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER' as source
from 
    RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
where
    ((lower(level_4_long_description_icd10) like '%retinal%'
    and lower(level_4_long_description_icd10) like '%vein%'
    and lower(level_4_long_description_icd10) like '%occlusion%')
        or
    (lower(level_4_long_description_icd10) like '%venous%'
    and lower(level_4_long_description_icd10) like '%engorgement%'
    and lower(level_4_long_description_icd10) like '%eye%')
        or
    (lower(level_4_long_description_icd10) like '%macular%'
    and lower(level_4_long_description_icd10) like '%edema%'
    and lower(level_4_long_description_icd10) not like '%without%'))
    and icd9_mapped_codes != 'No map'

Query started at 04:25:20 PM GMT Daylight Time; Query executed in 0.11 m

Unnamed: 0,status
0,Table ST_RVO_ME_REF2 successfully created.


In [17]:
%%read_sql

create or replace table st_rvo_me_ref as

select * from st_rvo_me_ref1
union
select * from st_rvo_me_ref2
order by type, value, cat3

Query started at 04:35:11 PM GMT Daylight Time; Query executed in 0.11 m

Unnamed: 0,status
0,Table ST_RVO_ME_REF successfully created.


## Export ref table to inspect

In [18]:
snow.select("select * from st_rvo_me_ref")

Unnamed: 0,cat1,cat2,cat3,cat4,cat5,cat6,type,value,description,active_date,active_reason,deactive_date,deactive_reason,source
0,dx,all_me,other_me,,,,icd_10,E08311,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
1,dx,all_me,other_me,,,,icd_10,E08321,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
2,dx,all_me,other_me,,,,icd_10,E083211,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
3,dx,all_me,other_me,,,,icd_10,E083212,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
4,dx,all_me,other_me,,,,icd_10,E083213,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
5,dx,all_me,other_me,,,,icd_10,E083219,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
6,dx,all_me,other_me,,,,icd_10,E08331,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
7,dx,all_me,other_me,,,,icd_10,E083311,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
8,dx,all_me,other_me,,,,icd_10,E083312,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
9,dx,all_me,other_me,,,,icd_10,E083313,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER


In [19]:
snow.select("select * from st_rvo_me_ref").to_excel("out/ref.xlsx", index=False)

# UPLOAD new ref table

In [12]:
df = pd.read_excel("out/ref.xlsx")

In [13]:
df

Unnamed: 0,cat1,cat2,cat3,cat4,cat5,cat6,type,value,description,active_date,active_reason,deactive_date,deactive_reason,source
0,dx,me,other_me,,,,icd_10,E08311,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
1,dx,me,other_me,,,,icd_10,E08321,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
2,dx,me,other_me,,,,icd_10,E083211,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
3,dx,me,other_me,,,,icd_10,E083212,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
4,dx,me,other_me,,,,icd_10,E083213,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
5,dx,me,other_me,,,,icd_10,E083219,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
6,dx,me,other_me,,,,icd_10,E08331,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
7,dx,me,other_me,,,,icd_10,E083311,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
8,dx,me,other_me,,,,icd_10,E083312,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
9,dx,me,other_me,,,,icd_10,E083313,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER


In [16]:
snow.drop_table("st_rvo_me_ref")

DROP TABLE IF EXISTS sandbox_analytics.sandbox.st_rvo_me_ref;
Table sandbox.st_rvo_me_ref dropped!


In [17]:
snow.upload_dataframe(df, "st_rvo_me_ref")

Upload successful!


In [18]:
%%read_sql

select * from st_rvo_me_ref

Query started at 11:26:24 AM GMT Daylight Time; Query executed in 0.07 m

Unnamed: 0,cat1,cat2,cat3,cat4,cat5,cat6,type,value,description,active_date,active_reason,deactive_date,deactive_reason,source
0,dx,me,other_me,,,,icd_10,E08311,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
1,dx,me,other_me,,,,icd_10,E08321,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
2,dx,me,other_me,,,,icd_10,E083211,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
3,dx,me,other_me,,,,icd_10,E083212,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
4,dx,me,other_me,,,,icd_10,E083213,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
5,dx,me,other_me,,,,icd_10,E083219,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
6,dx,me,other_me,,,,icd_10,E08331,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
7,dx,me,other_me,,,,icd_10,E083311,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
8,dx,me,other_me,,,,icd_10,E083312,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER
9,dx,me,other_me,,,,icd_10,E083313,Diabetes mellitus due to underlying condition ...,2018-07-18,Nish,,,RWD_DB.RWD_REFERENCE_LIBRARY.ICD_GROUPER


# Analysis plan

<img src="in/cohort.png", width=700>

# Cohort in obs. window

<img src="in/cohort.png", width=700>

In [4]:
%%read_sql

create or replace table st_rvo_obs_cohort as

select
    left(encrypted_key_1, 8)||left(encrypted_key_2, 8) as patient_id,
    claim_number,
    diagnosis,
    diagnosis_sequence,
    year_of_service
from
    RWD_DB.RWD.RAVEN_CLAIMS_SUBMITS_DIAGNOSIS
where
    diagnosis in (select distinct value from st_rvo_me_ref where cat2 = 'rvo' and deactive_date is null)
    and year_of_service between '2016-01-01' and '2017-12-31'

Query started at 04:29:06 PM India Standard TimeInitiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
; Query executed in 11.96 m

Unnamed: 0,status
0,Table ST_RVO_OBS_COHORT successfully created.


In [5]:
snow.select("select count(distinct patient_id) from st_rvo_obs_cohort where left(patient_id, 5) != 'XXX -'")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,220932


## Index date

In [6]:
%%read_sql

create or replace table st_rvo_index as

select
    patient_id,
    min(year_of_service) as index_date
from
    st_rvo_obs_cohort
where
    left(patient_id, 5) != 'XXX -'
group by 1

Query started at 04:44:28 PM India Standard Time; Query executed in 0.14 m

Unnamed: 0,status
0,Table ST_RVO_INDEX successfully created.


In [7]:
snow.select("select * from st_rvo_index order by index_date desc limit 5")

Unnamed: 0,patient_id,index_date
0,qSO4bEA0RrOyEsnW,2017-12-31
1,P0Zgm1bdgm7TEi4/,2017-12-31
2,MQuzWQejJ15nq4eX,2017-12-31
3,il58OmHZqrsUyjcc,2017-12-30
4,m4783E4g8p5dFsQ3,2017-12-30


In [8]:
snow.select("select count(patient_id) from st_rvo_index where left(patient_id, 5) = 'XXX -'")

Unnamed: 0,COUNT(PATIENT_ID)
0,0


# Continuous coverage

## Cont. cov from medical claims
<img src="in/cohort1.png">

### Extract submits header

In [9]:
%%read_sql

create or replace table st_rvo_cont_med as

select
    left(encrypted_key_1, 8)||left(encrypted_key_2, 8) as patient_id,
    claim_number,
    claim_type_code,
    year_of_service
from
    RWD_DB.RWD.RAVEN_CLAIMS_SUBMITS_HEADER
where
    left(encrypted_key_1, 8)||left(encrypted_key_2, 8) in (select patient_id from st_rvo_index)
    and year_of_service between '2015-07-01' and '2018-06-30'

Query started at 04:44:57 PM India Standard Time; Query executed in 5.64 m

Unnamed: 0,status
0,Table ST_RVO_CONT_MED successfully created.


### Check for duplicates

In [10]:
%%read_sql

select count(*) from st_rvo_cont_med

Query started at 04:55:32 PM India Standard Time; Query executed in 0.06 m

Unnamed: 0,COUNT(*)
0,12050552


In [11]:
%%read_sql

with t1 as (select distinct * from st_rvo_cont_med)
select count(*) from t1

Query started at 04:55:38 PM India Standard Time; Query executed in 0.11 m

Unnamed: 0,COUNT(*)
0,11930226


In [12]:
%%read_sql

drop table if exists st_rvo_cont_med_unique;
create or replace table st_rvo_cont_med_unique as
select distinct * from st_rvo_cont_med;
drop table st_rvo_cont_med;

Query started at 04:55:47 PM India Standard Time; Query executed in 0.06 mQuery started at 04:55:51 PM India Standard Time; Query executed in 0.11 mQuery started at 04:55:57 PM India Standard Time; Query executed in 0.04 m

Unnamed: 0,status
0,ST_RVO_CONT_MED successfully dropped.


### Summarise medical claims

In [13]:
%%read_sql

create or replace table st_rvo_cont_med_tally as

select
    a.patient_id,
    sum(case
           when datediff(d, a.index_date, b.year_of_service) between -182 and -1
           then 1
           else 0
           end) as claim_tally_pre,
    sum(case
           when datediff(d, a.index_date, b.year_of_service) between 1 and 182
           then 1
           else 0
           end) as claim_tally_post
from
    st_rvo_index a
        join st_rvo_cont_med_unique b
            on a.patient_id = b.patient_id
group by 1

Query started at 04:56:13 PM India Standard Time; Query executed in 0.09 m

Unnamed: 0,status
0,Table ST_RVO_CONT_MED_TALLY successfully created.


### Final counts

In [14]:
a = snow.select("select count(distinct patient_id) from st_rvo_cont_med_tally")
b = snow.select("select count(distinct patient_id) from st_rvo_cont_med_tally where claim_tally_pre > 0 and claim_tally_post > 0")
c = snow.select("select count(distinct patient_id) from st_rvo_cont_med_tally where claim_tally_pre > 1 and claim_tally_post > 1")
d = snow.select("select count(distinct patient_id) from st_rvo_cont_med_tally where claim_tally_pre > 2 and claim_tally_post > 2")
e = snow.select("select count(distinct patient_id) from st_rvo_cont_med_tally where claim_tally_pre > 3 and claim_tally_post > 3")
f = snow.select("select count(distinct patient_id) from st_rvo_cont_med_tally where claim_tally_pre > 4 and claim_tally_post > 4")

lst = ['pts_obs_window',
       '1_claim_pre_and_post',
       '2_claims_pre_and_post',
       '3_claims_pre_and_post',
       '4_claims_pre_and_post',
       '5_claims_pre_and_post',]

pd.concat([a,b,c,d,e,f], keys=lst)

Unnamed: 0,Unnamed: 1,COUNT(DISTINCT PATIENT_ID)
pts_obs_window,0,220932
1_claim_pre_and_post,0,169735
2_claims_pre_and_post,0,143156
3_claims_pre_and_post,0,121172
4_claims_pre_and_post,0,103314
5_claims_pre_and_post,0,88232


## Final cohort

In [15]:
%%read_sql

create or replace table st_rvo_cohort as

select
    patient_id,
    index_date
from 
    st_rvo_index
where
    patient_id in (select patient_id 
                   from st_rvo_cont_med_tally 
                   where claim_tally_pre > 1
                   and claim_tally_post > 1)

Query started at 04:57:12 PM India Standard Time; Query executed in 0.09 m

Unnamed: 0,status
0,Table ST_RVO_COHORT successfully created.


In [16]:
snow.select("select count(distinct patient_id) from st_rvo_cohort")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,143156


# Raw data extracts

Pull all diagnoses data for the cohort of patients from July 1st 2015, to June 30th 2018
<img src="in/cohort.png", width=700>

## Claim header

In [17]:
%%read_sql

create or replace table st_rvo_header_all as 

select
     'XXXXXXXXXXXXXXXX' as patient_id,
     claim_number,
     received_date,
     claim_type_code,
     statement_from,
     statement_to,
     min_service_from,
     max_service_to,
     total_charge,
     total_allowed,
     drg_code,
     type_bill,
     admission_date,
     admit_type_code,
     admit_src_code,
     discharge_hour,
     discharge_status,
     new_medicare_source,
     year_of_service
from
    RWD_DB.RWD.RAVEN_CLAIMS_SUBMITS_HEADER
where
     claim_number in (select distinct claim_number 
                      from st_rvo_cont_med_unique
                      where patient_id in (select patient_id
                                           from st_rvo_cohort))

Query started at 04:57:49 PM India Standard Time; Query executed in 3.39 m

Unnamed: 0,status
0,Table ST_RVO_HEADER_ALL successfully created.


In [18]:
%%read_sql

 begin;
 update st_rvo_header_all a
    set a.patient_id = b.patient_id
   from st_rvo_cont_med_unique b
  where a.claim_number = b.claim_number;
commit;

Query started at 05:44:58 PM India Standard Time; Query executed in 0.07 mQuery started at 05:45:03 PM India Standard Time; Query executed in 0.19 mQuery started at 05:45:14 PM India Standard Time; Query executed in 0.03 m

Unnamed: 0,status
0,Statement executed successfully.


In [19]:
%%read_sql

select
    count(patient_id),
    count(distinct patient_id),
    sum(case
           when patient_id = 'XXXXXXXXXXXXXXXX'
           then 1
           else 0
           end) as null_pt_id,
    sum(case
           when left(patient_id, 5) = 'XXX -'
           then 1
           else 0
           end) as faulty_pt_id
from st_rvo_header_all

Query started at 05:45:16 PM India Standard Time; Query executed in 0.06 m

Unnamed: 0,COUNT(PATIENT_ID),COUNT(DISTINCT PATIENT_ID),null_pt_id,faulty_pt_id
0,10636099,143156,0,0


## Diagnoses

In [20]:
%%read_sql

create or replace table st_rvo_dx_all as

select
    'XXXXXXXXXXXXXXXX' as patient_id,
    claim_number,
    diagnosis,
    diagnosis_sequence,
    statement_from,
    statement_to,
    min_service_from,
    max_service_to,
    received_date,
    year_of_service
from
    RWD_DB.RWD.RAVEN_CLAIMS_SUBMITS_DIAGNOSIS
where
    claim_number in (select distinct claim_number
                     from st_rvo_header_all)

Query started at 05:45:34 PM India Standard Time; Query executed in 5.06 m

Unnamed: 0,status
0,Table ST_RVO_DX_ALL successfully created.


In [21]:
%%read_sql

begin;
update st_rvo_dx_all a
set a.patient_id = b.patient_id
from st_rvo_header_all b
where a.claim_number = b.claim_number

Query started at 05:50:38 PM India Standard Time; Query executed in 0.04 mQuery started at 05:50:40 PM India Standard Time; Query executed in 0.23 m

Unnamed: 0,number of rows updated,number of multi-joined rows updated
0,36284008,884082


In [22]:
%%read_sql

select
    count(patient_id),
    count(distinct patient_id),
    sum(case
           when patient_id = 'XXXXXXXXXXXXXXXX'
           then 1
           else 0
           end) as null_pt_id,
    sum(case
           when left(patient_id, 5) = 'XXX -'
           then 1
           else 0
           end) as faulty_pt_id
    
from st_rvo_dx_all

Query started at 05:50:54 PM India Standard Time; Query executed in 0.07 m

Unnamed: 0,COUNT(PATIENT_ID),COUNT(DISTINCT PATIENT_ID),null_pt_id,faulty_pt_id
0,36284008,143156,0,0


## Patient raw

In [23]:
%%read_sql

create or replace table st_rvo_pat_all as

select
    'XXXXXXXXXXXXXXXX' as patient_id,
    claim_number,
    patient_gender,
    patient_dob
from
    RWD_DB.RWD.RAVEN_CLAIMS_SUBMITS_PATIENT
where
    claim_number in (select distinct claim_number
                     from st_rvo_header_all)

Query started at 05:50:58 PM India Standard Time; Query executed in 3.94 m

Unnamed: 0,status
0,Table ST_RVO_PAT_ALL successfully created.


In [24]:
%%read_sql

 begin;
 update st_rvo_pat_all a
    set a.patient_id = b.patient_id
   from st_rvo_header_all b
  where a.claim_number = b.claim_number;
commit;

Query started at 05:54:55 PM India Standard Time; Query executed in 0.03 mQuery started at 05:54:57 PM India Standard Time; Query executed in 0.28 mQuery started at 05:55:14 PM India Standard Time; Query executed in 0.05 m

Unnamed: 0,status
0,Statement executed successfully.


In [25]:
%%read_sql

create or replace table st_rvo_pat_all_unique as

select
    patient_id,
    min(patient_dob) as dob,
    min(patient_gender) as gender
from
    st_rvo_pat_all
group by 1

Query started at 05:55:16 PM India Standard Time; Query executed in 0.10 m

Unnamed: 0,status
0,Table ST_RVO_PAT_ALL_UNIQUE successfully created.


In [26]:
%%read_sql

select
    count(patient_id),
    count(distinct patient_id),
    sum(case
           when patient_id = 'XXXXXXXXXXXXXXXX'
           then 1
           else 0
           end) as null_pt_id,
    sum(case
           when left(patient_id, 5) = 'XXX -'
           then 1
           else 0
           end) as faulty_pt_id
    
from st_rvo_pat_all_unique

Query started at 05:55:23 PM India Standard Time; Query executed in 0.07 m

Unnamed: 0,COUNT(PATIENT_ID),COUNT(DISTINCT PATIENT_ID),null_pt_id,faulty_pt_id
0,143156,143156,0,0


# Master table

## Aggregate comorbidity Dx

In [31]:
snow.select("select distinct cat3, cat2 from st_rvo_me_ref")

Unnamed: 0,cat3,cat2
0,other_me,me
1,crvo,rvo
2,ve,rvo
3,brvo,rvo


In [34]:
%%read_sql

create or replace table st_rvo_comorbidites_agg as 

select
    a.patient_id,
    1 as rvo,
    max(case
           when b.diagnosis in (select distinct value
                                from st_rvo_me_ref
                                where cat2 = 'me' 
                                and deactive_date is null)
           and datediff(d, a.index_date, b.year_of_service) between -182 and 182                   
           then 1
           else 0
           end) as me,
    max(case
           when b.diagnosis in (select distinct value
                                from st_rvo_me_ref
                                where cat3 = 'crvo' 
                                and deactive_date is null)
           and datediff(d, a.index_date, b.year_of_service) between -182 and 182                   
           then 1
           else 0
           end) as crvo,
    max(case
           when b.diagnosis in (select distinct value
                                from st_rvo_me_ref
                                where cat3 = 'brvo'
                                and deactive_date is null)
           and datediff(d, a.index_date, b.year_of_service) between -182 and 182                   
           then 1
           else 0
           end) as brvo,
    max(case
           when b.diagnosis in (select distinct value
                                from st_rvo_me_ref
                                where cat3 = 've'
                                and deactive_date is null)
           and datediff(d, a.index_date, b.year_of_service) between -182 and 182                   
           then 1
           else 0
           end) as ve
from
    st_rvo_cohort a
        join st_rvo_dx_all b
            on a.patient_id = b.patient_id
group by 1

Query started at 06:12:02 PM India Standard Time; Query executed in 0.35 m

Unnamed: 0,status
0,Table ST_RVO_COMORBIDITES_AGG successfully cre...


In [35]:
snow.select("select * from st_rvo_comorbidites_agg limit").to_excel("out/results.xlsx", index=False)

## Patient age

In [36]:
%%read_sql

create or replace table st_rvo_patient_demographics as 

select
    a.patient_id,
    b.gender,
    datediff(yy, b.dob, a.index_date) as age,
    case
        when datediff(yy, b.dob, a.index_date) between 0 and 4 then '00-04'
        when datediff(yy, b.dob, a.index_date) between 5 and 9 then '05-09'
        when datediff(yy, b.dob, a.index_date) between 10 and 14 then '10-14'
        when datediff(yy, b.dob, a.index_date) between 15 and 19 then '15-19'
        when datediff(yy, b.dob, a.index_date) between 20 and 24 then '20-24'
        when datediff(yy, b.dob, a.index_date) between 25 and 29 then '25-29'
        when datediff(yy, b.dob, a.index_date) between 30 and 34 then '30-34'
        when datediff(yy, b.dob, a.index_date) between 35 and 39 then '35-39'
        when datediff(yy, b.dob, a.index_date) between 40 and 44 then '40-44'
        when datediff(yy, b.dob, a.index_date) between 45 and 49 then '45-49'
        when datediff(yy, b.dob, a.index_date) between 50 and 54 then '50-54'
        when datediff(yy, b.dob, a.index_date) between 55 and 59 then '55-59'
        when datediff(yy, b.dob, a.index_date) between 60 and 64 then '60-64'
        when datediff(yy, b.dob, a.index_date) between 65 and 69 then '65-69'
        when datediff(yy, b.dob, a.index_date) between 70 and 74 then '70-74'
        when datediff(yy, b.dob, a.index_date) between 75 and 79 then '75-79'
        when datediff(yy, b.dob, a.index_date) between 80 and 84 then '80-84'
        when datediff(yy, b.dob, a.index_date) between 85 and 110 then '85+'
        else null
        end as age_cat
from
    st_rvo_cohort a
        join st_rvo_pat_all_unique b
            on a.patient_id = b.patient_id

Query started at 06:15:34 PM India Standard Time; Query executed in 0.12 m

Unnamed: 0,status
0,Table ST_RVO_PATIENT_DEMOGRAPHICS successfully...


## Master table

In [None]:
snow.drop_table("st_rvo_master_table")

In [37]:
%%read_sql

create or replace table st_rvo_master_table as

select
    a.patient_id,
    null as wt,
    a.gender,
    a.age,
    a.age_cat,
    b.rvo,
    b.me,
    b.crvo,
    b.brvo,
    b.ve
from
    st_rvo_patient_demographics a
        join st_rvo_comorbidites_agg b
            on a.patient_id = b.patient_id

Query started at 06:16:29 PM India Standard Time; Query executed in 0.18 m

Unnamed: 0,status
0,Table ST_RVO_MASTER_TABLE successfully created.


In [38]:
snow.select("select * from st_rvo_master_table limit 50")

Unnamed: 0,patient_id,wt,gender,age,age_cat,rvo,me,crvo,brvo,ve
0,cfobAq2DguhAozRM,,M,77,75-79,1,0,0,1,0
1,J44mz8vzeaBaV3Nz,,F,84,80-84,1,0,0,1,0
2,wkr9O1LCAEhYl/d7,,F,76,75-79,1,0,0,1,0
3,g3++qgyBran+DIAU,,M,83,80-84,1,1,0,1,0
4,+BSz5zNV/vaeuf0B,,F,79,75-79,1,0,0,1,0
5,tUxXdzEUAaE1lzON,,M,74,70-74,1,0,0,1,0
6,OoSFLQ1LmmVpgEYI,,F,74,70-74,1,1,0,1,0
7,fHETtVMmoPWNdWG2,,M,74,70-74,1,0,0,1,0
8,2pBko/V4W8UPLL99,,F,68,65-69,1,0,0,1,0
9,NEfe5qgQvCF9ssjT,,M,74,70-74,1,0,0,1,0


# Summary results

In [39]:
%%read_sql

select
    --age_cat,
    sum(rvo) as rvo_pts,
    sum(me) as me_pts,
    round(sum(me)/sum(rvo)*100, 2) as me_in_rvo,
    sum(crvo) as crvo_pts,
    round(sum(crvo)/sum(rvo)*100, 2) as crvo_in_rvo,
    sum(brvo) as brvo_pts,
    round(sum(brvo)/sum(rvo)*100, 2) as brvo_in_rvo,
    sum(ve) as ve_pts,
    round(sum(ve)/sum(rvo)*100, 2) as ve_in_rvo
from
    st_rvo_master_table
--group by 1
--order by 1

Query started at 06:17:32 PM India Standard Time; Query executed in 0.06 m

Unnamed: 0,rvo_pts,me_pts,me_in_rvo,crvo_pts,crvo_in_rvo,brvo_pts,brvo_in_rvo,ve_pts,ve_in_rvo
0,143156,12007,8.39,63229,44.17,87165,60.89,1975,1.38


In [40]:
%%read_sql

select
    age_cat,
    sum(rvo) as rvo_pts,
    sum(me) as me_pts,
    round(sum(me)/sum(rvo)*100, 2) as me_in_rvo,
    sum(crvo) as crvo_pts,
    round(sum(crvo)/sum(rvo)*100, 2) as crvo_in_rvo,
    sum(brvo) as brvo_pts,
    round(sum(brvo)/sum(rvo)*100, 2) as brvo_in_rvo,
    sum(ve) as ve_pts,
    round(sum(ve)/sum(rvo)*100, 2) as ve_in_rvo
from
    st_rvo_master_table
group by 1
order by 1

Query started at 06:18:04 PM India Standard Time; Query executed in 0.04 m

Unnamed: 0,age_cat,rvo_pts,me_pts,me_in_rvo,crvo_pts,crvo_in_rvo,brvo_pts,brvo_in_rvo,ve_pts,ve_in_rvo
0,00-04,10,0,0.0,7,70.0,1,10.0,2,20.0
1,05-09,16,1,6.25,8,50.0,4,25.0,4,25.0
2,10-14,31,0,0.0,12,38.71,4,12.9,15,48.39
3,15-19,71,4,5.63,33,46.48,13,18.31,29,40.85
4,20-24,112,3,2.68,77,68.75,30,26.79,14,12.5
5,25-29,217,24,11.06,154,70.97,58,26.73,16,7.37
6,30-34,363,39,10.74,249,68.6,121,33.33,21,5.79
7,35-39,582,75,12.89,339,58.25,258,44.33,29,4.98
8,40-44,964,121,12.55,522,54.15,490,50.83,29,3.01
9,45-49,1977,269,13.61,920,46.54,1122,56.75,58,2.93


In [41]:
snow.select("select * from st_rvo_master_table").to_excel("out/final_results.xlsx", index=False)