# Initialisation Steps
Connecting to Snowflake

## Importing required modules

In [19]:
import pandas as pd
from drg_connect import Snowflake
import qgrid 
from datetime import timedelta, datetime
import math

import warnings
# Suppress all Python warnings so they do not clutter the cell outputs.
# NOTE(review): a blanket 'ignore' also hides deprecation notices; consider a narrower filter.
warnings.filterwarnings('ignore')

## Snowflake connection parameters

In [20]:
##defining parameters of snowflake
snow = Snowflake(role = 'RWD_ANALYTICS_RW',database='SANDBOX_ANALYTICS',schema = 'SANDBOX')
engine = snow.engine

%reload_ext sql_magic
%config SQL.output_result = True  #Enable output to std out
%config SQL.notify_result = False #disable browser notifications
%config SQL.conn_name = 'engine'  #Set the sql_magic connection engine

# ICD grouper table for reference

In [4]:
# Show three rows of the ICD grouper reference table to inspect its columns.
snow.select(
    "select * from RWD_DB.RWD.ICD_GROUPER limit 3"
)

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...


Unnamed: 0,id,level_1,level_1_description,level_2,level_2_description,level_3,level_3_description,level_4,icd9_mapped_codes,icd9_description,level_4_short_description_icd10,level_4_long_description_icd10,create_ts,update_ts
0,1,A00 - B999,Certain infectious and parasitic diseases,A00-A09,Intestinal infectious diseases,A00-A009,Cholera,A00,No map,No map,Cholera,Cholera,2017-11-02,2017-11-02
1,2,A00 - B999,Certain infectious and parasitic diseases,A00-A09,Intestinal infectious diseases,A00-A009,Cholera,A000,0010,Cholera due to vibrio cholerae,Cholera due to Vibrio cholerae 01 biovar cholerae,Cholera due to Vibrio cholerae 01 biovar cholerae,2017-11-02,2017-11-02
2,3,A00 - B999,Certain infectious and parasitic diseases,A00-A09,Intestinal infectious diseases,A00-A009,Cholera,A001,0011,Cholera due to vibrio cholerae el tor,Cholera due to Vibrio cholerae 01 biovar eltor,Cholera due to Vibrio cholerae 01 biovar eltor,2017-11-02,2017-11-02


In [5]:
%%read_sql
/* Find ICD grouper rows whose long ICD-10 description contains all three of
   severe, combined and immunodeficiency (ILIKE, so case-insensitive). */
SELECT *
FROM RWD_DB.RWD.ICD_GROUPER
WHERE level_4_long_description_icd10 ILIKE '%severe%'
  AND level_4_long_description_icd10 ILIKE '%combined%'
  AND level_4_long_description_icd10 ILIKE '%immunodeficiency%'

Query started at 12:56:59 PM India Standard Time; Query executed in 0.09 m

Unnamed: 0,id,level_1,level_1_description,level_2,level_2_description,level_3,level_3_description,level_4,icd9_mapped_codes,icd9_description,level_4_short_description_icd10,level_4_long_description_icd10,create_ts,update_ts
0,3582,D50 - D899,Diseases of the blood and blood-forming organs...,D80-D89,Certain disorders involving the immune mechanism,D81-D819,Combined immunodeficiencies,D810,2792,Combined immunity deficiency,Severe combined immunodeficiency with reticula...,Severe combined immunodeficiency [SCID] with r...,2017-11-02,2017-11-02
1,3583,D50 - D899,Diseases of the blood and blood-forming organs...,D80-D89,Certain disorders involving the immune mechanism,D81-D819,Combined immunodeficiencies,D811,2792,Combined immunity deficiency,Severe combined immunodeficiency w low T- and ...,Severe combined immunodeficiency [SCID] with l...,2017-11-02,2017-11-02
2,3584,D50 - D899,Diseases of the blood and blood-forming organs...,D80-D89,Certain disorders involving the immune mechanism,D81-D819,Combined immunodeficiencies,D812,2792,Combined immunity deficiency,Severe combined immunodef w low or normal B-ce...,Severe combined immunodeficiency [SCID] with l...,2017-11-02,2017-11-02


# SCID claims
Restricting to records that carry any of the following diagnosis codes:
    ICD-10: D810, D811, D812
    ICD-9: 2792

## Diagnosis table

In [8]:
%%read_sql
/* Build ST_SCID from the diagnosis table, keeping only rows whose diagnosis
   is one of the four listed codes (D810, D811, D812, 2792). */
CREATE OR REPLACE TABLE st_scid AS
SELECT
    /* patient_id: first 8 characters of each encrypted key, concatenated */
    LEFT(encrypted_key_1, 8) || LEFT(encrypted_key_2, 8) AS patient_id,
    /* year: first 4 characters of year_of_service */
    LEFT(year_of_service, 4) AS year,
    claim_number,
    diagnosis,
    diagnosis_sequence
FROM RWD_DB.RWD.RAVEN_CLAIMS_SUBMITS_DIAGNOSIS
WHERE diagnosis IN ('D810', 'D811', 'D812', '2792')

Query started at 01:19:48 PM India Standard Time; Query executed in 15.58 m

Unnamed: 0,status
0,Table ST_SCID successfully created.


## Removing invalid patient keys

In [9]:
# Count distinct patients in st_scid, skipping ids that start with the 'XXX -' placeholder.
snow.select(
    "select count(distinct patient_id) from st_scid where left(patient_id, 5) != 'XXX -' "
)

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,14962


In [11]:
%%read_sql
/* Distinct patients per year in ST_SCID, excluding ids that start with XXX -.
   The count expression is left unaliased, so its text becomes the column label. */
SELECT
    year,
    count (distinct patient_id)
FROM st_scid
WHERE left(patient_id, 5) != 'XXX -'
GROUP BY 1
ORDER BY 1

Query started at 01:55:19 PM India Standard Time; Query executed in 0.04 m

Unnamed: 0,year,COUNT (DISTINCT PATIENT_ID)
0,1916.0,1
1,2005.0,1
2,2008.0,2
3,2010.0,13
4,2011.0,273
5,2012.0,1345
6,2013.0,3533
7,2014.0,4618
8,2015.0,6167
9,2016.0,1155


## Patients table

In [12]:
# Show three rows of the patient table to inspect its columns.
snow.select(
    "select * from RWD_DB.RWD.RAVEN_CLAIMS_SUBMITS_PATIENT limit 3"
)

Unnamed: 0,claim_number,member_dob,member_adr_state,member_adr_zip,member_encrypted_key_1,member_encrypted_key_2,patient_relation,patient_gender,patient_dob,encrypted_key_1,encrypted_key_2,key3,payer_sequence,year_of_service,data_source,load_indicator,drg_patient_id,etl_create_ts,etl_update_ts
0,con_EP031015730629733,,,,,,1,F,1959-01-01,LHNaxR5E8F3VfyrQZqaZyzvz9mrjQIeSVNqJ7+ak5BY=,n93/JuKbOYIgx0mB4J2X2+6wqnGP8qJDFlQV3II5xys=,,,2015-02-24,CONDOR,H,190599618.0,2016-02-28,2016-02-28
1,con_EP033117780686926,,TX,762.0,,,1,M,1959-01-01,Y5AfqmNJ+tozPSUi38rL7mEXUYTp8nb5TnVUyC1Kyik=,i7X0AE+BpSXDZNHZgLiPWYn+LFfSt9SFYxMR14ok4lY=,,,2017-03-21,CONDOR,H,,2017-04-08,2017-04-08
2,con_EP020113759831093,1956-01-01,MA,13.0,,,1,F,1956-01-01,tBY40yMCcaaNOLriXHJdMYKC/PCMs/BLSE/3IGUsI6c=,WsQ86eBSSlFfvLy9ny1naMRHsCgc8xgmIVZ7SzN+qpQ=,,,2012-12-27,CONDOR,H,,2017-06-29,2017-06-29


In [13]:
%%read_sql
/* Build ST_SCID_PT from the patient table: derived patient_id plus gender,
   date of birth and claim number, skipping ids that start with XXX -. */
CREATE OR REPLACE TABLE st_scid_pt AS
SELECT
    /* patient_id: first 8 characters of each encrypted key, concatenated */
    LEFT(encrypted_key_1, 8) || LEFT(encrypted_key_2, 8) AS patient_id,
    patient_gender,
    patient_dob,
    claim_number
FROM RWD_DB.RWD.RAVEN_CLAIMS_SUBMITS_PATIENT
/* the filter refers to the patient_id alias defined in the select list */
WHERE LEFT(patient_id, 5) != 'XXX -'

Query started at 02:41:05 PM India Standard Time; Query executed in 15.51 m

Unnamed: 0,status
0,Table ST_SCID_PT successfully created.


## Joining diagnosis and patients tables

In [14]:
%%read_sql
/* Build ST_SCID_FINAL: rows of ST_SCID joined to ST_SCID_PT on patient_id,
   keeping only rows where the computed age is 0 or 1.
   NOTE(review): both inputs may hold several rows per patient_id, so the join
   can repeat rows. Confirm that this is acceptable for later use. */
create or replace table st_scid_final as

select
    a.patient_id,
    a.year,
    a.diagnosis,
    b.patient_gender,
    b.patient_dob,
    /* Age in calendar years. a.year is the 4-character text produced by
       left(year_of_service, 4), so it is converted to a number explicitly
       instead of being passed to datediff where a date is expected. */
    to_number(a.year) - year(b.patient_dob) as age

from
    st_scid a
        join st_scid_pt b
            on a.patient_id = b.patient_id

where
    age between 0 and 1

Query started at 02:58:25 PM India Standard Time; Query executed in 4.03 m

Unnamed: 0,status
0,Table ST_SCID_FINAL successfully created.


In [23]:
%%read_sql
/* Distinct patients per year in ST_SCID_FINAL, excluding ids that start with XXX -.
   The count expression is left unaliased, so its text becomes the column label. */
SELECT
    year,
    count (distinct patient_id)
FROM st_scid_final
WHERE left(patient_id, 5) != 'XXX -'
GROUP BY 1
ORDER BY 1

Query started at 03:12:07 PM India Standard Time; Query executed in 0.08 m

Unnamed: 0,year,COUNT (DISTINCT PATIENT_ID)
0,2011,2
1,2012,21
2,2013,73
3,2014,90
4,2015,84
5,2016,22
6,2017,24
7,2018,13


In [22]:
%%read_sql
/* Distinct patients per year in ST_SCID_FINAL, excluding ids that start with XXX -.
   NOTE(review): the traceback saved under this cell quotes a different statement
   (select * ... group by year), so that output does not belong to this query.
   Re-run the cell or clear its output. */
SELECT
    year,
    count (distinct patient_id)
FROM st_scid_final
WHERE left(patient_id, 5) != 'XXX -'
GROUP BY 1
ORDER BY 1

Query started at 03:10:49 PM India Standard Time

ProgrammingError: (snowflake.connector.errors.ProgrammingError) 000979 (42601): SQL compilation error:
[ST_SCID_FINAL.PATIENT_ID] is not a valid group by expression [SQL: 'select *\n\nfrom\n    st_scid_final\n    \ngroup by year\norder by year'] (Background on this error at: http://sqlalche.me/e/f405)