# Initialisation Steps
Connecting to Snowflake

## Importing required modules

In [2]:
import math
import os
import warnings
from datetime import timedelta, datetime

import pandas as pd
import qgrid

from drg_connect import Snowflake

# Suppress every Python warning from here on so cell output stays uncluttered.
# NOTE(review): a blanket 'ignore' also hides deprecation warnings — consider narrowing the filter.
warnings.filterwarnings('ignore')

## Snowflake connection parameters

In [3]:
##defining parameters of snowflake
snow = Snowflake(role = 'RWD_ANALYTICS_RW',database='SANDBOX_ANALYTICS',schema = 'SANDBOX')
engine = snow.engine

%reload_ext sql_magic
%config SQL.output_result = True  #Enable output to std out
%config SQL.notify_result = False #disable browser notifications
%config SQL.conn_name = 'engine'  #Set the sql_magic connection engine

# Queries from here

# Pelican EHR data

## Pelican table details

In [3]:
%%read_sql

-- Peek at three rows of the LOINC table to see which columns it offers
SELECT *
FROM RWD_DB.RWD.PELICAN_E_LOINC
LIMIT 3

Query started at 04:50:17 PM India Standard TimeInitiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
; Query executed in 0.39 m

Unnamed: 0,loinc_num,component,property,time_aspct,system,scale_typ,method_typ,class,last_modified,chng_type,...,document_section,example_ucum_units,example_si_ucum_units,status_reason,status_text,change_reason_public,common_test_rank,common_order_rank,common_si_test_rank,hl7_attachment_structure
0,9256-9,Fluid output.total,Vol,Pt,^Patient,Qn,Estimated,IO_OUT.ATOM,1996-09-06,ADD,...,,,,,,,0,0,0,
1,38474-3,Acylcarnitine,SCnc,Pt,Bld.dot,Qn,,CHEM,2008-10-21,NAM,...,,umol/L,,,,,0,0,0,
2,2404-2,Hemopexin,MCnc,Pt,Urine,Qn,,CHEM,2006-10-26,MAJ,...,,mg/dL,,,,,0,0,0,


In [4]:
%%read_sql

-- Peek at three rows of the lab-order table to see which columns it offers
SELECT *
FROM RWD_DB.RWD.PELICAN_LABORDER
LIMIT 3

Query started at 04:54:02 PM India Standard Time; Query executed in 0.14 m

Unnamed: 0,laborder_id,patient_id,provider_id,vendor_id,loinc_num,result_status,report_date,observed_at,obs_quan,obs_qual,unit,is_abnormal,abnormal_flag,created_at,last_modified,transcript_id
0,1364935398341433546,393E2EED-4D2C-79C7-D150-08E8154D142F,,8844736773880785995,2160-0,Final,2015-11-08,2015-11-14,0.96,,mg/dL,,,2015-11-14,2015-11-14,
1,4177387127494394539,393E2EED-4D2C-79C7-D150-08E8154D142F,D8E5640E-9F96-E7A1-BA57-6E996DF43374,8844736773880785995,43396-1,Final,2018-07-31,2018-08-04,212.0,,mg/dL (calc),,,2018-08-05,2018-08-05,
2,-8421755992989376358,393E2EED-4D2C-79C7-D150-08E8154D142F,D8E5640E-9F96-E7A1-BA57-6E996DF43374,8844736773880785995,30446-9,Not available,2018-07-31,2018-08-04,,281266006.0,cells/uL,,,2018-08-05,2018-08-05,


## Corresponding loinc codes for triglycerides

### Things to remember about loinc codes
 - Format: nnnnn-n
 - Component: what was measured
 - Unit: check from laborder table
 - Date: ask/confirm which date to use - report_date or observed_at
 - There is no 'component' column in the laborder table, so create an Excel file of the LOINC codes and upload it

In [15]:
%%read_sql

create or replace table st_ref_loinc as

    -- Reference list of LOINC codes whose component mentions triglyceride,
    -- HDL or cholesterol, excluding anything that mentions LDL.
    select
        loinc_num,
        component

    from RWD_DB.RWD.PELICAN_E_LOINC

    -- AND binds tighter than OR, so the OR-ed matches are parenthesised;
    -- without the parentheses the LDL exclusion only applied to '%cholesterol%'.
    -- NOTE(review): assumes LDL should be excluded from all three groups - confirm.
    -- NOTE(review): '%ldl%' also matches VLDL components - confirm that is intended.
    where (
            component ilike '%triglyceride%'
            or component ilike '%hdl%'
            or component ilike '%cholesterol%'
          )
        and not component ilike '%ldl%'

Query started at 08:03:17 PM India Standard Time; Query executed in 0.13 m

Unnamed: 0,status
0,Table ST_REF_LOINC successfully created.


In [16]:
# Display the contents of st_ref_loinc to check which components were matched
snow.select("select * from st_ref_loinc")

Unnamed: 0,loinc_num,component
0,2086-7,Cholesterol.in HDL
1,50223-7,Cholesterol crystals
2,2087-5,Cholesterol.in IDL
3,57937-5,Cholesterol.in HDL 3a
4,32289-1,7-Dehydrocholesterol
5,17081-1,Triglyceride^post CFst
6,34472-1,Cholesterol crystals
7,35200-5,Cholesterol
8,2565-0,Cholesterol
9,14438-6,Cholesterol


In [12]:
%%read_sql

-- Stage the triglyceride LOINC codes in the reference-table layout
-- (category, value, description, activation and deactivation fields, source).
-- The deactivation fields start out as NULL.
CREATE OR REPLACE TEMPORARY TABLE st_loinc_ref1 AS
SELECT
    'loinc'                      AS cat1,
    loinc_num                    AS value,
    RELATEDNAMES2                AS description,
    current_date()               AS active_date,
    'swar'                       AS active_reason,
    NULL                         AS deactive_date,
    NULL                         AS deactive_reason,
    'RWD_DB.RWD.PELICAN_E_LOINC' AS source
FROM RWD_DB.RWD.PELICAN_E_LOINC
WHERE component ILIKE '%triglyceride%'

Query started at 02:13:19 PM India Standard Time; Query executed in 0.09 m

Unnamed: 0,status
0,Table ST_LOINC_REF1 successfully created.


In [13]:
%%read_sql df

-- Pull the staged codes back into the notebook; the result is bound to df
SELECT *
FROM st_loinc_ref1

Query started at 02:15:35 PM India Standard Time; Query executed in 0.07 m

Unnamed: 0,cat1,value,description,active_date,active_reason,deactive_date,deactive_reason,source
0,loinc,17081-1,Trigl; TG; Trigly; Triglycrides; Trig; Triglyc...,2018-12-10,swar,,,RWD_DB.RWD.PELICAN_E_LOINC
1,loinc,14447-7,Trigl; TG; Trigly; Triglycrides; Trig; Triglyc...,2018-12-10,swar,,,RWD_DB.RWD.PELICAN_E_LOINC
2,loinc,28554-4,Trigl; TG; Trigly; Triglycrides; Trig; Triglyc...,2018-12-10,swar,,,RWD_DB.RWD.PELICAN_E_LOINC
3,loinc,53527-8,Trigl in HDL2; TG; Trigly; Triglycrides; Trig;...,2018-12-10,swar,,,RWD_DB.RWD.PELICAN_E_LOINC
4,loinc,70260-5,Trigl Prt; PT-TRIG; TG; Trigly; Triglycrides; ...,2018-12-10,swar,,,RWD_DB.RWD.PELICAN_E_LOINC
5,loinc,70269-6,Trigl Plr fld; PF-TRIG; TG; Trigly; Triglycrid...,2018-12-10,swar,,,RWD_DB.RWD.PELICAN_E_LOINC
6,loinc,59571-0,Trigl; TG; Trigly; Triglycrides; Trig; Triglyc...,2018-12-10,swar,,,RWD_DB.RWD.PELICAN_E_LOINC
7,loinc,3047-8,Trigl+VLDL Ester; TG; Trigly; Triglycrides; Tr...,2018-12-10,swar,,,RWD_DB.RWD.PELICAN_E_LOINC
8,loinc,9619-8,Trigl; TG; Trigly; Triglycrides; Trig; Triglyc...,2018-12-10,swar,,,RWD_DB.RWD.PELICAN_E_LOINC
9,loinc,2096-6,Cholest; Chol; Choles; Lipid; Cholesterol tota...,2018-12-10,swar,,,RWD_DB.RWD.PELICAN_E_LOINC


In [14]:
# Export the candidate codes for manual review; codes that are not relevant are then
# marked inactive and the sheet is re-uploaded as the final reference table.
# Create the output folder first so the export also works on a fresh checkout
# (to_excel raises an OSError when the target directory does not exist).
os.makedirs("out", exist_ok=True)
df.to_excel("out/loinccodes_check.xlsx", index=False)

## Upload into snowflake

In [15]:
# Read the reference sheet that will be uploaded to Snowflake.
# NOTE(review): in/loinc_ref.xlsx is presumably the hand-edited copy of out/loinccodes_check.xlsx — confirm
loinc_ref = pd.read_excel("in/loinc_ref.xlsx")

In [17]:
# Drop any earlier copy of st_loinc_ref before the reference data is uploaded again
snow.drop_table("st_loinc_ref")

DROP TABLE IF EXISTS sandbox_analytics.sandbox.st_loinc_ref;
Table sandbox.st_loinc_ref dropped!


In [18]:
# Upload the loinc_ref DataFrame to Snowflake as table st_loinc_ref
snow.upload_dataframe(loinc_ref, "st_loinc_ref")

Upload successful!


In [19]:
%%read_sql

-- List the codes that were deactivated during review (deactive_date is filled in)
SELECT *
FROM st_loinc_ref
WHERE deactive_date IS NOT NULL

Query started at 05:48:41 PM India Standard Time; Query executed in 0.07 m

Unnamed: 0,cat1,value,description,active_date,active_reason,deactive_date,deactive_reason,source
0,loinc,9619-8,Trigl; TG; Trigly; Triglycrides; Trig; Triglyc...,2018-12-10,swar,2018-10-12,Triglyceride [Mass/volume] in Pleural fluid,RWD_DB.RWD.PELICAN_E_LOINC
1,loinc,2096-6,Cholest; Chol; Choles; Lipid; Cholesterol tota...,2018-12-10,swar,2018-10-12,cholesterol levels?,RWD_DB.RWD.PELICAN_E_LOINC
2,loinc,34695-7,VLDLc; Very low density lipoprotein cholestero...,2018-12-10,swar,2018-10-12,Cholesterol.in VLDL/Triglyceride,RWD_DB.RWD.PELICAN_E_LOINC
3,loinc,44718-5,VLDL; Very low density lipoproteins; Trigl; TG...,2018-12-10,swar,2018-10-12,Lipoprotein.pre-beta/Triglyceride [Mass Ratio]...,RWD_DB.RWD.PELICAN_E_LOINC
4,loinc,44733-4,Trigl; TG; Trigly; Triglycrides; Trig; Triglyc...,2018-12-10,swar,2018-10-12,Trigl/HDLc SerPl,RWD_DB.RWD.PELICAN_E_LOINC
5,loinc,70259-7,Trigl Pcar; PC-TRIG; TG; Trigly; Triglycrides;...,2018-12-10,swar,2018-10-12,Pericardial fluid to serum tryglyceride ratio ...,RWD_DB.RWD.PELICAN_E_LOINC
6,loinc,59036-4,Trigl; TG; Trigly; Triglycrides; Trig; Triglyc...,2018-12-10,swar,2018-10-12,Triglyceride [Mass/volume] in Peritoneal dialy...,RWD_DB.RWD.PELICAN_E_LOINC
7,loinc,14448-5,Trigl; TG; Trigly; Triglycrides; Trig; Triglyc...,2018-12-10,swar,2018-10-12,Triglyceride [Mass/volume] in Semen,RWD_DB.RWD.PELICAN_E_LOINC
8,loinc,51605-4,Trigl; TG; Trigly; Triglycrides; Trig; Triglyc...,2018-12-10,swar,2018-10-12,Triglyceride [Moles/volume] in Synovial fluid,RWD_DB.RWD.PELICAN_E_LOINC
9,loinc,14449-3,Trigl; TG; Trigly; Triglycrides; Trig; Triglyc...,2018-12-10,swar,2018-10-12,Triglyceride [Mass/volume] in Synovial fluid,RWD_DB.RWD.PELICAN_E_LOINC


## Using laborders table

### Things to do:
    - We want unique patients related to the above listed loinc codes.
    - We then want to stratify them into buckets (see image in next section)

In [24]:
%%html
<!-- Analysis plan diagram; attributes are space-separated and quoted so the tag is valid HTML -->
<img src="img/analysis_plan.png" alt="Analysis plan" width="800" height="800">

## Observation cohort

In [27]:
%%read_sql

-- Observation cohort: lab orders whose LOINC code is listed in st_loinc_ref
-- under cat1 = 'loinc' and has no deactivation date.
CREATE OR REPLACE TABLE st_tg_obs_cohort AS
SELECT
    patient_id,
    loinc_num,
    result_status,
    report_date,
    observed_at,
    obs_quan,
    obs_qual,
    unit,
    last_modified
FROM RWD_DB.RWD.PELICAN_LABORDER
WHERE loinc_num IN (
    SELECT value
    FROM st_loinc_ref
    WHERE cat1 = 'loinc'
      AND deactive_date IS NULL
)

Query started at 06:32:39 PM India Standard Time; Query executed in 0.55 m

Unnamed: 0,status
0,Table ST_TG_OBS_COHORT successfully created.


In [28]:
# Count distinct patients in the cohort, skipping ids whose first five characters are 'XXX -'.
# NOTE(review): 'XXX -' is presumably a masked/placeholder id prefix — confirm
snow.select("select count(distinct patient_id) from st_tg_obs_cohort where left(patient_id, 5) != 'XXX -'")

Unnamed: 0,COUNT(DISTINCT PATIENT_ID)
0,6461414


In [29]:
%%read_sql
-- Preview up to 100 cohort rows
SELECT *
FROM st_tg_obs_cohort
LIMIT 100

Query started at 06:38:45 PM India Standard Time; Query executed in 0.12 m

Unnamed: 0,patient_id,loinc_num,result_status,report_date,observed_at,obs_quan,obs_qual,unit,last_modified
0,1ED968AE-FE37-9FDD-975C-3A01C5105818,2571-8,Final,2016-01-06,2016-01-05,201.0,,mg/dL,2016-01-06
1,6085D796-747B-F408-0DAF-D5A10557EC1B,2571-8,Final,2016-12-16,2016-12-15,53.0,,mg/dL,2016-12-16
2,BEB71C61-A885-8E9A-832F-739ACCD7E866,2571-8,Final,2013-10-18,2013-10-18,247.0,,mg/dL,2015-09-19
3,56C97897-7D46-CD67-6FA8-F64E25EF7288,2571-8,Final,2017-11-03,2017-11-03,104.0,,mg/dL,2017-11-03
4,23651DE6-800F-8681-43E0-FDC1149FA0F3,2571-8,Final,2015-05-02,2015-05-01,93.0,,mg/dL,2015-09-10
5,342B4281-B2C2-C328-13AC-5A6E8B6531EF,2571-8,Final,2015-09-02,2015-09-04,123.0,,mg/dL,2015-09-04
6,260C059C-AE07-DA2F-29B8-1ACAFA737221,2571-8,Final,2018-02-08,2018-02-08,212.0,,mg/dL,2018-02-08
7,260C059C-AE07-DA2F-29B8-1ACAFA737221,2571-8,Final,2017-01-30,2017-01-22,186.0,,mg/dL,2017-02-01
8,90C8E9DB-1A46-B5CE-1EA3-1DB1C404BD2C,2571-8,Final,2017-09-03,2017-09-04,501.0,,mg/dL,2017-09-04
9,65AFC983-5495-1CDB-5D2B-C4F0B2DD5BBD,12951-0,Final,2016-06-12,2016-06-12,226.0,,mg/dL,2016-06-12
