# Review Sample Data
Review a sample of data from the cohort, data extract, and the data mart

In [1]:
#Import libraries for this notebook
from workbook_writer import make_xlsx
import pandas as pd  
from drg_connect import Snowflake
import numpy as np
import pickle
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

#Load connection variables to connect_dict
with open('../../out/connect/connect_dict.pickle', 'rb') as handle:
    connect_dict = pickle.load(handle)

#Create Eegine to connect to snowflake
snow = Snowflake(role=connect_dict['role'],
                 warehouse=connect_dict['warehouse'],
                 database=connect_dict['database'],
                 schema=connect_dict['schema'])

#Finish engine setup
engine = snow.engine
%load_ext sql_magic
%config SQL.conn_name = 'engine'  #Set the sql_magic connection engine
%config SQL.output_result = True  #Enable output to std out
%config SQL.notify_result = False #disable browser notifications

In [2]:
%%read_sql
-- Build tmp_sample: up to 1000 (patient_id, npi) rows taken from coh_npi_pt_link.
-- The later cells filter every table against this sample.
-- NOTE(review): LIMIT without ORDER BY does not guarantee which rows are returned,
--   so the sample can differ between runs; the table may also hold fewer than 1000
--   distinct patients if patient_id repeats in coh_npi_pt_link.
-- NOTE(review): TEMP tables are session-scoped; the later snow.select(...) calls
--   presumably share this session -- confirm.
CREATE OR REPLACE TEMP TABLE tmp_sample AS
SELECT
    patient_id,
    npi
FROM coh_npi_pt_link
LIMIT 1000;

Query started at 12:55:54 PM Eastern Standard TimeInitiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
; Query executed in 0.11 m

Unnamed: 0,status
0,Table TMP_SAMPLE successfully created.


In [13]:
# Patient-level characteristics: coh_pt rows for the patients in tmp_sample
coh_pt = snow.select(
    "SELECT * FROM coh_pt WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)

In [14]:
# Physician-level identification data for the NPIs in tmp_sample.
# NOTE(review): both queries filter on npi only, so coh_npi_pt_link can return
# links to patients that are not in tmp_sample -- confirm this is intended.
coh_npi = snow.select(
    "SELECT * FROM coh_npi         WHERE npi IN (SELECT npi FROM tmp_sample)"
)
coh_npi_pt_link = snow.select(
    "SELECT * FROM coh_npi_pt_link WHERE npi IN (SELECT npi FROM tmp_sample)"
)

In [9]:
# Data-extract tables: every de_raven_* table is pulled with the same filter,
# keeping only rows whose patient_id appears in tmp_sample.
de_raven_demographics = snow.select(
    "SELECT * FROM de_raven_demographics WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
de_raven_diagnosis = snow.select(
    "SELECT * FROM de_raven_diagnosis    WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
de_raven_header = snow.select(
    "SELECT * FROM de_raven_header       WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
de_raven_patient = snow.select(
    "SELECT * FROM de_raven_patient      WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
de_raven_payer = snow.select(
    "SELECT * FROM de_raven_payer        WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
de_raven_pharmacy = snow.select(
    "SELECT * FROM de_raven_pharmacy     WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
de_raven_procedure = snow.select(
    "SELECT * FROM de_raven_procedure    WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
de_raven_provider = snow.select(
    "SELECT * FROM de_raven_provider     WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)

In [16]:
# Data-mart tables: each dm_* table restricted to the patients in tmp_sample.
dm_demo = snow.select(
    "SELECT * FROM dm_demo              WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
dm_comorbid_custom = snow.select(
    "SELECT * FROM dm_comorbid_custom   WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
# NOTE(review): this variable (and its sheet) is named dm_pt_elixhauser but the
# query reads table dm_elix -- confirm the table name is the intended one.
dm_pt_elixhauser = snow.select(
    "SELECT * FROM dm_elix              WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
dm_pt_med_cnt = snow.select(
    "SELECT * FROM dm_pt_med_cnt        WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
dm_pt_med_cnt_pivot = snow.select(
    "SELECT * FROM dm_pt_med_cnt_pivot  WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
dm_pt_phar_cnt = snow.select(
    "SELECT * FROM dm_pt_phar_cnt       WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)
dm_pt_med_phar_pivot = snow.select(
    "SELECT * FROM dm_pt_med_phar_pivot WHERE patient_id IN (SELECT patient_id FROM tmp_sample)"
)

In [20]:
# Titles for the workbook sheets, one per DataFrame loaded above, listed in the
# same order as the frames handed to make_xlsx: cohort tables first, then the
# data-extract (de_raven_*) tables, then the data-mart (dm_*) tables.
sheet_titles = ['coh_pt', 'coh_npi', 'coh_npi_pt_link',
                'de_raven_demographics', 'de_raven_diagnosis',
                'de_raven_header', 'de_raven_patient',
                'de_raven_payer', 'de_raven_pharmacy',
                'de_raven_procedure', 'de_raven_provider',
                'dm_demo', 'dm_comorbid_custom',
                'dm_pt_elixhauser', 'dm_pt_med_cnt',
                'dm_pt_med_cnt_pivot', 'dm_pt_phar_cnt',
                'dm_pt_med_phar_pivot']

# Sheet (tab) names reuse the titles. Take a copy rather than a second name for
# the same list: the two are passed as separate arguments, so an in-place edit
# of one must not silently change the other.
sheet_names = list(sheet_titles)

In [21]:
# Collect the sampled frames: cohort tables, then data-extract tables, then
# data-mart tables.
# NOTE(review): presumably make_xlsx pairs these by position with sheet_names /
# sheet_titles, so the order here must match those lists -- confirm.
review_frames = [
    coh_pt,
    coh_npi,
    coh_npi_pt_link,
    de_raven_demographics,
    de_raven_diagnosis,
    de_raven_header,
    de_raven_patient,
    de_raven_payer,
    de_raven_pharmacy,
    de_raven_procedure,
    de_raven_provider,
    dm_demo,
    dm_comorbid_custom,
    dm_pt_elixhauser,
    dm_pt_med_cnt,
    dm_pt_med_cnt_pivot,
    dm_pt_phar_cnt,
    dm_pt_med_phar_pivot,
]

# Write all frames into a single review workbook.
make_xlsx(
    data=review_frames,
    xlsx_path='../../out/review/review_sample_data.xlsx',
    workbook_title='Patient Level Data Tables',
    sheet_names=sheet_names,
    sheet_titles=sheet_titles,
)

In [17]:
# NOTE(review): IPython help lookup for make_xlsx, apparently left over from
# development (it is IPython-only syntax, not plain Python); consider removing
# it before the notebook is shared.
?make_xlsx