# Raven Demographics
Extract all Raven demographic records for the patients listed in `coh_pt`.

**Script**
* [scripts/de/raven_demographics.ipynb](./scripts/de/raven_demographics.ipynb)

**Prior Script(s)**
* [scripts/coh/coh_basic.ipynb](./scripts/coh/coh_basic.ipynb)

**Parameters**
* `in/de/raven_extract.xlsx[raven_extract]`

**Input**
* `coh_pt`
* `rwd_db.rwd.raven_patient_demographics`

**Output**  
* `de_raven_demographics`

**Review**
* [scripts/de/raven_demographics.html](./scripts/de/raven_demographics.html)

In [None]:
#Import libraries for this notebook
import pandas as pd  
from drg_connect import Snowflake
import numpy as np
import pickle
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

#Load connection variables to connect_dict
#NOTE(review): pickle.load can execute arbitrary code while unpickling; this
#file is presumably written by an earlier project step - confirm it is trusted
with open('../../out/conn/connect_dict.pickle', 'rb') as handle:
    connect_dict = pickle.load(handle)

#Create engine to connect to snowflake
#connect_dict is read for the keys 'role', 'warehouse', 'database' and 'schema'
snow = Snowflake(role=connect_dict['role'],
                 warehouse=connect_dict['warehouse'],
                 database=connect_dict['database'],
                 schema=connect_dict['schema'])

#Finish engine setup
#The variable must be named `engine` because SQL.conn_name below refers to it by name
engine = snow.engine
%load_ext sql_magic
%config SQL.conn_name = 'engine'  #Set the sql_magic connection engine
%config SQL.output_result = True  #Enable output to std out
%config SQL.notify_result = False #disable browser notifications


# Extract Data
**Input**
  * `coh_pt`
  * `rwd_db.rwd.raven_patient_demographics`

**Output**  
* `de_raven_demographics`

In [None]:
%%read_sql
--Create raven demographics table
--Keeps every row of raven_patient_demographics whose patient_id appears in coh_pt
--CREATE OR REPLACE rebuilds the table in a single atomic statement, so a failed
--build does not leave the previous table already dropped
CREATE OR REPLACE TRANSIENT TABLE de_raven_demographics AS
      SELECT encrypted_key_1,
             encrypted_key_2,
             patient_id,
             source_of_patient,
             gender,
             date_of_birth,
             consistency_score
        FROM rwd_db.rwd.raven_patient_demographics
       WHERE patient_id IN (SELECT patient_id
                              FROM coh_pt);

In [None]:
%%read_sql
--Sanity check on the extracted table
--Reports total rows, distinct key pairs, distinct patients, and the number of
--non-null values in each descriptive column
SELECT COUNT(*) AS row_cnt,
       COUNT(DISTINCT encrypted_key_1, encrypted_key_2) AS key_cnt,
       COUNT(DISTINCT patient_id) AS pt_cnt,
       COUNT(source_of_patient) AS pt_source,
       COUNT(gender) AS gender_cnt,
       COUNT(date_of_birth) AS dob_cnt
  FROM de_raven_demographics;

In [None]:
%%read_sql
--Row count and row share for each source_of_patient value
--pt_cnt holds the total number of rows in the table and is the denominator
--pct is a fraction of all rows and is not scaled to 100
SET pt_cnt = (SELECT COUNT(*) FROM de_raven_demographics);

SELECT source_of_patient,
       COUNT(*) AS cnt,
       COUNT(*) / $pt_cnt AS pct
  FROM de_raven_demographics
 GROUP BY source_of_patient
 ORDER BY source_of_patient;


In [None]:
%%read_sql
--Calculate gender breakdown
--pt_cnt holds the total number of rows in the table and is the denominator
--pct is a fraction of all rows and is not scaled to 100
SET pt_cnt = (SELECT Count(*) FROM de_raven_demographics);

SELECT gender               AS gender,
       Count(*)             AS cnt,
       (Count(*) / $pt_cnt) AS pct
  FROM de_raven_demographics
 GROUP BY gender
 --Sorted like the other breakdown cells so the row order is stable between runs
 ORDER BY gender;

In [None]:
%%read_sql
--Distribution of rows by year of birth
--pt_cnt holds the total number of rows in the table and is the denominator
--pct is a fraction of all rows and is not scaled to 100
SET pt_cnt = (SELECT COUNT(*) FROM de_raven_demographics);

SELECT YEAR(date_of_birth) AS yr,
       COUNT(*) AS cnt,
       COUNT(*) / $pt_cnt AS pct
  FROM de_raven_demographics
 GROUP BY YEAR(date_of_birth)
 ORDER BY yr;