### Import Statements

In [1]:
import os
import pyodbc
import pandas as pd
from IPython.display import display, Markdown
from ebmdatalab import bq
import datalab_covariates as dlc

### Server connection

In [2]:
server = 'covid.ebmdatalab.net,1433'
database = 'OPENCorona'
username = 'SA'
password = 'ahsjdkaJAMSHDA123[' 
cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER='+server+';DATABASE='+database+';UID='+username+';PWD='+ password)
cursor = cnxn.cursor()
dlc.set_backend(
    "tpp",
    hostname='covid.ebmdatalab.net',
    port='1433',
    database='OPENCoronaExport' ,
    username='SA',
    password='ahsjdkaJAMSHDA123[',
)

### Population 

Our study population is everyone in CHESS (dummy) dataset that had a positive covid-19 swab

In [3]:
chess = pd.read_csv('../data/chess.csv')
chess.head()

Unnamed: 0,Patient_ID,symptom_onset,swab_date,lab_test_date,result,admitted_itu,admission_date,died,death_date
0,6041,2020-03-17,2020-03-24,2020-03-24,A/non-subtyped,0,,0,
1,1421431,2020-02-03,2020-02-27,2020-02-27,COVID-19,0,,0,
2,50459,2020-02-29,2020-03-03,2020-03-03,COVID-19,0,,0,
3,1359784,2020-03-06,2020-03-13,2020-03-13,COVID-19,0,,0,
4,1358280,2020-03-10,2020-02-09,2020-02-09,A/unsubtypeable,0,,0,


In [4]:
study_population = chess[chess['result'] == 'COVID-19']

In [5]:
study_population.head()

Unnamed: 0,Patient_ID,symptom_onset,swab_date,lab_test_date,result,admitted_itu,admission_date,died,death_date
1,1421431,2020-02-03,2020-02-27,2020-02-27,COVID-19,0,,0,
2,50459,2020-02-29,2020-03-03,2020-03-03,COVID-19,0,,0,
3,1359784,2020-03-06,2020-03-13,2020-03-13,COVID-19,0,,0,
6,1031144,2020-02-20,2020-03-12,2020-03-12,COVID-19,0,,0,
10,1316105,2020-03-05,2020-03-09,2020-03-09,COVID-19,0,,0,


In [6]:
study_population.to_csv("../data/analysis/study_pop.csv")

### Outcome and Exposure definitions

#### Exposure: CVD disease ever

In this analysis, our exposure is Cardiovascular disease. We define this as a code on a patient's records ever that is for cardiovascular disease such as angina or a prescription of a cardiovascular drugs (from bnf)

##### Read codes

In [7]:
qof_clusters = pd.read_csv('../data/QoFClusteres_CTV3Codes - Sheet1.csv')
chd_codes = qof_clusters.loc[qof_clusters['ClusterId']=='CHD','CTV3Code']
chd_codes.head()

3562    14A3.
3563    14A4.
3564    G300.
3565    G301.
3566    G3010
Name: CTV3Code, dtype: object

##### Medicine

In [8]:
sql = '''
WITH bnf_codes AS (
  SELECT bnf_code FROM hscic.presentation WHERE 
    bnf_code LIKE '02%' #BNF cvd chapter 
)

SELECT "vmp" AS type, id, bnf_code, nm
FROM dmd.vmp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

UNION ALL

SELECT "amp" AS type, id, bnf_code, descr
FROM dmd.amp
WHERE bnf_code IN (SELECT * FROM bnf_codes)

ORDER BY type, bnf_code, id
'''

cvd_medcodes = bq.cached_read(sql, csv_path=os.path.join('..','data','cvd_medcodes.csv'))
cvd_medcodes.head()

Unnamed: 0,type,id,bnf_code,nm
0,amp,4783111000001104,0201010AABBAAAA,Digibind 38mg powder for solution for injectio...
1,amp,20477811000001103,0201010AABCAAAB,DigiFab 40mg powder for solution for infusion ...
2,amp,3726311000001104,0201010D0AAAAAA,Digitoxin 100microgram tablets (A A H Pharmace...
3,amp,3726811000001108,0201010D0AAAAAA,Digitoxin 100microgram tablets (Focus Pharmace...
4,amp,5626411000001106,0201010D0AAAAAA,Digitoxin 100microgram tablets (Alliance Healt...


#### Find patients with coded events where code matches cvd disease

In [9]:
chd_patients = dlc.patients_with_these_clinical_events("chd_code", ctv3_codes=chd_codes)
clin_df = chd_patients.to_df()
clin_df.head()

Unnamed: 0_level_0,chd_code
patient_id,Unnamed: 1_level_1
84,1
201,1
228,1
301,1
401,1


#### Find all patients where cvd code on record

In [10]:
cvd_patients = dlc.patients_with_these_medications('cvd_meds', snomed_codes=cvd_medcodes['id'])

med_df = cvd_patients.to_df()
med_df.head()

Unnamed: 0_level_0,cvd_meds
patient_id,Unnamed: 1_level_1
2,1
18,1
28,1
34,1
41,1


In [11]:
clin_df.join(med_df, how='outer').fillna(0).head()

Unnamed: 0_level_0,chd_code,cvd_meds
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2,0.0,1.0
18,0.0,1.0
28,0.0,1.0
34,0.0,1.0
41,0.0,1.0


In [12]:
clin_df.to_csv("../data/analysis/cvd_dis.csv")

#### Outcome: Death

Our outcome of interest is death

1 - death
0 - alive

This is contained within the study pop csv

### Covariates defintions

- Age
- Gender
- Smoking status

In [13]:
df = pd.read_csv('../data/analysis/study_pop.csv')
df.rename({'Patient_ID': 'patient_id'}, axis=1, inplace=True)

In [14]:
df.head()

Unnamed: 0.1,Unnamed: 0,patient_id,symptom_onset,swab_date,lab_test_date,result,admitted_itu,admission_date,died,death_date
0,1,1421431,2020-02-03,2020-02-27,2020-02-27,COVID-19,0,,0,
1,2,50459,2020-02-29,2020-03-03,2020-03-03,COVID-19,0,,0,
2,3,1359784,2020-03-06,2020-03-13,2020-03-13,COVID-19,0,,0,
3,6,1031144,2020-02-20,2020-03-12,2020-03-12,COVID-19,0,,0,
4,10,1316105,2020-03-05,2020-03-09,2020-03-09,COVID-19,0,,0,


#### Demographics 

In [15]:
demographics = dlc.patients_with_age_and_sex('2020-04-01', patient_ids=df['patient_id'])
co_df = demographics.to_df()
co_df.head()

Unnamed: 0_level_0,age,sex
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1
254,59,F
572,39,M
592,50,M
647,95,F
659,60,F


In [16]:
co_df.to_csv('../data/analysis/demo.csv')

#### Smoking status

In [17]:
smoking_stat = pd.read_csv('../data/smoking_codes.csv')

In [18]:
smoking_stat.head()

Unnamed: 0.1,Unnamed: 0,ClusterId,ClusterName,CTV3Code,Description
0,3495,CESS,Smoking cessation codes,6791.,Health education - smoking
1,3496,CESS,Smoking cessation codes,67A3.,Pregnancy smoking advice
2,3497,CESS,Smoking cessation codes,9OO1.,Attends stop smoking monitoring
3,3498,CESS,Smoking cessation codes,9OO2.,Refuses stop smoking monitor
4,3499,CESS,Smoking cessation codes,9OO3.,Stop smoking monitor default


In [19]:
smokers = dlc.patients_with_these_clinical_events(
    "smoking_status",
    ctv3_codes=smoking_stat['CTV3Code'],
    min_date='2015-01-01',
    max_date='2020-03-31'
)

smok_df = smokers.to_df()
smok_df.head()

Unnamed: 0_level_0,smoking_status
patient_id,Unnamed: 1_level_1
84,1
102,1
106,1
107,1
136,1


In [20]:
smok_df.to_csv('../data/analysis/smok.csv')

### Making final dataset 

In [21]:
study_pop = pd.read_csv('../data/analysis/study_pop.csv')

In [22]:
study_pop = study_pop[['Patient_ID', 'admitted_itu', 'died']]
study_pop.rename({'Patient_ID': 'patient_id'}, axis=1, inplace=True)

In [23]:
study_pop.head()

Unnamed: 0,patient_id,admitted_itu,died
0,1421431,0,0
1,50459,0,0
2,1359784,0,0
3,1031144,0,0
4,1316105,0,0


##### Add in exposure

In [24]:
cvd = pd.read_csv('../data/analysis/cvd_dis.csv')

In [25]:
cvd.head()

Unnamed: 0,patient_id,chd_code
0,84,1
1,201,1
2,228,1
3,301,1
4,401,1


In [26]:
study_pop = study_pop.merge(cvd, how='left', on='patient_id')

In [27]:
study_pop.fillna(0, inplace=True)

In [28]:
study_pop.head()

Unnamed: 0,patient_id,admitted_itu,died,chd_code
0,1421431,0,0,1.0
1,50459,0,0,0.0
2,1359784,0,0,0.0
3,1031144,0,0,0.0
4,1316105,0,0,1.0


##### Add in demographics and other covariates

In [29]:
demo = pd.read_csv('../data/analysis/demo.csv')

In [30]:
demo = demo[['patient_id', 'sex', 'age']]

In [31]:
demo.head()

Unnamed: 0,patient_id,sex,age
0,254,F,59
1,572,M,39
2,592,M,50
3,647,F,95
4,659,F,60


In [32]:
study_pop = study_pop.merge(demo, how='left', on='patient_id')

In [33]:
study_pop.head()

Unnamed: 0,patient_id,admitted_itu,died,chd_code,sex,age
0,1421431,0,0,1.0,F,23
1,50459,0,0,0.0,M,19
2,1359784,0,0,0.0,M,56
3,1031144,0,0,0.0,F,56
4,1316105,0,0,1.0,F,23


##### Add in smoking

In [34]:
smok = pd.read_csv('../data/analysis/smok.csv')

In [35]:
smok = smok[['patient_id', 'smoking_status']]

In [36]:
smok.head()

Unnamed: 0,patient_id,smoking_status
0,84,1
1,102,1
2,106,1
3,107,1
4,136,1


In [37]:
study_pop = study_pop.merge(smok, how='left', on='patient_id')
study_pop.fillna(0, inplace=True)

In [38]:
study_pop.head()

Unnamed: 0,patient_id,admitted_itu,died,chd_code,sex,age,smoking_status
0,1421431,0,0,1.0,F,23,0.0
1,50459,0,0,0.0,M,19,0.0
2,1359784,0,0,0.0,M,56,0.0
3,1031144,0,0,0.0,F,56,0.0
4,1316105,0,0,1.0,F,23,0.0


In [39]:
study_pop.to_csv('../data/analysis/final_dataset.csv')