In [77]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import datetime
import analysis

In [43]:
# Read each CSV export from the working directory into its own DataFrame.
# The file names and the target variables are paired by position.
(
    conditions,
    patients,
    observations,
    care_plans,
    encounters,
    devices,
    supplies,
    procedures,
    medications,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        "conditions.csv",
        "patients.csv",
        "observations.csv",
        "careplans.csv",
        "encounters.csv",
        "devices.csv",
        "supplies.csv",
        "procedures.csv",
        "medications.csv",
    )
)

In [72]:
# Render matplotlib figures inline, directly below the cell that draws them.
%matplotlib inline

In [73]:
# Set the inline backend's figure format to 'retina' (higher-resolution figure output).
%config InlineBackend.figure_format = 'retina'

In [74]:
# Load IPython's autoreload extension (the recorded output shows it was already loaded in this kernel).
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [75]:
# Autoreload mode 1: before running code, reload only the modules registered with %aimport.
%autoreload 1

In [76]:
# Register the `analysis` module for autoreload so that edits to it are picked up on the next cell run.
%aimport analysis

### IDs of patients who have been diagnosed with COVID-19 (these patients form the training set)

In [50]:
# Unique PATIENT ids that have at least one condition row with code 840539006 (COVID-19).
covid_patient_ids = conditions.loc[conditions["CODE"] == 840539006, "PATIENT"].unique()

### IDs for all patients that died in the simulation (died = 1, survived = 0)

In [51]:
# Ids of every patient with a non-null DEATHDATE, regardless of cause of death.
deceased_patients = patients.loc[patients["DEATHDATE"].notna(), "Id"]

In [52]:
# PATIENT column of encounters with code 1505002 whose reason code is COVID-19 (840539006).
# NOTE(review): presumably 1505002 denotes a hospital admission, given the variable name — confirm.
inpatient_ids = encounters.loc[
    (encounters["CODE"] == 1505002) & (encounters["REASONCODE"] == 840539006),
    "PATIENT",
]

In [59]:
# PATIENT column of care plans with code 736376001 and reason COVID-19 (840539006)
# that have a STOP value, i.e. care plans that have ended.
completed_isolation_patients = care_plans.loc[
    (care_plans["CODE"] == 736376001)
    & care_plans["STOP"].notna()
    & (care_plans["REASONCODE"] == 840539006),
    "PATIENT",
]

In [61]:
# Unique PATIENT ids with an observation coded '94531-1' whose value is "Not detected".
negative_covid_patient_ids = observations.loc[
    (observations["CODE"] == '94531-1')
    & (observations["VALUE"] == 'Not detected (qualifier value)'),
    "PATIENT",
].unique()

In [62]:
# Sorted, de-duplicated union of patients who completed isolation and patients with a negative test.
survivor_ids = np.unique(
    np.concatenate((completed_isolation_patients, negative_covid_patient_ids), axis=None)
)

### Intersection of COVID-19 patients and deceased patients (people who died due to COVID = 1)

In [15]:
# Show the column names of the conditions table.
conditions.columns.tolist()

['START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION']

In [16]:
# Show the column names of the patients table.
patients.columns.tolist()

['Id',
 'BIRTHDATE',
 'DEATHDATE',
 'SSN',
 'DRIVERS',
 'PASSPORT',
 'PREFIX',
 'FIRST',
 'LAST',
 'SUFFIX',
 'MAIDEN',
 'MARITAL',
 'RACE',
 'ETHNICITY',
 'GENDER',
 'BIRTHPLACE',
 'ADDRESS',
 'CITY',
 'STATE',
 'COUNTY',
 'ZIP',
 'LAT',
 'LON',
 'HEALTHCARE_EXPENSES',
 'HEALTHCARE_COVERAGE']

In [17]:
# Show the column names of the observations table.
observations.columns.tolist()

['DATE',
 'PATIENT',
 'ENCOUNTER',
 'CODE',
 'DESCRIPTION',
 'VALUE',
 'UNITS',
 'TYPE']

In [18]:
# Show the column names of the care plans table.
care_plans.columns.tolist()

['Id',
 'START',
 'STOP',
 'PATIENT',
 'ENCOUNTER',
 'CODE',
 'DESCRIPTION',
 'REASONCODE',
 'REASONDESCRIPTION']

In [19]:
# Show the column names of the encounters table.
encounters.columns.tolist()

['Id',
 'START',
 'STOP',
 'PATIENT',
 'ORGANIZATION',
 'PROVIDER',
 'PAYER',
 'ENCOUNTERCLASS',
 'CODE',
 'DESCRIPTION',
 'BASE_ENCOUNTER_COST',
 'TOTAL_CLAIM_COST',
 'PAYER_COVERAGE',
 'REASONCODE',
 'REASONDESCRIPTION']

In [20]:
# Show the column names of the devices table.
devices.columns.tolist()

['START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'UDI']

In [21]:
# Show the column names of the supplies table.
supplies.columns.tolist()

['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'QUANTITY']

In [22]:
# Show the column names of the procedures table.
procedures.columns.tolist()

['DATE',
 'PATIENT',
 'ENCOUNTER',
 'CODE',
 'DESCRIPTION',
 'BASE_COST',
 'REASONCODE',
 'REASONDESCRIPTION']

In [23]:
# Show the column names of the medications table.
medications.columns.tolist()

['START',
 'STOP',
 'PATIENT',
 'PAYER',
 'ENCOUNTER',
 'CODE',
 'DESCRIPTION',
 'BASE_COST',
 'PAYER_COVERAGE',
 'DISPENSES',
 'TOTALCOST',
 'REASONCODE',
 'REASONDESCRIPTION']

### Needed information 
#### 1. AGE (DEATHDATE - BIRTHDATE)
#### 2. RACE
#### 3. ETHNICITY
#### 4. GENDER
#### 5. SYMPTOMS
#### ....

In [55]:
# All condition rows whose code is 840539006 (COVID-19), with every column kept.
covid_conditions = conditions.loc[conditions["CODE"] == 840539006]

In [56]:
# Display the COVID-19 condition rows; the recorded output shows pandas eliding the middle rows.
covid_conditions

Unnamed: 0,START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION
4,2020-03-01,2020-03-30,1ff7f10f-a204-4bb1-aa72-dd763fa99482,60584c6a-e26b-4176-a660-86ad84c7ceca,840539006,COVID-19
11,2020-03-13,2020-04-14,9bcf6ed5-d808-44af-98a0-7d78a29ede72,a78e78d9-33bb-40bc-9e42-e47ff7e910cc,840539006,COVID-19
17,2020-03-10,2020-04-02,5163c501-353c-4a82-b863-a3f1df2d6cf1,f26c8d54-0e02-402b-9e98-13be78607b6b,840539006,COVID-19
22,2020-02-27,2020-03-17,cc3c806f-4a09-4a89-a990-4286450956be,12a6479c-c830-44a4-a6e6-1db0d5b0d815,840539006,COVID-19
27,2020-02-19,2020-02-28,bd1c4ffc-7f1d-4590-adbb-1d6533fb623e,8851313a-7276-49a2-ad6d-c672b3f6ebb5,840539006,COVID-19
...,...,...,...,...,...,...
1143804,2020-03-05,2020-03-15,12d4e7cc-e86b-46c5-a696-ecd52b929b71,43d9d971-2625-46ac-8378-6a07ae2fc586,840539006,COVID-19
1143826,2020-03-02,2020-03-23,2dbac353-17bb-42da-9aa0-a302f052d1a3,ccd36d91-4c90-411d-bd3d-0163d24684e4,840539006,COVID-19
1143857,2020-03-04,2020-04-08,8cf835a7-f161-4fe9-a559-350c97a3450e,72314c29-60f7-4eb3-b2cc-902ba571c99e,840539006,COVID-19
1143873,2020-03-05,,503d768f-481c-46e2-bcdb-a6116686351a,62169fce-af51-450b-889b-6dca545fadbe,840539006,COVID-19


In [63]:
# PATIENT column of COVID-19 care plans with code 736376001, whether or not they have ended.
isolation_ids = care_plans.loc[
    (care_plans["CODE"] == 736376001) & (care_plans["REASONCODE"] == 840539006),
    "PATIENT",
]
# PATIENT column of encounters with code 305351004 (no filter on reason code).
icu_ids = encounters.loc[encounters["CODE"] == 305351004, "PATIENT"]
# PATIENT column of procedures with code 26763009 (no filter on reason code).
vent_ids = procedures.loc[procedures["CODE"] == 26763009, "PATIENT"]

# One row per COVID-19 condition record, left-joined to the patient's row in `patients`
# and extended with boolean flags derived from the id collections above.
# `death` is True for any non-null DEATHDATE, regardless of cause.
cp = covid_conditions.merge(
    patients, how='left', left_on='PATIENT', right_on='Id'
).assign(
    isolation=lambda frame: frame["Id"].isin(isolation_ids),
    admit=lambda frame: frame["Id"].isin(inpatient_ids),
    recovered=lambda frame: frame["Id"].isin(survivor_ids),
    death=lambda frame: frame["DEATHDATE"].notna(),
    icu_admit=lambda frame: frame["Id"].isin(icu_ids),
    ventilated=lambda frame: frame["Id"].isin(vent_ids),
)

In [65]:
# Keep only the identifier, outcome flags, dates and gender needed downstream.
covid_info = cp.loc[
    :,
    [
        'PATIENT',
        'recovered',
        'death',
        'START',
        'DEATHDATE',
        'BIRTHDATE',
        'GENDER',
        'admit',
        'icu_admit',
    ],
]

In [66]:
# Rename START to covid_start so it stays distinguishable after a later merge
# with the conditions table, which has its own START column.
covid_info = covid_info.rename({'START': 'covid_start'}, axis='columns')

In [67]:
# Condition rows (of any code) whose START date is strictly after 2020-01-20.
covid_related_conditions = conditions.loc[
    pd.to_datetime(conditions["START"]).gt(pd.Timestamp('2020-01-20'))
]

In [68]:
# Inner-join each COVID-19 patient's summary row to all of their conditions after the cutoff date.
covid_patient_conditions = pd.merge(covid_info, covid_related_conditions, on='PATIENT')

In [78]:
# NOTE(review): the recorded run of this cell raised
# "AttributeError: module 'analysis' has no attribute 'symptom_table'".
# Presumably symptom_table is missing from, or named differently in, analysis.py — confirm and re-run.
analysis.symptom_table(covid_patient_conditions)

AttributeError: module 'analysis' has no attribute 'symptom_table'