In [77]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import datetime
import analysis

In [43]:
# Read each CSV export (paths are relative to the working directory) into its
# own DataFrame; the tuple on the left lines up one-to-one with the file names.
(
    conditions,
    patients,
    observations,
    care_plans,
    encounters,
    devices,
    supplies,
    procedures,
    medications,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        "conditions.csv",
        "patients.csv",
        "observations.csv",
        "careplans.csv",
        "encounters.csv",
        "devices.csv",
        "supplies.csv",
        "procedures.csv",
        "medications.csv",
    )
)

In [72]:
%matplotlib inline

In [73]:
%config InlineBackend.figure_format = 'retina'

In [74]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [75]:
%autoreload 1

In [76]:
%aimport analysis

### IDs of patients that have been diagnosed with COVID-19 -> training set

In [50]:
# Distinct patient ids that have at least one condition row with the COVID-19 code.
covid_patient_ids = conditions.loc[conditions["CODE"].eq(840539006), "PATIENT"].unique()

### IDs for all patients that died in the simulation (died = 1, survived = 0)

In [51]:
# Ids of every patient with a recorded DEATHDATE (any cause of death).
deceased_patients = patients.loc[patients["DEATHDATE"].notna(), "Id"]

In [52]:
# Patients of encounters whose reason is the COVID-19 code and whose encounter
# code is 1505002 (presumably an inpatient admission, judging by the variable name).
inpatient_ids = encounters.loc[
    encounters["REASONCODE"].eq(840539006) & encounters["CODE"].eq(1505002),
    "PATIENT",
]

In [59]:
# Patients whose COVID-19 care plan with code 736376001 has a STOP date,
# i.e. the plan has ended (presumably home isolation, per the variable name).
completed_isolation_patients = care_plans.loc[
    care_plans["CODE"].eq(736376001)
    & care_plans["STOP"].notna()
    & care_plans["REASONCODE"].eq(840539006),
    "PATIENT",
]

In [61]:
# Distinct patients with at least one observation 94531-1 whose value is "Not detected".
negative_covid_patient_ids = observations.loc[
    observations["CODE"].eq('94531-1')
    & observations["VALUE"].eq('Not detected (qualifier value)'),
    "PATIENT",
].unique()

In [62]:
# Sorted, de-duplicated union of the two "recovered" id collections.
survivor_ids = np.unique(
    np.concatenate((completed_isolation_patients, negative_covid_patient_ids), axis=None)
)

### Intersection (people who died due to COVID = 1)

In [15]:
list(conditions.columns)

['START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION']

In [16]:
list(patients.columns)

['Id',
 'BIRTHDATE',
 'DEATHDATE',
 'SSN',
 'DRIVERS',
 'PASSPORT',
 'PREFIX',
 'FIRST',
 'LAST',
 'SUFFIX',
 'MAIDEN',
 'MARITAL',
 'RACE',
 'ETHNICITY',
 'GENDER',
 'BIRTHPLACE',
 'ADDRESS',
 'CITY',
 'STATE',
 'COUNTY',
 'ZIP',
 'LAT',
 'LON',
 'HEALTHCARE_EXPENSES',
 'HEALTHCARE_COVERAGE']

In [17]:
list(observations.columns)

['DATE',
 'PATIENT',
 'ENCOUNTER',
 'CODE',
 'DESCRIPTION',
 'VALUE',
 'UNITS',
 'TYPE']

In [18]:
list(care_plans.columns)

['Id',
 'START',
 'STOP',
 'PATIENT',
 'ENCOUNTER',
 'CODE',
 'DESCRIPTION',
 'REASONCODE',
 'REASONDESCRIPTION']

In [19]:
list(encounters.columns)

['Id',
 'START',
 'STOP',
 'PATIENT',
 'ORGANIZATION',
 'PROVIDER',
 'PAYER',
 'ENCOUNTERCLASS',
 'CODE',
 'DESCRIPTION',
 'BASE_ENCOUNTER_COST',
 'TOTAL_CLAIM_COST',
 'PAYER_COVERAGE',
 'REASONCODE',
 'REASONDESCRIPTION']

In [20]:
list(devices.columns)

['START', 'STOP', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'UDI']

In [21]:
list(supplies.columns)

['DATE', 'PATIENT', 'ENCOUNTER', 'CODE', 'DESCRIPTION', 'QUANTITY']

In [22]:
list(procedures.columns)

['DATE',
 'PATIENT',
 'ENCOUNTER',
 'CODE',
 'DESCRIPTION',
 'BASE_COST',
 'REASONCODE',
 'REASONDESCRIPTION']

In [23]:
list(medications.columns)

['START',
 'STOP',
 'PATIENT',
 'PAYER',
 'ENCOUNTER',
 'CODE',
 'DESCRIPTION',
 'BASE_COST',
 'PAYER_COVERAGE',
 'DISPENSES',
 'TOTALCOST',
 'REASONCODE',
 'REASONDESCRIPTION']

### Needed information 
#### 1. AGE (DEATHDATE - BIRTHDATE)
#### 2. RACE
#### 3. ETHNICITY
#### 4. GENDER
#### 5. SYMPTOMS
#### ....

In [55]:
# All condition rows (every column kept) that carry the COVID-19 code.
covid_conditions = conditions.loc[conditions["CODE"].eq(840539006)]

In [56]:
covid_conditions

Unnamed: 0,START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION
4,2020-03-01,2020-03-30,1ff7f10f-a204-4bb1-aa72-dd763fa99482,60584c6a-e26b-4176-a660-86ad84c7ceca,840539006,COVID-19
11,2020-03-13,2020-04-14,9bcf6ed5-d808-44af-98a0-7d78a29ede72,a78e78d9-33bb-40bc-9e42-e47ff7e910cc,840539006,COVID-19
17,2020-03-10,2020-04-02,5163c501-353c-4a82-b863-a3f1df2d6cf1,f26c8d54-0e02-402b-9e98-13be78607b6b,840539006,COVID-19
22,2020-02-27,2020-03-17,cc3c806f-4a09-4a89-a990-4286450956be,12a6479c-c830-44a4-a6e6-1db0d5b0d815,840539006,COVID-19
27,2020-02-19,2020-02-28,bd1c4ffc-7f1d-4590-adbb-1d6533fb623e,8851313a-7276-49a2-ad6d-c672b3f6ebb5,840539006,COVID-19
...,...,...,...,...,...,...
1143804,2020-03-05,2020-03-15,12d4e7cc-e86b-46c5-a696-ecd52b929b71,43d9d971-2625-46ac-8378-6a07ae2fc586,840539006,COVID-19
1143826,2020-03-02,2020-03-23,2dbac353-17bb-42da-9aa0-a302f052d1a3,ccd36d91-4c90-411d-bd3d-0163d24684e4,840539006,COVID-19
1143857,2020-03-04,2020-04-08,8cf835a7-f161-4fe9-a559-350c97a3450e,72314c29-60f7-4eb3-b2cc-902ba571c99e,840539006,COVID-19
1143873,2020-03-05,,503d768f-481c-46e2-bcdb-a6116686351a,62169fce-af51-450b-889b-6dca545fadbe,840539006,COVID-19


In [63]:
# Id collections used for the outcome flags below. The meaning of each code is
# inferred from the variable names only -- TODO confirm against the code system.
isolation_ids = care_plans.loc[
    care_plans["CODE"].eq(736376001) & care_plans["REASONCODE"].eq(840539006),
    "PATIENT",
]
icu_ids = encounters.loc[encounters["CODE"].eq(305351004), "PATIENT"]
vent_ids = procedures.loc[procedures["CODE"].eq(26763009), "PATIENT"]

# One row per COVID-19 condition record, enriched with the patient's demographics
# and one boolean column per outcome (membership of the patient Id in each id set).
cp = (
    covid_conditions
    .merge(patients, how='left', left_on='PATIENT', right_on='Id')
    .assign(
        isolation=lambda frame: frame["Id"].isin(isolation_ids),
        admit=lambda frame: frame["Id"].isin(inpatient_ids),
        recovered=lambda frame: frame["Id"].isin(survivor_ids),
        # Any recorded death date counts, regardless of cause.
        death=lambda frame: frame["DEATHDATE"].notna(),
        icu_admit=lambda frame: frame["Id"].isin(icu_ids),
        ventilated=lambda frame: frame["Id"].isin(vent_ids),
    )
)

In [65]:
# Narrow cp down to the identifiers, dates, and outcome flags needed downstream.
covid_info = cp.loc[
    :,
    [
        'PATIENT',
        'recovered',
        'death',
        'START',
        'DEATHDATE',
        'BIRTHDATE',
        'GENDER',
        'admit',
        'icu_admit',
    ],
]

In [66]:
# START here is the COVID-19 onset date; rename it so it cannot be confused with
# the START column of other condition rows merged in later.
covid_info = covid_info.rename({'START': 'covid_start'}, axis="columns")

In [67]:
# Every condition that started strictly after 2020-01-20.
# NOTE(review): this is a date filter only, so unrelated conditions (e.g. the
# "Sprain of ankle" row visible in the output further down) are included too.
covid_related_conditions = conditions.loc[
    pd.to_datetime(conditions["START"]).gt(pd.Timestamp('2020-01-20'))
]

In [68]:
# Inner join: one row per (COVID record, post-cutoff condition) pair for the same patient.
covid_patient_conditions = pd.merge(covid_info, covid_related_conditions, on='PATIENT')

In [79]:
# Outcome rates for the COVID-19 records in `cp`, overall and within each
# severity cohort. Each cell is
#     (# rows with the outcome AND in the cohort) / (# rows in the cohort).
# NOTE(review): `cp` has one row per COVID-19 condition record, so these are
# per-record rates; they equal per-patient rates only if each patient has a
# single COVID-19 record -- confirm.
hospitalized = cp["admit"].astype(bool)
icu = cp["icu_admit"].astype(bool)
vent = cp["ventilated"].astype(bool)
covid_count = cp.Id.size

row_filters = {
    'Home Isolation': cp["isolation"].astype(bool),
    'Hospital Admission': hospitalized,
    'ICU Admission': icu,
    'Ventilated': vent,
    'Recovered': cp["recovered"].astype(bool),
    'Death': cp["death"].astype(bool),
}

# Column label -> boolean mask selecting the denominator population.
cohorts = {
    'All Patients': pd.Series(True, index=cp.index),
    'Hospitalized': hospitalized,
    'ICU Admitted': icu,
    'Required Ventilation': vent,
}

table_rows = []
for category, row_filter in row_filters.items():
    row = {'Outcome': category}
    for cohort_label, cohort_mask in cohorts.items():
        # Summing a boolean mask counts its True entries. Unlike
        # value_counts()[True], this does not raise KeyError for an empty cohort.
        cohort_size = int(cohort_mask.sum())
        matching = int((row_filter & cohort_mask).sum())
        row[cohort_label] = matching / cohort_size if cohort_size else float('nan')
    table_rows.append(row)

pd.DataFrame.from_records(table_rows)

Unnamed: 0,Outcome,All Patients,Hospitalized,ICU Admitted,Required Ventilation
0,Home Isolation,0.800036,0.029986,0.029644,0.028826
1,Hospital Admission,0.206145,1.0,1.0,1.0
2,ICU Admission,0.041705,0.202311,1.0,1.0
3,Ventilated,0.033051,0.16033,0.792494,1.0
4,Recovered,0.959724,0.806272,0.320914,0.14825
5,Death,0.041297,0.195103,0.679358,0.85175


In [80]:
# NOTE(review): recomputes exactly what cell In [67] already produced; this cell
# is a candidate for removal once the notebook is cleaned up.
covid_related_conditions = conditions.loc[
    pd.to_datetime(conditions["START"]) > pd.Timestamp('2020-01-20')
]

In [81]:
covid_related_conditions

Unnamed: 0,START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION
2,2020-03-01,2020-03-30,1ff7f10f-a204-4bb1-aa72-dd763fa99482,60584c6a-e26b-4176-a660-86ad84c7ceca,386661006,Fever (finding)
3,2020-03-01,2020-03-01,1ff7f10f-a204-4bb1-aa72-dd763fa99482,60584c6a-e26b-4176-a660-86ad84c7ceca,840544004,Suspected COVID-19
4,2020-03-01,2020-03-30,1ff7f10f-a204-4bb1-aa72-dd763fa99482,60584c6a-e26b-4176-a660-86ad84c7ceca,840539006,COVID-19
5,2020-02-12,2020-02-26,9bcf6ed5-d808-44af-98a0-7d78a29ede72,5d9dfe80-7edf-4181-9753-770e361934d1,44465007,Sprain of ankle
6,2020-03-13,2020-04-14,9bcf6ed5-d808-44af-98a0-7d78a29ede72,a78e78d9-33bb-40bc-9e42-e47ff7e910cc,49727002,Cough (finding)
...,...,...,...,...,...,...
1143882,2020-03-13,,503d768f-481c-46e2-bcdb-a6116686351a,4820e50c-5504-4cd5-b55b-5d4d78b18628,67782005,Acute respiratory distress syndrome (disorder)
1143896,2020-03-09,2020-04-05,f8d85cff-037c-4313-9448-14ac57d586a8,f2cadc10-2d46-42a8-80db-2893c700b50b,248595008,Sputum finding (finding)
1143897,2020-03-09,2020-04-05,f8d85cff-037c-4313-9448-14ac57d586a8,f2cadc10-2d46-42a8-80db-2893c700b50b,386661006,Fever (finding)
1143898,2020-03-09,2020-03-09,f8d85cff-037c-4313-9448-14ac57d586a8,f2cadc10-2d46-42a8-80db-2893c700b50b,840544004,Suspected COVID-19


In [82]:
# NOTE(review): same inner join as cell In [68]; kept so the cell order still
# runs, but it is redundant.
covid_patient_conditions = pd.merge(
    covid_info, covid_related_conditions, how="inner", on='PATIENT'
)

Unnamed: 0,PATIENT,recovered,death,covid_start,DEATHDATE,BIRTHDATE,GENDER,admit,icu_admit,START,STOP,ENCOUNTER,CODE,DESCRIPTION
0,1ff7f10f-a204-4bb1-aa72-dd763fa99482,True,False,2020-03-01,,2017-08-24,M,False,False,2020-03-01,2020-03-30,60584c6a-e26b-4176-a660-86ad84c7ceca,386661006,Fever (finding)
1,1ff7f10f-a204-4bb1-aa72-dd763fa99482,True,False,2020-03-01,,2017-08-24,M,False,False,2020-03-01,2020-03-01,60584c6a-e26b-4176-a660-86ad84c7ceca,840544004,Suspected COVID-19
2,1ff7f10f-a204-4bb1-aa72-dd763fa99482,True,False,2020-03-01,,2017-08-24,M,False,False,2020-03-01,2020-03-30,60584c6a-e26b-4176-a660-86ad84c7ceca,840539006,COVID-19
3,9bcf6ed5-d808-44af-98a0-7d78a29ede72,True,False,2020-03-13,,2016-08-01,F,False,False,2020-02-12,2020-02-26,5d9dfe80-7edf-4181-9753-770e361934d1,44465007,Sprain of ankle
4,9bcf6ed5-d808-44af-98a0-7d78a29ede72,True,False,2020-03-13,,2016-08-01,F,False,False,2020-03-13,2020-04-14,a78e78d9-33bb-40bc-9e42-e47ff7e910cc,49727002,Cough (finding)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
643449,503d768f-481c-46e2-bcdb-a6116686351a,False,True,2020-03-05,2020-03-18,1934-12-26,M,True,True,2020-03-13,,4820e50c-5504-4cd5-b55b-5d4d78b18628,67782005,Acute respiratory distress syndrome (disorder)
643450,f8d85cff-037c-4313-9448-14ac57d586a8,True,False,2020-03-09,,1934-12-26,M,False,False,2020-03-09,2020-04-05,f2cadc10-2d46-42a8-80db-2893c700b50b,248595008,Sputum finding (finding)
643451,f8d85cff-037c-4313-9448-14ac57d586a8,True,False,2020-03-09,,1934-12-26,M,False,False,2020-03-09,2020-04-05,f2cadc10-2d46-42a8-80db-2893c700b50b,386661006,Fever (finding)
643452,f8d85cff-037c-4313-9448-14ac57d586a8,True,False,2020-03-09,,1934-12-26,M,False,False,2020-03-09,2020-03-09,f2cadc10-2d46-42a8-80db-2893c700b50b,840544004,Suspected COVID-19


In [84]:
cp.head()

Unnamed: 0,START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION,Id,BIRTHDATE,DEATHDATE,SSN,...,LAT,LON,HEALTHCARE_EXPENSES,HEALTHCARE_COVERAGE,isolation,admit,recovered,death,icu_admit,ventilated
0,2020-03-01,2020-03-30,1ff7f10f-a204-4bb1-aa72-dd763fa99482,60584c6a-e26b-4176-a660-86ad84c7ceca,840539006,COVID-19,1ff7f10f-a204-4bb1-aa72-dd763fa99482,2017-08-24,,999-68-6630,...,42.151961,-72.598959,8446.49,1499.08,True,False,True,False,False,False
1,2020-03-13,2020-04-14,9bcf6ed5-d808-44af-98a0-7d78a29ede72,a78e78d9-33bb-40bc-9e42-e47ff7e910cc,840539006,COVID-19,9bcf6ed5-d808-44af-98a0-7d78a29ede72,2016-08-01,,999-15-5895,...,42.17737,-71.281353,94568.4,1870.72,True,False,True,False,False,False
2,2020-03-10,2020-04-02,5163c501-353c-4a82-b863-a3f1df2d6cf1,f26c8d54-0e02-402b-9e98-13be78607b6b,840539006,COVID-19,5163c501-353c-4a82-b863-a3f1df2d6cf1,2004-01-09,,999-73-2461,...,42.075292,-70.757035,375754.62,3131.44,True,False,True,False,False,False
3,2020-02-27,2020-03-17,cc3c806f-4a09-4a89-a990-4286450956be,12a6479c-c830-44a4-a6e6-1db0d5b0d815,840539006,COVID-19,cc3c806f-4a09-4a89-a990-4286450956be,1996-11-15,,999-60-7372,...,42.352434,-71.02861,484758.46,3632.96,True,False,True,False,False,False
4,2020-02-19,2020-02-28,bd1c4ffc-7f1d-4590-adbb-1d6533fb623e,8851313a-7276-49a2-ad6d-c672b3f6ebb5,840539006,COVID-19,bd1c4ffc-7f1d-4590-adbb-1d6533fb623e,2019-06-12,,999-81-4349,...,42.74022,-72.722648,24130.0,903.28,False,True,True,False,False,False


In [85]:
patients.head()

Unnamed: 0,Id,BIRTHDATE,DEATHDATE,SSN,DRIVERS,PASSPORT,PREFIX,FIRST,LAST,SUFFIX,...,BIRTHPLACE,ADDRESS,CITY,STATE,COUNTY,ZIP,LAT,LON,HEALTHCARE_EXPENSES,HEALTHCARE_COVERAGE
0,1ff7f10f-a204-4bb1-aa72-dd763fa99482,2017-08-24,,999-68-6630,,,,Jacinto644,Kris249,,...,Beverly Massachusetts US,888 Hickle Ferry Suite 38,Springfield,Massachusetts,Hampden County,1106.0,42.151961,-72.598959,8446.49,1499.08
1,9bcf6ed5-d808-44af-98a0-7d78a29ede72,2016-08-01,,999-15-5895,,,,Alva958,Krajcik437,,...,Boston Massachusetts US,1048 Skiles Trailer,Walpole,Massachusetts,Norfolk County,2081.0,42.17737,-71.281353,94568.4,1870.72
2,5163c501-353c-4a82-b863-a3f1df2d6cf1,2004-01-09,,999-73-2461,S99999063,,,Jimmie93,Harris789,,...,Worcester Massachusetts US,201 Mitchell Lodge Unit 67,Pembroke,Massachusetts,Plymouth County,,42.075292,-70.757035,375754.62,3131.44
3,cc3c806f-4a09-4a89-a990-4286450956be,1996-11-15,,999-60-7372,S99924941,X9952947X,Mr.,Gregorio366,Auer97,,...,Patras Achaea GR,1050 Lindgren Extension Apt 38,Boston,Massachusetts,Suffolk County,2135.0,42.352434,-71.02861,484758.46,3632.96
4,bd1c4ffc-7f1d-4590-adbb-1d6533fb623e,2019-06-12,,999-81-4349,,,,Karyn217,Mueller846,,...,New Bedford Massachusetts US,570 Abshire Forge Suite 32,Colrain,Massachusetts,Franklin County,,42.74022,-72.722648,24130.0,903.28


### Sample Training idea for reference (used GPT)

In [86]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [87]:
# Three-row toy dataset used only to sketch the training workflow.
data = dict(
    age=[25, 50, 35],
    gender=['male', 'female', 'male'],
    symptom1=[1, 0, 1],
    symptom2=[0, 1, 1],
    death=[0, 1, 0],
)

In [88]:
data

{'age': [25, 50, 35],
 'gender': ['male', 'female', 'male'],
 'symptom1': [1, 0, 1],
 'symptom2': [0, 1, 1],
 'death': [0, 1, 0]}

In [89]:
# Column-oriented dict -> DataFrame (each key becomes a column).
df = pd.DataFrame.from_dict(data)

In [90]:
df

Unnamed: 0,age,gender,symptom1,symptom2,death
0,25,male,1,0,0
1,50,female,0,1,1
2,35,male,1,1,0


In [91]:
# Separate the label from the features, then hold out 20% of the rows for
# testing with a fixed seed so the split is repeatable.
y = df['death']
X = df.drop(columns='death')
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

In [93]:
X_train

Unnamed: 0,age,gender,symptom1,symptom2
1,50,female,0,1
2,35,male,1,1


### Actual Training

Unnamed: 0,START,STOP,PATIENT,ENCOUNTER,CODE,DESCRIPTION
4,2020-03-01,2020-03-30,1ff7f10f-a204-4bb1-aa72-dd763fa99482,60584c6a-e26b-4176-a660-86ad84c7ceca,840539006,COVID-19
11,2020-03-13,2020-04-14,9bcf6ed5-d808-44af-98a0-7d78a29ede72,a78e78d9-33bb-40bc-9e42-e47ff7e910cc,840539006,COVID-19
17,2020-03-10,2020-04-02,5163c501-353c-4a82-b863-a3f1df2d6cf1,f26c8d54-0e02-402b-9e98-13be78607b6b,840539006,COVID-19
22,2020-02-27,2020-03-17,cc3c806f-4a09-4a89-a990-4286450956be,12a6479c-c830-44a4-a6e6-1db0d5b0d815,840539006,COVID-19
27,2020-02-19,2020-02-28,bd1c4ffc-7f1d-4590-adbb-1d6533fb623e,8851313a-7276-49a2-ad6d-c672b3f6ebb5,840539006,COVID-19
...,...,...,...,...,...,...
1143804,2020-03-05,2020-03-15,12d4e7cc-e86b-46c5-a696-ecd52b929b71,43d9d971-2625-46ac-8378-6a07ae2fc586,840539006,COVID-19
1143826,2020-03-02,2020-03-23,2dbac353-17bb-42da-9aa0-a302f052d1a3,ccd36d91-4c90-411d-bd3d-0163d24684e4,840539006,COVID-19
1143857,2020-03-04,2020-04-08,8cf835a7-f161-4fe9-a559-350c97a3450e,72314c29-60f7-4eb3-b2cc-902ba571c99e,840539006,COVID-19
1143873,2020-03-05,,503d768f-481c-46e2-bcdb-a6116686351a,62169fce-af51-450b-889b-6dca545fadbe,840539006,COVID-19


In [201]:
# Patient rows restricted to those diagnosed with COVID-19 (original index kept).
X = patients.loc[patients["Id"].isin(covid_patient_ids)]

In [202]:
# Keep only the demographic columns used as model inputs.
# .copy() detaches X from `patients`: X is a slice of a slice, and the column
# assignments in later cells would otherwise raise SettingWithCopyWarning.
# NOTE(review): DEATHDATE is non-null exactly for patients later labelled
# DEATH = 1, so it must be dropped (or converted to age) before fitting a model.
X = X[["BIRTHDATE", "DEATHDATE", "RACE", "GENDER"]].copy()

In [203]:
X

Unnamed: 0,BIRTHDATE,DEATHDATE,RACE,GENDER
0,2017-08-24,,white,M
1,2016-08-01,,white,F
2,2004-01-09,,white,F
3,1996-11-15,,white,M
4,2019-06-12,,white,F
...,...,...,...,...
124140,1948-06-03,,white,M
124141,1941-10-07,,white,F
124146,1944-09-15,,white,M
124147,1934-12-26,2020-03-18,white,M


In [204]:
# Distinct condition descriptions seen among COVID-19 patients, in order of first appearance.
DESCRIPTIONS = pd.unique(covid_patient_conditions["DESCRIPTION"])

In [205]:
symptoms = []

In [206]:
# Build the symptom names from the "(finding)" descriptions.
# The list is rebuilt from scratch on every run, so re-executing this cell can
# no longer append duplicates to a list that was created in another cell.
# Only the "(finding)" marker and the whitespace before it are stripped, so a
# description containing an earlier parenthesis is not cut short, and
# non-string entries (e.g. NaN) are skipped instead of raising TypeError.
# NOTE(review): some findings (the "Body mass index ..." entries) are risk
# factors rather than symptoms.
symptoms = [
    description.partition("(finding)")[0].rstrip()
    for description in DESCRIPTIONS
    if isinstance(description, str) and "(finding)" in description
]

In [207]:
symptoms

['Fever',
 'Cough',
 'Sputum finding',
 'Diarrhea symptom',
 'Loss of taste',
 'Dyspnea',
 'Wheezing',
 'Respiratory distress',
 'Fatigue',
 'Hemoptysis',
 'Muscle pain',
 'Joint pain',
 'Headache',
 'Sore throat symptom',
 'Nausea',
 'Vomiting symptom',
 'Nasal congestion',
 'Chill',
 'Body mass index 30+ - obesity',
 'Passive conjunctival congestion',
 'Body mass index 40+ - severely obese']

In [208]:
# Add one indicator column per symptom, initialised to 0 as a placeholder until
# the real per-patient values are filled in.
# assign() returns a new frame instead of inserting columns one by one into a
# filtered slice of `patients`, which avoids SettingWithCopyWarning and keeps
# the cell safe to re-run. Column order follows the order of `symptoms`.
X = X.assign(**dict.fromkeys(symptoms, 0))

In [209]:
X

Unnamed: 0,BIRTHDATE,DEATHDATE,RACE,GENDER,Fever,Cough,Sputum finding,Diarrhea symptom,Loss of taste,Dyspnea,...,Joint pain,Headache,Sore throat symptom,Nausea,Vomiting symptom,Nasal congestion,Chill,Body mass index 30+ - obesity,Passive conjunctival congestion,Body mass index 40+ - severely obese
0,2017-08-24,,white,M,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2016-08-01,,white,F,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2004-01-09,,white,F,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1996-11-15,,white,M,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2019-06-12,,white,F,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124140,1948-06-03,,white,M,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
124141,1941-10-07,,white,F,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
124146,1944-09-15,,white,M,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
124147,1934-12-26,2020-03-18,white,M,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### All patients that died (died = 1, survived = 0)

In [210]:
# Label: 1 if the patient has a recorded death, 0 otherwise.
# The ids are looked up on X's own index labels so the alignment with
# `patients` is explicit rather than left to pandas on assignment.
# NOTE(review): this flags death from any cause, not specifically COVID-19 deaths.
X["DEATH"] = patients.loc[X.index, "Id"].isin(deceased_patients).astype(int)

In [211]:
X

Unnamed: 0,BIRTHDATE,DEATHDATE,RACE,GENDER,Fever,Cough,Sputum finding,Diarrhea symptom,Loss of taste,Dyspnea,...,Headache,Sore throat symptom,Nausea,Vomiting symptom,Nasal congestion,Chill,Body mass index 30+ - obesity,Passive conjunctival congestion,Body mass index 40+ - severely obese,DEATH
0,2017-08-24,,white,M,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2016-08-01,,white,F,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2004-01-09,,white,F,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1996-11-15,,white,M,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2019-06-12,,white,F,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124140,1948-06-03,,white,M,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
124141,1941-10-07,,white,F,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
124146,1944-09-15,,white,M,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
124147,1934-12-26,2020-03-18,white,M,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


#### What we have to do:
##### 1. Fill out all the symptoms in the table (0 or 1)
##### 2. Calculate age (DEATHDATE - BIRTHDATE; survivors have no DEATHDATE, so they need a reference date instead)
##### 3. Split the data into training, validation, and test sets