In [1]:
# Import our dependencies
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import datetime
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn import linear_model

%matplotlib inline

In [2]:
# import time series csv and SVI csv
case_series_df = pd.read_csv("resources/altered_data/time_series_cases_state.csv", encoding="ISO-8859-1")
SVI_df = pd.read_csv("resources/altered_data/SVI2018_US_COUNTY.csv", encoding="ISO-8859-1")
combined_cases_deaths_df = pd.read_csv("resources/original_data/combined_cases_deaths_state.csv")

## Inputs: SVI Data merged with case_series_df along with population and deaths from combined_cases_deaths_df 
## Outputs: Predicted total deaths from combined_cases_deaths_df

### Model output may change as another option is to classify high, moderate, and low risk state based on death and case rate

### Additional CSV data may be included in Deliverable 2 

In [3]:
case_series_df

Unnamed: 0,UID,FIPS,COUNTY,STATE,Lat,Long_,Combined_Key,3/1/20,3/2/20,3/3/20,...,3/22/21,3/23/21,3/24/21,3/25/21,3/26/21,3/27/21,3/28/21,3/29/21,3/30/21,3/31/21
0,84001001,1001.0,Autauga,Alabama,32.539527,-86.644082,"Autauga, Alabama, US",0,0,0,...,6517,6525,6533,6540,6543,6562,6570,6577,6580,6589
1,84001003,1003.0,Baldwin,Alabama,30.727750,-87.722071,"Baldwin, Alabama, US",0,0,0,...,20361,20354,20395,20417,20423,20453,20473,20487,20492,20505
2,84001005,1005.0,Barbour,Alabama,31.868263,-85.387129,"Barbour, Alabama, US",0,0,0,...,2213,2213,2216,2218,2221,2224,2226,2226,2227,2227
3,84001007,1007.0,Bibb,Alabama,32.996421,-87.125115,"Bibb, Alabama, US",0,0,0,...,2529,2530,2535,2534,2535,2535,2536,2536,2537,2542
4,84001009,1009.0,Blount,Alabama,33.982109,-86.567906,"Blount, Alabama, US",0,0,0,...,6387,6388,6402,6408,6415,6420,6424,6426,6443,6444
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3108,84056037,56037.0,Sweetwater,Wyoming,41.659439,-108.882788,"Sweetwater, Wyoming, US",0,0,0,...,3947,3951,3953,3961,3990,3990,3990,4002,4006,4022
3109,84056039,56039.0,Teton,Wyoming,43.935225,-110.589080,"Teton, Wyoming, US",0,0,0,...,3530,3532,3547,3549,3562,3562,3562,3593,3593,3609
3110,84056041,56041.0,Uinta,Wyoming,41.287818,-110.547578,"Uinta, Wyoming, US",0,0,0,...,2109,2111,2113,2115,2119,2119,2119,2123,2125,2128
3111,84056043,56043.0,Washakie,Wyoming,43.904516,-107.680187,"Washakie, Wyoming, US",0,0,0,...,889,889,890,890,890,890,890,891,891,891


In [4]:
SVI_df

Unnamed: 0,STATE,ST_ABBR,COUNTY,FIPS,LOCATION,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,...,F_CROWD,F_NOVEH,F_GROUPQ,F_THEME4,F_TOTAL,E_UNINSUR,M_UNINSUR,EP_UNINSUR,MP_UNINSUR,E_DAYPOP
0,NEW MEXICO,NM,Rio Arriba,35039,"Rio Arriba County, New Mexico",5860.869195,39307,20044,12398,-999,...,0,0,0,1,-999,4160,588,10.6,1.5,32290
1,ALABAMA,AL,Autauga,1001,"Autauga County, Alabama",594.443459,55200,23315,21115,8422,...,0,0,0,0,0,3875,508,7.1,0.9,37301
2,ALABAMA,AL,Blount,1009,"Blount County, Alabama",644.830460,57645,24222,20600,8220,...,0,0,0,0,0,6303,732,11.0,1.3,40036
3,ALABAMA,AL,Butler,1013,"Butler County, Alabama",776.838201,20025,10026,6708,4640,...,0,0,0,0,0,2005,340,10.2,1.7,17280
4,ALABAMA,AL,Calhoun,1015,"Calhoun County, Alabama",605.867251,115098,53682,45033,20819,...,0,0,0,0,0,10686,796,9.4,0.7,117894
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3137,TEXAS,TX,Duval,48131,"Duval County, Texas",1793.476183,11355,5592,3511,2751,...,1,1,0,2,10,2656,406,24.5,3.7,9592
3138,TEXAS,TX,Zapata,48505,"Zapata County, Texas",998.411980,14369,6388,4405,5609,...,1,0,0,2,10,4206,484,29.3,3.4,10924
3139,TEXAS,TX,Zavala,48507,"Zavala County, Texas",1297.406535,12131,4344,3509,4150,...,1,1,0,2,10,2194,354,18.4,3.0,10672
3140,TEXAS,TX,Dimmit,48127,"Dimmit County, Texas",1328.884075,10663,4408,3309,3148,...,1,1,0,2,11,2345,539,22.2,5.1,8229


In [5]:
#load updated SVI_case_series_df
updated_SVI_case_series = pd.read_csv("resources/merged_data/SVI_case_series_merge_updated.csv")
updated_SVI_case_series

Unnamed: 0,FIPS,COUNTY,ST_ABBR,STATE,Lat,Long_,Combined_Key,3/1/20,3/2/20,3/3/20,...,F_CROWD,F_NOVEH,F_GROUPQ,F_THEME4,F_TOTAL,E_UNINSUR,M_UNINSUR,EP_UNINSUR,MP_UNINSUR,E_DAYPOP
0,1001.0,Autauga,AL,Alabama,32.539527,-86.644082,"Autauga, Alabama, US",0,0,0,...,0.0,0.0,0.0,0.0,0.0,3875.0,508.0,7.1,0.9,37301.0
1,1003.0,Baldwin,AL,Alabama,30.727750,-87.722071,"Baldwin, Alabama, US",0,0,0,...,0.0,0.0,0.0,1.0,1.0,20864.0,1646.0,10.2,0.8,195677.0
2,1005.0,Barbour,AL,Alabama,31.868263,-85.387129,"Barbour, Alabama, US",0,0,0,...,0.0,0.0,1.0,2.0,8.0,2558.0,363.0,11.2,1.6,25052.0
3,1007.0,Bibb,AL,Alabama,32.996421,-87.125115,"Bibb, Alabama, US",0,0,0,...,0.0,0.0,1.0,2.0,2.0,1619.0,396.0,7.9,1.9,17696.0
4,1009.0,Blount,AL,Alabama,33.982109,-86.567906,"Blount, Alabama, US",0,0,0,...,0.0,0.0,0.0,0.0,0.0,6303.0,732.0,11.0,1.3,40036.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3107,56037.0,Sweetwater,WY,Wyoming,41.659439,-108.882788,"Sweetwater, Wyoming, US",0,0,0,...,0.0,0.0,0.0,0.0,1.0,5240.0,794.0,12.0,1.8,45734.0
3108,56039.0,Teton,WY,Wyoming,43.935225,-110.589080,"Teton, Wyoming, US",0,0,0,...,1.0,0.0,0.0,1.0,2.0,2305.0,603.0,10.0,2.6,29426.0
3109,56041.0,Uinta,WY,Wyoming,41.287818,-110.547578,"Uinta, Wyoming, US",0,0,0,...,0.0,0.0,0.0,0.0,1.0,2499.0,452.0,12.2,2.2,19734.0
3110,56043.0,Washakie,WY,Wyoming,43.904516,-107.680187,"Washakie, Wyoming, US",0,0,0,...,0.0,0.0,0.0,0.0,0.0,1223.0,245.0,15.4,3.1,7859.0


In [6]:
updated_SVI_case_series.isna().sum()

FIPS          3
COUNTY        0
ST_ABBR       9
STATE         0
Lat           0
             ..
E_UNINSUR     9
M_UNINSUR     9
EP_UNINSUR    9
MP_UNINSUR    9
E_DAYPOP      9
Length: 487, dtype: int64

In [7]:
SVI_cases_df = updated_SVI_case_series.fillna("0")
SVI_cases_df.isna().sum()

FIPS          0
COUNTY        0
ST_ABBR       0
STATE         0
Lat           0
             ..
E_UNINSUR     0
M_UNINSUR     0
EP_UNINSUR    0
MP_UNINSUR    0
E_DAYPOP      0
Length: 487, dtype: int64

In [8]:
SVI_cases_df.dtypes

FIPS           object
COUNTY         object
ST_ABBR        object
STATE          object
Lat           float64
               ...   
E_UNINSUR      object
M_UNINSUR      object
EP_UNINSUR     object
MP_UNINSUR     object
E_DAYPOP       object
Length: 487, dtype: object

In [9]:
#change dtypes
SVI_cases_df[["AREA_SQMI","E_TOTPOP", "E_HU", "E_HH", "E_POV", "E_UNEMP","E_PCI","E_NOHSDP","E_AGE65","E_AGE17", "E_DISABL","E_SNGPNT", "E_MINRTY","E_LIMENG","E_MUNIT","E_MOBILE", "E_CROWD", "E_NOVEH","E_GROUPQ", "EP_POV","EP_UNEMP","EP_PCI","EP_NOHSDP","EP_AGE65", "EP_AGE17", "EP_DISABL", "EP_SNGPNT","EP_MINRTY","EP_LIMENG", "EP_MUNIT", "EP_MOBILE", "EP_CROWD","EP_NOVEH","EP_GROUPQ","EPL_POV","EPL_UNEMP","EPL_PCI","EPL_NOHSDP","SPL_THEME1", "RPL_THEME1","EPL_AGE65","EPL_AGE17","EPL_DISABL","EPL_SNGPNT","SPL_THEME2","RPL_THEME2","EPL_MINRTY","EPL_LIMENG","SPL_THEME3","RPL_THEME3","EPL_MUNIT","EPL_MOBILE","EPL_CROWD","EPL_NOVEH","EPL_GROUPQ","SPL_THEME4","RPL_THEME4","SPL_THEMES","RPL_THEMES","F_POV","F_UNEMP","F_PCI","F_NOHSDP","F_THEME1","F_AGE65","F_AGE17","F_DISABL","F_SNGPNT","F_THEME2","F_MINRTY","F_LIMENG","F_THEME3","F_MUNIT","F_MOBILE","F_CROWD","F_NOVEH","F_GROUPQ","F_THEME4","F_TOTAL","E_UNINSUR","M_UNINSUR","EP_UNINSUR","MP_UNINSUR","E_DAYPOP"]] = SVI_cases_df[["AREA_SQMI","E_TOTPOP", "E_HU", "E_HH", "E_POV", "E_UNEMP","E_PCI","E_NOHSDP","E_AGE65","E_AGE17", "E_DISABL","E_SNGPNT", "E_MINRTY","E_LIMENG","E_MUNIT","E_MOBILE", "E_CROWD", "E_NOVEH","E_GROUPQ", "EP_POV","EP_UNEMP","EP_PCI","EP_NOHSDP","EP_AGE65", "EP_AGE17", "EP_DISABL", "EP_SNGPNT","EP_MINRTY","EP_LIMENG", "EP_MUNIT", "EP_MOBILE", "EP_CROWD","EP_NOVEH","EP_GROUPQ","EPL_POV","EPL_UNEMP","EPL_PCI","EPL_NOHSDP","SPL_THEME1", "RPL_THEME1","EPL_AGE65","EPL_AGE17","EPL_DISABL","EPL_SNGPNT","SPL_THEME2","RPL_THEME2","EPL_MINRTY","EPL_LIMENG","SPL_THEME3","RPL_THEME3","EPL_MUNIT","EPL_MOBILE","EPL_CROWD","EPL_NOVEH","EPL_GROUPQ","SPL_THEME4","RPL_THEME4","SPL_THEMES","RPL_THEMES","F_POV","F_UNEMP","F_PCI","F_NOHSDP","F_THEME1","F_AGE65","F_AGE17","F_DISABL","F_SNGPNT","F_THEME2","F_MINRTY","F_LIMENG","F_THEME3","F_MUNIT","F_MOBILE","F_CROWD","F_NOVEH","F_GROUPQ","F_THEME4","F_TOTAL","E_UNINSUR","M_UNINSUR","EP_UNINSUR","MP_UNINSUR","E_DAYPOP"]].astype('int')

In [10]:
#check dtypes again
SVI_cases_df.dtypes

FIPS           object
COUNTY         object
ST_ABBR        object
STATE          object
Lat           float64
               ...   
E_UNINSUR       int64
M_UNINSUR       int64
EP_UNINSUR      int64
MP_UNINSUR      int64
E_DAYPOP        int64
Length: 487, dtype: object

In [11]:
#convert the dates to datetime64
SVI_cases_df= SVI_cases_df.dtypes[SVI_cases_df.dtypes =="str"].astype("datetime64")

In [12]:
combined_cases_deaths_df 

Unnamed: 0,County,State,Population,Total_cases,Total_deaths
0,Autauga,Alabama,55869,9780,142
1,Baldwin,Alabama,223234,36579,518
2,Barbour,Alabama,24686,3519,71
3,Bibb,Alabama,22394,4157,83
4,Blount,Alabama,57826,9870,162
...,...,...,...,...,...
3337,Teton,Wyoming,23464,4943,13
3338,Uinta,Wyoming,20226,3550,25
3339,Unassigned,Wyoming,0,0,0
3340,Washakie,Wyoming,7805,1398,27


In [13]:
# combined_cases_deaths_df ("Key") = combined_cases_deaths_df(columns=["County", "State"], axis =1)
combined_cases_deaths_df ["Key"] = combined_cases_deaths_df["State"] + "-" + combined_cases_deaths_df["County"]
combined_cases_deaths_df

Unnamed: 0,County,State,Population,Total_cases,Total_deaths,Key
0,Autauga,Alabama,55869,9780,142,Alabama-Autauga
1,Baldwin,Alabama,223234,36579,518,Alabama-Baldwin
2,Barbour,Alabama,24686,3519,71,Alabama-Barbour
3,Bibb,Alabama,22394,4157,83,Alabama-Bibb
4,Blount,Alabama,57826,9870,162,Alabama-Blount
...,...,...,...,...,...,...
3337,Teton,Wyoming,23464,4943,13,Wyoming-Teton
3338,Uinta,Wyoming,20226,3550,25,Wyoming-Uinta
3339,Unassigned,Wyoming,0,0,0,Wyoming-Unassigned
3340,Washakie,Wyoming,7805,1398,27,Wyoming-Washakie


In [14]:
updated_SVI_case_series["Key"]=updated_SVI_case_series["STATE"]+ "-"+ updated_SVI_case_series["COUNTY"]
updated_SVI_case_series

Unnamed: 0,FIPS,COUNTY,ST_ABBR,STATE,Lat,Long_,Combined_Key,3/1/20,3/2/20,3/3/20,...,F_NOVEH,F_GROUPQ,F_THEME4,F_TOTAL,E_UNINSUR,M_UNINSUR,EP_UNINSUR,MP_UNINSUR,E_DAYPOP,Key
0,1001.0,Autauga,AL,Alabama,32.539527,-86.644082,"Autauga, Alabama, US",0,0,0,...,0.0,0.0,0.0,0.0,3875.0,508.0,7.1,0.9,37301.0,Alabama-Autauga
1,1003.0,Baldwin,AL,Alabama,30.727750,-87.722071,"Baldwin, Alabama, US",0,0,0,...,0.0,0.0,1.0,1.0,20864.0,1646.0,10.2,0.8,195677.0,Alabama-Baldwin
2,1005.0,Barbour,AL,Alabama,31.868263,-85.387129,"Barbour, Alabama, US",0,0,0,...,0.0,1.0,2.0,8.0,2558.0,363.0,11.2,1.6,25052.0,Alabama-Barbour
3,1007.0,Bibb,AL,Alabama,32.996421,-87.125115,"Bibb, Alabama, US",0,0,0,...,0.0,1.0,2.0,2.0,1619.0,396.0,7.9,1.9,17696.0,Alabama-Bibb
4,1009.0,Blount,AL,Alabama,33.982109,-86.567906,"Blount, Alabama, US",0,0,0,...,0.0,0.0,0.0,0.0,6303.0,732.0,11.0,1.3,40036.0,Alabama-Blount
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3107,56037.0,Sweetwater,WY,Wyoming,41.659439,-108.882788,"Sweetwater, Wyoming, US",0,0,0,...,0.0,0.0,0.0,1.0,5240.0,794.0,12.0,1.8,45734.0,Wyoming-Sweetwater
3108,56039.0,Teton,WY,Wyoming,43.935225,-110.589080,"Teton, Wyoming, US",0,0,0,...,0.0,0.0,1.0,2.0,2305.0,603.0,10.0,2.6,29426.0,Wyoming-Teton
3109,56041.0,Uinta,WY,Wyoming,41.287818,-110.547578,"Uinta, Wyoming, US",0,0,0,...,0.0,0.0,0.0,1.0,2499.0,452.0,12.2,2.2,19734.0,Wyoming-Uinta
3110,56043.0,Washakie,WY,Wyoming,43.904516,-107.680187,"Washakie, Wyoming, US",0,0,0,...,0.0,0.0,0.0,0.0,1223.0,245.0,15.4,3.1,7859.0,Wyoming-Washakie


In [15]:
SVI_case_deaths_df = pd.merge(updated_SVI_case_series,combined_cases_deaths_df, on= "Key", how="outer")
SVI_case_deaths_df

Unnamed: 0,FIPS,COUNTY,ST_ABBR,STATE,Lat,Long_,Combined_Key,3/1/20,3/2/20,3/3/20,...,M_UNINSUR,EP_UNINSUR,MP_UNINSUR,E_DAYPOP,Key,County,State,Population,Total_cases,Total_deaths
0,1001.0,Autauga,AL,Alabama,32.539527,-86.644082,"Autauga, Alabama, US",0.0,0.0,0.0,...,508.0,7.1,0.9,37301.0,Alabama-Autauga,Autauga,Alabama,55869,9780,142
1,1003.0,Baldwin,AL,Alabama,30.727750,-87.722071,"Baldwin, Alabama, US",0.0,0.0,0.0,...,1646.0,10.2,0.8,195677.0,Alabama-Baldwin,Baldwin,Alabama,223234,36579,518
2,1005.0,Barbour,AL,Alabama,31.868263,-85.387129,"Barbour, Alabama, US",0.0,0.0,0.0,...,363.0,11.2,1.6,25052.0,Alabama-Barbour,Barbour,Alabama,24686,3519,71
3,1007.0,Bibb,AL,Alabama,32.996421,-87.125115,"Bibb, Alabama, US",0.0,0.0,0.0,...,396.0,7.9,1.9,17696.0,Alabama-Bibb,Bibb,Alabama,22394,4157,83
4,1009.0,Blount,AL,Alabama,33.982109,-86.567906,"Blount, Alabama, US",0.0,0.0,0.0,...,732.0,11.0,1.3,40036.0,Alabama-Blount,Blount,Alabama,57826,9870,162
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3337,,,,,,,,,,,...,,,,,West Virginia-Unassigned,Unassigned,West Virginia,0,0,0
3338,,,,,,,,,,,...,,,,,Wisconsin-Out of WI,Out of WI,Wisconsin,0,0,0
3339,,,,,,,,,,,...,,,,,Wisconsin-Unassigned,Unassigned,Wisconsin,0,0,0
3340,,,,,,,,,,,...,,,,,Wyoming-Out of WY,Out of WY,Wyoming,0,0,0


In [16]:
SVI_case_deaths_df.to_csv("SVI_case_deaths_df.csv")

In [37]:
#read in updated SVI_case_deaths_df
SVI_case_deaths_updated = pd.read_csv("SVI_case_deaths_df_updated.csv", dtype = {"FIPS":str})
SVI_case_deaths_updated 

Unnamed: 0,FIPS,Key,STATE,Lat,Long,AREA_SQMI,Population,Total_cases,Total_deaths,E_TOTPOP,...,F_CROWD,F_NOVEH,F_GROUPQ,F_THEME4,F_TOTAL,E_UNINSUR,M_UNINSUR,EP_UNINSUR,MP_UNINSUR,E_DAYPOP
0,1001,Alabama-Autauga,Alabama,32.539527,-86.644082,594.443459,55869,9780,142,55200.0,...,0.0,0.0,0.0,0.0,0.0,3875.0,508.0,7.1,0.9,37301.0
1,1003,Alabama-Baldwin,Alabama,30.727750,-87.722071,1589.793007,223234,36579,518,208107.0,...,0.0,0.0,0.0,1.0,1.0,20864.0,1646.0,10.2,0.8,195677.0
2,1005,Alabama-Barbour,Alabama,31.868263,-85.387129,885.001636,24686,3519,71,25782.0,...,0.0,0.0,1.0,2.0,8.0,2558.0,363.0,11.2,1.6,25052.0
3,1007,Alabama-Bibb,Alabama,32.996421,-87.125115,622.461089,22394,4157,83,22527.0,...,0.0,0.0,1.0,2.0,2.0,1619.0,396.0,7.9,1.9,17696.0
4,1009,Alabama-Blount,Alabama,33.982109,-86.567906,644.830460,57826,9870,162,57645.0,...,0.0,0.0,0.0,0.0,0.0,6303.0,732.0,11.0,1.3,40036.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3107,56037,Wyoming-Sweetwater,Wyoming,41.659439,-108.882788,10426.975730,42343,6959,65,44117.0,...,0.0,0.0,0.0,0.0,1.0,5240.0,794.0,12.0,1.8,45734.0
3108,56039,Wyoming-Teton,Wyoming,43.935225,-110.589080,3996.844622,23464,4943,13,23059.0,...,1.0,0.0,0.0,1.0,2.0,2305.0,603.0,10.0,2.6,29426.0
3109,56041,Wyoming-Uinta,Wyoming,41.287818,-110.547578,2081.719807,20226,3550,25,20609.0,...,0.0,0.0,0.0,0.0,1.0,2499.0,452.0,12.2,2.2,19734.0
3110,56043,Wyoming-Washakie,Wyoming,43.904516,-107.680187,2238.672972,7805,1398,27,8129.0,...,0.0,0.0,0.0,0.0,0.0,1223.0,245.0,15.4,3.1,7859.0


In [38]:
#drop state a
SVI_case_deaths_nn = SVI_case_deaths_updated.drop(columns=["STATE"])
SVI_case_deaths_nn.head()

Unnamed: 0,FIPS,Key,Lat,Long,AREA_SQMI,Population,Total_cases,Total_deaths,E_TOTPOP,E_HU,...,F_CROWD,F_NOVEH,F_GROUPQ,F_THEME4,F_TOTAL,E_UNINSUR,M_UNINSUR,EP_UNINSUR,MP_UNINSUR,E_DAYPOP
0,1001,Alabama-Autauga,32.539527,-86.644082,594.443459,55869,9780,142,55200.0,23315.0,...,0.0,0.0,0.0,0.0,0.0,3875.0,508.0,7.1,0.9,37301.0
1,1003,Alabama-Baldwin,30.72775,-87.722071,1589.793007,223234,36579,518,208107.0,111945.0,...,0.0,0.0,0.0,1.0,1.0,20864.0,1646.0,10.2,0.8,195677.0
2,1005,Alabama-Barbour,31.868263,-85.387129,885.001636,24686,3519,71,25782.0,11937.0,...,0.0,0.0,1.0,2.0,8.0,2558.0,363.0,11.2,1.6,25052.0
3,1007,Alabama-Bibb,32.996421,-87.125115,622.461089,22394,4157,83,22527.0,9161.0,...,0.0,0.0,1.0,2.0,2.0,1619.0,396.0,7.9,1.9,17696.0
4,1009,Alabama-Blount,33.982109,-86.567906,644.83046,57826,9870,162,57645.0,24222.0,...,0.0,0.0,0.0,0.0,0.0,6303.0,732.0,11.0,1.3,40036.0


In [39]:
# check for dtypes 
SVI_case_deaths_nn.dtypes

FIPS           object
Key            object
Lat           float64
Long          float64
AREA_SQMI     float64
               ...   
E_UNINSUR     float64
M_UNINSUR     float64
EP_UNINSUR    float64
MP_UNINSUR    float64
E_DAYPOP      float64
Length: 91, dtype: object

In [40]:
# check for NaN values
SVI_case_deaths_nn.isna().sum()

FIPS          3
Key           0
Lat           0
Long          0
AREA_SQMI     9
             ..
E_UNINSUR     9
M_UNINSUR     9
EP_UNINSUR    9
MP_UNINSUR    9
E_DAYPOP      9
Length: 91, dtype: int64

In [47]:
SVI_case_deaths_nn_cat=SVI_case_deaths_nn.dtypes[SVI_case_deaths_nn.dtypes =="object"].index.tolist()
SVI_case_deaths_nn[SVI_case_deaths_nn_cat].nunique()

FIPS    3109
Key     3112
dtype: int64

In [48]:
# SVI_case_deaths_nn_cat=SVI_case_deaths_nn.dtypes[SVI_case_deaths_nn.dtypes =="string"].index.tolist()
# SVI_case_deaths_nn[SVI_case_deaths_nn_cat].nunique()

### Need to establish feature importance

In [49]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df=pd.DataFrame(enc.fit_transform(SVI_case_deaths_nn[SVI_case_deaths_nn_cat]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names_out(SVI_case_deaths_nn_cat)
encode_df.head()

Unnamed: 0,FIPS_10001,FIPS_10003,FIPS_10005,FIPS_1001,FIPS_1003,FIPS_1005,FIPS_1007,FIPS_1009,FIPS_1011,FIPS_1013,...,Key_Wyoming-Niobrara,Key_Wyoming-Park,Key_Wyoming-Platte,Key_Wyoming-Sheridan,Key_Wyoming-Sublette,Key_Wyoming-Sweetwater,Key_Wyoming-Teton,Key_Wyoming-Uinta,Key_Wyoming-Washakie,Key_Wyoming-Weston
0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [50]:
# Merge one-hot encoded features and drop the originals
encoded_application_df = SVI_case_deaths_nn.merge(encode_df, left_index = True, right_index = True).drop(columns = SVI_case_deaths_nn_cat)
encoded_application_df.head()

Unnamed: 0,Lat,Long,AREA_SQMI,Population,Total_cases,Total_deaths,E_TOTPOP,E_HU,E_HH,E_POV,...,Key_Wyoming-Niobrara,Key_Wyoming-Park,Key_Wyoming-Platte,Key_Wyoming-Sheridan,Key_Wyoming-Sublette,Key_Wyoming-Sweetwater,Key_Wyoming-Teton,Key_Wyoming-Uinta,Key_Wyoming-Washakie,Key_Wyoming-Weston
0,32.539527,-86.644082,594.443459,55869,9780,142,55200.0,23315.0,21115.0,8422.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,30.72775,-87.722071,1589.793007,223234,36579,518,208107.0,111945.0,78622.0,21653.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,31.868263,-85.387129,885.001636,24686,3519,71,25782.0,11937.0,9186.0,6597.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,32.996421,-87.125115,622.461089,22394,4157,83,22527.0,9161.0,6840.0,2863.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,33.982109,-86.567906,644.83046,57826,9870,162,57645.0,24222.0,20600.0,8220.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [51]:
# Split our preprocessed data into our features and target arrays
y = SVI_case_deaths_updated.loc[:, "Total_deaths"].values
X = SVI_case_deaths_updated.drop(columns=["Total_deaths"], axis=1).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [52]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

ValueError: could not convert string to float: 'Colorado-Crowley'

### start training and evaluating model

In [53]:

# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 100
hidden_nodes_layer2 = 50
nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)


# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="sigmoid"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 2000)              184000    
                                                                 
 dense_1 (Dense)             (None, 600)               1200600   
                                                                 
 dense_2 (Dense)             (None, 1)                 601       
                                                                 
Total params: 1,385,201
Trainable params: 1,385,201
Non-trainable params: 0
_________________________________________________________________


2022-08-03 19:26:58.712407: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Compile the model #need different loss
nn.compile(loss="MeanSquaredError", optimizer="adam", metrics=["accuracy"])