In [1]:
# Seed NumPy's global random number generator so NumPy-driven results are
# repeatable from one run of the notebook to the next.
# NOTE(review): only NumPy is seeded here; TensorFlow presumably needs its own
# seed for fully repeatable training — TODO confirm.
from numpy.random import seed

RANDOM_SEED = 1
seed(RANDOM_SEED)

In [2]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import pandas as pd
import warnings
warnings.simplefilter('ignore', FutureWarning)

In [3]:
# Import TensorFlow and display the version string of its bundled Keras API.
import tensorflow
tensorflow.keras.__version__

'2.2.4-tf'

In [4]:
# Show the current pandas row display limit before it is changed.
pd.options.display.max_rows

60

In [5]:
# Show the current pandas column display limit before it is changed.
pd.options.display.max_columns

20

In [6]:
# Raise the row display limit to 300 so longer outputs are not truncated.
pd.set_option("display.max_rows", 300)

In [7]:
# Raise the column display limit to 200 so the wide merged frames show every column.
pd.set_option("display.max_columns", 200)

In [8]:
# Read in the three CSV files, in this order: unemployment figures per county
# and week, COVID-19 counts per county and report date, and the 2018 SVI
# county table.
employment, covid, social = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../COVID-19-Predictive-Modelling/assets/data/AGS_data/AGS_Data_Massaged.csv',
        '../COVID-19-Predictive-Modelling/assets/data/COVID County Data/covid_county_data_cleaned.csv',
        '../COVID-19-Predictive-Modelling/assets/data/SVI2018_US_COUNTY_2.csv',
    )
)

In [9]:
# Preview the first rows of the unemployment table (one row per county and week).
employment.head()

Unnamed: 0,fips,county_name,labor_force,file_week_ended,percent_unemployed,total_unemployed
0,1001,Autauga,25819,3/7/20,2.69,694
1,1001,Autauga,25819,3/14/20,2.78,718
2,1001,Autauga,25819,3/21/20,3.34,862
3,1001,Autauga,25819,3/28/20,7.47,1929
4,1001,Autauga,25819,4/4/20,12.92,3336


In [10]:
# Had difficulty merging after the CSV files were cleaned because the data type of fips changed,
# so fips had to be changed to object. Check the dtypes of the employment frame here.
employment.dtypes

fips                  object
county_name           object
labor_force           object
file_week_ended       object
percent_unemployed    object
total_unemployed      object
dtype: object

In [11]:
# Preview the first rows of the COVID-19 table (one row per county and report date).
covid.head()

Unnamed: 0.1,Unnamed: 0,name,date,fips,lat,long,confirmed,deaths,confirmed_diff,deaths_diff,last_update,state,state_abbr
0,0,Jefferson,3/22/20,1073,33.555547,-86.895063,71,0,0,0,3/22/20 23:45,Alabama,AL
1,1,Shelby,3/22/20,1117,33.268798,-86.662326,17,0,0,0,3/22/20 23:45,Alabama,AL
2,2,Lee,3/22/20,1081,32.601549,-85.351322,16,0,0,0,3/22/20 23:45,Alabama,AL
3,3,Madison,3/22/20,1089,34.763271,-86.550696,16,0,0,0,3/22/20 23:45,Alabama,AL
4,4,Tuscaloosa,3/22/20,1125,33.287261,-87.525568,7,0,0,0,3/22/20 23:45,Alabama,AL


In [12]:
# List the column dtypes of the COVID frame before converting fips.
covid.dtypes

Unnamed: 0          int64
name               object
date               object
fips                int64
lat               float64
long              float64
confirmed           int64
deaths              int64
confirmed_diff      int64
deaths_diff         int64
last_update        object
state              object
state_abbr         object
dtype: object

In [13]:
# Store fips as strings so the COVID frame shares a key dtype with the
# other frames when merging on fips.
covid['fips'] = covid['fips'].astype(str)
covid.head()

Unnamed: 0.1,Unnamed: 0,name,date,fips,lat,long,confirmed,deaths,confirmed_diff,deaths_diff,last_update,state,state_abbr
0,0,Jefferson,3/22/20,1073,33.555547,-86.895063,71,0,0,0,3/22/20 23:45,Alabama,AL
1,1,Shelby,3/22/20,1117,33.268798,-86.662326,17,0,0,0,3/22/20 23:45,Alabama,AL
2,2,Lee,3/22/20,1081,32.601549,-85.351322,16,0,0,0,3/22/20 23:45,Alabama,AL
3,3,Madison,3/22/20,1089,34.763271,-86.550696,16,0,0,0,3/22/20 23:45,Alabama,AL
4,4,Tuscaloosa,3/22/20,1125,33.287261,-87.525568,7,0,0,0,3/22/20 23:45,Alabama,AL


In [14]:
# Re-check the dtypes to confirm that fips is now an object column.
covid.dtypes

Unnamed: 0          int64
name               object
date               object
fips               object
lat               float64
long              float64
confirmed           int64
deaths              int64
confirmed_diff      int64
deaths_diff         int64
last_update        object
state              object
state_abbr         object
dtype: object

In [15]:
# Instead of dropping 50 composite and margin of error variables from the SVI using code, I just deleted them from the csv file (I know, I know)
social.head()

Unnamed: 0,ST,STATE,COUNTY,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ
0,35,NEW MEXICO,Rio Arriba,35039,5860.869195,39307,20044,12398,-999,-999,-999,3669,7083,9318,6280,1330,34397,755,67,7770,264,763,654
1,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546
2,1,ALABAMA,Blount,1009,644.83046,57645,24222,20600,8220,909,22656,7861,10233,13468,8114,1437,7413,934,211,6108,339,856,543
3,1,ALABAMA,Butler,1013,776.838201,20025,10026,6708,4640,567,20430,2141,3806,4566,3492,704,9641,93,134,2625,119,520,322
4,1,ALABAMA,Calhoun,1015,605.867251,115098,53682,45033,20819,4628,24706,12620,19386,25196,23598,4701,31675,1076,1990,7904,772,2599,3112


In [16]:
# List the column dtypes of the SVI frame before converting fips.
social.dtypes

ST             int64
STATE         object
COUNTY        object
fips           int64
AREA_SQMI    float64
E_TOTPOP       int64
E_HU           int64
E_HH           int64
E_POV          int64
E_UNEMP        int64
E_PCI          int64
E_NOHSDP       int64
E_AGE65        int64
E_AGE17        int64
E_DISABL       int64
E_SNGPNT       int64
E_MINRTY       int64
E_LIMENG       int64
E_MUNIT        int64
E_MOBILE       int64
E_CROWD        int64
E_NOVEH        int64
E_GROUPQ       int64
dtype: object

In [17]:
# Store fips as strings so the SVI frame shares a key dtype with the
# other frames when merging on fips.
social['fips'] = social['fips'].astype(str)
social.head()

Unnamed: 0,ST,STATE,COUNTY,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ
0,35,NEW MEXICO,Rio Arriba,35039,5860.869195,39307,20044,12398,-999,-999,-999,3669,7083,9318,6280,1330,34397,755,67,7770,264,763,654
1,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546
2,1,ALABAMA,Blount,1009,644.83046,57645,24222,20600,8220,909,22656,7861,10233,13468,8114,1437,7413,934,211,6108,339,856,543
3,1,ALABAMA,Butler,1013,776.838201,20025,10026,6708,4640,567,20430,2141,3806,4566,3492,704,9641,93,134,2625,119,520,322
4,1,ALABAMA,Calhoun,1015,605.867251,115098,53682,45033,20819,4628,24706,12620,19386,25196,23598,4701,31675,1076,1990,7904,772,2599,3112


In [18]:
# Re-check the dtypes to confirm that fips is now an object column.
social.dtypes

ST             int64
STATE         object
COUNTY        object
fips          object
AREA_SQMI    float64
E_TOTPOP       int64
E_HU           int64
E_HH           int64
E_POV          int64
E_UNEMP        int64
E_PCI          int64
E_NOHSDP       int64
E_AGE65        int64
E_AGE17        int64
E_DISABL       int64
E_SNGPNT       int64
E_MINRTY       int64
E_LIMENG       int64
E_MUNIT        int64
E_MOBILE       int64
E_CROWD        int64
E_NOVEH        int64
E_GROUPQ       int64
dtype: object

In [19]:
# Row and column count of the SVI frame before removing rows with missing values.
social.shape

(3142, 23)

In [20]:
# For the SVI, -999 reflects missing values. Apparently, only one county has
# -999 values in the dataset, so that county is removed.
# The first column is excluded from the check, as in the original filter.
has_missing_marker = social.iloc[:, 1:].eq(-999).any(axis=1)
social = social.loc[~has_missing_marker]
social.head()

Unnamed: 0,ST,STATE,COUNTY,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ
1,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546
2,1,ALABAMA,Blount,1009,644.83046,57645,24222,20600,8220,909,22656,7861,10233,13468,8114,1437,7413,934,211,6108,339,856,543
3,1,ALABAMA,Butler,1013,776.838201,20025,10026,6708,4640,567,20430,2141,3806,4566,3492,704,9641,93,134,2625,119,520,322
4,1,ALABAMA,Calhoun,1015,605.867251,115098,53682,45033,20819,4628,24706,12620,19386,25196,23598,4701,31675,1076,1990,7904,772,2599,3112
5,1,ALABAMA,Chambers,1017,596.560643,33826,16981,13516,5531,773,22827,4383,6409,7006,5570,1307,14954,36,679,2378,404,989,512


In [21]:
# Row and column count of the SVI frame after removing rows that contain -999.
social.shape

(3141, 23)

In [22]:
# Merge social and employment on fips.
# Each county's SVI row is repeated for every employment week of that county.
merge = social.merge(employment, on='fips')
merge.head()

Unnamed: 0,ST,STATE,COUNTY,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,county_name,labor_force,file_week_ended,percent_unemployed,total_unemployed
0,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694
1,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/14/20,2.78,718
2,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/21/20,3.34,862
3,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/28/20,7.47,1929
4,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,4/4/20,12.92,3336


In [23]:
# Merge the SVI + employment frame with the COVID frame on fips.
# NOTE(review): fips repeats on both sides (one row per employment week on the
# left, one row per report date on the right), so this join pairs every week
# with every report date of a county. The preview below shows the 3/7/20 week
# matched with the 3/22/20-3/26/20 reports. TODO confirm this cross-pairing is
# intended rather than also aligning the two tables on date.
combined = merge.merge(covid, on='fips')
combined.head()

Unnamed: 0.1,ST,STATE,COUNTY,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,county_name,labor_force,file_week_ended,percent_unemployed,total_unemployed,Unnamed: 0,name,date,lat,long,confirmed,deaths,confirmed_diff,deaths_diff,last_update,state,state_abbr
0,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,21,Autauga,3/22/20,32.539527,-86.644082,0,0,0,0,3/22/20 23:45,Alabama,AL
1,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,3170,Autauga,3/23/20,32.539527,-86.644082,0,0,0,0,3/23/20 23:19,Alabama,AL
2,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,6339,Autauga,3/24/20,32.539527,-86.644082,1,0,1,0,3/24/20 23:37,Alabama,AL
3,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,9507,Autauga,3/25/20,32.539527,-86.644082,4,0,3,0,3/25/20 23:33,Alabama,AL
4,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,12675,Autauga,3/26/20,32.539527,-86.644082,6,0,2,0,3/26/20 23:48,Alabama,AL


In [24]:
# List the column dtypes of the fully merged frame.
combined.dtypes

ST                      int64
STATE                  object
COUNTY                 object
fips                   object
AREA_SQMI             float64
E_TOTPOP                int64
E_HU                    int64
E_HH                    int64
E_POV                   int64
E_UNEMP                 int64
E_PCI                   int64
E_NOHSDP                int64
E_AGE65                 int64
E_AGE17                 int64
E_DISABL                int64
E_SNGPNT                int64
E_MINRTY                int64
E_LIMENG                int64
E_MUNIT                 int64
E_MOBILE                int64
E_CROWD                 int64
E_NOVEH                 int64
E_GROUPQ                int64
county_name            object
labor_force            object
file_week_ended        object
percent_unemployed     object
total_unemployed       object
Unnamed: 0              int64
name                   object
date                   object
lat                   float64
long                  float64
confirmed 

In [25]:
# Row and column count of the merged frame before any columns are dropped.
combined.shape

(2855628, 40)

In [26]:
# Drop the full state name that came from the COVID frame.
# Rebinding with errors='ignore' (instead of inplace=True) keeps this cell safe
# to re-run: a second run is a no-op rather than a KeyError.
combined = combined.drop(columns=['state'], errors='ignore')
combined.head()

Unnamed: 0.1,ST,STATE,COUNTY,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,county_name,labor_force,file_week_ended,percent_unemployed,total_unemployed,Unnamed: 0,name,date,lat,long,confirmed,deaths,confirmed_diff,deaths_diff,last_update,state_abbr
0,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,21,Autauga,3/22/20,32.539527,-86.644082,0,0,0,0,3/22/20 23:45,AL
1,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,3170,Autauga,3/23/20,32.539527,-86.644082,0,0,0,0,3/23/20 23:19,AL
2,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,6339,Autauga,3/24/20,32.539527,-86.644082,1,0,1,0,3/24/20 23:37,AL
3,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,9507,Autauga,3/25/20,32.539527,-86.644082,4,0,3,0,3/25/20 23:33,AL
4,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,12675,Autauga,3/26/20,32.539527,-86.644082,6,0,2,0,3/26/20 23:48,AL


In [27]:
# Drop the county name that came from the COVID frame.
# Rebinding with errors='ignore' (instead of inplace=True) keeps this cell safe
# to re-run: a second run is a no-op rather than a KeyError.
combined = combined.drop(columns=['name'], errors='ignore')
combined.head()

Unnamed: 0.1,ST,STATE,COUNTY,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,county_name,labor_force,file_week_ended,percent_unemployed,total_unemployed,Unnamed: 0,date,lat,long,confirmed,deaths,confirmed_diff,deaths_diff,last_update,state_abbr
0,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,21,3/22/20,32.539527,-86.644082,0,0,0,0,3/22/20 23:45,AL
1,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,3170,3/23/20,32.539527,-86.644082,0,0,0,0,3/23/20 23:19,AL
2,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,6339,3/24/20,32.539527,-86.644082,1,0,1,0,3/24/20 23:37,AL
3,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,9507,3/25/20,32.539527,-86.644082,4,0,3,0,3/25/20 23:33,AL
4,1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,12675,3/26/20,32.539527,-86.644082,6,0,2,0,3/26/20 23:48,AL


In [28]:
# Row and column count after dropping 'state' and 'name'.
combined.shape

(2855628, 38)

In [29]:
# Drop the numeric state code that came from the SVI frame.
# Several columns can be removed in one call by passing a list, e.g.
# combined.drop(columns=['ST', 'STATE', 'COUNTY']); only 'ST' is removed here
# because the following cells each drop one of the remaining columns.
# Rebinding with errors='ignore' (instead of inplace=True) keeps this cell safe
# to re-run: a second run is a no-op rather than a KeyError.
combined = combined.drop(columns=['ST'], errors='ignore')
combined.head()

Unnamed: 0.1,STATE,COUNTY,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,county_name,labor_force,file_week_ended,percent_unemployed,total_unemployed,Unnamed: 0,date,lat,long,confirmed,deaths,confirmed_diff,deaths_diff,last_update,state_abbr
0,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,21,3/22/20,32.539527,-86.644082,0,0,0,0,3/22/20 23:45,AL
1,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,3170,3/23/20,32.539527,-86.644082,0,0,0,0,3/23/20 23:19,AL
2,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,6339,3/24/20,32.539527,-86.644082,1,0,1,0,3/24/20 23:37,AL
3,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,9507,3/25/20,32.539527,-86.644082,4,0,3,0,3/25/20 23:33,AL
4,ALABAMA,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,12675,3/26/20,32.539527,-86.644082,6,0,2,0,3/26/20 23:48,AL


In [30]:
# Drop the upper-case state name that came from the SVI frame.
# Rebinding with errors='ignore' (instead of inplace=True) keeps this cell safe
# to re-run: a second run is a no-op rather than a KeyError.
combined = combined.drop(columns=['STATE'], errors='ignore')
combined.head()

Unnamed: 0.1,COUNTY,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,county_name,labor_force,file_week_ended,percent_unemployed,total_unemployed,Unnamed: 0,date,lat,long,confirmed,deaths,confirmed_diff,deaths_diff,last_update,state_abbr
0,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,21,3/22/20,32.539527,-86.644082,0,0,0,0,3/22/20 23:45,AL
1,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,3170,3/23/20,32.539527,-86.644082,0,0,0,0,3/23/20 23:19,AL
2,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,6339,3/24/20,32.539527,-86.644082,1,0,1,0,3/24/20 23:37,AL
3,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,9507,3/25/20,32.539527,-86.644082,4,0,3,0,3/25/20 23:33,AL
4,Autauga,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,12675,3/26/20,32.539527,-86.644082,6,0,2,0,3/26/20 23:48,AL


In [31]:
# Drop the SVI county name; 'county_name' from the employment frame is kept.
# Rebinding with errors='ignore' (instead of inplace=True) keeps this cell safe
# to re-run: a second run is a no-op rather than a KeyError.
combined = combined.drop(columns=['COUNTY'], errors='ignore')
combined.head()

Unnamed: 0.1,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,county_name,labor_force,file_week_ended,percent_unemployed,total_unemployed,Unnamed: 0,date,lat,long,confirmed,deaths,confirmed_diff,deaths_diff,last_update,state_abbr
0,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,21,3/22/20,32.539527,-86.644082,0,0,0,0,3/22/20 23:45,AL
1,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,3170,3/23/20,32.539527,-86.644082,0,0,0,0,3/23/20 23:19,AL
2,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,6339,3/24/20,32.539527,-86.644082,1,0,1,0,3/24/20 23:37,AL
3,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,9507,3/25/20,32.539527,-86.644082,4,0,3,0,3/25/20 23:33,AL
4,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,12675,3/26/20,32.539527,-86.644082,6,0,2,0,3/26/20 23:48,AL


In [32]:
# Drop the leftover 'Unnamed: 0' column that came in with the COVID CSV.
# Rebinding with errors='ignore' (instead of inplace=True) keeps this cell safe
# to re-run: a second run is a no-op rather than a KeyError.
combined = combined.drop(columns=['Unnamed: 0'], errors='ignore')
combined.head()

Unnamed: 0,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,county_name,labor_force,file_week_ended,percent_unemployed,total_unemployed,date,lat,long,confirmed,deaths,confirmed_diff,deaths_diff,last_update,state_abbr
0,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,3/22/20,32.539527,-86.644082,0,0,0,0,3/22/20 23:45,AL
1,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,3/23/20,32.539527,-86.644082,0,0,0,0,3/23/20 23:19,AL
2,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,3/24/20,32.539527,-86.644082,1,0,1,0,3/24/20 23:37,AL
3,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,3/25/20,32.539527,-86.644082,4,0,3,0,3/25/20 23:33,AL
4,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,3/26/20,32.539527,-86.644082,6,0,2,0,3/26/20 23:48,AL


In [33]:
# Drop the COVID report date; 'last_update' still carries the report timestamp.
# Rebinding with errors='ignore' (instead of inplace=True) keeps this cell safe
# to re-run: a second run is a no-op rather than a KeyError.
combined = combined.drop(columns=['date'], errors='ignore')
combined.head()

Unnamed: 0,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,county_name,labor_force,file_week_ended,percent_unemployed,total_unemployed,lat,long,confirmed,deaths,confirmed_diff,deaths_diff,last_update,state_abbr
0,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,32.539527,-86.644082,0,0,0,0,3/22/20 23:45,AL
1,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,32.539527,-86.644082,0,0,0,0,3/23/20 23:19,AL
2,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,32.539527,-86.644082,1,0,1,0,3/24/20 23:37,AL
3,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,32.539527,-86.644082,4,0,3,0,3/25/20 23:33,AL
4,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,32.539527,-86.644082,6,0,2,0,3/26/20 23:48,AL


In [34]:
# Drop the two-letter state abbreviation that came from the COVID frame.
# Rebinding with errors='ignore' (instead of inplace=True) keeps this cell safe
# to re-run: a second run is a no-op rather than a KeyError.
combined = combined.drop(columns=['state_abbr'], errors='ignore')
combined.head()

Unnamed: 0,fips,AREA_SQMI,E_TOTPOP,E_HU,E_HH,E_POV,E_UNEMP,E_PCI,E_NOHSDP,E_AGE65,E_AGE17,E_DISABL,E_SNGPNT,E_MINRTY,E_LIMENG,E_MUNIT,E_MOBILE,E_CROWD,E_NOVEH,E_GROUPQ,county_name,labor_force,file_week_ended,percent_unemployed,total_unemployed,lat,long,confirmed,deaths,confirmed_diff,deaths_diff,last_update
0,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,32.539527,-86.644082,0,0,0,0,3/22/20 23:45
1,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,32.539527,-86.644082,0,0,0,0,3/23/20 23:19
2,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,32.539527,-86.644082,1,0,1,0,3/24/20 23:37
3,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,32.539527,-86.644082,4,0,3,0,3/25/20 23:33
4,1001,594.443459,55200,23315,21115,8422,1065,29372,4204,8050,13369,10465,1586,13788,426,886,4279,299,1191,546,Autauga,25819,3/7/20,2.69,694,32.539527,-86.644082,6,0,2,0,3/26/20 23:48


In [None]:
# Assign X (predictors) and Y (criterion)
# NOTE(review): the original cell read a "label" column, but none of the
# previews of `combined` show one. Set TARGET_COLUMN to the intended criterion.
TARGET_COLUMN = "label"
if TARGET_COLUMN not in combined.columns:
    # Fail with an actionable message instead of a bare KeyError on the lookup.
    raise KeyError(
        "Target column {!r} not found in combined; available columns: {}".format(
            TARGET_COLUMN, list(combined.columns)
        )
    )

# Keep the criterion out of the predictors so it cannot leak into X.
# "STATE" has already been dropped by an earlier cell, so errors="ignore"
# avoids a KeyError when it is absent.
X = combined.drop(columns=[TARGET_COLUMN, "STATE"], errors="ignore")
Y = combined[TARGET_COLUMN]
# The original printed `y.shape`, but only `Y` is defined (NameError).
print(X.shape, Y.shape)

In [None]:
# Train

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [None]:
# Scale