### Import Relevant Libraries

In [45]:
%matplotlib inline

In [50]:
import pandas as pd
from numpy.random import seed
seed(1)
from tensorflow import random
random.set_seed(1)
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
from tensorflow.keras.utils import to_categorical

### Load Data

In [11]:
covid_deaths_county = pd.read_csv("Resources/covid_cases_deaths_counties_april18.csv")
df2 = pd.read_csv("Resources/census1.csv")

### Clean Data

In [26]:
df1 = covid_deaths_county.copy()
df1 = df1.rename(columns={'county':'County'})

In [27]:
df1.head()

Unnamed: 0,date,County,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


In [31]:
df2 = df2.rename(columns={'CTYNAME':'County'})
df2.head()

Unnamed: 0,SUMLEV,STATE,Unnamed: 2,STNAME,County,YEAR,AGEGRP,TOT_POP,TOT_MALE,TOT_FEMALE
0,50,1,1,Alabama,Autauga,11,0,55601,26995,28606
1,50,1,1,Alabama,Autauga,11,1,3364,1728,1636
2,50,1,1,Alabama,Autauga,11,2,3423,1779,1644
3,50,1,1,Alabama,Autauga,11,3,3882,1977,1905
4,50,1,1,Alabama,Autauga,11,4,3755,1913,1842


In [32]:
raw_data = pd.merge(df1, df2, how='left', on='County')

In [36]:
raw_data = raw_data.dropna()
raw_data

Unnamed: 0,date,County,state,fips,cases,deaths,SUMLEV,STATE,Unnamed: 2,STNAME,YEAR,AGEGRP,TOT_POP,TOT_MALE,TOT_FEMALE
0,2020-01-21,Snohomish,Washington,53061.0,1,0,50.0,53.0,61.0,Washington,11.0,0.0,814901.0,408857.0,406044.0
1,2020-01-21,Snohomish,Washington,53061.0,1,0,50.0,53.0,61.0,Washington,11.0,1.0,52083.0,26942.0,25141.0
2,2020-01-21,Snohomish,Washington,53061.0,1,0,50.0,53.0,61.0,Washington,11.0,2.0,50976.0,26232.0,24744.0
3,2020-01-21,Snohomish,Washington,53061.0,1,0,50.0,53.0,61.0,Washington,11.0,3.0,51514.0,26485.0,25029.0
4,2020-01-21,Snohomish,Washington,53061.0,1,0,50.0,53.0,61.0,Washington,11.0,4.0,46683.0,24161.0,22522.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6489816,2020-04-18,Washakie,Wyoming,56043.0,4,0,50.0,56.0,43.0,Wyoming,11.0,14.0,517.0,268.0,249.0
6489817,2020-04-18,Washakie,Wyoming,56043.0,4,0,50.0,56.0,43.0,Wyoming,11.0,15.0,420.0,204.0,216.0
6489818,2020-04-18,Washakie,Wyoming,56043.0,4,0,50.0,56.0,43.0,Wyoming,11.0,16.0,321.0,175.0,146.0
6489819,2020-04-18,Washakie,Wyoming,56043.0,4,0,50.0,56.0,43.0,Wyoming,11.0,17.0,210.0,84.0,126.0


In [38]:
list(raw_data.columns)

['date',
 'County',
 'state',
 'fips',
 'cases',
 'deaths',
 'SUMLEV',
 'STATE',
 'Unnamed: 2',
 'STNAME',
 'YEAR',
 'AGEGRP',
 'TOT_POP',
 'TOT_MALE',
 'TOT_FEMALE']

In [40]:
raw_data = raw_data.drop(columns=['fips','SUMLEV','STATE','Unnamed: 2','STNAME','YEAR'])

In [41]:
raw_data

Unnamed: 0,date,County,state,cases,deaths,AGEGRP,TOT_POP,TOT_MALE,TOT_FEMALE
0,2020-01-21,Snohomish,Washington,1,0,0.0,814901.0,408857.0,406044.0
1,2020-01-21,Snohomish,Washington,1,0,1.0,52083.0,26942.0,25141.0
2,2020-01-21,Snohomish,Washington,1,0,2.0,50976.0,26232.0,24744.0
3,2020-01-21,Snohomish,Washington,1,0,3.0,51514.0,26485.0,25029.0
4,2020-01-21,Snohomish,Washington,1,0,4.0,46683.0,24161.0,22522.0
...,...,...,...,...,...,...,...,...,...
6489816,2020-04-18,Washakie,Wyoming,4,0,14.0,517.0,268.0,249.0
6489817,2020-04-18,Washakie,Wyoming,4,0,15.0,420.0,204.0,216.0
6489818,2020-04-18,Washakie,Wyoming,4,0,16.0,321.0,175.0,146.0
6489819,2020-04-18,Washakie,Wyoming,4,0,17.0,210.0,84.0,126.0


### Choose dependent and independent variables

In [72]:
y = raw_data[['cases']]
X = raw_data[['AGEGRP','TOT_POP']]

### Split Data Set

In [73]:
# Use train_test_split to create training and testing data
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size= 0.3)

### Standardize Data Set

In [74]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScater model and fit it to the training data
X_scaler = StandardScaler().fit(X_train)
# Transform the training and testing data using the X_scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

### Create Model

In [75]:
# first, create a normal neural network with 2 inputs, 6 hidden nodes, and 2 outputs
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=6, activation='sigmoid', input_dim=2))
model.add(Dense(units=4, activation='sigmoid'))

In [76]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 6)                 18        
_________________________________________________________________
dense_11 (Dense)             (None, 4)                 28        
Total params: 46
Trainable params: 46
Non-trainable params: 0
_________________________________________________________________


In [82]:
# Compile the model
model.compile(optimizer='adam',
              loss= 'sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [85]:
# Fit the model to the training data
model.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    shuffle=True,
    verbose=4
)

Train on 4542216 samples
Epoch 1/300


InvalidArgumentError:  Received a label value of 209 which is outside the valid range of [0, 4).  Label values: 209 1 1 1 3 37 1 33 7 1 13 82 4 16 4 14 26 1 95 24 12 7 5 68 34 15 92 7 1 25 10 6
	 [[node loss/dense_11_loss/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits (defined at <ipython-input-83-a4f9d8bc9726>:7) ]] [Op:__inference_distributed_function_1070]

Function call stack:
distributed_function
