In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf
import numpy as np

#  Import and read the csv.
df = pd.read_csv("final2.csv")
df.drop('Unnamed: 0', axis=1, inplace=True)
df

Unnamed: 0,County Name,State,Date,Cases,Deaths,Series_Complete_Yes,Series_Complete_Pop_Pct,Administered_Dose1_Pop_Pct,Booster_Doses_Vax_Pct
0,Aleutians East Borough,AK,2021-12-15,135,0,2364,70.8,80.3,22.7
1,Aleutians West Census Area,AK,2021-12-15,350,2,3267,58.0,68.8,22.5
2,Bethel Census Area,AK,2021-12-15,6873,28,11577,63.0,70.9,39.3
3,Bristol Bay Borough,AK,2021-12-15,608,2,811,95.0,95.0,32.4
4,Denali Borough,AK,2021-12-15,234,0,1041,49.6,59.2,41.0
...,...,...,...,...,...,...,...,...,...
21567,Sweetwater County,WY,2021-12-21,8243,107,18451,43.6,54.4,33.1
21568,Teton County,WY,2021-12-21,5505,14,20737,88.4,95.0,35.8
21569,Uinta County,WY,2021-12-21,4121,31,9257,45.8,54.3,31.8
21570,Washakie County,WY,2021-12-21,1860,37,3126,40.1,43.9,40.7


In [2]:
df['Death_Pct'] = round(df.Deaths / df.Cases,3)
df.head()

Unnamed: 0,County Name,State,Date,Cases,Deaths,Series_Complete_Yes,Series_Complete_Pop_Pct,Administered_Dose1_Pop_Pct,Booster_Doses_Vax_Pct,Death_Pct
0,Aleutians East Borough,AK,2021-12-15,135,0,2364,70.8,80.3,22.7,0.0
1,Aleutians West Census Area,AK,2021-12-15,350,2,3267,58.0,68.8,22.5,0.006
2,Bethel Census Area,AK,2021-12-15,6873,28,11577,63.0,70.9,39.3,0.004
3,Bristol Bay Borough,AK,2021-12-15,608,2,811,95.0,95.0,32.4,0.003
4,Denali Borough,AK,2021-12-15,234,0,1041,49.6,59.2,41.0,0.0


In [3]:
print(df.Series_Complete_Pop_Pct.median())
print(df.Death_Pct.median())
print(df.Administered_Dose1_Pop_Pct.median())
print(df.Booster_Doses_Vax_Pct.median())

46.3
0.017
52.8
31.3


In [4]:
df['Risk'] = np.where((df['Death_Pct'] >0.017), 1, 0)
df.head()

Unnamed: 0,County Name,State,Date,Cases,Deaths,Series_Complete_Yes,Series_Complete_Pop_Pct,Administered_Dose1_Pop_Pct,Booster_Doses_Vax_Pct,Death_Pct,Risk
0,Aleutians East Borough,AK,2021-12-15,135,0,2364,70.8,80.3,22.7,0.0,0
1,Aleutians West Census Area,AK,2021-12-15,350,2,3267,58.0,68.8,22.5,0.006,0
2,Bethel Census Area,AK,2021-12-15,6873,28,11577,63.0,70.9,39.3,0.004,0
3,Bristol Bay Borough,AK,2021-12-15,608,2,811,95.0,95.0,32.4,0.003,0
4,Denali Borough,AK,2021-12-15,234,0,1041,49.6,59.2,41.0,0.0,0


In [5]:
# Determine the number of unique values in each column.

#filtered_df.drop('County Name', axis=1, inplace=True)
#filtered_df.nunique()

df.drop(['County Name','Date'], axis=1, inplace=True)
df.nunique()

State                            50
Cases                          9688
Deaths                         1298
Series_Complete_Yes           15207
Series_Complete_Pop_Pct         720
Administered_Dose1_Pop_Pct      800
Booster_Doses_Vax_Pct           609
Death_Pct                        80
Risk                              2
dtype: int64

In [6]:
# Generate our categorical variable lists
cat = df.dtypes[df.dtypes == "object"].index.tolist()
cat

['State']

In [7]:
df.head(10)

Unnamed: 0,State,Cases,Deaths,Series_Complete_Yes,Series_Complete_Pop_Pct,Administered_Dose1_Pop_Pct,Booster_Doses_Vax_Pct,Death_Pct,Risk
0,AK,135,0,2364,70.8,80.3,22.7,0.0,0
1,AK,350,2,3267,58.0,68.8,22.5,0.006,0
2,AK,6873,28,11577,63.0,70.9,39.3,0.004,0
3,AK,608,2,811,95.0,95.0,32.4,0.003,0
4,AK,234,0,1041,49.6,59.2,41.0,0.0,0
5,AK,959,10,2304,46.9,52.5,35.9,0.01,0
6,AK,16874,108,50507,52.2,61.3,25.8,0.006,0
7,AK,231,1,1675,66.2,70.9,40.6,0.004,0
8,AK,0,0,1489,69.3,74.5,52.4,,0
9,AK,11584,76,25653,43.7,49.4,37.3,0.007,0


In [8]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(df[cat]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names(cat)
encode_df.head(10)

Unnamed: 0,State_AK,State_AL,State_AR,State_AZ,State_CA,State_CO,State_CT,State_DC,State_DE,State_FL,...,State_SD,State_TN,State_TX,State_UT,State_VA,State_VT,State_WA,State_WI,State_WV,State_WY
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Merge one-hot encoded features and drop the originals
new_df = df.merge(encode_df, left_index = True, right_index = True)
new_df = new_df.drop(columns = cat)
new_df

Unnamed: 0,Cases,Deaths,Series_Complete_Yes,Series_Complete_Pop_Pct,Administered_Dose1_Pop_Pct,Booster_Doses_Vax_Pct,Death_Pct,Risk,State_AK,State_AL,...,State_SD,State_TN,State_TX,State_UT,State_VA,State_VT,State_WA,State_WI,State_WV,State_WY
0,135,0,2364,70.8,80.3,22.7,0.000,0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,350,2,3267,58.0,68.8,22.5,0.006,0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,6873,28,11577,63.0,70.9,39.3,0.004,0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,608,2,811,95.0,95.0,32.4,0.003,0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,234,0,1041,49.6,59.2,41.0,0.000,0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21567,8243,107,18451,43.6,54.4,33.1,0.013,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
21568,5505,14,20737,88.4,95.0,35.8,0.003,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
21569,4121,31,9257,45.8,54.3,31.8,0.008,0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
21570,1860,37,3126,40.1,43.9,40.7,0.020,1,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [10]:
# Split our preprocessed data into our features and target arrays
y = new_df['Risk']
X = new_df.drop(columns=['Risk', 'Cases', 'Deaths'])

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [11]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [12]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 30

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)
# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 80)                4480      
                                                                 
 dense_1 (Dense)             (None, 30)                2430      
                                                                 
 dense_2 (Dense)             (None, 1)                 31        
                                                                 
Total params: 6,941
Trainable params: 6,941
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [14]:
# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=20) 


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [15]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

169/169 - 0s - loss: nan - accuracy: 0.5426 - 475ms/epoch - 3ms/step
Loss: nan, Accuracy: 0.5425551533699036
