## Machine Learning Conclusion
### 1) Best results come from a shallow (1 hidden layer) or deep (2 hidden layers) neural network (177 inputs, relu or tanh hidden activations, linear output)
### 2) 1 or 2 hidden layers give comparable R-squared values (~0.6 test, ~0.9 train, so overfitting is present); 3 hidden layers yield very poor performance

## Connect to SQLite database

In [None]:
# imports for machine learning
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

import matplotlib.pyplot as plt

%matplotlib inline

In [1]:
# imports for database connection
import pandas as pd
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import create_engine, func

# import these to view table column headers & rows
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select

In [2]:
# SQLite URL for the mobility database (path is relative: ../resources/)
db_url = "sqlite:///../resources/mobility_db.db"
engine = create_engine(db_url)

In [3]:
# reflect an existing database into a new model (creates base class for automap schema)
# The mapped classes are generated from the database rather than declared by hand.
Base = automap_base()

# reflect the tables reachable through `engine` into mapped classes on Base.classes
# NOTE(review): newer SQLAlchemy releases reportedly deprecate `reflect=True` in favour of
# `Base.prepare(autoload_with=engine)` — confirm against the installed version before changing.
Base.prepare(engine, reflect=True)

In [4]:
# list the names of the classes automap produced (bare expression so the notebook displays it)
Base.classes.keys()

['mobility_tbl']

In [None]:
# save a reference to the mapped class for the `mobility_tbl` table;
# later cells use it to build the select statement and the column query
mobility = Base.classes.mobility_tbl

In [None]:
# Create session (link) from Python to the database
session_factory = sessionmaker(bind=engine)
session = session_factory()

# Show the column headers of the reflected table
print(mobility.__table__.columns.keys())

# Pull every row of the table; kept for ad-hoc inspection
# (uncomment the print below to dump the rows)
mobility_table = select('*').select_from(mobility)
mobility_result = session.execute(mobility_table).fetchall()
# print(mobility_result)

In [None]:
# columns to retrieve, in the order they should appear in the DataFrame
column_names = [
    'index',
    'date',
    'province',
    'retail_and_recreation',
    'grocery_and_pharmacy',
    'parks',
    'transit_stations',
    'workplaces',
    'residential',
    'DailyTotals',
]

# perform a query to retrieve the data: one mapped attribute per listed column
results = session.query(*[getattr(mobility, name) for name in column_names]).all()

# save the query results as a Pandas DataFrame, index it by the 'index' values
# (the 'index' column itself is kept) and sort the rows by that index
df = pd.DataFrame(results, columns=column_names)
df = df.set_index(df['index']).sort_index()

## Correlate mobility, provinces, date vs daily totals (of covid cases) per province

In [None]:
# preview df (bare trailing expression so the notebook renders the frame)
df

In [None]:
# encode provinces: one indicator column per distinct value of 'province'
provEnc_df = pd.get_dummies(df.loc[:, 'province'])
provEnc_df

In [None]:
# encode dates: one indicator column per distinct value of 'date'
dateEnc_df = pd.get_dummies(df.loc[:, 'date'])
dateEnc_df

In [None]:
# keep only the six numeric mobility columns (everything else in df is left out)
numFeatures_df = df.loc[:, [
    'retail_and_recreation',
    'grocery_and_pharmacy',
    'parks',
    'transit_stations',
    'workplaces',
    'residential',
]]
numFeatures_df

In [None]:
# join dfs into monster "features_df": mobility columns + province dummies + date dummies
# (all three frames were derived from df; how="inner" keeps only the labels common to all of them)
features_df = numFeatures_df.join([provEnc_df, dateEnc_df], how="inner")
features_df

## Machine Learning

In [None]:
# create features(X) and target(y) sets
# X: the joined feature frame as a plain array
X = features_df.values

# y: the daily totals, reshaped into a single column
y = df['DailyTotals'].values.reshape(-1, 1)

In [None]:
# create training and testing datasets: 95% of the rows train, 5% test
# NOTE(review): no random_state is passed, so the split (and the scores computed
# from it) is presumably not reproducible between runs — confirm if that matters.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.05,
)

# scale data: the scaler is fitted on the training rows only, then applied to both splits
X_scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

## Shallow Net (relu, linear): 100 Hidden Neurons

In [None]:
# define model - shallow neural net: one relu hidden layer feeding a single linear output
number_hidden_nodes = 100
# take the input width from the scaled training matrix instead of hard-coding it,
# so the network stays in step with the encoded feature set
# (177 columns for the data this notebook was built on)
number_input_features = X_train_scaled.shape[1]

nn = Sequential()
nn.add(Dense(units=number_hidden_nodes, input_dim=number_input_features, activation="relu"))
nn.add(Dense(1, activation="linear"))

nn.summary()

# compile model
nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

# train model; the value returned by fit() is kept so its history can be plotted
model_1 = nn.fit(X_train_scaled, y_train, epochs=100)

# Plot the training loss per epoch
# (fit() is given no validation data, so there is no test-loss curve to draw)
plt.plot(model_1.history["loss"])
plt.title("loss_function - 1 hidden layer")
plt.legend(["loss"])
plt.show()

# predictions for both splits
y_train_pred = nn.predict(X_train_scaled)
y_test_pred = nn.predict(X_test_scaled)

# R-squared per split, labelled so the two numbers cannot be confused
print(f'r2 score (train): {r2_score(y_train, y_train_pred)}')
print(f'r2 score (test): {r2_score(y_test, y_test_pred)}')

## Shallow Net (relu, linear): 354 Hidden Neurons (2X Features)

In [None]:
# define model - shallow neural net: one relu hidden layer (2x the 177 features) and a linear output
number_hidden_nodes = 354
# take the input width from the scaled training matrix instead of hard-coding it,
# so the network stays in step with the encoded feature set
# (177 columns for the data this notebook was built on)
number_input_features = X_train_scaled.shape[1]

nn = Sequential()
nn.add(Dense(units=number_hidden_nodes, input_dim=number_input_features, activation="relu"))
nn.add(Dense(1, activation="linear"))

nn.summary()

# compile model
nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

# train model; the value returned by fit() is kept so its history can be plotted
model_2 = nn.fit(X_train_scaled, y_train, epochs=100)

# Plot the training loss per epoch
# (fit() is given no validation data, so there is no test-loss curve to draw)
plt.plot(model_2.history["loss"])
plt.title("loss_function - 1 hidden layer")
plt.legend(["loss"])
plt.show()

# predictions for both splits
y_train_pred = nn.predict(X_train_scaled)
y_test_pred = nn.predict(X_test_scaled)

# R-squared per split, labelled so the two numbers cannot be confused
print(f'r2 score (train): {r2_score(y_train, y_train_pred)}')
print(f'r2 score (test): {r2_score(y_test, y_test_pred)}')

## Shallow Net (relu, linear): 531 Hidden Neurons (3X Features)

In [None]:
# define model - shallow neural net: one relu hidden layer (3x the 177 features) and a linear output
number_hidden_nodes = 531
# take the input width from the scaled training matrix instead of hard-coding it,
# so the network stays in step with the encoded feature set
# (177 columns for the data this notebook was built on)
number_input_features = X_train_scaled.shape[1]

nn = Sequential()
nn.add(Dense(units=number_hidden_nodes, input_dim=number_input_features, activation="relu"))
nn.add(Dense(1, activation="linear"))

nn.summary()

# compile model
nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

# train model; the value returned by fit() is kept so its history can be plotted
model_3 = nn.fit(X_train_scaled, y_train, epochs=100)

# Plot the training loss per epoch
# (fit() is given no validation data, so there is no test-loss curve to draw)
plt.plot(model_3.history["loss"])
plt.title("loss_function - 1 hidden layer")
plt.legend(["loss"])
plt.show()

# predictions for both splits
y_train_pred = nn.predict(X_train_scaled)
y_test_pred = nn.predict(X_test_scaled)

# R-squared per split, labelled so the two numbers cannot be confused
print(f'r2 score (train): {r2_score(y_train, y_train_pred)}')
print(f'r2 score (test): {r2_score(y_test, y_test_pred)}')

## Shallow Net (tanh, linear): 531 Hidden Neurons (3X Features)

In [None]:
# define model - shallow neural net: one tanh hidden layer (3x the 177 features) and a linear output
number_hidden_nodes = 531
# take the input width from the scaled training matrix instead of hard-coding it,
# so the network stays in step with the encoded feature set
# (177 columns for the data this notebook was built on)
number_input_features = X_train_scaled.shape[1]

nn = Sequential()
nn.add(Dense(units=number_hidden_nodes, input_dim=number_input_features, activation="tanh"))
nn.add(Dense(1, activation="linear"))

nn.summary()

# compile model
nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

# train model; the value returned by fit() is kept so its history can be plotted
# NOTE: this rebinds `model_3`, replacing the history stored by the relu
# 531-neuron cell; the name is kept unchanged in case it is referenced elsewhere.
model_3 = nn.fit(X_train_scaled, y_train, epochs=100)

# Plot the training loss per epoch
# (fit() is given no validation data, so there is no test-loss curve to draw)
plt.plot(model_3.history["loss"])
plt.title("loss_function - 1 hidden layer")
plt.legend(["loss"])
plt.show()

# predictions for both splits
y_train_pred = nn.predict(X_train_scaled)
y_test_pred = nn.predict(X_test_scaled)

# R-squared per split, labelled so the two numbers cannot be confused
print(f'r2 score (train): {r2_score(y_train, y_train_pred)}')
print(f'r2 score (test): {r2_score(y_test, y_test_pred)}')

## Deep Learning

### relu, relu, linear, 2 layers (160N, 80N)

In [None]:
# Define the model - deep neural net: two relu hidden layers (160, 80) and a linear output
# Input width is read from the scaled training matrix instead of being hard-coded,
# so the network stays in step with the encoded feature set
# (177 columns for the data this notebook was built on)
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 160
hidden_nodes_layer2 = 80

nn = Sequential()
# First hidden layer
nn.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))
# Second hidden layer
nn.add(Dense(units=hidden_nodes_layer2, activation="relu"))
# Output layer
nn.add(Dense(units=1, activation="linear"))

# Compile model
nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

# Fit the model; the value returned by fit() is kept so its history can be plotted
model_4 = nn.fit(X_train_scaled, y_train, epochs=100)

# Plot the training loss per epoch
# (fit() is given no validation data, so there is no test-loss curve to draw)
plt.plot(model_4.history["loss"])
plt.title("loss_function - 2 hidden layers")
plt.legend(["loss"])
plt.show()

# Predictions for both splits
y_train_pred = nn.predict(X_train_scaled)
y_test_pred = nn.predict(X_test_scaled)

# R-squared per split, labelled so the two numbers cannot be confused
print(f'r2 score (train): {r2_score(y_train, y_train_pred)}')
print(f'r2 score (test): {r2_score(y_test, y_test_pred)}')

### tanh, tanh, linear, 2 layers (160N, 80N) [BEST MODEL]

In [None]:
# Define the model - deep neural net: two tanh hidden layers (160, 80) and a linear output
# Input width is read from the scaled training matrix instead of being hard-coded,
# so the network stays in step with the encoded feature set
# (177 columns for the data this notebook was built on)
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 160
hidden_nodes_layer2 = 80

nn = Sequential()
# First hidden layer
nn.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="tanh"))
# Second hidden layer
nn.add(Dense(units=hidden_nodes_layer2, activation="tanh"))
# Output layer
nn.add(Dense(units=1, activation="linear"))

# Compile model
nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

# Fit the model; the value returned by fit() is kept so its history can be plotted
model_5 = nn.fit(X_train_scaled, y_train, epochs=100)

# Plot the training loss per epoch
# (fit() is given no validation data, so there is no test-loss curve to draw)
plt.plot(model_5.history["loss"])
plt.title("loss_function - 2 hidden layers")
plt.legend(["loss"])
plt.show()

# Predictions for both splits
y_train_pred = nn.predict(X_train_scaled)
y_test_pred = nn.predict(X_test_scaled)

# R-squared per split
print(f'r2 score (train): {r2_score(y_train, y_train_pred)}')
print(f'r2 score (test): {r2_score(y_test, y_test_pred)}')

### tanh, tanh, tanh, linear, 3 layers (160N, 80N, 40N)

In [None]:
# Define the model - deep neural net: three tanh hidden layers (160, 80, 40) and a linear output
# Input width is read from the scaled training matrix instead of being hard-coded,
# so the network stays in step with the encoded feature set
# (177 columns for the data this notebook was built on)
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 160
hidden_nodes_layer2 = 80
hidden_nodes_layer3 = 40

nn = Sequential()
# First hidden layer
nn.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="tanh"))
# Second hidden layer
nn.add(Dense(units=hidden_nodes_layer2, activation="tanh"))
# Third hidden layer
nn.add(Dense(units=hidden_nodes_layer3, activation="tanh"))
# Output layer
nn.add(Dense(units=1, activation="linear"))

# Compile model
nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

# Fit the model; the value returned by fit() is kept so its history can be plotted
model_6 = nn.fit(X_train_scaled, y_train, epochs=100)

# Plot the training loss per epoch
# (fit() is given no validation data, so there is no test-loss curve to draw)
plt.plot(model_6.history["loss"])
plt.title("loss_function - 3 hidden layers")
plt.legend(["loss"])
plt.show()

# Predictions for both splits
y_train_pred = nn.predict(X_train_scaled)
y_test_pred = nn.predict(X_test_scaled)

# R-squared per split, labelled so the two numbers cannot be confused
print(f'r2 score (train): {r2_score(y_train, y_train_pred)}')
print(f'r2 score (test): {r2_score(y_test, y_test_pred)}')

### tanh, tanh, tanh, linear, 3 layers (80N, 40N, 20N)

In [None]:
# Define the model - deep neural net: three tanh hidden layers (80, 40, 20) and a linear output
# Input width is read from the scaled training matrix instead of being hard-coded,
# so the network stays in step with the encoded feature set
# (177 columns for the data this notebook was built on)
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 80
hidden_nodes_layer2 = 40
hidden_nodes_layer3 = 20

nn = Sequential()
# First hidden layer
nn.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="tanh"))
# Second hidden layer
nn.add(Dense(units=hidden_nodes_layer2, activation="tanh"))
# Third hidden layer
nn.add(Dense(units=hidden_nodes_layer3, activation="tanh"))
# Output layer
nn.add(Dense(units=1, activation="linear"))

# Compile model
nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

# Fit the model; the value returned by fit() is kept so its history can be plotted
model_7 = nn.fit(X_train_scaled, y_train, epochs=100)

# Plot the training loss per epoch
# (fit() is given no validation data, so there is no test-loss curve to draw)
plt.plot(model_7.history["loss"])
plt.title("loss_function - 3 hidden layers")
plt.legend(["loss"])
plt.show()

# Predictions for both splits
y_train_pred = nn.predict(X_train_scaled)
y_test_pred = nn.predict(X_test_scaled)

# R-squared per split, labelled so the two numbers cannot be confused
print(f'r2 score (train): {r2_score(y_train, y_train_pred)}')
print(f'r2 score (test): {r2_score(y_test, y_test_pred)}')