In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from datetime import datetime, timedelta
import sklearn
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
import math
from math import sqrt
import random
import scipy
from scipy.interpolate import make_interp_spline

# Notebook-wide display settings: never hide columns, cap printed rows at 40.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 40)

# Seed BOTH generators. The stdlib `random` seed alone does not touch NumPy's
# global generator, which is the one behind np.random.normal and behind
# scikit-learn estimators that are created without an explicit random_state.
random.seed(1)
np.random.seed(1)

import gdown

# Google Drive location of the dataset and the local file name it is saved under.
COVID_CSV_URL = 'https://drive.google.com/uc?id=1oK8AlCRFp4isBJQv-AZnC_zr-ZvrOOxd'
COVID_CSV_PATH = 'covid-statistics-by-us-states-daily-updates.csv'

# The third argument of gdown.download is `quiet`; spelled out here by keyword.
gdown.download(COVID_CSV_URL, COVID_CSV_PATH, quiet=True);

raw_data = pd.read_csv(COVID_CSV_PATH)

# Parse the ISO-formatted date strings into real timestamps.
raw_data["date"] = pd.to_datetime(raw_data["date"], format="%Y-%m-%d")

# Index by date but keep the column as well (drop=False): the per-state join
# that follows reads `date` as an ordinary column.
raw_data = raw_data.set_index("date", drop=False)

# For every state, pair each day's row with the NEXT day's `positiveincrease`,
# stored as `tomorrow_new_pos`, so it can serve as the prediction target.
state_df_list = []
for name, df in raw_data.groupby("state"):
  # Work on an explicit copy: adding a column to a bare column-selection of
  # `df` triggers pandas' SettingWithCopyWarning.
  tomorrow_data = df[['date','positiveincrease']].copy()
  tomorrow_data.columns = ['tomorrow_date', 'tomorrow_new_pos']
  # Re-key every observation by the day before it, so that joining on the date
  # index puts tomorrow's count on today's row.
  tomorrow_data['today_date'] = tomorrow_data['tomorrow_date'] - timedelta(days=1)
  tomorrow_data = tomorrow_data.set_index("today_date")
  # Inner join: a day with no row for the following day is dropped.
  state_df = df.join(tomorrow_data, how = 'inner')
  state_df_list.append(state_df)

# Stack the per-state frames and clean them in one pass.
# `negative` is written with assign() rather than attribute-style
# `data.negative = ...`: attribute assignment on a filtered frame raises
# SettingWithCopyWarning and, if the column were missing, would only set a
# Python attribute instead of creating the column.
data = (
    pd.concat(state_df_list)
    # The date lives in the index; the helper join column is no longer needed.
    .drop(columns=['date', 'tomorrow_date'])
    # Keep only rows where all three of these counts are present.
    .dropna(subset=['totaltestresultsincrease', 'total', 'positive'])
    # Recompute the negative count from the total and positive counts.
    .assign(negative=lambda frame: frame['total'] - frame['positive'])
    .sort_index()
)

In [None]:
# Show a single row to see which columns the joined frame carries.
data.head(1)

In [None]:
# Columns kept for modelling: the state label, the count columns used as
# inputs, and the next-day target (`tomorrow_new_pos`).
kept_columns = [
    "state",
    "positive",
    "negative",
    "total",
    "hospitalizedincrease",
    "negativeincrease",
    "positiveincrease",
    "tomorrow_new_pos",
]
data = data[kept_columns]


In [None]:
# Preview the first rows of the trimmed frame.
# NOTE(review): display.max_rows is set to 40 at the top of the notebook, so a
# 50-row result is expected to render in pandas' truncated view -- confirm
# whether 50 or 40 rows is the intent.
data.head(50) 

In [None]:
# DataFrame.shape is a (row count, column count) pair.
num_rows, num_columns = data.shape

print(num_rows, "rows")
print(num_columns, "columns")

In [None]:
# State abbreviation to explore; it is upper-cased below, so case does not matter.
state = "ma"
state = state.upper()

states = set(data["state"])
if state not in states:
    raise ValueError("Abbreviation isn't a state. Please try again")
print(f"You've chosen to see the data for the state of {state}")

# Rows of the chosen state, re-indexed by whole days elapsed since 1 Jan 2020.
new_data = data[data["state"] == state]
new_data.index = (new_data.index - datetime(2020, 1, 1)).days

# Same chart layout twice: the next-day new-case column first, then the
# running total of positive cases.
for series_name, chart_title, y_label in [
    ("tomorrow_new_pos",
     f"New positive cases each day for {state}",
     "Number of new positive cases"),
    ("positive",
     f"Total number of positive cases for the state of {state}",
     "Number of total positive cases"),
]:
    plt.plot(new_data.index, new_data[series_name])
    plt.title(chart_title)
    plt.ylabel(y_label)
    plt.xlabel("Day of the year")
    plt.xlim(22, 118)
    plt.show()

In [None]:
# Inputs are every column except the target; the target stays a one-column
# DataFrame (double brackets) rather than a Series.
target_column = 'tomorrow_new_pos'

X_raw = data.drop(columns=[target_column])
y_raw = data[[target_column]]

In [None]:
# One indicator column per state, named "state_<ABBR>".
one_hot_columns = pd.get_dummies(X_raw["state"], prefix="state")

# Replace the text `state` column with its indicator columns.
X = pd.concat([X_raw.drop(columns="state"), one_hot_columns], axis=1)

# The index still holds the dates here; turn them into days since 1 Jan 2020
# before the index is discarded.
X["day_of_year"] = (X.index - datetime(2020, 1, 1)).days

# Give X and y the same fresh 0..n-1 index so rows stay aligned by position.
X = X.reset_index(drop=True)
y = y_raw.reset_index(drop=True)

X.head(5)

In [None]:
# shuffle=False keeps the row order, so the final 20% of the rows (the frame
# was sorted by date earlier) become the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

In [None]:
# Shapes of the four splits, in the order X_train, X_test, y_train, y_test.
print(*(split.shape for split in (X_train, X_test, y_train, y_test)))

In [None]:
# Linear regression model with scikit-learn's default settings.
linear = linear_model.LinearRegression()

In [None]:
# Fit on the training rows only; y_train is the one-column `tomorrow_new_pos` frame.
linear.fit(X_train, y_train)

In [None]:
# Predictions for the held-out test rows, in X_test row order.
y_pred = linear.predict(X_test)

In [None]:
state = "ms"

def plot_true_and_pred(state, y_pred):
  """Plot one state's true next-day new cases next to a model's test predictions.

  Reads the notebook-level `data`, `X_train`, `X_test`, `y_train` and `y_test`.
  Three lines are drawn against `day_of_year`: the true target on the training
  rows, the true target on the test rows, and the predictions on the test rows.

  Args:
    state: State abbreviation, in any case (it is upper-cased internally).
    y_pred: One prediction per row of `X_test`, in `X_test` row order. May be
      shaped (n,) or (n, 1).

  Raises:
    ValueError: If `state` is not one of the values in `data["state"]`.
  """
  state = state.upper()

  if state not in set(data["state"]):
      raise ValueError("Abbreviation isn't a state. Please try again")
  print(f"You've chosen to see the data for the state of {state}")

  # Flatten to 1-D and give the predictions y_test's row labels so they can be
  # selected per state. reshape(-1) also copes with plain sequences and with a
  # single test row, where squeeze() would yield a 0-d array.
  y_pred_series = pd.Series(np.asarray(y_pred).reshape(-1), index = y_test.index)

  # The one-hot indicator column identifies this state's rows in each split.
  state_column = "state_" + state
  X_train_state = X_train[X_train[state_column] == 1]
  X_test_state = X_test[X_test[state_column] == 1]

  early_true = y_train.loc[X_train_state.index]
  late_true = y_test.loc[X_test_state.index]
  late_pred = y_pred_series.loc[X_test_state.index]

  plt.plot(X_train_state.day_of_year, early_true)
  plt.plot(X_test_state.day_of_year, late_true)
  plt.plot(X_test_state.day_of_year, late_pred)
  # Label the figure so plots for different states and models can be told apart.
  plt.title(f"True vs. predicted next-day new positive cases for {state}")
  plt.xlabel("Day of the year")
  plt.ylabel("Number of new positive cases")
  plt.legend(["True (Training)", "True (Testing)", "Predicted (Testing)"])
  plt.show()

plot_true_and_pred(state, y_pred)


In [None]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the linear model on the held-out test rows.
mse_linear = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("MSE for linear model:", mse_linear)

In [None]:
from sklearn.neural_network import MLPRegressor

# Four hidden layers of 10 ReLU units each. random_state is fixed so the
# reported MSE is the same on every run (weight initialisation is random).
nnet = MLPRegressor(hidden_layer_sizes=(10, 10, 10, 10),
                    activation="relu",
                    max_iter=10000,
                    random_state=1)

# y_train is a one-column DataFrame; hand the estimator a 1-D target so that
# scikit-learn does not emit a DataConversionWarning about a column vector.
nnet.fit(X_train, y_train.to_numpy().ravel())

nnet_preds = nnet.predict(X_test)

mse_nnet = mean_squared_error(y_true = y_test,
                              y_pred = nnet_preds)

print ("MSE", mse_nnet)

In [None]:
# Same true-vs-predicted plot, this time for "WA" with the neural network's test predictions.
plot_true_and_pred("WA",nnet_preds)

In [None]:
# Compare architectures of growing width and depth. Each tuple lists the number
# of units per hidden layer; the empty tuple asks for no hidden layer.
# One-layer entries need the trailing comma: `(1)` is just the integer 1.
for hidden_layer_sizes in [(), (1,), (3,), (5,), (10,),
                           (5, 5), (10, 10),
                           (5, 5, 5), (10, 10, 10),
                           (10, 10, 10, 10),
                           (10, 10, 10, 10, 10)]:

  print(f"Testing neural network for the following configuration: {hidden_layer_sizes}")
  # Same random_state for every configuration, so MSE differences reflect the
  # architecture rather than a lucky or unlucky weight initialisation.
  nnet = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes,
                      activation="relu",
                      max_iter= 100000,
                      random_state=1)

  # 1-D target avoids scikit-learn's column-vector DataConversionWarning.
  nnet.fit(X_train, y_train.to_numpy().ravel())

  nnet_preds = nnet.predict(X_test)

  mse_nnet = mean_squared_error(y_true = y_test,
                                y_pred = nnet_preds)

  print(f"Our MSE  for the neural network is: {mse_nnet}")

In [None]:
# Synthetic data: 50 evenly spaced x values on [0, 15] and y = 5x plus noise.
# The noise comes from a locally seeded generator so that every later cell
# (fits, fixed y-limits, comparison plots) sees the same points on each run,
# regardless of how much randomness earlier cells consumed.
rand_rng = np.random.default_rng(1)

X_rand = np.linspace(0, 15, 50)
y_rand = (5 * X_rand) + (1.75 * rand_rng.normal(scale=10, size=len(X_rand)))
plt.scatter(X_rand, y_rand)
plt.show()

In [None]:
# Hand-picked parameters for a first guess at the line y = slope * x + y_intercept.
slope, y_intercept = 1, 0

predicted_y_rand = slope * X_rand + y_intercept

# Data as points, the guessed line in red.
plt.scatter(X_rand, y_rand)
plt.plot(X_rand, predicted_y_rand, color="red")
plt.show()


In [None]:
from sklearn import linear_model

linear_rand = linear_model.LinearRegression()

# The estimator wants a 2-D feature matrix, so X_rand is viewed as one column.
linear_rand.fit(X_rand[:, np.newaxis], y_rand)

# Fitted parameters: coef_ is an array with one entry per feature (a single
# entry here) and intercept_ is the constant term.
slope, y_intercept = linear_rand.coef_, linear_rand.intercept_

predicted_y_rand_linear = slope * X_rand + y_intercept

# Data as points, the fitted line in red.
plt.scatter(X_rand, y_rand)
plt.plot(X_rand, predicted_y_rand_linear, color="red")
plt.show()

In [None]:
# Hand-tuned coefficients of a*x^4 + b*x^3 + c*x^2 + d*x + e.
a, b, c, d, e = 0.007, -0.25, 2.5, -3, 2

# Powers of x computed once, then combined from highest to lowest degree.
x_pow = {degree: np.power(X_rand, degree) for degree in (4, 3, 2, 1)}
predicted_y_rand = a * x_pow[4] + b * x_pow[3] + c * x_pow[2] + d * x_pow[1] + e

# Data as points, the hand-tuned curve in red, y-axis clipped to 0..120.
plt.scatter(X_rand, y_rand)
plt.plot(X_rand, predicted_y_rand, color="red")
plt.ylim(0, 120)
plt.show()


In [None]:
# Least-squares fit of a 4th-degree polynomial; np.polyfit returns the
# coefficients ordered from the highest power down to the constant term.
rand_polyfit = np.polyfit(X_rand, y_rand, deg = 4)
a, b, c, d, e = rand_polyfit

# Powers of x computed once, then combined from highest to lowest degree.
x_pow = {degree: np.power(X_rand, degree) for degree in (4, 3, 2, 1)}
predicted_y_rand_polynomial = a * x_pow[4] + b * x_pow[3] + c * x_pow[2] + d * x_pow[1] + e

# Data as points, the fitted curve in red, y-axis clipped to 0..120.
plt.scatter(X_rand, y_rand)
plt.plot(X_rand, predicted_y_rand_polynomial, color="red")
plt.ylim(0, 120)
plt.show()



In [None]:
# Draw a red line straight through every data point, in x order: a curve that
# passes through all of the points.
plt.scatter(X_rand, y_rand)
plt.plot(X_rand, y_rand, color="red")
plt.ylim(0, 120)
plt.show()



In [None]:
# A new observation to judge each fit against. It is named new_x / new_y
# rather than x / y so that the target frame `y` built earlier in the notebook
# is not overwritten (re-running the train/test split cell would otherwise
# receive the integer 60 as its target).
new_x = 14
new_y = 60

# What the fitted line and the fitted 4th-degree polynomial give at new_x.
new_y_line = (slope * new_x) + y_intercept
new_y_polynomial = (a * np.power(new_x, 4)) + (b * np.power(new_x, 3)) + (c * np.power(new_x, 2)) + (d * np.power(new_x, 1)) + e

# One figure per fit: the data, the new point in purple, and the fitted curve.
# The third "fit" just joins the data points themselves, drawn dashed.
for fitted_curve, line_style, chart_title in [
    (predicted_y_rand_linear, {"color": "red"},
     "Fitting using the straight line"),
    (predicted_y_rand_polynomial, {"color": "green"},
     "Fitting using the 4th-degree polynomial"),
    (y_rand, {"color": "red", "dashes": [6, 2]},
     "Fitting using the extreme polynomial"),
]:
    plt.scatter(X_rand, y_rand)
    plt.ylim(0, 120)
    plt.scatter(new_x, new_y, color="purple", s=75)
    plt.plot(X_rand, fitted_curve, **line_style)
    plt.title(chart_title)
    plt.show()

In [None]:
# State to forecast; upper-cased to match the one-hot column names.
state = 'tx'
state = state.upper()

# This state's rows in each split, picked out through its indicator column.
state_column = f"state_{state}"
X_train_state = X_train.loc[X_train[state_column].eq(1)]
X_test_state = X_test.loc[X_test[state_column].eq(1)]

print("Training and testing shapes:", X_train_state.shape, X_test_state.shape)

# Last training row for the state, kept as a one-row DataFrame and copied so
# it can be modified without touching X_train.
current_day = X_train_state.tail(1).copy()

current_day

In [None]:
pred_list = []

# Roll forward on a private copy so that re-running this cell always starts
# from the last training day rather than from wherever a previous run stopped.
rolling_day = current_day.copy()

# The model's target for a row is the NEXT day's new-case count, so the
# forecast made from the last training row is the count for the first test day.
prediction = linear.predict(rolling_day).item()

for pred_day in range(len(X_test_state)):
  # Step the feature row to the next day, using the previous forecast as that
  # day's new-case count.
  # NOTE(review): every other feature (positive, total, negative, ...) stays
  # frozen at the last training day's values -- confirm this is intended.
  rolling_day['positiveincrease'] = prediction
  rolling_day['day_of_year'] += 1
  # Record the forecast made FROM the advanced row: it has the same meaning as
  # y_test on that day, so pred_list lines up with X_test_state.day_of_year.
  # Recording before advancing would shift the forecast curve by one day.
  # Assumes the state's test rows start the day after its last training row.
  prediction = linear.predict(rolling_day).item()
  pred_list.append(prediction)

In [None]:
# True targets come straight from the split frames; the forecast is the list
# built by the roll-forward loop.
early_true = y_train.loc[X_train_state.index]
late_true = y_test.loc[X_test_state.index]
late_pred = pred_list

# Three lines in legend order: training truth, test truth, forecast.
for day_axis, values in (
    (X_train_state.day_of_year, early_true),
    (X_test_state.day_of_year, late_true),
    (X_test_state.day_of_year, late_pred),
):
    plt.plot(day_axis, values)

plt.title(f"New positive cases each day for {state}")
plt.legend(["True (Training)", "True (Testing)", "Predicted"])
plt.show()