<a href="https://colab.research.google.com/github/samarthkadaba/COVID-19-Cases-Predictor/blob/master/COVID_19_Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
import seaborn as sns
import re

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Activation, Dropout, LeakyReLU
from tensorflow.python.keras.layers import advanced_activations
from tensorflow.python.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.python.keras.activations import relu, sigmoid, linear, exponential, softmax, tanh

In [0]:
def updateData():  
  """Scrape the Worldometers US coronavirus page and return a trimmed, reordered table.

  Takes no arguments. Performs a live HTTP request, so the result depends on the
  page layout at call time. As a side effect it sets the global pandas display
  options ``display.max_rows`` and ``display.max_columns`` to ``None``.

  Returns:
    A DataFrame built from the first ``<table>`` on the page, reduced to the
    columns at positions 0, 1 and 3 of the combined header+data frame (any
    columns beyond position 28 would also remain) and to 52 rows in a fixed,
    hand-picked label order.
    NOTE(review): presumably the three columns are region name, total cases and
    total deaths — confirm against the live table.
  """

  ## Web-scraping data based on Worldometers.com COVID-19 daily updates

  site = "https://www.worldometers.info/coronavirus/country/us/"
  # Send a browser-like User-Agent with the request.
  # NOTE(review): presumably the site rejects urllib's default agent — confirm.
  hdr = {'User-Agent': 'Mozilla/5.0'}
  req = Request(site,headers=hdr)
  # Global display settings; they stay in effect for the rest of the session.
  pd.set_option('display.max_rows', None)
  pd.set_option('display.max_columns', None)
  page = urlopen(req)
  # No parser is named here, so BeautifulSoup chooses one itself.
  soup = BeautifulSoup(page)
  # Only the first <table> element on the page is used for the data rows.
  table = soup.find('table')
  table_rows = table.find_all('tr')
  # Header cells are collected from the whole page, not just from `table`.
  col_labels = soup.find_all('th')
  all_headers = []
  # Stringify the list of <th> tags (list repr, comma separated), strip the markup,
  # then split the resulting text on commas into a one-row frame of header texts.
  col_str = str(col_labels)
  cleantext2 = BeautifulSoup(col_str, "lxml").get_text()
  all_headers.append(cleantext2)
  df2 = pd.DataFrame(all_headers)
  df3 = df2[0].str.split(',', expand=True)
  data = []
  # One list of cell texts per <tr>; rows without any <td> yield an empty list.
  for tr in table_rows:
      td = tr.find_all('td')
      row = [i.text for i in td]
      data.append(row)
  
  
  df = pd.DataFrame(data)
  # Stack the header row on top of the data rows. Each frame keeps its own index,
  # so index label 0 occurs in both.
  frames = [df3, df]
  df4 = pd.concat(frames)
  # Remove the leading '[' left over from the list repr of the header tags.
  df4[0] = df4[0].str.strip('[')
  # Drop by index *label*: the labels found at positions 0 and 2 of the index.
  # With duplicated labels, every row carrying such a label is removed.
  # NOTE(review): presumably this discards the header row and a totals row — confirm.
  df4.drop(df4.index[[0,2]], inplace = True)
  # Drop the columns at positions 2 and 4..28, keeping positions 0, 1 and 3.
  # Raises IndexError if the combined frame has fewer than 29 columns.
  df4.drop(df4.columns[[2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]], axis=1, inplace=True)
  # Clean-up of the remaining text columns: remove newlines and thousands separators.
  # NOTE(review): whether r'\n' is treated as a regex (matching a newline) or as a
  # literal backslash-n depends on the pandas version's default for `regex` — confirm.
  df4[0] = df4[0].str.replace(r'\n', '')
  df4[1] = (df4[1].str.replace(r',', ''))
  df4[3] = df4[3].str.replace(r'\n', '')
  # NOTE(review): chained indexing with a tuple key; the intent appears to be
  # zeroing column 3 for rows 51, 57 and 58 — verify that this assignment takes effect.
  df4[3][51, 57, 58] = 0.00
  df4[3] = (df4[3].str.replace(r',', ''))
  # Any missing cell becomes 0.
  df4 = df4.fillna(0)


  # Keep the first 57 rows by position.
  df5 = df4.iloc[0:57, :]
  # Reorder rows by explicit index label (56 labels); labels absent from df5 produce
  # all-NaN rows.
  # NOTE(review): presumably this order matches the region order of the CSV that
  # loadData appends to — confirm.
  df5 = df5.reindex([5, 12, 8, 2, 9, 10, 17, 13, 20, 4, 3, 21, 11, 23, 7, 18, 16, 22, 19, 24, 15, 35, 25, 26, 6, 34, 32, 30, 14, 29, 37, 27, 38, 55, 28, 39, 42, 46, 31, 47, 45, 41, 44, 48, 36, 40, 49, 50, 52, 33, 43, 51, 53, 56, 57, 58])
  # Remove the last four entries of that ordering (positions 52..55), leaving 52 rows.
  df5.drop(df5.index[[52, 53, 54, 55]], inplace = True)

    
  return df5


In [0]:

def loadData(df5,
             input_path='/content/COVID-19_Data_04122020.csv',
             output_path=r'/content/COVID-19_Data_04132020.csv'):
  """Append one new day of scraped counts to the historical CSV and return model arrays.

  The last ``len(df5)`` rows of the historical data are used as a template for the
  new day: their ``days`` value is incremented by one and their ``cases`` and
  ``deaths`` are replaced with the second and third columns (by position) of
  ``df5``. The combined table is written to ``output_path``.

  Args:
    df5: DataFrame with one row per region, in the same order as the final block
      of rows in the historical CSV. Column position 1 holds case counts and
      position 2 holds death counts; values must be convertible to ``float``.
    input_path: CSV to read the historical data from.
    output_path: CSV to write the extended data to (without the index).

  Returns:
    ``(x, y)`` where ``x`` is a float array of shape ``(n, 3)`` holding
    ``pop``, ``density`` and ``days``, and ``y`` is a float array of shape
    ``(n, 1)`` holding ``cases``.

  Raises:
    ValueError: if ``df5`` has more rows than the historical data.
  """
  columns = ['example', 'state', 'pop', 'density', 'days', 'cases', 'deaths']
  data = pd.DataFrame(pd.read_csv(input_path), columns=columns)
  # Keep only the leading rows, as many as there are non-null 'example' values.
  data = data.head(data['example'].count())

  n_regions = len(df5)
  if n_regions > len(data):
    raise ValueError(
        "df5 has %d rows but the historical data only has %d" % (n_regions, len(data)))

  # Build the new day's rows as a separate frame. `assign` returns a new object,
  # so the historical rows in `data` are never written through a slice and the
  # count columns can be replaced regardless of their incoming dtype.
  latest = data.iloc[len(data) - n_regions:]
  latest = latest.assign(
      days=latest['days'] + 1,
      cases=df5.iloc[:, 1].to_numpy(),
      deaths=df5.iloc[:, 2].to_numpy(),
  )

  data = pd.concat([data, latest])
  data.to_csv(output_path, index=False)

  x = data[['pop', 'density', 'days']].to_numpy().astype(float)
  y = data[['cases']].to_numpy().astype(float).reshape(-1, 1)

  return x, y


In [0]:
def plotData(x, y, xlabel, ylabel):
  """Draw a scatter plot of ``y`` against ``x`` on the current axes.

  Args:
    x: values for the horizontal axis.
    y: values for the vertical axis.
    xlabel: text for the horizontal axis label.
    ylabel: text for the vertical axis label.
  """
  current_axes = plt.gca()
  current_axes.set_xlabel(xlabel)
  current_axes.set_ylabel(ylabel)
  # Each input is wrapped in a one-element list before being passed to scatter.
  plt.scatter([x], [y], marker='o')

In [0]:
# Pull the latest scraped counts and merge them into the historical dataset.
web_data = updateData()
x, y = loadData(web_data)

# Min-max scale features and target independently. The fitted scalers are reused
# by later cells to transform new inputs and to map predictions back to counts.
# NOTE(review): both scalers are fitted on the full dataset before the split, so
# test rows influence the scaling ranges — confirm this is acceptable.
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
xscale = scaler_x.fit_transform(x)
yscale = scaler_y.fit_transform(y)

# A fixed random_state makes the train/test partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(xscale, yscale, random_state=42)

In [46]:
def create_model(layers, activation):
  """Build and compile a fully connected regression network.

  Args:
    layers: iterable of layer widths. The first width becomes the input layer
      (3 input features, 'normal' kernel initializer, linear activation); every
      following width becomes a Dense layer using ``activation``.
    activation: activation used for all Dense layers after the first.

  Returns:
    A compiled Sequential model ending in a single linear output unit, trained
    on MSE loss with the Adam optimizer and reporting MSE and MAE.
  """
  network = Sequential()

  is_input_layer = True
  for width in layers:
    if is_input_layer:
      # The first layer fixes the input size and always stays linear.
      network.add(Dense(width, input_dim=3, kernel_initializer='normal', activation='linear'))
      is_input_layer = False
      continue
    network.add(Dense(width, activation=activation))

  # Single linear unit for the regression output.
  network.add(Dense(1, activation='linear'))
  network.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])

  return network

# Wrap the model builder in the scikit-learn style regressor interface.
model_checked = KerasRegressor(
    build_fn=create_model,
    verbose=0,
)
model_checked

<tensorflow.python.keras.wrappers.scikit_learn.KerasRegressor at 0x7f8cffc5e160>

In [0]:
## Automatic hyperparameter optimization with GridSearchCV

# Candidate architectures and activations for create_model.
layers = [(512, 256), (512, 256, 128)]
activations = ['sigmoid', 'selu']

# 2 layouts x 2 activations x 4 batch sizes x 3 epoch counts = 48 combinations,
# each evaluated on 3 shuffled folds.
param_grid = {
    'layers': layers,
    'activation': activations,
    'batch_size': [52, 104, 208, 416],
    'epochs': [500, 1000, 1500],
}
grid = GridSearchCV(
    estimator=model_checked,
    param_grid=param_grid,
    cv=KFold(n_splits=3, shuffle=True),
)
grid_result = grid.fit(X_train, y_train)
print([grid_result.best_score_, grid_result.best_params_])
fitted_model = grid_result.best_estimator_

In [0]:
## Manually update network parameters

# Same layout as create_model produces, with hand-picked widths: a linear input
# layer over 3 features, two sigmoid hidden layers and one linear output unit.
model = Sequential([
    Dense(2048, input_dim=3, kernel_initializer='normal', activation='linear'),
    Dense(1024, activation='sigmoid'),
    Dense(512, activation='sigmoid'),
    Dense(1, activation='linear'),
])
model.compile(loss='mse', optimizer='adam', metrics=['mse', 'mae'])

In [0]:


# Train for 1500 epochs with the batch size set to the length of X_train,
# using a 20% validation split; `history` keeps the per-epoch metrics.
history = model.fit(
    X_train,
    y_train,
    batch_size=len(X_train),
    epochs=1500,
    validation_split=0.2,
    verbose=1,
)


In [0]:
print(history.history.keys())
# Loss: training curve first, validation curve second (matches the legend order).
for metric_key in ('loss', 'val_loss'):
  plt.plot(history.history[metric_key])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [0]:
## Visualize Data
# Reload the unscaled arrays so the plots use raw feature values.
x, y = loadData(web_data)

# Cases against each feature column in turn: population, density, days.
plotData(x[:, 0], y, 'Population', 'Number of Cases')

# New figure with fixed axis limits for the density plot.
plt.figure()
plt.axis([0, 2000, 0, 15000])
plotData(x[:, 1], y, 'Population Density', 'Number of Cases')

# New figure with fixed axis limits for the days plot.
plt.figure()
plt.axis([0, 70, 0, 10000])
plotData(x[:, 2], y, 'Days Since First Case', 'Number of Cases')

In [0]:
## Compare prediction on entire training set to confirm linearity

x, y = loadData(web_data)

# Scale all samples at once and run a single batched prediction; `x` itself is
# left untouched so it still holds the raw feature values afterwards.
x_scaled = scaler_x.transform(x)
y_scaled_pred = model.predict(x_scaled)

# Invert the target normalization. `predictions` is a list with one length-1
# array per sample.
predictions = list(scaler_y.inverse_transform(y_scaled_pred.reshape(-1, 1)))

plt.figure()
plt.axis([0, 10000, 0, 10000])
plotData(y, predictions, 'measured', 'predicted')
for i, predicted in enumerate(predictions, start=1):
  print(i, predicted)


In [17]:
## Extrapolation predictions

pop = float(input("Enter the population: "))
dense = float(input("Enter the population density: "))
days = float(input("Enter the days since first case: "))

# Keep the raw feature row for display and scale a separate copy for the network,
# so the printed inputs are exactly what was entered.
x_spec = np.array([[pop, dense, days]])
x_spec_scaled = scaler_x.transform(x_spec)

# Predict in scaled space, then map the output back to a case count.
y_hat = model.predict(x_spec_scaled)
y_hat = scaler_y.inverse_transform(y_hat.reshape(-1, 1))
print("X: %s, Predicted = %s" % (x_spec[0], y_hat[0]))

Enter the popualtion: 5851754
Enter the population density: 108.05
Enter the days since first case: 54
X: [5.851754e+06 1.080500e+02 5.400000e+01], Predicted = [4285.8354]
