In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# Any results you write to the current directory are saved as output.

**Import Libraries**

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

**Open training file**

In [None]:
# Load the competition's training CSV into a DataFrame.
train_csv_path = "/kaggle/input/covid19-global-forecasting-week-3/train.csv"
df = pd.read_csv(train_csv_path)

In [None]:
# Preview the first rows of the freshly loaded training data.
df.head()

In [None]:
# Summarise the columns, dtypes and non-null counts of the raw frame.
df.info()

In [None]:
# Show the rows for Sweden. A bare last expression lets the notebook render
# the DataFrame with its rich (HTML) display instead of the plain-text dump
# that print() produces.
df[df["Country_Region"] == "Sweden"]

**Add a new time column named `days` holding the day number of each observation**

In [None]:
# Day number of each observation, counted from the earliest date in the data
# (earliest date -> day 1). Deriving it from the date column avoids hard-coding
# the number of regions and days, so the cell keeps working when the data set
# grows and when `df` has already been filtered down to a single region.
# NOTE(review): assumes the date column is named "Date", as in the Kaggle
# week-3 train.csv, and that every region starts on the same first date.
dates = pd.to_datetime(df["Date"])
df["days"] = (dates - dates.min()).dt.days + 1

In [None]:
# Check that the `days` column is now part of the frame.
df.info()

**Filter the data to Sweden, the country that will be forecast**

In [None]:
# Keep only the rows whose country is Sweden; `df` is narrowed from here on.
is_sweden = df["Country_Region"].eq("Sweden")
df = df.loc[is_sweden]

In [None]:
# Inspect the filtered frame (row count and dtypes after keeping Sweden only).
df.info()

In [None]:
# Extract the day axis and the target series as float32 NumPy arrays.
# Columns are selected by name rather than by position so the cell does not
# silently pick the wrong column if the column order changes.
# NOTE(review): "ConfirmedCases" is the column at position 4 in the Kaggle
# week-3 train.csv; "days" is the column added earlier in this notebook.
time = df["days"].to_numpy(dtype="float32")
series = df["ConfirmedCases"].to_numpy(dtype="float32")

# Number of time steps available for the selected country.
time.shape

In [None]:
# Plot the full confirmed-cases series against the day number.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(time, series)
ax.set_title("Confirmed Cases in Sweden")
ax.set_ylabel("Confirmed Cases")
ax.set_xlabel("Days")

**Split the series into a training set and a validation set. The first 71 days (`split_time = 71`) are used for training; the remaining days are held out and predicted. The held-out days serve to select which forecasting method is best.**

In [None]:
# Index of the first validation sample: everything before it is training data.
split_time = 71

time = np.array(time)
series = np.array(series)

# Chronological split: no shuffling, the tail of the series is held out.
time_train, time_valid = time[:split_time], time[split_time:]
x_train, x_valid = series[:split_time], series[split_time:]

In [None]:
# Settings for the sliding-window input pipeline:
#   window_size          - number of past values fed to the model per sample
#   batch_size           - samples per training batch
#   shuffle_buffer_size  - buffer size handed to Dataset.shuffle
window_size, batch_size = 2, 3
shuffle_buffer_size = 71

In [None]:
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
  """Build a shuffled, batched tf.data pipeline of (window, next value) pairs.

  Args:
    series: 1-D sequence of values to slide the window over.
    window_size: number of consecutive values used as model input.
    batch_size: number of (input, target) pairs per batch.
    shuffle_buffer: buffer size passed to ``Dataset.shuffle``.

  Returns:
    A ``tf.data.Dataset`` whose elements are ``(inputs, targets)`` batches,
    where each input is ``window_size`` consecutive values and its target is
    the value that immediately follows them.
  """
  span = window_size + 1  # the inputs plus the single target value

  # Slide a window of `span` values one step at a time, dropping incomplete
  # windows at the end, and turn each window into one tensor.
  windows = (
      tf.data.Dataset.from_tensor_slices(series)
      .window(span, shift=1, drop_remainder=True)
      .flat_map(lambda sub: sub.batch(span))
  )

  # Shuffle whole windows first, then split each into (inputs, target).
  examples = windows.shuffle(shuffle_buffer).map(lambda w: (w[:-1], w[-1]))

  return examples.batch(batch_size).prefetch(1)

In [None]:
# Training pipeline built from the training part of the series only.
dataset = windowed_dataset(
    series=x_train,
    window_size=window_size,
    batch_size=batch_size,
    shuffle_buffer=shuffle_buffer_size,
)

In [None]:
# Show the raw training values that the windows are built from.
print(x_train)

**First method: single-neuron regression**

In [None]:
# One Dense unit taking the last `window_size` values as input.
l0 = tf.keras.layers.Dense(units=1, input_shape=[window_size])

model = tf.keras.models.Sequential()
model.add(l0)

# Huber loss with the Adam optimizer, trained for 100 epochs.
model.compile(optimizer="adam", loss=tf.keras.losses.Huber())
model.fit(dataset, epochs=100)

In [None]:
# Forecast the validation period with the single-neuron model.
#
# The window starting at index i holds series[i:i + window_size] and predicts
# series[i + window_size]; starting at split_time - window_size therefore
# aligns the first prediction with time_valid[0]. All validation windows are
# stacked into one array so the model is called once instead of once per time
# step. No loop variable named `time` is used, so the global `time` array of
# day numbers stays intact and the split cell can be re-run safely.
window_starts = np.arange(split_time - window_size, len(series) - window_size)
valid_windows = series[window_starts[:, None] + np.arange(window_size)]

# `forecast` is the raw model output, one row per validation day;
# `results` flattens it to one value per validation day.
forecast = model.predict(valid_windows)
results = forecast[:, 0]
print(forecast)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(time_valid, x_valid, label="Real")
ax.plot(time_valid, results, label="Forecasted")
ax.set_title("Sweden Single Neuron Forecasting")
ax.set_ylabel("Confirmed Cases")
ax.set_xlabel("Days")
ax.legend()

**Simple Neural Network forecasting**

In [None]:
# Start from a clean Keras state before building the next model.
tf.keras.backend.clear_session()

# Two hidden ReLU layers (150 and 10 units) followed by one output unit.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(150, activation="relu", input_shape=[window_size]))
model.add(tf.keras.layers.Dense(10, activation="relu"))
model.add(tf.keras.layers.Dense(1))

# Same loss, optimizer and epoch count as the single-neuron model.
model.compile(optimizer="adam", loss=tf.keras.losses.Huber())
model.fit(dataset, epochs=100)

In [None]:
# Forecast the validation period with the dense network.
#
# The window starting at index i holds series[i:i + window_size] and predicts
# series[i + window_size]; starting at split_time - window_size therefore
# aligns the first prediction with time_valid[0]. All validation windows are
# stacked into one array so the model is called once instead of once per time
# step. No loop variable named `time` is used, so the global `time` array of
# day numbers stays intact and the split cell can be re-run safely.
window_starts = np.arange(split_time - window_size, len(series) - window_size)
valid_windows = series[window_starts[:, None] + np.arange(window_size)]

# `forecast` is the raw model output, one row per validation day;
# `results` flattens it to one value per validation day.
forecast = model.predict(valid_windows)
results = forecast[:, 0]
print(forecast)

fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title("Sweden SNN Forecasting")
ax.set_ylabel("Confirmed Cases")
ax.set_xlabel("Days")
ax.plot(time_valid, x_valid, label="Real")
ax.plot(time_valid, results, label="Forecasted")
ax.legend()

**Bidirectional LSTM forecasting**

In [None]:
# Start from a clean Keras state and rebuild the training pipeline.
tf.keras.backend.clear_session()
dataset = windowed_dataset(
    x_train,
    window_size=window_size,
    batch_size=batch_size,
    shuffle_buffer=shuffle_buffer_size,
)

model = tf.keras.models.Sequential()
# Append a trailing axis to the input before it reaches the recurrent layers.
model.add(
    tf.keras.layers.Lambda(lambda t: tf.expand_dims(t, axis=-1), input_shape=[None])
)
# First recurrent layer returns the whole sequence so the second can consume it.
model.add(
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(150, return_sequences=True, activation="relu")
    )
)
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(50, activation="relu")))
model.add(tf.keras.layers.Dense(1))
# Multiply the network output by 100.
model.add(tf.keras.layers.Lambda(lambda out: out * 100.0))

# Huber loss, Adam optimizer, MAE tracked as a metric; `history` keeps the
# object returned by fit().
model.compile(optimizer="adam", loss=tf.keras.losses.Huber(), metrics=["mae"])
history = model.fit(dataset, epochs=1000)

In [None]:
# Forecast the validation period with the bidirectional LSTM.
#
# The window starting at index i holds series[i:i + window_size] and predicts
# series[i + window_size]; starting at split_time - window_size therefore
# aligns the first prediction with time_valid[0]. All validation windows are
# stacked into one array so the model is called once instead of once per time
# step. No loop variable named `time` is used, so the global `time` array of
# day numbers stays intact and the split cell can be re-run safely.
window_starts = np.arange(split_time - window_size, len(series) - window_size)
valid_windows = series[window_starts[:, None] + np.arange(window_size)]

# `forecast` is the raw model output, one row per validation day;
# `results` flattens it to one value per validation day.
forecast = model.predict(valid_windows)
results = forecast[:, 0]
print(forecast)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(time_valid, x_valid, label="Real")
ax.plot(time_valid, results, label="Forecasted")
# Title spelling corrected ("Bidirectional").
ax.set_title("Bidirectional LSTM Forecasting")
ax.set_ylabel("Confirmed Cases")
ax.set_xlabel("Days")
ax.legend()

In [None]:
# Print the forecast and the true validation values, separated by a blank line.
print(results, "", x_valid, sep="\n")

# Signed error on the final validation day, absolute and relative.
last_forecast, last_actual = results[-1], x_valid[-1]
x = last_forecast - last_actual
y = x / last_actual

print("the difference of last day is: ", x)
print("error is: ", round(y*100,2), "%")




In [None]:
# Signed forecast error for every validation day.
my_list = results - x_valid
print(my_list)

# The same errors relative to the true value, as percentages rounded to
# two decimals.
my_list_per = my_list / x_valid
rounded = [round(pct, 2) for pct in my_list_per * 100]
print(rounded)

# Mean of the rounded signed percentages (positive and negative errors can
# offset each other in this average).
mean1 = np.mean(np.array(rounded))
print("average error for predictions were: ", mean1, "%")