In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## COVID-19 Cases Forecast with ARIMA, LSTM, Prophet and Holt-Winters

****
### 1. Read Data
***

In [None]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

from IPython.display import HTML

# Emit a script that hides all `div.input` elements once the page is ready, plus a
# button that toggles them back and forth. The script relies on a global jQuery `$`
# being available in the rendering front end — TODO confirm for the target viewer.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click to show code."></form>''')

In [None]:
%%javascript
// Make `_should_scroll` answer false whatever line count it is given —
// presumably this keeps long outputs from being placed in a scroll box.
IPython.OutputArea.prototype._should_scroll = (lines) => false;

In [None]:
import pandas as pd
from statistics import mean 
# Load the case table from the "corona-virus-report" Kaggle dataset into `df`.
df=pd.read_csv('/kaggle/input/corona-virus-report/covid_19_clean_complete.csv')

In [None]:
# Keep only the rows whose Country/Region is China; copy so the column
# assignment below does not write into a view of `df`.
df_china = df[df['Country/Region'] == 'China'].copy()
# Parse the dates before grouping: groupby sorts its keys, and raw date strings
# sort lexicographically, which is chronological only for zero-padded ISO dates.
df_china['Date'] = pd.to_datetime(df_china['Date'])
# One row per date with every numeric column summed over the China rows;
# numeric_only=True keeps text columns such as Country/Region out of the sum.
df_china_grouped = df_china.groupby('Date').sum(numeric_only=True)

### Active Case Predictions for China

In [None]:
from sklearn.model_selection import TimeSeriesSplit
from numpy import array

# Cross-validation splitter with scikit-learn's default settings.
tscv = TimeSeriesSplit()

# One row per model, filled in as each model is evaluated.
scores = pd.DataFrame(columns=['Model', 'R2'])

# Per-fold train/test arrays; filled by the split loop further down in this cell
# and iterated by the ARIMA, LSTM and Prophet cells.
X_train_list, X_test_list = [], []
y_train_list, y_test_list = [], []

# Target series: the 'Active' column of the per-date China totals.
series = df_china_grouped['Active']
# split a univariate sequence into samples
def split_sequence(sequence, n_steps):
    """Turn a univariate sequence into supervised (window, next value) samples.

    Each sample pairs ``n_steps`` consecutive observations with the single
    observation that follows them.

    Args:
        sequence: 1-D sequence of observations (list, NumPy array or pandas
            Series). Values are read by position; a Series index is ignored.
        n_steps: Number of consecutive observations in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X[k]`` holds
        ``sequence[k:k + n_steps]`` and ``y[k]`` holds ``sequence[k + n_steps]``.
        Both are empty when the sequence has ``n_steps`` or fewer elements.
    """
    # Work on a plain array so integer subscripts are always positional; on a
    # pandas Series an integer key is interpreted as an index label first.
    values = np.asarray(sequence)
    n_samples = max(len(values) - n_steps, 0)
    X = [values[start:start + n_steps] for start in range(n_samples)]
    y = [values[start + n_steps] for start in range(n_samples)]
    return np.array(X), np.array(y)

# Length of each input window (number of past observations per sample).
n_steps = 25
# Slice the series into (window, next value) samples.
X, y = split_sequence(series, n_steps)

# Store the arrays of every TimeSeriesSplit fold so later cells can iterate them.
for train_index, test_index in tscv.split(X):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    for fold_store, fold_part in (
        (X_train_list, X_train),
        (X_test_list, X_test),
        (y_train_list, y_train),
        (y_test_list, y_test),
    ):
        fold_store.append(fold_part)
    

## 1.1 Plot Data of China

In [None]:
from matplotlib.pyplot import figure
# Import pyplot in this cell: it is the first one to call plt.show(), and no
# earlier cell brings `plt` into scope.
import matplotlib.pyplot as plt

# Plot every column of the per-date China totals on one set of axes.
df_china_grouped.plot(figsize=(10,5),title='China COVID cases')
plt.show()

***
### Using ARIMA for COVID-19 Prediction
***

In [None]:
import matplotlib.pyplot as plt
# Active cases per date, taken from the per-date China totals.
series = df_china_grouped.loc[:, 'Active']
series.plot(title='China active cases', figsize=(15, 5))
plt.show()

In [None]:

from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot
from pandas.plotting import autocorrelation_plot
 
def parser(x):
    """Parse a ``'<digit>-<month>'`` string into a datetime.

    The text ``'190'`` is prepended to ``x`` and the result is read with the
    ``'%Y-%m'`` format, so ``'1-01'`` becomes January 1901.
    Not called anywhere in the visible notebook cells.
    """
    stamp = '190' + x
    return datetime.strptime(stamp, '%Y-%m')
 

# Draw the autocorrelation plot of the active-case series.
autocorrelation_plot(series)
pyplot.show()

### Plotting Arima Residuals

In [None]:

from pandas import read_csv
from pandas import datetime
from pandas import DataFrame
from statsmodels.tsa.arima_model import ARIMA
from matplotlib import pyplot

# Target series for the model: active cases per date.
series = df_china_grouped['Active']


def parser(x):
    """Parse a ``'<digit>-<month>'`` string into a datetime.

    ``'190'`` is prepended to ``x`` before parsing with ``'%Y-%m'``.
    Not called anywhere in the visible notebook cells.
    """
    stamp = '190' + x
    return datetime.strptime(stamp, '%Y-%m')


# Fit an ARIMA model of order (5, 1, 0) to the whole series and print its summary.
model = ARIMA(series, order=(5, 1, 0))
model_fit = model.fit(disp=0)
print(model_fit.summary())

# Show the fitted model's residuals twice: first with the default plot kind,
# then as a kernel density estimate.
residuals = DataFrame(model_fit.resid)
for plot_options in (
    {'figsize': (10, 5), 'title': 'Residuals'},
    {'kind': 'kde', 'title': 'Density'},
):
    residuals.plot(**plot_options)
    pyplot.show()


In [None]:
from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score


def parser(x):
    """Parse a ``'<digit>-<month>'`` string into a datetime.

    ``'190'`` is prepended to ``x`` before parsing with ``'%Y-%m'``.
    Not called anywhere in the visible notebook cells.
    """
    return datetime.strptime('190' + x, '%Y-%m')


# R2 of the ARIMA forecasts on each cross-validation fold.
arima_scores = []
for count, (X_train, y_train, X_test, y_test) in enumerate(
        zip(X_train_list, y_train_list, X_test_list, y_test_list), start=1):
    # Seed the history with the window that immediately precedes the first test target.
    # NOTE(review): X_train / y_train are never used, so each fold's model only
    # sees this window plus the test observations revealed so far — confirm intended.
    history = list(X_test[0])
    test = list(y_test)

    # Walk forward: refit on everything observed so far, forecast one step
    # ahead, then reveal the true value before the next step.
    predictions = []
    for obs in test:
        model = ARIMA(history, order=(5, 1, 0))
        model_fit = model.fit(disp=0)
        output = model_fit.forecast()
        # output[0] may be a length-1 array rather than a scalar; flatten it so
        # `predictions` is a plain list of floats.
        yhat = float(np.ravel(output[0])[0])
        predictions.append(yhat)
        history.append(obs)

    # Actual values in the default colour, forecasts in red.
    fig, ax = pyplot.subplots()
    fig.suptitle('Arima  prediction sample :' + str(count), fontsize=12)
    ax.plot(test)
    ax.plot(predictions, color='red')
    pyplot.show()

    r2_score_ = r2_score(test, predictions)
    arima_scores.append(r2_score_)
    print('R2 Score:', r2_score_)

# Record the mean R2 over all folds. Any existing 'Arima' row is replaced so that
# re-running this cell does not duplicate it; pd.concat is used because
# DataFrame.append is not available in every pandas version.
arima_row = pd.DataFrame([{'Model': 'Arima', 'R2': mean(arima_scores)}])
scores = pd.concat([scores[scores['Model'] != 'Arima'], arima_row], ignore_index=True)
print(scores)

***
### LSTM Prediction
***

In [None]:

# univariate convlstm example
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import ConvLSTM2D
import numpy as np
from sklearn.metrics import r2_score

from sklearn.model_selection import TimeSeriesSplit





# R2 of the ConvLSTM forecasts on each cross-validation fold.
lstm_scores = []

tscv = TimeSeriesSplit()

# ConvLSTM2D input layout: [samples, timesteps, rows, columns, features].
n_features = 1
n_seq = 1
# Must equal the window length used when the samples were built.
n_steps = 25


def _build_convlstm_model():
    """Return a newly initialised, compiled ConvLSTM regressor with one output."""
    net = Sequential()
    net.add(ConvLSTM2D(filters=64, kernel_size=(1, 2), activation='relu',
                       input_shape=(n_seq, 1, n_steps, n_features)))
    net.add(Flatten())
    net.add(Dense(1))
    net.compile(optimizer='adam', loss='mse')
    return net


for count, (X_train, y_train, X_test, y_test) in enumerate(
        zip(X_train_list, y_train_list, X_test_list, y_test_list), start=1):
    # Reshape [samples, timesteps] into the 5-D layout the network expects.
    X_train = X_train.reshape((X_train.shape[0], n_seq, 1, n_steps, n_features))
    X_test = X_test.reshape((X_test.shape[0], n_seq, 1, n_steps, n_features))

    # Build a new model for every fold so that no weights carry over from the
    # previous fold's training and each fold is evaluated independently.
    # NOTE(review): no random seed is set, so scores vary from run to run.
    model = _build_convlstm_model()
    model.fit(X_train, y_train, epochs=500, verbose=0)

    # The network has a single output unit; flatten to one value per sample.
    y_pred = model.predict(X_test).ravel()

    # Actual values in the default colour, forecasts in red.
    fig, ax = pyplot.subplots()
    fig.suptitle('LSTM  prediction sample :' + str(count), fontsize=12)
    ax.plot(y_test)
    ax.plot(y_pred, color='red')
    pyplot.show()

    r2_score_ = r2_score(list(y_test), y_pred)
    lstm_scores.append(r2_score_)
    print('R2 Score: %.3f' % r2_score_)

# Record the mean R2 over all folds. Any existing 'LSTM' row is replaced so that
# re-running this cell does not duplicate it; pd.concat is used because
# DataFrame.append is not available in every pandas version.
lstm_row = pd.DataFrame([{'Model': 'LSTM', 'R2': mean(lstm_scores)}])
scores = pd.concat([scores[scores['Model'] != 'LSTM'], lstm_row], ignore_index=True)
print(scores)

***
### Prediction with Prophet
***

In [None]:
import pandas as pd
from fbprophet import Prophet


In [None]:
from sklearn.metrics import r2_score
import datetime
from tqdm.auto import tqdm

# R2 of the Prophet forecasts on each cross-validation fold.
prophet_r2 = []
for count, (X_train, y_train, X_test, y_test) in enumerate(
        zip(X_train_list, y_train_list, X_test_list, y_test_list), start=1):
    # Prophet needs a date column, but the windows carry no dates, so synthetic
    # consecutive daily stamps are generated once per fold and shared by every window.
    # NOTE(review): anchoring them at today's date makes the weekday alignment,
    # and hence possibly the fit, depend on when the notebook is run.
    base = datetime.datetime.today()
    date_list = [base + datetime.timedelta(days=offset)
                 for offset in range(len(X_test[0]))]

    y_pred = []
    for window in tqdm(X_test):
        ts = pd.DataFrame({'ds': date_list, 'y': pd.to_numeric(list(window))})

        m = Prophet()
        m.fit(ts)

        # Ask for one period beyond the history and predict over the whole frame.
        future = m.make_future_dataframe(periods=1)
        forecast = m.predict(future)

        # The one-step-ahead forecast is the last row. Its position depends on
        # the window length, not on the number of test samples.
        y_pred.append(forecast['yhat'].iloc[-1])

    # Actual values in the default colour, forecasts in red.
    fig, ax = pyplot.subplots()
    fig.suptitle('Prophet  prediction sample :' + str(count), fontsize=12)
    ax.plot(y_test)
    ax.plot(y_pred, color='red')
    pyplot.show()

    r2_score_ = r2_score(y_test, y_pred)
    prophet_r2.append(r2_score_)
    print('R2 Score: %.3f' % r2_score_)


# Record the mean R2 over all folds. Any existing 'Prophet' row is replaced so that
# re-running this cell does not duplicate it; pd.concat is used because
# DataFrame.append is not available in every pandas version.
prophet_row = pd.DataFrame([{'Model': 'Prophet', 'R2': mean(prophet_r2)}])
scores = pd.concat([scores[scores['Model'] != 'Prophet'], prophet_row], ignore_index=True)

***
### Models Evaluation
***

In [None]:
# Final comparison table: one row per model with its mean R2 across the folds.
print(scores)