# EDA & Time Series Analysis: Using Prophet

In [None]:
# Libraries Required
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from mpl_toolkits.mplot3d import Axes3D

In [None]:
# Load the three wide-format global time series (one column per date).
# Source: https://github.com/CSSEGISandData/COVID-19
confirmed, recovered, deaths = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/covid19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
        "../input/covid19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv",
        "../input/covid19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
    )
)

In [None]:
# Peek at the first ten rows of the wide confirmed-cases table
# (swap in `deaths` or `recovered` to inspect the other two tables).
confirmed.iloc[:10]

In [None]:
# Reshape the wide confirmed table (one column per date) into long format:
# one row per (Province/State, Country/Region, Lat, Long, date).
confirmed2 = pd.melt(
    confirmed,
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'],
    var_name='Dates',
    value_name='Confirmed Cases',
)

# Before conversion 'Dates' holds the original column labels (dtype object).
confirmed2.info()

# Parse the labels with an explicit format instead of the deprecated
# `infer_datetime_format` flag. Assumes the JHU month/day/two-digit-year
# headers (e.g. "1/22/20"); any other label raises instead of being guessed.
confirmed2['Dates'] = pd.to_datetime(confirmed2['Dates'], format='%m/%d/%y')

# After conversion 'Dates' should be reported as datetime64.
confirmed2.info()

confirmed2.head()

In [None]:
# Reshape the wide deaths table (one column per date) into long format:
# one row per (Province/State, Country/Region, Lat, Long, date).
deaths2 = pd.melt(
    deaths,
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'],
    var_name='Dates',
    value_name='Death Cases',
)

# Before conversion 'Dates' holds the original column labels (dtype object).
deaths2.info()

# Parse the labels with an explicit format instead of the deprecated
# `infer_datetime_format` flag. Assumes the JHU month/day/two-digit-year
# headers (e.g. "1/22/20"); any other label raises instead of being guessed.
deaths2['Dates'] = pd.to_datetime(deaths2['Dates'], format='%m/%d/%y')

# After conversion 'Dates' should be reported as datetime64.
deaths2.info()

In [None]:
# Reshape the wide recovered table (one column per date) into long format:
# one row per (Province/State, Country/Region, Lat, Long, date).
recovered2 = pd.melt(
    recovered,
    id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'],
    var_name='Dates',
    value_name='Recovered Cases',
)

# Before conversion 'Dates' holds the original column labels (dtype object).
recovered2.info()

# Parse the labels with an explicit format instead of the deprecated
# `infer_datetime_format` flag. Assumes the JHU month/day/two-digit-year
# headers (e.g. "1/22/20"); any other label raises instead of being guessed.
recovered2['Dates'] = pd.to_datetime(recovered2['Dates'], format='%m/%d/%y')

# After conversion 'Dates' should be reported as datetime64.
recovered2.info()

recovered2.head()

In [None]:
# Combine confirmed, death and recovered counts into one long table.
#
# The counts are joined on region + date rather than copied across by row
# position: positional assignment silently pairs the wrong regions whenever
# the three source tables do not contain exactly the same rows in the same
# order. Lat/Long are left out of the key so that small coordinate
# differences between files cannot prevent a match.
# pandas matches missing (NaN) 'Province/State' keys with each other, so
# country-level rows still line up.
merge_keys = ['Province/State', 'Country/Region', 'Dates']

# Left joins keep every confirmed row, in its original order; regions or
# dates absent from the other tables get NaN for that count.
covid19 = (
    confirmed2
    .merge(deaths2[merge_keys + ['Death Cases']], on=merge_keys, how='left')
    .merge(recovered2[merge_keys + ['Recovered Cases']], on=merge_keys, how='left')
)

## Sneak Peek Into Some of the Data Variables


In [None]:
# Summary statistics for every column, numeric or not.
summary_all = covid19.describe(include='all')
summary_all

In [None]:
# Default describe() output, transposed so each described column becomes a row.
covid19.describe().T

In [None]:
# Persist the merged long-format table (without the index) for later reuse.
covid19.to_csv(path_or_buf="covid19_timeseries.csv", index=False)


## Relationship Amongst Variables

From the Correlation Heatmap it can be seen that:
 * With increase in Latitude & increase in Longitude Confirmed Cases increases.
 * With increase in Latitude & decrease in Longitude Death Cases increases.
 * With increase in Latitude & decrease in Longitude Recovered Cases increases.

In other words:
 * The **Confirmed Cases** peaks with increase in Longitude, and then drops with decrease in  Longitude and increase in Latitude.
 * The **Death Cases** peaks with decrease in Longitude, and then drops further with decrease in Longitude, (clearly indicating some other factors that might have influenced)
 * The **Recovered Cases** peaks with decrease in Longitude and drops with increase in Lat & Long. 


## Correlation Matrix

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Pearson correlations between the numeric columns only. The frame also holds
# text (region names) and datetime columns, which recent pandas versions no
# longer drop silently inside corr(), so they are filtered out explicitly.
corr = covid19.select_dtypes(include='number').corr()
fig, ax = plt.subplots(1, 1, figsize=(7, 6))

hm = sns.heatmap(
    corr,
    ax=ax,            # draw into the axes created above
    cmap="coolwarm",  # diverging colour map
    annot=True,       # print the coefficient inside each cell
    fmt='.2f',        # two decimals for the annotations
    linewidths=.05,
)

# Leave room above the axes for the figure-level title.
fig.subplots_adjust(top=0.93)
fig.suptitle('COVID19 Cases Correlation Heatmap with Lat Long',
             fontsize=14,
             fontweight='bold')
# No plt.figure() here: it would only open an extra, empty figure.
plt.show()

## Pairwise Plots
From the Pairwise Plots, it looks like the number of Death Cases in the dataset is not as high as the number of Confirmed & Recovered Cases.

In [None]:
# Pairwise regression plots for the three case counts, with a KDE on the diagonal.
# Silence matplotlib's "too many open figures" warning for the rest of the session.
plt.rcParams.update({'figure.max_open_warning': 0})
cols = ['Confirmed Cases', 'Recovered Cases', 'Death Cases']

pp = sns.pairplot(
    covid19[cols],
    kind="reg",                # scatter plus fitted regression line off the diagonal
    diag_kind="kde",           # density estimate on the diagonal
    diag_kws={'shade': True},  # filled density curves
    palette="Set3",
)

# Figure-level title; the top margin is tightened so it sits just above the grid.
fig = pp.fig
fig.suptitle('COVID19 Cases Attributes Pairwise Plots', fontsize=14, fontweight='bold')
fig.subplots_adjust(top=0.97, wspace=0.4)

## Plotting Univariate Data

In [None]:
import seaborn as sns

# Switch the seaborn theme to "ticks" and enable the short colour codes.
sns.set(style="ticks", color_codes=True)

# Pair plot over the whole table, with kernel density estimates on the diagonal.
sns.pairplot(data=covid19, diag_kind="kde")

## Confirmed Cases Across Time with & without 95% CI Around Mean

The default behaviour in seaborn is to aggregate the multiple measurements at each x value by plotting the mean and the 95% confidence interval around the mean. Here each date has one measurement per region, so the line shows the mean across regions.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="darkgrid")

# Each date has one row per region, so seaborn draws the mean across regions
# as the line and shades its 95% confidence interval.
sns.lineplot(x="Dates", y="Confirmed Cases", data=covid19)

# Rotate the date tick labels so they do not overlap. Only `rotation` is
# passed: xticks() forwards extra keywords to the tick-label Text objects,
# where `x` would be a position, not a column name.
plt.xticks(rotation='vertical');

## Confirmed Cases Across Time without Confidence Interval Around Mean


In [None]:
# Mean confirmed cases per date across regions, without the confidence band.
sns.relplot(x="Dates", y="Confirmed Cases", ci=None, kind="line", data=covid19);

# Rotate the date tick labels so they do not overlap. Only `rotation` is
# passed: xticks() forwards extra keywords to the tick-label Text objects,
# where `x` would be a position, not a column name.
plt.xticks(rotation='vertical');

##  Recovered Cases Across Time with 95% CI Around Mean


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="darkgrid")

# Each date has one row per region, so seaborn draws the mean across regions
# as the line and shades its 95% confidence interval.
sns.lineplot(x="Dates", y="Recovered Cases", data=covid19)

# Rotate the date tick labels so they do not overlap. Only `rotation` is
# passed: xticks() forwards extra keywords to the tick-label Text objects,
# where `x` would be a position, not a column name.
plt.xticks(rotation='vertical');

## Recovered Cases Across Time without Confidence Interval Around Mean

In [None]:
# Mean recovered cases per date across regions, without the confidence band.
sns.relplot(x="Dates", y="Recovered Cases", ci=None, kind="line", data=covid19);

# Rotate the date tick labels so they do not overlap. Only `rotation` is
# passed: xticks() forwards extra keywords to the tick-label Text objects,
# where `x` would be a position, not a column name.
plt.xticks(rotation='vertical');

## Death Cases Across Time with 95% CI Around Mean

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

sns.set(style="darkgrid")

# Each date has one row per region, so seaborn draws the mean across regions
# as the line and shades its 95% confidence interval.
sns.lineplot(x="Dates", y="Death Cases", data=covid19)

# Rotate the date tick labels so they do not overlap. Only `rotation` is
# passed: xticks() forwards extra keywords to the tick-label Text objects,
# where `x` would be a position, not a column name.
plt.xticks(rotation='vertical');

## Death Cases Across Time without Confidence Interval Around Mean


In [None]:
# Mean death cases per date across regions, without the confidence band.
sns.relplot(x="Dates", y="Death Cases", ci=None, kind="line", data=covid19);

# Rotate the date tick labels so they do not overlap. Only `rotation` is
# passed: xticks() forwards extra keywords to the tick-label Text objects,
# where `x` would be a position, not a column name.
plt.xticks(rotation='vertical');

In [None]:
# Keep just the date and the three case counts for the comparison plots.
relevant_columns = ['Dates', 'Confirmed Cases', 'Recovered Cases', 'Death Cases']
covid19cases_subset = covid19[relevant_columns]
covid19cases_subset.head()

## Visual Comparison

In [None]:
import matplotlib.pyplot as plt
import matplotlib

# Reset seaborn's defaults, then apply the FiveThirtyEight matplotlib style.
sns.set()
plt.style.use('fivethirtyeight')

plt.figure(figsize=(15, 10))

# One line per case type on shared axes. x and y are passed by keyword:
# newer seaborn releases no longer accept them positionally.
for column, label in [
    ('Confirmed Cases', 'CONFIRMED'),
    ('Death Cases', 'DEATH'),
    ('Recovered Cases', 'RECOVERED'),
]:
    sns.lineplot(x='Dates', y=column, data=covid19cases_subset, label=label)

# Set the axis labels after plotting so they replace the column names
# seaborn puts on the axes.
plt.xlabel('Date'); plt.ylabel('Cases Number'); plt.title('All Cases Over Time')
plt.legend();

In [None]:
# Keep only the rows in which none of the three counts equals zero, i.e. a
# row is dropped as soon as ANY one of its counts is 0. Missing (NaN) counts
# do not compare equal to zero, so those rows are kept.
has_confirmed = covid19cases_subset['Confirmed Cases'] != 0
has_recovered = covid19cases_subset['Recovered Cases'] != 0.0
has_deaths = covid19cases_subset['Death Cases'] != 0
covid19cases_subset = covid19cases_subset.loc[has_confirmed & has_recovered & has_deaths]

# Show the tail of the filtered table.
covid19cases_subset.tail(1000)

## Kernel Density Function

The Kernel Density Function gives an interesting sneak peek at the Probability Density Function of the Confirmed, Recovered and Death Cases across the entire dataset. At least for the **Confirmed Cases** and the **Recovered Cases** the data seems to be Skewed to the Right, indicating that the Mean of the Cases Distribution would be higher than the Median and the Mode. We need to deep dive into the **Death Cases** because that seems to be Right Skewed too but with some intermittent high spikes. The number of **Confirmed Cases** also spans a larger range than both the Recovered and the Death Cases, which could be an interesting reason for why most of the initial cases were ignored by most of the countries.
Seems like the Probability Density Distribution for the Death Cases were at least 2 times higher than that of the Confirmed Cases. And the Confirmed and Recovered Cases were almost going hand in hand in terms of their Probability Density Function.

In [None]:
# Kernel density plots of the case counts, drawn as two figures with a
# 1x2 subplot grid each: Confirmed + Death on the first, Recovered alone
# (left slot only) on the second.
density_layout = [
    # per figure: (subplot position, column, colour)
    [(1, 'Confirmed Cases', 'b'), (2, 'Death Cases', 'r')],
    [(1, 'Recovered Cases', 'g')],
]

for panels in density_layout:
    fig = plt.figure(figsize=(30, 8))
    title = fig.suptitle("Cases Distribution", fontsize=14)
    fig.subplots_adjust(top=0.85, wspace=0.3)

    for position, column, color in panels:
        density_ax = fig.add_subplot(1, 2, position)
        # Titles and labels are set before plotting, as in the original ordering.
        density_ax.set_title(f"{column} Distribution")
        density_ax.set_xlabel(column)
        density_ax.set_ylabel("Probability Density")
        # NOTE(review): the bandwidth is extremely small (2.9e-14) — confirm it is intended.
        sns.kdeplot(covid19[column], kernel='gau', bw=.000000000000029000000,
                    ax=density_ax, shade=True, color=color)

# Time Series Analysis using Prophet

In [None]:
# Loading Datasets
#import os
#for dirname, _, filenames in os.walk('../output'):
    #for filename in filenames:
        #print(os.path.join(dirname, filename))
        
#covid19 = pd.read_csv("../input/covid19-timeseries/covid19_timeseries.csv")
#../input/covid19-timeseries/covid19_timeseries.csv
#./
#./
#./
#./covid19_timeseries.csv

In [None]:
# Restart point for the forecasting part: reload the merged long-format table
# from disk so the sections below can run without redoing the EDA above.
import os

# Previously saved snapshot, attached to the notebook as an input dataset.
SAVED_DATASET_CSV = "../input/covid19-timeseries/covid19_timeseries.csv"
# File written by the "Writing the Dataframe to CSV File" cell of this notebook.
LOCAL_CSV = "covid19_timeseries.csv"

# Prefer the attached snapshot when it is present; otherwise fall back to the
# CSV produced earlier in this run so the notebook still works on its own.
# (The os.walk() loop that used to be here was pointed at a file path, which
# yields nothing, so it never printed anything.)
timeseries_csv = SAVED_DATASET_CSV if os.path.exists(SAVED_DATASET_CSV) else LOCAL_CSV
print(timeseries_csv)

# Parse 'Dates' on load so the column is datetime again rather than text.
covid19 = pd.read_csv(timeseries_csv, parse_dates=['Dates'])

In [None]:
# Libraries Required
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from mpl_toolkits.mplot3d import Axes3D

In [None]:
# Forecasting stack: Prophet for the models, matplotlib for the plots.
import pandas as pd
from fbprophet import Prophet
from matplotlib import pyplot as plt

# Use the FiveThirtyEight matplotlib style for the figures drawn from here on.
plt.style.use('fivethirtyeight')

## Additive & Multiplicative Prophet Models

Prophet with The Additive Model, that we used here for the purpose of building the Model: y(t) = g(t) + s(t) + h(t) + e(t),
where: 
* g(t) represents trend
* s(t) represents periodic component
* h(t) holiday related events or may be social distancing in this case. 
* e(t) is the error
Prophet with **The Multiplicative Model**: y(t) = g(t) * s(t) * h(t) * e(t)

For the purpose of understanding the data, ran the Models with both Additive and Multiplicative Models. Usually **Multiplicative Models** are used when the magnitude of the seasonal pattern in the data depends on the magnitude of the data. Whereas **Additive Models** are usually used when the magnitude of seasonality does not change in relation to time. In this case so far we are still in the Initial Stage of the Data and we are yet to see the actual seasonal change over the behaviour on the data. So, it is a better practise to go with a Model that won't overfit the Model's Performance. 


## Confirmed Cases Forecasting

In [None]:
# Prophet expects the timestamp column to be named `ds` and the target `y`;
# the other two case counts are not needed for this model.
covid19confirmedcases = (
    covid19
    .rename(columns={'Dates': 'ds', 'Confirmed Cases': 'y'})
    .drop(columns=['Death Cases', 'Recovered Cases'])
)

# Additive model with daily and yearly seasonality switched on.
# changepoint_prior_scale controls how flexible the trend is (smaller means a
# stiffer trend); 0.001 is well below Prophet's documented default of 0.05.
# NOTE(review): the frame holds one row per region per date, so the model is
# fitted to all regions' values at once — confirm this is intended.
mc = Prophet(
    changepoint_prior_scale=0.001,
    seasonality_mode='additive',
    daily_seasonality=True,
    yearly_seasonality=True,
)
mc.fit(covid19confirmedcases)

# Extend the history by 90 daily steps and predict over history plus horizon.
future_covid19confirmedcases = mc.make_future_dataframe(periods=90, freq='D')
forecast_covid19confirmedcases = mc.predict(future_covid19confirmedcases)

### Prediction Plot for Confirmed Cases

In [None]:
from fbprophet.plot import add_changepoints_to_plot

# Register pandas' datetime converters with matplotlib (avoids a TypeError on dates).
pd.plotting.register_matplotlib_converters()

# Forecast with its uncertainty band.
fig_confirmed = mc.plot(
    forecast_covid19confirmedcases,
    xlabel='Dates',
    ylabel='Confirmed Cases',
    uncertainty=True,
)
plt.title('Forecasting of Confirmed Cases')

## Seasonal Components in Forecasting of Confirmed Cases

In [None]:
# Break the confirmed-cases forecast down into its fitted components.
fig = mc.plot_components(fcst=forecast_covid19confirmedcases)

# Uncertainty in Models
The Model Uncertainty shows the Predicted value in terms of 'yhat'. This forecast model definitely consists of a lot of uncertainty in terms of the predicted interval as well as the upper and lower predicted uncertainty intervals. Definitely a good time to find a way to DECREASE uncertainty. First we will try by increasing the changepoint prior scale. There are 3 main sources of Uncertainty in the Forecast:
1. Uncertainty in Trend
2. Uncertainty in Seasonality Estimates.
3. Additional Observation Noise.

By default, Prophet returns uncertainty in Trend and Observation Noise. To capture uncertainty in seasonality we need to perform a full Bayesian Sampling taking into consideration data for the last 6 months.

## Model Uncertainty in Confirmed Cases

In [None]:
# Model uncertainty for confirmed cases: refit Prophet twice on the forecast
# frame, once with wider intervals and once with MCMC sampling.
# This expression is not the cell's last statement, so its result is not displayed.
forecast_covid19confirmedcases[['ds', 'trend', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
# Rename the fitted trend to `y` IN PLACE so the models below, which fit the
# column named `y`, are trained on the trend. Later cells read this column as
# `confirmedcases_y`. Because of the in-place rename this cell is not
# re-runnable: a second run finds no `trend` column in the selection above.
forecast_covid19confirmedcases.rename(columns={'trend':'y'}, inplace=True)


# Uncertainty in trend: a model with interval_width=0.95, fitted on the
# forecast frame and evaluated on the dates of the original data.
# NOTE(review): the training input is the earlier model's own output
# (history plus the 90 forecast days), not the observed counts — confirm.
forecast_covid19confirmedcases_trenduncertainty = Prophet(interval_width=0.95).fit(forecast_covid19confirmedcases).predict(covid19confirmedcases)


# Uncertainty in seasonality: mcmc_samples=300 switches Prophet to MCMC sampling.
# NOTE(review): this REBINDS `mc`. The cross-validation cell further down calls
# cross_validation(mc, ...) and therefore receives this model, not the one
# fitted in the "Confirmed Cases Forecasting" cell — confirm this is intended.
mc = Prophet(mcmc_samples=300)
forecast_covid19confirmedcases_seasonaluncertainty = mc.fit(forecast_covid19confirmedcases).predict(covid19confirmedcases)

# Component plot for the MCMC-based forecast.
fig = mc.plot_components(forecast_covid19confirmedcases_seasonaluncertainty)

## Death Cases Forecasting

In [None]:
# Prophet expects the timestamp column to be named `ds` and the target `y`;
# the confirmed and recovered counts are not needed for this model.
covid19deathcases = (
    covid19
    .rename(columns={'Dates': 'ds', 'Death Cases': 'y'})
    .drop(columns=['Confirmed Cases', 'Recovered Cases'])
)

# Additive model with daily and yearly seasonality switched on.
# changepoint_prior_scale controls how flexible the trend is (larger means a
# more flexible trend); 0.25 is above Prophet's documented default of 0.05.
# NOTE(review): the frame holds one row per region per date, so the model is
# fitted to all regions' values at once — confirm this is intended.
md = Prophet(
    changepoint_prior_scale=0.25,
    seasonality_mode='additive',
    daily_seasonality=True,
    yearly_seasonality=True,
)
md.fit(covid19deathcases)

# Extend the history by 90 daily steps (`periods` is the number of days to
# forecast) and predict over history plus horizon.
future_covid19deathcases = md.make_future_dataframe(periods=90, freq='D')
forecast_covid19deathcases = md.predict(future_covid19deathcases)

### Prediction Plot for Death Cases

In [None]:
from fbprophet.plot import add_changepoints_to_plot

# Register pandas' datetime converters with matplotlib (avoids a TypeError on dates).
pd.plotting.register_matplotlib_converters()

# Forecast with its uncertainty band.
fig_death = md.plot(
    forecast_covid19deathcases,
    xlabel='Dates',
    ylabel='Death Cases',
    uncertainty=True,
)
plt.title('Forecasting of Death Cases')

## Seasonal Components in Forecasting of Death Cases

In [None]:
# Break the death-cases forecast down into its fitted components.
fig1 = md.plot_components(fcst=forecast_covid19deathcases)

## Model Uncertainty for Death Cases

In [None]:
# Model uncertainty for death cases: refit Prophet twice on the forecast
# frame, once with wider intervals and once with MCMC sampling.
# This expression is not the cell's last statement, so its result is not displayed.
forecast_covid19deathcases[['ds', 'trend', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
# Rename the fitted trend to `y` IN PLACE so the models below, which fit the
# column named `y`, are trained on the trend. Later cells read this column as
# `deathcases_y`. Because of the in-place rename this cell is not
# re-runnable: a second run finds no `trend` column in the selection above.
forecast_covid19deathcases.rename(columns={'trend':'y'}, inplace=True)


# Uncertainty in trend: a model with interval_width=0.95, fitted on the
# forecast frame and evaluated on the dates of the original data.
# NOTE(review): the training input is the earlier model's own output
# (history plus the 90 forecast days), not the observed counts — confirm.
forecast_covid19deathcases_trenduncertainty = Prophet(interval_width=0.95).fit(forecast_covid19deathcases).predict(covid19deathcases)


# Uncertainty in seasonality: mcmc_samples=300 switches Prophet to MCMC sampling.
# NOTE(review): this REBINDS `md`. The cross-validation cell further down calls
# cross_validation(md, ...) and therefore receives this model, not the one
# fitted in the "Death Cases Forecasting" cell — confirm this is intended.
md = Prophet(mcmc_samples=300)
forecast_covid19deathcases_seasonaluncertainty = md.fit(forecast_covid19deathcases).predict(covid19deathcases)

# Component plot for the MCMC-based forecast.
fig = md.plot_components(forecast_covid19deathcases_seasonaluncertainty)

## Recovered Cases Forecasting

In [None]:
# Prophet expects the timestamp column to be named `ds` and the target `y`;
# the confirmed and death counts are not needed for this model.
covid19recoveredcases = (
    covid19
    .rename(columns={'Dates': 'ds', 'Recovered Cases': 'y'})
    .drop(columns=['Confirmed Cases', 'Death Cases'])
)

# Additive model with daily and yearly seasonality switched on.
# changepoint_prior_scale controls how flexible the trend is (larger means a
# more flexible trend); 0.25 is above Prophet's documented default of 0.05.
# NOTE(review): the frame holds one row per region per date, so the model is
# fitted to all regions' values at once — confirm this is intended.
mr = Prophet(
    changepoint_prior_scale=0.25,
    seasonality_mode='additive',
    daily_seasonality=True,
    yearly_seasonality=True,
)
mr.fit(covid19recoveredcases)

# Extend the history by 90 daily steps and predict over history plus horizon.
future_covid19recoveredcases = mr.make_future_dataframe(periods=90, freq='D')
forecast_covid19recoveredcases = mr.predict(future_covid19recoveredcases)

### Prediction Plot for Recovered Cases

In [None]:
from fbprophet.plot import add_changepoints_to_plot

# Register pandas' datetime converters with matplotlib (avoids a TypeError on dates).
pd.plotting.register_matplotlib_converters()

# Forecast of recovered cases. The figure gets its own name: storing it in
# `fig_death` (a copy-paste leftover) overwrote the death-forecast figure handle.
fig_recovered = mr.plot(forecast_covid19recoveredcases, xlabel='Dates', ylabel='Recovered Cases')
plt.title('Forecasting of Recovered Cases')

## Seasonal Components of Recovered Cases

In [None]:
# Break the recovered-cases forecast down into its fitted components.
fig1 = mr.plot_components(fcst=forecast_covid19recoveredcases)

## Model Uncertainty for Recovered Cases




In [None]:
# Model uncertainty for recovered cases: refit Prophet twice on the forecast
# frame, once with wider intervals and once with MCMC sampling.
# This expression is not the cell's last statement, so its result is not displayed.
forecast_covid19recoveredcases[['ds', 'trend', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
# Rename the fitted trend to `y` IN PLACE so the models below, which fit the
# column named `y`, are trained on the trend. Later cells read this column as
# `recoveredcases_y`. Because of the in-place rename this cell is not
# re-runnable: a second run finds no `trend` column in the selection above.
forecast_covid19recoveredcases.rename(columns={'trend':'y'}, inplace=True)


# Uncertainty in trend: a model with interval_width=0.95, fitted on the
# forecast frame and evaluated on the dates of the original data. The result
# is reused by the weekly/monthly seasonality cell as the frame to predict on.
# NOTE(review): the training input is the earlier model's own output
# (history plus the 90 forecast days), not the observed counts — confirm.
forecast_covid19recoveredcases_trenduncertainty = Prophet(interval_width=0.95).fit(forecast_covid19recoveredcases).predict(covid19recoveredcases)


# Uncertainty in seasonality: mcmc_samples=300 switches Prophet to MCMC sampling.
# NOTE(review): this REBINDS `mr`, and the two Fourier-order cells that follow
# rebind it again, so the cross-validation cell further down does not receive
# the model fitted in the "Recovered Cases Forecasting" cell — confirm.
mr = Prophet(mcmc_samples=300)
forecast_covid19recoveredcases_seasonaluncertainty = mr.fit(forecast_covid19recoveredcases).predict(covid19recoveredcases)

# Component plot for the MCMC-based forecast.
fig = mr.plot_components(forecast_covid19recoveredcases_seasonaluncertainty)


## Fourier Order In Seasonalities For Recovered Cases
The default Fourier order for yearly seasonality is 10. However, to fit higher-frequency changes the order can be increased (here to 40 via `yearly_seasonality=40`), depending on the frequency of the data reporting.

In [None]:
from fbprophet.plot import plot_yearly

# Yearly seasonality with Fourier order 40, fitted on the recovered-cases data.
# Note that this replaces the model previously bound to `mr`.
mr = Prophet(yearly_seasonality=40)
mr.fit(covid19recoveredcases)

# Plot the fitted yearly seasonal curve.
a = plot_yearly(mr)

### Weekly & Monthly Seasonality with Fourier Transform
Considering both the Weekly as well as Monthly Seasonality in the data to make sure we consider all the uncertainties in the data.

In [None]:
# Weekly seasonality plus a custom "monthly" one (30.5-day period, Fourier order 6).
# Note that this replaces the model previously bound to `mr`.
mr = Prophet(weekly_seasonality=True)
mr.add_seasonality(name='monthly', period=30.5, fourier_order=6)
mr.fit(covid19recoveredcases)

# Predict on the dates held in the trend-uncertainty frame and show the components.
forecast = mr.predict(forecast_covid19recoveredcases_trenduncertainty)
fig = mr.plot_components(forecast)

## Combined Forecasting for Confirmed, Death & Recovered Cases

In [None]:
def _slim_forecast(forecast, prefix):
    """Reduce a Prophet forecast frame to the three columns the plots below use.

    Parameters
    ----------
    forecast : pandas.DataFrame
        Forecast frame containing `ds`, `y` and `yhat`. `y` is the fitted
        trend, renamed from `trend` by the "Model Uncertainty" cells.
    prefix : str
        Case-type prefix for the value columns, e.g. 'confirmedcases'.

    Returns
    -------
    pandas.DataFrame
        New frame with columns `Date`, `<prefix>_y` and `<prefix>_yhat`.

    Raises
    ------
    KeyError
        If `forecast` lacks one of `ds`, `y` or `yhat`.
    """
    # Columns are chosen by NAME. Dropping positions 2..20 only leaves the
    # right three columns when the forecast has exactly 22 columns, which
    # depends on which seasonalities Prophet happened to fit.
    return forecast[['ds', 'y', 'yhat']].rename(
        columns={'ds': 'Date', 'y': prefix + '_y', 'yhat': prefix + '_yhat'}
    )


confirmedcases_forecast = _slim_forecast(forecast_covid19confirmedcases, 'confirmedcases')
deathcases_forecast = _slim_forecast(forecast_covid19deathcases, 'deathcases')
recoveredcases_forecast = _slim_forecast(forecast_covid19recoveredcases, 'recoveredcases')

recoveredcases_forecast.tail(10)

## Comparing Trend & Prediction Value for Confirmed Cases

In [None]:
from matplotlib import pyplot as plt
plt.style.use('fivethirtyeight')

# Fitted trend (blue) against the full prediction yhat (red) for confirmed cases.
confirmed_fig, confirmed_ax = plt.subplots(figsize=(10, 8))
forecast_dates = confirmedcases_forecast['Date']
confirmed_ax.plot(forecast_dates, confirmedcases_forecast['confirmedcases_y'], 'b-', label='Trend')
confirmed_ax.plot(forecast_dates, confirmedcases_forecast['confirmedcases_yhat'], 'r-', label='Prediction')
confirmed_ax.legend()
confirmed_ax.set_xlabel('Date')
confirmed_ax.set_ylabel('Confirmed Cases')
confirmed_ax.set_title('Trend vs. Prediction for Confirmed Cases');

## Comparing Trend and Prediction Value for Death Cases

In [None]:
# Fitted trend (blue) against the full prediction yhat (red) for death cases.
death_fig, death_ax = plt.subplots(figsize=(10, 8))
forecast_dates = deathcases_forecast['Date']
death_ax.plot(forecast_dates, deathcases_forecast['deathcases_y'], 'b-', label='Trend')
death_ax.plot(forecast_dates, deathcases_forecast['deathcases_yhat'], 'r-', label='Prediction')
death_ax.legend()
death_ax.set_xlabel('Date')
death_ax.set_ylabel('Death Cases')
death_ax.set_title('Trend vs. Prediction for Death Cases');

## Comparing Trend and Prediction Value for Recovered Cases 

In [None]:
# Fitted trend (blue) against the full prediction yhat (red) for recovered cases.
recovered_fig, recovered_ax = plt.subplots(figsize=(10, 8))
forecast_dates = recoveredcases_forecast['Date']
recovered_ax.plot(forecast_dates, recoveredcases_forecast['recoveredcases_y'], 'b-', label='Trend')
recovered_ax.plot(forecast_dates, recoveredcases_forecast['recoveredcases_yhat'], 'r-', label='Prediction')
recovered_ax.legend()
recovered_ax.set_xlabel('Date')
recovered_ax.set_ylabel('Recovered Cases')
recovered_ax.set_title('Trend vs. Prediction for Recovered Cases');

# Cross Validation

Cross Validation automates the measurement of Model Performance on historical data. The first parameter given is the trained model (not the data). The next parameter is the prediction ***horizon*** - how far ahead each forecast extends (in this case '30 days'). Then give an ***initial*** (how long to train before starting the tests) and a ***period*** (the spacing between the cutoff dates at which a prediction is made). If we don't provide these parameter values, Prophet will assign defaults of **initial = 3 * horizon, and cutoffs every half a horizon**.

 ## 1. Model Cross Validation for Confirmed Cases

In [None]:
from fbprophet.diagnostics import cross_validation

# Cross-validate whichever model `mc` currently refers to: 60 days of initial
# training, a new cutoff every 7 days, 30-day forecasts from each cutoff.
dfconfirmed = cross_validation(
    model=mc,
    horizon='30 days',
    period='7 days',
    initial='60 days',
)

## Performance Evaluation

The performance_metrics utility can be used to compute some useful statistics of the prediction performance (yhat, yhat_lower, and yhat_upper compared to y), as a function of the distance from the cutoff (how far into the future the prediction was). The statistics computed are mean squared error (MSE), root mean squared error (RMSE), mean absolute error (MAE), mean absolute percent error (MAPE), and coverage of the yhat_lower and yhat_upper estimates. These are computed on a rolling window of the predictions in df after sorting by horizon (ds minus cutoff). 

In [None]:
from fbprophet.diagnostics import performance_metrics
from fbprophet.plot import plot_cross_validation_metric

# Error statistics per forecast horizon, with rolling_window=0.1.
df_pconfirmed = performance_metrics(df=dfconfirmed, rolling_window=0.1)
df_pconfirmed.head()

## Error Visualization

Cross Validation Performance Metrics can be visualized with plot_cross_validation_metric, here shown for RMSE and MAPE. The blue line shows the metric, where the mean is taken over a rolling window of the dots. There are almost zero errors up to 1 month, and then it seems the error would gradually increase with the number of days. The *MAPE* measures the deviation from the actual data in terms of percentage. 
*RMSE* penalizes large errors more heavily than small ones.

In [None]:
from fbprophet.plot import plot_cross_validation_metric

# One cross-validation error plot per metric: RMSE first, then MAPE.
for error_metric in ('rmse', 'mape'):
    fig = plot_cross_validation_metric(dfconfirmed, metric=error_metric)

## 2. Model Cross Validation for Death Cases

In [None]:
from fbprophet.diagnostics import cross_validation

# Cross-validate whichever model `md` currently refers to: 60 days of initial
# training, a new cutoff every 7 days, 30-day forecasts from each cutoff.
dfdeath = cross_validation(
    model=md,
    horizon='30 days',
    period='7 days',
    initial='60 days',
)

## Performance Evaluation

In [None]:
from fbprophet.diagnostics import performance_metrics
from fbprophet.plot import plot_cross_validation_metric

# Error statistics per forecast horizon, with rolling_window=0.1.
df_pdeath = performance_metrics(df=dfdeath, rolling_window=0.1)
df_pdeath.head()

## Error Visualization

In [None]:
from fbprophet.plot import plot_cross_validation_metric

# One cross-validation error plot per metric: RMSE first, then MAPE.
for error_metric in ('rmse', 'mape'):
    fig = plot_cross_validation_metric(dfdeath, metric=error_metric)

## 3. Model Cross Validation for Recovered Cases

In [None]:
from fbprophet.diagnostics import cross_validation

# Cross-validate whichever model `mr` currently refers to: 60 days of initial
# training, a new cutoff every 7 days, 30-day forecasts from each cutoff.
dfrecovered = cross_validation(
    model=mr,
    horizon='30 days',
    period='7 days',
    initial='60 days',
)

## Performance Evaluation

In [None]:
from fbprophet.diagnostics import performance_metrics
from fbprophet.plot import plot_cross_validation_metric

# Error statistics per forecast horizon, with rolling_window=0.1.
df_precovered = performance_metrics(df=dfrecovered, rolling_window=0.1)
df_precovered.head()

## Error Visualization

In [None]:
from fbprophet.plot import plot_cross_validation_metric

# One cross-validation error plot per metric: RMSE first, then MDAPE
# (median absolute percentage error).
for error_metric in ('rmse', 'mdape'):
    fig = plot_cross_validation_metric(dfrecovered, metric=error_metric)