# Forecasting COVID-19 data with Facebook Prophet and Neural Prophet

In [None]:
import warnings
warnings.filterwarnings('ignore')
import math
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from sysidentpy.metrics import root_relative_squared_error

from prophet import Prophet
from neuralprophet import NeuralProphet

### Forecasting for different types of cases for selected countries

In [None]:
# Countries covered by the forecasts, mapped to the lowercase code that
# prefixes their data file names.
countries = {
    "Germany": "de",
    "Spain": "es",
    "France": "fr",
    "India": "in",
    "Italy": "it",
    "Russia": "ru",
    "Turkey": "tr",
    "United States": "us",
}
# Parallel lists in dict insertion order: position i of one list matches
# position i of the other.
country_list = [*countries]
country_iso_code_list = [*countries.values()]
# Case categories; `run_forecast` selects one of them by position (dt_id).
data_type_list = [
    "confirmed_cases",
    "recovered_cases",
    "death_cases",
    "active_cases",
]

In [8]:
def _figure_path(data_format, model_tag, country, data_type, kind):
    """Return the output path of a figure.

    Building every path here keeps all figures on one naming scheme:
    ``<model_tag>_<country>_<data_type>_<kind>.png``.
    """
    return f"./data/figures/{data_format}/{model_tag}_{country.lower()}_{data_type}_{kind}.png"


def _add_holidays(model, country):
    """Add the built-in holidays of ``country`` to ``model``."""
    # the "United States" label used in this notebook is passed to the models as "US"
    model.add_country_holidays(country_name="US" if country == "United States" else country)


def _plot_actual_vs_predicted(df, forecast, yhat_column, figure_path):
    """Plot observed ``y`` against the predicted column, save the figure and show it."""
    plt.figure(figsize=(25, 10))
    plt.plot(df.groupby("ds")["y"].sum(), label="Actual")
    plt.plot(forecast.groupby("ds")[yhat_column].sum(), label="Predicted")
    plt.legend()
    plt.savefig(figure_path)
    plt.show()


def _evaluate(country, model_name, test, predicted, row_index):
    """Score ``predicted`` against ``test["y"]`` and return a one-row results frame."""
    actual = test["y"]
    mae = mean_absolute_error(y_true=actual, y_pred=predicted)
    rmse = math.sqrt(mean_squared_error(y_true=actual, y_pred=predicted))
    rrse = root_relative_squared_error(np.array(actual), np.array(predicted))
    # scaled by 100 so that MAPE is reported in percent
    mape = mean_absolute_percentage_error(y_true=actual, y_pred=predicted) * 100
    return pd.DataFrame({"Country": country, "Model": model_name, "MAE": round(mae, 3), "RMSE": round(rmse, 3),
                         "RRSE": round(rrse, 3), "MAPE": round(mape, 3)}, index=[row_index])


def forecasting(data_format, country, country_code, data_type, start_date, end_date, split_date, results_table):
    """Fit Prophet and NeuralProphet for one country and append their test metrics.

    The series is read from ``./data/<data_format>/<country_code>_<data_type>.csv``
    (columns ``ds`` and ``y`` are used), restricted to ``start_date``..``end_date``
    (both inclusive) and split at ``split_date``: rows before it form the training
    sample, rows on or after it form the test sample. Each model forecasts
    ``len(test)`` periods. For each model a forecast figure and an
    actual-vs-predicted figure are saved under ``./data/figures/<data_format>/``.

    Args:
        data_format: Sub-directory of ``./data`` and ``./data/figures`` to use.
        country: Country name; used for holidays, file names and the results rows.
        country_code: File-name prefix of the country's CSV file.
        data_type: Case category that completes the CSV file name.
        start_date: First date kept from the series.
        end_date: Last date kept from the series.
        split_date: First date of the test sample.
        results_table: DataFrame to which the metric rows are appended.

    Returns:
        A new DataFrame: ``results_table`` followed by one row for Facebook Prophet
        and one for Neural Prophet (MAE, RMSE, RRSE, MAPE rounded to 3 decimals).
    """
    df = pd.read_csv(f"./data/{data_format}/{country_code}_{data_type}.csv")
    df["ds"] = pd.to_datetime(df["ds"])
    df = df.loc[(df["ds"] >= start_date) & (df["ds"] <= end_date)]

    # create train and test data samples
    train = df.loc[df["ds"] < split_date]
    test = df.loc[df["ds"] >= split_date]
    horizon = len(test)

    # ---- Facebook Prophet ----
    model = Prophet(n_changepoints=10, changepoint_prior_scale=10, yearly_seasonality=False, weekly_seasonality=True,
                    daily_seasonality=False)
    _add_holidays(model, country)
    model.fit(train)

    # predict and plot
    future = model.make_future_dataframe(periods=horizon)
    forecast = model.predict(future)
    fig = model.plot(forecast, figsize=(25, 10))
    fig.show()
    fig.savefig(_figure_path(data_format, "fbp", country, data_type, "forecast"))
    model.plot_components(forecast)
    _plot_actual_vs_predicted(df, forecast, "yhat",
                              _figure_path(data_format, "fbp", country, data_type, "actual_vs_predicted"))

    # the last `horizon` rows of the forecast are the predictions for the test period
    prophet_row = _evaluate(country, "Facebook Prophet", test, forecast["yhat"].tail(horizon), len(results_table))
    results_table = pd.concat([results_table, prophet_row])

    # ---- Neural Prophet ----
    model = NeuralProphet(yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=False)
    _add_holidays(model, country)
    model.fit(train)

    # predict and plot; n_historic_predictions=True keeps the training period in the
    # forecast so that the plots span the same range as for Prophet
    future = model.make_future_dataframe(df=train, periods=horizon, n_historic_predictions=True)
    forecast = model.predict(future)
    fig = model.plot(forecast, figsize=(25, 10))
    fig.show()
    fig.savefig(_figure_path(data_format, "nnp", country, data_type, "forecast"))
    model.plot_components(forecast)
    _plot_actual_vs_predicted(df, forecast, "yhat1",
                              _figure_path(data_format, "nnp", country, data_type, "actual_vs_predicted"))

    neural_row = _evaluate(country, "Neural Prophet", test, forecast["yhat1"].tail(horizon), len(results_table))
    results_table = pd.concat([results_table, neural_row])

    return results_table

In [9]:
def run_forecast(data_format, dt_id):
    """Run both forecasting models for every country and save the metrics as CSV.

    Args:
        data_format: Data sub-directory passed on to ``forecasting``; also part of
            the results file name.
        dt_id: Position in ``data_type_list`` of the case category to forecast.

    The collected metrics are written to
    ``./data/results/performance_forecasting_results_<data_type>_<data_format>.csv``.
    Nothing is returned.
    """
    data_type = data_type_list[dt_id]
    end_date = "2020-05-20"
    split_date = "2020-05-01"

    # (country, first date of its series), in the order the rows appear in the results file
    runs = [
        ("United States", "2020-01-27"),
        ("Spain", "2020-02-06"),
        ("Italy", "2020-02-05"),
        ("France", "2020-01-29"),
        ("Germany", "2020-02-01"),
        ("Russia", "2020-02-05"),
        ("Turkey", "2020-03-16"),
        ("India", "2020-02-04"),
    ]

    forecasting_results = pd.DataFrame(columns=["Country", "Model", "MAE", "RMSE", "RRSE", "MAPE"])
    for country, start_date in runs:
        # every country is forecast on the same data type, so that all rows match
        # the data type named in the results file
        forecasting_results = forecasting(data_format, country, countries[country], data_type, start_date,
                                          end_date, split_date, forecasting_results)

    forecasting_results.to_csv(
        "./data/results/performance_forecasting_results_" +
        data_type + "_" + data_format + ".csv",
        index=False)

## Forecasting daily data for confirmed cases

In [None]:
# dt_id=0 -> data_type_list[0] == "confirmed_cases"
run_forecast(data_format="daily_data", dt_id=0)

## Forecasting daily data for death cases

In [None]:
# dt_id=2 -> data_type_list[2] == "death_cases"
run_forecast(data_format="daily_data", dt_id=2)

## Forecasting cumulative data for confirmed cases

In [None]:
# dt_id=0 -> data_type_list[0] == "confirmed_cases"
run_forecast(data_format="cumulative_data", dt_id=0)

## Forecasting cumulative data for recovered cases

In [None]:
# dt_id=1 -> data_type_list[1] == "recovered_cases"
run_forecast(data_format="cumulative_data", dt_id=1)

## Forecasting cumulative data for death cases

In [None]:
# dt_id=2 -> data_type_list[2] == "death_cases"
run_forecast(data_format="cumulative_data", dt_id=2)

## Forecasting cumulative data for active cases


In [None]:
# dt_id=3 -> data_type_list[3] == "active_cases"
run_forecast(data_format="cumulative_data", dt_id=3)