* Install greykite.

In [1]:
!pip install greykite



In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Matplotlib 3.6 renamed the bundled seaborn style sheets with a
# "seaborn-v0_8-" prefix and later releases dropped the old names, so use
# whichever name this Matplotlib installation provides.
if "seaborn-v0_8-whitegrid" in plt.style.available:
    whitegrid_style = "seaborn-v0_8-whitegrid"
else:
    whitegrid_style = "seaborn-whitegrid"
plt.style.use(whitegrid_style)

# Global font settings applied to every figure drawn afterwards.
plt.rcParams["font.family"] = "Times New Roman"
plt.rcParams["font.size"] = 17

* Load up the COVID cases from the *Our World in Data dataset*.

In [3]:
import pandas as pd

# Download the Our World in Data COVID table and convert its "date" column
# to pandas datetimes in one expression.
owid_covid = pd.read_csv(
    "https://covid.ourworldindata.org/data/owid-covid-data.csv"
).assign(date=lambda frame: pd.to_datetime(frame["date"]))

In [4]:
# List every distinct value of the "location" column.
owid_covid["location"].unique()

array(['Afghanistan', 'Africa', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Anguilla', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Asia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bermuda', 'Bhutan', 'Bolivia', 'Bonaire Sint Eustatius and Saba',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'British Virgin Islands', 'Brunei', 'Bulgaria', 'Burkina Faso',
       'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde',
       'Cayman Islands', 'Central African Republic', 'Chad', 'Chile',
       'China', 'Colombia', 'Comoros', 'Congo', 'Cook Islands',
       'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Curacao',
       'Cyprus', 'Czechia', 'Democratic Republic of Congo', 'Denmark',
       'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Eswatini', 'Ethi

In [5]:
# Preview the first five rows of the table.
owid_covid.head(n=5)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,new_people_vaccinated_smoothed,new_people_vaccinated_smoothed_per_hundred,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-02-24,5.0,5.0,,,,,0.126,0.126,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,39835428.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,,,,
1,AFG,Asia,Afghanistan,2020-02-25,5.0,0.0,,,,,0.126,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,39835428.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,,,,
2,AFG,Asia,Afghanistan,2020-02-26,5.0,0.0,,,,,0.126,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,39835428.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,,,,
3,AFG,Asia,Afghanistan,2020-02-27,5.0,0.0,,,,,0.126,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,39835428.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,,,,
4,AFG,Asia,Afghanistan,2020-02-28,5.0,0.0,,,,,0.126,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,39835428.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,,,,


* Concentrate on cases in France.

In [6]:
# Keep only the rows for France, index them by date, then resample to a daily
# grid and fill the gaps by linear interpolation.
is_france = owid_covid["location"] == "France"
df = (
    owid_covid.loc[is_france]
    .set_index("date")
    .resample("D")
    .interpolate(method="linear")
)

* Set up the Greykite metadata parameters.
* Then pass this object into the forecaster configuration.
* Time column is date and value column is new_cases.

In [7]:
from greykite.framework.templates.autogen.forecast_config import (
    ForecastConfig, MetadataParam
)

# Tell Greykite which column holds the timestamps ("date"), which column
# holds the values to forecast ("new_cases"), and the `freq` setting ("D").
metadata = MetadataParam(time_col="date", value_col="new_cases", freq="D")

* Create the *forecaster* object, which creates forecasts and stores the result.
* The forecast horizon is 90 days.
* Prediction interval is 95%.
* Silverkite and Prophet support quantifying uncertainty by means of prediction intervals.
* A coverage of 95% means that 95% of actuals should fall within the prediction interval.
* In Greykite, the `uncertainty` attribute of `ModelComponentsParam`
provides additional configuration options for the uncertainty model.
* Add a line to ignore warnings of the UserWarning type during training since
otherwise, there are about 500 lines of warnings about 0s in the target column.

In [8]:
import warnings
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum

forecaster = Forecaster()

# Silence UserWarning only while the model is being fitted. The context
# manager restores the previous warning filters on exit, so the suppression
# does not leak into the rest of the session.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=UserWarning)
    # Fit the Silverkite daily template with a 90-step horizon and 95%
    # prediction-interval coverage on the France series.
    result = forecaster.run_forecast_config(
        df=df.reset_index(),
        config=ForecastConfig(
            model_template=ModelTemplateEnum.SILVERKITE_DAILY_90.name,
            forecast_horizon=90,
            coverage=0.95,
            metadata_param=metadata,
        ),
    )

Fitting 3 folds for each of 4 candidates, totalling 12 fits



Input data has many null values. Missing 11.81% of one input.



* Plot the original time-series from the result object and overlay forecasts.

In [9]:
forecast = result.forecast
# Build the figure first, then render it with the "colab" renderer.
forecast_fig = forecast.plot()
forecast_fig.show(renderer="colab")

In [10]:
import warnings
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum

forecaster = Forecaster()

# Silence UserWarning only while the model is being fitted. The context
# manager restores the previous warning filters on exit, so the suppression
# does not leak into the rest of the session.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=UserWarning)
    # Same configuration as the Silverkite run, but with the Prophet model
    # template; note that this overwrites `result`.
    result = forecaster.run_forecast_config(
        df=df.reset_index(),
        config=ForecastConfig(
            model_template=ModelTemplateEnum.PROPHET.name,
            forecast_horizon=90,
            coverage=0.95,
            metadata_param=metadata,
        ),
    )

Fitting 3 folds for each of 1 candidates, totalling 3 fits


INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seaso

In [11]:
forecast = result.forecast
# Build the figure first, then render it with the "colab" renderer.
forecast_fig = forecast.plot()
forecast_fig.show(renderer="colab")

* The forecasts are in the df attribute of the forecast object.
* Alongside the point forecasts, it holds the lower and upper bounds of the prediction interval (`forecast_lower` and `forecast_upper`).

In [12]:
# Show the first five forecast rows, rounded to two decimal places.
forecast.df.head(n=5).round(decimals=2)

Unnamed: 0,date,actual,forecast,forecast_lower,forecast_upper
0,2020-01-24,2.0,-3186.85,-22125.27,19119.82
1,2020-01-25,1.0,-4787.56,-24642.18,14971.81
2,2020-01-26,0.0,-1328.79,-20504.75,18990.13
3,2020-01-27,0.0,-3555.04,-24058.34,16084.77
4,2020-01-28,1.0,-1900.9,-18927.71,17966.04


* Get some performance metrics for the model. 
* Get the performance of the historical forecast on the holdout test set like this.

In [13]:
from collections import defaultdict

backtest = result.backtest

# For every metric reported on the training split, collect the pair
# [train value, test value] under that metric's name.
backtest_eval = defaultdict(list)
for metric in backtest.train_evaluation:
    backtest_eval[metric].extend(
        [backtest.train_evaluation[metric], backtest.test_evaluation[metric]]
    )

# One column per metric with rows "train"/"test", transposed so that each
# metric becomes a row.
metrics = pd.DataFrame(backtest_eval, index=["train", "test"]).T
metrics.head()

Unnamed: 0,train,test
CORR,0.773769,-0.165522
R2,0.598425,-5.09006
MSE,97298800.0,360701000.0
RMSE,9864.02,18992.1
MAE,5525.61,11415.1


* Apply the model conveniently to new data like this.

In [14]:
model = result.model
# Build a 4-period future frame from the fitted time series, leaving out the
# historical rows, and predict on it.
future_df = result.timeseries.make_future_dataframe(periods=4, include_history=False)
model.predict(future_df)


Input data has many null values. Missing 100.00% of one input.



Unnamed: 0,ts,forecast,forecast_lower,forecast_upper
0,2021-11-26,-1818.611183,-21471.107765,17516.099151
1,2021-11-27,2259.754344,-17192.370775,20757.379287
2,2021-11-28,554.046881,-20199.848111,20381.536952
3,2021-11-29,-11909.242584,-30115.665114,8347.185114


In [15]:
# Same call as at the end of the previous cell: predict on the `future_df`
# built there.
model.predict(future_df)


Input data has many null values. Missing 100.00% of one input.



Unnamed: 0,ts,forecast,forecast_lower,forecast_upper
0,2021-11-26,-1818.611183,-21471.107765,17516.099151
1,2021-11-27,2259.754344,-17192.370775,20757.379287
2,2021-11-28,554.046881,-20199.848111,20381.536952
3,2021-11-29,-11909.242584,-30115.665114,8347.185114
