# rstudio-conf-2020 / time-series-forecasting

Removed unnecessary blank lines

robjhyndman committed Jan 14, 2020
1 parent 85e34a1 commit fddf51e833248609fba2c2250db93d6214eb19dc
 @@ -123,7 +123,6 @@ install.packages(c( \vspace*{.6cm}\begin{alertblock}{}{\centerline{\Large\textbf{\url{bit.ly/fable2020}}}} \end{alertblock} ## Find me at ... \Large\vspace*{2.5cm} \begin{flushright} @@ -138,6 +137,3 @@ install.packages(c( \end{flushright}
 @@ -264,9 +264,6 @@ where * $\bm{b}_t$ is a vector of the most disaggregated series at time $t$ * $\bS$ is a ``summing matrix'' containing the aggregation constraints. ## Hierarchical time series \begin{minipage}{4cm}\vspace*{0.2cm} @@ -438,7 +435,6 @@ fc %>% autoplot(tourism_agg, level = 95)  ## Example: Australian tourism \fontsize{12}{12.5}\sf
 @@ -172,7 +172,6 @@ prettify(p1, 2. Produce a calendar plot for the pedestrian data from one location and one year. # Seasonal or cyclic? ## Time series patterns @@ -321,7 +320,6 @@ new_production %>% \vspace*{10cm} ## ACF {r, fig.height=4, echo=TRUE} @@ -346,7 +344,6 @@ holidays %>% autoplot()  ## Trend and seasonality in ACF plots - When data have a trend, the autocorrelations for small lags tend to be large and positive. @@ -481,7 +478,6 @@ wn %>% ACF(y) \fontsize{10}{11}\sf\tabcolsep=0.1cm {r, echo=FALSE} wn %>% ACF(y, lag_max = 10) %>% @@ -549,12 +545,10 @@ These show the series is **not a white noise series**. # Lab Session 5 ## Lab Session 5 You can compute the daily changes in the Google stock price in 2018 using \fontsize{10.5}{13}\sf {r, eval = FALSE} dgoog <- gafa_stock %>% filter(Symbol == "GOOG", year(Date) >= 2018) %>%
 @@ -47,7 +47,6 @@ global_economy %>% Consider the GDP information in global_economy. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time? # Inflation adjustments ## Inflation adjustments @@ -73,7 +72,6 @@ print_retail %>% ggtitle("Turnover: Australian print media industry")  ## Inflation adjustments \fontsize{10}{10}\sf @@ -205,7 +203,6 @@ food %>% \fontsize{13}{15}\sf {r food-lambda, echo=TRUE} food %>% features(Turnover, features = guerrero) @@ -217,7 +214,6 @@ food %>% * Always check the results. * A low value of $\lambda$ can give extremely large prediction intervals. ## Box-Cox transformations \fontsize{13}{14}\sf
 @@ -23,7 +23,6 @@ library(purrr) elecequip <- as_tsibble(fpp2::elecequip)  # Time series decompositions ## Time series decomposition @@ -44,7 +43,6 @@ where & $y_t=$ & data at period $t$ \\ & $R_t=$ & remainder component at period $t$ \end{tabular} ## STL decomposition \fontsize{13}{14}\sf @@ -59,7 +57,6 @@ where & $y_t=$ & data at period $t$ \\ * Take logs to get multiplicative decomposition. * Use Box-Cox transformations to get other decompositions. ## Decomposition dable \fontsize{10}{11}\sf
 @@ -86,7 +86,6 @@ tourism %>% facet_grid(vars(State, Region, Purpose))  ## Feature extraction and statistics \fontsize{9}{9}\sf @@ -114,7 +113,6 @@ tourism %>% * Use GGally::ggpairs() to look at the relationships between the STL-based features. You might wish to change seasonal_peak_year and seasonal_trough_year to factors. * Which is the peak quarter for holidays in each state? ## Feature extraction and statistics \fontsize{9}{10}\sf @@ -184,7 +182,6 @@ All features from the feasts package \end{alertblock} \end{textblock} ## Feature extraction and statistics \fontsize{9}{9}\sf @@ -222,11 +219,9 @@ pcs %>% ggplot(aes(x=.fittedPC1, y=.fittedPC2)) + \placefig{4}{2.6}{height=6.4cm, width=12cm}{pca1} \vspace*{10cm} ## Feature extraction and statistics \fontsize{9}{9}\sf \begin{textblock}{3.3}(.4,3) \begin{alertblock}{}\fontsize{10}{12}\sf Principal components based on all features from the feasts package @@ -241,11 +236,9 @@ pcs %>% ggplot(aes(x=.fittedPC1, y=.fittedPC2, col=State)) + \placefig{4}{2.6}{height=6.4cm, width=12cm}{pca2} \vspace*{10cm} ## Feature extraction and statistics \fontsize{9}{9}\sf \begin{textblock}{3.3}(.4,3) \begin{alertblock}{}\fontsize{10}{12}\sf Principal components based on all features from the feasts package @@ -261,7 +254,6 @@ pcs %>% ggplot(aes(x=.fittedPC1, y=.fittedPC2, col=Purpose)) + \only<2>{\placefig{4}{2.6}{height=6.4cm, width=12cm}{pca4}} \vspace*{10cm} ## Feature extraction and statistics \fontsize{8}{8}\sf
 @@ -510,7 +510,6 @@ augment(fit) %>% \vspace*{-0.3cm} {r dj9, echo=TRUE} # lag=h and dof=K augment(fit) %>%
 @@ -31,7 +31,6 @@ austa <- as_tsibble(fpp2::austa) %>% rename(Year = index, Visitors = value)  # Exponential smoothing ## Pharmaceutical Benefits Scheme @@ -58,7 +57,6 @@ austa <- as_tsibble(fpp2::austa) %>% * Although monthly data available for 10 years, data are aggregated to annual values, and only the first three years are used in estimating the forecasts. * All forecasts being done with the \texttt{FORECAST} function in MS-Excel! ## Historical perspective * Developed in the 1950s and 1960s as methods (algorithms) to produce point forecasts. @@ -67,8 +65,6 @@ austa <- as_tsibble(fpp2::austa) %>% * Need to choose best values for the smoothing parameters (and initial states). * Equivalent ETS state space models developed in the 1990s and 2000s. ## A model for levels, trends, and seasonalities \fontsize{13}{14}\sf @@ -306,7 +302,6 @@ fit <- holidays %>% model(ets = ETS(Trips)) fit  ## Example: Australian holiday tourism \fontsize{9}{10}\sf @@ -450,7 +445,6 @@ Find an ETS model for the Gas data from aus_production. * Why is multiplicative seasonality necessary here? * Experiment with making the trend damped. Does it improve the forecasts? # Non-Gaussian forecast distributions ## Non-Gaussian forecast distributions
 @@ -34,7 +34,6 @@ austa <- as_tsibble(fpp2::austa) %>% rename(Year = index, Visitors = value)  # ARIMA models ## ARIMA models @@ -130,7 +129,6 @@ p2 <- tsibble(idx = seq_len(100), sim = arima.sim(list(ma = c(-1, +0.8)), n = 10 gridExtra::grid.arrange(p1, p2, nrow = 1)  ## ARIMA models \begin{block}{Autoregressive Moving Average models:}\vspace*{-0.4cm} @@ -190,7 +188,6 @@ fit %>% \end{textblock}} \vspace*{3cm} ## Understanding ARIMA models \fontsize{14}{16}\sf @@ -209,7 +206,6 @@ fit %>% * The higher the value of $d$, the more rapidly the prediction intervals increase in size. * For $d=0$, the long-term forecast standard deviation will go to the standard deviation of the historical data. ## Example: National populations \fontsize{9}{9}\sf
 @@ -377,7 +377,6 @@ forecast(fit, h = "3 years") %>% autoplot(gasoline)  # Lab Session 19 ## Lab Session 19 @@ -451,7 +450,6 @@ insurance %>% labs(title = "Insurance advertising and quotations")  ## Example: Insurance quotes and TV adverts \fontsize{10}{10}\sf @@ -461,14 +459,14 @@ fit <- insurance %>% mutate(Quotes = c(NA, NA, NA, Quotes[4:40])) %>% model( ARIMA(Quotes ~ pdq(d = 0) + TV.advert), ARIMA(Quotes ~ pdq(d = 0) + TV.advert + ARIMA(Quotes ~ pdq(d = 0) + TV.advert + lag(TV.advert)), ARIMA(Quotes ~ pdq(d = 0) + TV.advert + ARIMA(Quotes ~ pdq(d = 0) + TV.advert + lag(TV.advert) + lag(TV.advert, 2)), ARIMA(Quotes ~ pdq(d = 0) + TV.advert + ARIMA(Quotes ~ pdq(d = 0) + TV.advert + lag(TV.advert) + lag(TV.advert, 2) + lag(TV.advert, 2) + lag(TV.advert, 3)) ) 
 @@ -22,7 +22,6 @@ state_tourism <- mytourism %>% summarise(Trips = sum(Trips)) %>% ungroup() # Lab Session 2 aus_production %>% autoplot(Bricks) @@ -47,7 +46,6 @@ snowy %>% autoplot(Trips) snowy %>% gg_season(Trips) snowy %>% gg_subseries(Trips) # Lab Session 4 aus_production %>% gg_lag(Bricks) @@ -120,10 +118,9 @@ global_economy %>% holidays %>% model(STL(Trips ~ season(window = 13) + trend(window = 21))) %>% components() %>% components() %>% autoplot() # Lab Session 7 global_economy %>% @@ -147,7 +144,7 @@ aus_production %>% canadian_gas %>% model(STL(Volume ~ season(window=7) + trend(window=11))) %>% components() %>% components() %>% autoplot() ## Changing the size of the windows changes the trend and seasonal components @@ -156,12 +153,12 @@ canadian_gas %>% canadian_gas %>% model(STL(Volume ~ season(window=7) + trend(window=11))) %>% components() %>% components() %>% gg_season(season_year) canadian_gas %>% model(STL(Volume ~ season(window=7) + trend(window=11))) %>% components() %>% components() %>% select(index, season_adjust) %>% autoplot(season_adjust) @@ -183,7 +180,6 @@ tourism %>% features(Trips, feat_stl) %>% select(State, seasonal_peak_year) # Lab Session 10 ## Two series have all zeros, so we will drop these to avoid problems in the later calculations @@ -253,7 +249,6 @@ augment(beer_model) %>% # Lab Session 13 hh_budget_train <- hh_budget %>% filter(Year <= max(Year) - 4) @@ -288,11 +283,11 @@ aus_takeaway_forecast %>% # Lab Session 14 global_economy %>% filter(Country == "China") %>% filter(Country == "China") %>% autoplot(GDP) global_economy %>% filter(Country == "China") %>% filter(Country == "China") %>% model( ets = ETS(GDP), ets_damped = ETS(GDP ~ trend("Ad")), @@ -331,7 +326,6 @@ us_gdp_model %>% forecast(h = "10 years") %>% autoplot(us_gdp) # Lab Session 17 tourism_models <- tourism %>%