# Time Series Forecasting with PyCaret

**This notebook shows how easy it is to forecast with PyCaret's time series module. For this demonstration, we assume that we need to forecast web traffic for the next 4 weeks (28 days).**

## Install and import libraries

In [None]:
# !pip install pycaret-ts-alpha

In [None]:
# Import libraries
import logging
import pandas as pd
from pycaret.datasets import get_data
from pycaret.internal.pycaret_experiment import TimeSeriesExperiment
from sktime.utils.plotting import plot_series

In [None]:
# Import data
# Read the zipped competition CSV into a DataFrame.
# NOTE(review): parse_dates=True asks pandas to parse the *index* as dates,
# but no index_col is given here -- confirm whether it has any effect.
train1 = pd.read_csv("../input/web-traffic-time-series-forecasting/train_1.csv.zip", parse_dates=True)

## Prepare for modeling

In [None]:
## Drop unnecessary columns (or store a mapping of page name to index for later)
train1.drop("Page", axis="columns", inplace=True)

## Drop every row that contains at least one NA value
train1.dropna(axis="index", how="any", inplace=True)

## Replace the index with a fresh positional one and label it 'index'
train1.reset_index(drop=True, inplace=True)
train1.index.rename("index", inplace=True)

train1.head(5)

In [None]:
n_ts = 10  # Number of time series to build models for (the first n_ts rows are used)
fh = 28  # Forecast horizon: 28 days, i.e. 4 weeks

### Group the data so we can use the `apply` function (and, in the future, a PySpark UDF to process all time series in parallel)

In [None]:
# Use the first n_ts series. Alternative kept for reference (random subset):
# subset = train1.sample(n_ts, random_state=101)
subset = train1.head(n_ts)
# "index" is the name given to the frame's index during preparation; grouping
# on it is expected to give one single-row group per time series.
# NOTE(review): assumes no column is also named "index" -- confirm.
grouped_data = subset.groupby("index")

In [None]:
def basic_blend_workflow(y: pd.Series, fh: int, session_id: int = 42):
    """
    A basic PyCaret Time Series Workflow that trains, tunes,
    and blends the three best models for a single dataset

    Parameters
    ----------
    y : pd.Series
        The time series to model. ``y.name`` is used (as a string) for the
        experiment name and in the progress / error messages.
    fh : int
        Forecast horizon handed to ``setup``.
    session_id : int, default 42
        Session id handed to ``setup``.

    Returns
    -------
    tuple
        ``(exp, final_model)`` on success. If any step raises, the error is
        reported (printed and logged with its traceback) and ``(None, None)``
        is returned so that one failing series does not stop the others.
    """
    try:
        # Setup Experiment (forecast horizon, cross-validation folds, etc.)
        exp = TimeSeriesExperiment()
        exp.setup(data=y, fh=fh, session_id=session_id, experiment_name=str(y.name))
        print(f"Time Series: {y.name} >> Seasonality Present: {exp.seasonality_present} | Seasonal Period: {exp.seasonal_period}")

        # Build multiple models >> then select, tune and blend the best models
        best_baseline_models = exp.compare_models(n_select=3, sort='MAE')
        best_tuned_models = [exp.tune_model(model) for model in best_baseline_models]
        mean_blender = exp.blend_models(best_tuned_models, method='mean')

        # Test Set Predictions (the call is kept; its return value is not
        # used by this workflow, so it is no longer bound to a local)
        exp.predict_model(mean_blender)

        # Finalize Model
        final_model = exp.finalize_model(mean_blender)
    except Exception as e:
        # Deliberate best-effort boundary: report and carry on with the next
        # series. The print keeps the message next to the other notebook
        # output; logging.exception additionally records the traceback,
        # which the print alone throws away.
        print(f"Exception occured for '{y.name}': {e}")
        logging.exception("Workflow failed for time series %r", y.name)
        exp, final_model = None, None

    return (exp, final_model)

In [None]:
def transform_y(y):
    """
    Input y is a row of a dataframe, passed either as a single-row
    DataFrame or as a Series.
      - Index is the name of the time series
      - Columns are the data points

    This function will convert it into a series
    and set the index appropriately for modeling

    Returns a new pd.Series whose index is the original labels parsed with
    ``pd.to_datetime``. The input object is never modified. If a DataFrame
    with several rows is passed, only its first row is used.
    """
    y_t = y.T

    # Convert to series (a one-row frame transposes to a one-column frame)
    if isinstance(y_t, pd.DataFrame):
        name = y_t.columns[0]
        y_t = y_t[name]

    # Series.T hands back the very same object, so assigning the datetime
    # index directly would overwrite the caller's index. Work on a copy.
    series = y_t.copy()
    series.index = pd.to_datetime(series.index)
    return series

def run_basic_workflow(group, fh):
    """
    Model one time series end to end: reshape the group into a
    datetime-indexed series, then hand it to the blend workflow.

    Returns the (experiment, finalized model) pair produced by the workflow.
    """
    # Reshape the group and run the workflow on it (fixed session id of 42)
    experiment, model = basic_blend_workflow(
        y=transform_y(group),
        fh=fh,
        session_id=42,
    )
    return experiment, model

In [None]:
# Run the workflow once per group; `fh` is forwarded as an extra positional
# argument to run_basic_workflow, which returns an (exp, final_model) pair.
workflow_out_all = grouped_data.apply(run_basic_workflow, fh)

In [None]:
def plot_prediction(row):
    """
    Plot recent history and the future forecast for one workflow result.

    `row` holds the experiment at position 0 and the finalized model at
    position 1. When the experiment is None, a "not available" message is
    logged and printed instead of plotting.
    """
    experiment, model = row[0], row[1]

    # Nothing to plot for this series
    if experiment is None:
        logging.warning(f"Model not available")
        print(f"Model not available")
        return

    # From globals: fetch the series again using the experiment's name as row label
    history = transform_y(train1.loc[int(experiment.exp_name_log)])

    # Future Predictions, re-indexed with timestamps
    forecast = experiment.predict_model(model, verbose=False)
    forecast.index = forecast.index.to_timestamp()

    # Plot Results (last 100 observations plus the forecast)
    plot_series(history[-100:], forecast[0], labels=['All', 'Future Predictons'])

In [None]:
# Plot the forecast for every workflow result; plot_prediction's return
# values are discarded.
_ = workflow_out_all.apply(plot_prediction)