# Multiple Time Series Forecasting Using Prophet

Prophet is an open-source library developed by Facebook that aims to make time-series forecasting easy and scalable. It is a type of generalized additive model (GAM), which uses a regression model with potentially non-linear smoothers. It is called additive because it adds together multiple decomposed components to explain the series. For example, Prophet uses the following components: 

$$ y(t) = g(t) + s(t) + h(t) + e(t) $$

where,  
$g(t)$: Growth. Big trend. Non-periodic changes.   
$s(t)$: Seasonality. Periodic changes (e.g. weekly, yearly) represented by a Fourier series.  
$h(t)$: Holiday effect that represents irregular schedules.   
$e(t)$: Error. Any idiosyncratic changes not explained by the model. 

In [None]:
# Prophet model for time series forecast
from prophet import Prophet

# Data processing
import numpy as np
import pandas as pd

# Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Multi-processing
from multiprocessing import Pool, cpu_count# Spark
from tqdm import tqdm# Tracking time
from time import time

# import the math module 
import math 

# Model performance evaluation
import sklearn 

In [None]:
# Path to the raw merchant-transactions CSV; adjust it for your environment.
data_path = "../../../data/raw/Time_Series_Merchants_Transactions_Anonymized.csv"

# Load the table and remember the values of its first column as the series
# identifiers (presumably this is the 'Merchant Name' column - confirm),
# then remove the 'Merchant Name' column from the frame.
df_merchant_transactions = pd.read_csv(data_path)
ticker_list = df_merchant_transactions.iloc[:, 0].values
df_merchant_transactions = df_merchant_transactions.drop('Merchant Name', axis=1)
ticker_list

In [None]:
# Timestamps at pandas' "M" frequency that will later label each observation.
stddates = pd.date_range(start='2020-08', end='2022-10', freq="M")

# Rebuild the frame from its raw values (discarding the original labels)
# and transpose it.
df_merchant_transactions = pd.DataFrame(df_merchant_transactions.values).transpose()
stddates

In [None]:
# Wide-format frame: one column per entry of ticker_list (merchant names).
data = pd.DataFrame(df_merchant_transactions.values, columns=ticker_list)
# Take a look at the data
data.info()

In [None]:
# Put the date column first under the name "ds", the name Prophet expects.
data.insert(loc=0, column="ds", value=stddates, allow_duplicates=True)
data.head()

In [None]:
# Chronological train/test split; change the cut-off date as needed.
train_end_date = '2022-04-30'
is_train_row = data['ds'] <= train_end_date
is_test_row = data['ds'] > train_end_date
train1 = data[is_train_row]
test1 = data[is_test_row]

# Check the shape of the dataset
print(train1.shape, test1.shape, sep='\n')

In [None]:
# Move the current index into a regular column (named 'index' by default).
train1 = train1.reset_index()

# Change the training data from the wide format to the long format:
# one row per (date, ticker) pair with the observed value in 'y'.
train = train1.melt(id_vars='ds', value_vars=ticker_list).set_axis(
    ['ds', 'ticker', 'y'], axis=1
)
train.head()

In [None]:
# Move the current index of `data` into a regular column. The dates already
# live in the 'ds' column, so this only adds an 'index' column that the melt
# below does not use.
data = data.reset_index()

# Change the full data set from the wide format to the long format:
# one row per (date, ticker) pair with the observed value in 'y'.
df = data.melt(id_vars='ds', value_vars=ticker_list).set_axis(
    ['ds', 'ticker', 'y'], axis=1
)
df.head()

In [None]:
# Check how every merchant series correlates with "Merchant 1".
# Only the merchant columns are compared: `data` also carries the 'ds' date
# column (and an 'index' column once reset_index() has been run), which are
# not transaction series and must not enter the correlation.
correlation = data[list(ticker_list)].corrwith(data["Merchant 1"])
correlation

In [None]:
# Split the long-format training data into one group per ticker.
groups_by_ticker = train.groupby(by='ticker')

# Check the groups in the dataframe (trailing ';' hides the cell output).
groups_by_ticker.groups.keys();

In [None]:
def train_and_forecast(group, periods=None, freq='M'):
    """Fit a Prophet model on one ticker's data and return its forecast.

    Args:
        group: Long-format frame for a single ticker. It is passed to
            ``Prophet.fit`` as-is and its first ``ticker`` value is used to
            label the result, so it needs ``ds``, ``y`` and ``ticker``
            columns.
        periods: Number of future steps to forecast. When omitted, the
            number of rows of the module-level ``test1`` frame is used,
            which matches the original behaviour.
        freq: Frequency string for the generated future dates.

    Returns:
        DataFrame with the columns ``ds``, ``ticker``, ``yhat``,
        ``yhat_upper`` and ``yhat_lower``.
    """
    if periods is None:
        periods = test1.shape[0]

    # Initiate the model
    m = Prophet(
        interval_width=0.95,
        seasonality_mode='multiplicative',
        weekly_seasonality=False,
        daily_seasonality=False,
    )

    # Fit the model
    m.fit(group)

    # Make predictions
    future = m.make_future_dataframe(periods, freq=freq)
    prediction = m.predict(future)

    # assign() returns a new frame, so the ticker label is never written into
    # a column-sliced view (the pattern that triggers SettingWithCopyWarning).
    labelled = prediction.assign(ticker=group['ticker'].iloc[0])

    # Return the forecasted results
    return labelled[['ds', 'ticker', 'yhat', 'yhat_upper', 'yhat_lower']]

In [None]:
# Start time
start_time = time()

# Forecast each ticker in turn and keep the per-ticker results in a list.
# Concatenating once at the end avoids re-copying every accumulated row on
# each iteration and avoids concatenating onto an empty DataFrame.
forecasts = []
for ticker in ticker_list:
    # Get the data for the ticker
    group = groups_by_ticker.get_group(ticker)
    # Make forecast
    forecast = train_and_forecast(group)
    forecasts.append(forecast)

# pd.concat raises on an empty list, so fall back to an empty frame.
for_loop_forecast = pd.concat(forecasts) if forecasts else pd.DataFrame()

print('The time used for the for-loop forecast is ', time()-start_time)
# Take a look at the data
for_loop_forecast.head()

In [None]:
# Keep only the point forecast; the interval bounds are not used further on.
for_loop_forecast = for_loop_forecast.drop(['yhat_lower', 'yhat_upper'], axis=1)
for_loop_forecast.head()

In [None]:
def rmse(data, forecast):
    """Return the root-mean-squared error between actuals and forecast.

    Args:
        data: Observed values.
        forecast: Predicted values, in the same order as ``data``.

    Returns:
        The square root of scikit-learn's mean squared error, as a float.
    """
    # Import the function explicitly: a bare `import sklearn` is not
    # guaranteed to make the `sklearn.metrics` submodule available.
    from sklearn.metrics import mean_squared_error

    return math.sqrt(mean_squared_error(data, forecast))

In [None]:
# Score every merchant's forecast on its hold-out rows only.
#
# `df` (the melted full data) and `for_loop_forecast` are both built as
# consecutive per-merchant segments in ticker_list order. Each segment is
# expected to hold the training rows followed by the hold-out rows.
# NOTE(review): this relies on each merchant's forecast containing the history
# dates plus test1.shape[0] future dates - confirm against train_and_forecast.
n_train = train1.shape[0]
n_test = test1.shape[0]
rows_per_ticker = n_train + n_test

# rmse_list has the list of all the rmse errors for individual merchant forecast
rmse_list = []
for i in range(len(ticker_list)):
    # The hold-out rows are the last n_test rows of merchant i's segment.
    k = i * rows_per_ticker + n_train
    j = (i + 1) * rows_per_ticker
    rmse_list.append(rmse(df['y'].iloc[k:j], for_loop_forecast['yhat'].iloc[k:j]))

# the total rmse error for all the merchants
total_rmse = sum(rmse_list)
print('total_rmse : ', total_rmse)

In [None]:
#rmse_list