# Time Series forecasting: Facebook Prophet

## Installation in python

[Get started](https://facebook.github.io/prophet/docs/installation.html) with Prophet

In [None]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt

from prophet import Prophet

# sharper plots
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

Download the data and check for missing values

In [None]:
import yfinance as yf

# Ticker and date window of the download; start_date/end_date are reused
# further down when the price chart is annotated.
ticker = 'TSLA'
start_date, end_date = '2023-07-12', '2025-06-11'

# Daily bars for the chosen window
data = yf.download(ticker, start=start_date, end=end_date, interval='1d')

# Drop the second level of the column index and clear the leftover level name
# so that columns are addressed by a single plain label (e.g. data['Close']).
data.columns = data.columns.droplevel(1).rename(None)
data

In [None]:
# Column dtypes and non-null counts
data.info()

In [None]:
# Single boolean: True if any column contains at least one missing value
data.isnull().sum().any()

In [None]:
# Summary statistics, transposed so that each column of `data` becomes a row
data.describe().T

### General Observations
1. The dataset contains 481 observations.
2.  The attributes are all numeric except for a date column that is used as the index.
3.  There are no missing values in the dataset.
4.  The variable Close can be used as a target for the models.

Note: If we had categorical columns, we would need to identify them and either encode them or extract features from them.

## Volume-Price Plot

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Work on an independent copy: a plain `daily_data = data` is only a second
# name for the same frame, so columns added to `daily_data` later would
# silently appear on `data` as well. The download is already at daily
# frequency, so no resampling is applied here.
daily_data = data.copy()

# Two stacked panels sharing the date axis: candlesticks on top, volume below.
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, 
               vertical_spacing=0.03, subplot_titles=('Tesla', 'Volume'), 
               row_width=[0.2, 0.7])

# Price panel (row 1): one candlestick per daily bar
fig.add_trace(go.Candlestick(
    x=daily_data.index, 
    open=daily_data["Open"], high=daily_data["High"],
    low=daily_data["Low"], close=daily_data["Close"], 
    name="Tesla"), row=1, col=1)

# Volume panel (row 2): bars, kept out of the legend
fig.add_trace(go.Bar(
    x=daily_data.index, 
    y=daily_data['Volume'], 
    showlegend=False), row=2, col=1)

fig.update_layout(
    yaxis_title='Price',
    # Shape spanning the requested date window horizontally and the full
    # figure height vertically (y is given in 'paper' coordinates, 0 to 1).
    shapes=[dict(
        x0=start_date, x1=end_date, y0=0, y1=1, xref='x', yref='paper',
        line_width=2
    )],
    # The candlestick trace adds a range slider by default; hide it.
    xaxis_rangeslider_visible=False
)

fig.show()

## Plot Closing Price vs Moving Averages

In [None]:
import plotly.graph_objects as go

# Simple moving averages of the close on the daily data. The leading rows,
# where the window is not yet full, fall back to the close itself.
# Maps window length in days -> dash pattern used for that line in the chart.
sma_styles = {5: 'dot', 9: 'dash', 17: 'dashdot'}
for days in sma_styles:
    daily_data[f'{days}d_sma'] = (
        daily_data['Close'].rolling(window=days).mean().fillna(data['Close'])
    )

fig = go.Figure()

# Closing price as a solid, slightly thicker line
fig.add_trace(go.Scatter(
    x=daily_data.index, y=daily_data['Close'],
    mode='lines', name='Close', line=dict(width=2),
))

# One line per moving average, told apart by its dash pattern
for days, dash in sma_styles.items():
    fig.add_trace(go.Scatter(
        x=daily_data.index, y=daily_data[f'{days}d_sma'],
        mode='lines', name=f'{days}d SMA', line=dict(dash=dash),
    ))

# Titles, horizontal legend above the plot area, white template
fig.update_layout(
    title='Tesla Close Price with Daily SMAs',
    xaxis_title='Date',
    yaxis_title='Price',
    legend=dict(x=0, y=1.1, orientation='h'),
    template='plotly_white',
)

fig.show()

## Bollinger Bands

**Bollinger Bands** are a popular technical analysis tool developed by John Bollinger. They consist of three lines: a simple moving average (SMA) in the middle, and two bands (upper and lower) plotted at a specified number of standard deviations above and below the SMA. 

These bands expand and contract based on market volatility. Traders use Bollinger Bands to identify overbought or oversold conditions, potential breakout opportunities, and to assess price volatility. When the price moves close to the upper band, the asset may be considered overbought; when it approaches the lower band, it may be considered oversold.

In [None]:
# Standard Bollinger Bands parameters: window length and band width in standard deviations
window, num_std = 20, 2

# One rolling view of the close, shared by the mean and the standard deviation
close_window = data['Close'].rolling(window=window)
rolling_std = close_window.std()

# Middle band is the rolling mean (SMA); the outer bands sit `num_std`
# rolling standard deviations above and below it.
data['bb_mid'] = close_window.mean()
band_offset = num_std * rolling_std
data['bb_upper'] = data['bb_mid'] + band_offset
data['bb_lower'] = data['bb_mid'] - band_offset

In [None]:
import plotly.graph_objects as go
import pandas as pd

daily_data = data

fig = go.Figure()

# Candlestick chart of the daily prices
fig.add_trace(go.Candlestick(
    x=daily_data.index,
    open=daily_data['Open'], high=daily_data['High'],
    low=daily_data['Low'], close=daily_data['Close'],
    name='Price',
))

# Bollinger Bands as plain lines, added in the order upper, middle, lower:
# dotted semi-transparent outer bands and a solid blue middle band.
band_traces = [
    ('bb_upper', 'Upper Band', dict(color='rgba(173,216,230,0.75)', dash='dot')),
    ('bb_mid', 'Middle Band', dict(color='blue')),
    ('bb_lower', 'Lower Band', dict(color='rgba(173,216,230,0.75)', dash='dot')),
]
for column, label, line_style in band_traces:
    fig.add_trace(go.Scatter(
        x=daily_data.index, y=daily_data[column],
        name=label, line=line_style,
    ))

# Final layout: fixed 1200x600 canvas, white template, no range slider
fig.update_layout(
    title='Bollinger Bands on Daily Price',
    xaxis_title='Date',
    yaxis_title='Price',
    template='plotly_white',
    autosize=False,
    width=1200,
    height=600,
    xaxis_rangeslider_visible=False,
)

fig.show()

## Making predictions

In [None]:
# Prepare data for Prophet
data.reset_index(inplace=True)
data = data[['Date', 'Close']].rename(columns={'Date': 'ds', 'Close': 'y'})

In [None]:
# Hold out the final `prediction_size` rows for evaluation and train on
# everything before them.
prediction_size = 30
train_df = data.iloc[:-prediction_size]
train_df.tail(3)

In [None]:
# Create and fit Prophet model
model = Prophet()
model.fit(train_df)

In [None]:
# Predict on the dates that actually occur in `data` (training rows followed
# by the held-out tail). `make_future_dataframe(periods=prediction_size)`
# would append consecutive calendar days instead, while the price series only
# has rows for trading days: the horizon would then end before the last
# held-out row, and the "last `prediction_size` rows" evaluated later would
# include rows the model was fitted on.
future = data[['ds']].copy()
future.tail(n=3)

In [None]:
# Run the fitted model over every date in `future` and peek at the last rows
forecast = model.predict(future)
forecast.tail(n=3)

In [None]:
# Keep the returned figure in a variable: left as the cell's last expression,
# it would be rendered twice by the inline matplotlib backend.
forecast_fig = model.plot(forecast)

In [None]:
# Keep the returned figure in a variable: left as the cell's last expression,
# it would be rendered twice by the inline matplotlib backend.
components_fig = model.plot_components(forecast)

Let's evaluate the quality of the algorithm by calculating the error metrics for the last 30 days that we predicted. For this, we will need the observations $y_i$ and the corresponding predicted values $\hat{y}_i$.

Let's look into the object forecast that the library created for us:

In [None]:
# List every column of the forecast frame on a single line
print(', '.join(forecast.columns))

We can see that this dataframe contains all the information we need except for the historical values. We need to join the forecast object with the actual values `y` from the original dataset `data`. For this we will define a helper function that we will reuse later:

In [None]:
def make_comparison_dataframe(historical, forecast):
    """Combine the predictions with the observed values, aligned on date.

    Both frames are indexed by their 'ds' column. Every forecast row is kept
    and receives the matching columns of `historical`; dates without an
    observation get missing values there.

    Returns a frame indexed by 'ds' holding 'yhat', 'yhat_lower' and
    'yhat_upper', followed by the remaining columns of `historical` ('y').
    """
    prediction_columns = ['yhat', 'yhat_lower', 'yhat_upper']
    predicted = forecast.set_index('ds')[prediction_columns]
    observed = historical.set_index('ds')
    return predicted.join(observed)

Let's apply this function to our last forecast:

In [None]:
# Join the forecast with the observations, then keep only the dates that
# actually have an observed `y`.
cmp_df = make_comparison_dataframe(data, forecast).dropna(subset=['y'])
cmp_df.tail(3)

We are also going to define a helper function that we will use to gauge the quality of our forecasting with MAPE and MAE error measures:

In [None]:
def calculate_forecast_errors(df, prediction_size):
    """Calculate MAPE and MAE of the forecast over the held-out tail.

    For each of the last `prediction_size` rows the error is
    e = y - yhat and the percentage error is p = 100 * e / y.
    MAE is the mean of |e| and MAPE is the mean of |p|.

    Args:
        df: joined dataset with 'y' and 'yhat' columns, ordered so that the
            predicted rows come last. It is not modified.
        prediction_size: number of rows at the end that were predicted.

    Returns:
        Dict with keys 'MAPE' (in percent) and 'MAE' (in the units of 'y').
        Both are NaN when `prediction_size` is 0. Rows whose 'y' is 0 have
        no finite percentage error.
    """
    # `tail` rather than `df[-prediction_size:]`: with a size of 0 the slice
    # `df[-0:]` selects the whole frame, so the result would silently be the
    # error over all rows instead of over none.
    predicted_part = df.tail(prediction_size)

    # Errors are computed on the held-out rows only, so there is no need to
    # copy the whole frame or to add helper columns to it.
    errors = predicted_part['y'] - predicted_part['yhat']
    percentage_errors = 100 * errors / predicted_part['y']

    return {'MAPE': percentage_errors.abs().mean(), 'MAE': errors.abs().mean()}

In [None]:
# Print every error metric of the held-out tail as "<name> <value>"
forecast_errors = calculate_forecast_errors(cmp_df, prediction_size)
for metric, value in forecast_errors.items():
    print(metric, value)

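As a result, the relative error of our forecast (MAPE) is about 19.55%, and on average our model is wrong by 62.72 USD (MAE). These figures come from one particular run; the exact values depend on the data downloaded when the notebook is executed.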

## Visualization

Let's create our own visualization of the model built by Prophet. It will comprise the actual values, forecast and confidence intervals.

First, we will plot the data for a shorter period of time to make the data points easier to distinguish. Second, we will show the model performance only for the period that we predicted, that is the last 30 days. It seems that these two measures should give us a more legible plot.

Third, we will use Plotly to make our chart interactive, which is great for exploring.

We will define a custom helper function `show_forecast` and call it (for more on how it works, please refer to the comments in the code and the [Plotly documentation](https://plotly.com/python/)):

In [None]:
def show_forecast(cmp_df, num_predictions, num_values, title):
    """Plot actual values against the forecast and its bounds with Plotly.

    The forecast line and the shaded band between 'yhat_lower' and
    'yhat_upper' cover the last `num_predictions` rows of `cmp_df`; the
    actual values cover the last `num_values` rows. The figure is shown
    and nothing is returned.
    """

    def line_trace(label, column, rows, **style):
        # Line trace over the last `rows` rows of one column; `style` entries
        # are added to (or override) the default trace arguments.
        window = cmp_df.tail(rows)
        options = {'name': label, 'x': window.index, 'y': window[column], 'mode': 'lines'}
        options.update(style)
        return go.Scatter(**options)

    # Order matters: 'tonexty' on the upper bound fills down to the trace
    # listed right before it, which therefore has to be the lower bound.
    traces = [
        line_trace('Lower Bound', 'yhat_lower', num_predictions,
                   line=dict(width=0),
                   marker=dict(color="gray")),
        line_trace('Upper Bound', 'yhat_upper', num_predictions,
                   line=dict(width=0),
                   marker=dict(color="gray"),
                   fillcolor='rgba(68, 68, 68, 0.3)',
                   fill='tonexty'),
        line_trace('Forecast', 'yhat', num_predictions,
                   line=dict(color='rgb(31, 119, 180)')),
        line_trace('Actual', 'y', num_values,
                   marker=dict(color="red")),
    ]

    figure = go.Figure(
        data=traces,
        layout=go.Layout(yaxis=dict(title='Tesla'), title=title, showlegend=False),
    )
    figure.show()

# Forecast and band over the last `prediction_size` rows, actual values over the last 100 rows
show_forecast(cmp_df, prediction_size, 100, 'New Closing Prices')