# Prophet: Forecasting Air Passenger Numbers
Your objective is to train a Prophet model on the monthly airline passenger time series and use it to forecast passenger numbers for a held-out test period.


## Goal: 
The deliverable for this project is a Jupyter notebook demonstrating a forecast of air passenger numbers using Prophet modeling.

## Workflow:
1. Use a train/test split of your choice to train the model.
2. Fit a Prophet model appropriate to the data.
3. Plot the forecast components of the model.
4. Calculate the root mean squared error of the predictions against the test set.

In [41]:
import pandas as pd
import plotly.graph_objects as go

from prophet import Prophet

from sklearn.metrics import root_mean_squared_error

In [42]:
# Load the passenger data and reshape it into the two-column layout Prophet
# expects: "ds" (timestamp) and "y" (value to forecast).

df = (
    pd.read_csv("./data/british airways.csv")
    .rename(columns={"Date": "ds", "Adjusted Passenger Count": "y"})
    # Parse the date strings so the rows can be ordered chronologically
    .assign(ds=lambda frame: pd.to_datetime(frame["ds"]))
    .sort_values(by="ds")
)

# Preview the first rows as the cell output
df.head()

Unnamed: 0,ds,y
0,2005-07-01,21686
1,2005-08-01,20084
2,2005-09-01,20635
3,2005-10-01,18988
4,2005-11-01,15554


In [43]:
# Positional 0.9 train/test split: the first 90% of rows train the model,
# the remaining rows are held out for evaluation.

split_point = int(len(df) * 0.9)
print(f"Split point: {split_point}")

# Rows before the split point -> train, rows from the split point on -> test
train, test = df.iloc[:split_point], df.iloc[split_point:]

Split point: 116


In [44]:
# Fit model

# Prophet is constructed with no arguments, i.e. its default configuration.
m = Prophet()
# Fit on the training rows only; `train` carries the "ds" and "y" columns
# created when the data was loaded. As the last expression of the cell, the
# value returned by fit() is displayed as the cell output.
m.fit(train)

20:54:43 - cmdstanpy - INFO - Chain [1] start processing


20:54:43 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x116bcc890>

In [45]:
# Predict

# Score only the held-out dates, then left-join the predictions back onto the
# test rows so the actuals ("y") and predictions ("yhat") share one frame.
forecast = test.merge(m.predict(test[["ds"]]), on="ds", how="left")

In [46]:
# Calculate and print RMSE

# Compare the held-out actuals with Prophet's point predictions
rmse = root_mean_squared_error(
    y_true=forecast["y"],
    y_pred=forecast["yhat"],
)
print(f"RMSE: {rmse}")

RMSE: 2022.7924127733727


In [49]:
# Plot predictions against the held-out actuals, then the forecast components

# Imported in this cell so it runs on its own; the helper ships with prophet.
from prophet.plot import plot_components_plotly

fig = go.Figure()

# Add actual values
fig.add_trace(
    go.Scatter(
        x=forecast["ds"],
        y=forecast["y"],
        name="Actual",
        mode="lines",
        line=dict(color="blue"),
    )
)

# Add predicted values
fig.add_trace(
    go.Scatter(
        x=forecast["ds"],
        y=forecast["yhat"],
        name="Predicted",
        mode="lines",
        line=dict(color="red"),
    )
)

# Add the interval band. The upper bound is drawn first (hidden from the
# legend) so the lower bound's fill="tonexty" shades the area between the two.
fig.add_trace(
    go.Scatter(
        x=forecast["ds"],
        y=forecast["yhat_upper"],
        fill=None,
        mode="lines",
        line=dict(color="rgba(255,0,0,0.1)"),
        showlegend=False,
    )
)

fig.add_trace(
    go.Scatter(
        x=forecast["ds"],
        y=forecast["yhat_lower"],
        fill="tonexty",
        mode="lines",
        line=dict(color="rgba(255,0,0,0.1)"),
        name="Conf. bound",
    )
)

fig.update_layout(
    title=f"Passenger forecast, RMSE = {rmse:.2f}",
    xaxis_title="Date",
    yaxis_title="Number of passengers (#)",
    hovermode="x unified",
)

fig.show()

# Plot the forecast components (workflow step 3). `forecast` is the test frame
# left-joined with Prophet's prediction output, so it still carries the
# component columns produced by m.predict() alongside "ds".
components_fig = plot_components_plotly(m, forecast)
components_fig.show()