# FB Prophet Model - Unemployment Rate (%)

Training (1997-2020), Testing (2021-2023)

# Packages & Data Loading

## Packages

In [1]:
# Import packages
import pandas as pd
import numpy as np
from prophet import Prophet

import gdown
import requests
from io import StringIO

from prophet.diagnostics import cross_validation, performance_metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error

from prophet.plot import plot_plotly, plot_components_plotly
import plotly.graph_objects as go

## Data Loading

In [None]:
# Google Drive file ID of the source CSV
file_id = '117pk3NKZPfYmHQyP452HeMV7_8Jq6_W0'

# Build the direct-download URL for that file
download_url = f'https://drive.google.com/uc?export=download&id={file_id}'

# Fetch the file content. requests applies no timeout by default, so an
# unresponsive server would hang this cell forever; fail after 30 s instead.
response = requests.get(download_url, timeout=30)
response.raise_for_status()  # Raise on HTTP 4xx/5xx rather than parsing an error body

# Parse the downloaded CSV text directly into df
csv_data = StringIO(response.text)
df = pd.read_csv(csv_data)

# Display the shape and column dtypes of what was loaded
print('Shape of the dataset:', df.shape)
print(df.dtypes)

In [None]:
# Keep only the date and unemployment columns, as an independent copy of df
df_var = df.loc[:, ['ref_date', 'value_unemployment']].copy()

print(df_var.head())

In [None]:
# One row per 'ref_date': the median of 'value_unemployment' across all rows
# sharing that date, exposed as the column 'median_unemployment'
df_fb = (
    df_var.groupby('ref_date')['value_unemployment']
    .median()
    .rename('median_unemployment')
    .reset_index()
)

print(df_fb)

In [None]:
# Display shape and data types of the aggregated frame.
# (Report df_fb's shape so it matches the dtypes printed right below it.)
print('Shape of the dataset:\n', df_fb.shape)
print(df_fb.dtypes)

# Convert 'ref_date' to datetime so date-based slicing works on the index
df_fb['ref_date'] = pd.to_datetime(df_fb['ref_date'])

# Set 'ref_date' as the index
df_fb = df_fb.set_index('ref_date')

# Verify the index is set and is a DatetimeIndex
print('\n\nIndex after setting ref_date:\n', df_fb.head().index)
print('\n\nData types after setting index:\n', df_fb.dtypes)
print('\n\nDataset:\n', df_fb.head(2))

# FB Prophet Model

## Assign Training & Testing Sets

In [None]:
# Chronological hold-out split on the datetime index (both bounds inclusive):
# 1997-2020 is used for fitting, 2021-2023 is kept back for evaluation
train = df_fb.loc['1997-01-01':'2020-12-01']
test = df_fb.loc['2021-01-01':'2023-12-01']

print(train.head())
print(train.tail())

In [None]:
# Prepare both splits for Prophet: move 'ref_date' out of the index into a
# regular column, then rename the columns to 'ds' (date) and 'y' (value)
train, test = (
    split.reset_index().rename(
        columns={'ref_date': 'ds', 'median_unemployment': 'y'}
    )
    for split in (train, test)
)

## Model Fitting

In [None]:
# Build a Prophet model with yearly seasonality enabled and fit it on the
# training frame; fit() returns the fitted model itself
model = Prophet(yearly_seasonality=True).fit(train)
model

## Cross Validation

In [None]:
# Cross-validate the fitted model: first cutoff after 730 days of history,
# a new cutoff every 180 days, each forecasting 365 days ahead
df_cv = cross_validation(
    model,
    initial='730 days',
    period='180 days',
    horizon='365 days',
)

# Summarise the cross-validation errors and show the first rows
df_p = performance_metrics(df_cv)
df_p.head()

## Model Predictions

In [None]:
# Build the frame of dates to predict: the training dates plus one extra row
# per test observation.
# The series is stamped on the first day of each month (see the split bounds
# '1997-01-01' .. '2023-12-01'), so extend it with month-START frequency.
# Month-end ('ME') would generate 2020-12-31, 2021-01-31, ... and pair each
# forecast with the wrong test month.
future = model.make_future_dataframe(periods=len(test), freq='MS')
future.tail()

In [None]:
# Predict over every date in `future` (history and forecast horizon)
forecast = model.predict(future)

# Show the point forecast and its uncertainty interval for the last rows
forecast.loc[:, ['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

In [None]:
# Ensure alignment between forecast and test data.
# tail() keeps the row labels of `forecast`, while `test` was re-indexed from 0
# by reset_index(). Renumber from 0 here too, so that label-based column
# assignment pairs row i of the forecast with row i of the test set instead of
# producing NaN.
forecasted_values = forecast.tail(len(test)).reset_index(drop=True)

In [None]:
# Merge actuals and predictions.
# Copy the forecast columns by POSITION (.to_numpy()), not as Series: pandas
# aligns Series assignment on index labels, and the forecast rows may carry
# different labels than `test` (indexed from 0), which would fill the columns
# with NaN and make the metrics below fail. A length mismatch raises here.
evaluation = test.copy()
for column in ('yhat', 'yhat_lower', 'yhat_upper'):
    evaluation[column] = forecasted_values[column].to_numpy()

# Arrays used for the performance metrics
actuals = evaluation['y'].values
predictions = evaluation['yhat'].values

# Metrics calculation
mse = mean_squared_error(actuals, predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actuals, predictions)

# MAPE divides by the actual value, so only use rows whose actual is above
# epsilon; report NaN when no such row exists
epsilon = 1e-10
valid_indices = actuals > epsilon
if valid_indices.any():
    mape = np.mean(np.abs((actuals[valid_indices] - predictions[valid_indices]) / (actuals[valid_indices] + epsilon))) * 100
else:
    mape = np.nan

# Print final model evaluation statistics
print("Final Model Evaluation:")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Absolute Percentage Error (MAPE): {mape}")

# Visualizations

In [None]:
# Interactive Prophet forecast figure (history, fitted values and forecast)
fig = plot_plotly(model, forecast)

# Title, axis labels, legend heading and base font size
fig.update_layout(
    title="Prophet Forecast",
    xaxis_title="Date",
    yaxis_title="Median Unemployment",
    legend_title="Legend",
    font={'size': 12},
)

# Render the figure
fig.show()

In [None]:
# Start from the standard Prophet forecast figure
fig = plot_plotly(model, forecast)

# Overlay the held-out observations as a red dotted line with markers
fig.add_trace(
    go.Scatter(
        x=test['ds'],  # test-set dates
        y=test['y'],  # observed values
        mode='markers+lines',
        name='True Values (Test Data)',
        line={'color': 'red', 'dash': 'dot'},
        marker={'color': 'red'},
    )
)

# Zoom the x-axis to the 2021-2023 test window and label the axes and figure
fig.update_layout(
    xaxis={
        'range': ['2021-01-01', '2023-12-31'],
        'title': 'Date',
    },
    yaxis={
        'title': 'Median Unemployment Rate',
    },
    title='Prophet Forecast with Test Data (2021-2023)',
)

# Render the figure
fig.show()

In [None]:
# Interactive figure of the forecast components fitted by the model
fig = plot_components_plotly(model, forecast)

# Figure title and base font size
fig.update_layout(
    title="Prophet Forecast",
    font={'size': 12},
)

# Render the figure
fig.show()