# FB Prophet Model - Wage Rate ($)

Training (1997-2020), Training (2021-2023)

# Packages & Data Loading

## Packages

In [1]:
# Import packages
import pandas as pd
from prophet import Prophet

import gdown
import requests
from io import StringIO

from prophet.plot import plot_plotly, plot_components_plotly
import plotly.graph_objects as go

## Data Loading

In [2]:
# Google Drive file
file_id = '117pk3NKZPfYmHQyP452HeMV7_8Jq6_W0'

# Create the download URL
download_url = f'https://drive.google.com/uc?export=download&id={file_id}'

# Use requests to get file content
response = requests.get(download_url)
response.raise_for_status()  

# Load the CSV directly into df
csv_data = StringIO(response.text)
df = pd.read_csv(csv_data)

# Display data
print('Shape of the dataset:', df.shape)
print(df.dtypes)

Shape of the dataset: (349920, 14)
ref_date                   object
geo                        object
labour_force               object
industry                   object
sex                        object
age_group                  object
value_unemployment        float64
wages                      object
value_wage                float64
year                        int64
month                       int64
ref_date_numeric          float64
value_unemployment_log    float64
value_wage_log            float64
dtype: object


In [3]:
# Create a copy with selected columns
df_var = df[['ref_date', 'value_wage']].copy()

print(df_var.head())

     ref_date  value_wage
0  1997-01-01        7.00
1  1997-01-01        8.00
2  1997-01-01        7.50
3  1997-01-01        7.00
4  1997-01-01        9.49


In [4]:
# Group df_var by 'ref_date' and calculate the median 'value_unemployment'
df_fb = df_var.groupby('ref_date').agg(
    median_unemployment=('value_wage', 'median')  # Calculate the median 
).reset_index()

print(df_fb)

       ref_date  median_unemployment
0    1997-01-01            12.375000
1    1997-02-01            12.695000
2    1997-03-01            12.511667
3    1997-04-01            12.585000
4    1997-05-01            12.000000
..          ...                  ...
319  2023-08-01            26.037639
320  2023-09-01            27.000000
321  2023-10-01            26.670000
322  2023-11-01            27.000000
323  2023-12-01            27.400000

[324 rows x 2 columns]


In [5]:
# Display shape and data types 
print('Shape of the dataset:\n', df_var.shape)
print(df_fb.dtypes)

# Convert 'ref_date' to datetime
df_fb['ref_date'] = pd.to_datetime(df_fb['ref_date'])

# Set 'ref_date' as the index 
df_fb = df_fb.set_index('ref_date')

# Verify the index is set and DatetimeIndex
print('\n\nIndex after setting ref_date:\n', df_fb.head().index)
print('\n\nData types after setting index:\n', df_fb.dtypes)
print('\n\nDataset:\n', df_fb.head(2))

Shape of the dataset:
 (349920, 2)
ref_date                object
median_unemployment    float64
dtype: object


Index after setting ref_date:
 DatetimeIndex(['1997-01-01', '1997-02-01', '1997-03-01', '1997-04-01',
               '1997-05-01'],
              dtype='datetime64[ns]', name='ref_date', freq=None)


Data types after setting index:
 median_unemployment    float64
dtype: object


Dataset:
             median_unemployment
ref_date                       
1997-01-01               12.375
1997-02-01               12.695


# FB Prophet Model

## Assign Training & Testing Sets

In [6]:
# Split into training and testing sets
train = df_fb['1997-01-01':'2020-12-01']
test = df_fb['2021-01-01':'2023-12-01']

print(train.head())
print(train.tail())

            median_unemployment
ref_date                       
1997-01-01            12.375000
1997-02-01            12.695000
1997-03-01            12.511667
1997-04-01            12.585000
1997-05-01            12.000000
            median_unemployment
ref_date                       
2020-08-01            23.463571
2020-09-01            24.927500
2020-10-01            24.897857
2020-11-01            24.281875
2020-12-01            24.040000


In [7]:
# Prepare data for Prophet
train = train.reset_index()  # Reset index to make 'ref_date' a column
train.rename(columns={'ref_date': 'ds', 'median_unemployment': 'y'}, inplace=True)

# Prepare data for Prophet
test = test.reset_index()  # Reset index to make 'ref_date' a column
test.rename(columns={'ref_date': 'ds', 'median_unemployment': 'y'}, inplace=True)

## Model Fitting

In [8]:
# Fit model
m = Prophet()
m.fit(train)

21:56:48 - cmdstanpy - INFO - Chain [1] start processing
21:56:49 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x21d03d73590>

# Model Predictions

In [9]:
# Predictions
future = m.make_future_dataframe(periods=36, freq='ME')
future.tail()

Unnamed: 0,ds
319,2023-07-31
320,2023-08-31
321,2023-09-30
322,2023-10-31
323,2023-11-30


In [10]:
# Prediction columns 
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
319,2023-07-31,24.797886,24.456063,25.159283
320,2023-08-31,25.831992,25.462332,26.211917
321,2023-09-30,26.216291,25.865952,26.52945
322,2023-10-31,26.468455,26.114201,26.816188
323,2023-11-30,26.58117,26.224602,26.953643


# Visualizations

In [13]:
plot_plotly(m, forecast)

In [14]:
# Generate the Prophet forecast plot
fig = plot_plotly(m, forecast)

# Add the test data to the plot
fig.add_trace(
    go.Scatter(
        x=test['ds'],  # x-axis (dates from test data)
        y=test['y'],  # y-axis (actual values)
        mode='markers+lines',
        name='True Values (Test Data)',
        line=dict(color='red', dash='dot'),  # Custom line style
        marker=dict(color='red')
    )
)

# Adjust x-axis range to focus on 2021-2023
fig.update_layout(
    xaxis=dict(
        range=['2021-01-01', '2023-12-31'],  # Set x-axis range
        title='Date',  # Add x-axis title
    ),
    yaxis=dict(
        title='Median Unemployment Rate',  # Add y-axis title
    ),
    title='Prophet Forecast with Test Data (2021-2023)',  # Add plot title
)

# Show the combined plot
fig.show()


In [15]:
plot_components_plotly(m, forecast)