# Random Forest Regressor for Time Series Forecasting

In [25]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
import numpy as np

In [9]:
# Sample time series data (replace with your actual data)
data = pd.read_csv('data/AirPassengers.csv')

data.rename(columns={'Month': 'date', '#Passengers': 'value'}, inplace=True)

In [10]:
# Features Engineering

# Temporal features

def create_features(data, date):
    ### Create Date indicators
    data["Year"]         = pd.to_datetime(data[date]).dt.year
    data["Quarter"]      = pd.to_datetime(data[date]).dt.quarter
    data["Month"]        = pd.to_datetime(data[date]).dt.month
    data["Semester"]     = data["Month"].map(lambda x: 1 if x<=6 else 2)
    return data

In [14]:
df = create_features(data, 'date')
df

Unnamed: 0,date,value,Year,Quarter,Month,Semester
0,1949-01,112,1949,1,1,1
1,1949-02,118,1949,1,2,1
2,1949-03,132,1949,1,3,1
3,1949-04,129,1949,2,4,1
4,1949-05,121,1949,2,5,1
...,...,...,...,...,...,...
139,1960-08,606,1960,3,8,2
140,1960-09,508,1960,3,9,2
141,1960-10,461,1960,4,10,2
142,1960-11,390,1960,4,11,2


In [15]:
df = df.set_index('date')

In [16]:
# Create lagged features

lags = 3 # Number of previous time steps to use as features

for col in df.columns:
    
    for i in range(1, lags + 1):
        df[col + f'_lag_{i}'] = df[col].shift(i)
    
df = df.dropna()
df

Unnamed: 0_level_0,value,Year,Quarter,Month,Semester,value_lag_1,value_lag_2,value_lag_3,Year_lag_1,Year_lag_2,Year_lag_3,Quarter_lag_1,Quarter_lag_2,Quarter_lag_3,Month_lag_1,Month_lag_2,Month_lag_3,Semester_lag_1,Semester_lag_2,Semester_lag_3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1949-04,129,1949,2,4,1,132.0,118.0,112.0,1949.0,1949.0,1949.0,1.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0
1949-05,121,1949,2,5,1,129.0,132.0,118.0,1949.0,1949.0,1949.0,2.0,1.0,1.0,4.0,3.0,2.0,1.0,1.0,1.0
1949-06,135,1949,2,6,1,121.0,129.0,132.0,1949.0,1949.0,1949.0,2.0,2.0,1.0,5.0,4.0,3.0,1.0,1.0,1.0
1949-07,148,1949,3,7,2,135.0,121.0,129.0,1949.0,1949.0,1949.0,2.0,2.0,2.0,6.0,5.0,4.0,1.0,1.0,1.0
1949-08,148,1949,3,8,2,148.0,135.0,121.0,1949.0,1949.0,1949.0,3.0,2.0,2.0,7.0,6.0,5.0,2.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1960-08,606,1960,3,8,2,622.0,535.0,472.0,1960.0,1960.0,1960.0,3.0,2.0,2.0,7.0,6.0,5.0,2.0,1.0,1.0
1960-09,508,1960,3,9,2,606.0,622.0,535.0,1960.0,1960.0,1960.0,3.0,3.0,2.0,8.0,7.0,6.0,2.0,2.0,1.0
1960-10,461,1960,4,10,2,508.0,606.0,622.0,1960.0,1960.0,1960.0,3.0,3.0,3.0,9.0,8.0,7.0,2.0,2.0,2.0
1960-11,390,1960,4,11,2,461.0,508.0,606.0,1960.0,1960.0,1960.0,4.0,3.0,3.0,10.0,9.0,8.0,2.0,2.0,2.0


In [21]:
# Split data into training and testing sets
X = df[[col for col in df.columns if 'lag' in col]]
y = df['value']

split_date = "1959-01"

X_train = X.loc[X.index < split_date]
y_train = y.loc[y.index < split_date]
X_test = X.loc[X.index >= split_date]
y_test = y.loc[y.index >= split_date]


Unnamed: 0_level_0,value_lag_1,value_lag_2,value_lag_3,Year_lag_1,Year_lag_2,Year_lag_3,Quarter_lag_1,Quarter_lag_2,Quarter_lag_3,Month_lag_1,Month_lag_2,Month_lag_3,Semester_lag_1,Semester_lag_2,Semester_lag_3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1959-01,337.0,310.0,359.0,1958.0,1958.0,1958.0,4.0,4.0,4.0,12.0,11.0,10.0,2.0,2.0,2.0
1959-02,360.0,337.0,310.0,1959.0,1958.0,1958.0,1.0,4.0,4.0,1.0,12.0,11.0,1.0,2.0,2.0
1959-03,342.0,360.0,337.0,1959.0,1959.0,1958.0,1.0,1.0,4.0,2.0,1.0,12.0,1.0,1.0,2.0
1959-04,406.0,342.0,360.0,1959.0,1959.0,1959.0,1.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0
1959-05,396.0,406.0,342.0,1959.0,1959.0,1959.0,2.0,1.0,1.0,4.0,3.0,2.0,1.0,1.0,1.0
1959-06,420.0,396.0,406.0,1959.0,1959.0,1959.0,2.0,2.0,1.0,5.0,4.0,3.0,1.0,1.0,1.0
1959-07,472.0,420.0,396.0,1959.0,1959.0,1959.0,2.0,2.0,2.0,6.0,5.0,4.0,1.0,1.0,1.0
1959-08,548.0,472.0,420.0,1959.0,1959.0,1959.0,3.0,2.0,2.0,7.0,6.0,5.0,2.0,1.0,1.0
1959-09,559.0,548.0,472.0,1959.0,1959.0,1959.0,3.0,3.0,2.0,8.0,7.0,6.0,2.0,2.0,1.0
1959-10,463.0,559.0,548.0,1959.0,1959.0,1959.0,3.0,3.0,3.0,9.0,8.0,7.0,2.0,2.0,2.0


In [33]:
X_test

Unnamed: 0_level_0,value_lag_1,value_lag_2,value_lag_3,Year_lag_1,Year_lag_2,Year_lag_3,Quarter_lag_1,Quarter_lag_2,Quarter_lag_3,Month_lag_1,Month_lag_2,Month_lag_3,Semester_lag_1,Semester_lag_2,Semester_lag_3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1959-01,337.0,310.0,359.0,1958.0,1958.0,1958.0,4.0,4.0,4.0,12.0,11.0,10.0,2.0,2.0,2.0
1959-02,360.0,337.0,310.0,1959.0,1958.0,1958.0,1.0,4.0,4.0,1.0,12.0,11.0,1.0,2.0,2.0
1959-03,342.0,360.0,337.0,1959.0,1959.0,1958.0,1.0,1.0,4.0,2.0,1.0,12.0,1.0,1.0,2.0
1959-04,406.0,342.0,360.0,1959.0,1959.0,1959.0,1.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0
1959-05,396.0,406.0,342.0,1959.0,1959.0,1959.0,2.0,1.0,1.0,4.0,3.0,2.0,1.0,1.0,1.0
1959-06,420.0,396.0,406.0,1959.0,1959.0,1959.0,2.0,2.0,1.0,5.0,4.0,3.0,1.0,1.0,1.0
1959-07,472.0,420.0,396.0,1959.0,1959.0,1959.0,2.0,2.0,2.0,6.0,5.0,4.0,1.0,1.0,1.0
1959-08,548.0,472.0,420.0,1959.0,1959.0,1959.0,3.0,2.0,2.0,7.0,6.0,5.0,2.0,1.0,1.0
1959-09,559.0,548.0,472.0,1959.0,1959.0,1959.0,3.0,3.0,2.0,8.0,7.0,6.0,2.0,2.0,1.0
1959-10,463.0,559.0,548.0,1959.0,1959.0,1959.0,3.0,3.0,3.0,9.0,8.0,7.0,2.0,2.0,2.0


In [22]:
y_test

date
1959-01    360
1959-02    342
1959-03    406
1959-04    396
1959-05    420
1959-06    472
1959-07    548
1959-08    559
1959-09    463
1959-10    407
1959-11    362
1959-12    405
1960-01    417
1960-02    391
1960-03    419
1960-04    461
1960-05    472
1960-06    535
1960-07    622
1960-08    606
1960-09    508
1960-10    461
1960-11    390
1960-12    432
Name: value, dtype: int64

In [23]:
# Train the Random Forest Regressor model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
predictions = model.predict(X_test)

In [28]:
# Evaluate the model
rmse = round(np.sqrt(mean_squared_error(y_test, predictions)), 3)
print(f'Root Mean Squared Error: {rmse}')

mae = round(mean_absolute_error(y_test, predictions), 3)
print(f'Mean Absolute Error: {mae}')

mape = round(mean_absolute_percentage_error(y_test, predictions), 3)
print(f'Mean Absolute Percentage Error: {mape}')

Root Mean Squared Error: 64.599
Mean Absolute Error: 49.001
Mean Absolute Percentage Error: 0.101


In [None]:
# Forecast future values
forecast_steps = 5
forecast_input = X.iloc[[-1]].values # Use the last available data as input
forecasts = []

for _ in range(forecast_steps):
    forecast = model.predict(forecast_input)[0]
    forecasts.append(forecast)
    
    forecast_input = np.roll(forecast_input, -1, axis=1) # Shift values to the left
    forecast_input[0, -1] = forecast # Replace the last value with the new forecast



In [32]:
print(f'Forecasted values for the next {forecast_steps} steps: {forecasts}')

Forecasted values for the next 5 steps: [352.81, 398.53, 377.1, 370.09, 359.46]


# Submission Instructions:

- Go to the **Assessments** tab in iCollege and click on **Assignments.** Submit your solution under the **Homework 3** category.

- Report your values for **rmse, mae, and mape** for your model performnace in the Homework submission field in iCollege.

- Report the list of the **predicted passenger counts** for the next 5 forecast steps.

- Attach this **executed (!)** Jupyter notebook and submit with your response above in iCollege.