<a href="https://colab.research.google.com/github/lvscious/Com-Aided-Case-Study/blob/main/ARIMA_MODEL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the three packages this notebook relies on, one install call per
# package and in the order listed. invisible() keeps the list returned by
# lapply() from being echoed in the notebook output.
invisible(lapply(c("forecast", "tseries", "ggplot2"), install.packages))

In [None]:
library(forecast)
library(tseries)
library(ggplot2)

# Read the raw dataset from the working directory.
data <- read.csv("full_dataset_new.csv")

# Coerce the calendar columns to integer type.
data$Quarter_num <- as.integer(data$Quarter_num)
data$Year <- as.integer(data$Year)

# Build the GDP time series: quarterly observations (4 per year) whose first
# value is labelled 2000 Q1. The start is hard-coded rather than read from
# Year/Quarter_num, so the rows are assumed to be in chronological order
# beginning at that quarter -- TODO confirm against the CSV.
gdp_ts <- ts(data$GDP, frequency = 4, start = c(2000, 1))

# Print the first few observations as a quick sanity check.
cat("\n--- Time Series Object Summary ---\n")
print(head(gdp_ts))

# Quarter-on-quarter growth rate, approximated by the first difference of
# log(GDP). The result is one observation shorter than gdp_ts.
gdp_growth_rate <- diff(log(gdp_ts))

# Hold out the final h quarters (8 quarters = 2 years) as the test set and
# train on everything that precedes them.
h <- 8
n <- length(gdp_growth_rate)
test_growth <- tail(gdp_growth_rate, h)
train_growth <- head(gdp_growth_rate, n - h)

# GDP value of the last training-period quarter: the first element of the
# final h + 1 observations. It is on the original (un-logged) scale and is
# used later to anchor the level forecasts.
last_gdp_level <- tail(gdp_ts, h + 1)[1]

# Report the sizes of the two samples.
cat(
  paste(
    "\nData Split: Training on", length(train_growth),
    "quarters. Testing on", h, "quarters.\n"
  )
)

# 3. Model Selection (on the Training Growth Rate Data)
# train_growth is already the first difference of log(GDP), so the search is
# restricted to ARMA-type models on that series:
#   d = 0  no further non-seasonal differencing (it was applied manually);
#   D = 0  no seasonal differencing either.
# Seasonal AR/MA terms (P, Q) are still searched for the quarterly data.
# trace = TRUE prints every candidate model; stepwise = FALSE and
# approximation = FALSE request the slower, exhaustive search with exact
# likelihood estimation instead of the default shortcuts.
cat("\n--- Running auto.arima for Model Selection ---\n")
arima_model <- auto.arima(train_growth,
                          d = 0, # Series was differenced manually (diff of logs)
                          D = 0, # No seasonal differencing
                          trace = TRUE,
                          stepwise = FALSE,
                          approximation = FALSE)


# 4. View Model Result
# Print the model chosen by auto.arima() (orders, coefficients, fit criteria).
cat("\n--- Final Selected ARIMA Model Summary ---\n")
print(arima_model)


# 5. Check Model Diagnostic (Residuals)
cat("\n--- Model Diagnostic Check: Ljung-Box Test for White Noise ---\n")

# The Ljung-Box test checks whether the residuals (errors) are white noise,
# i.e. free of remaining autocorrelation.
# A p-value > 0.05 means the white-noise hypothesis is not rejected, which
# suggests the model is adequate.
checkresiduals(arima_model) # Also plots the residuals, including their ACF


# 6. Forecast the held-out test period
# Project the growth rate h quarters ahead of the training sample, i.e. over
# exactly the quarters that were set aside as the test set.
forecast_test <- forecast(arima_model, h = h)

# Score the forecast against the held-out growth rates and print the
# resulting table of error measures.
cat("\n--- Forecast Accuracy (on Test Set) ---\n")
accuracy_result <- accuracy(forecast_test, test_growth)
print(accuracy_result)


# Revert Log-Difference Forecasts to Original GDP Level
#
# The forecasts are differences of log(GDP), so levels are rebuilt on the log
# scale: log(GDP[T + k]) = log(GDP[T]) + sum of the first k forecast
# log-differences, where T is the last training quarter.

# Step 1: Add the last known log(GDP) to the cumulative sum of log-growth
# forecasts. last_gdp_level is stored on the original scale, so it has to be
# logged first; adding the raw level would mix scales and make exp() below
# blow up.
log_forecast_levels <- log(last_gdp_level) + cumsum(forecast_test$mean)

# Step 2: Exponentiate to get back to the original GDP level scale
forecast_levels <- exp(log_forecast_levels)

# Forecast another 8 quarters into the future (beyond the test set).
# arima_model was estimated on the training sample only, so forecasting from
# it directly would start at the first test quarter again and merely repeat
# forecast_test. Re-apply the selected model (coefficients are not
# re-estimated) to the full growth-rate series so that the forecast origin is
# the last observed quarter.
future_h <- 8
full_sample_model <- Arima(gdp_growth_rate, model = arima_model)
future_forecast_growth <- forecast(full_sample_model, h = future_h)

# Anchor the future growth forecasts to the last actual GDP value of the whole
# dataset: cumulate the log-growth on top of log(last GDP), then exponentiate.
# as.numeric() strips the time index so the two pieces add element-wise.
future_forecast_levels <- ts(
  exp(
    log(as.numeric(tail(gdp_ts, 1))) +
      cumsum(as.numeric(future_forecast_growth$mean))
  ),
  start = start(future_forecast_growth$mean),
  frequency = frequency(gdp_ts)
)
# For simplicity and presentation, the plots below focus on the train/test
# split only.


# 7. Plot the Forecast
# Diagnostic view on the growth-rate scale: the forecast object is plotted
# first, then the actual test-period growth rates are overlaid in red.
plot(
  forecast_test,
  main = "Forecast of Quarterly GDP Growth Rate (Test Period)"
)
lines(test_growth, col = "red")
legend(
  "topright",
  legend = c("Forecast Mean", "Actual Growth Rate"),
  col = c("blue", "red"),
  lty = 1,
  cex = 0.8
)


# Plotting the Actual GDP Levels (The Panelist-Friendly Plot)

# Split the observed GDP series the same way as the growth rates: the last h
# quarters are the test period, everything before them is the training period.
gdp_observed_test <- tail(gdp_ts, h)
gdp_observed_train <- head(gdp_ts, length(gdp_ts) - h)

# Base plot of the full series. The y-axis gets a 5% margin below the minimum
# and above the maximum; the x-axis is extended h quarters (h / 4 years) past
# the last observation.
plot(
  gdp_ts,
  main = "Forecasting Philippine Economic Recovery: GDP Levels",
  xlab = "Year",
  ylab = "GDP (Current Prices)",
  ylim = range(gdp_ts) * c(0.95, 1.05),
  xlim = c(time(gdp_ts)[1], time(gdp_ts)[length(gdp_ts)] + h / 4)
)

# Overlay the training period (blue) and the actual test period (red).
lines(gdp_observed_train, col = "blue", lwd = 2)
lines(gdp_observed_test, col = "red", lwd = 2)

# Overlay the forecast mean on the level scale (green), indexed so that it
# starts at the first test quarter.
lines(
  ts(forecast_levels, start = time(gdp_observed_test)[1], frequency = 4),
  col = "green",
  lwd = 2
)

# Legend matching the three overlaid lines.
legend(
  "topleft",
  legend = c("Training Data", "Actual Test Data", "ARIMA Forecast"),
  col = c("blue", "red", "green"),
  lty = 1,
  lwd = 2,
  cex = 0.8
)

# Final Forecast Output

# Time index of the test-period forecasts: quarterly, beginning at the first
# test quarter.
forecast_index <- time(
  ts(forecast_levels, start = time(gdp_observed_test)[1], frequency = 4)
)

# Collect the forecast dates and level forecasts in one table, ready to be
# exported (e.g. to CSV).
final_forecast_output <- data.frame(
  Date = forecast_index,
  Forecast_GDP_Level = as.numeric(forecast_levels)
)

