The goal of this notebook is to evaluate how well the ARIMA model performs across different countries and sectors on historical data (using a chronological train-test split) and to record the following metrics:

Mean Absolute Error (MAE)

Root Mean Squared Error (RMSE)

R² Score

In [2]:
# Attach Google Drive to the Colab runtime so the project files under
# /content/drive can be read and written by the cells below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from statsmodels.tsa.arima.model import ARIMA
import warnings
warnings.filterwarnings("ignore")

# Load preprocessed data
data_path = "/content/drive/MyDrive/sectoral_inflation_forecasting/data/processed/sectoral_inflation_data_processed.csv"
plot_dir = "/content/drive/MyDrive/sectoral_inflation_forecasting/outputs/plots"

# Evaluation settings (kept together so they are easy to find and tune)
ARIMA_ORDER = (1, 1, 0)   # (p, d, q) passed to statsmodels' ARIMA
TRAIN_FRACTION = 0.8      # leading share of each series used for fitting; the rest is the test set
MIN_OBSERVATIONS = 6      # series with fewer rows than this are skipped

df_processed = pd.read_csv(data_path)

# Initialize performance storage
evaluation_results = []
# Series whose fit/scoring or plot failed, with the stage and the error, so that
# problems are visible instead of being silently dropped.
evaluation_failures = []

# Loop through countries and sectors.
# The nested loops over unique values are kept (rather than a groupby) so that the
# order of rows in `evaluation_results` stays exactly as before; combinations that
# do not occur in the data produce an empty frame and are skipped by the length check.
for country in df_processed['Country'].unique():
    for sector in df_processed['Sector'].unique():
        in_series = (df_processed['Country'] == country) & (df_processed['Sector'] == sector)
        sub_df = df_processed[in_series].sort_values('Year')

        if len(sub_df) < MIN_OBSERVATIONS:
            continue  # Skip if not enough data

        # Chronological split: the earliest observations train the model,
        # the most recent ones are held out for testing.
        y = sub_df['Inflation_Rate'].values
        train_size = int(len(y) * TRAIN_FRACTION)
        train, test = y[:train_size], y[train_size:]

        # Fit ARIMA and score the hold-out forecast.
        # Only `Exception` is caught so that KeyboardInterrupt / SystemExit still
        # stop the cell; every failure is recorded rather than ignored.
        try:
            model_fit = ARIMA(train, order=ARIMA_ORDER).fit()
            forecast = model_fit.forecast(steps=len(test))

            # Evaluation metrics
            mae = mean_absolute_error(test, forecast)
            rmse = np.sqrt(mean_squared_error(test, forecast))
            r2 = r2_score(test, forecast)
        except Exception as exc:
            evaluation_failures.append({
                'Country': country,
                'Sector': sector,
                'Stage': 'fit/evaluate',
                'Error': repr(exc)
            })
            continue

        evaluation_results.append({
            'Country': country,
            'Sector': sector,
            'MAE': mae,
            'RMSE': rmse,
            'R2_Score': r2
        })

        # Optional: Plot actual vs predicted.
        # A plotting or saving problem must not discard the metrics recorded above,
        # and the figure is always closed so figures do not accumulate in memory.
        fig, ax = plt.subplots(figsize=(8, 4))
        try:
            test_steps = range(len(test))
            ax.plot(test_steps, test, label='Actual')
            ax.plot(test_steps, forecast, label='Predicted', linestyle='--')
            ax.set_title(f"{country} - {sector} | ARIMA Forecast vs Actual")
            ax.set_xlabel("Time")
            ax.set_ylabel("Inflation Rate")
            ax.legend()
            fig.tight_layout()
            fig.savefig(f"{plot_dir}/model_eval_{country}_{sector}.png")
        except Exception as exc:
            evaluation_failures.append({
                'Country': country,
                'Sector': sector,
                'Stage': 'plot',
                'Error': repr(exc)
            })
        finally:
            plt.close(fig)

# Report problems with print(): warnings are globally silenced in this notebook.
if evaluation_failures:
    print(f"{len(evaluation_failures)} step(s) failed during evaluation:")
    for failure in evaluation_failures:
        print(f"  {failure['Country']} - {failure['Sector']} [{failure['Stage']}]: {failure['Error']}")


Export Evaluation Results

In [4]:
# Build the metrics table with an explicit column list: if no series could be
# evaluated, the frame still has the expected columns, so the CSV gets a header
# and the sort below does not raise a KeyError on the missing 'RMSE' column.
metric_columns = ['Country', 'Sector', 'MAE', 'RMSE', 'R2_Score']
eval_df = pd.DataFrame(evaluation_results, columns=metric_columns)
eval_path = "/content/drive/MyDrive/sectoral_inflation_forecasting/outputs/reports/arima_model_evaluation_metrics.csv"
eval_df.to_csv(eval_path, index=False)

# View a few top performers (lowest RMSE first)
eval_df.sort_values(by='RMSE').head()


Unnamed: 0,Country,Sector,MAE,RMSE,R2_Score
2,India,Transport,0.144412,0.180698,-0.359067
12,Denmark,Transport,0.242043,0.242462,-0.546032
10,Denmark,Food,0.28887,0.301186,-11.555435
7,Germany,Transport,0.264409,0.341524,-0.72542
1,India,Energy,0.309751,0.413468,-5.27938
