In [None]:
import os
import pandas as pd
from glob import glob
import numpy as np
import random
from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [None]:
import pandas as pd
import numpy as np
from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Rolling-window, one-step-ahead ARIMA evaluation for every dataset in new_dict.
#
# For each (filename, data) pair a 60-day training window is slid forward one
# day at a time. auto_arima is refit on each window, a single step is forecast,
# and that forecast is scored against the observation stamped exactly one day
# after the window's last day. One CSV of per-window results is written per
# dataset.
#
# NOTE(review): assumes each dataset is a univariate, daily-sampled series
# (a Series or a single-column DataFrame) -- confirm where new_dict is built.

# Window parameters are the same for every dataset, so build them once.
train_window = pd.Timedelta(days=60)
predict_horizon = pd.Timedelta(days=1)

for filename, df_selected in new_dict.items():
    print(f"Processing {filename}...")

    # Ensure datetime index (note: this updates the object held in new_dict).
    df_selected.index = pd.to_datetime(df_selected.index)

    # Per-window results, kept as parallel lists and assembled into a frame below.
    arima_params = []
    aics = []
    bics = []
    errors = []
    mses = []
    predictions_list = []
    actual_values_list = []
    dates = []

    date_start = df_selected.index.min()
    date_end = df_selected.index.max()
    current_start = date_start
    iteration = 1

    # The forecast target is current_start + train_window, so that is the value
    # that must not run past the data. (Requiring an extra predict_horizon of
    # slack, as before, silently dropped the final observation.)
    while current_start + train_window <= date_end:
        # Training covers [current_start, train_end]; the day right after it
        # is the forecast target.
        train_end = current_start + train_window - predict_horizon
        predict_start = train_end + predict_horizon

        print(f"Iteration {iteration}: Training from {current_start.date()} to {train_end.date()}, Predicting {predict_start.date()}")

        train_data = df_selected.loc[current_start:train_end]
        test_data = df_selected.loc[predict_start:predict_start]

        # Advance the window now so the `continue` below cannot stall the loop.
        current_start += predict_horizon
        iteration += 1

        # A gap in the index (e.g. a missing day) leaves nothing to fit on or
        # nothing to score against; skip the window instead of crashing.
        if train_data.empty or test_data.empty:
            print(f"Skipping {predict_start.date()}: no data available for this window")
            continue

        # Fit Auto ARIMA: non-seasonal, non-stepwise (parallel) search over
        # p, q in [0, 10] with the differencing order chosen automatically
        # (at most 2) and no cap on the combined order.
        model = auto_arima(train_data,
                           seasonal=False,
                           d=None,
                           start_p=0, max_p=10,
                           start_q=0, max_q=10,
                           max_order=None,
                           max_d=2,
                           trace=True,
                           n_jobs=10,
                           error_action="ignore",
                           suppress_warnings=True,
                           stepwise=False)

        # One-step-ahead forecast. Depending on the pmdarima version and input
        # type, predict() may return an ndarray or a date-indexed Series, so
        # extract the value positionally rather than with forecast[0].
        forecast = model.predict(n_periods=1)
        forecast_value = np.asarray(forecast).ravel()[0]

        # Likewise reduce the target row to a scalar so that a single-column
        # DataFrame and a Series are handled the same way.
        actual_value = np.asarray(test_data).ravel()[0]

        # Compute error metrics for this single point.
        mae = mean_absolute_error([actual_value], [forecast_value])
        mse = mean_squared_error([actual_value], [forecast_value])

        # Store results
        arima_params.append(model.order)
        aics.append(model.aic())
        bics.append(model.bic())
        errors.append(mae)
        mses.append(mse)
        predictions_list.append(forecast_value)
        actual_values_list.append(actual_value)
        dates.append(predict_start.date())

    # Convert results to DataFrame (one row per evaluated window).
    results_df = pd.DataFrame({
        "Date": dates,
        "ARIMA_Params": arima_params,
        "AIC": aics,
        "BIC": bics,
        "MAE": errors,
        "MSE": mses,
        "Forecast": predictions_list,
        "Actual Values": actual_values_list
    })

    # Save results to CSV, named after the source file without its ".csv".
    csv_filename = f"{filename.replace('.csv', '')}_arima_injected_results.csv"
    results_df.to_csv(csv_filename, index=False)
    print(f"Results saved to {csv_filename}\n")

print("Processing complete for all datasets.")