In [1]:
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from statsmodels.tsa.arima.model import ARIMA

# Define a function to perform grid search for hyperparameter tuning
def grid_search_arima(df, p_values, d_values, q_values):
    """Fit an ARIMA model for every (p, d, q) combination and keep the best one.

    "Best" means the lowest AIC among the combinations that could be fitted.
    Combinations whose fit raises are skipped (best-effort search), but they
    are no longer skipped silently: a single ``RuntimeWarning`` summarising the
    failures is emitted after the search, so a systematic problem (for example
    a missing ``'Close'`` column) is visible instead of quietly yielding
    ``None``.

    Args:
        df: Object indexable with ``'Close'``; ``df['Close']`` is the series
            passed to ``ARIMA``.
        p_values: Iterable of candidate autoregressive orders.
        d_values: Iterable of candidate differencing orders.
        q_values: Iterable of candidate moving-average orders.

    Returns:
        Tuple ``(best_model, best_order, best_aic)``: the fitted result with
        the lowest AIC, its ``(p, d, q)`` order, and that AIC. When no
        combination could be fitted (or the grid is empty) this is
        ``(None, None, np.inf)``.
    """
    # Local imports keep this block self-contained within the file.
    import itertools
    import warnings

    best_aic = np.inf
    best_order = None
    best_model = None
    failures = []  # (order, exception) for every combination that was skipped

    # product() visits combinations in the same order as nested p/d/q loops.
    for order in itertools.product(p_values, d_values, q_values):
        try:
            model_fit = ARIMA(df['Close'], order=order).fit()
            aic = model_fit.aic
        except Exception as exc:
            # Best-effort search: remember the failure and move on.
            failures.append((order, exc))
            continue

        # Strict '<' keeps the first of several equally good orders; a NaN AIC
        # never compares as smaller and is therefore never selected.
        if aic < best_aic:
            best_aic = aic
            best_order = order
            best_model = model_fit

    if failures:
        first_order, first_exc = failures[0]
        warnings.warn(
            f"{len(failures)} ARIMA order(s) could not be fitted and were "
            f"skipped; first failure at order {first_order}: {first_exc!r}",
            RuntimeWarning,
            stacklevel=2,
        )

    return best_model, best_order, best_aic

# Define ranges for hyperparameters
p_values = range(0, 6)  # Autoregressive order
d_values = range(0, 3)  # Differencing order
q_values = range(0, 6)  # Moving Average order

# NOTE: `df` must already be defined before this point (it is not created
# here). The code below reads df['Close'] and passes df.index[-1] to
# pd.date_range, so it presumably expects a DataFrame with a datetime index
# and a 'Close' column -- confirm against wherever `df` is loaded.

# Perform grid search to find the best ARIMA model
best_model, best_order, best_aic = grid_search_arima(df, p_values, d_values, q_values)

# grid_search_arima returns None when no combination could be fitted; fail
# with a clear message instead of an AttributeError on `.forecast` below.
if best_model is None:
    raise RuntimeError("No ARIMA model could be fitted for any (p, d, q) combination")

# Print the best model parameters
print(f"Best ARIMA Order: {best_order}")
print(f"Best AIC: {best_aic}")

# Forecasting using the best model (steps beyond the end of the data)
forecast_steps = 30
forecast = best_model.forecast(steps=forecast_steps)

# Business-day dates for the forecast horizon; the first generated date is the
# last observed date itself, so it is dropped.
forecast_dates = pd.date_range(df.index[-1], periods=forecast_steps + 1, freq='B')[1:]

# Visualizing the forecast
plt.figure(figsize=(12, 6))
plt.plot(df['Close'], label='Historical Prices')
plt.plot(forecast_dates, forecast, label='Forecasted Prices', color='orange')
plt.title("Stock Price Forecast")
plt.xlabel("Date")
plt.ylabel("Price")
plt.legend()
plt.show()

# Calculate forecast error with a hold-out backtest.
# The forecast above covers dates AFTER the data ends, so it cannot be scored
# against the last `forecast_steps` observations (different time periods).
# Instead, refit the selected order on the series without its final
# `forecast_steps` points and forecast exactly that held-out window.
# NOTE(review): the order itself was selected on the full series, so the
# backtest is slightly optimistic; rerun the grid search on the training part
# if a fully out-of-sample estimate is needed.
if len(df) <= forecast_steps:
    raise ValueError("Not enough observations to hold out forecast_steps points for evaluation")

train_close = df['Close'].iloc[:-forecast_steps]
y_true = df['Close'].iloc[-forecast_steps:]  # True values for comparison
backtest_forecast = ARIMA(train_close, order=best_order).fit().forecast(steps=forecast_steps)

# Calculate the Mean Absolute Error (MAE)
mae = mean_absolute_error(y_true, backtest_forecast)
# Calculate the Mean Squared Error (MSE)
mse = mean_squared_error(y_true, backtest_forecast)
# Calculate the Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")

NameError: name 'df' is not defined