In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
from statsmodels.tsa.arima.model import ARIMA
import pmdarima as pm  # For Auto-ARIMA

warnings.filterwarnings('ignore')

# --- Load -------------------------------------------------------------------
# Read the CSV, parse 'Date' as day-first dates, then index the frame by date
# in ascending order.
file_path = 'Ethereum Historical Data.csv'
df = pd.read_csv(file_path)
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
df = df.set_index('Date').sort_index()

# 'Price' is handled as text here: strip the commas, cast to float, and
# linearly interpolate whatever gaps remain.
df['Price'] = (
    df['Price']
    .str.replace(',', '')
    .astype(float)
    .interpolate(method='linear')
)

# Series that is modelled below
price_data = df['Price']

# --- Split ------------------------------------------------------------------
# First 80% of the rows (in date order) for training, the rest for testing.
train_size = int(len(price_data) * 0.8)
train_data = price_data[:train_size]
test_data = price_data[train_size:]

# --- Order selection --------------------------------------------------------
# Non-seasonal stepwise search with p and q capped at 5; trace=True prints
# each candidate as it is tried.
model_auto = pm.auto_arima(
    train_data,
    start_p=1,
    start_q=1,
    max_p=5,
    max_q=5,
    seasonal=False,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True,
)

# Report the order the search settled on
print(f"Optimal Order (p, d, q): {model_auto.order}")

# --- Fit and forecast -------------------------------------------------------
# Refit a statsmodels ARIMA on the training data using the selected order,
# then forecast one step per test observation.
model_arima = ARIMA(train_data, order=model_auto.order)
model_arima_fit = model_arima.fit()

forecast = model_arima_fit.forecast(steps=len(test_data))

# Define function to evaluate metrics

def evaluate_metrics(y_true, y_pred):
    """Compute regression error metrics for a set of predictions.

    Both inputs are converted to float NumPy arrays before any arithmetic so
    that values are compared position by position. Subtracting two pandas
    Series directly aligns them on their index labels; when the forecast's
    index differs from the actuals' index that produces NaN for MAPE even
    though the array-based metrics look valid.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, same shape as ``y_true``.

    Returns
    -------
    tuple
        ``(rmse, mse, mae, mape, r2)``; ``mape`` is expressed in percent.
    """
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)

    # MSE is computed once and reused for RMSE.
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)

    # Scale by the magnitude of the actual value; the 1e-10 floor only guards
    # against division by zero and must not swallow negative actuals.
    denominator = np.maximum(np.abs(actual), 1e-10)
    mape = np.mean(np.abs(actual - predicted) / denominator) * 100

    r2 = r2_score(actual, predicted)
    return rmse, mse, mae, mape, r2

# Report error metrics over the first 3, 7 and 30 forecast steps
for days in (3, 7, 30):
    # Horizon longer than the forecast: say so and move on.
    if days > len(forecast):
        print(f"\nSkipping evaluation for {days}-day Prediction as not enough data points are available.")
        continue

    # Compare the leading `days` test observations with the matching forecasts.
    metrics = evaluate_metrics(test_data[:days], forecast[:days])
    rmse, mse, mae, mape, r2 = metrics

    print(f"\nEvaluation for {days}-day Prediction:")
    print(f"RMSE: {rmse}")
    print(f"MSE: {mse}")
    print(f"MAE: {mae}")
    print(f"MAPE: {mape}%")
    print(f"R²: {r2}")

Performing stepwise search to minimize aic
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=17649.047, Time=0.14 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=17656.916, Time=0.03 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=17647.279, Time=0.08 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=17647.761, Time=0.10 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=17655.305, Time=0.03 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=17648.935, Time=0.19 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=17649.850, Time=0.94 sec
 ARIMA(1,1,0)(0,0,0)[0]             : AIC=17645.742, Time=0.03 sec
 ARIMA(2,1,0)(0,0,0)[0]             : AIC=17647.384, Time=0.04 sec
 ARIMA(1,1,1)(0,0,0)[0]             : AIC=17647.496, Time=0.10 sec
 ARIMA(0,1,1)(0,0,0)[0]             : AIC=17646.223, Time=0.06 sec
 ARIMA(2,1,1)(0,0,0)[0]             : AIC=17648.256, Time=0.35 sec

Best model:  ARIMA(1,1,0)(0,0,0)[0]          
Total fit time: 2.120 seconds
Optimal Order (p, d, q): (1, 1, 0)

Evaluation for 3-day Prediction:
RMSE: 2