# Ethereum Price Prediction with ARIMA, LSTM and Random Forest

This notebook downloads daily ETH-USD prices and compares three models:

- ARIMA (classical time series)
- LSTM (deep learning)
- Random Forest (tree-based machine learning)

You can run this end-to-end on Google Colab or a local Jupyter environment.


In [None]:
# If you run this on a fresh environment (e.g. Colab),
# uncomment the line below to install the required packages.

# !pip install numpy pandas matplotlib yfinance statsmodels tensorflow scikit-learn


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler

import yfinance as yf
from statsmodels.tsa.arima.model import ARIMA

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping


In [None]:
def evaluate_regression(y_true, y_pred, name="model"):
    """Print and return the RMSE and MAE of a set of predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted values, aligned element-wise with ``y_true``.
        name: Label used in the printed line and stored in the result.

    Returns:
        A dict with the keys ``"name"``, ``"rmse"`` and ``"mae"``.
    """
    mae = mean_absolute_error(y_true, y_pred)
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    print(f"{name} -> RMSE: {rmse:.4f}, MAE: {mae:.4f}")
    return dict(name=name, rmse=rmse, mae=mae)


In [None]:
# Download the daily price history for the chosen symbol and keep only the
# closing price.
symbol = "ETH-USD"
start_date = "2017-01-01"

eth_df = yf.download(symbol, start=start_date)

# Stop here with a clear message if nothing came back, rather than letting a
# later cell fail on an empty frame.
if eth_df is None or eth_df.empty:
    raise ValueError(f"No price data returned for {symbol} starting {start_date}")

# Newer yfinance releases can return two-level (field, ticker) columns even
# for a single symbol. Collapse them to the field name so that
# eth_df["Close"] is a plain Series in the cells below.
if isinstance(eth_df.columns, pd.MultiIndex):
    eth_df.columns = eth_df.columns.get_level_values(0)

eth_df = eth_df[["Close"]].dropna()
print(eth_df.head())
print(eth_df.tail())


In [None]:
# Quick visual check of the full closing-price history.
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(eth_df["Close"])
ax.set_title("ETH-USD Closing Price")
ax.set_xlabel("Date")
ax.set_ylabel("Price (USD)")
fig.tight_layout()
plt.show()


In [None]:
# Chronological 80/20 split of the closing prices for ARIMA:
# the first 80% of rows are training data, the remainder is the test set.
close = eth_df["Close"]
train_size = int(len(eth_df) * 0.8)
train_arima, test_arima = close.iloc[:train_size], close.iloc[train_size:]

print(f"Train points: {len(train_arima)}, Test points: {len(test_arima)}")


In [None]:
# === ARIMA model ===
# Order of the model: p autoregressive lags, d differences, q moving-average lags.
p, d, q = 5, 1, 0  # you can tune these manually later
order = (p, d, q)

# Fit on the training portion only; the test portion is kept for evaluation.
arima_model = ARIMA(endog=train_arima, order=order)
arima_result = arima_model.fit()

print(arima_result.summary())


In [None]:
# Forecast the whole test period in one go from the end of the training data,
# then re-label the forecast with the test dates so both series line up.
n_test = len(test_arima)
arima_forecast = arima_result.forecast(steps=n_test)
arima_forecast.index = test_arima.index  # align index

metrics_arima = evaluate_regression(
    test_arima.values,
    arima_forecast.values,
    name="ARIMA",
)

# Plot the training data, the held-out data and the forecast on one axis.
fig, ax = plt.subplots(figsize=(10, 4))
for series, label in (
    (train_arima, "Train"),
    (test_arima, "Test"),
    (arima_forecast, "ARIMA forecast"),
):
    ax.plot(series.index, series.values, label=label)
ax.set_title("ARIMA - ETH-USD Close Price Forecast")
ax.set_xlabel("Date")
ax.set_ylabel("Price (USD)")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# === Shared preparation for LSTM and Random Forest ===

# Closing prices as a single-column 2-D array, the layout MinMaxScaler expects.
values = eth_df["Close"].values.reshape(-1, 1)

# Fit the scaler on the first 80% of rows only (the same cut-off as the ARIMA
# split). Fitting on the full series would leak the min/max of the test period
# into the training data. Every target in the sequence test set lies after
# this cut-off, so the scaler never sees the prices it is evaluated on.
scaler_fit_end = max(1, int(len(values) * 0.8))

scaler = MinMaxScaler()
scaler.fit(values[:scaler_fit_end])

# The whole series is transformed with the training-period scaling; values
# outside the training range simply map outside [0, 1].
values_scaled = scaler.transform(values)

def create_sequences(series, window_size=60):
    """Build sliding-window samples for one-step-ahead prediction.

    Each sample is ``window_size`` consecutive items of ``series``; its target
    is the item that immediately follows the window.

    Args:
        series: Indexable, sliceable sequence of observations.
        window_size: Number of consecutive observations per input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays with one entry per window. Both are
        empty when ``series`` has no more than ``window_size`` items.
    """
    starts = range(len(series) - window_size)
    windows = [series[start : start + window_size] for start in starts]
    targets = [series[start + window_size] for start in starts]
    return np.array(windows), np.array(targets)

# Turn the scaled series into (window, next value) samples.
window_size = 60
X_all, y_all = create_sequences(values_scaled, window_size=window_size)

print("X_all shape:", X_all.shape, "y_all shape:", y_all.shape)

# Chronological 80/20 split of the samples (no shuffling).
train_size_seq = int(len(X_all) * 0.8)

X_train_seq, X_test_seq = X_all[:train_size_seq], X_all[train_size_seq:]
y_train_seq, y_test_seq = y_all[:train_size_seq], y_all[train_size_seq:]

print("Train sequences:", X_train_seq.shape, "Test sequences:", X_test_seq.shape)

# Each target is the value right after its window, so the target dates start
# window_size rows into the original index.
dates_all = eth_df.index[window_size:]
train_dates_seq, test_dates_seq = dates_all[:train_size_seq], dates_all[train_size_seq:]


In [None]:
# === LSTM model ===

# The sequence arrays are already shaped (samples, window_size, 1),
# so they can be fed to the LSTM layer as they are.
X_train_lstm, X_test_lstm = X_train_seq, X_test_seq

# One LSTM layer followed by a single linear output unit.
model_lstm = Sequential()
model_lstm.add(LSTM(50, input_shape=(window_size, 1)))
model_lstm.add(Dense(1))

model_lstm.compile(optimizer="adam", loss="mse")

# Stop once the validation loss has not improved for 5 epochs and roll back
# to the best weights seen.
early_stop = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

# validation_split holds out the last 20% of the training samples.
fit_options = dict(
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1,
)
history = model_lstm.fit(X_train_lstm, y_train_seq, **fit_options)


In [None]:
# Predict on the held-out sequences; predictions and targets are both still
# in the scaled space at this point.
lstm_pred_scaled = model_lstm.predict(X_test_lstm)
y_test_lstm_scaled = y_test_seq.reshape(-1, 1)

# Map predictions and targets back to USD before computing the errors.
lstm_pred, y_test_lstm = (
    scaler.inverse_transform(scaled)
    for scaled in (lstm_pred_scaled, y_test_lstm_scaled)
)

metrics_lstm = evaluate_regression(
    y_test_lstm.ravel(),
    lstm_pred.ravel(),
    name="LSTM",
)

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(test_dates_seq, y_test_lstm.ravel(), label="True")
ax.plot(test_dates_seq, lstm_pred.ravel(), label="LSTM prediction")
ax.set_title("LSTM - ETH-USD Close Price Forecast")
ax.set_xlabel("Date")
ax.set_ylabel("Price (USD)")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# === Random Forest model ===

# The forest needs 2-D input, so each window is flattened into one row of
# lagged values.
n_train, n_test = X_train_seq.shape[0], X_test_seq.shape[0]
X_train_rf = X_train_seq.reshape(n_train, -1)
X_test_rf = X_test_seq.reshape(n_test, -1)

rf_params = dict(
    n_estimators=300,
    max_depth=None,
    random_state=42,  # fixed seed for repeatable results
    n_jobs=-1,
)
rf = RandomForestRegressor(**rf_params)

# The targets are flattened to 1-D for fitting.
rf.fit(X_train_rf, y_train_seq.ravel())

rf_pred_scaled = rf.predict(X_test_rf).reshape(-1, 1)
y_test_rf_scaled = y_test_seq.reshape(-1, 1)

# Back to USD so the errors are comparable with the other models.
rf_pred = scaler.inverse_transform(rf_pred_scaled)
y_test_rf = scaler.inverse_transform(y_test_rf_scaled)

metrics_rf = evaluate_regression(
    y_test_rf.ravel(),
    rf_pred.ravel(),
    name="Random Forest",
)

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(test_dates_seq, y_test_rf.ravel(), label="True")
ax.plot(test_dates_seq, rf_pred.ravel(), label="RF prediction")
ax.set_title("Random Forest - ETH-USD Close Price Forecast")
ax.set_xlabel("Date")
ax.set_ylabel("Price (USD)")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Side-by-side error table, one row per model.
# NOTE: the rows are not strictly like-for-like. ARIMA forecasts the whole
# test period in one multi-step run, while LSTM and Random Forest predict one
# step ahead from the true preceding window. The ARIMA test span also comes
# from a separate 80/20 split of the raw rows rather than of the sequences.
metric_rows = [metrics_arima, metrics_lstm, metrics_rf]
summary = pd.DataFrame(metric_rows).set_index("name")
summary
