In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import hashlib
import json
from datetime import datetime, timedelta

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import layers, models

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

In [3]:
end_date = datetime.now()
start_date = end_date - timedelta(days=60)

print("Start date:", start_date)
print("End date:", end_date)

Start date: 2025-10-01 02:30:32.483749
End date: 2025-11-30 02:30:32.483749


In [5]:
gold = yf.download("GC=F", start=start_date, end=end_date, interval="30m", auto_adjust = False)[["Close"]].rename(columns={"Close": "Gold"})
silver = yf.download("SI=F", start=start_date, end=end_date, interval="30m", auto_adjust = False)[["Close"]].rename(columns={"Close": "Silver"})
cad = yf.download("CADUSD=X", start=start_date, end=end_date, interval="30m", auto_adjust = False)[["Close"]].rename(columns={"Close": "CAD"})

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [6]:
all_prices = gold.join([silver, cad], how="outer")
all_prices.to_csv("prices_with_null.csv")

In [7]:
# filling missing values
all_prices = all_prices.ffill().bfill()

In [8]:
def create_sequences(df, window=20):
    X, y = [], []
    values = df.values
    for i in range(len(df) - window):
        X.append(values[i:i+window]) # past window prices
        y.append(values[i+window]) # next timestep prices
    return np.array(X), np.array(y)

In [9]:
window = 20
X, y = create_sequences(all_prices, window)
X_flat = X.reshape(X.shape[0], -1)

X_train, X_test, y_train, y_test = train_test_split(
    X_flat, y, test_size=0.2, shuffle=False
)

rf = RandomForestRegressor(
    n_estimators=500,
    max_depth=20,
    random_state=42,
    n_jobs=-1
)

rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)

print("MAE per asset:")
print("Gold  :", mean_absolute_error(y_test[:,0], y_pred[:,0]))
print("Silver:", mean_absolute_error(y_test[:,1], y_pred[:,1]))
print("CAD   :", mean_absolute_error(y_test[:,2], y_pred[:,2]))

MAE per asset:
Gold  : 9.119622518104109
Silver: 0.8905631041503308
CAD   : 0.00204900661689261


In [10]:
def forecast_interval(model, df, start_date, end_date, window=20):
    # date range at 30min freq
    future_index = pd.date_range(start=start_date, end=end_date, freq="30min")
    
    history = df.values[-window:].copy()
    predictions = []

    for _ in future_index:
        X_input = history.reshape(1, -1)
        y_pred = model.predict(X_input)[0]
        predictions.append(y_pred)
        history = np.vstack([history[1:], y_pred])

    pred_df = pd.DataFrame(predictions, index=future_index, columns=["Gold", "Silver", "CAD"])
    return pred_df

In [11]:
future_start = "2025-11-27"
future_end   = "2025-11-28"

forecast_df = forecast_interval(rf, all_prices, future_start, future_end, window)
print(forecast_df)

                            Gold    Silver       CAD
2025-11-27 00:00:00  4238.054573  52.83838  0.713708
2025-11-27 00:30:00  4237.159568  52.78568  0.713715
2025-11-27 01:00:00  4234.438973  52.79580  0.713708
2025-11-27 01:30:00  4233.568168  52.81762  0.713726
2025-11-27 02:00:00  4234.048971  52.80894  0.713705
2025-11-27 02:30:00  4233.651378  52.79551  0.713711
2025-11-27 03:00:00  4233.971577  52.77721  0.713715
2025-11-27 03:30:00  4233.729575  52.78821  0.713717
2025-11-27 04:00:00  4233.712581  52.80401  0.713704
2025-11-27 04:30:00  4233.436978  52.84130  0.713731
2025-11-27 05:00:00  4233.260577  52.83359  0.713732
2025-11-27 05:30:00  4233.594183  52.83743  0.713715
2025-11-27 06:00:00  4235.195582  52.79704  0.713665
2025-11-27 06:30:00  4236.016180  52.82022  0.713653
2025-11-27 07:00:00  4236.706582  52.76928  0.713603
2025-11-27 07:30:00  4236.602785  52.70582  0.713576
2025-11-27 08:00:00  4237.982786  52.55268  0.713387
2025-11-27 08:30:00  4241.708571  52.49275  0.

In [12]:
all_prices = gold.join([silver, cad], how="outer")
all_prices = all_prices.ffill().bfill()

df = all_prices.copy()

for col in ["Gold", "Silver", "CAD"]:
    df[f"{col}_return"] = df[col].pct_change()
    df[f"{col}_ma_5"] = df[col].rolling(5).mean()
    df[f"{col}_ma_10"] = df[col].rolling(10).mean()
    df[f"{col}_std_5"] = df[col].rolling(5).std()
    df[f"{col}_momentum"] = df[col] - df[col].shift(5)

df = df.dropna()

target = df[["Gold", "Silver", "CAD"]]

features = df.drop(columns=["Gold", "Silver", "CAD"])

X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, shuffle=False
)

rf = RandomForestRegressor(
    n_estimators=700,
    max_depth=25,
    random_state=42,
    n_jobs=-1
)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)

print("Improved MAE:")
print("Gold  :", mean_absolute_error(y_test["Gold"], y_pred[:,0]))
print("Silver:", mean_absolute_error(y_test["Silver"], y_pred[:,1]))
print("CAD   :", mean_absolute_error(y_test["CAD"], y_pred[:,2]))

  features = df.drop(columns=["Gold", "Silver", "CAD"])


Improved MAE:
Gold  : 3.9775589646936567
Silver: 0.9728448411180056
CAD   : 0.002524902424368064
