## plan:
- learn the basics of gold, silver and CAD prices
- try a simple linear regression just to say that we tried it
- experiment with LSTMs
- account for inflation and other economic factors that may be relevant
- scrape news headlines and use them for sentiment analysis

In [None]:
# !pip install yfinance

In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import hashlib
import json
from datetime import datetime, timedelta

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import layers, models

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [3]:
end_date = datetime.now()
start_date = end_date - timedelta(days=60)

print("Start date:", start_date)
print("End date:", end_date)

Start date: 2025-10-01 00:52:17.334073
End date: 2025-11-30 00:52:17.334073


In [4]:
# my_end = "2025-11-20" # so I can use the days after that for testing
# end_date = datetime.strptime(my_end, "%Y-%m-%d")
# start_date = "2025-10-01"

gold = yf.download("GC=F", start = start_date, end = end_date, interval = "30m", auto_adjust = False)
silver = yf.download("SI=F", start = start_date, end = end_date, interval = "30m",auto_adjust = False)
cad = yf.download("CADUSD=X", start = start_date, end = end_date, interval = "30m", auto_adjust = False)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [5]:
gold = gold[["Close"]].rename(columns={"Close": "Gold"})
silver = silver[["Close"]].rename(columns={"Close": "Silver"})
cad = cad[["Close"]].rename(columns={"Close": "CAD"})

In [14]:
all_prices = gold.join([silver, cad], how="outer")
all_prices.to_csv("prices_with_null.csv")

In [15]:
all_prices = all_prices.ffill().bfill() # forward-fill + backward-fill to deal with missing values
all_prices.to_csv("prices.csv")

In [16]:
all_prices.head()

Price,Gold,Silver,CAD
Ticker,GC=F,SI=F,CADUSD=X
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2025-09-30 23:30:00+00:00,3892.600098,47.325001,0.7182
2025-10-01 00:00:00+00:00,3892.600098,47.325001,0.718066
2025-10-01 00:30:00+00:00,3892.600098,47.325001,0.718045
2025-10-01 01:00:00+00:00,3892.600098,47.325001,0.718174
2025-10-01 01:30:00+00:00,3892.600098,47.325001,0.718231


In [17]:
all_prices.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2053 entries, 2025-09-30 23:30:00+00:00 to 2025-11-28 23:00:00+00:00
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   (Gold, GC=F)     2053 non-null   float64
 1   (Silver, SI=F)   2053 non-null   float64
 2   (CAD, CADUSD=X)  2053 non-null   float64
dtypes: float64(3)
memory usage: 64.2 KB


In [18]:
all_prices.describe()

Price,Gold,Silver,CAD
Ticker,GC=F,SI=F,CADUSD=X
count,2053.0,2053.0,2053.0
mean,4083.217005,49.639613,0.713281
std,109.418956,2.08817,0.002607
min,3851.800049,45.665001,0.707269
25%,4004.0,47.970001,0.711805
50%,4072.100098,48.884998,0.713267
75%,4153.899902,51.029999,0.714944
max,4394.299805,57.080002,0.719746


In [19]:
# flatten the multi-index columns
all_prices.columns = [' '.join(col).strip() for col in all_prices.columns.values]

all_prices.head()

Unnamed: 0_level_0,Gold GC=F,Silver SI=F,CAD CADUSD=X
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-09-30 23:30:00+00:00,3892.600098,47.325001,0.7182
2025-10-01 00:00:00+00:00,3892.600098,47.325001,0.718066
2025-10-01 00:30:00+00:00,3892.600098,47.325001,0.718045
2025-10-01 01:00:00+00:00,3892.600098,47.325001,0.718174
2025-10-01 01:30:00+00:00,3892.600098,47.325001,0.718231


In [20]:
all_prices = all_prices.rename(columns={
    'Gold GC=F': 'Gold',
    'Silver SI=F': 'Silver',
    'CAD CADUSD=X': 'CAD'
})

In [21]:
all_prices.head()

Unnamed: 0_level_0,Gold,Silver,CAD
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-09-30 23:30:00+00:00,3892.600098,47.325001,0.7182
2025-10-01 00:00:00+00:00,3892.600098,47.325001,0.718066
2025-10-01 00:30:00+00:00,3892.600098,47.325001,0.718045
2025-10-01 01:00:00+00:00,3892.600098,47.325001,0.718174
2025-10-01 01:30:00+00:00,3892.600098,47.325001,0.718231


## feature engineering

## Transformer

In [16]:
import numpy as np

def rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred)**2))

def mae(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

def mape(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [12]:
def create_sequences(data, window):
    X = []
    y = []
    for i in range(len(data) - window):
        X.append(data[i : i + window])
        y.append(data[i + window])
    return np.array(X), np.array(y)

In [13]:
def baseline_predict(X):
    return X[:, -1]

In [14]:
def train_test_split(X, y, test_ratio=0.2):
    split = int(len(X) * (1 - test_ratio))
    return X[:split], X[split:], y[:split], y[split:]

In [15]:
def build_model(window, features=3):
    model = models.Sequential([
        layers.Input((window, features)),
        layers.LSTM(64, return_sequences=False),
        layers.Dense(32, activation='relu'),
        layers.Dense(features) 
    ])
    
    model.compile(
        optimizer='adam',
        loss='mse'
    )
    return model

In [17]:
values = all_prices.values 

scaler = MinMaxScaler()
scaled = scaler.fit_transform(values)

window_sizes = [10, 20, 30, 40, 60, 90, 120]

results = []


for window in window_sizes:
    print(f"\n Window = {window}")

    X, y = create_sequences(scaled, window)

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    model = build_model(window)
    
    model.fit(
        X_train, y_train,
        epochs=20, batch_size=32,
        validation_split=0.1,
        verbose=0
    )
    
    y_pred = model.predict(X_test)

    y_base = baseline_predict(X_test)

    y_test_inv = scaler.inverse_transform(y_test)
    y_pred_inv = scaler.inverse_transform(y_pred)
    y_base_inv = scaler.inverse_transform(y_base)

    metrics = {}

    for i, asset in enumerate(["Gold", "Silver", "CAD"]):
        rmse_m = rmse(y_test_inv[:, i], y_pred_inv[:, i])
        mae_m  = mae (y_test_inv[:, i], y_pred_inv[:, i])
        mape_m = mape(y_test_inv[:, i], y_pred_inv[:, i])

        rmse_b = rmse(y_test_inv[:, i], y_base_inv[:, i])
        mae_b  = mae (y_test_inv[:, i], y_base_inv[:, i])
        mape_b = mape(y_test_inv[:, i], y_base_inv[:, i])

        metrics[asset] = {
            "RMSE": rmse_m, "MAE": mae_m, "MAPE": mape_m,
            "Base_RMSE": rmse_b, "Base_MAE": mae_b, "Base_MAPE": mape_b,
        }

    results.append((window, metrics))

    for asset in metrics:
        print(f"{asset}:")
        print("  Model      → RMSE: {:.4f} | MAE: {:.4f} | MAPE: {:.2f}%".format(
            metrics[asset]["RMSE"], metrics[asset]["MAE"], metrics[asset]["MAPE"]))
        print("  Baseline   → RMSE: {:.4f} | MAE: {:.4f} | MAPE: {:.2f}%".format(
            metrics[asset]["Base_RMSE"], metrics[asset]["Base_MAE"], metrics[asset]["Base_MAPE"]))
    print()


=== Window = 10 ===


I0000 00:00:1764456049.347698  113632 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10065 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4080 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9
2025-11-30 00:40:50.482425: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91400


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Gold:
  Model      → RMSE: 11.1630 | MAE: 7.9792 | MAPE: 0.19%
  Baseline   → RMSE: 10.0473 | MAE: 5.7122 | MAPE: 0.14%
Silver:
  Model      → RMSE: 0.3952 | MAE: 0.2930 | MAPE: 0.57%
  Baseline   → RMSE: 0.2145 | MAE: 0.1182 | MAPE: 0.23%
CAD:
  Model      → RMSE: 0.0004 | MAE: 0.0003 | MAPE: 0.04%
  Baseline   → RMSE: 0.0003 | MAE: 0.0002 | MAPE: 0.03%


=== Window = 20 ===
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Gold:
  Model      → RMSE: 11.7884 | MAE: 8.4966 | MAPE: 0.21%
  Baseline   → RMSE: 10.0676 | MAE: 5.7219 | MAPE: 0.14%
Silver:
  Model      → RMSE: 0.4293 | MAE: 0.3004 | MAPE: 0.57%
  Baseline   → RMSE: 0.2150 | MAE: 0.1185 | MAPE: 0.23%
CAD:
  Model      → RMSE: 0.0005 | MAE: 0.0003 | MAPE: 0.04%
  Baseline   → RMSE: 0.0003 | MAE: 0.0002 | MAPE: 0.03%


=== Window = 30 ===
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
Gold:
  Model      →

In [22]:
def build_time_features(index: pd.DatetimeIndex) -> np.ndarray:
    """
    Build cyclical time features from a DatetimeIndex.
    Returns array of shape (len(index), 8)
    """
    # basic components
    hour = index.hour.values
    minute = index.minute.values
    dayofweek = index.dayofweek.values      # 0–6
    dayofyear = index.dayofyear.values      # 1–366
    
    # cyclical encodings
    hour_sin = np.sin(2 * np.pi * hour / 24)
    hour_cos = np.cos(2 * np.pi * hour / 24)

    minute_sin = np.sin(2 * np.pi * minute / 60)
    minute_cos = np.cos(2 * np.pi * minute / 60)

    dow_sin = np.sin(2 * np.pi * dayofweek / 7)
    dow_cos = np.cos(2 * np.pi * dayofweek / 7)

    doy_sin = np.sin(2 * np.pi * dayofyear / 365)
    doy_cos = np.cos(2 * np.pi * dayofyear / 365)
    
    time_feats = np.stack(
        [hour_sin, hour_cos,
         minute_sin, minute_cos,
         dow_sin, dow_cos,
         doy_sin, doy_cos],
        axis=1
    )
    return time_feats  # (N, 8)

In [23]:
from sklearn.preprocessing import MinMaxScaler

def make_seq2seq_data(all_prices, enc_len, dec_len):
    """
    all_prices: DataFrame with columns [Gold, Silver, CAD] and DateTimeIndex
    enc_len: encoder history length (in steps)
    dec_len: decoder forecast length (in steps)
    
    Returns:
        enc_inputs: (num_samples, enc_len, num_features)
        dec_inputs: (num_samples, dec_len, num_features)
        targets:    (num_samples, dec_len, 3)   # only prices
        scaler:     fitted MinMaxScaler for prices
    """
    values = all_prices[["Gold", "Silver", "CAD"]].values.astype("float32")
    time_feats = build_time_features(all_prices.index)  # (N, 8)

    scaler = MinMaxScaler()
    scaled_prices = scaler.fit_transform(values)        # (N, 3)

    N = len(all_prices)
    num_samples = N - enc_len - dec_len
    if num_samples <= 0:
        raise ValueError("Not enough data for given enc_len and dec_len.")

    enc_list, dec_list, tgt_list = [], [], []

    for i in range(num_samples):
        # indices
        enc_start = i
        enc_end   = i + enc_len
        dec_end   = enc_end + dec_len

        # slices
        past_prices = scaled_prices[enc_start:enc_end]          # (enc_len, 3)
        past_time   = time_feats[enc_start:enc_end]             # (enc_len, 8)

        future_prices = scaled_prices[enc_end:dec_end]          # (dec_len, 3)
        future_time   = time_feats[enc_end:dec_end]             # (dec_len, 8)

        # encoder input: past prices + time feats
        enc_in = np.concatenate([past_prices, past_time], axis=1)   # (enc_len, 11)

        # decoder input:
        # teacher forcing: input_t at step k = true price at k-1 (first one uses last encoder price)
        first_price = past_prices[-1:]          # shape (1, 3)
        shifted_prices = np.concatenate([first_price, future_prices[:-1]], axis=0)  # (dec_len, 3)
        dec_in = np.concatenate([shifted_prices, future_time], axis=1)              # (dec_len, 11)

        # target: future prices (not including time feats)
        tgt = future_prices   # (dec_len, 3)

        enc_list.append(enc_in)
        dec_list.append(dec_in)
        tgt_list.append(tgt)

    enc_inputs = np.stack(enc_list)  # (samples, enc_len, 11)
    dec_inputs = np.stack(dec_list)  # (samples, dec_len, 11)
    targets    = np.stack(tgt_list)  # (samples, dec_len, 3)

    return enc_inputs, dec_inputs, targets, scaler

In [24]:
import tensorflow as tf
from tensorflow.keras import layers, Model

def transformer_encoder(x, num_layers, d_model, num_heads, dff, dropout):
    for _ in range(num_layers):
        # Self-attention
        attn_out = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=d_model
        )(x, x)
        x = layers.Add()([x, attn_out])
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        
        # Feed-forward
        ffn = layers.Dense(dff, activation="relu")(x)
        ffn = layers.Dense(d_model)(ffn)
        x = layers.Add()([x, ffn])
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        
        x = layers.Dropout(dropout)(x)
    return x

def transformer_decoder(y, enc_output, num_layers, d_model, num_heads, dff, dropout):
    for _ in range(num_layers):
        # 1) masked self-attention on decoder
        self_attn = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=d_model
        )(y, y, use_causal_mask=True)
        y = layers.Add()([y, self_attn])
        y = layers.LayerNormalization(epsilon=1e-6)(y)

        # 2) cross-attention over encoder outputs
        cross_attn = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=d_model
        )(y, enc_output)
        y = layers.Add()([y, cross_attn])
        y = layers.LayerNormalization(epsilon=1e-6)(y)

        # 3) feed-forward
        ffn = layers.Dense(dff, activation="relu")(y)
        ffn = layers.Dense(d_model)(ffn)
        y = layers.Add()([y, ffn])
        y = layers.LayerNormalization(epsilon=1e-6)(y)

        y = layers.Dropout(dropout)(y)
    return y

def build_transformer_seq2seq(
    enc_len,
    dec_len,
    num_features,  # prices + time feats = 11
    d_model=64,
    num_heads=4,
    dff=128,
    num_layers=2,
    dropout=0.1,
):
    # encoder input
    enc_inputs = layers.Input(shape=(enc_len, num_features), name="encoder_inputs")
    # decoder input
    dec_inputs = layers.Input(shape=(dec_len, num_features), name="decoder_inputs")

    # project input features to d_model
    enc_proj = layers.Dense(d_model)(enc_inputs)
    dec_proj = layers.Dense(d_model)(dec_inputs)

    # encoder
    enc_output = transformer_encoder(enc_proj, num_layers, d_model, num_heads, dff, dropout)

    # decoder
    dec_output = transformer_decoder(dec_proj, enc_output, num_layers, d_model, num_heads, dff, dropout)

    # final prediction: 3 outputs (Gold, Silver, CAD) per decoder timestep
    outputs = layers.Dense(3, name="prices")(dec_output)

    model = Model([enc_inputs, dec_inputs], outputs, name="transformer_seq2seq")
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        loss="mse"
    )
    return model

In [25]:
# choose lengths
ENC_LEN = 60   # past 60 steps = 30 hours
DEC_LEN = 1   # forecast next 48 steps = 24 hours (for example)
# If you really want 30min horizon only, set DEC_LEN = 1

enc_in, dec_in, targets, scaler = make_seq2seq_data(all_prices, ENC_LEN, DEC_LEN)

# train/test split (time-based)
split = int(len(enc_in) * 0.8)
X_enc_train, X_enc_test = enc_in[:split], enc_in[split:]
X_dec_train, X_dec_test = dec_in[:split], dec_in[split:]
y_train, y_test         = targets[:split], targets[split:]

num_features = enc_in.shape[-1]  # should be 11

model = build_transformer_seq2seq(
    enc_len=ENC_LEN,
    dec_len=DEC_LEN,
    num_features=num_features
)

history = model.fit(
    [X_enc_train, X_dec_train], y_train,
    validation_data=([X_enc_test, X_dec_test], y_test),
    epochs=50,
    batch_size=32,
    verbose=1
)

I0000 00:00:1764457069.523672  134221 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10065 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4080 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


Epoch 1/50


2025-11-30 00:57:54.033205: I external/local_xla/xla/service/service.cc:163] XLA service 0x73d77800fc20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2025-11-30 00:57:54.033222: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce RTX 4080 Laptop GPU, Compute Capability 8.9
2025-11-30 00:57:54.175797: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-11-30 00:57:55.023266: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91400
2025-11-30 00:57:55.463003: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-30 00:57:55.

[1m29/49[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 6ms/step - loss: 1.0837 

I0000 00:00:1764457084.744403  134517 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m46/49[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 6ms/step - loss: 0.8398

2025-11-30 00:58:06.129986: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-30 00:58:06.130069: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-30 00:58:06.130083: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-30 00:58:06.130138: I external/l








[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step - loss: 0.8105

2025-11-30 00:58:18.508894: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-30 00:58:18.508943: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-30 00:58:18.508965: I external/local_xla/xla/service/gpu/autotuning/dot_search_space.cc:208] All configs were filtered out because none of them sufficiently match the hints. Maybe the hints set does not contain a good representative set of valid configs? Working around this by using the full hints set instead.
2025-11-30 00:58:18.509008: I external/l

[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 360ms/step - loss: 0.3582 - val_loss: 0.0204
Epoch 2/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0952 - val_loss: 0.0095
Epoch 3/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0472 - val_loss: 0.0109
Epoch 4/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0239 - val_loss: 0.0079
Epoch 5/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0146 - val_loss: 0.0071
Epoch 6/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0105 - val_loss: 0.0057
Epoch 7/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0080 - val_loss: 0.0050
Epoch 8/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0067 - val_loss: 0.0045
Epoch 9/50
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [26]:
y_pred = model.predict([X_enc_test, X_dec_test])

# reshape to 2D for inverse scaling: (samples*DEC_LEN, 3)
y_test_2d = y_test.reshape(-1, 3)
y_pred_2d = y_pred.reshape(-1, 3)

y_test_inv = scaler.inverse_transform(y_test_2d)
y_pred_inv = scaler.inverse_transform(y_pred_2d)

# you can now compute RMSE/MAE/MAPE per asset over all forecast steps
def rmse(y_true, y_pred):
    return np.sqrt(np.mean((y_true - y_pred) ** 2))

def mae(y_true, y_pred):
    return np.mean(np.abs(y_true - y_pred))

def mape(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

for i, asset in enumerate(["Gold", "Silver", "CAD"]):
    r = rmse(y_test_inv[:, i], y_pred_inv[:, i])
    a = mae(y_test_inv[:, i], y_pred_inv[:, i])
    m = mape(y_test_inv[:, i], y_pred_inv[:, i])
    print(f"{asset} → RMSE: {r:.4f} | MAE: {a:.4f} | MAPE: {m:.2f}%")

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 162ms/step
Gold → RMSE: 14.2299 | MAE: 10.7272 | MAPE: 0.26%
Silver → RMSE: 0.5359 | MAE: 0.4050 | MAPE: 0.79%
CAD → RMSE: 0.0005 | MAE: 0.0004 | MAPE: 0.05%


In [36]:
# The test target windows correspond to these rows:
test_start_idx = split  # split from your earlier train/test split
test_timestamps = all_prices.index[test_start_idx + ENC_LEN : 
                                   test_start_idx + ENC_LEN + len(y_test_inv)]

df_actual_test = pd.DataFrame(
    y_test_inv,
    index=test_timestamps,
    columns=["Gold", "Silver", "CAD"]
)

df_pred_test = pd.DataFrame(
    y_pred_inv,
    index=test_timestamps,
    columns=["Gold", "Silver", "CAD"]
)

ValueError: Shape of passed values is (18672, 3), indices imply (437, 3)

In [28]:
import pandas as pd
import numpy as np

def forecast_interval(model, all_prices, scaler, ENC_LEN, start_date, end_date):
    """
    Autoregressive forecasting with a trained Transformer encoder–decoder.
    
    model      : trained Keras model
    all_prices : DataFrame with flat columns ['Gold','Silver','CAD'] and DatetimeIndex
    scaler     : fitted MinMaxScaler (for 3 price columns)
    ENC_LEN    : encoder window length (same used in training)
    start_date : str or pd.Timestamp
    end_date   : str or pd.Timestamp
    """

    start_date = pd.to_datetime(start_date)
    end_date   = pd.to_datetime(end_date)

    # ------------------------------------------------------------------
    # 1. Compute prediction horizon (# of 30-min steps)
    # ------------------------------------------------------------------
    future_index = pd.date_range(start=start_date, end=end_date, freq="30min")
    DEC_LEN = len(future_index)

    if DEC_LEN <= 0:
        raise ValueError("Invalid date interval: DEC_LEN = 0")

    # ------------------------------------------------------------------
    # 2. Get the encoder past window before start_date
    # ------------------------------------------------------------------
    if start_date not in all_prices.index:
        raise ValueError("start_date is outside the data index")

    # The last ENC_LEN rows before start_date
    enc_start = all_prices.index.get_loc(start_date) - ENC_LEN
    if enc_start < 0:
        raise ValueError("Not enough history before start_date")

    past_prices = all_prices.iloc[enc_start:enc_start+ENC_LEN]
    
    # Scale prices
    past_scaled = scaler.transform(past_prices[['Gold','Silver','CAD']].values)

    # Build time features
    past_time_features = build_time_features(past_prices.index)

    # Encoder input: concat prices + time features
    enc_input = np.concatenate([past_scaled, past_time_features], axis=1)  # (ENC_LEN, 11)
    enc_input = np.expand_dims(enc_input, axis=0)  # (1, ENC_LEN, 11)

    # ------------------------------------------------------------------
    # 3. Prepare autoregressive decoder loop
    # ------------------------------------------------------------------

    # Build the future time embeddings (we know timestamps)
    future_time_features = build_time_features(future_index)  # (DEC_LEN, 8)

    # Initialize decoder input
    # At step 0: price = last encoder price
    last_price = past_scaled[-1]  # shape (3,)
    
    dec_input_prices = []
    dec_outputs = []

    prev_price = last_price.copy()

    # ------------------------------------------------------------------
    # 4. Autoregressive decoding
    # ------------------------------------------------------------------
    for step in range(DEC_LEN):

        # Decoder input at current step:
        #   prev_price + future_time_features[step]
        dec_in_step = np.concatenate([prev_price, future_time_features[step]], axis=0)
        dec_input_prices.append(dec_in_step)

        # Convert accumulated decoder steps to array (1, step+1, 11)
        dec_input_arr = np.array(dec_input_prices).reshape(1, -1, 11)

        # FIX: pad decoder input to full DEC_LEN with zeros (model expects fixed length)
        pad_len = DEC_LEN - dec_input_arr.shape[1]
        if pad_len > 0:
            dec_input_arr = np.concatenate(
                [dec_input_arr, np.zeros((1, pad_len, 11))],
                axis=1
            )

        # Predict full horizon, take only the first (step) prediction
        pred = model.predict([enc_input, dec_input_arr], verbose=0)[0][step]

        # Save prediction
        dec_outputs.append(pred)

        # Update prev_price for next step
        prev_price = pred.copy()

    # ------------------------------------------------------------------
    # 5. Inverse scale predictions
    # ------------------------------------------------------------------
    preds_scaled = np.array(dec_outputs)  # (DEC_LEN, 3)
    preds = scaler.inverse_transform(preds_scaled)

    # ------------------------------------------------------------------
    # 6. Build output DataFrame
    # ------------------------------------------------------------------
    df_pred = pd.DataFrame(
        preds,
        index=future_index,
        columns=['Gold', 'Silver', 'CAD']
    )

    return df_pred

In [33]:
df_pred = forecast_interval(
    model,
    all_prices,
    scaler,
    ENC_LEN=60,
    start_date="2025-11-28 00:00:00",
    end_date="2025-11-30 23:30:00"
)

print(df_pred.head())

ValueError: start_date is outside the data index

In [34]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def plot_forecast_with_baseline(
    actual_df,     # true prices (DataFrame with DateTimeIndex)
    predicted_df,  # predicted prices (same index)
    asset="Gold"   # column name
):
    """
    Plots actual vs predicted vs naive baseline for a given asset.
    actual_df    : DataFrame with actual prices, indexed by Datetime
    predicted_df : DataFrame with predicted prices (same index)
    asset        : One of 'Gold', 'Silver', 'CAD'
    """

    # -----------------------------------------------------------
    # 1. Extract series
    # -----------------------------------------------------------
    actual = actual_df[asset]
    predicted = predicted_df[asset]

    # -----------------------------------------------------------
    # 2. Baseline (persistence model)
    # baseline[t] = actual[t-1]
    # First baseline value = NaN, so shift actual by 1
    # -----------------------------------------------------------
    baseline = actual.shift(1)

    # -----------------------------------------------------------
    # 3. Plot
    # -----------------------------------------------------------
    plt.figure(figsize=(14, 6))

    plt.plot(actual.index, actual.values, 
             color="blue", label="Actual Price", linewidth=2)

    plt.plot(predicted.index, predicted.values, 
             color="orange", label="Predicted Price (Transformer)", linewidth=2)

    plt.plot(baseline.index, baseline.values, 
             color="magenta", label="Baseline (Persistence)", linewidth=2, linestyle="--")

    # -----------------------------------------------------------
    # 4. Labels & formatting
    # -----------------------------------------------------------
    plt.title(f"{asset} Price – Actual vs Predicted vs Baseline", fontsize=16)
    plt.xlabel("Time and Date", fontsize=14)
    plt.ylabel("Price in US Dollars", fontsize=14)

    plt.grid(True, alpha=0.3)
    plt.legend(fontsize=12)
    plt.tight_layout()
    plt.show()

In [35]:
# Suppose you predicted Gold for a specific 30-min interval:

plot_forecast_with_baseline(
    actual_df = all_prices.loc[df_pred.index],
    predicted_df = df_pred,
    asset="Gold"
)

NameError: name 'df_pred' is not defined