In [None]:
import pandas as pd
import numpy as np
import torch
from transformer_time_series import train_model,GPTTimeSeries,build_loaders_from_returns,GPTTSConfig
import plotly.express as px

In [22]:
# -----------------------------
# Load and preprocess data
# -----------------------------
csv_path = r"D:\Quan\Quants\Neural Network\financial_attention\1h_data_20220101_20250601.csv"
closes = pd.read_csv(csv_path, index_col=0, parse_dates=True)[['SOL', 'ETH', 'BTC','ADA','XRP','LTC','TRX','LINK','DOT','DOGE']]
rets = closes.values.astype(np.float32)  

# -----------------------------
# Config + model
# -----------------------------
target_col = 0  # Predicting SOL returns
feature_cols =  [i for i in range(closes.shape[1]) if i != target_col]  # Using all other assets as features

cfg = GPTTSConfig(
    block_size=32,
    n_layer=2,
    n_head=2,
    n_embd=16,
    d_input=len(feature_cols) + 1,  # +1 if you include lagged target
    dropout=0.0,
    learning_rate=3e-4,
    weight_decay=0.1,
    grad_clip=1.0,
    device="cuda" if torch.cuda.is_available() else "cpu"
)
model = GPTTimeSeries(cfg)

# -----------------------------
# Build loaders
# -----------------------------
B = 8
T = cfg.block_size
train_loader, val_loader = build_loaders_from_returns(
    rets, block_size=cfg.block_size, batch_size=B,
    target_col=target_col, feature_cols=feature_cols
)

In [23]:
rets

array([[1.724200e+02, 3.723330e+03, 4.666330e+04, ..., 2.002377e+01,
        2.727000e+01, 1.719513e-01],
       [1.728300e+02, 3.725020e+03, 4.679370e+04, ..., 1.996571e+01,
        2.721100e+01, 1.718596e-01],
       [1.727000e+02, 3.727540e+03, 4.678920e+04, ..., 1.988366e+01,
        2.718500e+01, 1.716666e-01],
       ...,
       [1.570100e+02, 2.534000e+03, 1.047217e+05, ..., 1.408654e+01,
        4.081000e+00, 1.932989e-01],
       [1.565300e+02, 2.529110e+03, 1.046620e+05, ..., 1.396585e+01,
        4.077900e+00, 1.925647e-01],
       [1.562600e+02, 2.520990e+03, 1.044750e+05, ..., 1.393207e+01,
        4.068200e+00, 1.914600e-01]], dtype=float32)

In [20]:
# -----------------------------
# Train
model, train_loss_history, val_loss_history, steps_history = train_model(
        model, train_loader, val_loader, cfg, max_steps=1000, warmup_steps=200
    )
total_learnable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total learnable parameters: {total_learnable_params:,}")

2025-10-01 14:54:45,086 | INFO | Using fused AdamW: True
2025-10-01 14:54:45,100 | INFO | step 0 | loss 0.978688 | lr 3.000e-04 | tok/s 18621.0
2025-10-01 14:54:46,089 | INFO | step 0 | VALIDATION loss 0.981742
2025-10-01 14:54:46,090 | INFO | New best model at step 0 (val loss 0.981742)
2025-10-01 14:54:46,175 | INFO | step 10 | loss 0.063137 | lr 3.000e-04 | tok/s 235.1
2025-10-01 14:54:46,265 | INFO | step 20 | loss 0.010692 | lr 3.000e-04 | tok/s 217.2
2025-10-01 14:54:46,341 | INFO | step 30 | loss 0.002934 | lr 3.000e-04 | tok/s 204.0
2025-10-01 14:54:46,427 | INFO | step 40 | loss 0.001306 | lr 3.000e-04 | tok/s 191.0
2025-10-01 14:54:46,516 | INFO | step 50 | loss 0.000333 | lr 3.000e-04 | tok/s 179.1
2025-10-01 14:54:46,583 | INFO | step 60 | loss 0.000254 | lr 3.000e-04 | tok/s 171.0
2025-10-01 14:54:46,673 | INFO | step 70 | loss 0.000140 | lr 3.000e-04 | tok/s 161.3
2025-10-01 14:54:46,762 | INFO | step 80 | loss 0.000204 | lr 3.000e-04 | tok/s 152.8
2025-10-01 14:54:46,848

Total learnable parameters: 207,809


In [17]:

device = torch.device(cfg.device)
preds = []
targets = []
with torch.no_grad():
    for x, y in val_loader:
        x = x.to(device)
        y = y.to(device)
        out, _ = model(x)
        # Take only the last step prediction per sequence
        preds.append(out[:, -1].cpu().numpy())
        targets.append(y[:, -1].cpu().numpy())
preds = np.concatenate(preds, axis=0).flatten()
targets = np.concatenate(targets, axis=0).flatten()

df_plot = pd.DataFrame({
    "Time Step": np.arange(len(targets)),
    "Actual Returns": targets,
    "Predicted Returns": preds
})
df_plot = df_plot.melt(id_vars="Time Step", value_vars=["Actual Returns", "Predicted Returns"],
                        var_name="Type", value_name="Return")

fig = px.line(df_plot, x="Time Step", y="Return", color="Type",
                title="Predicted vs Actual Returns (Validation Set)")
fig.show()






In [10]:
targets

array([ 0.00377243, -0.00663316,  0.00903832, ..., -0.004702  ,
       -0.00306181, -0.0017264 ], dtype=float32)

In [5]:
pd.DataFrame(train_loss_history, columns=['Train Loss'])

Unnamed: 0,Train Loss
0,1.233902
1,0.086801
2,0.008269
3,0.003982
4,0.001928
...,...
95,0.000049
96,0.000081
97,0.000267
98,0.000081


In [7]:
pd.DataFrame(val_loss_history)

Unnamed: 0,0,1
0,0,1.240906
1,100,0.000152
2,200,0.000109
3,300,0.000118
4,400,7.8e-05
5,500,8.6e-05
6,600,9.4e-05
7,700,4.8e-05
8,800,7e-05
9,900,6.8e-05


In [8]:
0.000046-0.000032


1.4000000000000001e-05

In [9]:

df_train = pd.DataFrame({
    "Step": steps_history,
    "Loss": train_loss_history,
    "Type": "Train"
})
if val_loss_history:
    val_steps, val_losses = zip(*val_loss_history)
    df_val = pd.DataFrame({
        "Step": val_steps,
        "Loss": val_losses,
        "Type": "Validation"
    })
    df_loss = pd.concat([df_train, df_val], ignore_index=True)
else:
    df_loss = df_train

fig = px.line(df_loss, x="Step", y="Loss", color="Type", title="Training and Validation Loss")
fig.show()



