In [None]:
import pandas as pd
import numpy as np
import torch
from transformer_time_series import train_model,GPTTimeSeries,build_loaders_from_returns,GPTTSConfig
import plotly.express as px

In [2]:
# -----------------------------
# Reproducibility
# -----------------------------
# Seed numpy and torch before the model is constructed so that weight
# initialisation (and any RNG-driven behaviour in the loaders) repeats
# across a Restart Kernel -> Run All.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# -----------------------------
# Load and preprocess data
# -----------------------------
# NOTE(review): absolute, machine-specific path; point this at your local copy
# of the CSV (ideally via a configurable data directory) before running.
csv_path = r"D:\Quan\Quants\Neural Network\financial_attention\1h_data_20220101_20250601.csv"
# First CSV column is the timestamp index; keep only the ten assets used below.
closes = pd.read_csv(csv_path, index_col=0, parse_dates=True)[['SOL', 'ETH', 'BTC','ADA','XRP','LTC','TRX','LINK','DOT','DOGE']]
# Log returns per row: diff of log prices. dropna() removes the leading NaN row
# produced by diff() and any other row containing a missing value.
rets = np.log(closes).diff().dropna().values.astype(np.float32)

# -----------------------------
# Config + model
# -----------------------------
target_col = 0  # Predicting SOL returns
feature_cols =  [i for i in range(closes.shape[1]) if i != target_col]  # Using all other assets as features

cfg = GPTTSConfig(
    block_size=32,
    n_layer=2,
    n_head=2,
    n_embd=16,
    # +1 if you include lagged target
    # NOTE(review): this must equal the feature width that
    # build_loaders_from_returns actually emits -- confirm against that helper.
    d_input=len(feature_cols) + 1,
    dropout=0.0,
    learning_rate=3e-4,
    weight_decay=0.1,
    grad_clip=1.0,
    device="cuda" if torch.cuda.is_available() else "cpu"
)
model = GPTTimeSeries(cfg)

# -----------------------------
# Build loaders
# -----------------------------
B = 8  # batch size passed to the loader builder
T = cfg.block_size  # alias of the context length; not referenced by the cells shown here
train_loader, val_loader = build_loaders_from_returns(
    rets, block_size=cfg.block_size, batch_size=B,
    target_col=target_col, feature_cols=feature_cols
)

In [3]:
# Display the closing-price frame loaded above (pandas truncates the middle rows in the rendering).
closes

Unnamed: 0_level_0,SOL,ETH,BTC,ADA,XRP,LTC,TRX,LINK,DOT,DOGE
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-01-01 00:00:00+00:00,172.42,3723.33,46663.3,1.329982,0.83882,149.22,0.075930,20.02377,27.2700,0.171951
2022-01-01 01:00:00+00:00,172.83,3725.02,46793.7,1.328465,0.83911,148.31,0.075885,19.96571,27.2110,0.171860
2022-01-01 02:00:00+00:00,172.70,3727.54,46789.2,1.325466,0.84216,148.05,0.075852,19.88366,27.1850,0.171667
2022-01-01 03:00:00+00:00,172.90,3723.69,46844.0,1.325690,0.84295,147.88,0.075854,19.78830,27.1916,0.171317
2022-01-01 04:00:00+00:00,171.53,3708.72,46716.3,1.308206,0.83507,147.09,0.075703,19.56234,26.9299,0.170304
...,...,...,...,...,...,...,...,...,...,...
2025-05-31 20:00:00+00:00,157.50,2542.99,104800.0,0.690130,2.19501,87.68,0.266555,14.13086,4.1053,0.194531
2025-05-31 21:00:00+00:00,157.75,2542.98,104885.2,0.691366,2.19578,87.86,0.266146,14.15699,4.1034,0.194910
2025-05-31 22:00:00+00:00,157.01,2534.00,104721.7,0.685698,2.17952,87.39,0.265442,14.08654,4.0810,0.193299
2025-05-31 23:00:00+00:00,156.53,2529.11,104662.0,0.685873,2.17456,87.08,0.265768,13.96585,4.0779,0.192565


In [13]:
# -----------------------------
# Train
# -----------------------------
# train_model hands back the model together with the train-loss, validation-loss
# and step histories that the later cells tabulate and plot.
# NOTE(review): the captured log reports lr 3.000e-04 from step 0 onward even
# though warmup_steps=200 is passed -- confirm train_model applies the warmup.
training_result = train_model(
    model,
    train_loader,
    val_loader,
    cfg,
    max_steps=1000,
    warmup_steps=200,
)
model, train_loss_history, val_loss_history, steps_history = training_result

# Count only the parameters that receive gradients.
total_learnable_params = sum(
    weight.numel()
    for weight in model.parameters()
    if weight.requires_grad
)
print(f"Total learnable parameters: {total_learnable_params:,}")

2025-10-02 16:29:59,316 | INFO | Using fused AdamW: True
2025-10-02 16:29:59,329 | INFO | step 0 | loss 2.391993 | lr 3.000e-04 | tok/s 25610.4
2025-10-02 16:29:59,943 | INFO | step 0 | VALIDATION loss 2.398009
2025-10-02 16:29:59,943 | INFO | New best model at step 0 (val loss 2.398009)
2025-10-02 16:30:00,020 | INFO | step 10 | loss 0.273899 | lr 3.000e-04 | tok/s 364.4
2025-10-02 16:30:00,096 | INFO | step 20 | loss 0.015277 | lr 3.000e-04 | tok/s 329.0
2025-10-02 16:30:00,159 | INFO | step 30 | loss 0.005454 | lr 3.000e-04 | tok/s 304.1
2025-10-02 16:30:00,221 | INFO | step 40 | loss 0.002140 | lr 3.000e-04 | tok/s 283.4
2025-10-02 16:30:00,291 | INFO | step 50 | loss 0.000831 | lr 3.000e-04 | tok/s 263.0
2025-10-02 16:30:00,346 | INFO | step 60 | loss 0.000343 | lr 3.000e-04 | tok/s 248.8
2025-10-02 16:30:00,401 | INFO | step 70 | loss 0.000877 | lr 3.000e-04 | tok/s 236.2
2025-10-02 16:30:00,464 | INFO | step 80 | loss 0.000173 | lr 3.000e-04 | tok/s 223.2
2025-10-02 16:30:00,527

Total learnable parameters: 205,825


In [14]:

# Run the trained model over the validation loader, collect the last-step
# prediction and target of every sequence, and plot them against each other.
device = torch.device(cfg.device)
preds = []
targets = []

# Inference must run in eval mode so dropout / normalisation layers (if the
# config ever enables them) behave deterministically. The previous mode is
# restored afterwards so re-running the training cell is unaffected.
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            out, _ = model(x)
            # Take only the last step prediction per sequence
            preds.append(out[:, -1].cpu().numpy())
            # Targets are only needed as numpy arrays, so no device transfer is required.
            targets.append(y[:, -1].cpu().numpy())
finally:
    model.train(was_training)

preds = np.concatenate(preds, axis=0).flatten()
targets = np.concatenate(targets, axis=0).flatten()

# NOTE(review): "Time Step" is just the position in loader iteration order; it is
# chronological only if val_loader does not shuffle -- confirm in build_loaders_from_returns.
df_plot = pd.DataFrame({
    "Time Step": np.arange(len(targets)),
    "Actual Returns": targets,
    "Predicted Returns": preds
})
# Long format so plotly draws one coloured line per series.
df_plot = df_plot.melt(id_vars="Time Step", value_vars=["Actual Returns", "Predicted Returns"],
                        var_name="Type", value_name="Return")

fig = px.line(df_plot, x="Time Step", y="Return", color="Type",
                title="Predicted vs Actual Returns (Validation Set)")
fig.show()






In [16]:
# Tabulate the training-loss history returned by train_model (one row per recorded value).
pd.DataFrame(train_loss_history, columns=['Train Loss'])

Unnamed: 0,Train Loss
0,2.391993
1,0.273899
2,0.015277
3,0.005454
4,0.002140
...,...
95,0.000108
96,0.000064
97,0.000094
98,0.000083


In [17]:
# Tabulate the validation history. Each entry is a (step, loss) pair, so name the
# columns instead of leaving the default 0/1 labels.
pd.DataFrame(val_loss_history, columns=["Step", "Validation Loss"])

Unnamed: 0,0,1
0,0,2.398009
1,100,0.000125
2,200,9.8e-05
3,300,8.8e-05
4,400,5.3e-05
5,500,5.8e-05
6,600,4.5e-05
7,700,5.6e-05
8,800,5.8e-05
9,900,4.1e-05


In [9]:
# Scratch arithmetic: difference between two small values.
# NOTE(review): presumably two loss readings from an earlier run -- the literals do
# not match any output shown in this notebook; confirm their origin or remove the cell.
0.000046-0.000032


1.4000000000000001e-05

In [10]:

# Combine the training and validation loss histories into one long-format frame
# and plot both curves against the training step.
df_train = pd.DataFrame({
    "Step": steps_history,
    "Loss": train_loss_history,
    "Type": "Train"
})
if val_loss_history:
    # val_loss_history holds (step, loss) pairs; split them into two columns.
    val_steps, val_losses = zip(*val_loss_history)
    df_val = pd.DataFrame({
        "Step": val_steps,
        "Loss": val_losses,
        "Type": "Validation"
    })
    df_loss = pd.concat([df_train, df_val], ignore_index=True)
else:
    # No validation points recorded: plot the training curve alone.
    df_loss = df_train

# The loss falls from ~2.4 at step 0 to ~1e-4 within a few dozen steps; on a linear
# axis everything after the first point collapses onto the baseline, so use a
# logarithmic y-axis to keep the later dynamics readable.
fig = px.line(df_loss, x="Step", y="Loss", color="Type",
              title="Training and Validation Loss", log_y=True)
fig.show()



