W O R K O U T

In [7]:
# Full pretrain + fine-tune pipeline

import os
import torch
import pandas as pd
import importlib
from torch.utils.data import DataLoader
from Agents.r1_es import (init_trading_agent, generate_labels,
                          RLPretrainDataset, pretrain_macro, pretrain_vol, pretrain_price,
                          unfreeze_all)

from training_module import run_training
from RL1 import load_rl_data, TradingEnv


# === User-configurable parameters ===
# Instrument to train on and the folder holding its RL data.
ticker = 'ES'
rl_folder = 'RL Data'

# Training schedule: epochs for each pretraining call, minibatch size for the
# pretraining loaders, and the number of RL episodes per invocation.
pretrain_epochs = 100
batch_size = 32
episodes_per_run = 1

# Checkpoints live in their own folder, created up front so the final save
# cannot fail on a missing directory. One checkpoint file per (lowercased) ticker.
checkpoint_dir = 'checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)
ckpt_path = os.path.join(checkpoint_dir, f'{ticker.lower()}_agent.pth')

# === STEP 1: Load Data and Generate Labels ===
# Read the per-ticker CSV from the RL data folder and pass it through
# generate_labels; the returned frame is what the pretraining datasets consume.
csv_path = os.path.join(rl_folder, f"RL - {ticker}.csv")
df = generate_labels(pd.read_csv(csv_path))

# Base feature columns; pretraining uses these plus 'Price' appended last.
feature_cols = [
    'Macro', 'Vol', 'Security', 'Security Proba', 'AVWAP',
    'Base', 'R1', 'R2', 'S1', 'S2',
]
pretrain_feature_cols = [*feature_cols, 'Price']

# === STEP 2: Build Pretraining Datasets ===
# One dataset per label type, all built from the same frame and feature columns.
# Construction order (direction, size, stop) matches the macro / vol / price names.
macro_dataset, vol_dataset, price_dataset = (
    RLPretrainDataset(df, pretrain_feature_cols, label_type=label_kind)
    for label_kind in ('direction', 'size', 'stop')
)

# Matching shuffled minibatch loaders, one per dataset.
macro_loader, vol_loader, price_loader = (
    DataLoader(dataset, batch_size=batch_size, shuffle=True)
    for dataset in (macro_dataset, vol_dataset, price_dataset)
)

# === STEP 3: Initialize Agent ===
# The pretraining agent's input width equals the number of pretraining feature columns.
n_pretrain_inputs = len(pretrain_feature_cols)
agent = init_trading_agent(input_size=n_pretrain_inputs)

# === STEP 4: Pretrain Each Branch ===
# Each stage announces itself, then runs its pretraining routine on the same
# agent with its own loader, in macro -> vol -> price order.
pretrain_stages = (
    ("Starting Macro Pretraining...", pretrain_macro, macro_loader),
    ("Starting Vol Pretraining...", pretrain_vol, vol_loader),
    ("Starting Price Pretraining...", pretrain_price, price_loader),
)
for banner, pretrain_fn, stage_loader in pretrain_stages:
    print(banner)
    pretrain_fn(agent, stage_loader, epochs=pretrain_epochs)

# === STEP 5: Unfreeze Agent for RL Fine-tuning ===
unfreeze_all(agent)

# === STEP 5.5: Rebuild agent for RL ===
# The RL agent takes one more input than the pretraining agent
# (Macro...S2 + Price + position_size), so a fresh agent is built at that width.
agent_rl = init_trading_agent(input_size=len(feature_cols) + 2)

# Manually filter pretrained state_dict: every 'shared_layer.0.*' tensor is left out
# (presumably the input layer, whose shape follows input_size -- confirm in Agents.r1_es).
pretrained_state = agent.state_dict()
filtered_state = {k: v for k, v in pretrained_state.items() if not k.startswith('shared_layer.0.')}

# Load only compatible parameters. strict=False tolerates the keys filtered out above,
# but it would equally hide any other key mismatch, so inspect what was actually skipped
# instead of discarding the result (assumes the agent is a torch.nn.Module).
load_report = agent_rl.load_state_dict(filtered_state, strict=False)
unexpected_missing = [k for k in load_report.missing_keys if not k.startswith('shared_layer.0.')]
if unexpected_missing or load_report.unexpected_keys:
    print("Warning: pretrained weights only partially transferred "
          f"(missing: {unexpected_missing}, unexpected: {list(load_report.unexpected_keys)})")

# A state_dict carries tensor values only, not requires_grad flags, so unfreezing the
# pretraining agent does not carry over; unfreeze the agent that RL will actually train.
unfreeze_all(agent_rl)

# Replace agent
agent = agent_rl


# === STEP 6: Load Checkpoint if available ===
# NOTE: when a checkpoint exists it is loaded with the default strict matching, so it
# replaces the agent's weights, including those just transferred from pretraining.
if os.path.exists(ckpt_path):
    print(f"Loading checkpoint from {ckpt_path}")
    # map_location='cpu' keeps a checkpoint written on a GPU machine loadable on a
    # CPU-only one; load_state_dict then copies the values into the agent's own tensors.
    # torch.load unpickles the file, so only point this at checkpoints this script wrote.
    agent.load_state_dict(torch.load(ckpt_path, map_location='cpu'))
else:
    print("No checkpoint found, starting from pretrained weights.")

# === STEP 7: Create TradingEnv ===
# The environment is built from data loaded via load_rl_data for the same folder/ticker.
df_env = load_rl_data(rl_folder, ticker)
env = TradingEnv(df_env)

# === STEP 8: Run RL Training ===
# Fine-tune the agent in the environment for the configured number of episodes.
results = run_training(env, agent, num_episodes=episodes_per_run)

# === STEP 9: Save updated checkpoint ===
# Persist the weights first so they are on disk before the results are written.
trained_state = agent.state_dict()
torch.save(trained_state, ckpt_path)
print(f"Checkpoint saved to {ckpt_path}")

# === STEP 10: Save results ===
# Written to the current working directory.
results.to_csv('results.csv')


Starting Macro Pretraining...
Macro Pretrain Epoch 1, Loss: 0.6770
Macro Pretrain Epoch 2, Loss: 0.6768
Macro Pretrain Epoch 3, Loss: 0.6773
Macro Pretrain Epoch 4, Loss: 0.6768
Macro Pretrain Epoch 5, Loss: 0.6770
Macro Pretrain Epoch 6, Loss: 0.6764
Macro Pretrain Epoch 7, Loss: 0.6766
Macro Pretrain Epoch 8, Loss: 0.6772
Macro Pretrain Epoch 9, Loss: 0.6772
Macro Pretrain Epoch 10, Loss: 0.6762
Macro Pretrain Epoch 11, Loss: 0.6769
Macro Pretrain Epoch 12, Loss: 0.6766
Macro Pretrain Epoch 13, Loss: 0.6766
Macro Pretrain Epoch 14, Loss: 0.6768
Macro Pretrain Epoch 15, Loss: 0.6772
Macro Pretrain Epoch 16, Loss: 0.6766
Macro Pretrain Epoch 17, Loss: 0.6765
Macro Pretrain Epoch 18, Loss: 0.6769
Macro Pretrain Epoch 19, Loss: 0.6764
Macro Pretrain Epoch 20, Loss: 0.6770
Macro Pretrain Epoch 21, Loss: 0.6766
Macro Pretrain Epoch 22, Loss: 0.6768
Macro Pretrain Epoch 23, Loss: 0.6772
Macro Pretrain Epoch 24, Loss: 0.6763
Macro Pretrain Epoch 25, Loss: 0.6768
Macro Pretrain Epoch 26, Loss