<a href="https://colab.research.google.com/github/sh20022002/probability_of_change/blob/main/trader_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import StandardScaler
import pickle
import os
import random
import matplotlib.pyplot as plt
import pandas_ta as ta

from torchrl.envs import EnvBase
from tensordict import TensorDict
from torchrl.envs.utils import step_mdp
from torch.distributions import Categorical
from tensordict import TensorDict
from torchrl.collectors import SyncDataCollector
from torchrl.data import ReplayBuffer, LazyTensorStorage
from torchrl.objectives import A2CLoss
from tensordict import TensorDictBase
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torchrl.data import UnboundedContinuousTensorSpec, DiscreteTensorSpec

import torch
import torch.nn as nn
from torch.utils.data import  Dataset
import torch.optim as optim

import collections
import warnings


In [None]:

MODEL_SAVE_PATH = "trading_model.pth"
SP500_TICKERS = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
COLUMNS = ['Open', 'Close', 'Volume', 'MACD', 'ATR'] #change to open close volume macd rsi


In [None]:

class StockDataset(Dataset):
    def __init__(self, tickers, num_segments=10):
        self.tickers = tickers
        self.num_segments = num_segments
        self.segmented_data = []
        self.current_ticker_idx = 0

        self._load_and_split_data()

    def _load_and_split_data(self):
        saved = False
        for ticker in self.tickers:
            print(f"Processing ticker: {ticker}")

            try:
                stock_data = yf.download(ticker, period='max', progress=False)
                if stock_data.empty:
                    print(f"Warning: Empty data for ticker: {ticker}")
                    return None
                
                df = stock_data['Open', 'High', 'Low', 'Close', 'Volume'].dropna()

                df['MACD'] = ta.macd(df['Close'], fast=12, slow=26, signal=9)['MACD_12_26_9']
                df['ATR'] = ta.atr(df['High'], df['Low'], df['Close'], length=14)
                
                features = df[COLUMNS].dropna()
                if features.empty:
                    return None

                    
                # Select features to scale
                scaler = StandardScaler()
                scaled_data = scaler.fit_transform(features)

                # Save the fitted scaler
                if not saved:
                    os.makedirs("data", exist_ok=True)
                    with open("data/scaler.pkl", "wb") as f:
                        pickle.dump(scaler, f)

            except Exception as e:
                        print(f"Error downloading data for {ticker}: {e}")
                        return None

            scaled_values = scaled_data.astype(np.float32)
            segment_size = len(scaled_values) // self.num_segments
            for i in range(self.num_segments):
                start = i * segment_size
                end = (i + 1) * segment_size if i < self.num_segments - 1 else len(scaled_values)
                segment = scaled_values[start:end]
                if len(segment) > 0:
                    self.segmented_data.append((segment, ticker))
                    self.current_ticker_idx += 1

            
    def __len__(self):
        return len(self.segmented_data)

    def __getitem__(self, idx):
        return self.segmented_data[idx]


    def fetch_next_stock(self):
        """Returns a dataset segment in order, keeping track of stock index."""
        if not self.segmented_data:
            return None, None  # No more stocks

        if self.current_ticker_idx >= len(self.segmented_data):
            self.current_ticker_idx = 0  # Loop back to start

        segment, ticker = self.segmented_data[self.current_ticker_idx]
        self.current_ticker_idx += 1
        return segment, ticker

    def save(self, path):
        """
        Saves the StockDataset to a pickle file.

        Args:
            filepath (str): The path to the pickle file.
        """
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wb') as f:
            pickle.dump(self, f)
        print('Saved Dataset.')

    @staticmethod
    def load(path):
        """
        Loads the StockDataset from a pickle file.

        Args:
            filepath (str): The path to the pickle file.

        Returns:
            StockDataset: The loaded StockDataset object.
        """
        with open(path, 'rb') as f:
            return pickle.load(f)




In [175]:
class TradingEnv(EnvBase):
    def __init__(self, df: pd.DataFrame, window_size=30):
        super().__init__()

        if isinstance(df, np.ndarray):
            df = pd.DataFrame(df)

        if len(df) < window_size:
            raise ValueError(f"Insufficient data: need at least {window_size} rows, got {len(df)}.")

        self.df = df.reset_index(drop=True)
        self.initial_balance = 1000.0
        self.balance = 1000.0
        self.net_worth = 1000.0
        self.window_size = window_size
        self.current_step = 0

        obs_dim = window_size * self.df.shape[1]
        self.observation_spec = UnboundedContinuousTensorSpec(shape=(obs_dim,))
        self.action_spec = DiscreteTensorSpec(n=4)

        self._set_seed(0)

    def _reset(self, tensordict=None):
        self.current_step = self.window_size
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.positions = []
        self.position_type = None
        self.trades = []

        obs = self._get_observation()
        return TensorDict({"observation": obs.unsqueeze(0)}, batch_size=[])

    def _get_observation(self):
        frame = self.df.iloc[self.current_step - self.window_size:self.current_step].copy()
        frame["Balance"] = self.balance
        frame["NetWorth"] = self.net_worth

        obs = torch.tensor(frame.astype(np.float32).values, dtype=torch.float32)
        return obs

    def _set_seed(self, seed: int):
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        return seed

    def _step(self, tensordict):
        action = tensordict["action"].item()
        reward = 0.0
        done = False

        current_price = self.df.loc[self.current_step, 'Close']

        if action == 0:  # Buy
            if self.position_type is None:
                self.positions.append(current_price)
                self.position_type = 'long'
            elif self.position_type == 'short':
                entry = self.positions.pop(0)
                profit = entry - current_price
                reward += profit
                self.balance += profit
                if not self.positions:
                    self.position_type = None

        elif action == 1:  # Sell
            if self.position_type == 'long':
                entry = self.positions.pop(0)
                profit = current_price - entry
                reward += profit
                self.balance += profit
                if not self.positions:
                    self.position_type = None

        elif action == 2:  # Short
            if self.position_type is None:
                self.positions.append(current_price)
                self.position_type = 'short'
            elif self.position_type == 'long':
                entry = self.positions.pop(0)
                profit = current_price - entry
                reward += profit
                self.balance += profit
                if not self.positions:
                    self.position_type = None

        elif action == 3:  # Cover
            if self.position_type == 'short':
                entry = self.positions.pop(0)
                profit = entry - current_price
                reward += profit
                self.balance += profit
                if not self.positions:
                    self.position_type = None

        self.net_worth = self.balance
        self.trades.append((self.current_step, action, current_price, reward))

        self.current_step += 1
        if self.current_step >= len(self.df):
            done = True

        next_obs = self._get_observation()

        return TensorDict({
            "next": TensorDict({"observation": next_obs.unsqueeze(0)}, batch_size=[]),
            "reward": torch.tensor([reward], dtype=torch.float32),
            "done": torch.tensor([done], dtype=torch.bool),
            "terminated": torch.tensor([done], dtype=torch.bool),
        }, batch_size=[])


In [None]:
def evaluate_model(stock_data, model):
    """Evaluates the model on stock data and returns total reward."""
    env = TradingEnv(stock_data)
    state = env.reset()
    done = False
    total_reward = 0
    actions = []

    while not done:
        with torch.inference_mode:
            state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
            logits, value, _ = model(state_tensor)
            action = torch.argmax(logits).item()


        actions.append(action)
        state, reward, done, _ = env.step(action)
        total_reward += reward

    return total_reward, actions

# Function to plot evaluation results
def plot_evaluation_results(stock_data, actions):
    """Plots stock data and overlays model evaluation results."""
    close_prices = stock_data[:, 3]  # Close prices

    plt.figure(figsize=(12, 6))
    plt.plot(close_prices, label="Close Price", color='black')

    buy_signals = [i for i in range(len(actions)) if actions[i] == 1]
    sell_signals = [i for i in range(len(actions)) if actions[i] == 2]

    plt.scatter(buy_signals, close_prices[buy_signals], color='green', marker='^', label='Buy')
    plt.scatter(sell_signals, close_prices[sell_signals], color='red', marker='v', label='Sell')

    plt.xlabel("Time")
    plt.ylabel("Stock Price")
    plt.legend()
    plt.title(f"Model Evaluation Results")
    plt.show()



In [177]:

if os.path.exists("data/stock_dataset.pkl"):
    dataset = StockDataset.load("data/stock_dataset.pkl")
else:
    tickers = pd.read_html(SP500_TICKERS)[0]['Symbol'].tolist()
    dataset = StockDataset(tickers, num_segments=10)
    dataset.save("data/stock_dataset.pkl")


In [178]:


class StockLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim=128, lstm_layers=2, fc_dim=64, output_dim=4):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.lstm_layers = lstm_layers

        self.lstm = nn.LSTM(input_dim, hidden_dim, lstm_layers, batch_first=True, dropout=0.2 if lstm_layers > 1 else 0.0)
        self.norm = nn.LayerNorm(hidden_dim)

        self.actor = nn.Sequential(
            nn.Linear(hidden_dim, fc_dim),
            nn.ReLU(),
            nn.Linear(fc_dim, output_dim)
        )

        self.critic = nn.Sequential(
            nn.Linear(hidden_dim, fc_dim),
            nn.ReLU(),
            nn.Linear(fc_dim, 1)
        )

    def forward(self, tensordict: TensorDictBase):
        obs = tensordict["observation"]  # Shape: (B, T, input_dim)

        if obs.ndim == 2:
            obs = obs.unsqueeze(0)

        # Get sequence lengths or default to full length
        lengths = tensordict.get("lengths", torch.full((obs.size(0),), obs.size(1), dtype=torch.long, device=obs.device))

        # Pack padded sequences for LSTM
        packed = pack_padded_sequence(obs, lengths.cpu(), batch_first=True, enforce_sorted=False)
        packed_out, _ = self.lstm(packed)
        lstm_out, _ = pad_packed_sequence(packed_out, batch_first=True)

        # Gather last output per sequence (according to actual length)
        idx = (lengths - 1).view(-1, 1, 1).expand(-1, 1, self.hidden_dim)
        last_out = lstm_out.gather(1, idx).squeeze(1)  # Shape: (B, hidden_dim)

        last_out = self.norm(last_out)

        logits = self.actor(last_out)
        value = self.critic(last_out)

        tensordict.set("logits", logits)
        tensordict.set("value", value)
        return tensordict


In [179]:
def init_or_load(input_dim: int = 7,
                     output_dim: int = 3,
                     lr: float = 1e-3,
                     memory_size: int = 10_000):
    """
    Returns (model, optimizer, memory), loading pretrained weights if available.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)

    model = StockLSTM(input_dim=input_dim, output_dim=output_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    memory = collections.deque(maxlen=memory_size)

    if os.path.exists(MODEL_SAVE_PATH):
        model.load_state_dict(torch.load(MODEL_SAVE_PATH))
        print(f" Loaded existing model from {MODEL_SAVE_PATH}")
    else:
        print("  No existing model found — initialized new network.")

    return model, optimizer, memory, device

In [180]:

def convert_state(state):
    if isinstance(state, np.ndarray):
        state = torch.tensor(state, dtype=torch.float32)
    if isinstance(state, list):
        state = torch.tensor(state, dtype=torch.float32)
    if state.ndim == 2:
        state = state.unsqueeze(0)
    return state

In [None]:

def train(dataset, episodes=10, gamma=0.95, lr=1e-3):
    model, optimizer, _, device = init_or_load(input_dim=7, output_dim=4, lr=lr)
    model = model.to(device)
    optimizer = optimizer.to(device)

    if os.path.exists(MODEL_SAVE_PATH):
        model.load_state_dict(torch.load(MODEL_SAVE_PATH))
        print("Loaded existing model.")

    window_size = 7

    while True:
        stock_data, ticker = dataset.fetch_next_stock()
        if stock_data is None:
            print("All data segments processed, restarting training loop...")
            break

        if isinstance(stock_data, np.ndarray):
            stock_data = pd.DataFrame(stock_data)

        if len(stock_data) < window_size:
            print(f"Skipping stock {ticker}: only {len(stock_data)} rows (<{window_size}).")
            continue

        env = TradingEnv(stock_data, window_size=window_size)

        collector = SyncDataCollector(
            create_env_fn=lambda: env,
            policy=lambda td: TensorDict({"action": Categorical(logits=model(td["observation"]))}, batch_size=[]),
            frames_per_batch=episodes,
            total_frames=episodes
        )

        loss_module = A2CLoss(model, gamma=gamma)

        buffer = ReplayBuffer(storage=LazyTensorStorage(max_size=episodes))

        for i, tensordict_data in enumerate(collector):
            buffer.extend(tensordict_data)

            sampled_data = buffer.sample(batch_size=episodes)

            loss_td = loss_module(sampled_data)

            optimizer.zero_grad()
            loss_td["loss"].backward()
            optimizer.step()

            print(f"[{ticker}] Episode {i+1}/{episodes} | Loss: {loss_td['loss'].item():.4f}")

        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        print(f"[{ticker}] Training complete and model saved.")


In [182]:

# Run training
train(dataset, episodes=10)


Using device: cpu
  No existing model found — initialized new network.




IndexError: too many indices for tensor of dimension 3