<a href="https://colab.research.google.com/github/sh20022002/probability_of_change/blob/main/trader_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [170]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import StandardScaler
import pickle
import os
import random
import matplotlib.pyplot as plt


from torchrl.envs import EnvBase
from tensordict import TensorDict
from torchrl.envs.utils import step_mdp
from torch.distributions import Categorical
from tensordict import TensorDict
from torchrl.collectors import SyncDataCollector
from torchrl.data import ReplayBuffer, LazyTensorStorage
from torchrl.objectives import A2CLoss, ValueEstimators
from tensordict import TensorDictBase
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torchrl.data import Unbounded, OneHot
from torchrl.modules.distributions.continuous import TanhNormal
from torchrl.modules import Actor
from tensordict.nn import TensorDictModule
from torchrl.modules import ProbabilisticActor




import torch
import torch.nn as nn
from torch.utils.data import  Dataset
import torch.optim as optim

import collections
import warnings


In [171]:

MODEL_SAVE_PATH = "trading_model.pth"
SP500_TICKERS = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
COLUMNS = ['Open', 'Close', 'Volume', 'MACD', 'ATR'] #change to open close volume macd rsi


In [172]:
def calculate_macd(df: pd.DataFrame, fast_period: int = 12, slow_period: int = 26, signal_period: int = 9) -> pd.DataFrame:
    """Calculate MACD, Signal Line, and MACD Histogram."""
    df = df.copy()
    df['EMA_fast'] = df['Close'].ewm(span=fast_period, adjust=False).mean()
    df['EMA_slow'] = df['Close'].ewm(span=slow_period, adjust=False).mean()
    df['MACD'] = df['EMA_fast'] - df['EMA_slow']
    return df['MACD']

def calculate_atr(df: pd.DataFrame, period: int = 14) -> pd.Series:
    """Calculate Average True Range (ATR)."""
    df = df.copy()
    df['H-L'] = df['High'] - df['Low']
    df['H-PC'] = np.abs(df['High'] - df['Close'].shift(1))
    df['L-PC'] = np.abs(df['Low'] - df['Close'].shift(1))
    df['TR'] = df[['H-L', 'H-PC', 'L-PC']].max(axis=1)
    atr = df['TR'].ewm(span=period, adjust=False).mean()
    return atr

In [173]:

class StockDataset(Dataset):
    def __init__(self, tickers):
        self.tickers = tickers
        self.segmented_data = []
        self.current_ticker_idx = 0

        self._load_and_split_data()

    def _load_and_split_data(self):
        saved = False
        for ticker in self.tickers:
            print(f"Processing ticker: {ticker}")

            try:
                df = yf.download(ticker, period='max', progress=False)
                if df.empty:
                    print(f"Warning: Empty data for ticker: {ticker}")
                    continue
                
                df = df.dropna()
                
                df['MACD'] = calculate_macd(df)
                df['ATR'] = calculate_atr(df)

                features = df[COLUMNS].dropna()
                if features.empty:
                    continue

                    
                # Select features to scale
                scaler = StandardScaler()
                scaled_data = scaler.fit_transform(features)

                # Save the fitted scaler
                if not saved:
                    os.makedirs("data", exist_ok=True)
                    with open("data/scaler.pkl", "wb") as f:
                        pickle.dump(scaler, f)
                        saved = True
                        

            except Exception as e:
                print(f"Error downloading data for {ticker}: {e}")
                continue        

            scaled_values = scaled_data.astype(np.float32)
            self.segmented_data.append((scaled_values, ticker))
            self.current_ticker_idx += 1

            
    def __len__(self):
        return len(self.segmented_data)

    def __getitem__(self, idx):
        return self.segmented_data[idx]


    def fetch_next_stock(self):
        """Returns a dataset segment in order, keeping track of stock index."""
        if not self.segmented_data:
            return None, None  # No more stocks

        if self.current_ticker_idx >= len(self.segmented_data):
            self.current_ticker_idx = 0  # Loop back to start

        segment, ticker = self.segmented_data[self.current_ticker_idx]
        self.current_ticker_idx += 1
        return segment, ticker

    def save(self, path):
        """
        Saves the StockDataset to a pickle file.

        Args:
            filepath (str): The path to the pickle file.
        """
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wb') as f:
            pickle.dump(self, f)
        print('Saved Dataset.')

    @staticmethod
    def load(path):
        """
        Loads the StockDataset from a pickle file.

        Args:
            filepath (str): The path to the pickle file.

        Returns:
            StockDataset: The loaded StockDataset object.
        """
        with open(path, 'rb') as f:
            return pickle.load(f)




In [174]:
class TradingEnv(EnvBase):
    def __init__(self, df: pd.DataFrame, window_size=30):
        super().__init__()

        if isinstance(df, np.ndarray):
            df = pd.DataFrame(df)

        if len(df) < window_size:
            raise ValueError(f"Insufficient data: need at least {window_size} rows, got {len(df)}.")

        self.df = df.reset_index(drop=True)
        self.initial_balance = 1000.0
        self.balance = 1000.0
        self.net_worth = 1000.0
        self.window_size = window_size
        self.current_step = 0

        obs_dim = window_size * self.df.shape[1]
        self.observation_spec = Unbounded(shape=(obs_dim,))
        self.action_spec = OneHot(n=4)
        self._set_seed(0)

    def _reset(self, tensordict=None):
        self.current_step = self.window_size
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.positions = []
        self.position_type = None
        self.trades = []

        obs = self._get_observation()
        return TensorDict({"observation": obs.unsqueeze(0)}, batch_size=[])

    def _get_observation(self):
        frame = self.df.iloc[self.current_step - self.window_size:self.current_step].copy()
        frame["Balance"] = self.balance
        frame["NetWorth"] = self.net_worth

        obs = torch.tensor(frame.astype(np.float32).values, dtype=torch.float32)
        return obs

    def _set_seed(self, seed: int):
        torch.manual_seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        return seed

    def _step(self, tensordict):
        action = tensordict["action"]

        if action.ndim == 0:
            action = action.unsqueeze(0)

        batch_size = action.shape[0]
        rewards = torch.zeros(batch_size)
        dones = torch.zeros(batch_size, dtype=torch.bool)
        next_observations = []

        commission_rate = 0.001  # 0.1%
        tax_rate = 0.15          # 15% profit tax
        leverage = 1.0           # 2x leverage

        for i in range(batch_size):
            a = action[i].item()
            current_price = self.stock_data[self.current_step]
            next_price = self.stock_data[self.current_step + 1] if self.current_step + 1 < len(self.stock_data) else current_price
            price_diff = next_price - current_price

            reward = 0.0

            if a == 1:  # Buy
                profit = price_diff * leverage
                tax = tax_rate * max(profit, 0.0)
                cost = commission_rate * current_price * leverage
                reward = profit - tax - cost
            elif a == 2:  # Sell
                profit = -price_diff * leverage
                tax = tax_rate * max(profit, 0.0)
                cost = commission_rate * current_price * leverage
                reward = profit - tax - cost
            elif a == 3:  # Short
                profit = -price_diff * leverage
                tax = tax_rate * max(profit, 0.0)
                cost = commission_rate * current_price * leverage
                reward = profit - tax - cost
            # a == 0 (Hold): reward stays 0

            rewards[i] = reward
            dones[i] = self.current_step + 1 >= len(self.stock_data) - 1

            next_obs = self._get_next_observation()
            next_observations.append(next_obs)

        self.current_step += 1
        next_observations = torch.stack(next_observations, dim=0)

        return TensorDict({
            "reward": rewards,
            "done": dones,
            "observation": next_observations,
        }, batch_size=[batch_size])


In [175]:
def evaluate_model(stock_data, model):
    """Evaluates the model on stock data and returns total reward."""
    env = TradingEnv(stock_data)
    state = env.reset()
    done = False
    total_reward = 0
    actions = []

    while not done:
        with torch.inference_mode:
            state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
            logits, value, _ = model(state_tensor)
            action = torch.argmax(logits).item()


        actions.append(action)
        state, reward, done, _ = env.step(action)
        total_reward += reward

    return total_reward, actions

# Function to plot evaluation results
def plot_evaluation_results(stock_data, actions):
    """Plots stock data and overlays model evaluation results."""
    close_prices = stock_data[:, 3]  # Close prices

    plt.figure(figsize=(12, 6))
    plt.plot(close_prices, label="Close Price", color='black')

    buy_signals = [i for i in range(len(actions)) if actions[i] == 1]
    sell_signals = [i for i in range(len(actions)) if actions[i] == 2]

    plt.scatter(buy_signals, close_prices[buy_signals], color='green', marker='^', label='Buy')
    plt.scatter(sell_signals, close_prices[sell_signals], color='red', marker='v', label='Sell')

    plt.xlabel("Time")
    plt.ylabel("Stock Price")
    plt.legend()
    plt.title(f"Model Evaluation Results")
    plt.show()



In [176]:

if os.path.exists("data/stock_dataset.pkl"):
    dataset = StockDataset.load("data/stock_dataset.pkl")
else:
    tickers = pd.read_html(SP500_TICKERS)[0]['Symbol'].tolist()
    dataset = StockDataset(tickers)
    dataset.save("data/stock_dataset.pkl")


In [177]:
print(dataset.fetch_next_stock())

(array([[-0.7405194 , -0.7300623 , -0.87796915, -0.07685785, -0.66487986],
       [-0.7405194 , -0.72995913, -0.7758922 , -0.07643412, -0.66538656],
       [-0.7405194 , -0.72995913, -0.87796915, -0.07610697, -0.6665013 ],
       ...,
       [ 2.6384516 ,  2.5597277 ,  0.9142302 , -4.8513846 ,  7.275909  ],
       [ 2.5477831 ,  2.628197  ,  0.20786287, -4.0690956 ,  6.952138  ],
       [ 2.6292393 ,  2.5903203 , -0.10967056, -3.566246  ,  6.235909  ]],
      dtype=float32), 'MMM')


In [178]:


class StockLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim=128, lstm_layers=2, fc_dim=64, output_dim=4):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.lstm_layers = lstm_layers
        self.output_dim = output_dim

        self.lstm = nn.LSTM(input_dim, hidden_dim, lstm_layers, batch_first=True, dropout=0.2 if lstm_layers > 1 else 0.0)
        self.norm = nn.LayerNorm(hidden_dim)

        self.actor = nn.Sequential(
            nn.Linear(hidden_dim, fc_dim),
            nn.ReLU(),
            nn.Linear(fc_dim, output_dim)
        )

        self.critic = nn.Sequential(
            nn.Linear(hidden_dim, fc_dim),
            nn.ReLU(),
            nn.Linear(fc_dim, 1)
        )

    def forward(self, tensordict: TensorDictBase) -> TensorDictBase:
        obs = tensordict["observation"]
        # Get the batch size and sequence length safely
        batch_size, seq_len = obs.size(0), obs.size(1)
        
        # If obs has more than 3 dimensions (batch, seq, features), flatten it
        if obs.ndim > 3:
            obs = obs.view(batch_size, seq_len, -1)
        
        output, _ = self.lstm(obs)
        output = self.norm(output)
        tensordict["_features"] = output
        return tensordict

    



In [179]:
def init_or_load(input_dim: int = 7,
                     output_dim: int = 3,
                     lr: float = 1e-3,
                     memory_size: int = 10_000):
    """
    Returns (model, optimizer, memory), loading pretrained weights if available.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)

    model = StockLSTM(input_dim=input_dim, output_dim=output_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    memory = collections.deque(maxlen=memory_size)

    if os.path.exists(MODEL_SAVE_PATH):
        model.load_state_dict(torch.load(MODEL_SAVE_PATH))
        print(f" Loaded existing model from {MODEL_SAVE_PATH}")
    else:
        print("  No existing model found — initialized new network.")

    return model, optimizer, memory, device

In [180]:

def convert_state(state):
    if isinstance(state, np.ndarray):
        state = torch.tensor(state, dtype=torch.float32)
    if isinstance(state, list):
        state = torch.tensor(state, dtype=torch.float32)
    if state.ndim == 2:
        state = state.unsqueeze(0)
    return state


In [181]:

def train(dataset, episodes=30, gamma=0.95, lr=1e-3):
    model, optimizer, _, device = init_or_load(input_dim=7, output_dim=4, lr=lr)
    model = model.to(device)
    

    if os.path.exists(MODEL_SAVE_PATH):
        model.load_state_dict(torch.load(MODEL_SAVE_PATH))
        print("Loaded existing model.")

    window_size = 7

    while True:
        stock_data, ticker = dataset.fetch_next_stock()
        if stock_data is None:
            print("All data segments processed, restarting training loop...")
            break

        if isinstance(stock_data, np.ndarray):
            stock_data = pd.DataFrame(stock_data)

        if len(stock_data) < window_size:
            print(f"Skipping stock {ticker}: only {len(stock_data)} rows (<{window_size}).")
            continue

        env = TradingEnv(stock_data, window_size=window_size)

        collector = SyncDataCollector(
            create_env_fn=lambda: env,
            policy=lambda td: TensorDict({"action": Categorical(logits=model(td)["_features"]).sample()}, batch_size=[]),
            frames_per_batch=episodes,
            total_frames=episodes
        ) 

        feature_extractor = TensorDictModule(
            module=model,
            in_keys=["observation"],
            out_keys=["_features"]
        )

        actor_net = TensorDictModule(
            module=model.actor,        # model.actor outputs logits
            in_keys=["_features"],
            out_keys=["logits"]
        )

        actor = ProbabilisticActor(
            module=actor_net,
            in_keys=["_features"],
            out_keys=["action"],
            spec=None  # optional if you have action spec
        )

        # Critic
        critic = TensorDictModule(
            module=model.critic,
            in_keys=["_features"],
            out_keys=["state_value"]
        )


        loss_module = A2CLoss(actor_network=actor, critic_network=critic)
        loss_module.make_value_estimator(ValueEstimators.TD0, gamma=gamma)
        buffer = ReplayBuffer(storage=LazyTensorStorage(max_size=episodes))

        for i, tensordict_data in enumerate(collector):

            tensordict_data = feature_extractor(tensordict_data)

            buffer.extend(tensordict_data)

            sampled_data = buffer.sample(batch_size=episodes)

            loss_td = loss_module(sampled_data)

            optimizer.zero_grad()
            loss_td["loss"].backward()
            optimizer.step()

            print(f"[{ticker}] Episode {i+1}/{episodes} | Loss: {loss_td['loss'].item():.4f}")

        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        print(f"[{ticker}] Training complete and model saved.")


In [182]:

# Run training
train(dataset, episodes=10)


Using device: cpu
  No existing model found — initialized new network.


RuntimeError: a Tensor with 7 elements cannot be converted to Scalar