<a href="https://colab.research.google.com/github/sh20022002/probability_of_change/blob/main/trader_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import StandardScaler
import pickle
import os
import matplotlib.pyplot as plt

import gym
from gym import spaces

import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions import Categorical

import random
import collections
import warnings


In [38]:

MODEL_SAVE_PATH = "trading_model.pth"
SP500_TICKERS = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"


class StockDataset:
    def __init__(self, tickers, num_segments=10):
        self.tickers = tickers
        self.num_segments = num_segments  # Number of parts to divide the dataset into
        self.segmented_data = []  # Preprocessed data
        self.current_ticker_idx = 0  # Keep track of which stock is being processed

        self._load_and_split_data()

    def _load_and_split_data(self):
        """Loads full dataset once and splits into shuffled segments"""
        scaler_saved = False
        scaler = None

        for ticker in self.tickers:
            try:
                stock_data = yf.download(ticker, period='max', progress=False)
                if stock_data.empty:
                    print(f"Warning: Empty data for ticker: {ticker}")
                    continue

                df = stock_data[['Open', 'High', 'Low', 'Close', 'Volume']].dropna()
                df['Return'] = df['Close'].pct_change()
                df['SMA150'] = df['Close'].rolling(window=150).mean()
                df.dropna(inplace=True)

                features_df = df[['Open', 'Close', 'Return', 'Volume', 'SMA150']]
                if features_df.empty:
                    continue

                print(f"Processing ticker: {ticker}")

                variances = features_df.var()
                features_df = features_df[[col for col in features_df.columns if variances[col] != 0]]

                if scaler is None:
                    scaler = StandardScaler().fit(features_df)

                columns = features_df.columns
                index = features_df.index

                features_df = pd.DataFrame(
                    scaler.transform(features_df),
                    columns=columns,
                    index=index
                )

                scaled_values = features_df.astype(np.float32).values
                segment_size = len(scaled_values) // self.num_segments
                segments = [scaled_values[i * segment_size:(i + 1) * segment_size] for i in range(self.num_segments)]
                self.segmented_data.extend([(segment, ticker) for segment in segments])

            except Exception as e:
                print(f"Error downloading data for {ticker}: {e}")
                continue

        if scaler is not None and not scaler_saved:
            os.makedirs("data", exist_ok=True)
            with open("data/scaler.pkl", "wb") as f:
                pickle.dump(scaler, f)
            print("Scaler saved")

    def fetch_next_stock(self):
        """Returns a dataset segment in order, keeping track of stock index."""
        if not self.segmented_data:
            return None, None  # No more stocks

        if self.current_ticker_idx >= len(self.segmented_data):
            self.current_ticker_idx = 0  # Loop back to start

        segment, ticker = self.segmented_data[self.current_ticker_idx]
        self.current_ticker_idx += 1
        return segment, ticker

    def save(self, path):
        """
        Saves the StockDataset to a pickle file.

        Args:
            filepath (str): The path to the pickle file.
        """
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wb') as f:
            pickle.dump(self, f)
        print('Saved Dataset.')

    @staticmethod
    def load(path):
        """
        Loads the StockDataset from a pickle file.

        Args:
            filepath (str): The path to the pickle file.

        Returns:
            StockDataset: The loaded StockDataset object.
        """
        with open(path, 'rb') as f:
            return pickle.load(f)




In [39]:

# Cell 2: Preprocess Data
def preprocess_data(df):
    """Preprocesses stock data by adding features and scaling."""
    df['Return'] = df['Close'].pct_change()
    df['SMA150'] = df['Close'].rolling(window=150).mean()
    df.dropna(inplace=True)

    # Select features to scale
    features = df[['Open', 'Close', 'Return', 'Volume', 'SMA150']]
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(features)

    # Save the fitted scaler

    os.makedirs("data", exist_ok=True)
    with open("data/scaler.pkl", "wb") as f:
        pickle.dump(scaler, f)

    return scaled_data, scaler

In [None]:
class TradingEnv(gym.Env):
    def __init__(self, df, initial_balance=10000, scaler=None, window_size=30):
        super(TradingEnv, self).__init__()

        if isinstance(df, np.ndarray):
            df = pd.DataFrame(df)

        if len(df) < window_size:
            raise ValueError(f"Insufficient data: need at least {window_size} rows, got {len(df)}.")

        self.df = df.reset_index(drop=True) #?
        self.initial_balance = initial_balance
        self.scaler = scaler
        self.window_size = window_size
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, 
            shape=(window_size, df.shape[1] + 2), 
            dtype=np.float32
        )
        self.reset()

    def reset(self):
        self.current_step = self.window_size
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.positions = []
        self.position_type = None
        self.trades = []
        return self._get_observation()

    def _get_observation(self):
        frame = self.df.iloc[self.current_step - self.window_size:self.current_step]
        obs = frame.copy()
        obs['Balance'] = self.balance
        obs['NetWorth'] = self.net_worth

        try:
            if self.scaler is not None:
                obs = self.scaler.transform(obs)
        except Exception as e:
            warnings.warn(f"Scaler transform failed: {e}. Proceeding without scaling.")

        return obs.astype(np.float32)

    def step(self, action):
        done = False
        reward = 0
        current_price = self.df.loc[self.current_step, 'Close'] #?

        if action == 0:  # Buy
            if self.position_type is None:
                self.positions.append(current_price)
                self.position_type = 'long'
            elif self.position_type == 'short':
                entry = self.positions.pop(0)
                profit = entry - current_price
                reward += profit
                self.balance += profit
                if not self.positions:
                    self.position_type = None

        elif action == 1:  # Sell
            if self.position_type == 'long':
                entry = self.positions.pop(0)
                profit = current_price - entry
                reward += profit
                self.balance += profit
                if not self.positions:
                    self.position_type = None

        elif action == 2:  # Short
            if self.position_type is None:
                self.positions.append(current_price)
                self.position_type = 'short'
            elif self.position_type == 'long':
                entry = self.positions.pop(0)
                profit = current_price - entry
                reward += profit
                self.balance += profit
                if not self.positions:
                    self.position_type = None

        elif action == 3:  # Cover
            if self.position_type == 'short':
                entry = self.positions.pop(0)
                profit = entry - current_price
                reward += profit
                self.balance += profit
                if not self.positions:
                    self.position_type = None

        self.net_worth = self.balance
        self.trades.append((self.current_step, action, current_price, reward))

        self.current_step += 1
        if self.current_step >= len(self.df):
            done = True

        return self._get_observation(), reward, done, {}


In [41]:
def evaluate_model(stock_data, model):
    """Evaluates the model on stock data and returns total reward."""
    env = TradingEnv(stock_data)
    state = env.reset()
    done = False
    total_reward = 0
    actions = []

    while not done:
        with torch.no_grad():
            state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
            logits, value, _ = model(state_tensor)
            action = torch.argmax(logits).item()


        actions.append(action)
        state, reward, done, _ = env.step(action)
        total_reward += reward

    return total_reward, actions

# Function to plot evaluation results
def plot_evaluation_results(stock_data, actions):
    """Plots stock data and overlays model evaluation results."""
    close_prices = stock_data[:, 3]  # Close prices

    plt.figure(figsize=(12, 6))
    plt.plot(close_prices, label="Close Price", color='black')

    buy_signals = [i for i in range(len(actions)) if actions[i] == 1]
    sell_signals = [i for i in range(len(actions)) if actions[i] == 2]

    plt.scatter(buy_signals, close_prices[buy_signals], color='green', marker='^', label='Buy')
    plt.scatter(sell_signals, close_prices[sell_signals], color='red', marker='v', label='Sell')

    plt.xlabel("Time")
    plt.ylabel("Stock Price")
    plt.legend()
    plt.title(f"Model Evaluation Results")
    plt.show()



In [42]:
def init_or_load(input_dim: int = 7,
                     output_dim: int = 3,
                     lr: float = 1e-3,
                     memory_size: int = 10_000):
    """
    Returns (model, optimizer, memory), loading pretrained weights if available.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)

    model = StockLSTM(input_dim=input_dim, output_dim=output_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    memory = collections.deque(maxlen=memory_size)

    if os.path.exists(MODEL_SAVE_PATH):
        model.load_state_dict(torch.load(MODEL_SAVE_PATH))
        print(f" Loaded existing model from {MODEL_SAVE_PATH}")
    else:
        print("  No existing model found — initialized new network.")

    return model, optimizer, memory, device

In [43]:

if os.path.exists("data/stock_dataset.pkl"):
    dataset = StockDataset.load("data/stock_dataset.pkl")
else:
    tickers = pd.read_html(SP500_TICKERS)[0]['Symbol'].tolist()
    dataset = StockDataset(tickers, num_segments=10)
    dataset.save("data/stock_dataset.pkl")


In [44]:
class StockLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim=128, lstm_layers=2, fc_dim=64, output_dim=3):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.lstm_layers = lstm_layers

        dropout = 0.2 if lstm_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=lstm_layers, batch_first=True, dropout=dropout)

        self.layer_norm = nn.LayerNorm(hidden_dim)

        # Actor head
        self.actor_fc1 = nn.Linear(hidden_dim, fc_dim)
        self.actor_fc2 = nn.Linear(fc_dim, output_dim)  # logits for actions

        # Critic head
        self.critic_fc1 = nn.Linear(hidden_dim, fc_dim)
        self.critic_fc2 = nn.Linear(fc_dim, 1)  # state-value

        self.dropout = nn.Dropout(0.1)

    def forward(self, x, hidden_state=None):
        # x: (batch_size, seq_len, input_dim)
        if hidden_state is None:
            h0 = torch.zeros(self.lstm_layers, x.size(0), self.hidden_dim).to(x.device)
            c0 = torch.zeros(self.lstm_layers, x.size(0), self.hidden_dim).to(x.device)
            hidden_state = (h0, c0)

        lstm_out, new_hidden = self.lstm(x, hidden_state)
        last_output = lstm_out[:, -1, :]  # use last output
        normed = self.layer_norm(last_output)
        dropped = self.dropout(normed)

        # Actor
        actor_hidden = F.relu(self.actor_fc1(dropped))
        policy_logits = self.actor_fc2(actor_hidden)

        # Critic
        critic_hidden = F.relu(self.critic_fc1(dropped))
        value = self.critic_fc2(critic_hidden)

        return policy_logits, value, new_hidden


# Assuming these functions/constants are defined elsewhere:
# init_or_load, MODEL_SAVE_PATH, TradingEnv, evaluate_model, plot_evaluation_results

def convert_state(state, device):
    """
    Ensure 'state' is a sequence with shape (window_size, feature_dim) 
    and convert to a torch tensor with added batch dimension.
    """
    # If state is not a list or np.ndarray, wrap it in a list.
    if not isinstance(state, (list, np.ndarray)):
        state = [state]
    # Convert to a NumPy array (if it isn't already)
    state = np.array(state)
    state_tensor = torch.tensor(state, dtype=torch.float32, device=device)
    # Check dimensions:
    #   If state_tensor is 1D, assume it's a single observation vector => unsqueeze twice: (1,1,feature_dim)
    #   If state_tensor is 2D, assume it's (window_size, feature_dim) => unsqueeze batch dim: (1, window_size, feature_dim)
    if state_tensor.ndim == 1:
        state_tensor = state_tensor.unsqueeze(0).unsqueeze(0)
    elif state_tensor.ndim == 2:
        state_tensor = state_tensor.unsqueeze(0)
    # If it's already 3D, assume batch dimension is present
    return state_tensor

def train_on_sp500(dataset, episodes=10, batch_size=64, gamma=0.95, lr=0.001, save_interval=30):
    model, optimizer, _, device = init_or_load(input_dim=7, output_dim=3, lr=lr)
    criterion = nn.MSELoss()

    if os.path.exists(MODEL_SAVE_PATH):
        model.load_state_dict(torch.load(MODEL_SAVE_PATH))
        print("Loaded existing model.")

    window_size = 7

    while True:
        stock_data, ticker = dataset.fetch_next_stock()
        if stock_data is None:
            print("All data segments processed, restarting training loop...")
            break

        if isinstance(stock_data, np.ndarray):
            stock_data = pd.DataFrame(stock_data)

        if len(stock_data) < window_size:
            print(f"Skipping stock {ticker}: only {len(stock_data)} rows (<{window_size}).")
            continue

        env = TradingEnv(stock_data, window_size=window_size)

        wins = 0
        action_counter = collections.Counter()

        for episode in range(episodes):
            state = env.reset()
            action_counter.clear()
            done = False
            total_reward = 0
            PROFIT = 0
            tax_credit = 0

            episode_states = []
            episode_actions = []
            episode_rewards = []

            while not done:
                # Ensure state is in the correct sequence format
                state_tensor = convert_state(state, device)
                logits, value, _ = model(state_tensor)
                dist = Categorical(logits=logits)
                action = dist.sample().item()

                next_state, reward, done, _ = env.step(action)

                episode_states.append(state)
                episode_actions.append(action)
                episode_rewards.append(reward)

                state = next_state
                action_counter[action] += 1

            # Compute returns
            returns = []
            G = 0
            for r in reversed(episode_rewards):
                G = r + gamma * G
                returns.insert(0, G)

            optimizer.zero_grad()

            # Here we update the policy using the collected episode data.
            for state, action, G in zip(episode_states, episode_actions, returns):
                # Convert the state appropriately. If state was already a sequence,
                # convert_state will produce a tensor of shape (1, window_size, feature_dim)
                state_tensor = convert_state(state, device)
                logits, value, _ = model(state_tensor)
                dist = Categorical(logits=logits)

                action_tensor = torch.tensor(action, device=device)
                log_prob = dist.log_prob(action_tensor)
                # Detach value because we don't backpropagate through the advantage
                advantage = G - value.squeeze().detach()

                policy_loss = -log_prob * advantage
                value_loss = F.mse_loss(value.squeeze(), torch.tensor(G, device=device))
                (policy_loss + value_loss).backward()

            optimizer.step()

            net_profit = env.net_worth - env.initial_balance

            if net_profit > 0:
                wins += 1
                tax_liability = 0.25 * net_profit
                tax_due = max(tax_liability - tax_credit, 0)
                tax_credit = max(tax_credit - tax_liability, 0)
            else:
                tax_due = 0
                tax_credit += abs(net_profit) * 0.25

            net_profit_after_tax = net_profit - tax_due
            PROFIT += net_profit_after_tax

            print(f"Ticker: {ticker} | Episode {episode+1}/{episodes} | Net PnL: {net_profit:.2f} | Wins: {wins}/{episode+1}")

            if episode % save_interval == 0:
                torch.save(model.state_dict(), MODEL_SAVE_PATH)
                print("Model progress saved.")

        total_reward, actions = evaluate_model(stock_data, model)
        plot_evaluation_results(stock_data, actions)

    print("Training complete.")
    torch.save(model.state_dict(), MODEL_SAVE_PATH)
    print("Final model saved.")


In [45]:

# Run training
train_on_sp500(dataset, episodes=10)


Using device: cpu
  No existing model found — initialized new network.


KeyError: 'Close'