In [13]:
import numpy as np
import pandas as pd
import yfinance as yf

from sklearn.preprocessing import StandardScaler

import os
import backtrader as bt
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
import gym
from gym import spaces
import torch
import torch.nn as nn
import torch.optim as optim
import random
import collections


In [14]:

MODEL_SAVE_PATH = "dqn_trading_model.pth"
SP500_TICKERS = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"


class StockDataset:
    def __init__(self, tickers, num_segments=10):
        self.tickers = tickers
        self.num_segments = num_segments  # Number of parts to divide the dataset into
        self.segmented_data = []  # Preprocessed data
        self.current_ticker_idx = 0  # Keep track of which stock is being processed
        
        self._load_and_split_data()
    
    def _load_and_split_data(self):
        """Loads full dataset once and splits into shuffled segments"""
        for ticker in self.tickers:
            stock_data = yf.download(ticker, period='max', progress=False)
            if stock_data.empty:
                continue
            
            values = stock_data[['Open', 'High', 'Low', 'Close', 'Volume']].dropna().to_numpy(dtype=np.float32)
            segment_size = len(values) // self.num_segments

            # Split dataset into segments
            segments = [values[i * segment_size:(i + 1) * segment_size] for i in range(self.num_segments)]
            self.segmented_data.extend([(segment, ticker) for segment in segments])
        
        # Shuffle segments to ensure model trains on different periods randomly
        random.shuffle(self.segmented_data)

    def fetch_next_stock(self):
        """Returns a dataset segment in order, keeping track of stock index."""
        if not self.segmented_data:
            return None, None  # No more stocks
        
        if self.current_ticker_idx >= len(self.segmented_data):
            self.current_ticker_idx = 0  # Loop back to start
        
        segment, ticker = self.segmented_data[self.current_ticker_idx]
        self.current_ticker_idx += 1
        return segment, ticker



In [15]:

# Cell 2: Preprocess Data
def preprocess_data(df):
    """Preprocesses stock data by adding features and scaling."""
    df['Return'] = df['Close'].pct_change()
    df['SMA150'] = df['Close'].rolling(window=150).mean()
    df.dropna(inplace=True)
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(df[['Open', 'Close', 'Return' 'Volume', 'SMA150']])
    return scaled_data, scaler


In [16]:
class TradingEnv(gym.Env):
    def __init__(self, data, trading_mode='both', transaction_fee=5):
        super(TradingEnv, self).__init__()
        self.data = data
        self.trading_mode = trading_mode  # 'long', 'short', or 'both'
        self.transaction_fee = transaction_fee
        self.current_step = 0
        self.cash = 10000
        self.position = 0
        self.short_position = 0
        self.done = False
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(5,), dtype=np.float32)

    
    
    def reset(self):
        self.current_step = 0
        self.cash = 10000
        self.position = 0
        self.done = False
        return self._get_observation()
    
    def _get_observation(self):
        """Ensure proper extraction of scalar values to avoid deprecation warnings."""
        if self.current_step >= len(self.data):
            self.current_step = len(self.data) - 1
        return np.concatenate((self.data[self.current_step], np.array([self.position, self.cash], dtype=np.float32)))
    
    def step(self, action):
        current_price = self.data[self.current_step][3]  # Close price

        if action == 1 and self.trading_mode in ['long', 'both']:  # Buy
            shares_to_buy = self.cash // current_price
            self.cash -= shares_to_buy * current_price + self.transaction_fee
            self.position += shares_to_buy

        elif action == 2 and self.trading_mode in ['short', 'both']:  # Short sell
            shares_to_short = self.cash // current_price
            self.cash += shares_to_short * current_price - self.transaction_fee
            self.short_position += shares_to_short

        elif action == 2 and self.position > 0:  # Sell long position
            self.cash += self.position * current_price - self.transaction_fee
            self.position = 0

        elif action == 1 and self.short_position > 0:  # Cover short position
            self.cash -= self.short_position * current_price + self.transaction_fee
            self.short_position = 0
        
        self.current_step += 1
        reward = self.cash + (self.position - self.short_position) * self.data[self.current_step][3] - 10000

        if self.current_step >= len(self.data) - 1:
            self.done = True

        return self._get_observation(), reward, self.done, {}



In [17]:
def evaluate_model(stock_data, model):
    """Evaluates the model on stock data and returns total reward."""
    env = TradingEnv(stock_data)
    state = env.reset()
    done = False
    total_reward = 0
    actions = []
    
    while not done:
        with torch.no_grad():
            q_values = model(torch.tensor(state, dtype=torch.float32).unsqueeze(0))
            action = torch.argmax(q_values).item()
        
        actions.append(action)
        state, reward, done, _ = env.step(action)
        total_reward += reward
    
    return total_reward, actions

# Function to plot evaluation results
def plot_evaluation_results(stock_data, actions):
    """Plots stock data and overlays model evaluation results."""
    close_prices = stock_data[:, 3]  # Close prices
    
    plt.figure(figsize=(12, 6))
    plt.plot(close_prices, label="Close Price", color='black')
    
    buy_signals = [i for i in range(len(actions)) if actions[i] == 1]
    sell_signals = [i for i in range(len(actions)) if actions[i] == 2]
    
    plt.scatter(buy_signals, close_prices[buy_signals], color='green', marker='^', label='Buy')
    plt.scatter(sell_signals, close_prices[sell_signals], color='red', marker='v', label='Sell')
    
    plt.xlabel("Time")
    plt.ylabel("Stock Price")
    plt.legend()
    plt.title(f"Model Evaluation Results")
    plt.show()



In [None]:
def init_or_load(input_dim: int = 7,
                     output_dim: int = 3,
                     lr: float = 1e-3,
                     memory_size: int = 10_000):
    """
    Returns (model, optimizer, memory), loading pretrained weights if available.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)

    model = StockLSTM(input_dim=input_dim, output_dim=output_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    memory = collections.deque(maxlen=memory_size)

    if os.path.exists(MODEL_SAVE_PATH):
        model.load_state_dict(torch.load(MODEL_SAVE_PATH))
        print(f"✅ Loaded existing model from {MODEL_SAVE_PATH}")
    else:
        print("ℹ️  No existing model found — initialized new network.")

    return model, optimizer, memory, device

In [19]:
tickers = pd.read_html(SP500_TICKERS)[0]['Symbol'].tolist()
dataset = StockDataset(tickers, num_segments=10)  # Load full dataset, split into segments


1 Failed download:
['BRK.B']: YFTzMissingError('possibly delisted; no timezone found')

1 Failed download:
['BF.B']: YFPricesMissingError('possibly delisted; no price data found  (1d 1926-04-18 -> 2025-03-24)')


In [None]:
class StockLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim=64, num_layers=3, output_dim=3):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
    def forward(self, x):
        _, (h, _) = self.lstm(x)
        return self.fc(h[-1])


# Train the RL agent using StockDataset
def train_dqn_on_sp500(detaset, episodes=10, batch_size=64, gamma=0.95, lr=0.001, save_interval=9):
    
    model, optimizer, memory, devaice = init_or_load(input_dim=7, output_dim=3, lr=lr)

    # Load existing model if available
    if os.path.exists(MODEL_SAVE_PATH):
        model.load_state_dict(torch.load(MODEL_SAVE_PATH))
        print("Loaded existing model.")
    
    while True:  # Keep training as long as there are segments
        
        stock_data, ticker = dataset.fetch_next_stock()
        if stock_data is None:
            print("All data segments processed, restarting training loop...")
            break  # Exit training loop if all data is processed
        
        env = TradingEnv(stock_data)
        wins = 0
        
        for episode in range(episodes):
            state = env.reset()
            done = False
            total_reward = 0
            PROFIT = 0
            tax_credit = 0
            
            
            while not done:
                if random.random() < 0.1:
                    action = env.action_space.sample()
                else:
                    with torch.no_grad():
                        q_values = model(torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0))
                        action = torch.argmax(q_values).item()
                
                next_state, reward, done, _ = env.step(action)
                memory.append((state, action, reward, next_state, done))
                
                if len(memory) > batch_size:
                    batch = random.sample(memory, batch_size)
                    states, actions, rewards, next_states, dones = zip(*batch)
                    
                    states = torch.tensor(np.array(states), dtype=torch.float32, device=device)
                    actions = torch.tensor(actions, dtype=torch.int64, device=device)
                    rewards = torch.tensor(rewards, dtype=torch.float32, device=device)
                    next_states = torch.tensor(np.array(next_states), dtype=torch.float32, device=device)
                    dones = torch.tensor(dones, dtype=torch.float32, device=device)
                    
                    q_values = model(states).gather(1, actions.unsqueeze(1)).squeeze()
                    next_q_values = model(next_states).max(1)[0].detach()
                    expected_q_values = rewards + gamma * next_q_values * (1 - dones)
                    
                    loss = nn.MSELoss()(q_values, expected_q_values)
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()
                
                state = next_state
                total_reward += reward
            
            # Assume tax_credit is a running float (starts at 0)
            # PROFIT is your cumulative net profit

            if total_reward > 0:
                # Compute gross tax liability
                tax_liability = 0.25 * total_reward
                wins += 1

                # Apply credit
                tax_due = max(tax_liability - tax_credit, 0)

                # Update remaining credit (if credit > liability)
                tax_credit = max(tax_credit - tax_liability, 0)
            else:
                # Losses generate new credits (write‑offs)
                tax_credit += abs(total_reward) * 0.25
                tax_due = 0

            # Subtract tax from profit
            net_profit = total_reward - tax_due
            PROFIT += net_profit
            win_rate = wins / (episode + 1)  
            print(f"Ticker: {ticker} | Episode {episode+1}/{episodes} | Reward: {reward:.2f} |total reward: {total_reward:.2f} | wins: {win_rate}" )
            
            if episode % save_interval == 0:
                torch.save(model.state_dict(), MODEL_SAVE_PATH)
                print("Model progress saved.")
        
        # Evaluate model after training and plot results
        
        total_reward, actions = evaluate_model(stock_data, model)
        plot_evaluation_results(stock_data, actions)
    
    print("Training complete.")
    torch.save(model.state_dict(), MODEL_SAVE_PATH)
    print("Final model saved.")


In [21]:

# Run training
train_dqn_on_sp500(dataset, episodes=10)


ℹ️  No existing model found — initialized new network.


RuntimeError: input.size(-1) must be equal to input_size. Expected 5, got 7