# Without Softmax #

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random

# Constants
EPSILON = 0.1
EPSILON_MIN = 0.01
EPSILON_DECAY = 0.8
GAMMA = 0.99
WINDOW_SIZE = 3
BATCH_SIZE = 32
MEMORY_SIZE = 10000
ACTION_SPACE = np.array([-1.0, 0.0, 1.0])  # Sell, Hold, Buy

class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)
    
    def push(self, state, action, reward, next_state):
        self.buffer.append((state, action, reward, next_state))
    
    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)
    
    def __len__(self):
        return len(self.buffer)

class LSTMTrader(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMTrader, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, num_layers=2, dropout=0.2)
        self.attention = nn.MultiheadAttention(hidden_size, num_heads=4)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        
        # Apply attention mechanism
        attn_output, _ = self.attention(lstm_out, lstm_out, lstm_out)
        
        # Get the final output
        final_output = attn_output[:, -1, :]
        
        # Pass through fully connected layers
        x = self.relu(self.fc1(final_output))
        return self.fc2(x)

def add_technical_indicators(df):
    # Calculate moving averages
    df['SMA_5'] = df['Close'].rolling(window=5).mean()
    df['SMA_20'] = df['Close'].rolling(window=20).mean()
    
    # Calculate RSI
    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))
    
    # Calculate MACD
    exp1 = df['Close'].ewm(span=12, adjust=False).mean()
    exp2 = df['Close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = exp1 - exp2
    df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()
    
    # Calculate Bollinger Bands
    df['BB_middle'] = df['Close'].rolling(window=20).mean()
    df['BB_upper'] = df['BB_middle'] + 2 * df['Close'].rolling(window=20).std()
    df['BB_lower'] = df['BB_middle'] - 2 * df['Close'].rolling(window=20).std()
    
    # Add volume indicators
    df['Volume_SMA'] = df['Volume'].rolling(window=5).mean()
    
    return df

def load_data(ticker, start_date, end_date):
    df = yf.download(ticker, start=start_date, end=end_date)
    df = add_technical_indicators(df)
    df['Returns'] = df['Close'].pct_change()
    df = df.dropna()
    return df

def prepare_state(df, current_idx, window_size):
    """Prepare the state with technical indicators"""
    if current_idx < window_size:
        return None
    
    state = []
    for i in range(current_idx - window_size, current_idx):
        features = [
            df['Close'].iloc[i],
            df['SMA_5'].iloc[i],
            df['SMA_20'].iloc[i],
            df['RSI'].iloc[i],
            df['MACD'].iloc[i],
            df['Signal_Line'].iloc[i],
            df['BB_upper'].iloc[i],
            df['BB_lower'].iloc[i],
            df['Volume_SMA'].iloc[i],
            df['Returns'].iloc[i]
        ]
        state.append(features)
    return np.array(state)

def train_model(model, ticker, data, replay_buffer, optimizer, criterion):
    epsilon = EPSILON
    
    for episode in range(100):  # Number of episodes
        total_reward = 0
        state = prepare_state(data, WINDOW_SIZE, WINDOW_SIZE)
        
        for t in range(WINDOW_SIZE, len(data) - 1):
            state_tensor = torch.FloatTensor(state).unsqueeze(0)
            
            # Epsilon-greedy action selection
            if random.random() < epsilon:
                action_idx = random.randrange(len(ACTION_SPACE))
            else:
                with torch.no_grad():
                    q_values = model(state_tensor)
                    action_idx = q_values.max(1)[1].item()
            
            action = ACTION_SPACE[action_idx]
            
            # Get next state and reward
            next_state = prepare_state(data, t + 1, WINDOW_SIZE)
            reward = data['Returns'].iloc[t + 1] * action  # Reward based on return and action
            
            # Store transition in replay buffer
            replay_buffer.push(state, action_idx, reward, next_state)
            
            # Train on random batch from replay buffer
            if len(replay_buffer) > BATCH_SIZE:
                batch = replay_buffer.sample(BATCH_SIZE)
                state_batch = torch.FloatTensor([s[0] for s in batch])
                action_batch = torch.LongTensor([s[1] for s in batch])
                reward_batch = torch.FloatTensor([s[2] for s in batch])
                next_state_batch = torch.FloatTensor([s[3] for s in batch])
                
                # Compute Q values
                current_q_values = model(state_batch).gather(1, action_batch.unsqueeze(1))
                next_q_values = model(next_state_batch).max(1)[0].detach()
                target_q_values = reward_batch + GAMMA * next_q_values
                
                # Compute loss and update model
                loss = criterion(current_q_values.squeeze(), target_q_values)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            
            state = next_state
            total_reward += reward
            
        # Decay epsilon
        epsilon = max(EPSILON_MIN, epsilon * EPSILON_DECAY)
        
        print(f"Episode {episode + 1}, Total Reward: {total_reward:.2f}, Epsilon: {epsilon:.2f}")
    
    return model

class EnsembleTrader:
    def __init__(self, models, data):
        self.models = models
        self.data = data
        
    def get_ensemble_action(self, state, ticker):
        """Get action using ensemble of models"""
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        q_values = self.models[ticker](state_tensor)
        return ACTION_SPACE[q_values.max(1)[1].item()]
    
    def calculate_portfolio_value(self, cash, shares, current_prices):
        """Calculate total portfolio value"""
        value = cash
        for ticker in shares:
            value += shares[ticker] * current_prices[ticker]
        return value
    
    def simulate_trading(self, initial_cash=10000, commission=0.001):
        cash = initial_cash
        shares = {ticker: 0 for ticker in self.models.keys()}
        portfolio_values = []
        trades = []
        
        for t in range(WINDOW_SIZE, len(self.data)):
            # Get the current prices for each ticker, only if there are enough rows
            current_prices = {}
            for ticker in self.models.keys():
                ticker_data = self.data[self.data['Ticker'] == ticker]
                if t < len(ticker_data):  # Ensure t is within bounds
                    current_prices[ticker] = ticker_data['Close'].iloc[t]
                else:
                    current_prices[ticker] = None  # If out of bounds, set to None

            # Skip if any ticker’s data is unavailable at index t
            if any(price is None for price in current_prices.values()):
                continue

            current_prices = {ticker: self.data[self.data['Ticker'] == ticker]['Close'].iloc[t] 
                            for ticker in self.models.keys()}
            
            for ticker in self.models.keys():
                ticker_data = self.data[self.data['Ticker'] == ticker]
                state = prepare_state(ticker_data, t, WINDOW_SIZE)
                
                if state is not None:
                    action = self.get_ensemble_action(state, ticker)
                    
                    # Calculate maximum shares that can be bought
                    max_shares = int(cash / (current_prices[ticker] * (1 + commission)))
                    
                    if action > 0 and max_shares > 0:  # Buy
                        shares_to_buy = max_shares
                        cost = shares_to_buy * current_prices[ticker] * (1 + commission)
                        if cost <= cash:
                            cash -= cost
                            shares[ticker] += shares_to_buy
                            trades.append((t, ticker, 'BUY', shares_to_buy, current_prices[ticker]))
                    
                    elif action < 0 and shares[ticker] > 0:  # Sell
                        shares_to_sell = shares[ticker]
                        revenue = shares_to_sell * current_prices[ticker] * (1 - commission)
                        cash += revenue
                        shares[ticker] = 0
                        trades.append((t, ticker, 'SELL', shares_to_sell, current_prices[ticker]))
            
            # Record portfolio value
            portfolio_value = self.calculate_portfolio_value(cash, shares, current_prices)
            portfolio_values.append((self.data.index[t], portfolio_value))
        
        return trades, portfolio_values, cash, shares

def calculate_metrics(trades, portfolio_values, initial_cash):
    """Calculate trading metrics"""
    if not portfolio_values:
        return {}
    
    final_value = portfolio_values[-1][1]
    returns = [(v2[1] - v1[1]) / v1[1] for v1, v2 in zip(portfolio_values[:-1], portfolio_values[1:])]
    
    metrics = {
        'Final Balance': final_value,
        'Total Return': ((final_value - initial_cash) / initial_cash) * 100,
        'Win Rate': sum(1 for r in returns if r > 0) / len(returns) if returns else 0,
        'Volatility': np.std(returns) if returns else 0,
        'Sharpe Ratio': (np.mean(returns) / np.std(returns)) if returns else 0,
    }
    
    return metrics

if __name__ == "__main__":
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)
    random.seed(42)
    
    # Initialize parameters
    start_date = '2022-01-01'
    end_date = '2023-01-01'
    tickers = ['AAPL', 'IBM']
    initial_cash = 10000
    
    # Load and prepare training data
    combined_data = pd.concat([load_data(ticker, start_date, end_date).assign(Ticker=ticker) 
                             for ticker in tickers])
    
    # Initialize models and training components
    models = {}
    for ticker in tickers:
        model = LSTMTrader(input_size=10, hidden_size=64, output_size=len(ACTION_SPACE))
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        criterion = nn.MSELoss()
        replay_buffer = ReplayBuffer(MEMORY_SIZE)
        
        # Train model
        print(f"\nTraining model for {ticker}...")
        ticker_data = combined_data[combined_data['Ticker'] == ticker].copy()
        models[ticker] = train_model(model, ticker, ticker_data, replay_buffer, optimizer, criterion)

        # Save model, optimizer, and additional parameters after training
        save_path = f"{ticker}_EnsembleModel.pth"
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'criterion': criterion,
            'replay_buffer': replay_buffer,
            'epsilon': EPSILON
        }, save_path)
        print(f"Model, optimizer, and parameters for {ticker} saved to {save_path}")
        
    # Set testing period
    test_start_date = '2023-01-01'
    test_end_date = '2024-01-01'
    
    # Load and prepare testing data
    combined_test_data = pd.concat([load_data(ticker, test_start_date, test_end_date).assign(Ticker=ticker) 
                                    for ticker in tickers])
    
    # Initialize ensemble trader and run testing simulation
    print("\nRunning trading simulation on test period...")
    ensemble_trader = EnsembleTrader(models, combined_test_data)
    trades, portfolio_values, final_cash, final_shares = ensemble_trader.simulate_trading(initial_cash)
    
    # Calculate and display evaluation metrics for test period
    metrics = calculate_metrics(trades, portfolio_values, initial_cash)
    
    print("\nTesting Results (Evaluation Metrics):")
    print("=" * 50)
    print(f"Final Balance: ${metrics['Final Balance']:.2f}")
    print(f"Total Returns: {metrics['Total Return']:.2f}%")
    print(f"Win Rate: {metrics['Win Rate']:.2f}")
    print(f"Volatility: {metrics['Volatility']:.4f}")
    print(f"Sharpe Ratio: {metrics['Sharpe Ratio']:.4f}")
    
    print("\nFinal Positions:")
    for ticker, shares in final_shares.items():
        print(f"{ticker}: {shares} shares")
    print(f"Cash: ${final_cash:.2f}")
    
    print("\nTrading History (first 10 trades in test period):")
    for t in trades[:10]:
        print(f"Day {t[0]}: {t[1]} - {t[2]} {t[3]} shares at ${t[4]:.2f}")


[*********************100%***********************]  1 of 1 completed


[*********************100%***********************]  1 of 1 completed



Training model for AAPL...
Episode 1, Total Reward: 0.16, Epsilon: 0.08
Episode 2, Total Reward: -0.18, Epsilon: 0.06
Episode 3, Total Reward: -0.03, Epsilon: 0.05
Episode 4, Total Reward: 0.20, Epsilon: 0.04
Episode 5, Total Reward: -0.39, Epsilon: 0.03
Episode 6, Total Reward: -0.01, Epsilon: 0.03
Episode 7, Total Reward: 0.23, Epsilon: 0.02
Episode 8, Total Reward: -0.25, Epsilon: 0.02
Episode 9, Total Reward: -0.10, Epsilon: 0.01
Episode 10, Total Reward: 0.19, Epsilon: 0.01
Episode 11, Total Reward: 0.52, Epsilon: 0.01
Episode 12, Total Reward: 0.13, Epsilon: 0.01
Episode 13, Total Reward: -0.06, Epsilon: 0.01
Episode 14, Total Reward: 0.13, Epsilon: 0.01
Episode 15, Total Reward: -0.13, Epsilon: 0.01
Episode 16, Total Reward: -0.03, Epsilon: 0.01
Episode 17, Total Reward: 0.27, Epsilon: 0.01
Episode 18, Total Reward: -0.04, Epsilon: 0.01
Episode 19, Total Reward: 0.58, Epsilon: 0.01
Episode 20, Total Reward: 0.01, Epsilon: 0.01
Episode 21, Total Reward: 0.03, Epsilon: 0.01
Episo

[*********************100%***********************]  1 of 1 completed

Episode 100, Total Reward: 0.09, Epsilon: 0.01
Model, optimizer, and parameters for IBM saved to IBM_EnsembleModel.pth



[*********************100%***********************]  1 of 1 completed



Running trading simulation on test period...

Testing Results (Evaluation Metrics):
Final Balance: $11582.65
Total Returns: 15.83%
Win Rate: 0.43
Volatility: 0.0089
Sharpe Ratio: 1.2291

Final Positions:
AAPL: 0 shares
IBM: 70 shares
Cash: $134.15

Trading History (first 10 trades in test period):
Day 3: IBM - BUY 72 shares at $136.94
Day 16: IBM - SELL 72 shares at $130.79
Day 21: IBM - BUY 73 shares at $128.93
Day 22: IBM - SELL 73 shares at $129.64
Day 23: IBM - BUY 73 shares at $130.19
Day 25: IBM - SELL 73 shares at $128.05
Day 27: IBM - BUY 74 shares at $125.45
Day 38: IBM - SELL 74 shares at $129.31
Day 41: IBM - BUY 74 shares at $129.22
Day 60: IBM - SELL 74 shares at $126.97


In [None]:
metrics = calculate_metrics(trades, portfolio_values, initial_cash)

print("\nTrading Results:")
print("=" * 50)
print(f"Final Balance: ${metrics['Final Balance']:.2f}")
print(f"Total Returns: {metrics['Total Return']:.2f}%")
print(f"Win Rate: {metrics['Win Rate']:.2f}")
print(f"Volatility: {metrics['Volatility']:.4f}")
print(f"Sharpe Ratio: {metrics['Sharpe Ratio']:.4f}")


Trading Results:
Final Balance: $11582.65
Total Returns: 15.83%
Win Rate: 0.43
Volatility: 0.0089
Sharpe Ratio: 0.0774


In [None]:
import torch

# Load model function
def load_model(ticker, input_size, hidden_size, output_size, path):
    model = LSTMTrader(input_size=input_size, hidden_size=hidden_size, output_size=output_size)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    checkpoint = torch.load(path)
    
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    replay_buffer = checkpoint['replay_buffer']
    epsilon = checkpoint['epsilon']
    
    return model, optimizer, replay_buffer, epsilon

# Initialize parameters
input_size = 10 
hidden_size = 64
output_size = len(ACTION_SPACE)
tickers = ['AAPL', 'IBM']
models = {}

# Load saved models for each ticker
for ticker in tickers:
    path = f"{ticker}_model.pth"
    model, optimizer, replay_buffer, epsilon = load_model(ticker, input_size, hidden_size, output_size, path)
    models[ticker] = model  # Add loaded model to the models dictionary

# Run the trading simulation
# Ensure `combined_data` has been loaded and processed as before
ensemble_trader = EnsembleTrader(models, combined_data)
initial_cash = 10000

print("\nRunning trading simulation with loaded models...")
trades, portfolio_values, final_cash, final_shares = ensemble_trader.simulate_trading(initial_cash)

# Calculate and display metrics
metrics = calculate_metrics(trades, portfolio_values, initial_cash)

print("\nTrading Results:")
print("=" * 50)
print(f"Final Balance: ${metrics['Final Balance']:.2f}")
print(f"Total Returns: {metrics['Total Return']:.2f}%")
print(f"Win Rate: {metrics['Win Rate']:.2f}")
print(f"Volatility: {metrics['Volatility']:.4f}")
print(f"Sharpe Ratio: {metrics['Sharpe Ratio']:.4f}")

print("\nFinal Positions:")
for ticker, shares in final_shares.items():
    print(f"{ticker}: {shares} shares")
print(f"Cash: ${final_cash:.2f}")

print("\nTrading History (first 10 trades):")
for t in trades[:]:
    print(f"Day {t[0]}: {t[1]} - {t[2]} {t[3]} shares at ${t[4]:.2f}")


  checkpoint = torch.load(path)



Running trading simulation with loaded models...

Trading Results:
Final Balance: $7538.77
Total Returns: -24.61%
Win Rate: 0.48
Volatility: 0.0225
Sharpe Ratio: -0.6924

Final Positions:
AAPL: 58 shares
IBM: 0 shares
Cash: $2.83

Trading History (first 10 trades):
Day 3: AAPL - BUY 57 shares at $172.90
Day 92: AAPL - BUY 1 shares at $131.88


In [None]:
if __name__ == "__main__":
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)
    random.seed(42)
    
    # Initialize parameters
    start_date = '2022-01-01'
    end_date = '2023-01-01'
    tickers = ['AAPL', 'IBM']
    initial_cash = 10000
    
    # Load and prepare training data
    combined_data = pd.concat([load_data(ticker, start_date, end_date).assign(Ticker=ticker) 
                             for ticker in tickers])
    
    # Initialize models and training components
    models = {}
    for ticker in tickers:
        model = LSTMTrader(input_size=10, hidden_size=64, output_size=len(ACTION_SPACE))
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        criterion = nn.MSELoss()
        replay_buffer = ReplayBuffer(MEMORY_SIZE)
        
        # Train model
        print(f"\nTraining model for {ticker}...")
        ticker_data = combined_data[combined_data['Ticker'] == ticker].copy()
        models[ticker] = train_model(model, ticker, ticker_data, replay_buffer, optimizer, criterion)

        # Save model, optimizer, and additional parameters after training
        save_path = f"{ticker}_EnsembleModel.pth"
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'criterion': criterion,
            'replay_buffer': replay_buffer,
            'epsilon': EPSILON
        }, save_path)
        print(f"Model, optimizer, and parameters for {ticker} saved to {save_path}")
        
    # Set testing period
    test_start_date = '2023-01-01'
    test_end_date = '2024-01-01'
    
    # Load and prepare testing data
    combined_test_data = pd.concat([load_data(ticker, test_start_date, test_end_date).assign(Ticker=ticker) 
                                    for ticker in tickers])
    
    # Initialize ensemble trader and run testing simulation
    print("\nRunning trading simulation on test period...")
    ensemble_trader = EnsembleTrader(models, combined_test_data)
    trades, portfolio_values, final_cash, final_shares = ensemble_trader.simulate_trading(initial_cash)
    
    # Calculate and display evaluation metrics for test period
    metrics = calculate_metrics(trades, portfolio_values, initial_cash)
    
    print("\nTesting Results (Evaluation Metrics):")
    print("=" * 50)
    print(f"Final Balance: ${metrics['Final Balance']:.2f}")
    print(f"Total Returns: {metrics['Total Return']:.2f}%")
    print(f"Win Rate: {metrics['Win Rate']:.2f}")
    print(f"Volatility: {metrics['Volatility']:.4f}")
    print(f"Sharpe Ratio: {metrics['Sharpe Ratio']:.4f}")
    
    print("\nFinal Positions:")
    for ticker, shares in final_shares.items():
        print(f"{ticker}: {shares} shares")
    print(f"Cash: ${final_cash:.2f}")
    
    print("\nTrading History (first 10 trades in test period):")
    for t in trades[:10]:
        print(f"Day {t[0]}: {t[1]} - {t[2]} {t[3]} shares at ${t[4]:.2f}")


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed



Running trading simulation on test period...

Testing Results (Evaluation Metrics):
Final Balance: $12424.03
Total Returns: 24.24%
Win Rate: 0.54
Volatility: 0.0120
Sharpe Ratio: 1.3630

Final Positions:
AAPL: 64 shares
IBM: 0 shares
Cash: $102.11

Trading History (first 10 trades in test period):
Day 3: AAPL - BUY 64 shares at $154.50


# Softmax #

In [16]:
import numpy as np
import pandas as pd
import yfinance as yf
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random
import torch.nn.functional as F
# Constants
EPSILON = 0.1
EPSILON_MIN = 0.01
EPSILON_DECAY = 0.8
GAMMA = 0.99
WINDOW_SIZE = 3
BATCH_SIZE = 32
MEMORY_SIZE = 10000
ACTION_SPACE = np.array([-1.0, 0.0, 1.0])  # Sell, Hold, Buy

class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)
    
    def push(self, state, action, reward, next_state):
        self.buffer.append((state, action, reward, next_state))
    
    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)
    
    def __len__(self):
        return len(self.buffer)

class LSTMTrader(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMTrader, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, num_layers=2, dropout=0.2)
        self.attention = nn.MultiheadAttention(hidden_size, num_heads=4)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        
        # Apply attention mechanism
        attn_output, _ = self.attention(lstm_out, lstm_out, lstm_out)
        
        # Get the final output
        final_output = attn_output[:, -1, :]
        
        # Pass through fully connected layers
        x = self.relu(self.fc1(final_output))
        return self.fc2(x)

def add_technical_indicators(df):
    # Calculate moving averages
    df['SMA_5'] = df['Close'].rolling(window=5).mean()
    df['SMA_20'] = df['Close'].rolling(window=20).mean()
    
    # Calculate RSI
    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))
    
    # Calculate MACD
    exp1 = df['Close'].ewm(span=12, adjust=False).mean()
    exp2 = df['Close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = exp1 - exp2
    df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()
    
    # Calculate Bollinger Bands
    df['BB_middle'] = df['Close'].rolling(window=20).mean()
    df['BB_upper'] = df['BB_middle'] + 2 * df['Close'].rolling(window=20).std()
    df['BB_lower'] = df['BB_middle'] - 2 * df['Close'].rolling(window=20).std()
    
    # Add volume indicators
    df['Volume_SMA'] = df['Volume'].rolling(window=5).mean()
    
    return df

def load_data(ticker, start_date, end_date):
    df = yf.download(ticker, start=start_date, end=end_date)
    df = add_technical_indicators(df)
    df['Returns'] = df['Close'].pct_change()
    df = df.dropna()
    return df

def prepare_state(df, current_idx, window_size):
    """Prepare the state with technical indicators"""
    if current_idx < window_size:
        return None
    
    state = []
    for i in range(current_idx - window_size, current_idx):
        features = [
            df['Close'].iloc[i],
            df['SMA_5'].iloc[i],
            df['SMA_20'].iloc[i],
            df['RSI'].iloc[i],
            df['MACD'].iloc[i],
            df['Signal_Line'].iloc[i],
            df['BB_upper'].iloc[i],
            df['BB_lower'].iloc[i],
            df['Volume_SMA'].iloc[i],
            df['Returns'].iloc[i]
        ]
        state.append(features)
    return np.array(state)

def train_model(model, ticker, data, replay_buffer, optimizer, criterion):
    epsilon = EPSILON
    
    for episode in range(100):  # Number of episodes
        total_reward = 0
        state = prepare_state(data, WINDOW_SIZE, WINDOW_SIZE)
        
        for t in range(WINDOW_SIZE, len(data) - 1):
            state_tensor = torch.FloatTensor(state).unsqueeze(0)
            
            # Epsilon-greedy action selection
            if random.random() < epsilon:
                action_idx = random.randrange(len(ACTION_SPACE))
            else:
                with torch.no_grad():
                    q_values = model(state_tensor)
                    action_idx = q_values.max(1)[1].item()
            
            action = ACTION_SPACE[action_idx]
            
            # Get next state and reward
            next_state = prepare_state(data, t + 1, WINDOW_SIZE)
            reward = data['Returns'].iloc[t + 1] * action  # Reward based on return and action
            
            # Store transition in replay buffer
            replay_buffer.push(state, action_idx, reward, next_state)
            
            # Train on random batch from replay buffer
            if len(replay_buffer) > BATCH_SIZE:
                batch = replay_buffer.sample(BATCH_SIZE)
                state_batch = torch.FloatTensor([s[0] for s in batch])
                action_batch = torch.LongTensor([s[1] for s in batch])
                reward_batch = torch.FloatTensor([s[2] for s in batch])
                next_state_batch = torch.FloatTensor([s[3] for s in batch])
                
                # Compute Q values
                current_q_values = model(state_batch).gather(1, action_batch.unsqueeze(1))
                next_q_values = model(next_state_batch).max(1)[0].detach()
                target_q_values = reward_batch + GAMMA * next_q_values
                
                # Compute loss and update model
                loss = criterion(current_q_values.squeeze(), target_q_values)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            
            state = next_state
            total_reward += reward
            
        # Decay epsilon
        epsilon = max(EPSILON_MIN, epsilon * EPSILON_DECAY)
        
        print(f"Episode {episode + 1}, Total Reward: {total_reward:.2f}, Epsilon: {epsilon:.2f}")
    
    return model

class EnsembleTrader:
    def __init__(self, models, data):
        self.models = models
        self.data = data

    def get_ensemble_action(self, state, ticker):
        """Get action using ensemble of models with softmax."""
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        q_values = self.models[ticker](state_tensor)
        
        # Apply softmax to Q-values to get probabilities
        probabilities = F.softmax(q_values, dim=1).detach().numpy()
        
        # Sample an action based on the probabilities
        action_index = np.random.choice(len(probabilities[0]), p=probabilities[0])
        return ACTION_SPACE[action_index]

    def calculate_portfolio_value(self, cash, shares, current_prices):
        """Calculate total portfolio value."""
        value = cash
        for ticker in shares:
            value += shares[ticker] * current_prices[ticker]
        return value

    def simulate_trading(self, initial_cash=10000, commission=0.001):
        cash = initial_cash
        shares = {ticker: 0 for ticker in self.models.keys()}
        portfolio_values = []
        trades = []
        
        for t in range(WINDOW_SIZE, len(self.data)):
            # Get the current prices for each ticker
            current_prices = {}
            for ticker in self.models.keys():
                ticker_data = self.data[self.data['Ticker'] == ticker]
                if t < len(ticker_data):  # Ensure t is within bounds
                    current_prices[ticker] = ticker_data['Close'].iloc[t]
                else:
                    current_prices[ticker] = None  # If out of bounds, set to None

            # Skip if any ticker’s data is unavailable at index t
            if any(price is None for price in current_prices.values()):
                continue

            for ticker in self.models.keys():
                ticker_data = self.data[self.data['Ticker'] == ticker]
                state = prepare_state(ticker_data, t, WINDOW_SIZE)
                
                if state is not None:
                    action = self.get_ensemble_action(state, ticker)
                    
                    # Calculate maximum shares that can be bought
                    max_shares = int(cash / (current_prices[ticker] * (1 + commission)))
                    
                    if action > 0 and max_shares > 0:  # Buy
                        shares_to_buy = max_shares
                        cost = shares_to_buy * current_prices[ticker] * (1 + commission)
                        if cost <= cash:
                            cash -= cost
                            shares[ticker] += shares_to_buy
                            trades.append((t, ticker, 'BUY', shares_to_buy, current_prices[ticker]))
                    
                    elif action < 0 and shares[ticker] > 0:  # Sell
                        shares_to_sell = shares[ticker]
                        revenue = shares_to_sell * current_prices[ticker] * (1 - commission)
                        cash += revenue
                        shares[ticker] = 0
                        trades.append((t, ticker, 'SELL', shares_to_sell, current_prices[ticker]))

            # Record portfolio value
            portfolio_value = self.calculate_portfolio_value(cash, shares, current_prices)
            portfolio_values.append((self.data.index[t], portfolio_value))
        
        return trades, portfolio_values, cash, shares

def calculate_metrics(trades, portfolio_values, initial_cash):
    """Calculate trading metrics"""
    if not portfolio_values:
        return {}
    
    final_value = portfolio_values[-1][1]
    returns = [(v2[1] - v1[1]) / v1[1] for v1, v2 in zip(portfolio_values[:-1], portfolio_values[1:])]
    
    metrics = {
        'Final Balance': final_value,
        'Total Return': ((final_value - initial_cash) / initial_cash) * 100,
        'Win Rate': sum(1 for r in returns if r > 0) / len(returns) if returns else 0,
        'Volatility': np.std(returns) if returns else 0,
        'Sharpe Ratio': (np.mean(returns) / np.std(returns)) if returns else 0,
    }
    
    return metrics

if __name__ == "__main__":
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)
    random.seed(42)
    
    # Initialize parameters
    start_date = '2020-01-01'
    end_date = '2023-01-01'
    tickers = ['AAPL', 'IBM']
    initial_cash = 10000
    
    # Load and prepare training data
    combined_data = pd.concat([load_data(ticker, start_date, end_date).assign(Ticker=ticker) 
                             for ticker in tickers])
    
    # Initialize models and training components
    models = {}
    for ticker in tickers:
        model = LSTMTrader(input_size=10, hidden_size=64, output_size=len(ACTION_SPACE))
        optimizer = optim.Adam(model.parameters(), lr=0.001)
        criterion = nn.MSELoss()
        replay_buffer = ReplayBuffer(MEMORY_SIZE)
        
        # Train model
        print(f"\nTraining model for {ticker}...")
        ticker_data = combined_data[combined_data['Ticker'] == ticker].copy()
        models[ticker] = train_model(model, ticker, ticker_data, replay_buffer, optimizer, criterion)

        # Save model, optimizer, and additional parameters after training
        save_path = f"{ticker}_EnsembleModelSoftmax.pth"
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'criterion': criterion,
            'replay_buffer': replay_buffer,
            'epsilon': EPSILON
        }, save_path)
        print(f"Model, optimizer, and parameters for {ticker} saved to {save_path}")
        
    # Set testing period
    test_start_date = '2023-01-01'
    test_end_date = '2024-01-01'
    
    # Load and prepare testing data
    combined_test_data = pd.concat([load_data(ticker, test_start_date, test_end_date).assign(Ticker=ticker) 
                                    for ticker in tickers])
    
    # Initialize ensemble trader and run testing simulation
    print("\nRunning trading simulation on test period...")
    ensemble_trader = EnsembleTrader(models, combined_test_data)
    trades, portfolio_values, final_cash, final_shares = ensemble_trader.simulate_trading(initial_cash)
    
    # Calculate and display evaluation metrics for test period
    metrics = calculate_metrics(trades, portfolio_values, initial_cash)
    
    print("\nTesting Results (Evaluation Metrics):")
    print("=" * 50)
    print(f"Final Balance: ${metrics['Final Balance']:.2f}")
    print(f"Total Returns: {metrics['Total Return']:.2f}%")
    print(f"Win Rate: {metrics['Win Rate']:.2f}")
    print(f"Volatility: {metrics['Volatility']:.4f}")
    print(f"Sharpe Ratio: {metrics['Sharpe Ratio']:.4f}")
    
    print("\nFinal Positions:")
    for ticker, shares in final_shares.items():
        print(f"{ticker}: {shares} shares")
    print(f"Cash: ${final_cash:.2f}")
    
    print("\nTrading History (first 10 trades in test period):")
    for t in trades[:10]:
        print(f"Day {t[0]}: {t[1]} - {t[2]} {t[3]} shares at ${t[4]:.2f}")


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed



Training model for AAPL...
Episode 1, Total Reward: -0.04, Epsilon: 0.08
Episode 2, Total Reward: 0.00, Epsilon: 0.06
Episode 3, Total Reward: 0.29, Epsilon: 0.05
Episode 4, Total Reward: -0.02, Epsilon: 0.04
Episode 5, Total Reward: 0.37, Epsilon: 0.03
Episode 6, Total Reward: -0.66, Epsilon: 0.03
Episode 7, Total Reward: -0.67, Epsilon: 0.02
Episode 8, Total Reward: -0.13, Epsilon: 0.02
Episode 9, Total Reward: 0.58, Epsilon: 0.01
Episode 10, Total Reward: 0.24, Epsilon: 0.01
Episode 11, Total Reward: 0.06, Epsilon: 0.01
Episode 12, Total Reward: 0.48, Epsilon: 0.01
Episode 13, Total Reward: -0.09, Epsilon: 0.01
Episode 14, Total Reward: -0.01, Epsilon: 0.01
Episode 15, Total Reward: -0.60, Epsilon: 0.01
Episode 16, Total Reward: -0.42, Epsilon: 0.01
Episode 17, Total Reward: -0.01, Epsilon: 0.01
Episode 18, Total Reward: 0.90, Epsilon: 0.01
Episode 19, Total Reward: 0.19, Epsilon: 0.01
Episode 20, Total Reward: 0.59, Epsilon: 0.01
Episode 21, Total Reward: -0.69, Epsilon: 0.01
Epis

[*********************100%***********************]  1 of 1 completed

Episode 100, Total Reward: -0.15, Epsilon: 0.01
Model, optimizer, and parameters for IBM saved to IBM_EnsembleModelSoftmax.pth



[*********************100%***********************]  1 of 1 completed



Running trading simulation on test period...

Testing Results (Evaluation Metrics):
Final Balance: $12802.79
Total Returns: 28.03%
Win Rate: 0.38
Volatility: 0.0087
Sharpe Ratio: 0.1302

Final Positions:
AAPL: 0 shares
IBM: 0 shares
Cash: $12802.79

Trading History (first 10 trades in test period):
Day 3: IBM - BUY 72 shares at $136.94
Day 5: IBM - SELL 72 shares at $135.84
Day 6: IBM - BUY 72 shares at $135.98
Day 9: IBM - SELL 72 shares at $137.35
Day 15: IBM - BUY 76 shares at $130.97
Day 17: IBM - SELL 76 shares at $130.57
Day 19: IBM - BUY 76 shares at $129.30
Day 21: IBM - SELL 76 shares at $128.93
Day 22: AAPL - BUY 65 shares at $151.03
Day 23: AAPL - SELL 65 shares at $153.83


# load the model #

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random  # For generating random actions
import numpy as np  # For handling arrays and numerical operations
import torch  # For working with PyTorch models
import torch.nn as nn  # For defining neural network layers
import torch.optim as optim  # For optimization
import matplotlib.pyplot as plt  # For plotting the loss
import pandas as pd  # For handling CSV file operations
import gym  # For the Gym environment

WINDOW_SIZE = 3
ACTION_SPACE = np.array([-1.0, 0.0, 1.0])  # Sell, Hold, Buy

def load_model(model_path):
    checkpoint = torch.load(model_path)
    model = LSTMTrader(input_size=10, hidden_size=64, output_size=len(ACTION_SPACE))
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epsilon = checkpoint['epsilon']
    replay_buffer = checkpoint['replay_buffer']
    
    return model, optimizer, epsilon, replay_buffer

class EnsembleTrader:
    def __init__(self, models, data):
        self.models = models
        self.data = data
        
    def get_ensemble_action(self, state, ticker):
        #Get action using ensemble of models
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        q_values = self.models[ticker](state_tensor)
        return ACTION_SPACE[q_values.max(1)[1].item()]
    
    def calculate_portfolio_value(self, cash, shares, current_prices):
        #Calculate total portfolio value
        value = cash
        for ticker in shares:
            value += shares[ticker] * current_prices[ticker]
        return value
    
    def simulate_trading(self, initial_cash=10000, commission=0.001):
        cash = initial_cash
        shares = {ticker: 0 for ticker in self.models.keys()}
        portfolio_values = []
        trades = []
        
        for t in range(WINDOW_SIZE, len(self.data)):
            # Get the current prices for each ticker, only if there are enough rows
            current_prices = {}
            for ticker in self.models.keys():
                ticker_data = self.data[self.data['Ticker'] == ticker]
                if t < len(ticker_data):  # Ensure t is within bounds
                    current_prices[ticker] = ticker_data['Close'].iloc[t]
                else:
                    current_prices[ticker] = None  # If out of bounds, set to None

            # Skip if any ticker’s data is unavailable at index t
            if any(price is None for price in current_prices.values()):
                continue

            current_prices = {ticker: self.data[self.data['Ticker'] == ticker]['Close'].iloc[t] 
                            for ticker in self.models.keys()}
            
            for ticker in self.models.keys():
                ticker_data = self.data[self.data['Ticker'] == ticker]
                state = prepare_state(ticker_data, t, WINDOW_SIZE)
                
                if state is not None:
                    action = self.get_ensemble_action(state, ticker)
                    
                    # Calculate maximum shares that can be bought
                    max_shares = int(cash / (current_prices[ticker] * (1 + commission)))
                    
                    if action > 0 and max_shares > 0:  # Buy
                        shares_to_buy = max_shares
                        cost = shares_to_buy * current_prices[ticker] * (1 + commission)
                        if cost <= cash:
                            cash -= cost
                            shares[ticker] += shares_to_buy
                            trades.append((t, ticker, 'BUY', shares_to_buy, current_prices[ticker]))
                    
                    elif action < 0 and shares[ticker] > 0:  # Sell
                        shares_to_sell = shares[ticker]
                        revenue = shares_to_sell * current_prices[ticker] * (1 - commission)
                        cash += revenue
                        shares[ticker] = 0
                        trades.append((t, ticker, 'SELL', shares_to_sell, current_prices[ticker]))
            
            # Record portfolio value
            portfolio_value = self.calculate_portfolio_value(cash, shares, current_prices)
            portfolio_values.append((self.data.index[t], portfolio_value))
        
        return trades, portfolio_values, cash, shares
    
def calculate_td_loss(model, state, next_state, reward, done):
    """
    Calculate the Temporal Difference (TD) loss for testing.
    """
    state_tensor = torch.FloatTensor(state).unsqueeze(0)
    next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0)
    
    # Get Q-values for current state
    q_values = model(state_tensor)
    # Get Q-values for next state
    next_q_values = model(next_state_tensor)
    
    # Take the max Q-value for the next state (the action value we're predicting for next state)
    max_next_q_value = next_q_values.max(1)[0].detach()
    
    # Compute the target Q-value for this state-action pair
    target_q_value = reward + GAMMA * max_next_q_value * (1 - done)
    
    # Calculate the predicted Q-value for the current state-action pair
    current_q_value = q_values.max(1)[0]  # Best action for the current state
    
    # Calculate Temporal Difference (TD) loss
    td_loss = (current_q_value - target_q_value).pow(2).mean()  # MSE loss
    
    return td_loss.item()

def test_model_and_save_loss(models, data, file_name="td_loss.txt"):
    td_losses = []  # List to store TD loss values
    
    for t in range(WINDOW_SIZE, len(data) - 1):
        state = prepare_state(data, t, WINDOW_SIZE)
        next_state = prepare_state(data, t + 1, WINDOW_SIZE)
        
        if state is None or next_state is None:
            continue
        
        reward = data['Returns'].iloc[t + 1]  # Reward based on return
        done = 0  # Usually done = 0 during testing unless you have an episode end condition
        
        # For simplicity, we'll calculate TD loss for the first model (for now)
        td_loss = calculate_td_loss(models['AAPL'], state, next_state, reward, done)
        td_losses.append(td_loss)  # Append the loss to the list
    
    # Save the TD losses to a file (e.g., txt or csv)
    np.savetxt(file_name, td_losses, delimiter=",")
    print(f"TD Loss saved to {file_name}")

if __name__ == "__main__":
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)
    random.seed(42)
    
    # Initialize parameters
    start_date = '2023-01-01'
    end_date = '2024-01-01'
    tickers = ['AAPL', 'IBM']
    initial_cash = 10000
    
    # Load and prepare data
    combined_data = pd.concat([load_data(ticker, start_date, end_date).assign(Ticker=ticker) 
                             for ticker in tickers])

    # Load trained models for each ticker
    models = {}
    for ticker in tickers:
        model_path = f"{ticker}_model.pth"
        model, optimizer, epsilon, replay_buffer = load_model(model_path)
        models[ticker] = model
    
    test_model_and_save_loss(models, combined_data)

    # # Initialize ensemble trader and run simulation
    # print("\nRunning trading simulation...")
    # ensemble_trader = EnsembleTrader(models, combined_data)
    # trades, portfolio_values, final_cash, final_shares = ensemble_trader.simulate_trading(initial_cash)
    
    # # Calculate and display metrics
    # metrics = calculate_metrics(trades, portfolio_values, initial_cash)
    
    # print("\nTrading Results:")
    # print("=" * 50)
    # print(f"Final Balance: ${metrics['Final Balance']:.2f}")
    # print(f"Total Returns: {metrics['Total Return']:.2f}%")
    # print(f"Win Rate: {metrics['Win Rate']:.2f}")
    # print(f"Volatility: {metrics['Volatility']:.4f}")
    # print(f"Sharpe Ratio: {metrics['Sharpe Ratio']:.4f}")
    
    # print("\nFinal Positions:")
    # for ticker, shares in final_shares.items():
    #     print(f"{ticker}: {shares} shares")
    # print(f"Cash: ${final_cash:.2f}")
    
    # print("\nTrading History (first 10 trades):")
    # for t in trades[:10]:
    #     print(f"Day {t[0]}: {t[1]} - {t[2]} {t[3]} shares at ${t[4]:.2f}")


NameError: name 'random' is not defined

### plotting temporal losses ###

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Load the TD loss values from the CSV file
td_losses = np.loadtxt('td_loss.txt', delimiter=",")

# Plot the TD losses
plt.figure(figsize=(10, 6))
plt.plot(td_losses, label="Temporal Difference Loss")
plt.xlabel('Test Steps')
plt.ylabel('TD Loss')
plt.title('Temporal Difference Loss over Test Steps')
plt.legend()
plt.grid(True)
plt.show()

In [4]:
import numpy as np
import pandas as pd
import yfinance as yf
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import random

# Constants
EPSILON = 0.1
EPSILON_MIN = 0.01
EPSILON_DECAY = 0.8
GAMMA = 0.99
WINDOW_SIZE = 3
BATCH_SIZE = 32
MEMORY_SIZE = 10000
ACTION_SPACE = np.array([-1.0, 0.0, 1.0])  # Sell, Hold, Buy

class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)
    
    def push(self, state, action, reward, next_state):
        self.buffer.append((state, action, reward, next_state))
    
    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)
    
    def __len__(self):
        return len(self.buffer)

class LSTMTrader(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMTrader, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, num_layers=2, dropout=0.2)
        self.attention = nn.MultiheadAttention(hidden_size, num_heads=4)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        
        # Apply attention mechanism
        attn_output, _ = self.attention(lstm_out, lstm_out, lstm_out)
        
        # Get the final output
        final_output = attn_output[:, -1, :]
        
        # Pass through fully connected layers
        x = self.relu(self.fc1(final_output))
        return self.fc2(x)

def add_technical_indicators(df):
    # Calculate moving averages
    df['SMA_5'] = df['Close'].rolling(window=5).mean()
    df['SMA_20'] = df['Close'].rolling(window=20).mean()
    
    # Calculate RSI
    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))
    
    # Calculate MACD
    exp1 = df['Close'].ewm(span=12, adjust=False).mean()
    exp2 = df['Close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = exp1 - exp2
    df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()
    
    # Calculate Bollinger Bands
    df['BB_middle'] = df['Close'].rolling(window=20).mean()
    df['BB_upper'] = df['BB_middle'] + 2 * df['Close'].rolling(window=20).std()
    df['BB_lower'] = df['BB_middle'] - 2 * df['Close'].rolling(window=20).std()
    
    # Add volume indicators
    df['Volume_SMA'] = df['Volume'].rolling(window=5).mean()
    
    return df

def load_data(ticker, start_date, end_date):
    df = yf.download(ticker, start=start_date, end=end_date)
    df = add_technical_indicators(df)
    df['Returns'] = df['Close'].pct_change()
    df = df.dropna()
    return df

def prepare_state(df, current_idx, window_size):
    """Prepare the state with technical indicators"""
    if current_idx < window_size:
        return None
    
    state = []
    for i in range(current_idx - window_size, current_idx):
        features = [
            df['Close'].iloc[i],
            df['SMA_5'].iloc[i],
            df['SMA_20'].iloc[i],
            df['RSI'].iloc[i],
            df['MACD'].iloc[i],
            df['Signal_Line'].iloc[i],
            df['BB_upper'].iloc[i],
            df['BB_lower'].iloc[i],
            df['Volume_SMA'].iloc[i],
            df['Returns'].iloc[i]
        ]
        state.append(features)
    return np.array(state)

# def train_model(model, ticker, data, replay_buffer, optimizer, criterion):
#     epsilon = EPSILON
    
#     for episode in range(100):  # Number of episodes
#         total_reward = 0
#         state = prepare_state(data, WINDOW_SIZE, WINDOW_SIZE)
        
#         for t in range(WINDOW_SIZE, len(data) - 1):
#             state_tensor = torch.FloatTensor(state).unsqueeze(0)
            
#             # Epsilon-greedy action selection
#             if random.random() < epsilon:
#                 action_idx = random.randrange(len(ACTION_SPACE))
#             else:
#                 with torch.no_grad():
#                     q_values = model(state_tensor)
#                     action_idx = q_values.max(1)[1].item()
            
#             action = ACTION_SPACE[action_idx]
            
#             # Get next state and reward
#             next_state = prepare_state(data, t + 1, WINDOW_SIZE)
#             reward = data['Returns'].iloc[t + 1] * action  # Reward based on return and action
            
#             # Store transition in replay buffer
#             replay_buffer.push(state, action_idx, reward, next_state)
            
#             # Train on random batch from replay buffer
#             if len(replay_buffer) > BATCH_SIZE:
#                 batch = replay_buffer.sample(BATCH_SIZE)
#                 state_batch = torch.FloatTensor([s[0] for s in batch])
#                 action_batch = torch.LongTensor([s[1] for s in batch])
#                 reward_batch = torch.FloatTensor([s[2] for s in batch])
#                 next_state_batch = torch.FloatTensor([s[3] for s in batch])
                
#                 # Compute Q values
#                 current_q_values = model(state_batch).gather(1, action_batch.unsqueeze(1))
#                 next_q_values = model(next_state_batch).max(1)[0].detach()
#                 target_q_values = reward_batch + GAMMA * next_q_values
                
#                 # Compute loss and update model
#                 loss = criterion(current_q_values.squeeze(), target_q_values)
#                 optimizer.zero_grad()
#                 loss.backward()
#                 optimizer.step()
            
#             state = next_state
#             total_reward += reward
            
#         # Decay epsilon
#         epsilon = max(EPSILON_MIN, epsilon * EPSILON_DECAY)
        
#         print(f"Episode {episode + 1}, Total Reward: {total_reward:.2f}, Epsilon: {epsilon:.2f}")
    
#     return model

class EnsembleTrader:
    def __init__(self, models, data):
        self.models = models
        self.data = data
        
    def get_ensemble_action(self, state, ticker):
        """Get action using ensemble of models"""
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        q_values = self.models[ticker](state_tensor)
        return ACTION_SPACE[q_values.max(1)[1].item()]
    
    def calculate_portfolio_value(self, cash, shares, current_prices):
        """Calculate total portfolio value"""
        value = cash
        for ticker in shares:
            value += shares[ticker] * current_prices[ticker]
        return value
    
    def simulate_trading(self, initial_cash=10000, commission=0.001):
        cash = initial_cash
        shares = {ticker: 0 for ticker in self.models.keys()}
        portfolio_values = []
        trades = []
        
        for t in range(WINDOW_SIZE, len(self.data)):
            # Get the current prices for each ticker, only if there are enough rows
            current_prices = {}
            for ticker in self.models.keys():
                ticker_data = self.data[self.data['Ticker'] == ticker]
                if t < len(ticker_data):  # Ensure t is within bounds
                    current_prices[ticker] = ticker_data['Close'].iloc[t]
                else:
                    current_prices[ticker] = None  # If out of bounds, set to None

            # Skip if any ticker’s data is unavailable at index t
            if any(price is None for price in current_prices.values()):
                continue

            current_prices = {ticker: self.data[self.data['Ticker'] == ticker]['Close'].iloc[t] 
                            for ticker in self.models.keys()}
            
            for ticker in self.models.keys():
                ticker_data = self.data[self.data['Ticker'] == ticker]
                state = prepare_state(ticker_data, t, WINDOW_SIZE)
                
                if state is not None:
                    action = self.get_ensemble_action(state, ticker)
                    
                    # Calculate maximum shares that can be bought
                    max_shares = int(cash / (current_prices[ticker] * (1 + commission)))
                    
                    if action > 0 and max_shares > 0:  # Buy
                        shares_to_buy = max_shares
                        cost = shares_to_buy * current_prices[ticker] * (1 + commission)
                        if cost <= cash:
                            cash -= cost
                            shares[ticker] += shares_to_buy
                            trades.append((t, ticker, 'BUY', shares_to_buy, current_prices[ticker]))
                    
                    elif action < 0 and shares[ticker] > 0:  # Sell
                        shares_to_sell = shares[ticker]
                        revenue = shares_to_sell * current_prices[ticker] * (1 - commission)
                        cash += revenue
                        shares[ticker] = 0
                        trades.append((t, ticker, 'SELL', shares_to_sell, current_prices[ticker]))
            
            # Record portfolio value
            portfolio_value = self.calculate_portfolio_value(cash, shares, current_prices)
            portfolio_values.append((self.data.index[t], portfolio_value))
        
        return trades, portfolio_values, cash, shares

def calculate_metrics(trades, portfolio_values, initial_cash):
    """Calculate trading metrics"""
    if not portfolio_values:
        return {}
    
    final_value = portfolio_values[-1][1]
    returns = [(v2[1] - v1[1]) / v1[1] for v1, v2 in zip(portfolio_values[:-1], portfolio_values[1:])]
    
    metrics = {
        'Final Balance': final_value,
        'Total Return': ((final_value - initial_cash) / initial_cash) * 100,
        'Win Rate': sum(1 for r in returns if r > 0) / len(returns) if returns else 0,
        'Volatility': np.std(returns) if returns else 0,
        'Sharpe Ratio': (np.mean(returns) / np.std(returns)) * np.sqrt(252) if returns else 0,
    }
    
    return metrics

if __name__ == "__main__":
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)
    random.seed(42)
    
    # Initialize parameters
    start_date = '2022-01-01'
    end_date = '2023-01-01'
    tickers = ['AAPL', 'IBM']
    initial_cash = 10000
    
    # Load and prepare data
    combined_data = pd.concat([load_data(ticker, start_date, end_date).assign(Ticker=ticker) 
                             for ticker in tickers])
    
    # # Initialize models and training components
    # models = {}
    # for ticker in tickers:
    #     model = LSTMTrader(input_size=10, hidden_size=64, output_size=len(ACTION_SPACE))
    #     optimizer = optim.Adam(model.parameters(), lr=0.001)
    #     criterion = nn.MSELoss()
    #     replay_buffer = ReplayBuffer(MEMORY_SIZE)
        
    #     # Train model
    #     print(f"\nTraining model for {ticker}...")
    #     ticker_data = combined_data[combined_data['Ticker'] == ticker].copy()
    #     models[ticker] = train_model(model, ticker, ticker_data, replay_buffer, optimizer, criterion)

    #     # Save model, optimizer, and additional parameters after training
    #     save_path = f"{ticker}_model.pth"
    #     torch.save({
    #         'model_state_dict': model.state_dict(),
    #         'optimizer_state_dict': optimizer.state_dict(),
    #         'criterion': criterion,
    #         'replay_buffer': replay_buffer,
    #         'epsilon': EPSILON
    #     }, save_path)
    #     print(f"Model, optimizer, and parameters for {ticker} saved to {save_path}")
        
    # # Initialize ensemble trader and run simulation
    # print("\nRunning trading simulation...")
    # ensemble_trader = EnsembleTrader(models, combined_data)
    # trades, portfolio_values, final_cash, final_shares = ensemble_trader.simulate_trading(initial_cash)
    
    # # Calculate and display metrics
    # metrics = calculate_metrics(trades, portfolio_values, initial_cash)
    
    # print("\nTrading Results:")
    # print("=" * 50)
    # print(f"Final Balance: ${metrics['Final Balance']:.2f}")
    # print(f"Total Returns: {metrics['Total Return']:.2f}%")
    # print(f"Win Rate: {metrics['Win Rate']:.2f}")
    # print(f"Volatility: {metrics['Volatility']:.4f}")
    # print(f"Sharpe Ratio: {metrics['Sharpe Ratio']:.4f}")
    
    # print("\nFinal Positions:")
    # for ticker, shares in final_shares.items():
    #     print(f"{ticker}: {shares} shares")
    # print(f"Cash: ${final_cash:.2f}")
    
    # print("\nTrading History (first 10 trades):")
    # for t in trades[:10]:
    #     print(f"Day {t[0]}: {t[1]} - {t[2]} {t[3]} shares at ${t[4]:.2f}")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import torch
import torch.nn as nn
from datetime import datetime, timedelta

# Constants
EPSILON = 0.1
EPSILON_MIN = 0.01
EPSILON_DECAY = 0.8
GAMMA = 0.99
BATCH_SIZE = 32
MEMORY_SIZE = 10000
WINDOW_SIZE = 3
ACTION_SPACE = np.array([-1.0, 0.0, 1.0])  # Sell, Hold, Buy

class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)
    
    def push(self, state, action, reward, next_state):
        self.buffer.append((state, action, reward, next_state))
    
    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)
    
    def __len__(self):
        return len(self.buffer)

class LSTMTrader(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMTrader, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, num_layers=2, dropout=0.2)
        self.attention = nn.MultiheadAttention(hidden_size, num_heads=4)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        attn_output, _ = self.attention(lstm_out, lstm_out, lstm_out)
        final_output = attn_output[:, -1, :]
        x = self.relu(self.fc1(final_output))
        return self.fc2(x)

def add_technical_indicators(df):
    # Calculate moving averages
    df['SMA_5'] = df['Close'].rolling(window=5).mean()
    df['SMA_20'] = df['Close'].rolling(window=20).mean()
    
    # Calculate RSI
    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))
    
    # Calculate MACD
    exp1 = df['Close'].ewm(span=12, adjust=False).mean()
    exp2 = df['Close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = exp1 - exp2
    df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()
    
    # Calculate Bollinger Bands
    df['BB_middle'] = df['Close'].rolling(window=20).mean()
    df['BB_upper'] = df['BB_middle'] + 2 * df['Close'].rolling(window=20).std()
    df['BB_lower'] = df['BB_middle'] - 2 * df['Close'].rolling(window=20).std()
    
    # Add volume indicators
    df['Volume_SMA'] = df['Volume'].rolling(window=5).mean()
    
    return df

def prepare_state(df, current_idx, window_size):
    if current_idx < window_size:
        return None
    
    state = []
    for i in range(current_idx - window_size, current_idx):
        features = [
            df['Close'].iloc[i],
            df['SMA_5'].iloc[i],
            df['SMA_20'].iloc[i],
            df['RSI'].iloc[i],
            df['MACD'].iloc[i],
            df['Signal_Line'].iloc[i],
            df['BB_upper'].iloc[i],
            df['BB_lower'].iloc[i],
            df['Volume_SMA'].iloc[i],
            df['Returns'].iloc[i]
        ]
        state.append(features)
    return np.array(state)

def load_model(ticker, model_path):
    """Load a saved model for a specific ticker"""
    model = LSTMTrader(input_size=10, hidden_size=64, output_size=len(ACTION_SPACE))
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  # Set to evaluation mode
    return model

def load_data(ticker, start_date, end_date):
    """Load and prepare data for a specific ticker"""
    df = yf.download(ticker, start=start_date, end=end_date)
    df = add_technical_indicators(df)
    df['Returns'] = df['Close'].pct_change()
    return df.dropna()

class TradingSimulator:
    def __init__(self, models, commission=0.001):
        self.models = models
        self.commission = commission
    
    def get_action(self, state, ticker):
        """Get trading action from model"""
        with torch.no_grad():
            state_tensor = torch.FloatTensor(state).unsqueeze(0)
            q_values = self.models[ticker](state_tensor)
            return ACTION_SPACE[q_values.max(1)[1].item()]
    
    def calculate_portfolio_value(self, cash, positions, current_prices):
        """Calculate total portfolio value"""
        value = cash
        for ticker, shares in positions.items():
            value += shares * current_prices[ticker]
        return value
    
    def simulate(self, data_dict, initial_cash=10000, simulation_days=None):
        """
        Simulate trading using loaded models
        
        Parameters:
        data_dict: Dictionary of DataFrames for each ticker
        initial_cash: Initial cash amount
        simulation_days: Number of days to simulate (None for entire period)
        """
        cash = initial_cash
        positions = {ticker: 0 for ticker in self.models.keys()}
        portfolio_values = []
        trades = []
        trade_history = []
        
        # Determine simulation period
        min_length = min(len(df) for df in data_dict.values())
        start_idx = WINDOW_SIZE
        end_idx = min_length if simulation_days is None else min(start_idx + simulation_days, min_length)
        
        # Get common date range
        dates = list(data_dict[list(data_dict.keys())[0]].index[start_idx:end_idx])
        
        for t, current_date in enumerate(dates, start=start_idx):
            current_prices = {
                ticker: data_dict[ticker]['Close'].loc[current_date]
                for ticker in self.models.keys()
            }
            
            # Store daily portfolio value
            portfolio_value = self.calculate_portfolio_value(cash, positions, current_prices)
            portfolio_values.append((current_date, portfolio_value))
            
            # Make trading decisions for each ticker
            for ticker in self.models.keys():
                ticker_data = data_dict[ticker]
                state = prepare_state(ticker_data, t, WINDOW_SIZE)
                
                if state is not None:
                    action = self.get_action(state, ticker)
                    current_price = current_prices[ticker]
                    
                    # Execute trades based on action
                    if action > 0:  # Buy
                        max_shares = int(cash / (current_price * (1 + self.commission)))
                        if max_shares > 0:
                            cost = max_shares * current_price * (1 + self.commission)
                            cash -= cost
                            positions[ticker] += max_shares
                            trades.append({
                                'date': current_date,
                                'ticker': ticker,
                                'action': 'BUY',
                                'shares': max_shares,
                                'price': current_price,
                                'cost': cost
                            })
                    
                    elif action < 0:  # Sell
                        if positions[ticker] > 0:
                            shares_to_sell = positions[ticker]
                            revenue = shares_to_sell * current_price * (1 - self.commission)
                            cash += revenue
                            positions[ticker] = 0
                            trades.append({
                                'date': current_date,
                                'ticker': ticker,
                                'action': 'SELL',
                                'shares': shares_to_sell,
                                'price': current_price,
                                'revenue': revenue
                            })
            
            # Record daily state
            trade_history.append({
                'date': current_date,
                'portfolio_value': portfolio_value,
                'cash': cash,
                'positions': positions.copy()
            })
        
        return {
            'trades': trades,
            'portfolio_values': portfolio_values,
            'final_cash': cash,
            'final_positions': positions,
            'trade_history': trade_history
        }

def calculate_metrics(simulation_result, initial_cash):
    """Calculate performance metrics from simulation results"""
    portfolio_values = [pv[1] for pv in simulation_result['portfolio_values']]
    returns = np.diff(portfolio_values) / portfolio_values[:-1]
    
    if len(portfolio_values) < 2:
        return {}
    
    total_return = (portfolio_values[-1] - initial_cash) / initial_cash
    daily_returns = pd.Series(returns)
    
    metrics = {
        'Final Portfolio Value': portfolio_values[-1],
        'Total Return (%)': total_return * 100,
        'Number of Trades': len(simulation_result['trades']),
        'Win Rate (%)': (daily_returns > 0).mean() * 100,
        'Sharpe Ratio': np.sqrt(252) * daily_returns.mean() / daily_returns.std() if len(daily_returns) > 0 else 0,
        'Max Drawdown (%)': (pd.Series(portfolio_values).diff() / pd.Series(portfolio_values).shift(1)).min() * 100,
        'Final Cash': simulation_result['final_cash'],
        'Volatility (%)': daily_returns.std() * np.sqrt(252) * 100
    }
    
    return metrics

if __name__ == "__main__":
    # Configuration
    tickers = ['AAPL', 'IBM']  # Add more tickers as needed
    start_date = '2023-01-01'  # Simulation start date
    end_date = '2024-01-01'    # Simulation end date
    initial_cash = 10000
    simulation_days = 30  # Set to None for entire period
    
    # Load models
    models = {}
    for ticker in tickers:
        model_path = f"{ticker}_Ensemblemodel.pth"
        try:
            models[ticker] = load_model(ticker, model_path)
            print(f"Loaded model for {ticker}")
        except FileNotFoundError:
            print(f"No model found for {ticker}")
            continue
    
    # Load data
    data_dict = {}
    for ticker in models.keys():
        data_dict[ticker] = load_data(ticker, start_date, end_date)
        print(f"Loaded data for {ticker}")
    
    # Initialize simulator
    simulator = TradingSimulator(models)
    
    # Run simulation
    print("\nRunning trading simulation...")
    simulation_result = simulator.simulate(
        data_dict,
        initial_cash=initial_cash,
        simulation_days=simulation_days
    )
    
    # Calculate and display metrics
    metrics = calculate_metrics(simulation_result, initial_cash)
    
    print("\nSimulation Results:")
    print("=" * 50)
    for metric, value in metrics.items():
        print(f"{metric}: {value:,.2f}")
    
    print("\nFinal Positions:")
    for ticker, shares in simulation_result['final_positions'].items():
        if shares > 0:
            print(f"{ticker}: {shares} shares")
    
    print("\nRecent Trades:")
    for trade in simulation_result['trades'][-5:]:  # Show last 5 trades
        print(f"Date: {trade['date'].strftime('%Y-%m-%d')}")
        print(f"Ticker: {trade['ticker']}")
        print(f"Action: {trade['action']}")
        print(f"Shares: {trade['shares']}")
        print(f"Price: ${trade['price']:.2f}")
        print("-" * 30)