In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gym
from gymnasium import spaces
import tensorflow as tf
from tensorflow.keras import layers
import torch
import torch.nn as nn
import torch.optim as optim
import random
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
from stable_baselines3 import DDPG
from stable_baselines3.common.vec_env import DummyVecEnv
import ta
import shimmy
from collections import deque



In [11]:
# Define parameters such as:
START_DATE = '2010-01-01'
END_DATE = '2020-12-31'
STOCKS = ['AAPL','TSLA', 'AMZN', 'GOOGL', 'MSFT', 'META','NVDA']  # Mega 7 stocks
TRAIN_TEST_SPLIT = 0.8
WINDOW_SIZE = 60  # Number of past days to consider
INITIAL_BALANCE = 100000  # Starting cash
data = {}

In [12]:
def download_data(tickers, start, end):
    data = {}
    for ticker in tickers:
        data[ticker] = yf.download(ticker, start=start, end=end)
    return data

def clean_data(data):
    cleaned_data = {}
    for ticker, df in data.items():
        df = df.dropna()
        cleaned_data[ticker] = df
    return cleaned_data


In [13]:
data = download_data(STOCKS, START_DATE, END_DATE)
data = clean_data(data)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [14]:
print(data)

{'AAPL': Price        Adj Close       Close        High         Low        Open  \
Ticker            AAPL        AAPL        AAPL        AAPL        AAPL   
Date                                                                     
2010-01-04    6.447412    7.643214    7.660714    7.585000    7.622500   
2010-01-05    6.458560    7.656429    7.699643    7.616071    7.664286   
2010-01-06    6.355826    7.534643    7.686786    7.526786    7.656429   
2010-01-07    6.344078    7.520714    7.571429    7.466071    7.562500   
2010-01-08    6.386255    7.570714    7.571429    7.466429    7.510714   
...                ...         ...         ...         ...         ...   
2020-12-23  128.059906  130.960007  132.429993  130.779999  132.160004   
2020-12-24  129.047516  131.970001  133.460007  131.100006  131.320007   
2020-12-28  133.662994  136.690002  137.339996  133.509995  133.990005   
2020-12-29  131.883286  134.869995  138.789993  134.339996  138.050003   
2020-12-30  130.758774  133.7

In [15]:
def compute_RSI(series, period=14):
    delta = series.diff()

    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)

    avg_gain = gain.rolling(window=period, min_periods=period).mean()[:period+1]
    avg_loss = loss.rolling(window=period, min_periods=period).mean()[:period+1]

    rsi = pd.Series(index=series.index, dtype='float64')

    # Initialize RSI values
    for i in range(period, len(series)):
        if i == period:
            current_avg_gain = avg_gain.iloc[-1]
            current_avg_loss = avg_loss.iloc[-1]
        else:
            current_avg_gain = (current_avg_gain * (period - 1) + gain.iloc[i]) / period
            current_avg_loss = (current_avg_loss * (period - 1) + loss.iloc[i]) / period

        if current_avg_loss == 0:
            rs = np.inf
            rsi.iloc[i] = 100
        else:
            rs = current_avg_gain / current_avg_loss
            rsi.iloc[i] = 100 - (100 / (1 + rs))

    return rsi

def compute_MACD(series, span_short=12, span_long=26, span_signal=9):
    ema_short = series.ewm(span=span_short, adjust=False).mean()
    ema_long = series.ewm(span=span_long, adjust=False).mean()
    macd = ema_short - ema_long
    signal = macd.ewm(span=span_signal, adjust=False).mean()
    macd_diff = macd - signal
    return macd, signal, macd_diff

In [16]:
def add_technical_indicators(df):
    close = df['Close']
    
    # Ensure 'close' is a Series
    if isinstance(close, pd.DataFrame):
        close = close.squeeze()
    
    # Ensure 'close' is 1D
    if len(close.shape) > 1:
        close = close.flatten()
    
    # Compute indicators
    df['MA50'] = ta.trend.sma_indicator(close, window=50)
    df['MA200'] = ta.trend.sma_indicator(close, window=200)
    df['RSI'] = ta.momentum.RSIIndicator(close, window=14).rsi()
    df['MACD'] = ta.trend.MACD(close).macd()
    
    df = df.dropna()
    return df

In [17]:
for ticker in STOCKS:
    if ticker in data:
        df = data[ticker].copy()
        data[ticker] = add_technical_indicators(df)
    else:
        print(f"Data for {ticker} not available.")

# Verify Indicators
for ticker in STOCKS:
    if ticker in data:
        print(f"{ticker} DataFrame with indicators:")
        print(data[ticker].head())

AAPL DataFrame with indicators:
Price      Adj Close      Close       High        Low       Open      Volume  \
Ticker          AAPL       AAPL       AAPL       AAPL       AAPL        AAPL   
Date                                                                           
2010-10-18  9.580288  11.357143  11.392857  11.224643  11.373929  1093010800   
2010-10-19  9.323908  11.053214  11.206071  10.715000  10.835714  1232784000   
2010-10-20  9.355238  11.090357  11.223214  10.959643  11.035714   721624400   
2010-10-21  9.324812  11.054286  11.240714  10.957143  11.155714   551460000   
2010-10-22  9.263053  10.981071  11.072857  10.939286  11.038214   372778000   

Price           MA50     MA200        RSI      MACD  
Ticker                                               
Date                                                 
2010-10-18  9.659357  8.767198  83.388449  0.382372  
2010-10-19  9.693457  8.784248  70.061930  0.382695  
2010-10-20  9.729971  8.801418  70.678640  0.381549  
201

In [18]:
train_data = {}
test_data = {}
for ticker in STOCKS:
    df = data[ticker]
    split_idx = int(len(df) * TRAIN_TEST_SPLIT)
    train_data[ticker] = df[:split_idx]
    test_data[ticker] = df[split_idx:]


In [None]:
class TradingEnv(gym.Env):
    def __init__(self, df, initial_balance=100000, window_size=60):
        super(TradingEnv, self).__init__()
        self.df = df.reset_index()
        self.initial_balance = initial_balance
        self.window_size = window_size
        self.current_step = self.window_size
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.shares_held = 0
        self.max_steps = len(self.df) - 1

        # Define action and observation space
        # Actions: 0 = Hold, 1 = Buy, 2 = Sell
        self.action_space = spaces.Discrete(3)
        
        # Observation: window_size days of data + portfolio info
        # Assuming df has N columns, observation shape = (window_size, N + 3)
        self.observation_space = spaces.Box(
            low=-np.inf, 
            high=np.inf, 
            shape=(window_size, len(df.columns) + 3), 
            dtype=np.float32
        )

    def reset(self):
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.shares_held = 0
        self.current_step = self.window_size
        return self._next_observation()

    def _next_observation(self):
        window_data = self.df.iloc[self.current_step - self.window_size:self.current_step]
        obs = window_data.values  # Shape: (60, num_features)
        
        # Portfolio information
        portfolio = np.array([self.balance, self.shares_held, self.net_worth])  # Shape: (3,)
        portfolio_broadcasted = np.tile(portfolio, (self.window_size, 1))  # Shape: (60, 3)
        
        # Concatenate along the feature axis (axis=1)
        concatenated = np.concatenate((obs, portfolio_broadcasted), axis=1)  # Shape: (60, num_features + 3)
        
        # Ensure data type matches observation_space
        try:
            concatenated = concatenated.astype(np.float32)
        except TypeError as e:
            print("Error converting concatenated observation to float32.")
            print("Data types in concatenated array:", [type(item) for item in concatenated.flatten()])
            raise e
        return concatenated

    def step(self, action):
        done = False
        current_price = self.df.iloc[self.current_step]['Close']
        
        if action == 1:  # Buy
            # Define how many shares to buy (e.g., all available balance)
            shares_to_buy = self.balance // current_price
            if shares_to_buy > 0:
                self.balance -= shares_to_buy * current_price
                self.shares_held += shares_to_buy
        elif action == 2:  # Sell
            # Sell all shares held
            if self.shares_held > 0:
                self.balance += self.shares_held * current_price
                self.shares_held = 0
        # Else: Hold

        self.current_step += 1

        if self.current_step > self.max_steps:
            done = True

        # Update net worth
        self.net_worth = self.balance + self.shares_held * self.df.iloc[self.current_step]['Close']

        # Calculate reward (change in net worth)
        reward = self.net_worth - self.initial_balance

        return self._next_observation(), reward, done, {}

    def render(self, mode='human', close=False):
        profit = self.net_worth - self.initial_balance
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Net worth: {self.net_worth}')
        print(f'Profit: {profit}')

In [30]:
envs = {}
for ticker in STOCKS:
    envs[ticker] = TradingEnv(train_data[ticker])


In [31]:


class DQNAgent:
    def __init__(self, state_size, action_size, learning_rate=0.001, gamma=0.95, 
                 epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01, 
                 batch_size=64, memory_size=100000):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=memory_size)
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.batch_size = batch_size
        self.model = self._build_model(learning_rate)

    def _build_model(self, learning_rate):
        model = tf.keras.Sequential()
        model.add(layers.Dense(64, input_dim=self.state_size, activation='relu'))
        model.add(layers.Dense(64, activation='relu'))
        model.add(layers.Dense(self.action_size, activation='linear'))
        # Updated the parameter name from 'lr' to 'learning_rate'
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state)
        return np.argmax(q_values[0])

    def replay(self):
        if len(self.memory) < self.batch_size:
            return
        minibatch = random.sample(self.memory, self.batch_size)
        states = np.array([m[0] for m in minibatch]).reshape(self.batch_size, self.state_size)
        actions = np.array([m[1] for m in minibatch])
        rewards = np.array([m[2] for m in minibatch])
        next_states = np.array([m[3] for m in minibatch]).reshape(self.batch_size, self.state_size)
        dones = np.array([m[4] for m in minibatch])

        # Predict Q-values for current states and next states
        target = rewards + self.gamma * np.amax(self.model.predict(next_states), axis=1) * (~dones)
        target_f = self.model.predict(states)

        # Update the Q-values for the actions taken
        for i in range(self.batch_size):
            target_f[i][actions[i]] = target[i]

        # Train the model on the updated Q-values
        self.model.fit(states, target_f, epochs=1, verbose=0)

        # Decay epsilon to reduce exploration over time
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [32]:
def train_dqn(agent, env, episodes=1000):
    for e in range(episodes):
        state = env.reset()
        state = np.reshape(state, [1, agent.state_size])
        done = False
        total_reward = 0
        while not done:
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            next_state = np.reshape(next_state, [1, agent.state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward
            agent.replay()
        print(f"Episode {e+1}/{episodes}, Total Reward: {total_reward}, Epsilon: {agent.epsilon:.2f}")


In [34]:
dqn_agents = {}
for ticker in STOCKS:
    env = envs[ticker]
    state_size = env.observation_space.shape[0] * env.observation_space.shape[1]
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    train_dqn(agent, env, episodes=1000)
    dqn_agents[ticker] = agent


TypeError: float() argument must be a string or a real number, not 'Timestamp'

In [50]:
class PPOAgent:
    def __init__(self, state_dim, action_dim, lr=3e-4, gamma=0.99, eps_clip=0.2, K_epochs=80):
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs
        self.action_dim = action_dim

        self.policy = ActorCritic(state_dim, action_dim).to(device)
        self.optimizer = optim.Adam(self.policy.parameters(), lr=lr)
        self.policy_old = ActorCritic(state_dim, action_dim).to(device)
        self.policy_old.load_state_dict(self.policy.state_dict())
        self.MseLoss = nn.MSELoss()

    def select_action(self, state):
        state = torch.FloatTensor(state).to(device)
        action, _ = self.policy_old.act(state)
        return action.detach().cpu().numpy()

    def update(self, memory):
        # Implement PPO update logic
        pass

class ActorCritic(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(ActorCritic, self).__init__()
        self.actor = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, action_dim),
            nn.Softmax(dim=-1)
        )
        self.critic = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def act(self, state):
        action_probs = self.actor(state)
        dist = torch.distributions.Categorical(action_probs)
        action = dist.sample()
        return action, dist.log_prob(action)

    def evaluate(self, state, action):
        action_probs = self.actor(state)
        dist = torch.distributions.Categorical(action_probs)
        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        state_values = self.critic(state)
        return action_logprobs, state_values, dist_entropy

In [78]:
# Example initialization
env = DummyVecEnv([lambda: TradingEnv(train_data['AAPL'])])
ddpg_model = DDPG('MlpPolicy', env, verbose=1)



NotImplementedError: Cannot convert space of type Box(-inf, inf, (60, 13), float32). Please upgrade your code to gymnasium.