In [None]:
!pip install gym



In [None]:
!pip install stable-baselines3[extra]

Collecting stable-baselines3[extra]
  Downloading stable_baselines3-2.1.0-py3-none-any.whl (178 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.7/178.7 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gymnasium<0.30,>=0.28.1 (from stable-baselines3[extra])
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m
Collecting shimmy[atari]~=1.1.0 (from stable-baselines3[extra])
  Downloading Shimmy-1.1.0-py3-none-any.whl (37 kB)
Collecting autorom[accept-rom-license]~=0.6.1 (from stable-baselines3[extra])
  Downloading AutoROM-0.6.1-py3-none-any.whl (9.4 kB)
Collecting AutoROM.accept-rom-license (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra])
  Downloading AutoROM.accept-rom-license-0.6.1.tar.gz (434 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m434.7/434.7 kB[0m [31m33.8 MB/s[0m eta [

In [None]:
!pip install tensorflow



## Importing Dataset from Yahoo Finance

In [12]:
import yfinance as yf

# Define the ticker symbol for Coal India on Yahoo Finance
ticker = 'AXISBANK.NS'

# Define the time period for the historical data (you can adjust the start and end dates)
start_date = '2020-01-01'
end_date = '2023-01-01'

# Extract the historical data from Yahoo Finance
data = yf.download(ticker, start=start_date, end=end_date)
data['MA'] = data['Close'].rolling(window=20).mean()

# Print the first few rows of the data
print(data.head())

[*********************100%%**********************]  1 of 1 completed
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2020-01-01  754.900024  759.950012  747.200012  748.700012  746.799988   
2020-01-02  750.000000  759.000000  747.599976  756.950012  755.029053   
2020-01-03  753.150024  756.250000  740.500000  742.950012  741.064575   
2020-01-06  739.450012  739.599976  721.700012  723.250000  721.414551   
2020-01-07  728.000000  738.000000  721.049988  725.750000  723.908203   

             Volume  MA  
Date                     
2020-01-01  4917748 NaN  
2020-01-02  5156046 NaN  
2020-01-03  8489729 NaN  
2020-01-06  6356198 NaN  
2020-01-07  9103360 NaN  


In [13]:
#Importing necessary Libraries for Environment and DQN
import gym
import numpy as np
from gym import spaces
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv

## Mean Reversion Strategy

In [14]:
class MeanReversionEnvironment(gym.Env):
    def __init__(self, data):
        self.data = data
        self.current_step = 0
        self.max_steps = len(data) - 1
        self.initial_balance = 100000  # Example initial balance
        self.balance = self.initial_balance
        self.position = 0  # Current position
        self.profit = 0
        self.done = False
        self.action_space = spaces.Discrete(2)  # Buy or sell
        self.observation_space = spaces.Box(low=0, high=1, shape=(len(data.columns),), dtype=np.float32)
        self.reward_range = (-1, 1)  # Define the reward range

    def step(self, action):
      done = False
      reward = 0

      if self.current_step >= self.max_steps:
          done = True
          # Close any open positions
          if self.position > 0:
              self.balance += self.position * self.data['Close'].iloc[-1]
              self.position = 0
          elif self.position < 0:
              self.balance -= abs(self.position) * self.data['Close'].iloc[-1]
              self.position = 0
          return np.array(self.data.iloc[self.current_step - 1].values), 0, done, {}

      # Obtain the current data point
      current_data = self.data.iloc[self.current_step]

      # Implement mean reversion strategy logic for buying and selling
      if current_data['Close'] < current_data['MA']:
          if self.position == 0:
              # Buy
              self.position += 5
              self.balance -= current_data['Close']  # Deduct the buying price from the balance
              print("Bought at price: ", current_data['Close'])
          elif self.position < 0:
              # Buy to close the short position
              self.balance += self.position * 2 * current_data['Close']  # Profit from selling at the current price
              self.position = 5
              print("Bought to close at price: ", current_data['Close'])

      elif current_data['Close'] > current_data['MA']:
          if self.position == 0:
              # Short sell
              self.position = -5
              self.balance += current_data['Close']  # Add the selling price to the balance
              print("Short sold at price: ", current_data['Close'])
          elif self.position > 0:
              # Sell to close the long position
              self.balance -= self.position * 2 * current_data['Close']  # Profit from selling at the current price
              self.position = 5
              print("Sold to close at price: ", current_data['Close'])


      # the reward calculation logic
      if self.position > 0:
          profit_loss = current_data['Close'] - self.data['Close'].iloc[self.current_step - 1]
          if profit_loss > 0:
              reward = 5
          else:
              reward = -3  # Penalty for loss

      elif self.position < 0:
          profit_loss = self.data['Close'].iloc[self.current_step-1] - current_data['Close']
          if profit_loss > 0:
              reward = 5
          else:
              reward = -3  # Penalty for loss

      self.current_step += 1

      next_observation = self.data.iloc[self.current_step]

      return next_observation, reward, done, {}


    def reset(self):
        self.current_step = 0
        self.balance = self.initial_balance
        self.position = 0
        self.profit = 0
        self.done = False
        return self.data.iloc[self.current_step].values



In [15]:
#Initialising Environment

env = MeanReversionEnvironment(data)

# Define and initialize the DQN agent
model = DQN("MlpPolicy", env, verbose=1)

# Train the DQN agent
model.learn(total_timesteps=200000)

# Save the trained model
model.save("dqn_trading_agent")

#running the trained model
obs = env.reset()
total_reward = 0
for i in range(0,len(data)):
    print(f"Iteration {i}")
    print(f"Current observation shape: {obs.shape}")
    action, _ = model.predict(obs, deterministic=True)
    # print(f"Selected action: {action}")
    obs, reward, done, info = env.step(action)
    total_reward += reward
    print(f"New observation shape: {obs.shape}")
    print(f"Reward: {reward}")
    print(f"Done: {done}")
    print("--------------")

    if done:
        print("Episode is done. Exiting the loop.")
        break
# Print the final total reward and profit/loss
print(f"Final total reward: {total_reward}")
print(f"Profit/Loss: {env.balance - env.initial_balance}")


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Bought at price:  737.1500244140625
Sold to close at price:  730.9000244140625
Short sold at price:  729.2999877929688
Bought to close at price:  736.9500122070312
Bought at price:  748.1500244140625
Sold to close at price:  733.6500244140625
Bought at price:  741.0499877929688
Sold to close at price:  725.4500122070312
Short sold at price:  728.75
Bought to close at price:  735.8499755859375
Short sold at price:  697.2999877929688
Bought to close at price:  420.1499938964844
Bought at price:  418.95001220703125
Sold to close at price:  402.79998779296875
Short sold at price:  389.0
Bought to close at price:  390.95001220703125
Bought at price:  384.95001220703125
Sold to close at price:  381.54998779296875
Short sold at price:  389.6000061035156
Bought to close at price:  405.3999938964844
Bought at price:  417.04998779296875
Sold to close at price:  404.79998779296875
Short sold at price:  4



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Sold to close at price:  607.0999755859375
Short sold at price:  606.0999755859375
Bought to close at price:  610.2000122070312
Bought at price:  617.6500244140625
Sold to close at price:  644.5
Bought at price:  658.4000244140625
Sold to close at price:  632.0999755859375
Bought at price:  670.7000122070312
Sold to close at price:  719.4500122070312
Short sold at price:  715.9500122070312
Bought to close at price:  749.4000244140625
Bought at price:  770.6500244140625
Sold to close at price:  724.7999877929688
Short sold at price:  728.5499877929688
Bought to close at price:  753.9500122070312
Short sold at price:  736.7999877929688
Bought to close at price:  760.75
Bought at price:  750.5999755859375
Sold to close at price:  744.4000244140625
Short sold at price:  737.75
Bought to close at price:  700.4500122070312
Bought at price:  699.5499877929688
Sold to close at price:  691.2999877929688
Short sold at price:  685.0

## Stock Strategy 2 (EMA 8 AND EMA 55)

In [None]:
import pandas as pd

In [None]:
data2=data
data2.index = pd.to_datetime(data2.index)

data2['Year'] = data2.index.year
data2['Month'] = data2.index.month
data2['Day'] = data2.index.day

data2 = data2.reset_index()
data2.drop('Date', axis=1, inplace=True)
# Drop the original 'Date' column
data2 = data2.drop('MA', axis=1)

In [None]:
data2.head()

  and should_run_async(code)


Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Year,Month,Day
0,754.900024,759.950012,747.200012,748.700012,746.799988,4917748,2020,1,1
1,750.0,759.0,747.599976,756.950012,755.029114,5156046,2020,1,2
2,753.150024,756.25,740.5,742.950012,741.064636,8489729,2020,1,3
3,739.450012,739.599976,721.700012,723.25,721.414612,6356198,2020,1,6
4,728.0,738.0,721.049988,725.75,723.908264,9103360,2020,1,7


In [None]:
class CustomTradingEnvironment(gym.Env):
    def __init__(self, data, stop_loss=0.05):
        self.data = data
        self.current_step = 0
        self.max_steps = len(data) - 1
        self.initial_balance = 100000
        self.balance = self.initial_balance
        self.position = 0
        self.lot_size = 10
        self.stop_loss = stop_loss * data['Close'].iloc[0]  # Adaptive stop loss based on a fraction of the initial price
        self.profit = 0
        self.done = False
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(low=0, high=1, shape=(9,), dtype=np.float64)
        self.reward_range = (-1, 1)

    def calculate_ema(self, data, length):
        return data.ewm(span=length, adjust=False).mean()

    def step(self, action):
        self.current_step += 1
        done = False
        reward = 0

        if self.current_step >= len(self.data):
            done = True
            return np.array(self.data.iloc[self.current_step - 1].values), 0, done, {}

        # Obtain the current and previous candlesticks
        current_candle = self.data.iloc[self.current_step]
        previous_candle = self.data.iloc[self.current_step - 1]

        ema_8 = self.calculate_ema(self.data['Close'].iloc[:self.current_step], 8)
        ema_55 = self.calculate_ema(self.data['Close'].iloc[:self.current_step], 55)

        # Check for the crossover of EMA 8 and EMA 55 lines
        if (ema_8.iloc[-1] > ema_55.iloc[-1]) and (previous_candle['Close'] > previous_candle['Open']):
            if self.position <= 0:
                self.position = self.lot_size
                self.balance -= current_candle['Open'] * self.position
                reward = 0  # No immediate reward

        elif (ema_8.iloc[-1] < ema_55.iloc[-1]) and (previous_candle['Close'] < previous_candle['Open']):
            if self.position >= 0:
                self.position -= self.lot_size
                self.balance += current_candle['Open'] * abs(self.position)
                reward = 0  # No immediate reward

        # Check for profit/loss
        if self.position > 0:
            profit_loss = current_candle['Open'] - previous_candle['Open']
            if profit_loss >= 0:
                reward = 2
            else:
                reward = -1

        elif self.position < 0:
            profit_loss = previous_candle['Open'] - current_candle['Open']
            if profit_loss >= 0:
                reward = 2
            else:
                reward = -1

        current_candle = self.data.iloc[self.current_step+1]

        next_observation = current_candle.values.reshape(9,)
        return next_observation, reward, done, {}

    def reset(self):
        self.current_step = 0
        self.balance = self.initial_balance
        self.position = 0
        self.profit = 0
        self.done = False
        return self.data.iloc[self.current_step].values.reshape(9,)


  and should_run_async(code)


In [None]:
env = CustomTradingEnvironment(data2)

# Define and initialize the DQN agent
custom_network = [128, 64]
exploration_initial_eps = 1.0
exploration_final_eps = 0.05  # Adjusted value for exploration_final_eps
exploration_fraction = 0.2
model = DQN("MlpPolicy", env, verbose=1, exploration_fraction=exploration_fraction,
            exploration_initial_eps=exploration_initial_eps, exploration_final_eps=exploration_final_eps, policy_kwargs=dict(net_arch=custom_network))

# Train the DQN agent
model.learn(total_timesteps=200000)

# Save the trained model
model.save("dqn_trading_agent")

# Example of running the trained model
obs = env.reset()
total_reward=0
for i in range(0,len(data2)):
    print(f"Iteration {i}")
    print(f"Current observation shape: {obs.shape}")
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    total_reward += reward
    print(f"New observation shape: {obs.shape}")
    print(f"Reward: {reward}")
    print(f"Done: {done}")
    print("--------------")
print(f"Final total reward: {total_reward}")
print(f"Profit/Loss: {env.balance - env.initial_balance}")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




----------------------------------
| rollout/            |          |
|    ep_len_mean      | 747      |
|    ep_rew_mean      | 440      |
|    exploration_rate | 0.716    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 767      |
|    time_elapsed     | 3        |
|    total_timesteps  | 2988     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 747      |
|    ep_rew_mean      | 440      |
|    exploration_rate | 0.432    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 907      |
|    time_elapsed     | 6        |
|    total_timesteps  | 5976     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 747      |
|    ep_rew_mean      | 440      |
|    exploration_rate | 0.148    |
| time/               |          |
|    episodes       