In [2]:
!pip install tensorflow gym matplotlib


^C


In [6]:
!pip install gym 

^C


Collecting gym
  Using cached gym-0.26.2.tar.gz (721 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting cloudpickle>=1.2.0 (from gym)
  Using cached cloudpickle-3.1.2-py3-none-any.whl.metadata (7.1 kB)
Collecting gym_notices>=0.0.4 (from gym)
  Using cached gym_notices-0.1.0-py3-none-any.whl.metadata (1.2 kB)
Downloading cloudpickle-3.1.2-py3-none-any.whl (22 kB)
Downloading gym_notices-0.1.0-py3-none-any.whl (3.3 kB)
Building wheels for collected packages: gym
  Building wheel for gym (pyproject.toml): started
  Building wheel for gym (pyproject.toml): finished with status 'done'
  Created wheel for gym: filename=gym-0.26.2-py3-none-any.whl size=827741 sha256=77b2ae0155f7336075a3f0a0fe7104d04ddcc


[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from collections import deque
import random
import matplotlib.pyplot as plt



In [None]:

# Load the NEPSE history and keep the closing-price column as a NumPy array.
data = pd.read_csv("NEPSE.csv")
prices = data["Close"].to_numpy()


In [None]:
# Min-max scale the closing prices: subtract the minimum, divide by the range.
price_min = np.min(prices)
price_range = np.max(prices) - price_min
prices = (prices - price_min) / price_range


In [None]:
class TradingENV:
    """Single-asset trading environment over a fixed price series.

    The observation is the ``window_size`` prices that precede the current
    step. Actions are ``0`` = hold, ``1`` = buy, ``2`` = sell. At most one
    long position is open at a time, and a reward is only produced when that
    position is sold. An open position is not closed automatically when the
    episode ends.

    Args:
        prices: Indexable, sliceable sequence of prices (for example a 1-D
            NumPy array).
        window_size: Number of past prices contained in each observation.

    Raises:
        ValueError: If ``window_size`` is smaller than 1, or if ``prices`` has
            no element after the first observation window.
    """

    def __init__(self, prices, window_size = 10):
        # Fail fast: without this check a too-short series only surfaces as an
        # IndexError inside step(), far away from the actual mistake.
        if window_size < 1:
            raise ValueError(f"window_size must be >= 1, got {window_size}")
        if len(prices) <= window_size:
            raise ValueError(
                f"prices must contain more than window_size ({window_size}) "
                f"values, got {len(prices)}"
            )
        self.prices = prices
        self.window_size = window_size
        self.reset()

    def reset(self):
        """Rewind to the first step with a full window and return its state."""
        self.current_step = self.window_size
        self.balance = 10000  # set here only; step() never updates it
        self.position = 0  # 0 = none, 1 = long
        self.entry_price = 0
        self.total_profit = 0
        return self._get_state()

    def _get_state(self):
        """Return the ``window_size`` prices just before ``current_step``."""
        return self.prices[self.current_step - self.window_size:self.current_step]

    def step(self, action):
        """Apply ``action`` at the current price and advance one step.

        Buying while already long, or selling while flat, has no effect and
        is treated like a hold.

        Args:
            action: ``0`` = hold, ``1`` = buy, ``2`` = sell.

        Returns:
            Tuple ``(next_state, reward, done)``. ``reward`` is non-zero only
            when a position is sold; ``done`` becomes True once the step
            index reaches the last index of ``prices``.
        """
        reward = 0
        done = False
        price = self.prices[self.current_step]

        # Action: 0=Hold, 1=Buy, 2=Sell
        if action == 1 and self.position == 0:
            self.position = 1
            self.entry_price = price

        elif action == 2 and self.position == 1:
            reward = (price - self.entry_price) * 100  # profit * 100 shares
            self.total_profit += reward
            self.position = 0

        self.current_step += 1
        if self.current_step >= len(self.prices) - 1:
            done = True

        next_state = self._get_state()
        return next_state, reward, done


In [None]:
# Building the Deep Q-Network

def build_model(input_shape, action_space, learning_rate=0.001):
    """Build and compile the fully connected Q-network.

    Args:
        input_shape: Shape tuple of a single state, without the batch axis.
        action_space: Number of discrete actions; sets the width of the
            linear output layer (one Q-value per action).
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled ``keras.Sequential`` model, trained with MSE loss.
    """
    model = keras.Sequential(
        [
            # Declare the input explicitly instead of passing input_shape to
            # the first Dense layer.
            keras.Input(shape=input_shape),
            keras.layers.Dense(64, activation="relu"),
            keras.layers.Dense(32, activation="relu"),
            keras.layers.Dense(action_space, activation="linear"),
        ]
    )

    # compile/return must be indented into the function body; at column 0 the
    # function would return None and the bare `return` is a SyntaxError.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss="mse",
    )
    return model

In [None]:
# Deep Q Learning Agent 

class DQNAgent:
    """Epsilon-greedy Deep Q-Learning agent with an experience-replay buffer.

    Args:
        state_size: Length of the flat state vector fed to the network.
        action_size: Number of discrete actions the agent can choose from.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen = 2000)  # replay buffer; oldest entries are evicted first
        self.gamma = 0.9 # discount rate
        self.epsilon = 1.0 # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = build_model((state_size,), action_size)

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        Args:
            state: A single state holding ``state_size`` values (flat, or
                already shaped ``(1, state_size)``).

        Returns:
            The chosen action index as a Python ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        # The network expects a batch axis: a bare 1-D state would be read as
        # `state_size` separate scalar samples.
        batch = np.asarray(state).reshape(1, self.state_size)
        q_values = self.model.predict(batch, verbose=0)
        return int(np.argmax(q_values[0]))

    def replay(self, batch_size=32):
        """Fit the network on a random minibatch of remembered transitions.

        Does nothing until at least ``batch_size`` transitions are stored.
        The whole minibatch is evaluated and fitted in batched Keras calls
        (two predictions and one fit) instead of three calls per transition.
        Epsilon is decayed once per call until it reaches ``epsilon_min``.

        Args:
            batch_size: Number of transitions sampled from the buffer.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = (
            np.array(column) for column in zip(*minibatch)
        )

        # Bellman target: r for terminal transitions, r + gamma * max_a' Q(s', a') otherwise.
        best_next_q = np.amax(self.model.predict(next_states, verbose=0), axis=1)
        targets = np.where(
            dones.astype(bool), rewards, rewards + self.gamma * best_next_q
        )

        # Start from the current predictions so only the Q-value of the action
        # actually taken contributes to the loss.
        target_f = np.array(self.model.predict(states, verbose=0))
        target_f[np.arange(batch_size), actions] = targets
        self.model.fit(states, target_f, batch_size=batch_size, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
    

In [None]:
# Train the Agent 
 
# Seed every RNG used during training so repeated runs are as repeatable as
# the backend allows.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# The environment class is spelled TradingENV; any other casing is a NameError.
env = TradingENV(prices)
state_size = env.window_size  # one network input per price in the window
action_size = 3  # 0 = hold, 1 = buy, 2 = sell
agent = DQNAgent(state_size, action_size)
episodes = 100

profits = []  # total profit recorded at the end of each episode

In [None]:
# One pass over the price series per episode: act, store the transition, then
# learn from replay.
for e in range(episodes):
    state = env.reset()
    total_reward = 0
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward
        if not done:
            # Replay runs after every non-terminal step only.
            agent.replay(32)
    profits.append(env.total_profit)
    print(f"Episode {e+1}/{episodes}, Profit: {env.total_profit:.2f}, Epsilon: {agent.epsilon:.2f}")

In [None]:
# Plotting the results
# Draw the per-episode profit curve on an explicit figure/axes pair.
fig, ax = plt.subplots()
ax.plot(profits)
ax.set_xlabel("Episode")
ax.set_ylabel("Profit")
ax.set_title("DQN Trading Performance on NEPSE Data")
plt.show()