<a href="https://colab.research.google.com/github/kamal-gavel/DSAlgo2025/blob/main/%22LSTM_DQN_Stock_Trading_with_Epsilon_Decay%22.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Full script: LSTM-DQN + RMSE forecast + Equity Curve comparison
!pip install yfinance ta tensorflow scikit-learn matplotlib --quiet

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from collections import deque
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import ta
import math

# ---------------------------
# CONFIG
# ---------------------------
# Data selection: symbol and date range handed to yf.download below.
TICKER = "INFY.NS"
START = "2020-01-01"
END = "2024-12-31"
# Look-back length (in rows) shared by the LSTM forecaster inputs and the
# trading environment's observation window.
WINDOW = 30
# Fraction of rows (taken from the end of the series) held out for testing.
TEST_RATIO = 0.2
# Seed NumPy, TensorFlow and the stdlib RNG so exploration, replay sampling
# and weight initialisation start from the same state on every run.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
random.seed(SEED)

# Epsilon schedule config (linear decay by default)
eps_start = 1.0              # exploration rate used for the first episode
eps_end = 0.01               # floor reached once the decay phase is over
total_episodes = 50          # change to 500+ for serious training
decay_fraction = 0.6         # decay over first 60% of episodes
# Number of episodes over which epsilon moves from eps_start to eps_end.
decay_episodes = int(decay_fraction * total_episodes)

# ---------------------------
# 1) FETCH & INDICATORS
# ---------------------------
df_raw = yf.download(TICKER, start=START, end=END)

# Fail early with a clear message: every later step (indicators, scaling,
# windowing) would otherwise break with an unrelated-looking error on an
# empty frame.
if df_raw is None or df_raw.empty:
    raise ValueError(f"No price data returned for {TICKER} between {START} and {END}")

# If MultiIndex columns, flatten to the first level (the field name)
if isinstance(df_raw.columns, pd.MultiIndex):
    df_raw.columns = df_raw.columns.get_level_values(0)

print(df_raw.tail())

# keep a copy of true prices for RMSE & backtest plotting
df_raw['Price'] = df_raw['Close'].astype(float)

# Compute indicators
df = df_raw.copy()
df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=14).rsi()
df['MFI'] = ta.volume.MFIIndicator(df['High'], df['Low'], df['Close'], df['Volume'], window=14).money_flow_index()
bb = ta.volatility.BollingerBands(df['Close'], window=20, window_dev=2)
# Distance of the close from the band midline, in units of band width.
# A zero-width band (flat prices over the window) would give +/-inf, which
# dropna() does not remove and MinMaxScaler rejects; map it to NaN instead so
# those rows are dropped together with the indicator warm-up rows.
band_width = (bb.bollinger_hband() - bb.bollinger_lband()).replace(0, np.nan)
df['BB_dist'] = (df['Close'] - bb.bollinger_mavg()) / band_width

# Drop rows with missing indicator values and switch to a positional index
df = df.dropna().reset_index(drop=True)

# Save true prices for unscaled comparisons
true_prices = df['Close'].values.copy()

# ---------------------------
# 2) TRAIN / TEST SPLIT POINT (time-based)
# ---------------------------
feature_cols = ['Close', 'RSI', 'MFI', 'BB_dist']
n = len(df)
train_n = int(n * (1 - TEST_RATIO))

# ---------------------------
# 3) SCALING FEATURES
# ---------------------------
# Fit the scaler on the training rows only. Fitting on the full series would
# leak the test period's min/max into the training features (look-ahead bias).
# Consequence: scaled test values may fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(df.iloc[:train_n][feature_cols])
df_scaled = df.copy()
df_scaled[feature_cols] = scaler.transform(df_scaled[feature_cols])

train_df = df_scaled.iloc[:train_n].reset_index(drop=True)
# The test frame starts WINDOW rows early so the first test target already
# has a full look-back window available.
test_df = df_scaled.iloc[train_n - WINDOW:].reset_index(drop=True)  # include overlap

# Also keep original price series for test plotting and RMSE invert
true_test_prices = df['Close'].iloc[train_n:].reset_index(drop=True)

# ---------------------------
# 4) Helper: create windowed dataset for LSTM forecaster
# ---------------------------
def create_sequence_dataset(data_df, feature_cols, window=30, horizon=1):
    """Build sliding-window samples for a sequence forecaster.

    Each sample pairs ``window`` consecutive rows of ``feature_cols`` with the
    'Close' value ``horizon`` rows after the window's first excluded row
    (``horizon=1`` means the row immediately following the window).

    Args:
        data_df: DataFrame containing ``feature_cols`` and a 'Close' column.
        feature_cols: Column names used as model inputs.
        window: Number of rows in each input sequence.
        horizon: How many rows ahead of the window end the target lies.

    Returns:
        Tuple ``(X, y)`` with ``X`` of shape (samples, window, n_features) and
        ``y`` of shape (samples,). When ``data_df`` is too short to yield a
        single sample, ``X`` is still 3-D with zero samples.
    """
    # Pull the columns out once; selecting them inside the loop re-indexes the
    # DataFrame on every iteration.
    features = data_df[feature_cols].values
    close = data_df['Close'].values

    n_samples = len(data_df) - horizon - window + 1
    if n_samples <= 0:
        empty_X = np.empty((0, window, len(feature_cols)), dtype=features.dtype)
        empty_y = np.empty((0,), dtype=close.dtype)
        return empty_X, empty_y

    X = np.stack([features[end - window:end] for end in range(window, window + n_samples)])
    first_target = window + horizon - 1
    y = close[first_target:first_target + n_samples].copy()
    return X, y

# Supervised windows for the forecaster, built from the scaled frames
X_train, y_train = create_sequence_dataset(train_df, feature_cols, window=WINDOW)
X_test, y_test = create_sequence_dataset(test_df, feature_cols, window=WINDOW)

print("LSTM train shape:", X_train.shape, y_train.shape, "LSTM test shape:", X_test.shape, y_test.shape)

# ---------------------------
# 5) Standalone LSTM Forecaster (for RMSE)
# ---------------------------
# Two stacked LSTM layers followed by a small dense head that outputs a single
# value: the scaled next-step 'Close'.
forecaster_layers = [
    layers.Input(shape=(WINDOW, len(feature_cols))),
    layers.LSTM(64, return_sequences=True),
    layers.LSTM(32),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='linear'),
]
lstm_forecaster = models.Sequential(forecaster_layers)
lstm_forecaster.compile(optimizer='adam', loss='mse')

# Short training run; raise `epochs` for a better fit
lstm_forecaster.fit(
    X_train,
    y_train,
    epochs=8,
    batch_size=64,
    validation_split=0.05,
    verbose=1,
)

# Test-set predictions, still expressed in scaled 'Close' units
y_pred_scaled = lstm_forecaster.predict(X_test).flatten()

# Undo the min-max scaling for the 'Close' column only, using the statistics
# the fitted scaler recorded for that column.
close_idx = feature_cols.index('Close')
close_min = scaler.data_min_[close_idx]
close_max = scaler.data_max_[close_idx]


def _unscale_close(scaled_values):
    """Map scaled 'Close' values back to price units."""
    return scaled_values * (close_max - close_min) + close_min


y_test_unscaled = _unscale_close(y_test)
y_pred_unscaled = _unscale_close(y_pred_scaled)

# Forecast error expressed in price units
rmse_val = np.sqrt(mean_squared_error(y_test_unscaled, y_pred_unscaled))
print(f"LSTM forecast RMSE on test (price units): {rmse_val:.4f}")

# ---------------------------
# 6) Trading Environment (Train on train_df, eval on test_df)
# ---------------------------
class TradingEnv:
    """Single-asset trading simulator with long, short and flat positions.

    Observations are the last ``window`` rows of the scaled feature columns
    with the current position (-1, 0 or 1) appended as an extra column.
    Trades are executed at the unscaled 'Close' of the current row.

    Actions:
        1 -- buy: open a long when flat, or cover an open short.
        2 -- sell: open a short when flat, or close an open long.
        any other value -- hold.

    ``df_scaled`` and ``true_df`` are indexed with the same row positions, so
    callers are expected to pass frames that cover the same rows.
    """

    # Columns of the scaled frame that make up the market part of the state.
    FEATURE_COLS = ('Close', 'RSI', 'MFI', 'BB_dist')

    def __init__(self, df_scaled, true_df, window=30, initial_balance=100000, transaction_cost=0.001, margin_interest=0.0001):
        """Store the data and reset the account.

        Args:
            df_scaled: Frame holding the scaled feature columns.
            true_df: Frame holding the unscaled 'Close' used for execution.
            window: Number of rows in each observation.
            initial_balance: Starting cash.
            transaction_cost: Proportional fee charged on every trade's notional.
            margin_interest: Proportional fee charged on the short notional on
                every step a short position is open.
        """
        self.df = df_scaled.reset_index(drop=True)
        self.true_df = true_df.reset_index(drop=True)
        self.window = window
        self.init_balance = initial_balance
        self.transaction_cost = transaction_cost
        self.margin_interest = margin_interest
        # Cache plain arrays once: step() and _get_state() run on every
        # environment step, and per-step DataFrame indexing is much slower.
        self._features = self.df[list(self.FEATURE_COLS)].values
        self._prices = self.true_df['Close'].values
        self.reset()

    def reset(self, start_idx=None):
        """Return the account to its initial state and give the first observation.

        Args:
            start_idx: Row to start from; defaults to ``window`` so the first
                observation has a full look-back.
        """
        self.current_step = self.window if start_idx is None else start_idx
        self.position = 0
        self.shares = 0
        self.cash = float(self.init_balance)
        self.portfolio_value = float(self.init_balance)
        self.max_portfolio = self.portfolio_value
        self.total_steps = len(self.df)
        return self._get_state()

    def _get_state(self):
        """Build the (window, n_features + 1) float32 observation."""
        start = self.current_step - self.window
        market = self._features[start:self.current_step]
        pos_col = np.full((self.window, 1), self.position)
        return np.hstack([market, pos_col]).astype(np.float32)

    def _buy(self, price):
        """Open a long when flat, or cover the whole short position."""
        if self.position == 0:
            # Size the order on the fee-inclusive unit cost. Sizing on the raw
            # price and charging the fee afterwards can push cash below zero.
            unit_cost = price * (1.0 + self.transaction_cost)
            shares_to_buy = int(self.cash // unit_cost)
            if shares_to_buy > 0:
                cost = shares_to_buy * price
                fee = cost * self.transaction_cost
                self.shares += shares_to_buy
                self.cash -= (cost + fee)
                self.position = 1
        elif self.position == -1:
            cost = abs(self.shares) * price
            fee = cost * self.transaction_cost
            self.cash -= (cost + fee)
            self.shares = 0
            self.position = 0

    def _sell(self, price):
        """Open a short when flat, or close the whole long position."""
        if self.position == 0:
            shares_to_short = int(self.cash // price)
            if shares_to_short > 0:
                proceeds = shares_to_short * price
                fee = proceeds * self.transaction_cost
                self.shares -= shares_to_short
                self.cash += (proceeds - fee)
                self.position = -1
        elif self.position == 1:
            proceeds = self.shares * price
            fee = proceeds * self.transaction_cost
            self.cash += (proceeds - fee)
            self.shares = 0
            self.position = 0

    def step(self, action):
        """Apply ``action`` at the current row and advance one row.

        Returns:
            Tuple ``(next_state, reward, done, info)``. ``next_state`` is None
            once the data is exhausted. ``reward`` is the portfolio's total
            return relative to the initial balance (not the one-step change).
            ``info['portfolio_value']`` is the mark-to-market account value.
        """
        done = False
        info = {}
        idx = self.current_step
        price_true = float(self._prices[idx])

        if action == 1:
            self._buy(price_true)
        elif action == 2:
            self._sell(price_true)

        # Borrow cost accrues on every step a short is open, including the
        # step on which it was opened.
        if self.position == -1:
            borrow_fee = abs(self.shares) * price_true * self.margin_interest
            self.cash -= borrow_fee

        # shares is negative while short, so this marks both sides to market
        self.portfolio_value = self.cash + self.shares * price_true
        self.max_portfolio = max(self.max_portfolio, self.portfolio_value)

        # reward: cumulative return of the portfolio relative to init (simple)
        reward = (self.portfolio_value - self.init_balance) / (self.init_balance)

        self.current_step += 1
        if self.current_step >= len(self.df):
            done = True

        next_state = self._get_state() if not done else None
        info['portfolio_value'] = self.portfolio_value
        return next_state, reward, done, info

# ---------------------------
# 7) DQN Agent (LSTM architecture) with external epsilon schedule
# ---------------------------
class ReplayBuffer:
    """Bounded FIFO store of transitions with uniform random sampling.

    Once ``maxlen`` items are held, adding another discards the oldest one.
    """

    def __init__(self, maxlen=10000):
        self.buf = deque(maxlen=maxlen)

    def add(self, x):
        """Append one item to the buffer."""
        self.buf.append(x)

    def sample(self, n):
        """Return ``n`` distinct stored items chosen with the stdlib RNG."""
        return random.sample(self.buf, n)

    def __len__(self):
        return len(self.buf)

class LSTMDQN:
    """DQN agent whose Q-network is a stacked LSTM.

    Holds an online network (``model``), a target network (``target``) and a
    replay buffer. ``epsilon`` is a plain attribute; the class never changes it
    itself, so the exploration schedule is driven from outside.
    """

    def __init__(self, state_shape, action_size=3, lr=1e-3, gamma=0.99, eps=1.0, eps_min=0.01):
        """Create both networks and copy the online weights into the target.

        Args:
            state_shape: Shape of one observation, passed to the input layer.
            action_size: Number of discrete actions (width of the output layer).
            lr: Adam learning rate for both networks.
            gamma: Discount factor used in the bootstrap target.
            eps: Initial exploration probability.
            eps_min: Stored on the instance; not read by any method here.
        """
        self.state_shape = state_shape
        self.action_size = action_size
        self.gamma = gamma
        self.epsilon = eps
        self.eps_min = eps_min
        self.buffer = ReplayBuffer()
        self.model = self._build(lr)
        self.target = self._build(lr)
        self.update_target()

    def _build(self, lr):
        """Return a compiled LSTM(64) -> LSTM(32) -> Dense(32) -> Q-values network."""
        net_in = layers.Input(shape=self.state_shape)
        hidden = layers.LSTM(64, return_sequences=True)(net_in)
        hidden = layers.LSTM(32)(hidden)
        hidden = layers.Dense(32, activation='relu')(hidden)
        q_out = layers.Dense(self.action_size, activation='linear')(hidden)
        network = models.Model(net_in, q_out)
        network.compile(optimizer=optimizers.Adam(lr), loss='mse')
        return network

    def update_target(self):
        """Overwrite the target network's weights with the online network's."""
        self.target.set_weights(self.model.get_weights())

    def act(self, state, greedy=False):
        """Pick an action epsilon-greedily, or purely greedily when ``greedy``.

        The random draw is skipped entirely when ``greedy`` is true.
        """
        explore = (not greedy) and (np.random.rand() < self.epsilon)
        if explore:
            return np.random.randint(self.action_size)
        q_values = self.model.predict(state[np.newaxis], verbose=0)[0]
        return int(np.argmax(q_values))

    def remember(self, s, a, r, ns, d):
        """Store one (state, action, reward, next_state, done) transition."""
        self.buffer.add((s, a, r, ns, d))

    def replay(self, batch=64):
        """Run one training step on a random minibatch.

        Does nothing until the buffer holds at least ``batch`` transitions.
        """
        if len(self.buffer) < batch:
            return
        transitions = self.buffer.sample(batch)
        states = np.array([t[0] for t in transitions])
        # Terminal transitions carry no next state; a zero placeholder keeps
        # the batch rectangular (its prediction is ignored for those rows).
        next_states = np.array(
            [np.zeros_like(t[0]) if t[3] is None else t[3] for t in transitions]
        )
        targets = self.model.predict(states, verbose=0)
        next_q = self.target.predict(next_states, verbose=0)
        # Only the taken action's entry is replaced, so the loss on the other
        # actions is zero.
        for row, (_, action, reward, _, terminal) in enumerate(transitions):
            if terminal:
                targets[row, action] = reward
            else:
                targets[row, action] = reward + self.gamma * np.max(next_q[row])
        self.model.train_on_batch(states, targets)

# ---------------------------
# 8) TRAIN DQN on TRAIN SPLIT with epsilon schedule
# ---------------------------
# Unscaled prices for the same rows as train_df, used for trade execution
train_true = df.iloc[:train_n].reset_index(drop=True)
train_env = TradingEnv(df_scaled=train_df, true_df=train_true, window=WINDOW)

# One observation is enough to learn the network's input shape
state0 = train_env.reset()
state_shape = state0.shape
agent = LSTMDQN(
    state_shape=state_shape,
    action_size=3,
    eps=eps_start,
    eps_min=eps_end,
)

# helper: linear epsilon schedule
def get_epsilon_linear(ep):
    """Return the exploration rate for episode ``ep``.

    Interpolates linearly from ``eps_start`` at episode 0 towards ``eps_end``
    over ``decay_episodes`` episodes, and stays at ``eps_end`` afterwards.
    """
    past_decay = ep >= decay_episodes
    if not past_decay:
        # max(1, ...) guards the division when the decay phase has zero length
        progress = ep / max(1, decay_episodes)
        return eps_start - (eps_start - eps_end) * progress
    return eps_end

EPISODES = total_episodes
eps_history = []   # epsilon used in each episode
port_history = []  # portfolio value at the end of each episode

for ep in range(EPISODES):
    # set epsilon for this episode from schedule
    agent.epsilon = get_epsilon_linear(ep)
    s = train_env.reset()
    done = False
    steps = 0
    # Run until the environment itself reports the end of the data. A manual
    # step cap that stops one step earlier would keep the terminal transition
    # (done=True) out of the replay buffer, so the agent would never train on
    # a non-bootstrapped target.
    while not done:
        a = agent.act(s)
        ns, r, done, info = train_env.step(a)
        agent.remember(s, a, r, ns, done)
        agent.replay(batch=64)
        s = ns
        steps += 1
    # Sync the target network once per episode
    agent.update_target()
    eps_history.append(agent.epsilon)
    port_history.append(train_env.portfolio_value)
    print(f"Train Episode {ep+1}/{EPISODES}, portfolio_value: {train_env.portfolio_value:.2f}, epsilon: {agent.epsilon:.3f}")

# ---------------------------
# 9) EVALUATE DQN on TEST SPLIT (greedy policy) & BUY & HOLD
# ---------------------------
# set agent to exploitation (use eps_end); act() is also called with
# greedy=True below, so no random actions are taken during evaluation
agent.epsilon = eps_end

# Both frames must start at the same row (WINDOW rows before the test split)
# because the environment indexes scaled features and true prices with the
# same position. Starting the price frame at train_n instead would execute
# trades at prices WINDOW rows ahead of the observation and run out of price
# rows before the episode ends.
test_true = df.iloc[train_n - WINDOW:].reset_index(drop=True)
test_scaled = df_scaled.iloc[train_n - WINDOW:].reset_index(drop=True)

eval_env = TradingEnv(test_scaled, test_true, window=WINDOW)
s = eval_env.reset()
dqn_values = []
bh_values = []
# buy-and-hold benchmark: buy as many whole shares as possible at the first
# test day's true price (row WINDOW of the overlapping frame) and never trade
bh_cash = float(eval_env.init_balance)
first_price = float(test_true['Close'].iloc[WINDOW])
bh_shares = int(bh_cash // first_price)
bh_cash -= bh_shares * first_price

while True:
    a = agent.act(s, greedy=True)
    ns, r, done, info = eval_env.step(a)
    dqn_values.append(info['portfolio_value'])
    # Row the environment just traded on; value the benchmark at the same
    # price so both curves share one time axis.
    idx = eval_env.current_step - 1
    if idx < len(test_true):
        cur_price = float(test_true['Close'].iloc[idx])
        bh_values.append(bh_cash + bh_shares * cur_price)
    s = ns
    if done:
        break

# Convert to numpy arrays and trim to same length
dqn_values = np.array(dqn_values)
bh_values = np.array(bh_values[:len(dqn_values)])

# Compute summary stats
dqn_return = (dqn_values[-1] / dqn_values[0] - 1.0) if len(dqn_values)>1 else 0.0
bh_return = (bh_values[-1] / bh_values[0] - 1.0) if len(bh_values)>1 else 0.0
print(f"DQN test cumulative return: {dqn_return*100:.2f}%")
print(f"Buy & Hold test cumulative return: {bh_return*100:.2f}%")

# Sharpe (simple)
def sharpe(arr):
    """Return an annualised Sharpe ratio for a series of portfolio values.

    Uses simple period-over-period returns, a zero risk-free rate and 252
    periods per year.

    Args:
        arr: Sequence or array of portfolio values in time order.

    Returns:
        The annualised ratio, or 0.0 when it is undefined (fewer than two
        values, or returns with zero / non-finite standard deviation).
    """
    values = np.asarray(arr, dtype=float)
    # A return needs two points; without this guard the std of an empty
    # array is NaN and the function would return NaN.
    if values.size < 2:
        return 0.0
    # The small epsilon keeps the division defined if a value is exactly zero
    rets = np.diff(values) / (values[:-1] + 1e-9)
    vol = rets.std()
    if vol == 0 or not np.isfinite(vol):
        return 0.0
    return (rets.mean() / vol) * np.sqrt(252)

print(f"DQN Sharpe (approx): {sharpe(dqn_values):.3f}, BH Sharpe (approx): {sharpe(bh_values):.3f}")

# ---------------------------
# 10) PLOT equity curves and LSTM preds vs true (for a portion)
# ---------------------------
# Agent vs. buy-and-hold account value over the test period
_, equity_ax = plt.subplots(figsize=(12, 5))
equity_ax.plot(dqn_values, label='DQN portfolio value')
equity_ax.plot(bh_values, label='Buy & Hold value')
equity_ax.legend()
equity_ax.set_title('Equity curves on test')
plt.show()

# Exploration rate actually used in each training episode
_, eps_ax = plt.subplots(figsize=(10, 3))
eps_ax.plot(eps_history, marker='o')
eps_ax.set_title('Epsilon schedule over training episodes')
eps_ax.set_xlabel('Episode')
eps_ax.set_ylabel('Epsilon')
plt.show()

# Forecast vs. actual price for at most the first 200 test points
n_plot = min(200, len(y_test_unscaled))
_, forecast_ax = plt.subplots(figsize=(12, 4))
forecast_ax.plot(y_pred_unscaled[:n_plot], label='LSTM predicted price (test)')
forecast_ax.plot(true_test_prices.values[:n_plot], label='True price (test)')
forecast_ax.legend()
forecast_ax.set_title(f'LSTM Forecast (RMSE={rmse_val:.4f})')
plt.show()
