<a href="https://colab.research.google.com/github/kritisinghh/Trading/blob/main/RL_Project_Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# !pip install --quiet yfinance pandas numpy matplotlib ipywidgets ta stable-baselines3==2.0.0


In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import ipywidgets as widgets
%matplotlib inline


In [None]:
def download_data(ticker, start='2018-01-01', end='2024-12-31'):
    """Download price history for ``ticker`` with yfinance and drop NaN rows.

    If the downloaded frame has ``MultiIndex`` columns, they are flattened to
    the values of the first level when the index has only one level or when
    the second level holds a single distinct value. A frame whose second level
    has several distinct values is returned with its ``MultiIndex`` untouched.

    Args:
        ticker: Symbol (or whatever ``yf.download`` accepts) to fetch.
        start: Start date string forwarded to ``yf.download``.
        end: End date string forwarded to ``yf.download``.

    Returns:
        The downloaded DataFrame with every row containing a NaN removed.
    """
    df = yf.download(ticker, start=start, end=end, progress=False)
    columns = df.columns
    if isinstance(columns, pd.MultiIndex):
        # NOTE(review): presumably level 0 is the price field and level 1 the
        # ticker -- confirm against the installed yfinance version.
        first_level = list(columns.get_level_values(0))
        # Check the level count explicitly instead of relying on a broad
        # ``except Exception`` to catch the missing second level.
        if columns.nlevels < 2 or len(set(columns.get_level_values(1))) == 1:
            df.columns = first_level
    return df.dropna()

def add_basic_indicators(df):
    """Return a copy of ``df`` extended with simple return and moving averages.

    Adds three columns derived from ``Close``: ``return_1`` (one-row percentage
    change), ``sma_5`` and ``sma_20`` (rolling means over 5 and 20 rows). Rows
    containing any NaN are dropped afterwards and the index is moved into a
    regular column by ``reset_index()``. The input frame is not modified.
    """
    close = df['Close']
    enriched = df.assign(
        return_1=close.pct_change(),
        sma_5=close.rolling(5).mean(),
        sma_20=close.rolling(20).mean(),
    )
    return enriched.dropna().reset_index()

# Fetch the price history for one ticker and derive the indicator columns;
# `df` is the frame handed to ManualAgentTrader further down.
ticker = 'AAPL'
df_raw = download_data(ticker, start='2018-01-01', end='2024-12-31')
df = add_basic_indicators(df_raw)
print('Loaded', ticker, 'shape:', df.shape)
# Last expression of the cell: shows the first rows when run in a notebook.
df.head()

In [None]:
import threading
import time

import gym
import numpy as np
from gym import spaces

class StockTradingEnv(gym.Env):
    """Single-asset trading environment driven by a price DataFrame.

    The action is one float in [-1, 1]. Values above 0.01 spend that fraction
    of the available cash on shares, values below -0.01 sell that fraction of
    the held shares, and anything in between is a hold. An observation is the
    last ``window_size`` rows of ``feature_cols`` flattened, followed by
    ``cash / initial_cash``, the share count and
    ``position_price / initial_cash``.

    NOTE(review): ``reset()`` returns only the observation and ``step()``
    returns a 4-tuple; other code in this file unpacks them that way, so the
    call shapes are intentionally left as they are.
    """

    def __init__(self, df, window_size=30, initial_cash=10000, transaction_cost_pct=0.001):
        """Store the data and trading parameters and reset the episode.

        Args:
            df: DataFrame with at least a ``Close`` column; it is re-indexed
                to 0..n-1 internally.
            window_size: Number of trailing rows included in an observation.
            initial_cash: Cash balance at the start of every episode.
            transaction_cost_pct: Proportional fee charged on buys and sells.

        Raises:
            ValueError: If ``df`` has fewer than ``window_size`` rows, in which
                case no full observation window can be built.
        """
        super().__init__()
        self.df = df.reset_index(drop=True)
        if len(self.df) < window_size:
            raise ValueError(
                f"df has {len(self.df)} rows but window_size={window_size} rows "
                "are needed to build the first observation"
            )
        self.window_size = window_size
        self.transaction_cost_pct = transaction_cost_pct
        self.initial_cash = initial_cash
        self.feature_cols = ['Close']
        # Window features plus the three account scalars appended in _get_obs.
        obs_dim = window_size * len(self.feature_cols) + 3
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)
        self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)
        self.reset()

    def _price_at(self, step):
        """Return the ``Close`` price at ``step``, clamped to the last row."""
        last_row = len(self.df) - 1
        return float(self.df.loc[min(step, last_row), 'Close'])

    def _get_obs(self):
        """Build the flat float32 observation for the current step."""
        # Clamp so that a call made after the episode ended still yields a
        # full-length window instead of a shorter array.
        last = min(self.current_step, len(self.df) - 1)
        first = last - self.window_size + 1
        # .loc slicing on the 0..n-1 index is inclusive on both ends.
        window = self.df.loc[first:last, self.feature_cols].values
        flat = window.flatten().astype(np.float32)
        cash_norm = np.array([self.cash / self.initial_cash], dtype=np.float32)
        pos = np.array([self.position], dtype=np.float32)
        pos_price = np.array([self.position_price / (self.initial_cash + 1e-9)], dtype=np.float32)
        return np.concatenate([flat, cash_norm, pos, pos_price]).astype(np.float32)

    def reset(self):
        """Start a new episode and return the first observation."""
        # First step at which a complete window of history exists.
        self.current_step = self.window_size - 1
        self.cash = float(self.initial_cash)
        self.position = 0.0
        self.position_price = 0.0
        self.done = False
        return self._get_obs()

    def step(self, action):
        """Apply ``action`` at the current price and advance one row.

        Args:
            action: Scalar or array-like whose first element is the trade
                signal; it is clipped to [-1, 1].

        Returns:
            ``(obs, reward, done, info)`` where ``reward`` is the change in
            portfolio value from just before the trade (at the current price)
            to after the move to the next row (at the next price), and
            ``info['portfolio']`` is the new portfolio value.
        """
        price = self._price_at(self.current_step)
        # Pull out the first element explicitly; calling float() on a
        # 1-element array relies on an implicit conversion.
        raw = np.asarray(action, dtype=np.float64).reshape(-1)[0]
        act = float(np.clip(raw, -1.0, 1.0))

        # Value the portfolio BEFORE trading so that transaction fees are part
        # of the reward, matching ManualAgentTrader.step_action in this file.
        prev_pf = self.cash + self.position * price

        if act > 0.01:
            available_cash = self.cash * act
            qty = available_cash / (price * (1 + self.transaction_cost_pct))
            if qty > 0:
                # price * qty * (1 + fee) equals available_cash by construction;
                # subtracting it directly avoids rounding cash below zero.
                self.cash -= available_cash
                if self.position > 0:
                    held_cost = self.position_price * self.position
                    self.position_price = (held_cost + price * qty) / (self.position + qty)
                else:
                    self.position_price = price
                self.position += qty
        elif act < -0.01:
            qty = self.position * (-act)
            if qty > 0:
                self.cash += price * qty * (1 - self.transaction_cost_pct)
                self.position -= qty
                # Treat float dust as a fully closed position.
                if self.position <= 1e-12:
                    self.position = 0.0
                    self.position_price = 0.0

        self.current_step += 1
        if self.current_step >= len(self.df) - 1:
            self.done = True
        next_price = self._price_at(self.current_step)
        portfolio = self.cash + self.position * next_price
        reward = portfolio - prev_pf
        obs = self._get_obs()
        info = {'portfolio': portfolio}
        return obs, float(reward), self.done, info

class ManualAgentTrader:
    """Notebook widget for paper-trading a price series by hand and for
    training / running a PPO agent on the same data.

    The manual side keeps a cash balance, a share position and its average
    entry price, and advances one row of ``df`` per Buy / Hold / Sell click.
    The agent side trains a PPO model on ``StockTradingEnv`` in a background
    thread and replays it for a limited number of steps.
    """

    # Row at which manual trading starts (clamped to the data in reset_manual).
    START_STEP = 30
    # Number of timesteps used by the demo training run.
    TRAIN_STEPS = 10000
    # Maximum number of environment steps replayed by "Run Agent".
    AGENT_STEP_LIMIT = 100

    def __init__(self, df, initial_cash=10000, transaction_cost_pct=0.001):
        """Create the widgets, wire the callbacks and draw the first chart.

        Args:
            df: DataFrame with ``Date`` and ``Close`` columns (``sma_5`` and
                ``sma_20`` are plotted when present).
            initial_cash: Starting cash for both manual and agent runs.
            transaction_cost_pct: Proportional fee applied to every trade.
        """
        self.df = df.reset_index(drop=True)
        self.initial_cash = initial_cash
        self.transaction_cost_pct = transaction_cost_pct

        # Agent state is created before any callback is wired, so a click can
        # never observe a partially initialised object.
        self.model = None
        self.obs_normalizer = None
        self.model_path = '/content/ppo_trader_demo'
        self.trainer_thread = None

        self.reset_manual()
        self.qty_box = widgets.BoundedFloatText(value=1.0, min=0.01, max=10000.0, step=0.01, description='Qty:')
        self.btn_buy = widgets.Button(description='Buy', button_style='success')
        self.btn_hold = widgets.Button(description='Hold')
        self.btn_sell = widgets.Button(description='Sell', button_style='danger')
        self.btn_next = widgets.Button(description='Auto-advance 10', button_style='info')
        self.btn_train = widgets.Button(description='Train Agent (demo)', button_style='warning')
        self.btn_run_agent = widgets.Button(description='Run Agent', button_style='primary')
        self.out = widgets.Output(layout={'border': '1px solid black'})
        self.btn_buy.on_click(self.on_buy)
        self.btn_hold.on_click(self.on_hold)
        self.btn_sell.on_click(self.on_sell)
        self.btn_next.on_click(self.on_advance10)
        self.btn_train.on_click(self.on_train)
        self.btn_run_agent.on_click(self.on_run_agent)
        self.controls = widgets.HBox([self.qty_box, self.btn_buy, self.btn_hold, self.btn_sell, self.btn_next])
        self.agent_controls = widgets.HBox([self.btn_train, self.btn_run_agent])
        display(self.controls, self.agent_controls, self.out)
        self.render()

    def reset_manual(self):
        """Reset the manual account and move the cursor to the start row."""
        # Clamp so a frame shorter than START_STEP rows still has a valid cursor.
        self.current_step = max(0, min(self.START_STEP, len(self.df) - 1))
        self.cash = float(self.initial_cash)
        self.position = 0.0
        self.position_price = 0.0
        self.portfolio_history = []
        self.trade_log = []

    def get_price(self, step=None):
        """Return the ``Close`` price at ``step`` (default: the current step)."""
        if step is None:
            step = self.current_step
        return float(self.df.loc[step, 'Close'])

    def portfolio_value(self):
        """Return cash plus the position valued at the current step's price."""
        return self.cash + self.position * self.get_price()

    def _execute_buy(self, price, requested_qty):
        """Buy ``requested_qty`` shares, or as many as the cash allows.

        Appends a ``buy``, ``buy_partial`` or ``buy_failed`` entry to the
        trade log.
        """
        fee_mult = 1 + self.transaction_cost_pct
        fill_qty = requested_qty
        entry = {'step': self.current_step, 'action': 'buy', 'qty': requested_qty, 'price': price}
        if self.cash < price * requested_qty * fee_mult:
            # Not enough cash for the full order: fall back to what is affordable.
            fill_qty = self.cash / (price * fee_mult)
            if fill_qty <= 1e-6:
                self.trade_log.append({'step': self.current_step, 'action': 'buy_failed', 'qty': requested_qty, 'price': price, 'reason': 'insufficient_cash'})
                return
            entry = {'step': self.current_step, 'action': 'buy_partial', 'qty': fill_qty, 'price': price, 'requested_qty': requested_qty}
        self.cash -= price * fill_qty * fee_mult
        if self.position == 0:
            self.position_price = price
        else:
            # Quantity-weighted average entry price.
            self.position_price = (self.position_price * self.position + price * fill_qty) / (self.position + fill_qty)
        self.position += fill_qty
        self.trade_log.append(entry)

    def _execute_sell(self, price, requested_qty):
        """Sell up to ``requested_qty`` shares, capped at the held position.

        Appends a ``sell`` or ``sell_failed`` entry to the trade log.
        """
        sell_qty = min(requested_qty, self.position)
        if sell_qty <= 0:
            self.trade_log.append({'step': self.current_step, 'action': 'sell_failed', 'qty': requested_qty, 'price': price, 'reason': 'no_shares'})
            return
        self.cash += price * sell_qty * (1 - self.transaction_cost_pct)
        self.position -= sell_qty
        self.trade_log.append({'step': self.current_step, 'action': 'sell', 'qty': sell_qty, 'price': price})
        # Treat float dust as a fully closed position.
        if self.position <= 1e-12:
            self.position = 0.0
            self.position_price = 0.0

    def step_action(self, action, qty=1.0):
        """Execute ``action`` at the current price and advance one row.

        Args:
            action: ``'buy'``, ``'sell'`` or anything else for a hold.
            qty: Requested share count; negative values are treated as 0.

        Returns:
            The change in portfolio value from before the trade (current
            price) to after the step (next row's price). The new value is
            also appended to ``portfolio_history``.
        """
        price = self.get_price()
        prev_value = self.portfolio_value()
        qty = max(0.0, float(qty))
        if qty > 0:
            if action == 'buy':
                self._execute_buy(price, qty)
            elif action == 'sell':
                self._execute_sell(price, qty)
        # portfolio_value() reads the price at current_step, so the cursor must
        # never move past the last row.
        if self.current_step < len(self.df) - 1:
            self.current_step += 1
        value = self.portfolio_value()
        self.portfolio_history.append(value)
        return value - prev_value

    def on_buy(self, _):
        """Button callback: buy the quantity shown in the Qty box."""
        self._handle_action('buy', self.qty_box.value)

    def on_hold(self, _):
        """Button callback: advance one row without trading."""
        self._handle_action('hold', 0.0)

    def on_sell(self, _):
        """Button callback: sell the quantity shown in the Qty box."""
        self._handle_action('sell', self.qty_box.value)

    def on_advance10(self, _):
        """Button callback: hold for up to 10 rows, stopping at the data end."""
        for _i in range(10):
            if self.current_step >= len(self.df) - 1:
                break
            self._handle_action('hold', 0.0)

    def _handle_action(self, action, qty):
        """Run one action and redraw, or print the summary at the data end."""
        if self.current_step >= len(self.df) - 1:
            with self.out:
                print('End of data reached.')
                self.print_final_summary()
            return
        reward = self.step_action(action, qty)
        self.render(action, reward)

    def render(self, last_action=None, reward=0.0):
        """Redraw the price / portfolio charts and the account status line."""
        with self.out:
            clear_output(wait=True)
            step = self.current_step
            # The header shows the row just before the cursor; after an action
            # that is the row the action was executed on.
            price = self.get_price(step - 1) if step - 1 >= 0 else self.get_price(step)
            fig, ax = plt.subplots(2, 1, figsize=(10, 6), gridspec_kw={'height_ratios': [2, 1]})
            seen_dates = self.df['Date'][:step]
            ax[0].plot(seen_dates, self.df['Close'][:step], label='Close')
            if 'sma_5' in self.df.columns:
                ax[0].plot(seen_dates, self.df['sma_5'][:step], label='SMA5', alpha=0.7)
            if 'sma_20' in self.df.columns:
                ax[0].plot(seen_dates, self.df['sma_20'][:step], label='SMA20', alpha=0.7)
            ax[0].legend(loc='upper left')
            ax[0].set_title(f'Time {step-1} | Price: {price:.2f} | Last action: {last_action} | Reward: {reward:.2f}')
            if self.portfolio_history:
                ax[1].plot(self.portfolio_history, label='Portfolio Value')
            ax[1].axhline(self.initial_cash, color='gray', linestyle='--', label='Initial Cash')
            ax[1].legend(loc='upper left')
            plt.tight_layout()
            display(fig)
            print(f"Step: {step-1}  |  Price: {price:.2f}  |  Cash: {self.cash:.2f}  |  Position: {self.position:.6f}  |  Portfolio: {self.portfolio_value():.2f}")
            print('Recent trades (last 10):')
            for trade in self.trade_log[-10:]:
                print(trade)
            # Close the figure so it is not rendered a second time by the
            # inline backend and does not accumulate in memory.
            plt.close(fig)

    def print_final_summary(self):
        """Print the final P/L and write the trade log and portfolio history
        to timestamped CSV files under ``./trader_outputs``."""
        final_value = self.portfolio_value()
        pnl = final_value - self.initial_cash
        pct = (pnl / self.initial_cash) * 100.0
        with self.out:
            print("=== FINAL SUMMARY ===")
            print(f"Initial cash: {self.initial_cash:.2f}")
            print(f"Final portfolio value: {final_value:.2f}")
            print(f"Net P/L: {pnl:+.2f} ({pct:+.2f}%)")
            try:
                # Local imports: this cell does not import os / datetime itself.
                import datetime as _dt
                import os as _os
                outdir = './trader_outputs'
                _os.makedirs(outdir, exist_ok=True)
                timestamp = _dt.datetime.now().strftime('%Y%m%d_%H%M%S')
                pd.DataFrame(self.trade_log).to_csv(f"{outdir}/trade_log_{timestamp}.csv", index=False)
                pd.DataFrame({'portfolio_value': self.portfolio_history}).to_csv(f"{outdir}/portfolio_{timestamp}.csv", index=False)
                print(f"Saved logs to {outdir}")
            except Exception as e:
                # Saving stays best-effort, but the failure is reported
                # instead of being silently discarded.
                print("Could not save logs:", e)

    def _make_env(self):
        """Create the trading environment used for both training and replay."""
        return StockTradingEnv(df=self.df, window_size=30, initial_cash=self.initial_cash, transaction_cost_pct=self.transaction_cost_pct)

    def _vecnormalize_path(self):
        """Return the file path used for the saved observation statistics."""
        return self.model_path + '_vecnormalize.pkl'

    def _load_obs_normalizer(self):
        """Return the VecNormalize wrapper from training, loading it from disk
        when needed; returns None if no statistics are available."""
        if self.obs_normalizer is not None:
            return self.obs_normalizer
        from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
        try:
            self.obs_normalizer = VecNormalize.load(self._vecnormalize_path(), DummyVecEnv([self._make_env]))
        except FileNotFoundError:
            with self.out:
                print("No saved observation statistics found; feeding raw observations to the model.")
        return self.obs_normalizer

    def on_train(self, _):
        """Button callback: train a PPO model in a background thread."""
        # trainer_thread is tracked so that repeated clicks do not start
        # several training runs writing to the same model path.
        if self.trainer_thread is not None and self.trainer_thread.is_alive():
            with self.out:
                print("Training is already running.")
            return

        def _train_job():
            with self.out:
                print("Training agent (demo). This may take a while...")
            try:
                from stable_baselines3 import PPO
                from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
                vec = VecNormalize(DummyVecEnv([self._make_env]), norm_obs=True, norm_reward=False, clip_obs=10.)
                model = PPO('MlpPolicy', vec, verbose=0)
                model.learn(total_timesteps=self.TRAIN_STEPS)
                model.save(self.model_path)
                # The policy only ever saw normalized observations, so the
                # running statistics must be kept (and saved) with the model.
                vec.save(self._vecnormalize_path())
                self.model = model
                self.obs_normalizer = vec
                with self.out:
                    print(f"Training finished and model saved to {self.model_path}")
            except Exception as e:
                with self.out:
                    print("Training failed (missing packages or runtime issue):", e)
                    print("To install SB3 and torch in Colab, run:")
                    print("!pip install stable-baselines3==2.0.0 gym==0.26.2")
                    print("!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu")

        self.trainer_thread = threading.Thread(target=_train_job, daemon=True)
        self.trainer_thread.start()

    def on_run_agent(self, _):
        """Button callback: replay the trained agent and plot its portfolio."""
        def _agent_play():
            with self.out:
                clear_output(wait=True)
                print("Running agent on dataset...")
            try:
                from stable_baselines3 import PPO
                if self.model is None:
                    try:
                        self.model = PPO.load(self.model_path)
                        with self.out:
                            print("Loaded model from", self.model_path)
                    except Exception:
                        with self.out:
                            print("No saved model found. Train first or provide a model at", self.model_path)
                        return
                normalizer = self._load_obs_normalizer()
                env = self._make_env()
                obs = env.reset()
                port_history = []
                done = False

                while not done and len(port_history) < self.AGENT_STEP_LIMIT:
                    # Feed the policy the same normalized observations it was
                    # trained on; raw observations are used only when no
                    # statistics could be found.
                    model_obs = obs if normalizer is None else normalizer.normalize_obs(obs)
                    action, _state = self.model.predict(model_obs, deterministic=True)
                    obs, reward, done, info = env.step(action)
                    port_history.append(info['portfolio'])
                with self.out:
                    clear_output(wait=True)
                    plt.figure(figsize=(10, 4))
                    plt.plot(port_history, label='Agent portfolio')
                    plt.axhline(self.initial_cash, color='gray', linestyle='--', label='Initial cash')
                    plt.title('Agent portfolio value over time')
                    plt.legend()
                    plt.grid(True)
                    plt.show()
                    print("Agent run complete. Final portfolio:", port_history[-1] if port_history else self.initial_cash)
            except Exception as e:
                with self.out:
                    print("Agent run failed (missing SB3 or model):", e)
        threading.Thread(target=_agent_play, daemon=True).start()

# Build the trading UI; the constructor displays the controls and draws the
# first chart as a side effect.
trader_agent = ManualAgentTrader(df=df, initial_cash=10000)

In [None]:
# Install the packages imported by the Train Agent / Run Agent callbacks
# (stable-baselines3, gym and a CPU build of torch).
!pip install stable-baselines3==2.0.0 gym==0.26.2
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu


In [None]:
import numpy as np
import pandas as pd
import datetime
import os

# Simulate a random buy/hold/sell policy on a synthetic price series and log
# every step, including loss statistics, to a CSV file.
NUM_STEPS = 500
INITIAL_CASH = 10000.0
TRANSACTION_COST = 0.001
np.random.seed(42)

# One calendar day per step, starting on 2020-01-01.
start_date = datetime.date(2020, 1, 1)
dates = [start_date + datetime.timedelta(days=offset) for offset in range(NUM_STEPS)]
# Cumulative sum of normal(0.2, 1.0) increments shifted by 100, rounded to 2 decimals.
prices = np.round(np.cumsum(np.random.normal(0.2, 1.0, NUM_STEPS)) + 100, 2)

cash = float(INITIAL_CASH)
position = 0
# Written on every buy but never read in this cell.
position_price = 0.0

rows = []
cumulative_loss = 0.0
loss_list = []

for step, date in enumerate(dates):
    price = float(prices[step])

    # Account snapshot before acting.
    cash_before = float(cash)
    pos_before = int(position)
    portfolio_before = cash_before + pos_before * price

    # 0 = hold, 1 = try to buy one share, 2 = try to sell the held share.
    action = int(np.random.choice([0, 1, 2]))
    executed = "hold"

    if action == 1:
        if position == 0:
            cost = price * (1 + TRANSACTION_COST)
            if cash >= cost:
                cash -= cost
                position = 1
                position_price = price
                executed = "buy"
    elif action == 2:
        if position == 1:
            proceeds = price * (1 - TRANSACTION_COST)
            cash += proceeds
            position = 0
            executed = "sell"

    # Account snapshot after acting.
    # NOTE(review): both snapshots use the same step's price, so `reward` is 0
    # on a hold and minus the transaction fee on an executed trade.
    cash_after = float(cash)
    pos_after = int(position)
    portfolio_after = cash_after + pos_after * price
    reward = portfolio_after - portfolio_before

    loss = -reward
    loss_list.append(loss)
    cumulative_loss += loss
    cum_loss = float(cumulative_loss)
    avg_loss = float(np.mean(loss_list))

    rows.append(dict(
        step=step,
        date=date,
        price=round(price, 2),
        action_chosen=action,
        executed=executed,
        cash_before=round(cash_before, 2),
        cash_after=round(cash_after, 2),
        position_before=pos_before,
        position_after=pos_after,
        portfolio_before=round(portfolio_before, 2),
        portfolio_after=round(portfolio_after, 2),
        reward=round(reward, 4),
        loss=round(loss, 4),
        cumulative_loss=round(cum_loss, 4),
        avg_loss=round(avg_loss, 6),
    ))

csv_path = "/content/random_trading_with_loss.csv"
df_random_loss = pd.DataFrame(rows)
df_random_loss.to_csv(csv_path, index=False)

print("Saved CSV to:", csv_path)
print("Rows:", len(df_random_loss))
display(df_random_loss.head(15))

In [None]:
import numpy as np
import pandas as pd
import datetime

# Simulate a random buy/hold/sell policy on a synthetic price series and
# write the per-step log to a CSV file.
NUM_STEPS = 500
INITIAL_CASH = 10000
TRANSACTION_COST = 0.001
np.random.seed(42)

# One calendar day per step, starting on 2020-01-01.
start_date = datetime.date(2020, 1, 1)
dates = [start_date + datetime.timedelta(days=offset) for offset in range(NUM_STEPS)]

# Cumulative sum of normal(0.2, 1.0) increments shifted by 100, rounded to 2 decimals.
prices = np.round(np.cumsum(np.random.normal(0.2, 1.0, NUM_STEPS)) + 100, 2)

cash = INITIAL_CASH
position = 0
# Written on every buy but never read in this cell.
position_price = 0.0

rows = []

for step, (date, price) in enumerate(zip(dates, prices)):
    # Account snapshot before acting.
    cash_before, pos_before = cash, position
    portfolio_before = cash_before + pos_before * price

    # 0 = hold, 1 = try to buy one share, 2 = try to sell the held share.
    action = int(np.random.choice([0, 1, 2]))
    executed = "hold"

    if action == 1:
        if position == 0:
            cost = price * (1 + TRANSACTION_COST)
            if cash >= cost:
                cash -= cost
                position = 1
                position_price = price
                executed = "buy"
    elif action == 2:
        if position == 1:
            proceeds = price * (1 - TRANSACTION_COST)
            cash += proceeds
            position = 0
            executed = "sell"

    # Account snapshot after acting.
    # NOTE(review): both snapshots use the same step's price, so `reward` is 0
    # on a hold and minus the transaction fee on an executed trade.
    cash_after, pos_after = cash, position
    portfolio_after = cash_after + pos_after * price
    reward = portfolio_after - portfolio_before

    rows.append(dict(
        step=step,
        date=date,
        price=price,
        action_chosen=action,
        executed=executed,
        cash_before=round(cash_before, 2),
        cash_after=round(cash_after, 2),
        position_before=pos_before,
        position_after=pos_after,
        portfolio_before=round(portfolio_before, 2),
        portfolio_after=round(portfolio_after, 2),
        reward=round(reward, 2),
    ))

df_random = pd.DataFrame(rows)
df_random.to_csv("/content/random_trading_data.csv", index=False)

df_random.head(15)