<a href="https://colab.research.google.com/github/kritisinghh/Trading/blob/main/RL_Interactive_Trading.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RL Interactive Trading Simulator — `RL_Interactive_Trading`

**What this notebook provides**
- A simple interactive trading simulator where **you** can step through historical price data (daily) and choose **Buy / Hold / Sell** at each step.
- Tracks portfolio (cash, position, portfolio value), shows price chart, and logs trades.
- Includes a small PPO baseline training cell you can run later to compare agent decisions with yours (optional).

**Why this is useful**
- It's a hands-on trading sandbox — you get to make decisions and see immediate consequences.
- Great for exploring reward shaping and for collecting human play data to train imitation learning later.





In [None]:
# (Optional) Install packages if running in Colab.
# Uncomment the line below if any of these packages are missing.
# !pip install --quiet yfinance pandas numpy matplotlib ipywidgets ta stable-baselines3==2.0.0


In [None]:
# Imports and display setup
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import ipywidgets as widgets
%matplotlib inline


In [None]:
# Download data and prepare indicators (robust to MultiIndex columns)
def download_data(ticker, start='2018-01-01', end='2024-12-31', auto_adjust=True):
    """Download price history for ``ticker`` with ``yf.download`` and tidy the columns.

    Parameters
    ----------
    ticker : str
        Symbol forwarded to ``yf.download``.
    start, end : str
        Date bounds forwarded to ``yf.download``.
    auto_adjust : bool, default True
        Forwarded explicitly to ``yf.download`` so the result does not depend
        on whichever default the installed yfinance version uses.

    Returns
    -------
    pandas.DataFrame
        The downloaded frame with every row containing a NaN removed. If the
        columns arrive as a MultiIndex that describes a single ticker (its
        second level holds one distinct value), they are flattened to the
        first-level names; a genuinely multi-ticker MultiIndex is left as is.

    Raises
    ------
    ValueError
        If the download returns no rows, or no rows survive ``dropna``.
    """
    df = yf.download(ticker, start=start, end=end, progress=False, auto_adjust=auto_adjust)
    if df is None or df.empty:
        raise ValueError(f'No data returned for {ticker!r} between {start} and {end}')

    if isinstance(df.columns, pd.MultiIndex):
        columns = df.columns
        # A one-level MultiIndex has no second level to inspect; flatten it
        # directly instead of relying on an exception to reach that path.
        single_ticker = columns.nlevels < 2 or len(set(columns.get_level_values(1))) == 1
        if single_ticker:
            df.columns = list(columns.get_level_values(0))

    df = df.dropna()
    if df.empty:
        raise ValueError(f'All rows for {ticker!r} contained missing values')
    return df

def add_basic_indicators(df):
    """Return a new frame with return and moving-average columns appended.

    Adds three columns derived from ``df['Close']``:

    * ``return_1`` - one-row percentage change,
    * ``sma_5``    - rolling mean over 5 rows,
    * ``sma_20``   - rolling mean over 20 rows.

    Rows containing any NaN (which includes the rolling warm-up rows) are
    dropped, and the index is then reset so the previous index becomes a
    regular column. The input frame is not modified.
    """
    close = df['Close']
    enriched = df.assign(
        return_1=close.pct_change(),
        sma_5=close.rolling(5).mean(),
        sma_20=close.rolling(20).mean(),
    )
    return enriched.dropna().reset_index()

# Default symbol is AAPL; edit `ticker` below to load a different one.
ticker = 'AAPL'
df_raw = download_data(ticker=ticker, start='2018-01-01', end='2024-12-31')
df = add_basic_indicators(df=df_raw)
# Report what was loaded, then preview the first rows as the cell output.
print('Loaded', ticker, 'shape:', df.shape)
df.head()

Loaded AAPL shape: (1741, 9)


  df = yf.download(ticker, start=start, end=end, progress=False)


Unnamed: 0,Date,Close,High,Low,Open,Volume,return_1,sma_5,sma_20
0,2018-01-30,39.103008,39.196684,38.571392,38.765771,184192800,-0.005894,39.89552,40.827605
1,2018-01-31,39.210739,39.447275,38.992942,39.079592,129915600,0.002755,39.577484,40.771047
2,2018-02-01,39.292706,39.489426,39.053829,39.149848,188923200,0.00209,39.42151,40.718939
3,2018-02-02,37.587799,39.063209,37.494124,38.875854,346375200,-0.04339,38.905821,40.572219
4,2018-02-05,36.648685,38.379363,36.53393,37.259926,290954000,-0.024985,38.368587,40.355474


In [None]:
# Interactive manual trading UI
class ManualTrader:
    """Button-driven simulator for trading a single share over a price frame.

    Each click on Buy / Hold / Sell applies the action at the ``Close`` price
    of the current row, advances one row, and redraws the charts and summary
    inside an ``ipywidgets.Output`` area.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Date``, ``Close``, ``sma_5`` and ``sma_20``.
        The index is replaced by a 0..n-1 range internally.
    initial_cash : float, default 10000
        Starting cash balance.
    transaction_cost_pct : float, default 0.001
        Proportional cost added to buys and subtracted from sells.
    start_step : int, default 30
        Row at which a session starts (and restarts after a reset).

    Raises
    ------
    ValueError
        If required columns are missing, ``start_step`` is below 1, or the
        frame has no row at ``start_step``.
    """

    # 'Close' drives pricing; the other columns are only read when plotting.
    REQUIRED_COLUMNS = ('Date', 'Close', 'sma_5', 'sma_20')

    def __init__(self, df, initial_cash=10000, transaction_cost_pct=0.001, start_step=30):
        frame = df.reset_index(drop=True)
        missing = [col for col in self.REQUIRED_COLUMNS if col not in frame.columns]
        if missing:
            raise ValueError(f'df is missing required columns: {missing}')
        # render() reads the row before the current one, so row 0 cannot be the start.
        if start_step < 1:
            raise ValueError('start_step must be at least 1')
        if len(frame) <= start_step:
            raise ValueError(
                f'df has {len(frame)} rows but start_step={start_step} needs at least {start_step + 1}'
            )
        self.df = frame
        self.initial_cash = initial_cash
        self.transaction_cost_pct = transaction_cost_pct
        self.start_step = start_step
        self.reset()
        # build UI
        self.btn_buy = widgets.Button(description='Buy', button_style='success')
        self.btn_hold = widgets.Button(description='Hold')
        self.btn_sell = widgets.Button(description='Sell', button_style='danger')
        self.btn_next = widgets.Button(description='Auto-advance 10 steps', button_style='info')
        self.out = widgets.Output(layout={'border': '1px solid black'})
        self.btn_buy.on_click(self.on_buy)
        self.btn_hold.on_click(self.on_hold)
        self.btn_sell.on_click(self.on_sell)
        self.btn_next.on_click(self.on_advance10)
        self.controls = widgets.HBox([self.btn_buy, self.btn_hold, self.btn_sell, self.btn_next])
        display(self.controls, self.out)
        # initial render
        self.render()

    def reset(self):
        """Restore the starting state: start row, full cash, flat position, empty logs."""
        self.current_step = self.start_step
        self.cash = float(self.initial_cash)
        self.position = 0  # number of shares (0 or 1 in this simple UI)
        self.position_price = 0.0
        self.portfolio_history = []
        self.trade_log = []

    def get_price(self, step=None):
        """Return the ``Close`` value at ``step`` (defaults to the current step) as a float."""
        if step is None:
            step = self.current_step
        return float(self.df.loc[step, 'Close'])

    def portfolio_value(self):
        """Return cash plus the position valued at the current step's price."""
        return self.cash + self.position * self.get_price()

    def step_action(self, action):  # action in {'buy','sell','hold'}
        """Apply ``action`` at the current price, advance one row, and return the value change.

        A buy is executed only when flat and when cash covers price plus cost;
        a sell only when holding. Anything else (including 'hold') just
        advances. The returned reward is the portfolio value after advancing
        minus the value before the action.
        """
        price = self.get_price()
        prev_value = self.portfolio_value()
        if action == 'buy' and self.position == 0:
            cost = price * (1 + self.transaction_cost_pct)
            if self.cash >= cost:
                self.cash -= cost
                self.position = 1
                self.position_price = price
                self.trade_log.append({'step': self.current_step, 'action': 'buy', 'price': price})
        elif action == 'sell' and self.position == 1:
            proceeds = price * (1 - self.transaction_cost_pct)
            self.cash += proceeds
            self.position = 0
            # Clear the entry price so a flat position never carries a stale value.
            self.position_price = 0.0
            self.trade_log.append({'step': self.current_step, 'action': 'sell', 'price': price})
        # hold does nothing
        self.current_step += 1
        value = self.portfolio_value()
        self.portfolio_history.append(value)
        return value - prev_value

    def on_buy(self, _):
        """Click handler for the Buy button."""
        self._handle_action('buy')

    def on_hold(self, _):
        """Click handler for the Hold button."""
        self._handle_action('hold')

    def on_sell(self, _):
        """Click handler for the Sell button."""
        self._handle_action('sell')

    def on_advance10(self, _):
        """Click handler that holds for up to 10 rows, stopping at the last row."""
        for _i in range(10):
            if self.current_step >= len(self.df)-1:
                break
            self._handle_action('hold')

    def _handle_action(self, action):
        """Run one action and redraw; at the end of the data, restart the session."""
        if self.current_step >= len(self.df)-1:
            # The message promises a reset, so actually perform it. It is
            # printed after render() because render() clears the output area.
            self.reset()
            self.render()
            with self.out:
                print('End of data reached. Resetting...')
            return
        reward = self.step_action(action)
        self.render(action, reward)

    def render(self, last_action=None, reward=0.0):
        """Redraw the price chart, portfolio chart and text summary in the output area.

        NOTE(review): the displayed price and the charts cover rows up to
        ``current_step - 1``, whereas ``portfolio_value()`` and the next trade
        use the price at ``current_step`` - confirm this offset is intended.
        """
        with self.out:
            clear_output(wait=True)
            step = self.current_step
            price = self.get_price(step-1)  # price for last executed step
            fig, ax = plt.subplots(2,1, figsize=(10,6), gridspec_kw={'height_ratios':[2,1]})
            try:
                # price chart
                ax[0].plot(self.df['Date'][:step], self.df['Close'][:step], label='Close')
                ax[0].plot(self.df['Date'][:step], self.df['sma_5'][:step], label='SMA5', alpha=0.7)
                ax[0].plot(self.df['Date'][:step], self.df['sma_20'][:step], label='SMA20', alpha=0.7)
                ax[0].legend(loc='upper left')
                ax[0].set_title(f'Time {step-1} | Price: {price:.2f} | Last action: {last_action} | Reward: {reward:.2f}')
                # portfolio value chart
                if len(self.portfolio_history)>0:
                    ax[1].plot(self.portfolio_history, label='Portfolio Value')
                ax[1].axhline(self.initial_cash, color='gray', linestyle='--', label='Initial Cash')
                ax[1].legend(loc='upper left')
                plt.tight_layout()
                display(fig)
            finally:
                # Always release the figure, even if plotting raised.
                plt.close(fig)
            # summary
            print(f"Step: {step-1}  |  Price: {price:.2f}  |  Cash: {self.cash:.2f}  |  Position: {self.position}  |  Portfolio: {self.portfolio_value():.2f}")
            print('Recent trades (last 10):')
            for t in self.trade_log[-10:]:
                print(t)

# Create the UI. Constructing ManualTrader displays the button row and the
# output area and draws the first frame, so this one call starts the session.
trader = ManualTrader(df=df, initial_cash=10000)


HBox(children=(Button(button_style='success', description='Buy', style=ButtonStyle()), Button(description='Hol…

Output(layout=Layout(border='1px solid black'))

In [None]:
# OPTIONAL: small PPO baseline (optional heavy cell—run only if you want)
# This cell trains a tiny PPO agent for demo. Increase timesteps for real training.
# Requires stable-baselines3 2.x (which depends on Gymnasium) and may be slow in Colab.
try:
    import os

    from stable_baselines3 import PPO
    from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
    # SB3 2.x is built on Gymnasium: an env that subclasses the legacy
    # `gym.Env` is rejected unless shimmy is installed, so use gymnasium directly.
    import gymnasium as gym

    class SimpleGymEnv(gym.Env):
        """Single-share trading environment over a price frame (Gymnasium API).

        Observation: the last ``window_size`` ``Close`` values followed by
        cash / initial_cash, the position (0 or 1) and the entry price /
        initial_cash (0 when flat).
        Actions: 0 = hold, 1 = buy (only when flat), 2 = sell (only when holding).
        Reward: change in portfolio value from before the action (at the
        current price) to after advancing one row (at the next price).
        """

        def __init__(self, df, window_size=30, initial_cash=10000, transaction_cost_pct=0.001):
            super().__init__()
            self.df = df.reset_index(drop=True)
            # At least one row beyond the first window is needed to take a step.
            if len(self.df) <= window_size:
                raise ValueError(
                    f'df has {len(self.df)} rows; need more than window_size={window_size}'
                )
            self.window_size = window_size
            self.transaction_cost_pct = transaction_cost_pct
            self.initial_cash = initial_cash
            self.feature_cols = ['Close']  # minimal for demo
            obs_dim = window_size * len(self.feature_cols) + 3
            self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)
            self.action_space = gym.spaces.Discrete(3)
            self._reset_state()

        def _reset_state(self):
            """Put the account and the step pointer back to their starting values."""
            self.current_step = self.window_size - 1
            self.cash = float(self.initial_cash)
            self.position = 0
            self.position_price = 0.0
            self.total_trades = 0

        def _get_obs(self):
            """Build the flat float32 observation ending at the current step."""
            start = self.current_step - self.window_size + 1
            # .loc slicing is end-inclusive, so this yields exactly window_size rows.
            window = self.df.loc[start:self.current_step, self.feature_cols].values
            flat = window.flatten()
            cash_norm = np.array([self.cash / self.initial_cash], dtype=np.float32)
            position = np.array([self.position], dtype=np.float32)
            position_price_norm = np.array([0.0], dtype=np.float32)
            if self.position == 1:
                position_price_norm = np.array([self.position_price / self.initial_cash], dtype=np.float32)
            return np.concatenate([flat, cash_norm, position, position_price_norm]).astype(np.float32)

        def reset(self, *, seed=None, options=None):
            """Start a new episode and return ``(observation, info)``."""
            super().reset(seed=seed)
            self._reset_state()
            return self._get_obs(), {}

        def step(self, action):
            """Apply ``action`` and return ``(obs, reward, terminated, truncated, info)``."""
            price = float(self.df.loc[self.current_step, 'Close'])
            prev_portfolio = self.cash + self.position * price
            # execute
            if action == 1 and self.position == 0:
                cost = price * (1 + self.transaction_cost_pct)
                if self.cash >= cost:
                    self.cash -= cost
                    self.position = 1
                    self.position_price = price
                    self.total_trades += 1
            elif action == 2 and self.position == 1:
                proceeds = price * (1 - self.transaction_cost_pct)
                self.cash += proceeds
                self.position = 0
                self.position_price = 0.0
                self.total_trades += 1
            self.current_step += 1
            # Running out of historical rows is a time limit rather than a
            # terminal state of the task, so it is reported as truncation.
            truncated = self.current_step >= len(self.df) - 1
            # current_step never exceeds the last row here, so the final step is
            # valued at the real last price and gets a real observation.
            current_price = float(self.df.loc[self.current_step, 'Close'])
            portfolio = self.cash + self.position * current_price
            reward = portfolio - prev_portfolio
            obs = self._get_obs()
            info = {'portfolio_value': portfolio}
            return obs, float(reward), False, truncated, info

    # Create simple env for training
    train_env = DummyVecEnv([lambda: SimpleGymEnv(df=df, window_size=30)])
    train_env = VecNormalize(train_env, norm_obs=True, norm_reward=False, clip_obs=10.)
    SEED = 0  # fixed so repeated runs of this demo are comparable
    model = PPO('MlpPolicy', train_env, verbose=1, seed=SEED)
    TOTAL = 10000  # small demo timesteps
    model.learn(total_timesteps=TOTAL)
    # Relative path: resolves to /content/ppo_trader_demo when the working
    # directory is /content, and still works outside Colab.
    model_path = 'ppo_trader_demo'
    model.save(model_path)
    print('Saved model to', os.path.abspath(model_path))
except Exception as e:
    print('Skipping PPO demo (missing packages or runtime issue):', e)


Skipping PPO demo (missing packages or runtime issue): Missing shimmy installation. You an OpenAI Gym environment. Stable-Baselines3 (SB3) has transitioned to using Gymnasium internally. In order to use OpenAI Gym environments with SB3, you need to install shimmy (`pip install 'shimmy>=0.2.1'`).


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [None]:
!pip install stable-baselines3==2.0.0 gym==0.26.2
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu


Looking in indexes: https://download.pytorch.org/whl/cpu
