<a href="https://colab.research.google.com/github/kritisinghh/Trading/blob/main/RL_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# (Optional) Install packages if running in Colab.
# Uncomment the line below if running in Colab and any packages are missing.
# !pip install --quiet yfinance pandas numpy matplotlib ipywidgets ta stable-baselines3==2.0.0


In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import ipywidgets as widgets
%matplotlib inline


In [None]:
def download_data(ticker, start='2018-01-01', end='2024-12-31'):
    """Download price history for ``ticker`` via yfinance and clean it.

    Parameters
    ----------
    ticker : str
        Symbol passed straight through to ``yf.download``.
    start, end : str
        Date bounds passed straight through to ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        The downloaded frame with rows containing any NaN removed. When the
        columns are a ``MultiIndex`` whose second level holds a single value,
        the columns are flattened to the first-level names. A ``MultiIndex``
        with several distinct second-level values is left untouched.

    Raises
    ------
    ValueError
        If no rows remain after the download and NaN removal. Failing here
        gives a clear message instead of an obscure lookup error later on.
    """
    df = yf.download(ticker, start=start, end=end, progress=False)

    cols = df.columns
    if isinstance(cols, pd.MultiIndex):
        # Flatten only when the second level carries no information (one
        # distinct value), or when there is no second level at all.
        single_ticker = cols.nlevels < 2 or len(set(cols.get_level_values(1))) == 1
        if single_ticker:
            df.columns = list(cols.get_level_values(0))

    df = df.dropna()
    if df.empty:
        raise ValueError(
            f"No usable price data for {ticker!r} between {start} and {end}"
        )
    return df

def add_basic_indicators(df):
    """Return a new frame with simple return and moving-average columns.

    Adds ``return_1`` (one-step percentage change of ``Close``), ``sma_5`` and
    ``sma_20`` (5- and 20-row rolling means of ``Close``). Rows containing any
    NaN are then dropped and the index is moved into a regular column. The
    input frame is not modified.
    """
    close = df['Close']
    enriched = df.assign(
        return_1=close.pct_change(),
        sma_5=close.rolling(5).mean(),
        sma_20=close.rolling(20).mean(),
    )
    # The rolling windows leave NaN in the leading rows; drop them before
    # turning the index into a column.
    return enriched.dropna().reset_index()

# Fetch the demo symbol, derive the indicator columns, and preview the result.
ticker = 'AAPL'
df_raw = download_data(ticker, '2018-01-01', '2024-12-31')
df = add_basic_indicators(df_raw)
print('Loaded', ticker, 'shape:', df.shape)
df.head()

In [None]:
class ManualTrader:
    """Button-driven, single-share trading simulator rendered with ipywidgets.

    The trader walks forward through ``df`` one row at a time. At each step the
    user may buy one share (when flat), sell it (when long), or hold. Trades
    are charged ``transaction_cost_pct`` of the price.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns read by this class: ``Close``, ``Date``,
        ``sma_5`` and ``sma_20``. The index is reset to 0..n-1 internally.
    initial_cash : float
        Starting cash balance.
    transaction_cost_pct : float
        Proportional cost applied to each buy and sell.
    start_step : int
        Row at which a (re)started session begins. Defaults to 30, the value
        that was previously hard-coded.

    Raises
    ------
    ValueError
        If ``start_step`` is below 1 or ``df`` has no row at ``start_step``.
    """

    def __init__(self, df, initial_cash=10000, transaction_cost_pct=0.001, start_step=30):
        self.df = df.reset_index(drop=True)
        self.initial_cash = initial_cash
        self.transaction_cost_pct = transaction_cost_pct
        self.start_step = int(start_step)
        # render() reads row start_step-1 and portfolio_value() reads row
        # start_step, so both must exist.
        if self.start_step < 1 or len(self.df) <= self.start_step:
            raise ValueError(
                f"df needs more than {self.start_step} rows (got {len(self.df)}) "
                "and start_step must be >= 1"
            )
        self.reset()
        self.btn_buy = widgets.Button(description='Buy', button_style='success')
        self.btn_hold = widgets.Button(description='Hold')
        self.btn_sell = widgets.Button(description='Sell', button_style='danger')
        self.btn_next = widgets.Button(description='Auto-advance 10 steps', button_style='info')
        self.out = widgets.Output(layout={'border': '1px solid black'})
        self.btn_buy.on_click(self.on_buy)
        self.btn_hold.on_click(self.on_hold)
        self.btn_sell.on_click(self.on_sell)
        self.btn_next.on_click(self.on_advance10)
        self.controls = widgets.HBox([self.btn_buy, self.btn_hold, self.btn_sell, self.btn_next])
        display(self.controls, self.out)
        self.render()

    def reset(self):
        """Restore the session to its starting state (cash, position, logs)."""
        self.current_step = self.start_step
        self.cash = float(self.initial_cash)
        self.position = 0
        self.position_price = 0.0
        self.portfolio_history = []
        self.trade_log = []

    def get_price(self, step=None):
        """Return the ``Close`` value at ``step`` (default: the current step)."""
        if step is None:
            step = self.current_step
        return float(self.df.loc[step, 'Close'])

    def portfolio_value(self):
        """Return cash plus the held position valued at the current step's price."""
        return self.cash + self.position * self.get_price()

    def _at_end(self):
        """Return True when no further step can be taken."""
        return self.current_step >= len(self.df) - 1

    def step_action(self, action):
        """Apply ``action`` ('buy', 'sell' or anything else for hold) and advance.

        Returns the change in portfolio value between the moment before the
        action and the moment after advancing one step.
        """
        price = self.get_price()
        prev_value = self.portfolio_value()
        if action == 'buy' and self.position == 0:
            cost = price * (1 + self.transaction_cost_pct)
            if self.cash >= cost:
                self.cash -= cost
                self.position = 1
                self.position_price = price
                self.trade_log.append({'step': self.current_step, 'action': 'buy', 'price': price})
        elif action == 'sell' and self.position == 1:
            proceeds = price * (1 - self.transaction_cost_pct)
            self.cash += proceeds
            self.position = 0
            # Clear the entry price once flat so no stale value lingers.
            self.position_price = 0.0
            self.trade_log.append({'step': self.current_step, 'action': 'sell', 'price': price})
        self.current_step += 1
        value = self.portfolio_value()
        self.portfolio_history.append(value)
        return value - prev_value

    def on_buy(self, _):
        """Click handler for the Buy button."""
        self._handle_action('buy')

    def on_hold(self, _):
        """Click handler for the Hold button."""
        self._handle_action('hold')

    def on_sell(self, _):
        """Click handler for the Sell button."""
        self._handle_action('sell')

    def on_advance10(self, _):
        """Hold for up to 10 steps, stopping early at the end of the data.

        The chart is redrawn once after the last step rather than after every
        step, since each redraw replaces the previous one anyway.
        """
        reward = None
        for _i in range(10):
            if self._at_end():
                break
            reward = self.step_action('hold')
        if reward is not None:
            self.render('hold', reward)

    def _handle_action(self, action):
        """Run one user action, or restart the session when the data is exhausted."""
        if self._at_end():
            # Actually perform the reset the message announces. The message is
            # printed after render() because render() clears the output area.
            self.reset()
            self.render()
            with self.out:
                print('End of data reached. Resetting...')
            return
        reward = self.step_action(action)
        self.render(action, reward)

    def render(self, last_action=None, reward=0.0):
        """Redraw the price chart, portfolio curve and status text in ``self.out``."""
        with self.out:
            clear_output(wait=True)
            step = self.current_step
            # The chart shows rows [0, step), so the displayed price is the
            # last plotted row.
            price = self.get_price(step-1)
            fig, ax = plt.subplots(2,1, figsize=(10,6), gridspec_kw={'height_ratios':[2,1]})
            try:
                ax[0].plot(self.df['Date'][:step], self.df['Close'][:step], label='Close')
                ax[0].plot(self.df['Date'][:step], self.df['sma_5'][:step], label='SMA5', alpha=0.7)
                ax[0].plot(self.df['Date'][:step], self.df['sma_20'][:step], label='SMA20', alpha=0.7)
                ax[0].legend(loc='upper left')
                ax[0].set_title(f'Time {step-1} | Price: {price:.2f} | Last action: {last_action} | Reward: {reward:.2f}')
                if len(self.portfolio_history)>0:
                    ax[1].plot(self.portfolio_history, label='Portfolio Value')
                ax[1].axhline(self.initial_cash, color='gray', linestyle='--', label='Initial Cash')
                ax[1].legend(loc='upper left')
                plt.tight_layout()
                display(fig)
                print(f"Step: {step-1}  |  Price: {price:.2f}  |  Cash: {self.cash:.2f}  |  Position: {self.position}  |  Portfolio: {self.portfolio_value():.2f}")
                print('Recent trades (last 10):')
                for t in self.trade_log[-10:]:
                    print(t)
            finally:
                # Always release the figure, even if drawing failed part-way.
                plt.close(fig)

# Instantiate the simulator; the constructor displays the control buttons and draws the first chart.
trader = ManualTrader(df=df, initial_cash=10000)


In [None]:
# Optional PPO demo: define a minimal trading environment, train briefly, and
# save the model. Any failure (missing packages, runtime errors) is reported
# and the cell is skipped rather than aborting the notebook.
try:
    import os

    from stable_baselines3 import PPO
    from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
    import gym

    class SimpleGymEnv(gym.Env):
        """Single-share trading environment over the ``Close`` column of ``df``.

        Observation: the last ``window_size`` closing prices followed by
        normalised cash, the position flag (0 or 1) and the normalised entry
        price. Actions: 0 = hold, 1 = buy one share, 2 = sell the held share.
        Reward: change in portfolio value over the step.

        NOTE(review): ``reset`` returns only the observation and ``step``
        returns a 4-tuple (legacy gym API). Confirm the installed
        stable-baselines3 / gym versions accept that convention.
        """
        def __init__(self, df, window_size=30, initial_cash=10000, transaction_cost_pct=0.001):
            super().__init__()
            self.df = df.reset_index(drop=True)
            self.window_size = window_size
            self.transaction_cost_pct = transaction_cost_pct
            self.initial_cash = initial_cash
            if len(self.df) < window_size:
                # A shorter frame would yield observations smaller than the
                # declared observation space.
                raise ValueError(
                    f"df has {len(self.df)} rows but window_size is {window_size}"
                )
            self.feature_cols = ['Close']
            obs_dim = window_size * len(self.feature_cols) + 3
            self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)
            self.action_space = gym.spaces.Discrete(3)
            self.reset()

        def _get_obs(self):
            """Build the flat float32 observation for the current step."""
            start = self.current_step - self.window_size + 1
            # .loc slicing is label-inclusive, so this yields window_size rows.
            window = self.df.loc[start:self.current_step, self.feature_cols].values
            flat = window.flatten()
            cash_norm = np.array([self.cash / self.initial_cash], dtype=np.float32)
            position = np.array([self.position], dtype=np.float32)
            position_price_norm = np.array([0.0], dtype=np.float32)
            if self.position == 1:
                position_price_norm = np.array([self.position_price / self.initial_cash], dtype=np.float32)
            return np.concatenate([flat, cash_norm, position, position_price_norm]).astype(np.float32)

        def reset(self):
            """Restart the episode at the first step with a full window."""
            self.current_step = self.window_size - 1
            self.cash = float(self.initial_cash)
            self.position = 0
            self.position_price = 0.0
            self.total_trades = 0
            return self._get_obs()

        def step(self, action):
            """Apply ``action``, advance one row, and return (obs, reward, done, info)."""
            price = float(self.df.loc[self.current_step, 'Close'])
            prev_portfolio = self.cash + self.position * price
            if action == 1 and self.position == 0:
                cost = price * (1 + self.transaction_cost_pct)
                if self.cash >= cost:
                    self.cash -= cost
                    self.position = 1
                    self.position_price = price
            elif action == 2 and self.position == 1:
                proceeds = price * (1 - self.transaction_cost_pct)
                self.cash += proceeds
                self.position = 0
                self.position_price = 0.0
            self.current_step += 1
            done = self.current_step >= len(self.df) - 1
            # On the terminal step the position is valued at the pre-step price.
            current_price = float(self.df.loc[self.current_step, 'Close']) if not done else price
            portfolio = self.cash + self.position * current_price
            reward = portfolio - prev_portfolio
            if done:
                # Match the dtype declared in observation_space.
                obs = np.zeros(self.observation_space.shape, dtype=np.float32)
            else:
                obs = self._get_obs()
            info = {'portfolio_value': portfolio}
            return obs, float(reward), done, info

    train_env = DummyVecEnv([lambda: SimpleGymEnv(df=df, window_size=30)])
    train_env = VecNormalize(train_env, norm_obs=True, norm_reward=False, clip_obs=10.)
    model = PPO('MlpPolicy', train_env, verbose=1)
    TOTAL = 10000
    model.learn(total_timesteps=TOTAL)
    # Keep the Colab location when it exists; otherwise fall back to the
    # working directory so the trained model is not lost outside Colab.
    save_dir = '/content' if os.path.isdir('/content') else os.getcwd()
    model_path = os.path.join(save_dir, 'ppo_trader_demo')
    model.save(model_path)
    print('Saved model to', model_path)
except Exception as e:
    print('Skipping PPO demo (missing packages or runtime issue):', e)


In [None]:
!pip install stable-baselines3==2.0.0 gym==0.26.2
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
