In [1]:
# 📦 Install all required libraries
!pip install stable-baselines3[extra] yfinance matplotlib gymnasium shimmy streamlit 




[notice] A new release of pip is available: 25.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import pandas as pd
import random
import yfinance as yf
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

In [4]:
import yfinance as yf
import pandas as pd

def _rsi_custom(close, window=15):
    """Rolling RSI-style oscillator shared by the loaders in this notebook.

    For every full ``window`` of closes, the positive and negative
    day-over-day changes are summed separately and combined as
    ``100 - 100 / (1 + gains / (losses + 1))``. The ``+ 1`` in the
    denominator is this notebook's guard against division by zero when a
    window contains no down days, so values differ slightly from the
    standard RSI formula.

    Args:
        close: pandas Series of closing prices.
        window: number of closes per rolling window.

    Returns:
        Series aligned with ``close``; the first ``window - 1`` entries are NaN.
    """
    def _window_rsi(win):
        delta = win.diff()  # first element is NaN and is skipped by .sum()
        gains = delta.clip(lower=0).sum()
        losses = -delta.clip(upper=0).sum()
        return 100 - (100 / (1 + (gains / (losses + 1))))

    return close.rolling(window=window).apply(_window_rsi)


def load_price_data(symbols, start_date="2015-01-01", end_date="2024-01-01",
                    output_path="cleaned_stock_data.csv"):
    """Download prices with yfinance and attach basic technical indicators.

    Only the first requested ticker is kept when the download comes back
    with ticker-grouped (MultiIndex) columns.

    Args:
        symbols: a ticker string (``"AAPL"``, or several separated by spaces
            or commas) or an iterable of ticker strings.
        start_date: first date passed to ``yf.download`` as ``start``.
        end_date: date passed to ``yf.download`` as ``end``.
        output_path: CSV file the cleaned frame is written to; pass ``None``
            to skip writing. Defaults to the path the notebook always used.

    Returns:
        DataFrame with the downloaded columns plus ``RSI_custom``,
        ``SmoothAvg18`` and ``Pct_Change``; rows containing NaN are dropped
        and the original row index is kept (not renumbered).

    Raises:
        ValueError: if no ticker was supplied or the download is empty.
        KeyError: if the first ticker is absent from the downloaded columns.
    """
    # A bare string used to be indexed as symbols[0], i.e. its first character.
    if isinstance(symbols, str):
        symbol_list = symbols.replace(",", " ").split()
    else:
        symbol_list = list(symbols)
    if not symbol_list:
        raise ValueError("symbols must contain at least one ticker")

    # Fetch market data
    prices = yf.download(symbol_list, start=start_date, end=end_date, group_by="ticker", auto_adjust=True)
    if prices is None or prices.empty:
        raise ValueError(
            f"No price data returned for {symbol_list} between {start_date} and {end_date}"
        )

    if isinstance(prices.columns, pd.MultiIndex):
        first = symbol_list[0]
        available = prices.columns.get_level_values(0)
        # NOTE(review): yfinance appears to report tickers upper-cased; fall back
        # to that spelling before letting the lookup raise KeyError.
        if first not in available and isinstance(first, str) and first.upper() in available:
            first = first.upper()
        prices = prices[first].copy()

    prices = prices.reset_index()

    # Add technical indicators
    close_prices = prices['Close']

    # Modified RSI over 15 closes (see _rsi_custom for the +1 denominator guard)
    prices['RSI_custom'] = _rsi_custom(close_prices, window=15)

    # 18-day SMA instead of 20
    prices['SmoothAvg18'] = close_prices.rolling(window=18).mean()

    # Day-over-day % change; the 1e-4 keeps the denominator away from zero
    previous_close = close_prices.shift(1)
    prices['Pct_Change'] = (close_prices - previous_close) / (previous_close + 1e-4)

    # Remove the warm-up rows whose indicators are still NaN
    prices = prices.dropna()

    # Save to file (optional)
    if output_path is not None:
        prices.to_csv(output_path, index=False)

    return prices


def enrich_indicators(frame):
    """Return a copy of ``frame`` with ``MovingAvg_9`` and ``RSI_custom`` columns.

    ``RSI_custom`` is recomputed from ``Close`` even if the column already
    exists. Both indicators are NaN during their warm-up windows, and rows
    containing any NaN are dropped, so the result is shorter than the input
    (by 14 rows when the input itself has no NaN). The input is not modified.

    Args:
        frame: DataFrame with a ``Close`` column.

    Returns:
        New DataFrame with the two indicator columns and NaN rows removed.
    """
    enriched = frame.copy()

    # 9-period simple moving average of the close
    enriched['MovingAvg_9'] = enriched['Close'].rolling(window=9).mean()

    # Same modified RSI as load_price_data (15-close window)
    enriched['RSI_custom'] = _rsi_custom(enriched['Close'], window=15)

    return enriched.dropna()


# 🔁 Load and process: download the prices, then add the extra indicators
symbols_list = ["AAPL"]
raw_stock_data = load_price_data(symbols_list)  # also writes cleaned_stock_data.csv
final_data = enrich_indicators(raw_stock_data)  # adds MovingAvg_9 and recomputes RSI_custom

# 🔍 Display first few rows
print(final_data.head())


[*********************100%***********************]  1 of 1 completed


Price       Date       Open       High        Low      Close     Volume  \
31    2015-02-18  28.465982  28.722473  28.425836  28.709091  179566800   
32    2015-02-19  28.655569  28.778239  28.622115  28.648878  149449600   
33    2015-02-20  28.686788  28.883060  28.559660  28.883060  195793600   
34    2015-02-23  28.999041  29.663685  28.918748  29.663685  283896400   
35    2015-02-24  29.650298  29.797502  29.255524  29.478559  276912400   

Price  RSI_custom  SmoothAvg18  Pct_Change  MovingAvg_9  
31      69.677782    26.701132    0.006962    27.645956  
32      63.479220    26.898322   -0.002097    27.856848  
33      70.955730    27.107041    0.008174    28.118791  
34      73.658102    27.408003    0.027027    28.447893  
35      70.940480    27.622529   -0.006241    28.699426  


In [15]:
# Persist the fully enriched frame. This overwrites the CSV written inside
# load_price_data, which does not contain the MovingAvg_9 column.
final_data.to_csv("cleaned_stock_data.csv", index=False)


In [13]:
import gym
from gym import spaces
import numpy as np
import random

class StockTradingEnv(gym.Env):
    """Single-stock trading environment driven by a price/indicator DataFrame.

    Actions (``Discrete(3)``): 0 = hold, 1 = buy as many whole units as the
    cash balance allows, 2 = sell every held unit. Trades execute at the
    ``Close`` of the current row, then the environment advances one row.

    Observation (float32, shape ``(5,)``):
    ``[Close, RSI_custom, MovingAvg_9, units held, cash]``.

    Reward: portfolio value (cash + units * ``Close`` of the new row) minus
    ``starting_cash``.

    NOTE(review): the base class comes from whichever module this cell bound
    to ``gym``; confirm it is the API version the installed Stable-Baselines3
    expects.

    Args:
        market_df: DataFrame with at least the columns ``Close``,
            ``RSI_custom`` and ``MovingAvg_9`` and two or more rows.
        starting_cash: cash balance at the start of every episode.

    Raises:
        KeyError: if a required column is missing from ``market_df``.
        ValueError: if ``market_df`` has fewer than two rows.
    """

    # Columns read when building observations and executing trades.
    _REQUIRED_COLUMNS = ('Close', 'RSI_custom', 'MovingAvg_9')
    # The random start row is drawn so that roughly this many rows remain.
    _EPISODE_HEADROOM = 100

    def __init__(self, market_df, starting_cash=10000):
        super().__init__()

        # Fail early instead of at the first reset()/step().
        missing = [col for col in self._REQUIRED_COLUMNS if col not in market_df.columns]
        if missing:
            raise KeyError(f"market_df is missing required columns: {missing}")
        if len(market_df) < 2:
            raise ValueError("market_df needs at least 2 rows to run an episode")

        self.market_df = market_df.reset_index(drop=True)
        self.starting_cash = starting_cash

        self.action_space = spaces.Discrete(3)  # 0 = hold, 1 = buy, 2 = sell
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(5,), dtype=np.float32)

        # Episode state; re-initialised by reset().
        self.step_index = 0
        self.cash = starting_cash
        self.stock_units = 0
        self.portfolio_value = starting_cash

    def reset(self, *, seed=None, options=None):
        """Start a new episode at a random row and return ``(observation, info)``.

        The start row is drawn from the environment's own seeded generator
        (``self.np_random``) so that ``reset(seed=...)`` is reproducible; the
        previous implementation used the global ``random`` module and ignored
        the seed.
        """
        super().reset(seed=seed)

        # Clamp to 0 so frames shorter than the headroom still work
        # (random.randint(0, negative) used to raise ValueError here).
        latest_start = max(0, len(self.market_df) - self._EPISODE_HEADROOM)
        self.step_index = int(self.np_random.integers(0, latest_start + 1))

        self.cash = self.starting_cash
        self.stock_units = 0
        self.portfolio_value = self.starting_cash

        obs = self._observe_state()
        info = {}
        return obs, info

    def _observe_state(self):
        """Build the 5-element float32 observation for the current row."""
        # Keep the cursor on the last row if it ever runs past the end.
        self.step_index = min(self.step_index, len(self.market_df) - 1)
        row = self.market_df.iloc[self.step_index]
        return np.array([
            row['Close'],
            row['RSI_custom'],
            row['MovingAvg_9'],
            self.stock_units,
            self.cash
        ], dtype=np.float32)

    def step(self, action):
        """Apply ``action``, advance one row and return the 5-tuple step result.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``terminated`` becomes True once the cursor reaches the last row
            and ``truncated`` is always False.
        """
        self._execute_action(action)
        self.step_index += 1

        done = self.step_index >= len(self.market_df) - 1
        current_price = self.market_df.iloc[self.step_index]['Close']

        # Keep the attribute in sync; it used to stay at starting_cash forever.
        self.portfolio_value = self.cash + self.stock_units * current_price

        # NOTE(review): this is profit relative to starting_cash, paid on every
        # step, so episode return sums the running profit over all steps.
        # Confirm that is intended rather than the per-step change in value.
        reward = self.portfolio_value - self.starting_cash

        obs = self._observe_state()
        return obs, reward, done, False, {}

    def _execute_action(self, action):
        """Trade at the current row's ``Close``: all-in buy, full sell, or hold."""
        price_now = self.market_df.iloc[self.step_index]['Close']

        if action == 1:  # Buy
            if self.cash >= price_now:
                qty = self.cash // price_now  # whole units only
                self.stock_units += qty
                self.cash -= qty * price_now

        elif action == 2:  # Sell
            if self.stock_units > 0:
                self.cash += self.stock_units * price_now
                self.stock_units = 0
        # action == 0 means hold


In [14]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# 🎲 Fixed seed and training budget, kept together so they are easy to tune.
# The seed is handed to both the vectorized env and the PPO agent so repeated
# runs of this cell are comparable.
SEED = 42
TOTAL_TIMESTEPS = 10000

# 🏗️ Create environment instance and vectorize it
vector_env = make_vec_env(lambda: StockTradingEnv(final_data), n_envs=1, seed=SEED)

# 🧠 Initialize PPO agent
ppo_agent = PPO("MlpPolicy", vector_env, verbose=1, seed=SEED)

# 🚀 Train the agent
ppo_agent.learn(total_timesteps=TOTAL_TIMESTEPS)

# 💾 Save the trained model
ppo_agent.save("trained_stock_ppo_model")




Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.29e+03 |
|    ep_rew_mean     | 3.27e+06 |
| time/              |          |
|    fps             | 978      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 674           |
|    ep_rew_mean          | 1.78e+06      |
| time/                   |               |
|    fps                  | 697           |
|    iterations           | 2             |
|    time_elapsed         | 5             |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 1.4709367e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.1          |
|    explained_variance   |