In [6]:
!python -m pip install --upgrade pip


Collecting pip
  Using cached pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Using cached pip-25.1.1-py3-none-any.whl (1.8 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 25.0
    Uninstalling pip-25.0:
      Successfully uninstalled pip-25.0
Successfully installed pip-25.1.1




In [7]:

!pip install yfinance pandas numpy matplotlib transformers streamlit






In [10]:
!pip install quantstats


Collecting quantstats
  Using cached quantstats-0.0.64-py2.py3-none-any.whl.metadata (9.3 kB)
Collecting tabulate>=0.8.9 (from quantstats)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Collecting yfinance>=0.2.65 (from quantstats)
  Downloading yfinance-0.2.65-py2.py3-none-any.whl.metadata (5.8 kB)
Downloading quantstats-0.0.64-py2.py3-none-any.whl (78 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Downloading yfinance-0.2.65-py2.py3-none-any.whl (119 kB)
Installing collected packages: tabulate, yfinance, quantstats

   ---------------------------------------- 0/3 [tabulate]
   ---------------------------------------- 0/3 [tabulate]
  Attempting uninstall: yfinance
   ---------------------------------------- 0/3 [tabulate]
    Found existing installation: yfinance 0.2.63
   ---------------------------------------- 0/3 [tabulate]
    Uninstalling yfinance-0.2.63:
   ---------------------------------------- 0/3 [tabulate]
      Successfully uninstalled yfinance-



In [11]:
import yfinance as yf
import pandas as pd
import numpy as np
import torch
import transformers
from transformers import BertTokenizer, BertForSequenceClassification
import torch.nn.functional as F
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
import streamlit as st
import quantstats as qs

In [14]:
import yfinance as yf
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification
import torch.nn.functional as F
import torch

def load_market_data(symbols, start_date="2015-01-01", end_date="2024-01-01"):
    # Download data
    raw = yf.download(symbols, start=start_date, end=end_date, group_by="ticker", auto_adjust=True)

    if isinstance(raw.columns, pd.MultiIndex):
        raw = raw[symbols[0]].copy()

    raw = raw.reset_index()

    # Technical indicators
    raw['MA_18'] = raw['Close'].rolling(window=18).mean()  # changed from SMA 20
    raw['RSI_15'] = raw['Close'].rolling(window=15).apply(
        lambda win: 100 - (100 / (1 + (win.diff().clip(lower=0).sum() / (-win.diff().clip(upper=0).sum() + 1))))
    )
    raw['Daily_Return'] = raw['Close'].pct_change()

    # FinBERT Sentiment Analysis (dummy headlines)
    tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
    model = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone')

    dummy_news = ["Market outlook remains optimistic amid economic signals"] * len(raw)
    sentiments = []

    for sentence in dummy_news:
        enc = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True)
        out = model(**enc)
        prob = F.softmax(out.logits, dim=1)
        sentiments.append((prob[:, 2] - prob[:, 0]).item())  # Positive - Negative

    raw['Sentiment_Index'] = sentiments
    raw.dropna(inplace=True)

    return raw


def clean_preprocessed(df):
    df = df.copy()
    df.dropna(inplace=True)
    return df


In [16]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import torch.nn.functional as F

class StockSentimentEnv(gym.Env):
    metadata = {"render.modes": ["human"]}

    def __init__(self, market_data, starting_funds=10000):
        super(StockSentimentEnv, self).__init__()
        self.market_data = market_data.reset_index(drop=True)
        self.initial_cash = starting_funds

        # 🧠 Action choices: 0 = hold, 1 = buy, 2 = sell
        self.action_space = spaces.Discrete(3)

        # 🎯 Observation: [Close, MA, RSI, Cash, Holdings, Sentiment]
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(6,), dtype=np.float32
        )

        # Load FinBERT sentiment model
        self.tokenizer = BertTokenizer.from_pretrained("yiyanghkust/finbert-tone")
        self.sentiment_model = BertForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.sentiment_model.to(self.device)

    def _extract_sentiment(self, text=None):
        if not text:
            return 0.0  # fallback if no headline exists

        encoded = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        encoded = {key: val.to(self.device) for key, val in encoded.items()}

        with torch.no_grad():
            results = self.sentiment_model(**encoded)
            probs = F.softmax(results.logits, dim=-1)
            return (probs[0][2] - probs[0][0]).item()  # positive - negative

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        self.step_index = 0
        self.cash = self.initial_cash
        self.total_value = self.initial_cash
        self.shares = 0
        self.units_sold = 0
        self.sale_revenue = 0

        sentiment_score = self._extract_sentiment()

        obs = np.array([
            self.market_data.loc[self.step_index, "Close"],
            self.market_data.loc[self.step_index, "MA_18"],   # was SMA
            self.market_data.loc[self.step_index, "RSI_15"],  # was RSI
            self.cash,
            self.shares,
            sentiment_score
        ], dtype=np.float32)

        info = {}
        return obs, info

    def step(self, action):
        self._process_action(action)
        self.step_index += 1

        is_done = self.step_index >= len(self.market_data) - 1
        truncated = False

        headline = self.market_data.loc[self.step_index, "Headline"] if "Headline" in self.market_data.columns else ""
        sentiment_score = self._extract_sentiment(headline)

        obs = np.array([
            self.market_data.loc[self.step_index, "Close"],
            self.market_data.loc[self.step_index, "MA_18"],
            self.market_data.loc[self.step_index, "RSI_15"],
            self.cash,
            self.shares,
            sentiment_score
        ], dtype=np.float32)

        reward = self.total_value - self.initial_cash

        info = {
            "portfolio_value": self.total_value,
            "cash": self.cash,
            "shares": self.shares
        }

        return obs, reward, is_done, truncated, info

    def _process_action(self, action):
        price_now = self.market_data.loc[self.step_index, "Close"]

        if action == 1 and self.cash > price_now:
            self.shares += 1
            self.cash -= price_now

        elif action == 2 and self.shares > 0:
            self.shares -= 1
            self.cash += price_now
            self.units_sold += 1
            self.sale_revenue += price_now

        self.total_value = self.cash + self.shares * price_now

    def render(self, mode="human"):
        print(f"[Step {self.step_index}] 💰 Cash: ₹{self.cash:.2f} | 📈 Holdings: {self.shares} | 💼 Net Worth: ₹{self.total_value:.2f}")

    def close(self):
        pass



In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# 🧾 Load data
symbols = ["AAPL"]
raw_df = load_market_data(symbols)
final_df = clean_preprocessed(raw_df)

# 📰 Add dummy headlines for sentiment (so FinBERT won't crash)
final_df["Headline"] = "The stock market is stable."  # You can change this later

# 🧠 Initialize environment and train agent
vec_env = DummyVecEnv([lambda: StockSentimentEnv(final_df)])
ppo_agent = PPO("MlpPolicy", vec_env, verbose=1)
ppo_agent.learn(total_timesteps=10000)

# 📈 Test trained model and record performance
portfolio_log = []
test_env = StockSentimentEnv(final_df)
obs, _ = test_env.reset()
finished = False

while not finished:
    action, _ = ppo_agent.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = test_env.step(action)
    portfolio_log.append(info['portfolio_value'])
    finished = terminated or truncated

# ➕ Add portfolio results to DataFrame
final_df["Portfolio_Value"] = pd.Series(portfolio_log)

# 💾 Save trained model
ppo_agent.save("ppo_stock_agent")


[*********************100%***********************]  1 of 1 completed
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [None]:
import quantstats as qs

# 📈 Prepare daily returns from your processed data
price_returns = final_df.set_index("Date")["Daily_Return"].iloc[1:]

# 📊 Generate QuantStats performance report
qs.reports.full(price_returns, title="RL Agent Portfolio Analysis")
