In [1]:
import os
import time
import requests
import pandas as pd
import numpy as np

class StockDataHandler:
    def __init__(self, tickers, api_key, outputsize="full", cache_dir="cached_data"):
        self.tickers = tickers
        self.api_key = api_key
        self.outputsize = outputsize  # Use "full" to get long-term data
        self.cache_dir = cache_dir
        os.makedirs(cache_dir, exist_ok=True)  # Ensure cache directory exists
        self.data = {}

    def fetch_data(self):
        """Fetch and cache 10 years of stock data from Alpha Vantage."""
        base_url = "https://www.alphavantage.co/query"
    
        for ticker in self.tickers:
            cache_file = os.path.join(self.cache_dir, f"{ticker}.csv")
    
            # ✅ **Check Cache First**
            if os.path.exists(cache_file):
                print(f"📂 Loading cached data for {ticker}...")
                df = pd.read_csv(cache_file, index_col=0, parse_dates=True).squeeze("columns")  # 🔥 **Fix Here**
                recent_date = df.index.max()
    
                # **If cached data is recent, skip API call**
                if recent_date >= pd.Timestamp.today() - pd.DateOffset(days=7):  
                    self.data[ticker] = df  # ✅ Store properly formatted data
                    continue
    
            print(f"🌐 Fetching 10 years of data for {ticker} from API...")
            params = {
                "function": "TIME_SERIES_DAILY",
                "symbol": ticker,
                "outputsize": self.outputsize,  # "full" gets 20+ years
                "datatype": "json",
                "apikey": self.api_key
            }
    
            response = requests.get(base_url, params=params)
            data = response.json()
    
            if "Time Series (Daily)" not in data:
                print(f"❌ Error fetching {ticker}: {data.get('Note', 'No data available.')}")
                continue
    
            # ✅ Convert API Response to DataFrame
            df = pd.DataFrame.from_dict(data["Time Series (Daily)"], orient="index")
            df.index = pd.to_datetime(df.index)
            df = df.sort_index()
    
            # ✅ Keep Only the Last **10 Years**
            ten_years_ago = pd.Timestamp.today() - pd.DateOffset(years=10)
            df = df[df.index >= ten_years_ago]
    
            # ✅ Store Only "Close" Prices & Save to Cache
            df = df["4. close"].astype(float).squeeze("columns")  # 🔥 **Ensure 1D**
            df.to_csv(cache_file)
            self.data[ticker] = df  # ✅ Store correctly
    
            # 🕒 **Wait to avoid hitting API limit**
            time.sleep(12)  # Alpha Vantage allows ~5 calls/min


    def get_data(self):
        """Return stock data as a pandas DataFrame."""
        if not self.data:
            raise ValueError("No valid stock data found. Check API key and ticker symbols.")
    
        # Convert each ticker's cached data into a Pandas Series and flatten if needed
        for ticker in self.data:
            if isinstance(self.data[ticker], np.ndarray):
                self.data[ticker] = self.data[ticker].flatten()  # Ensure 1D
            if not isinstance(self.data[ticker], pd.Series):
                self.data[ticker] = pd.Series(self.data[ticker])  
    
        df = pd.DataFrame(self.data)
        df.dropna(how="all", inplace=True)  # Drop rows where all tickers have NaN
    
        if df.empty:
            raise ValueError("Stock data is empty after processing. Check cache or API response.")
    
        return df




# ✅ **Example Usage**
if __name__ == "__main__":
    tickers = ["AAPL", "GOOGL", "MSFT", "TSLA"]
    api_key = "I3QFMBJXM0I1YYSF"  # 🔑 Replace with your Alpha Vantage API key

    data_handler = StockDataHandler(tickers, api_key, outputsize="full")
    data_handler.fetch_data()

    try:
        df = data_handler.get_data()
        print(df.head())  # 📊 Preview stock data
    except ValueError as e:
        print(f"Error: {e}")


📂 Loading cached data for AAPL...
📂 Loading cached data for GOOGL...
📂 Loading cached data for MSFT...
📂 Loading cached data for TSLA...
              AAPL    GOOGL   MSFT    TSLA
2015-02-02  118.63  532.200  41.28  210.94
2015-02-03  118.65  533.300  41.60  218.36
2015-02-04  119.56  526.100  41.84  218.55
2015-02-05  119.94  529.830  42.45  220.99
2015-02-06  118.93  533.875  42.41  217.36


In [2]:
import gym
import numpy as np
from gym import spaces

class StockTradingEnv(gym.Env):
    def __init__(self, tickers, stock_data, initial_cash=10000):
        super(StockTradingEnv, self).__init__()
        self.tickers = [t for t in tickers if t in stock_data.columns]  # Only use tickers with data
        self.df = stock_data[self.tickers].copy().sort_index()  # Ensure oldest data first
        self.initial_cash = initial_cash
        self.cash = initial_cash
        self.shares = {ticker: 0 for ticker in self.tickers}
        self.current_step = 0

        # ✅ Check if we have data before proceeding
        if self.df.empty:
            raise ValueError("❌ No stock data available in the DataFrame!")

        # ✅ Define action and observation spaces
        self.action_space = spaces.MultiDiscrete([3] * len(self.tickers))  # 0 = Hold, 1 = Buy, 2 = Sell
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(len(self.tickers) * 2 + 1,), dtype=np.float32)

    def reset(self):
        """Reset environment to initial state."""
        self.cash = self.initial_cash
        self.shares = {ticker: 0 for ticker in self.tickers}
        self.current_step = 0
        return self._get_obs()

    def step(self, action):
        """Execute a step in the environment based on the action taken for multiple stocks."""
        if self.current_step >= len(self.df) - 1:
            return self._get_obs(), 0, True, {}  # Return last state if we're out of data

        prices = self.df.iloc[self.current_step]

        for i, ticker in enumerate(self.tickers):
            if action[i] == 1 and self.cash >= prices[ticker]:  # Buy stock
                self.shares[ticker] += 1
                self.cash -= prices[ticker]
            elif action[i] == 2 and self.shares[ticker] > 0:  # Sell stock
                self.shares[ticker] -= 1
                self.cash += prices[ticker]

        # Move to next step
        self.current_step += 1
        done = self.current_step >= len(self.df) - 1

        # ✅ Reward = Portfolio Value Change (to encourage profit)
        portfolio_value = self.cash + sum(self.shares[t] * prices[t] for t in self.tickers)
        reward = portfolio_value - self.initial_cash  # Profit = reward

        return self._get_obs(), reward, done, {}

    def _get_obs(self):
        """Return observation: cash balance + stock prices + shares owned for all stocks."""
        prices = np.array([self.df.iloc[self.current_step][t] for t in self.tickers], dtype=np.float32)
        holdings = np.array([self.shares[t] for t in self.tickers], dtype=np.float32)

        obs = np.concatenate(([self.cash], prices, holdings), axis=0)
        return obs

    def render(self):
        """Prints current cash, stock holdings, and total portfolio value."""
        portfolio_value = self.cash + sum(self.shares[t] * self.df.iloc[self.current_step][t] for t in self.tickers)

        print(f"📊 Step: {self.current_step}")
        print(f"💰 Cash: ${self.cash:.2f}")
        print("📈 Holdings:", {t: self.shares[t] for t in self.tickers})
        print(f"📉 Total Portfolio Value: ${portfolio_value:.2f}")

    def seed(self, seed=None):
        """Sets the random seed for reproducibility."""
        np.random.seed(seed)

In [3]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
#from data_handler import StockDataHandler
#from trading_env import StockTradingEnv

# ✅ Step 1: Load or Fetch Stock Data
tickers = ["AAPL", "GOOGL", "MSFT", "TSLA"]
api_key = "I3QFMBJXM0I1YYSF"

data_handler = StockDataHandler(tickers, api_key, outputsize="full")

try:
    df = data_handler.get_data()  # Try loading cached data
except ValueError:
    data_handler.fetch_data()  # Fetch new data if cache is missing
    df = data_handler.get_data()

# ✅ Step 2: Create the RL Training Environment
env = make_vec_env(lambda: StockTradingEnv(tickers, df), n_envs=1)

# ✅ Step 3: Train the AI Model
model = PPO(
    "MlpPolicy",           # MLP (Multi-Layer Perceptron) policy network
    env,                    # The environment you're training in
    learning_rate=2.5e-4,   # The learning rate of the optimizer
    n_steps=2048,           # The number of steps to run for each environment per update
    batch_size=64,          # The batch size used for training
    n_epochs=10,            # Number of epochs per update (how many times to optimize the model on the sampled data)
    gamma=0.99,             # The discount factor for future rewards
    gae_lambda=0.95,        # The factor for Generalized Advantage Estimation (GAE)
    ent_coef=0.01,          # Coefficient for the entropy bonus (encourages exploration)
    vf_coef=0.5,            # Coefficient for the value function loss
    max_grad_norm=0.5,      # Gradient clipping to prevent gradient explosion
    clip_range=0.2,         # The clip range for the PPO objective to stabilize training
    target_kl=0.01,         # Target KL divergence for early stopping
    verbose=1,              # Display training information
    tensorboard_log="./ppo_tensorboard", # Optional: Log to TensorBoard for monitoring
    seed=42,                # For reproducibility, set the random seed
)
model.learn(total_timesteps=500000)  # Adjust timesteps for better learning
model.save("stock_trading_ppo")

# ✅ Step 4: Load the Trained Model for Evaluation or Further Training
model = PPO.load("stock_trading_ppo")



📂 Loading cached data for AAPL...
📂 Loading cached data for GOOGL...
📂 Loading cached data for MSFT...
📂 Loading cached data for TSLA...
Using cpu device




Logging to ./ppo_tensorboard\PPO_2
-----------------------------
| time/              |      |
|    fps             | 1464 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.51e+03      |
|    ep_rew_mean          | 1.71e+07      |
| time/                   |               |
|    fps                  | 1153          |
|    iterations           | 2             |
|    time_elapsed         | 3             |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 1.3405806e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -4.39         |
|    explained_variance   | 1.19e-07      |
|    learning_rate        | 0.00025       |
|    loss                 | 7.01e+09      |
|    n_

In [4]:
from stable_baselines3 import PPO

class TradingAgent:
    def __init__(self, model_path="stock_trading_ppo"):
        """Initialize the trading agent with a trained PPO model."""
        try:
            self.model = PPO.load(model_path)  # ✅ Load trained AI model
            print("✅ Successfully loaded AI trading model.")
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            self.model = None  # Ensure model is None if loading fails

    def act(self, obs):
        """Use the trained RL model to make decisions instead of random choices."""
        if self.model is None:
            raise ValueError("❌ No trained AI model found. Ensure PPO model is trained and saved.")

        action, _ = self.model.predict(obs)  # ✅ Predict using PPO
        return action




In [26]:
# Load stock data (Use Cached Data if Available)
tickers = ["AAPL", "GOOGL", "MSFT", "TSLA"]
api_key = "I3QFMBJXM0I1YYSF"  # Replace with your real API key
data_handler = StockDataHandler(tickers, api_key, outputsize="full")

# ✅ Check if cached data exists, otherwise fetch new data
try:
    df = data_handler.get_data()  # Load cached data
except ValueError:
    data_handler.fetch_data()  # Fetch new data
    df = data_handler.get_data()

# ✅ Create Trading Environment
env = StockTradingEnv(tickers, df)
agent = TradingAgent("stock_trading_ppo")  # Change to "llm" to use an LLM-powered agent

# ✅ Run Simulation
obs = env.reset()
for _ in range(1000):
    action = agent.act(obs)  # Get action from agent
    obs, reward, done, _ = env.step(action)  # Take a step
    env.render()  # Show results
    if done:
        break

# ✅ Calculate Final Portfolio Value
final_cash = env.cash
portfolio_value = final_cash + sum(env.shares[t] * df.iloc[-1][t] for t in tickers)

print("\n==== 📊 Final Results ====")
print(f"💰 Final Cash: ${final_cash:.2f}")
print(f"📈 Total Portfolio Value: ${portfolio_value:.2f}")
print(f"💵 Profit/Loss: ${portfolio_value - env.initial_cash:.2f}")
print(f"📅 Trading Days Simulated: {env.current_step}")


📂 Loading cached data for AAPL...
📂 Loading cached data for GOOGL...
📂 Loading cached data for MSFT...
📂 Loading cached data for TSLA...
✅ Successfully loaded AI trading model.
📊 Step: 1
💰 Cash: $10000.00
📈 Holdings: {'AAPL': 0, 'GOOGL': 0, 'MSFT': 0, 'TSLA': 0}
📉 Total Portfolio Value: $10000.00
📊 Step: 2
💰 Cash: $9348.05
📈 Holdings: {'AAPL': 1, 'GOOGL': 1, 'MSFT': 0, 'TSLA': 0}
📉 Total Portfolio Value: $9993.71
📊 Step: 3
💰 Cash: $9874.15
📈 Holdings: {'AAPL': 1, 'GOOGL': 0, 'MSFT': 0, 'TSLA': 0}
📉 Total Portfolio Value: $9994.09
📊 Step: 4
💰 Cash: $9773.10
📈 Holdings: {'AAPL': 0, 'GOOGL': 0, 'MSFT': 0, 'TSLA': 1}
📉 Total Portfolio Value: $9990.46
📊 Step: 5
💰 Cash: $9555.74
📈 Holdings: {'AAPL': 0, 'GOOGL': 0, 'MSFT': 0, 'TSLA': 2}
📉 Total Portfolio Value: $9990.70
📊 Step: 6
💰 Cash: $9513.38
📈 Holdings: {'AAPL': 0, 'GOOGL': 0, 'MSFT': 1, 'TSLA': 2}
📉 Total Portfolio Value: $9988.56
📊 Step: 7
💰 Cash: $8799.53
📈 Holdings: {'AAPL': 0, 'GOOGL': 1, 'MSFT': 0, 'TSLA': 3}
📉 Total Portfolio Valu