# Project 3: Stock Trading Baseline

Train an RL agent to trade stocks using historical data.

**Runtime:** ~2 minutes for baseline

## Setup

In [None]:
!pip install stable-baselines3 gym-anytrading pandas matplotlib yfinance -q

In [None]:
import gymnasium as gym
import gym_anytrading
from gym_anytrading.envs import StocksEnv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import DummyVecEnv
import yfinance as yf

## Load Historical Stock Data

Download Apple (AAPL) stock data from Yahoo Finance:

In [None]:
# Download daily price history for one ticker and keep only the OHLCV columns.
ticker = 'AAPL'
start_date = '2020-01-01'
end_date = '2023-12-31'

print(f"Downloading {ticker} data from {start_date} to {end_date}...")
df = yf.download(ticker, start=start_date, end=end_date, progress=False)

# Fail here with a clear message instead of an opaque KeyError/IndexError
# in the column selection or the prints below.
if df.empty:
    raise RuntimeError(
        f"No data returned for {ticker} ({start_date} to {end_date}); "
        "check the ticker symbol, the date range and the network connection."
    )

# Newer yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker. Flatten to plain field names so df['Close'] is a Series, which
# both the scalar formatting below and the trading environment rely on.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Prepare data
df = df[['Open', 'High', 'Low', 'Close', 'Volume']]
df = df.dropna()

print(f"\nData loaded: {len(df)} days")
print(f"Date range: {df.index[0]} to {df.index[-1]}")

# Show sample
print(f"\nFirst 5 days:")
print(df.head())

In [None]:
# Draw the closing-price series, then print its range and the buy-and-hold
# return over the whole downloaded period.
close = df['Close']

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(df.index, close)
ax.set_title(f'{ticker} Stock Price History')
ax.set_xlabel('Date')
ax.set_ylabel('Price ($)')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

lowest, highest = close.min(), close.max()
# Return earned by buying on the first day and holding until the last one
hold_return_pct = ((close.iloc[-1] / close.iloc[0]) - 1) * 100

print(f"\nPrice range: ${lowest:.2f} - ${highest:.2f}")
print(f"Buy-and-hold return: {hold_return_pct:.1f}%")

## Split Data: Train/Test

Use first 80% for training, last 20% for testing:

In [None]:
# Chronological split (no shuffling): the earliest 80% of rows train the
# agent, the most recent 20% are held out for backtesting.
split_idx = int(len(df) * 0.8)
train_df, test_df = df.iloc[:split_idx], df.iloc[split_idx:]

for label, part in (("Training data", train_df), ("Test data", test_df)):
    print(f"{label}: {len(part)} days ({part.index[0]} to {part.index[-1]})")

## Create Trading Environment

Environment features:
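- **Actions**: 0=sell, 1=buy (there is no separate hold action; repeating the current side keeps the position)
- **Observation**: closing price and day-over-day price change for the last `window_size` days
- **Reward**: Price change realized when a long position is closed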

In [None]:
# Create the training environment and describe its observation/action spaces.
# frame_bound starts at 5 so the first observation has a full 5-day window.
env = gym.make(
    'stocks-v0',
    df=train_df,
    frame_bound=(5, len(train_df)),
    window_size=5
)

# Describe the observation from its real shape instead of hard-coding it:
# 'stocks-v0' observations are (window_size, 2) — close price and its
# day-over-day change — not the five OHLCV columns of the DataFrame.
obs_shape = env.observation_space.shape

print(f"Environment created:")
print(f"  Observation space: {obs_shape}")
print(f"    ({obs_shape[0]} days x {obs_shape[1]} features: close price, price change)")
print(f"  Action space: {env.action_space}")
print(f"    0=sell, 1=buy")

## Train Trading Agent

In [None]:
# Build the training environment inside a DummyVecEnv and fit an A2C agent on it.
def _make_train_env():
    """Return a fresh 'stocks-v0' environment over the training split."""
    return gym.make(
        'stocks-v0',
        df=train_df,
        frame_bound=(5, len(train_df)),
        window_size=5,
    )


env = DummyVecEnv([_make_train_env])

# A2C with an MLP policy; the fixed seed keeps runs repeatable
model = A2C('MlpPolicy', env, learning_rate=7e-4, gamma=0.99, verbose=1, seed=42)

print("\nTraining agent (~2 minutes)...\n")
model.learn(total_timesteps=50_000)
print("\nTraining complete!")

## Backtest on Test Data

In [None]:
# Backtest: replay the trained policy once over the held-out test split and
# report reward, return and the number of executed trades.
test_env = gym.make(
    'stocks-v0',
    df=test_df,
    frame_bound=(5, len(test_df)),
    window_size=5
)

# The environment tracks profit as a unit multiplier (it starts at 1.0), not a
# cash balance. This notional stake is only used to show the result in dollars.
initial_balance = 10_000.0

# Run agent on test data
obs, info = test_env.reset()
done = False
actions = []
rewards = []
num_trades = 0
position = info['position']

while not done:
    action, _ = model.predict(obs, deterministic=True)
    action = int(action)  # predict() returns a NumPy value; store plain ints
    obs, reward, terminated, truncated, info = test_env.step(action)
    done = terminated or truncated

    actions.append(action)
    rewards.append(reward)

    # An action only becomes a trade when it flips the held position;
    # repeating the same side is effectively a hold.
    if info['position'] != position:
        num_trades += 1
        position = info['position']

# Calculate metrics
# The environment exposes its running totals through the info dict; it has no
# public total_profit / balance / initial_balance attributes.
total_reward = sum(rewards)
total_profit = info['total_profit']
total_return = (total_profit - 1.0) * 100
final_balance = initial_balance * total_profit

print(f"\n{'='*50}")
print(f"Backtest Results:")
print(f"{'='*50}")
print(f"Total reward: {total_reward:.2f}")
print(f"Total return: {total_return:.2f}%")
print(f"Final balance: ${final_balance:.2f} (from a notional ${initial_balance:.2f})")
print(f"Number of trades: {num_trades}")
print(f"{'='*50}")

## Visualize Trading Strategy

In [None]:
# Plot the agent's buy/sell signals on the test-period price series, plot its
# cumulative reward, and compare its return with buy-and-hold.
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

# Plot 1: Price and trades
prices = test_df['Close'].values[5:]  # Skip window
dates = test_df.index[5:]

axes[0].plot(dates, prices, label='Price', alpha=0.7)

# Mark buy/sell signals; action i is decided on dates[i]
buy_signals = [i for i, a in enumerate(actions) if a == 1]
sell_signals = [i for i, a in enumerate(actions) if a == 0]

if buy_signals:
    axes[0].scatter([dates[i] for i in buy_signals],
                    [prices[i] for i in buy_signals],
                    color='green', marker='^', s=100, label='Buy', zorder=5)

if sell_signals:
    axes[0].scatter([dates[i] for i in sell_signals],
                    [prices[i] for i in sell_signals],
                    color='red', marker='v', s=100, label='Sell', zorder=5)

axes[0].set_ylabel('Price ($)')
axes[0].set_title('Trading Strategy on Test Data')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Plot 2: Cumulative rewards
# The reward for action i is realized one day after it is decided, so there is
# one fewer reward than there are dates. Plot against the realization dates so
# x and y have the same length.
cumulative_rewards = np.cumsum(rewards)
reward_dates = dates[1:len(rewards) + 1]
axes[1].plot(reward_dates, cumulative_rewards, label='Agent', color='blue')
axes[1].axhline(y=0, color='black', linestyle='--', alpha=0.5)
axes[1].set_xlabel('Date')
axes[1].set_ylabel('Cumulative Reward')
axes[1].set_title('Agent Performance Over Time')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Calculate buy-and-hold for comparison over the same window the agent traded
# (the first 5 rows only seed the observation window).
bah_return = ((prices[-1] / prices[0]) - 1) * 100
print(f"\nComparison:")
print(f"  Agent return: {total_return:.2f}%")
print(f"  Buy-and-hold return: {bah_return:.2f}%")

if total_return > bah_return:
    print(f"  ✓ Agent outperformed buy-and-hold!")
else:
    print(f"  Agent underperformed. Try:")
    print(f"    - Training longer")
    print(f"    - Adding features (technical indicators)")
    print(f"    - Tuning hyperparameters")

## Next Steps

Improve your trading agent:

1. **Add features:** Technical indicators (RSI, MACD, moving averages)
2. **Transaction costs:** Make it more realistic
3. **Risk management:** Add position sizing and stop-losses
4. **Multiple stocks:** Build a portfolio strategy
5. **Different algorithms:** Try DQN or PPO

See `project3_trading_README.md` for detailed ideas!

**⚠️ Remember:** This is for education only. Not financial advice!