# Enhanced RL Portfolio Training with Sentiment Features

This notebook trains the enhanced PPO agent with sentiment features on a Google Colab GPU.

**Training Configuration:**
- 1.5M timesteps (~25-45 minutes on T4 GPU)
- 181-dim state space (10 tech + 6 sentiment features)
- Larger network architecture [128, 128]

**Steps:**
1. Clone repository and install dependencies
2. Upload or download data files
3. Train enhanced model
4. Evaluate on the test period and save results
5. Train baseline (without sentiment) and compare
6. Download trained model and results

## 1. Setup Environment

In [None]:
# Report whether a CUDA device is visible and, if so, its name and memory
import torch

cuda_ready = torch.cuda.is_available()
print(f"GPU Available: {cuda_ready}")
if cuda_ready:
    gpu_index = 0  # first visible device
    print(f"GPU: {torch.cuda.get_device_name(gpu_index)}")
    total_bytes = torch.cuda.get_device_properties(gpu_index).total_memory
    print(f"Memory: {total_bytes / 1e9:.1f} GB")

In [None]:
# Clone the project repository, then switch into the checkout so that the
# relative paths used by later cells (data/, src/, models/) resolve inside it
!git clone https://github.com/nimeshk03/enhanced-rl-portfolio.git
%cd enhanced-rl-portfolio

In [None]:
# Install the third-party packages for this notebook (-q keeps pip's output short)
!pip install -q stable-baselines3[extra] gymnasium pandas numpy yfinance tensorboard

## 2. Upload Data Files

Upload the following files to the `data/` directory:
- `processed_data.csv` - Price data with technical indicators
- `historical_sentiment_complete.csv` - Sentiment data

In [None]:
# Create the data directory (-p: no error if it already exists)
!mkdir -p data

# Option 1: Upload the CSV files manually with the Colab file browser
# Option 2: Copy them from Google Drive (uncomment the three lines below)
# from google.colab import drive
# drive.mount('/content/drive')
# !cp /content/drive/MyDrive/rl_portfolio_data/*.csv data/

In [None]:
# Upload the data files through the Colab uploader when they are missing
import os
import shutil

from google.colab import files

# Check if files exist
price_exists = os.path.exists('data/processed_data.csv')
sentiment_exists = os.path.exists('data/historical_sentiment_complete.csv')

if price_exists and sentiment_exists:
    print("Data files already exist!")
else:
    print("Please upload the data files:")
    print("  1. processed_data.csv")
    print("  2. historical_sentiment_complete.csv")
    uploaded = files.upload()

    # Move the uploads into data/ from Python rather than `!mv {filename}`:
    # the shell form word-splits names that contain spaces or parentheses
    # (e.g. "processed_data (1).csv") and then fails to move them.
    os.makedirs('data', exist_ok=True)
    for filename in uploaded:
        shutil.move(filename, os.path.join('data', filename))

In [None]:
# Verify data files by loading both CSVs and printing a short summary
import pandas as pd

price_df = pd.read_csv('data/processed_data.csv')
sentiment_df = pd.read_csv('data/historical_sentiment_complete.csv')

# Price table: row count, date span, number of distinct tickers
print(f"Price data: {len(price_df)} records")
price_start, price_end = price_df['date'].min(), price_df['date'].max()
print(f"  Date range: {price_start} to {price_end}")
ticker_count = price_df['tic'].nunique()
print(f"  Tickers: {ticker_count}")

# Sentiment table: row count and date span
print(f"\nSentiment data: {len(sentiment_df)} records")
sentiment_start, sentiment_end = sentiment_df['date'].min(), sentiment_df['date'].max()
print(f"  Date range: {sentiment_start} to {sentiment_end}")

## 3. Train Enhanced Model

In [None]:
import os
import sys
import json
from datetime import datetime
import numpy as np
import pandas as pd

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.monitor import Monitor

# Add project to path
sys.path.insert(0, '.')

from src.env.enhanced_portfolio_env import EnhancedPortfolioEnv
from src.data.enhanced_processor import EnhancedDataProcessor, ProcessorConfig

print("Imports successful!")

In [None]:
# Training Configuration
EXPERIMENT_NAME = "enhanced_v1_colab"
TOTAL_TIMESTEPS = 1_500_000  # Full training

# Time periods (ISO date strings).
# NOTE: TRAIN_START is defined here but is not passed to the train/test
# split call made later in this notebook.
TRAIN_START, TRAIN_END = "2015-01-01", "2024-06-30"
TEST_START, TEST_END = "2024-07-01", "2025-11-30"

# Environment config: unpacked as keyword arguments into EnhancedPortfolioEnv
ENV_CONFIG = dict(
    hmax=100,
    initial_amount=100000,
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    reward_scaling=1e-4,
    sentiment_reward_weight=0.0,
)

# PPO config (optimized for sentiment features): unpacked into the PPO constructor
PPO_CONFIG = dict(
    learning_rate=1e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.05,
    vf_coef=0.5,
    max_grad_norm=0.5,
    verbose=1,
)

# Network architecture: separate hidden-layer lists for the policy (pi)
# and value (vf) networks
POLICY_KWARGS = {
    "net_arch": {"pi": [128, 128], "vf": [128, 128]},
}

print(f"Experiment: {EXPERIMENT_NAME}")
print(f"Timesteps: {TOTAL_TIMESTEPS:,}")

In [None]:
# Load and process data: build the processor, split by date, report sizes
print("Loading and processing data...")

config = ProcessorConfig(normalize_features=True, normalization_window=60)
processor = EnhancedDataProcessor(
    config=config,
    price_path='data/processed_data.csv',
    sentiment_path='data/historical_sentiment_complete.csv',
)

# Date boundaries come from the training-configuration cell
split_bounds = {
    "train_end": TRAIN_END,
    "test_start": TEST_START,
    "test_end": TEST_END,
}
train_df, test_df = processor.get_train_test_split(**split_bounds)

feature_info = processor.get_feature_info()

print(f"\nTrain: {len(train_df)} records")
print(f"Test: {len(test_df)} records")
n_tech = feature_info['n_tech_indicators']
n_sentiment = feature_info['n_sentiment_features']
print(f"Features: {n_tech} tech + {n_sentiment} sentiment")

In [None]:
# Create environments
def prepare_df_for_env(df):
    """Return a copy of ``df`` sorted by date and ticker and indexed by day.

    Each distinct ``date`` value receives a consecutive integer, starting
    at 0 in ascending date order. That integer is stored in a ``day``
    column, which becomes the index of the returned frame. The input
    frame is left unmodified.
    """
    ordered = df.sort_values(['date', 'tic']).reset_index(drop=True)
    day_lookup = {
        date: position
        for position, date in enumerate(sorted(ordered['date'].unique()))
    }
    ordered['day'] = ordered['date'].map(day_lookup)
    return ordered.set_index('day')

# Index both splits by day number for the environment
train_df_indexed = prepare_df_for_env(train_df)
test_df_indexed = prepare_df_for_env(test_df)

# Constructor arguments shared by the training and test environments;
# only the data frame differs between the two
sentiment_env_kwargs = dict(
    stock_dim=feature_info['n_tickers'],
    tech_indicator_list=feature_info['tech_indicators'],
    sentiment_feature_list=feature_info['sentiment_features'],
    include_sentiment=True,
    print_verbosity=0,
    **ENV_CONFIG,
)

# Create training environment
train_env = EnhancedPortfolioEnv(df=train_df_indexed, **sentiment_env_kwargs)

# Create test environment
test_env = EnhancedPortfolioEnv(df=test_df_indexed, **sentiment_env_kwargs)

state_dim = train_env.state_space
print(f"State space: {state_dim}")
n_actions = train_env.action_space.shape[0]
print(f"Action space: {n_actions} stocks")

In [None]:
# Train the enhanced model, then evaluate it on the test period.
# This and later cells use `model` and `training_time`, but no cell in the
# notebook created them, so training is performed here. It follows the same
# pattern as the baseline cell (Monitor + DummyVecEnv + PPO).
print("Training enhanced model...")

train_env_wrapped = DummyVecEnv([lambda: Monitor(train_env)])

model = PPO(
    "MlpPolicy",
    train_env_wrapped,
    policy_kwargs=POLICY_KWARGS,
    **PPO_CONFIG,
)

# Wall-clock duration is stored in the experiment results later on
training_started = datetime.now()
model.learn(total_timesteps=TOTAL_TIMESTEPS, progress_bar=True)
training_time = datetime.now() - training_started
print(f"Training time: {training_time}")

# Evaluate on test period (use the env directly, not the VecEnv wrapper)
print("Evaluating on test period...")

# Reset test environment and run evaluation directly
obs, info = test_env.reset()
done = False

while not done:
    # Model expects batch dimension
    action, _ = model.predict(obs.reshape(1, -1), deterministic=True)
    action = action[0]  # Remove batch dimension
    obs, reward, terminated, truncated, info = test_env.step(action)
    # Gymnasium ends an episode through either flag; honour both so the
    # loop cannot run past a truncated episode
    done = terminated or truncated

# Get statistics from the actual environment used
stats = test_env.get_portfolio_stats()

print("\n" + "="*50)
print("TEST PERIOD RESULTS")
print("="*50)
print(f"Final Portfolio Value: ${stats['final_value']:,.2f}")
print(f"Total Return: {stats['total_return']*100:.2f}%")
print(f"Sharpe Ratio: {stats['sharpe_ratio']:.3f}")
print(f"Max Drawdown: {stats['max_drawdown']*100:.2f}%")
print(f"Total Trades: {stats['total_trades']}")
print(f"Transaction Costs: ${stats['total_cost']:,.2f}")

In [None]:
# Save the trained model
model_path = f'models/ppo_{EXPERIMENT_NAME}'
model.save(model_path)
print(f"Model saved to {model_path}.zip")

# NOTE: the baseline (no-sentiment) model is trained in the dedicated cell
# under "5. Train Baseline". A pasted copy of that cell used to follow here;
# it retrained the baseline for another TOTAL_TIMESTEPS, and section 5 then
# overwrote both the saved file and `baseline_stats`.

In [None]:
# Load the trained model (uncomment if loading a previously saved model)
# model = PPO.load('models/ppo_enhanced_v1_colab')
# print("Model loaded successfully!")

# Create a fresh test environment so the evaluation starts from a clean state
eval_test_env = EnhancedPortfolioEnv(
    df=test_df_indexed,
    stock_dim=feature_info['n_tickers'],
    tech_indicator_list=feature_info['tech_indicators'],
    sentiment_feature_list=feature_info['sentiment_features'],
    include_sentiment=True,
    print_verbosity=0,
    **ENV_CONFIG,
)

# Run evaluation
print("Evaluating on test period...")
obs, info = eval_test_env.reset()
done = False

while not done:
    # predict() is given a batch of one observation; unwrap the single action
    action, _ = model.predict(obs.reshape(1, -1), deterministic=True)
    action = action[0]
    obs, reward, terminated, truncated, info = eval_test_env.step(action)
    # Gymnasium ends an episode through either flag; honour both so the
    # loop cannot run past a truncated episode
    done = terminated or truncated

# Get results
stats = eval_test_env.get_portfolio_stats()

print("\n" + "="*50)
print("TEST PERIOD RESULTS")
print("="*50)
print(f"Final Portfolio Value: ${stats['final_value']:,.2f}")
print(f"Total Return: {stats['total_return']*100:.2f}%")
print(f"Sharpe Ratio: {stats['sharpe_ratio']:.3f}")
print(f"Max Drawdown: {stats['max_drawdown']*100:.2f}%")
print(f"Total Trades: {stats['total_trades']}")
print(f"Transaction Costs: ${stats['total_cost']:,.2f}")

In [None]:
# Save experiment results (configuration + test-period metrics) as JSON
results = {
    "experiment_name": EXPERIMENT_NAME,
    "timestamp": datetime.now().isoformat(),
    "training_time": str(training_time),
    "total_timesteps": TOTAL_TIMESTEPS,
    "config": {
        "env_config": ENV_CONFIG,
        "ppo_config": PPO_CONFIG,
        "policy_kwargs": POLICY_KWARGS,
    },
    "feature_info": feature_info,
    "results": {
        # Cast to built-in types so json can serialize the metrics
        "final_value": float(stats['final_value']),
        "total_return": float(stats['total_return']),
        "sharpe_ratio": float(stats['sharpe_ratio']),
        "max_drawdown": float(stats['max_drawdown']),
        "total_trades": int(stats['total_trades']),
        "total_cost": float(stats['total_cost']),
    },
}

# No earlier cell creates the experiment directory, and open(..., 'w') does
# not create missing parents, so make sure it exists first
results_dir = f'experiments/{EXPERIMENT_NAME}'
os.makedirs(results_dir, exist_ok=True)

with open(f'{results_dir}/results.json', 'w') as f:
    json.dump(results, f, indent=2)

print(f"Results saved to experiments/{EXPERIMENT_NAME}/results.json")

## 5. Train Baseline (Without Sentiment) for Comparison

In [None]:
# Optional: train a baseline without sentiment features for comparison
TRAIN_BASELINE = True  # Set to False to skip baseline training

if TRAIN_BASELINE:
    print("="*70)
    print("TRAINING BASELINE MODEL (NO SENTIMENT)")
    print("="*70)

    # Create baseline training environment (no sentiment)
    baseline_train_env = EnhancedPortfolioEnv(
        df=train_df_indexed,
        stock_dim=feature_info['n_tickers'],
        tech_indicator_list=feature_info['tech_indicators'],
        sentiment_feature_list=[],
        include_sentiment=False,
        print_verbosity=0,
        **ENV_CONFIG,
    )

    print(f"Baseline state space: {baseline_train_env.state_space}")

    # Train baseline with the same PPO settings as the enhanced model
    baseline_env_wrapped = DummyVecEnv([lambda: Monitor(baseline_train_env)])

    baseline_model = PPO(
        "MlpPolicy",
        baseline_env_wrapped,
        policy_kwargs=POLICY_KWARGS,
        **PPO_CONFIG,
    )

    baseline_model.learn(total_timesteps=TOTAL_TIMESTEPS, progress_bar=True)
    baseline_model.save('models/ppo_baseline_v1_colab')

    # Evaluate baseline on a fresh test environment (use the env directly).
    # A second, never-used test environment was previously built as well;
    # it has been dropped.
    eval_baseline_env = EnhancedPortfolioEnv(
        df=test_df_indexed,
        stock_dim=feature_info['n_tickers'],
        tech_indicator_list=feature_info['tech_indicators'],
        sentiment_feature_list=[],
        include_sentiment=False,
        print_verbosity=0,
        **ENV_CONFIG,
    )

    obs, info = eval_baseline_env.reset()
    done = False
    while not done:
        # predict() is given a batch of one observation; unwrap the action
        action, _ = baseline_model.predict(obs.reshape(1, -1), deterministic=True)
        action = action[0]
        obs, reward, terminated, truncated, info = eval_baseline_env.step(action)
        # Gymnasium ends an episode through either flag; honour both
        done = terminated or truncated

    baseline_stats = eval_baseline_env.get_portfolio_stats()

    print("\nBASELINE RESULTS:")
    print(f"  Sharpe Ratio: {baseline_stats['sharpe_ratio']:.3f}")
    print(f"  Total Return: {baseline_stats['total_return']*100:.2f}%")

In [None]:
# Compare enhanced vs baseline
if TRAIN_BASELINE:
    print("\n" + "="*50)
    print("COMPARISON: ENHANCED vs BASELINE")
    print("="*50)
    print(f"{'Metric':<20} {'Enhanced':>15} {'Baseline':>15} {'Diff':>15}")
    print("-"*65)
    
    metrics = [
        ('Sharpe Ratio', stats['sharpe_ratio'], baseline_stats['sharpe_ratio']),
        ('Total Return %', stats['total_return']*100, baseline_stats['total_return']*100),
        ('Max Drawdown %', stats['max_drawdown']*100, baseline_stats['max_drawdown']*100),
    ]
    
    for name, enhanced, baseline in metrics:
        diff = enhanced - baseline
        print(f"{name:<20} {enhanced:>15.3f} {baseline:>15.3f} {diff:>+15.3f}")

## 6. Download Results

In [None]:
# Download trained model and results as a single archive
from google.colab import files

# Zip the models/ and experiments/ folders recursively (-r)
!zip -r enhanced_experiment.zip models/ experiments/

# Hand the archive to the Colab file-download helper
files.download('enhanced_experiment.zip')
print("\nDownload complete! Extract and copy to your local project.")