# Enhanced RL Portfolio Training - RunPod GPU

This notebook trains the enhanced PPO agent with sentiment features on RunPod GPU (RTX 5090/4090/3090).

**Training Configuration:**
- 1.5M timesteps (~15-30 minutes on RTX 5090)
- 181-dim state space (10 tech + 6 sentiment features)
- Network architecture [128, 128]

**Steps:**
1. Setup environment and install dependencies
2. Upload data files
3. Train enhanced model
4. Evaluate enhanced model on the test period
5. Train baseline model (optional) and compare
6. Save results and download trained models

## 1. Setup Environment

In [None]:
# Report the PyTorch build and, when CUDA is usable, the first GPU's name and memory
import torch

print(f"PyTorch Version: {torch.__version__}")
cuda_ready = torch.cuda.is_available()
print(f"GPU Available: {cuda_ready}")
if cuda_ready:
    # Device index 0 is the only GPU inspected here
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    first_gpu = torch.cuda.get_device_properties(0)
    print(f"Memory: {first_gpu.total_memory / 1e9:.1f} GB")

In [None]:
# Clone repository (run once)
import os
if not os.path.exists('/workspace/enhanced-rl-portfolio'):
    !git clone https://github.com/nimeshk03/enhanced-rl-portfolio.git /workspace/enhanced-rl-portfolio
    print("Repository cloned!")
else:
    print("Repository already exists. Pulling latest...")
    !cd /workspace/enhanced-rl-portfolio && git pull

os.chdir('/workspace/enhanced-rl-portfolio')
print(f"Working directory: {os.getcwd()}")

In [None]:
# Install dependencies
!pip install -q stable-baselines3[extra] gymnasium pandas numpy yfinance tensorboard

## 2. Upload Data Files

Upload the following files to `/workspace/enhanced-rl-portfolio/data/`:
- `processed_data.csv` - Price data with technical indicators
- `historical_sentiment_complete.csv` - Sentiment data

**Option 1:** Use RunPod File Browser (folder icon on left)

**Option 2:** Use SCP from local machine:
```bash
scp -P <PORT> data/*.csv root@<POD_IP>:/workspace/enhanced-rl-portfolio/data/
```

In [None]:
# Make sure data/ exists, then report which of the two required CSVs are absent
import os

os.makedirs('data', exist_ok=True)

price_exists = os.path.exists('data/processed_data.csv')
sentiment_exists = os.path.exists('data/historical_sentiment_complete.csv')

# Collect the missing paths in the order they are reported
missing_files = []
if not price_exists:
    missing_files.append('data/processed_data.csv')
if not sentiment_exists:
    missing_files.append('data/historical_sentiment_complete.csv')

if not missing_files:
    print("Data files found!")
else:
    print("Missing data files:")
    for missing_path in missing_files:
        print(f"  - {missing_path}")
    print("\nUpload files using RunPod File Browser or SCP before continuing.")

In [None]:
# Load both CSVs and print basic coverage figures as a sanity check
import pandas as pd

price_df = pd.read_csv('data/processed_data.csv')
sentiment_df = pd.read_csv('data/historical_sentiment_complete.csv')

# Price data: row count, date span, number of distinct tickers
print(f"Price data: {len(price_df)} records")
price_dates = price_df['date']
print(f"  Date range: {price_dates.min()} to {price_dates.max()}")
ticker_count = price_df['tic'].nunique()
print(f"  Tickers: {ticker_count}")

# Sentiment data: row count and date span
print(f"\nSentiment data: {len(sentiment_df)} records")
sentiment_dates = sentiment_df['date']
print(f"  Date range: {sentiment_dates.min()} to {sentiment_dates.max()}")

## 3. Train Enhanced Model

In [None]:
# Imports
import os
import sys
import json
import random
from datetime import datetime
import numpy as np
import pandas as pd
import torch

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.monitor import Monitor

# Add project to path
sys.path.insert(0, '.')

from src.env.enhanced_portfolio_env import EnhancedPortfolioEnv
from src.data.enhanced_processor import EnhancedDataProcessor, ProcessorConfig

print("Imports successful!")

In [None]:
# Training configuration: the tunables for this run are collected in this cell
SEED = 42
EXPERIMENT_NAME = "enhanced_v1_runpod"
TOTAL_TIMESTEPS = 1_500_000

# Date boundaries of the train / test windows.
# NOTE(review): TRAIN_START is not referenced by any other visible cell.
TRAIN_START, TRAIN_END = "2015-01-01", "2024-06-30"
TEST_START, TEST_END = "2024-07-01", "2025-11-30"

# Keyword arguments unpacked into every EnhancedPortfolioEnv constructor call
ENV_CONFIG = dict(
    hmax=100,
    initial_amount=100000,
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    reward_scaling=1e-4,
    sentiment_reward_weight=0.0,
)

# Keyword arguments unpacked into the PPO constructor
PPO_CONFIG = dict(
    learning_rate=1e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.05,
    vf_coef=0.5,
    max_grad_norm=0.5,
    verbose=1,
    seed=SEED,
)

# Hidden-layer sizes for the policy (pi) and value (vf) networks
POLICY_KWARGS = {"net_arch": {"pi": [128, 128], "vf": [128, 128]}}

print(
    f"Experiment: {EXPERIMENT_NAME}",
    f"Timesteps: {TOTAL_TIMESTEPS:,}",
    f"Seed: {SEED}",
    sep="\n",
)

In [None]:
# Build the processor from both CSVs and obtain the train / test frames
print("Loading and processing data...")

config = ProcessorConfig(normalize_features=True, normalization_window=60)

processor = EnhancedDataProcessor(
    config=config,
    price_path='data/processed_data.csv',
    sentiment_path='data/historical_sentiment_complete.csv',
)

# NOTE(review): only train_end / test_start / test_end are passed; TRAIN_START
# is not forwarded, so where the training window begins is decided by the
# processor — confirm this is intended.
split_bounds = dict(train_end=TRAIN_END, test_start=TEST_START, test_end=TEST_END)
train_df, test_df = processor.get_train_test_split(**split_bounds)

# Feature metadata reused by the environment and results cells
feature_info = processor.get_feature_info()

print(f"\nTrain: {len(train_df)} records")
print(f"Test: {len(test_df)} records")
tech_count = feature_info['n_tech_indicators']
sentiment_count = feature_info['n_sentiment_features']
print(f"Features: {tech_count} tech + {sentiment_count} sentiment")

In [None]:
# Create environments
def prepare_df_for_env(df):
    """Return a copy of ``df`` sorted by (date, tic) and indexed by day number.

    Each distinct value of the ``date`` column is assigned a consecutive
    integer (0 for the smallest date) which becomes the ``day`` index; rows
    sharing a date therefore share an index value. The input frame is left
    unmodified.
    """
    ordered = df.copy().sort_values(['date', 'tic']).reset_index(drop=True)
    unique_dates = sorted(ordered['date'].unique())
    day_lookup = dict(zip(unique_dates, range(len(unique_dates))))
    ordered['day'] = ordered['date'].map(day_lookup)
    return ordered.set_index('day')

# Re-index both splits by day number for the environment
train_df_indexed = prepare_df_for_env(train_df)
test_df_indexed = prepare_df_for_env(test_df)

# Training environment with the sentiment features switched on; the shared
# ENV_CONFIG values are merged into the same keyword set.
train_env_kwargs = dict(
    df=train_df_indexed,
    stock_dim=feature_info['n_tickers'],
    tech_indicator_list=feature_info['tech_indicators'],
    sentiment_feature_list=feature_info['sentiment_features'],
    include_sentiment=True,
    normalize_obs=False,
    print_verbosity=0,
    **ENV_CONFIG,
)
train_env = EnhancedPortfolioEnv(**train_env_kwargs)

print(f"State space: {train_env.state_space}")
print(f"Action space: {train_env.action_space.shape[0]} stocks")
# Distinct index values == number of days in each split
print(f"Train days: {len(train_df_indexed.index.unique())}")
print(f"Test days: {len(test_df_indexed.index.unique())}")

In [None]:
# Train the enhanced model
#
# Seeds the RNGs, builds a PPO agent on the sentiment-enabled training
# environment, trains for TOTAL_TIMESTEPS with periodic checkpoints, and saves
# the final policy under models/.

# Set seed for reproducibility
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

# Resolve the device once so the banner and the model always agree
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Every artifact of this run is named after EXPERIMENT_NAME, including the
# final model file, so renaming the experiment never overwrites an earlier
# run's saved model.
experiment_dir = f'experiments/{EXPERIMENT_NAME}'
model_path = f'models/ppo_{EXPERIMENT_NAME}'

print("="*70)
print("TRAINING ENHANCED MODEL WITH SENTIMENT")
print("="*70)
print(f"Seed: {SEED}")
print(f"Device: {device}")

# Create experiment directories
os.makedirs(f'{experiment_dir}/checkpoints', exist_ok=True)
os.makedirs(f'{experiment_dir}/logs', exist_ok=True)
os.makedirs('models', exist_ok=True)

# Wrap environment for training: Monitor inside a single-env DummyVecEnv
train_env_wrapped = DummyVecEnv([lambda: Monitor(train_env)])

# Create model
model = PPO(
    "MlpPolicy",
    train_env_wrapped,
    policy_kwargs=POLICY_KWARGS,
    tensorboard_log=f'{experiment_dir}/logs',
    device=device,
    **PPO_CONFIG,
)

# Checkpoint callback - saves every 100k steps
checkpoint_callback = CheckpointCallback(
    save_freq=100000,
    save_path=f'{experiment_dir}/checkpoints',
    name_prefix='ppo_enhanced',
)

# Train
start_time = datetime.now()
print(f"\nStarting training at {start_time}")
print(f"Timesteps: {TOTAL_TIMESTEPS:,}")
print(f"Checkpoints: {experiment_dir}/checkpoints/\n")

model.learn(
    total_timesteps=TOTAL_TIMESTEPS,
    callback=checkpoint_callback,
    progress_bar=True,
)

# Wall-clock duration; also recorded by the results cell
training_time = datetime.now() - start_time
print(f"\nTraining completed in {training_time}")

# Save model immediately
model.save(model_path)
print(f"Model saved to {model_path}.zip")

## 4. Evaluate Enhanced Model

In [None]:
# Evaluate enhanced model on test period
#
# Runs one deterministic episode of the trained policy over the test split and
# prints the portfolio statistics reported by the environment.
print("Evaluating enhanced model on test period...")

# Create fresh test environment (same settings as training, test data)
eval_test_env = EnhancedPortfolioEnv(
    df=test_df_indexed,
    stock_dim=feature_info['n_tickers'],
    tech_indicator_list=feature_info['tech_indicators'],
    sentiment_feature_list=feature_info['sentiment_features'],
    include_sentiment=True,
    normalize_obs=False,
    print_verbosity=0,
    **ENV_CONFIG,
)

# Run evaluation
obs, info = eval_test_env.reset()
done = False

while not done:
    # predict() is given a batch of one observation; take the single action back out
    action, _ = model.predict(obs.reshape(1, -1), deterministic=True)
    action = action[0]
    obs, reward, terminated, truncated, info = eval_test_env.step(action)
    # An episode is over when it is terminated OR truncated; checking only the
    # first flag would keep stepping an environment that signalled truncation.
    done = bool(terminated) or bool(truncated)

# Get results
stats = eval_test_env.get_portfolio_stats()

print("\n" + "="*50)
print("ENHANCED MODEL - TEST PERIOD RESULTS")
print("="*50)
print(f"Final Portfolio Value: ${stats['final_value']:,.2f}")
print(f"Total Return: {stats['total_return']*100:.2f}%")
print(f"Sharpe Ratio: {stats['sharpe_ratio']:.3f}")
print(f"Max Drawdown: {stats['max_drawdown']*100:.2f}%")
print(f"Total Trades: {stats['total_trades']}")
print(f"Transaction Costs: ${stats['total_cost']:,.2f}")

## 5. Train Baseline Model (Optional)

In [None]:
# Train baseline without sentiment for comparison
#
# Same data, seed, PPO settings and network as the enhanced run; the only
# difference is that the environment is built with include_sentiment=False
# and an empty sentiment feature list.
TRAIN_BASELINE = True  # Set to False to skip

if TRAIN_BASELINE:
    # Reset seeds so the baseline starts from the same RNG state as the enhanced run
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    random.seed(SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(SEED)

    print("="*70)
    print("TRAINING BASELINE MODEL (NO SENTIMENT)")
    print("="*70)

    # Create baseline environment
    baseline_train_env = EnhancedPortfolioEnv(
        df=train_df_indexed,
        stock_dim=feature_info['n_tickers'],
        tech_indicator_list=feature_info['tech_indicators'],
        sentiment_feature_list=[],
        include_sentiment=False,
        normalize_obs=False,
        print_verbosity=0,
        **ENV_CONFIG,
    )

    print(f"Baseline state space: {baseline_train_env.state_space}")

    # Train baseline
    baseline_env_wrapped = DummyVecEnv([lambda: Monitor(baseline_train_env)])

    baseline_model = PPO(
        "MlpPolicy",
        baseline_env_wrapped,
        policy_kwargs=POLICY_KWARGS,
        device='cuda' if torch.cuda.is_available() else 'cpu',
        **PPO_CONFIG,
    )

    baseline_model.learn(total_timesteps=TOTAL_TIMESTEPS, progress_bar=True)
    baseline_model.save('models/ppo_baseline_v1_runpod')
    print("Baseline model saved to models/ppo_baseline_v1_runpod.zip")

    # Evaluate baseline on the test split with a fresh environment
    eval_baseline_env = EnhancedPortfolioEnv(
        df=test_df_indexed,
        stock_dim=feature_info['n_tickers'],
        tech_indicator_list=feature_info['tech_indicators'],
        sentiment_feature_list=[],
        include_sentiment=False,
        normalize_obs=False,
        print_verbosity=0,
        **ENV_CONFIG,
    )

    obs, info = eval_baseline_env.reset()
    done = False
    while not done:
        # predict() is given a batch of one observation; take the single action back out
        action, _ = baseline_model.predict(obs.reshape(1, -1), deterministic=True)
        action = action[0]
        obs, reward, terminated, truncated, info = eval_baseline_env.step(action)
        # An episode is over when it is terminated OR truncated; checking only
        # the first flag would keep stepping an environment that signalled
        # truncation.
        done = bool(terminated) or bool(truncated)

    baseline_stats = eval_baseline_env.get_portfolio_stats()

    print("\n" + "="*50)
    print("BASELINE MODEL RESULTS")
    print("="*50)
    print(f"Final Portfolio Value: ${baseline_stats['final_value']:,.2f}")
    print(f"Total Return: {baseline_stats['total_return']*100:.2f}%")
    print(f"Sharpe Ratio: {baseline_stats['sharpe_ratio']:.3f}")
    print(f"Max Drawdown: {baseline_stats['max_drawdown']*100:.2f}%")
    print(f"Total Trades: {baseline_stats['total_trades']}")
else:
    print("Skipping baseline training.")

In [None]:
# Side-by-side table of test-period metrics; the last column is enhanced minus baseline
if TRAIN_BASELINE:
    print("\n" + "="*50)
    print("COMPARISON: ENHANCED vs BASELINE")
    print("="*50)
    print(f"{'Metric':<20} {'Enhanced':>15} {'Baseline':>15} {'Diff':>15}")
    print("-"*65)

    def _metric_pair(key, scale=None):
        """Return (enhanced, baseline) values for ``key``, multiplied by ``scale`` if given."""
        enhanced_value, baseline_value = stats[key], baseline_stats[key]
        if scale is None:
            return enhanced_value, baseline_value
        return enhanced_value * scale, baseline_value * scale

    # Returns and drawdowns are shown as percentages, hence the factor of 100
    metrics = [
        ('Sharpe Ratio', *_metric_pair('sharpe_ratio')),
        ('Total Return %', *_metric_pair('total_return', 100)),
        ('Max Drawdown %', *_metric_pair('max_drawdown', 100)),
        ('Total Trades', *_metric_pair('total_trades')),
    ]

    for label, enhanced_value, baseline_value in metrics:
        delta = enhanced_value - baseline_value
        print(f"{label:<20} {enhanced_value:>15.3f} {baseline_value:>15.3f} {delta:>+15.3f}")

## 6. Save Results & Download

In [None]:
# Persist the run's configuration and test-period metrics as JSON
def _stats_summary(portfolio_stats):
    """Copy the reported metrics out of a stats mapping as plain Python numbers."""
    return {
        "final_value": float(portfolio_stats['final_value']),
        "total_return": float(portfolio_stats['total_return']),
        "sharpe_ratio": float(portfolio_stats['sharpe_ratio']),
        "max_drawdown": float(portfolio_stats['max_drawdown']),
        "total_trades": int(portfolio_stats['total_trades']),
        "total_cost": float(portfolio_stats['total_cost']),
    }

# The seed is stored once at the top level, so it is dropped from the PPO settings copy
ppo_settings = dict(PPO_CONFIG)
ppo_settings.pop('seed', None)

# NOTE(review): feature_info is written as returned by the processor; json.dump
# raises if it holds values that are not JSON-serializable — confirm its contents.
results = {
    "experiment_name": EXPERIMENT_NAME,
    "timestamp": datetime.now().isoformat(),
    "training_time": str(training_time),
    "total_timesteps": TOTAL_TIMESTEPS,
    "seed": SEED,
    "config": {
        "env_config": ENV_CONFIG,
        "ppo_config": ppo_settings,
        "policy_kwargs": POLICY_KWARGS,
    },
    "feature_info": feature_info,
    "enhanced_results": _stats_summary(stats),
}

# Baseline metrics exist only when the optional baseline cell ran
if TRAIN_BASELINE:
    results["baseline_results"] = _stats_summary(baseline_stats)

os.makedirs(f'experiments/{EXPERIMENT_NAME}', exist_ok=True)

results_path = f'experiments/{EXPERIMENT_NAME}/results.json'
with open(results_path, 'w') as results_file:
    json.dump(results, results_file, indent=2)

print(f"Results saved to experiments/{EXPERIMENT_NAME}/results.json")

In [None]:
# Create zip file for download
!zip -r /workspace/trained_models.zip models/ experiments/
print("\nDownload /workspace/trained_models.zip using RunPod File Browser")
print("Or use SCP:")
print("  scp -P <PORT> root@<POD_IP>:/workspace/trained_models.zip ./")

In [None]:
# List saved files
# Shell directory listing of everything written under models/
print("Saved models:")
!ls -la models/
# IPython substitutes {EXPERIMENT_NAME} with the Python variable's value
# before handing the command to the shell
print("\nExperiment files:")
!ls -la experiments/{EXPERIMENT_NAME}/