# Final Production Model Training (RunPod)

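Train the final production model using:
- **Baseline configuration** (no sentiment - best from ablation study)
- **Optuna-tuned hyperparameters** (Sharpe 2.28)

> **Note:** the training and evaluation cells below set `include_sentiment=True` and save under `experiments/final_sentiment_model`, which does not match the "no sentiment" baseline described above. Confirm which configuration is intended before relying on the results.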

**Expected:** Sharpe ~1.6-2.0, Return ~45-55%, Drawdown <15%

In [None]:
# Cell 2: Install dependencies (including tqdm and rich for progress bar)
!pip install -q stable-baselines3[extra] gymnasium pandas numpy tqdm rich

In [None]:
# Cell 2: Install dependencies
!pip install -q stable-baselines3[extra] gymnasium pandas numpy

In [None]:
# Cell 4: Train with OPTUNA SETTINGS (sentiment=True, normalize_obs=False)
import os
import sys
import json
import numpy as np
import pandas as pd
from datetime import datetime

os.chdir("/workspace/enhanced-rl-portfolio")
sys.path.insert(0, "/workspace/enhanced-rl-portfolio")

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from src.env.enhanced_portfolio_env import EnhancedPortfolioEnv
from src.data.enhanced_processor import EnhancedDataProcessor, ProcessorConfig

# ============ CONFIGURATION ============
# Experiment label; the training section derives its output directory from it.
EXPERIMENT_NAME = "final_sentiment_model"
# Training budget: larger than the 300k steps used during the Optuna search,
# aiming for better generalization.
TOTAL_TIMESTEPS = 800_000

# PPO hyperparameters from Optuna trial 21 (Sharpe 2.28).
# Unpacked as keyword arguments into the PPO constructor.
PPO_CONFIG = dict(
    learning_rate=8.12e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.992,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=2.4e-3,
    vf_coef=0.428,
    max_grad_norm=0.769,
)

# Network architecture from Optuna: two hidden layers of 256 units for each of
# the `pi` and `vf` entries of `net_arch`.
POLICY_KWARGS = {
    "net_arch": {
        "pi": [256, 256],
        "vf": [256, 256],
    },
}

# ============ LOAD DATA (same as Optuna) ============
print("Loading data with Optuna settings...")

# Processor configuration: feature normalization enabled, normalization_window=60.
config = ProcessorConfig(normalize_features=True, normalization_window=60)
processor = EnhancedDataProcessor(
    price_path="data/processed_data.csv",
    sentiment_path="data/historical_sentiment_complete.csv",
    config=config,
)

# Decide the train/test boundary from the raw price file:
#   * data reaching 2024-07-01 or later -> fixed calendar split at mid-2024
#   * otherwise                         -> chronological 80/20 split over the
#                                          distinct dates present in the file
price_df = pd.read_csv('data/processed_data.csv')
max_date = pd.to_datetime(price_df['date'].max())
if not (max_date >= pd.to_datetime('2024-07-01')):
    dates = sorted(price_df['date'].unique())
    split_idx = int(len(dates) * 0.8)
    # Last training date and first test date sit on either side of the cut.
    train_end, test_start = dates[split_idx - 1], dates[split_idx]
    print(f"Using 80/20 split: train_end={train_end}, test_start={test_start}")
else:
    train_end, test_start = '2024-06-30', '2024-07-01'

train_df, test_df = processor.get_train_test_split(
    train_end=train_end,
    test_start=test_start,
)
# Feature metadata; later code reads 'n_tickers', 'tech_indicators' and
# 'sentiment_features' from it.
feature_info = processor.get_feature_info()

# Prepare data with day index
def prepare_env_data(df):
    """Return a copy of `df` ordered by (date, tic) and indexed by day number.

    Each distinct value in the 'date' column is assigned a 0-based integer in
    ascending order; that integer becomes the index (named 'day'), so all rows
    sharing a date share one index value. The input frame is left unmodified.
    """
    ordered = df.copy().sort_values(['date', 'tic']).reset_index(drop=True)
    # Lookup from each distinct date to its position in sorted order.
    day_of = {stamp: pos for pos, stamp in enumerate(sorted(ordered['date'].unique()))}
    return ordered.assign(day=ordered['date'].map(day_of)).set_index('day')

# Convert both splits to the day-indexed layout in one pass.
train_data, test_data = (prepare_env_data(split) for split in (train_df, test_df))

# Summary of what was loaded: row counts, number of distinct day indices, and
# the feature lists reported by the processor.
print(f"Train: {len(train_data)} records, {train_data.index.nunique()} days")
print(f"Test: {len(test_data)} records, {test_data.index.nunique()} days")
print(f"Tech indicators: {feature_info['tech_indicators']}")
print(f"Sentiment features: {feature_info['sentiment_features']}")

# ============ CREATE ENVIRONMENT (OPTUNA SETTINGS) ============
def make_env(data, mode="train"):
    """Build an EnhancedPortfolioEnv over `data` and wrap it in a Monitor.

    Args:
        data: Frame passed to the environment as `df`.
        mode: Forwarded unchanged to the environment's `mode` argument.

    Returns:
        The environment wrapped in `Monitor`.

    Ticker count and feature lists are read from the module-level
    `feature_info` dict.
    """
    env_kwargs = dict(
        df=data,
        stock_dim=feature_info['n_tickers'],
        # Trading parameters.
        hmax=100,
        initial_amount=100000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        reward_scaling=1e-4,
        # Observation features.
        tech_indicator_list=feature_info['tech_indicators'],
        sentiment_feature_list=feature_info['sentiment_features'],
        include_sentiment=True,    # OPTUNA SETTING
        normalize_obs=False,       # OPTUNA SETTING
        print_verbosity=0,
        mode=mode,
    )
    return Monitor(EnhancedPortfolioEnv(**env_kwargs))

# Single-environment vectorized wrapper; the lambda defers environment
# construction until DummyVecEnv invokes it.
train_env = DummyVecEnv([lambda: make_env(train_data, "train")])

# ============ TRAIN MODEL ============
exp_dir = f"experiments/{EXPERIMENT_NAME}"
os.makedirs(exp_dir, exist_ok=True)

model = PPO(
    "MlpPolicy",
    train_env,
    **PPO_CONFIG,
    policy_kwargs=POLICY_KWARGS,
    verbose=1,
    device="cuda",
)

# Report the device the model actually ended up on rather than assuming CUDA:
# stable-baselines3 falls back to CPU when CUDA is requested but not available,
# in which case an unconditional "on CUDA" banner would be wrong.
device_label = str(model.device).upper()
print(f"\nTraining {TOTAL_TIMESTEPS:,} timesteps on {device_label}...")
print("Settings: sentiment=True, normalize_obs=False (Optuna config)")
print("Estimated time: ~25-30 minutes\n")
if model.device.type != "cuda":
    print("WARNING: CUDA is not available; training on CPU, the time estimate above does not apply.\n")

# Wall-clock duration of the learn() call only.
start_time = datetime.now()
model.learn(total_timesteps=TOTAL_TIMESTEPS, progress_bar=True)
training_time = datetime.now() - start_time

# Persist the trained policy inside the experiment directory.
model_path = f"{exp_dir}/ppo_sentiment_tuned.zip"
model.save(model_path)

print(f"\n{'='*60}")
print("TRAINING COMPLETE")
print(f"{'='*60}")
print(f"Time: {training_time}")
print(f"Model: {model_path}")

In [None]:
# Cell 5: Evaluate the new model
import os
import sys
import json
import numpy as np
import pandas as pd
from datetime import datetime

os.chdir("/workspace/enhanced-rl-portfolio")
sys.path.insert(0, "/workspace/enhanced-rl-portfolio")

from stable_baselines3 import PPO
from src.env.enhanced_portfolio_env import EnhancedPortfolioEnv
from src.data.enhanced_processor import EnhancedDataProcessor, ProcessorConfig

# Locations of the artifacts written by the training cell.
EXPERIMENT_NAME = "final_sentiment_model"
exp_dir = f"experiments/{EXPERIMENT_NAME}"
model_path = f"{exp_dir}/ppo_sentiment_tuned.zip"

# Load data (same settings as training)
config = ProcessorConfig(normalize_features=True, normalization_window=60)
processor = EnhancedDataProcessor(
    price_path="data/processed_data.csv",
    sentiment_path="data/historical_sentiment_complete.csv",
    config=config,
)

# Re-derive the train/test boundary with the same rule as the training cell:
# fixed mid-2024 split when the data reaches 2024-07-01, otherwise a
# chronological 80/20 split over the distinct dates.
price_df = pd.read_csv('data/processed_data.csv')
max_date = pd.to_datetime(price_df['date'].max())
if not (max_date >= pd.to_datetime('2024-07-01')):
    dates = sorted(price_df['date'].unique())
    split_idx = int(len(dates) * 0.8)
    train_end, test_start = dates[split_idx - 1], dates[split_idx]
else:
    train_end, test_start = '2024-06-30', '2024-07-01'

# Only the held-out portion is needed here; the training frame is discarded.
_, test_df = processor.get_train_test_split(
    train_end=train_end,
    test_start=test_start,
)
feature_info = processor.get_feature_info()

def prepare_env_data(df):
    """Return a copy of `df` ordered by (date, tic) and indexed by day number.

    Distinct 'date' values are numbered from 0 in ascending order and that
    number is used as the index (named 'day'); rows for the same date share an
    index value. The caller's frame is not modified.
    """
    frame = df.copy().sort_values(['date', 'tic']).reset_index(drop=True)
    unique_dates = sorted(frame['date'].unique())
    # Position of each distinct date in sorted order.
    day_lookup = dict(zip(unique_dates, range(len(unique_dates))))
    frame['day'] = frame['date'].map(day_lookup)
    return frame.set_index('day')

test_data = prepare_env_data(test_df)

# Create test environment (SAME settings as training)
test_env = EnhancedPortfolioEnv(
    df=test_data,
    stock_dim=feature_info['n_tickers'],
    hmax=100,
    initial_amount=100000,
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    reward_scaling=1e-4,
    tech_indicator_list=feature_info['tech_indicators'],
    sentiment_feature_list=feature_info['sentiment_features'],
    include_sentiment=True,    # OPTUNA SETTING
    normalize_obs=False,       # OPTUNA SETTING
    print_verbosity=0,
    mode="test",
)

# Load and evaluate
model = PPO.load(model_path)
print("Evaluating on test set...")

# Roll out one deterministic episode over the test data.
# step() returns a 5-tuple; following the Gymnasium convention
# (obs, reward, terminated, truncated, info), the episode is over when EITHER
# flag is set. Checking only `terminated` would keep stepping an environment
# that ended the episode via truncation.
obs, _ = test_env.reset()
done = False
while not done:
    # predict() is given a batch of one observation, so take the first action.
    action, _ = model.predict(obs.reshape(1, -1), deterministic=True)
    obs, _, terminated, truncated, _ = test_env.step(action[0])
    done = bool(terminated or truncated)

stats = test_env.get_portfolio_stats()

print(f"\n{'='*60}")
print("EVALUATION RESULTS (New Model - 800k steps)")
print(f"{'='*60}")
print(f"Sharpe Ratio:  {stats['sharpe_ratio']:.3f}")
print(f"Total Return:  {stats['total_return']*100:.2f}%")
print(f"Max Drawdown:  {stats['max_drawdown']*100:.2f}%")
print(f"Total Trades:  {stats['total_trades']}")
print(f"Final Value:   ${stats['final_value']:,.2f}")
print(f"{'='*60}")

# Compare with previous best (Sharpe of the ppo_enhanced_tuned model).
# Kept in one place so the banner and the delta cannot drift apart.
PREVIOUS_BEST_SHARPE = 1.10
sharpe_delta = stats['sharpe_ratio'] - PREVIOUS_BEST_SHARPE
print(f"\nComparison with ppo_enhanced_tuned (Sharpe {PREVIOUS_BEST_SHARPE:.2f}):")
if sharpe_delta > 0:
    print(f"  IMPROVEMENT: +{sharpe_delta:.3f} Sharpe")
else:
    print(f"  No improvement: {sharpe_delta:.3f} Sharpe")

# Save results
# The "config" values are recorded by hand and must be kept in sync with the
# training cell; metrics are cast to plain Python numbers so json can
# serialize them.
results = {
    "experiment_name": EXPERIMENT_NAME,
    "timestamp": datetime.now().isoformat(),
    "config": {
        "include_sentiment": True,
        "normalize_obs": False,
        "timesteps": 800_000,
        "learning_rate": 0.000812,
        "batch_size": 64,
        "net_arch": [256, 256],
    },
    "metrics": {
        "sharpe_ratio": float(stats['sharpe_ratio']),
        "total_return": float(stats['total_return']),
        "max_drawdown": float(stats['max_drawdown']),
        "total_trades": int(stats['total_trades']),
        "final_value": float(stats['final_value']),
    }
}

with open(f"{exp_dir}/results.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)
print(f"\nResults saved to {exp_dir}/results.json")

In [None]:
# Cell 6: Package for Download
import os
import shutil

os.chdir("/workspace/enhanced-rl-portfolio")

# Experiment whose artifacts are packaged by this cell.
EXPERIMENT_NAME = "final_sentiment_model"
exp_dir = f"experiments/{EXPERIMENT_NAME}"

# Copy to models folder
os.makedirs("models", exist_ok=True)
shutil.copy(
    src=f"{exp_dir}/ppo_sentiment_tuned.zip",
    dst="models/ppo_sentiment_tuned.zip",
)

# Create download zip
# make_archive appends ".zip" to the base name, so the archive is written to
# f"{output_zip}.zip" and contains everything under the experiment directory.
output_zip = f"/workspace/{EXPERIMENT_NAME}"
shutil.make_archive(base_name=output_zip, format='zip', root_dir=exp_dir)

# Tell the user where the artifacts are and how to switch the app over.
print(f"Model copied to: models/ppo_sentiment_tuned.zip")
print(f"Download: {output_zip}.zip ({os.path.getsize(f'{output_zip}.zip')/1024/1024:.1f} MB)")
print(f"\nIf this model is better, update config.py:")
print('MODEL_PATH = "./models/ppo_sentiment_tuned.zip"')

In [None]:
# Cell 6: Package for Download
import os
import shutil

os.chdir("/workspace/enhanced-rl-portfolio")

# Experiment whose artifacts are packaged by this cell.
EXPERIMENT_NAME = "final_production_model"
exp_dir = f"experiments/{EXPERIMENT_NAME}"
model_src = f"{exp_dir}/ppo_final_production.zip"

# NOTE(review): the training cell in this notebook saves
# experiments/final_sentiment_model/ppo_sentiment_tuned.zip; nothing visible
# here writes the production checkpoint expected below. Fail fast with an
# actionable message instead of an opaque copy error, and before creating any
# output directories.
if not os.path.isfile(model_src):
    raise FileNotFoundError(
        f"{model_src} not found. This notebook's training cell saves "
        "experiments/final_sentiment_model/ppo_sentiment_tuned.zip; train and "
        "save the production model first, or run the packaging cell for the "
        "sentiment model instead."
    )

# Copy to models folder
os.makedirs("models", exist_ok=True)
shutil.copy(model_src, "models/ppo_final_production.zip")

# Create download zip
# make_archive appends ".zip" to the base name and archives the whole
# experiment directory.
output_zip = f"/workspace/{EXPERIMENT_NAME}"
shutil.make_archive(output_zip, 'zip', exp_dir)

print(f"Model copied to: models/ppo_final_production.zip")
print(f"Download: {output_zip}.zip ({os.path.getsize(f'{output_zip}.zip')/1024/1024:.1f} MB)")
print(f"\nUpdate config.py:")
print('MODEL_PATH = "./models/ppo_final_production.zip"')