# Final Production Model Training

**Purpose:** Train the final production model using:
- **Baseline configuration** (no sentiment features - best performer from ablation)
- **Optuna-tuned hyperparameters** (Sharpe 2.28)

**Expected Results:**
- Sharpe Ratio: ~1.6-2.0
- Total Return: ~45-55%
- Max Drawdown: <15%

**Training Time:** ~40 minutes on RTX 4090

## 1. Setup Environment

In [None]:
import os
import subprocess

# Remote repository and the local path it is checked out to.
REPO_URL = "https://github.com/nimeshk03/enhanced-rl-portfolio.git"
WORK_DIR = "/workspace/enhanced-rl-portfolio"

if os.path.exists(WORK_DIR):
    # Refresh the existing checkout in place. `cwd` points git at the checkout
    # without moving the notebook's working directory before git has succeeded.
    print("Repository exists, pulling latest...")
    subprocess.run(["git", "pull"], cwd=WORK_DIR, check=True)
else:
    print("Cloning repository...")
    # Make sure the parent directory exists and name the destination
    # explicitly, so the checkout always lands at WORK_DIR instead of a
    # directory name git derives from the URL.
    os.makedirs(os.path.dirname(WORK_DIR), exist_ok=True)
    subprocess.run(["git", "clone", REPO_URL, WORK_DIR], check=True)

# `check=True` raises CalledProcessError when git fails, so this line is only
# reached once the checkout is available.
os.chdir(WORK_DIR)
print(f"Working directory: {os.getcwd()}")

In [None]:
!pip install -q stable-baselines3 gymnasium pandas numpy pyyaml

import os
import sys
os.chdir("/workspace/enhanced-rl-portfolio")
sys.path.insert(0, "/workspace/enhanced-rl-portfolio")

from src.data.enhanced_processor import EnhancedDataProcessor

processor = EnhancedDataProcessor(
    price_path="data/processed_data.csv",
    sentiment_path="data/historical_sentiment_complete.csv"
)

train_data, test_data = processor.get_train_test_split(
    train_end="2024-06-30",
    test_start="2024-07-01"
)

print(f"\nTrain: {len(train_data)} records")
print(f"Test: {len(test_data)} records")
print(f"Train period: {train_data['date'].min()} to {train_data['date'].max()}")
print(f"Test period: {test_data['date'].min()} to {test_data['date'].max()}")

In [None]:
import os
import sys
import torch
from datetime import datetime

# Work from the repository root and expose its `src` package before the
# project imports below.
os.chdir("/workspace/enhanced-rl-portfolio")
sys.path.insert(0, "/workspace/enhanced-rl-portfolio")

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from src.env.enhanced_portfolio_env import EnhancedPortfolioEnv
from src.data.enhanced_processor import EnhancedDataProcessor

# --- Run identity and training budget ---------------------------------------
EXPERIMENT_NAME = "final_production_model"
TOTAL_TIMESTEPS = 1_500_000

# --- PPO settings (unpacked into the PPO constructor later in this cell) -----
TUNED_HYPERPARAMETERS = dict(
    learning_rate=0.000812,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.992,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.0024,
    vf_coef=0.428,
    max_grad_norm=0.769,
)

POLICY_KWARGS = dict(net_arch=[256, 256])

# --- Indicator columns handed to the environment as `tech_indicator_list` ---
TECH_INDICATORS = [
    'macd',
    'boll_ub',
    'boll_lb',
    'rsi_30',
    'cci_30',
    'dx_30',
    'close_30_sma',
    'close_60_sma',
    'vix',
    'turbulence',
]

# --- Data: load the CSVs and split them by date ------------------------------
processor = EnhancedDataProcessor(
    sentiment_path="data/historical_sentiment_complete.csv",
    price_path="data/processed_data.csv",
)
train_data, test_data = processor.get_train_test_split(
    test_start="2024-07-01",
    train_end="2024-06-30",
)

def create_env(data, mode="train"):
    """Build an ``EnhancedPortfolioEnv`` over ``data`` with sentiment disabled.

    Args:
        data: Dataset forwarded to the environment as ``df``.
        mode: Forwarded unchanged as the environment's ``mode`` argument.

    Returns:
        A newly constructed ``EnhancedPortfolioEnv``.
    """
    # Settings that stay fixed regardless of which dataset or mode is used.
    fixed_settings = dict(
        stock_dim=10,
        hmax=100,
        initial_amount=100000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        reward_scaling=1e-4,
        tech_indicator_list=TECH_INDICATORS,
        sentiment_feature_list=[],
        include_sentiment=False,
        normalize_obs=True,
    )
    return EnhancedPortfolioEnv(df=data, mode=mode, **fixed_settings)

def _make_train_env():
    """Factory that DummyVecEnv calls to build the single training environment."""
    return create_env(train_data, "train")


train_env = DummyVecEnv([_make_train_env])

# Output locations for this experiment. Creating the nested log directory
# also creates the experiment directory itself.
exp_dir = f"experiments/{EXPERIMENT_NAME}"
log_dir = f"{exp_dir}/logs"
os.makedirs(log_dir, exist_ok=True)

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"\nDevice: {device}")
print(f"Training {TOTAL_TIMESTEPS:,} timesteps...")
print("Estimated time: ~40 minutes on RTX 4090\n")

model = PPO(
    policy="MlpPolicy",
    env=train_env,
    device=device,
    verbose=1,
    tensorboard_log=log_dir,
    policy_kwargs=POLICY_KWARGS,
    **TUNED_HYPERPARAMETERS,
)

# Wall-clock duration of the training call only.
start_time = datetime.now()
model.learn(total_timesteps=TOTAL_TIMESTEPS, progress_bar=True)
training_time = datetime.now() - start_time

model_path = f"{exp_dir}/ppo_final_production.zip"
model.save(model_path)

print(
    "\n".join(
        [
            "\nTraining complete!",
            f"Time: {training_time}",
            f"Model saved: {model_path}",
        ]
    )
)

## 3. Configuration

This run uses the Optuna-tuned hyperparameters with the baseline (no-sentiment) configuration.

In [None]:
import os
import sys
import json
import numpy as np
import pandas as pd
from datetime import datetime

# Work from the repository root and expose its `src` package.
os.chdir("/workspace/enhanced-rl-portfolio")
sys.path.insert(0, "/workspace/enhanced-rl-portfolio")

# --- Run identity and training budget ---------------------------------------
EXPERIMENT_NAME = "final_production_model"
TOTAL_TIMESTEPS = 1_500_000

# --- PPO hyperparameters ------------------------------------------------------
TUNED_HYPERPARAMETERS = dict(
    learning_rate=0.000812,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.992,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.0024,
    vf_coef=0.428,
    max_grad_norm=0.769,
)

# --- Policy network layout ----------------------------------------------------
POLICY_KWARGS = dict(net_arch=[256, 256])

# --- Environment settings (sentiment switched off for the baseline) ----------
ENV_CONFIG = dict(
    hmax=100,
    initial_amount=100000,
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    reward_scaling=1e-4,
    include_sentiment=False,
    normalize_obs=True,
)

# Echo the key settings so the run is self-describing in the notebook output.
summary_lines = [
    "Configuration:",
    f"  Experiment: {EXPERIMENT_NAME}",
    f"  Timesteps: {TOTAL_TIMESTEPS:,}",
    "  Sentiment: DISABLED (baseline)",
    f"  Learning Rate: {TUNED_HYPERPARAMETERS['learning_rate']}",
    f"  Network: {POLICY_KWARGS['net_arch']}",
]
print("\n".join(summary_lines))

## 4. Load Data

In [None]:
import os
import sys

# Work from the repository root and expose its `src` package before the
# project import below.
os.chdir("/workspace/enhanced-rl-portfolio")
sys.path.insert(0, "/workspace/enhanced-rl-portfolio")

from src.data.enhanced_processor import EnhancedDataProcessor

# Input CSVs, relative to the repository root.
data_sources = {
    "price_path": "data/processed_data.csv",
    "sentiment_path": "data/historical_sentiment_complete.csv",
}
processor = EnhancedDataProcessor(**data_sources)

# Date-based split: training ends 2024-06-30, testing starts 2024-07-01.
train_data, test_data = processor.get_train_test_split(
    test_start="2024-07-01",
    train_end="2024-06-30",
)

# Report the size of each part, then the date range each part spans.
print(f"\nTrain: {len(train_data)} records")
print(f"Test: {len(test_data)} records")
train_dates = train_data['date']
print(f"Train period: {train_dates.min()} to {train_dates.max()}")
test_dates = test_data['date']
print(f"Test period: {test_dates.min()} to {test_dates.max()}")

## 5. Train Final Model

In [None]:
import os
import sys
import torch
from datetime import datetime

# Work from the repository root and expose its `src` package before the
# project imports below.
os.chdir("/workspace/enhanced-rl-portfolio")
sys.path.insert(0, "/workspace/enhanced-rl-portfolio")

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from src.env.enhanced_portfolio_env import EnhancedPortfolioEnv
from src.data.enhanced_processor import EnhancedDataProcessor

# --- Run identity and training budget ---------------------------------------
EXPERIMENT_NAME = "final_production_model"
TOTAL_TIMESTEPS = 1_500_000

# --- PPO settings (unpacked into the PPO constructor later in this cell) -----
TUNED_HYPERPARAMETERS = dict(
    learning_rate=0.000812,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.992,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.0024,
    vf_coef=0.428,
    max_grad_norm=0.769,
)

POLICY_KWARGS = dict(net_arch=[256, 256])

# --- Indicator columns handed to the environment as `tech_indicator_list` ---
TECH_INDICATORS = [
    'macd',
    'boll_ub',
    'boll_lb',
    'rsi_30',
    'cci_30',
    'dx_30',
    'close_30_sma',
    'close_60_sma',
    'vix',
    'turbulence',
]

# --- Data: load the CSVs and split them by date ------------------------------
processor = EnhancedDataProcessor(
    sentiment_path="data/historical_sentiment_complete.csv",
    price_path="data/processed_data.csv",
)
train_data, test_data = processor.get_train_test_split(
    test_start="2024-07-01",
    train_end="2024-06-30",
)

def create_env(data, mode="train"):
    """Build an ``EnhancedPortfolioEnv`` over ``data`` with sentiment disabled.

    Args:
        data: Dataset forwarded to the environment as ``df``.
        mode: Forwarded unchanged as the environment's ``mode`` argument.

    Returns:
        A newly constructed ``EnhancedPortfolioEnv``.
    """
    # Settings that stay fixed regardless of which dataset or mode is used.
    fixed_settings = dict(
        stock_dim=10,
        hmax=100,
        initial_amount=100000,
        buy_cost_pct=0.001,
        sell_cost_pct=0.001,
        reward_scaling=1e-4,
        tech_indicator_list=TECH_INDICATORS,
        sentiment_feature_list=[],
        include_sentiment=False,
        normalize_obs=True,
    )
    return EnhancedPortfolioEnv(df=data, mode=mode, **fixed_settings)

def _make_train_env():
    """Factory that DummyVecEnv calls to build the single training environment."""
    return create_env(train_data, "train")


train_env = DummyVecEnv([_make_train_env])

# Output locations for this experiment. Creating the nested log directory
# also creates the experiment directory itself.
exp_dir = f"experiments/{EXPERIMENT_NAME}"
log_dir = f"{exp_dir}/logs"
os.makedirs(log_dir, exist_ok=True)

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"\nDevice: {device}")
print(f"Training {TOTAL_TIMESTEPS:,} timesteps...")
print("Estimated time: ~40 minutes on RTX 4090\n")

model = PPO(
    policy="MlpPolicy",
    env=train_env,
    device=device,
    verbose=1,
    tensorboard_log=log_dir,
    policy_kwargs=POLICY_KWARGS,
    **TUNED_HYPERPARAMETERS,
)

# Wall-clock duration of the training call only.
start_time = datetime.now()
model.learn(total_timesteps=TOTAL_TIMESTEPS, progress_bar=True)
training_time = datetime.now() - start_time

model_path = f"{exp_dir}/ppo_final_production.zip"
model.save(model_path)

print(
    "\n".join(
        [
            "\nTraining complete!",
            f"Time: {training_time}",
            f"Model saved: {model_path}",
        ]
    )
)

## 6. Evaluate Model

In [None]:
import os
import sys
import json
import numpy as np
from datetime import datetime

# Work from the repository root and expose its `src` package before the
# project imports below.
os.chdir("/workspace/enhanced-rl-portfolio")
sys.path.insert(0, "/workspace/enhanced-rl-portfolio")

from stable_baselines3 import PPO
from src.env.enhanced_portfolio_env import EnhancedPortfolioEnv
from src.data.enhanced_processor import EnhancedDataProcessor

# --- Where the trained model lives -------------------------------------------
EXPERIMENT_NAME = "final_production_model"
exp_dir = f"experiments/{EXPERIMENT_NAME}"
model_path = f"{exp_dir}/ppo_final_production.zip"

# --- Indicator columns handed to the environment -----------------------------
TECH_INDICATORS = [
    'macd',
    'boll_ub',
    'boll_lb',
    'rsi_30',
    'cci_30',
    'dx_30',
    'close_30_sma',
    'close_60_sma',
    'vix',
    'turbulence',
]

# --- Data: same files and split dates as the training cell -------------------
processor = EnhancedDataProcessor(
    sentiment_path="data/historical_sentiment_complete.csv",
    price_path="data/processed_data.csv",
)
train_data, test_data = processor.get_train_test_split(
    test_start="2024-07-01",
    train_end="2024-06-30",
)

# --- Test environment: training settings, but over test data in "test" mode --
test_env_settings = dict(
    stock_dim=10,
    hmax=100,
    initial_amount=100000,
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    reward_scaling=1e-4,
    tech_indicator_list=TECH_INDICATORS,
    sentiment_feature_list=[],
    include_sentiment=False,
    normalize_obs=True,
)
test_env = EnhancedPortfolioEnv(df=test_data, mode="test", **test_env_settings)

# --- Trained policy, read back from disk -------------------------------------
model = PPO.load(model_path)

def _json_default(value):
    """Fallback serialiser handed to ``json.dumps`` for NumPy values.

    ``json`` only calls this hook for objects it cannot encode on its own, so
    anything that already serialised is written exactly as before.

    Args:
        value: The object ``json`` could not encode.

    Returns:
        The equivalent built-in Python number (NumPy scalar) or list (array).

    Raises:
        TypeError: If ``value`` is neither a NumPy scalar nor a NumPy array.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


print("Evaluating on test set...")
# Run one episode on the test environment, always taking the policy's
# deterministic action, until the environment reports the episode is over.
obs, _ = test_env.reset()
done = False
while not done:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = test_env.step(action)
    done = terminated or truncated

# Summary statistics the environment computed for the episode.
stats = test_env.get_portfolio_stats()

print("\n" + "="*60)
print("FINAL MODEL EVALUATION RESULTS")
print("="*60)
print(f"Total Return:    {stats['total_return']*100:.2f}%")
print(f"Sharpe Ratio:    {stats['sharpe_ratio']:.3f}")
print(f"Max Drawdown:    {stats['max_drawdown']*100:.2f}%")
print(f"Total Trades:    {stats['total_trades']}")
print(f"Final Value:     ${stats['final_value']:,.2f}")
print("="*60)

# Record of the run: the configuration values are repeated here by hand and
# must be kept in sync with the training cell.
results = {
    "experiment_name": EXPERIMENT_NAME,
    "timestamp": datetime.now().isoformat(),
    "config": {
        "include_sentiment": False,
        "timesteps": 1_500_000,
        "hyperparameters": {
            "learning_rate": 0.000812,
            "batch_size": 64,
            "ent_coef": 0.0024,
            "net_arch": [256, 256],
        }
    },
    "metrics": {
        "sharpe_ratio": stats['sharpe_ratio'],
        "total_return": stats['total_return'],
        "max_drawdown": stats['max_drawdown'],
        "total_trades": stats['total_trades'],
        "final_value": stats['final_value'],
    }
}

# Serialise before opening the file so a serialisation error cannot leave a
# truncated results.json behind. The `default` hook covers NumPy scalar types
# (e.g. float32 / int64) that json rejects; NOTE(review): the exact types
# returned by get_portfolio_stats() are not visible here.
results_json = json.dumps(results, indent=2, default=_json_default)
with open(f"{exp_dir}/results.json", "w") as f:
    f.write(results_json)

print(f"\nResults saved to {exp_dir}/results.json")

## 7. Package for Download

Creates a zip archive of the whole experiment directory (the trained model, its training logs, and any saved results) for download and deployment.

In [None]:
import os
import shutil

# Relative paths below are resolved against the repository root.
os.chdir("/workspace/enhanced-rl-portfolio")

EXPERIMENT_NAME = "final_production_model"
exp_dir = f"experiments/{EXPERIMENT_NAME}"

# `make_archive` appends the ".zip" suffix to this base name itself.
output_zip = f"/workspace/{EXPERIMENT_NAME}"
shutil.make_archive(base_name=output_zip, format='zip', root_dir=exp_dir)

# Report the archive size in MiB.
zip_file = f"{output_zip}.zip"
zip_size = os.path.getsize(zip_file) / 1024 / 1024
print(f"Created: {output_zip}.zip ({zip_size:.2f} MB)")
print("\nDownload this file and extract to your local project.")

## 8. Copy Model for Paper Trading

Also copy the model to `models/`, a fixed location that the paper-trading configuration can point to.

In [None]:
import shutil
import os

# Relative paths below are resolved against the repository root.
os.chdir("/workspace/enhanced-rl-portfolio")

EXPERIMENT_NAME = "final_production_model"

# Source: the model written by the training cell. Destination: a fixed path
# under models/ that does not depend on the experiment name.
src_model = f"experiments/{EXPERIMENT_NAME}/ppo_final_production.zip"
dst_model = "models/ppo_final_production.zip"

# The destination directory may not exist yet on a fresh checkout.
os.makedirs("models", exist_ok=True)
shutil.copy(src_model, dst_model)

print(
    "\n".join(
        [
            f"Model copied to: {dst_model}",
            "\nUpdate your config.py MODEL_PATH to:",
            'MODEL_PATH = "./models/ppo_final_production.zip"',
        ]
    )
)