# Finance Autocomplete RL Training

This notebook trains a small language model to perform financial data autocomplete using reinforcement learning with tool calls.

The model learns to:
1. Use financial tools (get_metrics, get_tickers, get_value, calculate) to retrieve data
2. Complete text with accurate financial information
3. Determine when no completion is needed

Training uses the ART (Agent Reinforcement Trainer) framework with GRPO.

In [None]:
%%capture
# Install required packages
!pip install openpipe-art==0.3.11.post3 "gql<4" --prerelease allow --no-cache-dir
!pip install openpipe aiosqlite httpx tqdm python-dotenv

In [None]:
import os
from dotenv import load_dotenv

# Load .env file if it exists
load_dotenv()


def _export_key(env_name: str, key_value: str) -> bool:
    """Write a non-empty key into ``os.environ`` under ``env_name``.

    Returns True when the key was non-empty (and therefore exported),
    False when it was empty and nothing was written.
    """
    if not key_value:
        return False
    os.environ[env_name] = key_value
    return True


# Required: OpenAI API key for LLM-as-judge evaluation
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")  # @param {type:"string"}
if not _export_key("OPENAI_API_KEY", OPENAI_API_KEY):
    print("Warning: No OpenAI API key provided. Judge evaluation will fall back to simple string matching.")

# Required: Tiingo API for financial data
TIINGO_API_KEY = os.getenv("TIINGO_API_KEY", "")  # @param {type:"string"}
if not _export_key("TIINGO_API_KEY", TIINGO_API_KEY):
    print("ERROR: TIINGO_API_KEY is required. Get a free key at https://api.tiingo.com")

# Optional: Weights & Biases for metrics tracking (silently skipped when absent)
WANDB_API_KEY = os.getenv("WANDB_API_KEY", "")  # @param {type:"string"}
_export_key("WANDB_API_KEY", WANDB_API_KEY)

# Optional: OpenPipe for completion logging (silently skipped when absent)
OPENPIPE_API_KEY = os.getenv("OPENPIPE_API_KEY", "")  # @param {type:"string"}
_export_key("OPENPIPE_API_KEY", OPENPIPE_API_KEY)

In [None]:
%%capture
# Install required packages
!pip install openpipe-art==0.3.11.post3 "gql<4" --prerelease allow --no-cache-dir
!pip install aiosqlite httpx tqdm

In [None]:
import os

# Keys may be typed into the form fields below. A field that is left empty
# falls back to the value already present in the environment, so running this
# cell never replaces a previously loaded key with an empty string (which
# would silently turn off the LLM judge and the Tiingo data source).

# Required: OpenAI API key for LLM-as-judge evaluation
OPENAI_API_KEY = ""  # @param {type:"string"}
OPENAI_API_KEY = OPENAI_API_KEY or os.getenv("OPENAI_API_KEY", "")
if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
else:
    print("Warning: No OpenAI API key provided. Judge evaluation will fall back to simple string matching.")

# Optional: Weights & Biases for metrics tracking
WANDB_API_KEY = ""  # @param {type:"string"}
WANDB_API_KEY = WANDB_API_KEY or os.getenv("WANDB_API_KEY", "")
if WANDB_API_KEY:
    os.environ["WANDB_API_KEY"] = WANDB_API_KEY

# Optional: Tiingo API for real financial data
TIINGO_API_KEY = ""  # @param {type:"string"}
TIINGO_API_KEY = TIINGO_API_KEY or os.getenv("TIINGO_API_KEY", "")
if TIINGO_API_KEY:
    os.environ["TIINGO_API_KEY"] = TIINGO_API_KEY

## Load Finance Modules

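The core finance modules (`database`, `synthetic`, `environment`, `agent`, `rewards`, `rollout`) are imported from Python files that must be importable from this notebook's working directory. On Colab, upload them to the runtime before running the import cell below.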

In [None]:
# Run configuration. Lines tagged `# @param` are rendered as Colab form fields.

# -- Training schedule --
NUM_STEPS = 100  # @param {type:"integer"}
NUM_CASES_PER_STEP = 10  # @param {type:"integer"}
NUM_ROLLOUTS_PER_CASE = 5  # @param {type:"integer"}
NUM_VALIDATION_CASES = 50  # @param {type:"integer"}
VALIDATION_FREQUENCY = 5  # @param {type:"integer"}

# -- Models --
MODEL_NAME = "finance_autocomplete_model"  # @param {type:"string"}
BASE_MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"  # @param {type:"string"}
VAL_BENCHMARK_MODEL = "gpt-4.1-nano"  # @param {type:"string"}

# -- Optimizer hyperparameters --
LEARNING_RATE = 5e-6  # @param {type:"number"}
BETA = 0.0  # @param {type:"number"}

# -- Reward shaping --
# The LLM judge is enabled exactly when an OpenAI key was supplied.
USE_JUDGE = bool(OPENAI_API_KEY)
EFFICIENCY_WEIGHT = 0.2  # @param {type:"number"}
COMPLETION_WEIGHT = 0.3  # @param {type:"number"}

# Echo the effective settings, one per line.
print(
    f"Configuration:",
    f"  Training steps: {NUM_STEPS}",
    f"  Cases per step: {NUM_CASES_PER_STEP}",
    f"  Rollouts per case: {NUM_ROLLOUTS_PER_CASE}",
    f"  Total trajectories per step: {NUM_CASES_PER_STEP * NUM_ROLLOUTS_PER_CASE}",
    f"  Using LLM judge: {USE_JUDGE}",
    sep="\n",
)

In [None]:
# Import all modules
import asyncio
import time
from typing import List, Dict, Any
from tqdm.asyncio import tqdm_asyncio

import art

# Import our finance modules
from database import setup_database, get_tickers_with_data
from synthetic import generate_cases, generate_training_data
from environment import FinancialEnvironment
from agent import AutocompleteAgent
from rewards import calculate_reward
from rollout import (
    run_single_rollout,
    conduct_rollouts,
    run_validation,
    generate_training_trajectories
)

In [None]:
# Setup the financial database
# NOTE(review): a later cell calls setup_database(tiingo_api_key=TIINGO_API_KEY);
# here it is called with no arguments. Presumably this form reads the key from
# the environment — confirm which signature the `database` module exposes.
await setup_database()  # Requires TIINGO_API_KEY

# Verify data: fetch the tickers the database reports as having data and
# print how many there are along with the list itself.
tickers = await get_tickers_with_data()
print(f"\nDatabase contains data for {len(tickers)} tickers: {tickers}")

In [None]:
# Run configuration. Lines tagged `# @param` are rendered as Colab form fields.
# NOTE(review): these names are also assigned by an earlier configuration cell;
# when both cells run, the values below are the ones in effect (this cell uses
# a different VAL_BENCHMARK_MODEL and additionally defines USE_TIINGO).

# -- Training schedule --
NUM_STEPS = 100  # @param {type:"integer"}
NUM_CASES_PER_STEP = 10  # @param {type:"integer"}
NUM_ROLLOUTS_PER_CASE = 5  # @param {type:"integer"}
NUM_VALIDATION_CASES = 50  # @param {type:"integer"}
VALIDATION_FREQUENCY = 5  # @param {type:"integer"}

# -- Models --
MODEL_NAME = "finance_autocomplete_model"  # @param {type:"string"}
BASE_MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"  # @param {type:"string"}
VAL_BENCHMARK_MODEL = "gpt-4.1-mini"  # @param {type:"string"}

# -- Optimizer hyperparameters --
LEARNING_RATE = 5e-6  # @param {type:"number"}
BETA = 0.0  # @param {type:"number"}

# -- Reward shaping --
# The LLM judge is enabled exactly when an OpenAI key was supplied.
USE_JUDGE = bool(OPENAI_API_KEY)
EFFICIENCY_WEIGHT = 0.2  # @param {type:"number"}
COMPLETION_WEIGHT = 0.3  # @param {type:"number"}

# -- Data source --
# Flag recording whether a Tiingo key was supplied.
USE_TIINGO = bool(TIINGO_API_KEY)

# Echo the effective settings, one per line.
print(
    f"Configuration:",
    f"  Training steps: {NUM_STEPS}",
    f"  Cases per step: {NUM_CASES_PER_STEP}",
    f"  Rollouts per case: {NUM_ROLLOUTS_PER_CASE}",
    f"  Total trajectories per step: {NUM_CASES_PER_STEP * NUM_ROLLOUTS_PER_CASE}",
    f"  Using LLM judge: {USE_JUDGE}",
    f"  Using Tiingo data: {USE_TIINGO}",
    sep="\n",
)

## Initialize Database

In [None]:
# Setup the financial database, passing the Tiingo key explicitly.
# NOTE(review): an earlier cell calls setup_database() with no arguments; the
# two calls cannot both match the function's intended usage unless
# `tiingo_api_key` is an optional keyword — confirm against the `database` module.
await setup_database(tiingo_api_key=TIINGO_API_KEY)

# Verify data: fetch the tickers the database reports as having data and
# print how many there are along with the list itself.
tickers = await get_tickers_with_data()
print(f"\nDatabase contains data for {len(tickers)} tickers: {tickers}")

## Generate Test Data

In [None]:
# Generate some sample test cases to verify everything works.
# `sample_cases` is reused by the single-rollout test further down.
sample_cases = await generate_cases(5)
print("Sample test cases:")
# Each case is indexed with the keys 'input' (the text to complete) and
# 'ground_truth' (the expected completion); numbering starts at 1 for display.
for i, case in enumerate(sample_cases, 1):
    print(f"\n{i}. Input: {case['input']}")
    print(f"   Expected: {case['ground_truth']}")

## Create Models

In [None]:
# Create the trainable model
from art.local import LocalBackend

model = art.TrainableModel(
    name=MODEL_NAME,
    project="finance-autocomplete-rl",
    base_model=BASE_MODEL_NAME,
)

# Create benchmark model for validation
benchmark_model = art.Model(
    name=VAL_BENCHMARK_MODEL,
    project="finance-autocomplete-rl",
    inference_model_name=VAL_BENCHMARK_MODEL,
    inference_api_key=os.getenv("OPENAI_API_KEY"),
    inference_base_url="https://api.openai.com/v1",
)

# Setup backend
backend = LocalBackend(path="./.art")

# Register models
await model.register(backend)
await benchmark_model.register(backend)

print(f"Models registered:")
print(f"  Training model: {MODEL_NAME}")
print(f"  Benchmark model: {VAL_BENCHMARK_MODEL}")

## Test Single Rollout

Before training, let's test a single rollout to ensure everything works.

In [None]:
# Test a single rollout
test_case = sample_cases[0]
print(f"Testing rollout with: {test_case['input']}")
print(f"Expected: {test_case['ground_truth']}")

result = await run_single_rollout(
    model=model,
    test_case=test_case,
    rollout_id=0,
    step=0,
    use_judge=USE_JUDGE
)

if result["success"]:
    print(f"\nPrediction: {result['completion']}")
    print(f"Reward: {result['reward_info']['total_reward']:.3f}")
    print(f"  - Correctness: {result['reward_info']['correctness_score']:.3f}")
    print(f"  - Efficiency: {result['reward_info']['efficiency_bonus']:.3f}")
    print(f"  - Completion: {result['reward_info']['completion_penalty']:.3f}")
    print(f"Tool calls: {result['episode_info']['tool_calls_count']}")
else:
    print(f"Error: {result.get('error')}")

## Training Loop

In [None]:
# Main training loop
for step in range(await model.get_step(), NUM_STEPS):
    print(f"\n{'='*60}")
    print(f"Step {step}/{NUM_STEPS}")
    print(f"{'='*60}")
    
    # Generate training cases for this step
    train_cases = await generate_cases(NUM_CASES_PER_STEP)
    
    # Conduct rollouts
    trajectory_groups = await conduct_rollouts(
        model=model,
        test_cases=train_cases,
        num_rollouts_per_case=NUM_ROLLOUTS_PER_CASE,
        step=step,
        use_judge=USE_JUDGE
    )
    
    # Calculate training metrics
    if trajectory_groups:
        total_trajectories = sum(len(tg.trajectories) for tg in trajectory_groups)
        avg_reward = sum(t.reward for tg in trajectory_groups for t in tg.trajectories) / total_trajectories
        avg_correct = sum(t.metrics.get("is_correct", 0) for tg in trajectory_groups for t in tg.trajectories) / total_trajectories
        
        print(f"Training metrics:")
        print(f"  Trajectories: {total_trajectories}")
        print(f"  Avg reward: {avg_reward:.3f}")
        print(f"  Accuracy: {avg_correct:.1%}")
    
    # Run validation periodically
    if step % VALIDATION_FREQUENCY == 0 and USE_JUDGE:
        print(f"\nRunning validation...")
        val_trajectories = await run_validation(
            my_model=model,
            benchmark_model=benchmark_model,
            num_validation_cases=NUM_VALIDATION_CASES,
            step=step,
            use_judge=USE_JUDGE
        )
        
        if val_trajectories:
            win_rate = sum(t.reward for t in val_trajectories) / len(val_trajectories)
            print(f"Validation win rate vs {VAL_BENCHMARK_MODEL}: {win_rate:.1%}")
            
            # Log validation trajectories
            await model.log(val_trajectories)
    
    # Train the model
    if trajectory_groups:
        await model.train(
            trajectory_groups=trajectory_groups,
            config=art.TrainConfig(
                learning_rate=LEARNING_RATE,
                beta=BETA
            )
        )
        
        # Clean up old checkpoints
        await model.delete_checkpoints()
        
        print(f"✓ Step {step} completed")
    else:
        print(f"⚠ No trajectories generated for step {step}")

print(f"\n{'='*60}")
print(f"Training completed!")
print(f"{'='*60}")

## Benchmark Against Other Models

In [None]:
# Compare final model against various baselines
if USE_JUDGE:
    print("Benchmarking against baseline models...")
    
    # Define baseline models to compare
    baseline_models = [
        ("gpt-4.1-nano", "gpt-4.1-nano"),
        ("gpt-4.1-mini", "gpt-4.1-mini"),
    ]
    
    results = {}
    
    for model_id, model_name in baseline_models:
        baseline_model = art.Model(
            name=model_id,
            project="finance-autocomplete-rl",
            inference_model_name=model_name,
            inference_api_key=os.getenv("OPENAI_API_KEY"),
            inference_base_url="https://api.openai.com/v1",
        )
        
        await baseline_model.register(backend)
        
        # Run evaluation
        val_trajectories = await run_validation(
            my_model=baseline_model,
            benchmark_model=benchmark_model,
            num_validation_cases=NUM_VALIDATION_CASES,
            step=0,
            use_judge=USE_JUDGE
        )
        
        if val_trajectories:
            win_rate = sum(t.reward for t in val_trajectories) / len(val_trajectories)
            avg_reward = sum(t.metrics.get("my_reward", 0) for t in val_trajectories) / len(val_trajectories)
            results[model_id] = {"win_rate": win_rate, "avg_reward": avg_reward}
            
            # Log for comparison
            await baseline_model.log(val_trajectories)
    
    # Display results
    print("\nBenchmark Results:")
    print(f"{'Model':<20} {'Win Rate':<15} {'Avg Reward':<15}")
    print("-" * 50)
    for model_id, metrics in results.items():
        print(f"{model_id:<20} {metrics['win_rate']:.1%}{'':.<8} {metrics['avg_reward']:.3f}")

## Visualization

In [None]:
# Load and visualize training progress
from art.utils.benchmarking.load_trajectories import load_trajectories
from art.utils.benchmarking.charts import training_progress_chart
from art.utils.benchmarking.types import BenchmarkModelKey

# Load trajectories
df = await load_trajectories(
    project_name="finance-autocomplete-rl",
    models=[MODEL_NAME],
    art_path="./.art",
)

# Plot win rate over time
if not df.empty and USE_JUDGE:
    models_to_plot = [
        BenchmarkModelKey(MODEL_NAME, MODEL_NAME, "val"),
    ]
    
    for model_id, _ in baseline_models:
        if model_id in results:
            models_to_plot.append(BenchmarkModelKey(model_id, model_id.upper(), "val"))
    
    chart = training_progress_chart(
        df,
        "win_rate",
        models=models_to_plot,
        title="Win Rate vs Benchmark Over Time",
        y_label="Win Rate",
    )
    
    chart.savefig("finance_autocomplete_training.png")
    print("Training chart saved to finance_autocomplete_training.png")

## Test Final Model

In [None]:
# Test the final trained model on some examples
print("Testing final model on sample inputs...\n")

test_inputs = [
    "Apple's revenue in 2023 was ",
    "The gross margin for Microsoft in 2023Q4 was ",
    "Google's market cap in 2023Q4 was ",
    "The debt to equity ratio for Apple in 2023FY was ",
    "The CFO mentioned that ",
]

agent = AutocompleteAgent(model=model)

for input_text in test_inputs:
    print(f"Input: {input_text}")
    
    completion, tool_calls, info = await agent.get_completion(input_text)
    
    print(f"Completion: {completion}")
    print(f"Tool calls: {info['tool_calls_count']}")
    
    if tool_calls and info['tool_calls_count'] <= 10:
        print("Tool sequence:")
        for tc in tool_calls[:10]:  # Show first 10 tools
            args_str = ", ".join(f"{k}={v}" for k, v in tc.get('arguments', {}).items())
            print(f"  - {tc['tool']}({args_str})")
    
    print()

## Save Model Information

In [None]:
# Save the run's configuration to training_info.json for later reference.
import json

# Snapshot of the settings this notebook was run with.
training_info = {
    "model_name": MODEL_NAME,
    "base_model": BASE_MODEL_NAME,
    "num_steps": NUM_STEPS,
    "cases_per_step": NUM_CASES_PER_STEP,
    "rollouts_per_case": NUM_ROLLOUTS_PER_CASE,
    "learning_rate": LEARNING_RATE,
    "beta": BETA,
    "use_judge": USE_JUDGE,
    "use_tiingo": USE_TIINGO,
    "efficiency_weight": EFFICIENCY_WEIGHT,
    "completion_weight": COMPLETION_WEIGHT,
}

with open("training_info.json", "w") as f:
    json.dump(training_info, f, indent=2)

print("Training information saved to training_info.json")
# ART's local backend stores each model under <art_path>/<project>/models/<name>,
# so the project and "models" path segments are part of the location.
print(f"\nModel checkpoint available at: ./.art/finance-autocomplete-rl/models/{MODEL_NAME}")