In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up before each cell executes, without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [None]:
import json
from pathlib import Path
from importlib import import_module
from utils_shared.file_upload import get_or_upload_file

In [None]:
# Project identifier: interpolated into the `build_hour.<project>...` module
# paths and the `build_hour/<project>/data/...` dataset paths used below.
project = "finqa_model_text"

In [None]:
# Import the project's openai_client module by its dotted path and take the
# `client` object it exposes.
client_module = import_module(
    name=f"build_hour.{project}.utils_tools.openai_client",
)
client = getattr(client_module, "client")

In [None]:
# load the datasets used for training
train_file = f"build_hour/{project}/data/{project}_train.jsonl"
test_file = f"build_hour/{project}/data/{project}_val.jsonl"


def _load_jsonl(path):
    """Return the JSON records stored one per line in the file at ``path``.

    The file is decoded as UTF-8 explicitly rather than with the platform's
    default encoding, and blank or whitespace-only lines (for example a
    trailing empty line) are skipped instead of raising
    ``json.JSONDecodeError``.
    """
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]


train_items = _load_jsonl(train_file)
test_items = _load_jsonl(test_file)

In [None]:
# Show the keys of the first training record as the cell output.
train_items[0].keys()

In [None]:
# Import the project's text_format module and bind its
# RESPONSE_FORMAT_COMPLETIONS object for use when creating the job.
text_format_module = import_module(
    f"build_hour.{project}.utils_tools.text_format"
)
RESPONSE_FORMAT_COMPLETIONS = getattr(
    text_format_module, "RESPONSE_FORMAT_COMPLETIONS"
)

In [None]:
# Import the project's graders module and bind its GRADER_OBJECT.
graders_module = import_module(
    f"build_hour.{project}.utils_tools.graders"
)
GRADER_OBJECT = getattr(graders_module, "GRADER_OBJECT")

In [None]:
# Import the project's tools module, then bind the two tool collections it
# exposes: TOOLS_COMPLETIONS and JOB_LEVEL_TOOLS.
tools_module = import_module(
    f"build_hour.{project}.utils_tools.tools"
)
TOOLS_COMPLETIONS = getattr(tools_module, "TOOLS_COMPLETIONS")
JOB_LEVEL_TOOLS = getattr(tools_module, "JOB_LEVEL_TOOLS")

In [None]:
# Bind the tools module's TOOLS_RESPONSES object and show it as the cell output.
TOOLS_RESPONSES = getattr(tools_module, "TOOLS_RESPONSES")
TOOLS_RESPONSES

# Running the RFT job

In [None]:
# Call get_or_upload_file once per dataset and store each result under its
# label; the "train" / "test" entries are later passed to the job as
# training_file / validation_file.
file_ids = {}
for label, file in {"train": train_file, "test": test_file}.items():
    file_ids[label] = get_or_upload_file(
        client, Path(file), purpose="fine-tune", project=project, debug=False
    )

In [None]:
# Configuration for the fine-tuning job created in the next cell.

# -- model and run identity --
model = "gpt-5-2025-08-07"
seed = 42
max_episode_steps = 50
# The suffix embeds the project name and the episode-step limit.
suffix = f"{project}-max_episode_steps_{max_episode_steps}"

# -- grader and response format, taken from the project modules loaded earlier --
grader = GRADER_OBJECT
response_format = RESPONSE_FORMAT_COMPLETIONS

# -- reinforcement hyper-parameters --
reasoning_effort = "medium"
n_epochs = 1
batch_size = 16
compute_multiplier = 1
eval_samples = 2
eval_interval = 5

In [None]:
# run the RFT job
# The grader and response format are read from the `grader` and
# `response_format` variables set in the hyper-parameter cell (rather than from
# the module-level GRADER_OBJECT / RESPONSE_FORMAT_COMPLETIONS directly), so an
# override made in that cell is what actually gets submitted.
job = client.fine_tuning.jobs.create(
    training_file=file_ids["train"],
    # The "test" entry holds the file built from the *_val.jsonl dataset.
    validation_file=file_ids["test"],
    model=model,
    suffix=suffix,
    seed=seed,
    method={
        "type": "reinforcement",
        "reinforcement": {
            "tools": JOB_LEVEL_TOOLS,
            "grader": grader,
            "response_format": response_format,
            "max_episode_steps": max_episode_steps,
            "hyperparameters": {
                "compute_multiplier": compute_multiplier,
                "eval_samples": eval_samples,
                "eval_interval": eval_interval,
                "n_epochs": n_epochs,
                "reasoning_effort": reasoning_effort,
                "batch_size": batch_size,
            },
        },
    },
)
print("[RFT] Job created:", job.id)
print(f"View the job details at: https://platform.openai.com/finetune/{job.id}")