# OpenAI Supervised Fine-Tuning using Direct Preference Optimization (DPO)

This recipe allows TensorZero users to fine-tune OpenAI models using Direct Preference Optimization (DPO) and their own data. Since TensorZero automatically logs all inferences and feedback, it is straightforward to fine-tune a model using your own data and any prompt you want.

To get started:

 - Set the `CLICKHOUSE_URL` environment variable. For example: `CLICKHOUSE_URL="http://localhost:8123/tensorzero"`
 - Set the `OPENAI_API_KEY` environment variable.
 - Update the following parameters:

In [None]:
# Path to the TensorZero configuration file (TOML) that declares the function and variant below
CONFIG_PATH = "../../../examples/ner-fine-tuning-demonstrations/config/tensorzero.toml"

# The name of the function whose logged inferences and demonstrations are queried for training data
FUNCTION_NAME = "extract_entities"

# The name of the variant to use to grab the templates used for fine-tuning
# (the ClickHouse query is also restricted to inferences produced by this variant)
TEMPLATE_VARIANT_NAME = "gpt4o_initial_prompt"

# Maximum number of samples to use for fine-tuning (applied as the LIMIT of the ClickHouse query)
MAX_SAMPLES = 100

# The name of the model to fine-tune (supported models: https://platform.openai.com/docs/guides/fine-tuning)
MODEL_NAME = "gpt-4o-2024-08-06"

In [None]:
import json
import os
import tempfile
import time
from pathlib import Path
from pprint import pprint
from typing import Any, Dict, List

import openai
import toml
from clickhouse_connect import get_client
from IPython.display import clear_output
from minijinja import Environment

Load the TensorZero configuration file.

In [None]:
# Resolve the configured location and stop early when it cannot be read as a file.
config_path = Path(CONFIG_PATH)

assert config_path.exists(), f"{CONFIG_PATH} does not exist"
assert config_path.is_file(), f"{CONFIG_PATH} is not a file"

# Parse the TOML document into a plain nested dictionary.
with open(config_path, "r") as config_file:
    config = toml.load(config_file)

Ensure that the function and variant being fine-tuned are present in the provided config.

In [None]:
# Validate the config top-down so that each failure names the missing piece
# instead of surfacing as a bare KeyError from a nested lookup.
assert "functions" in config, "No `[functions]` section found in config"
assert FUNCTION_NAME in config["functions"], (
    f"No function named `{FUNCTION_NAME}` found in config"
)
assert "variants" in config["functions"][FUNCTION_NAME], (
    f"No variants section found for function `{FUNCTION_NAME}`"
)
assert TEMPLATE_VARIANT_NAME in config["functions"][FUNCTION_NAME]["variants"], (
    f"No variant named `{TEMPLATE_VARIANT_NAME}` found in function `{FUNCTION_NAME}`"
)

Retrieve the configuration for the variant with the templates we will use for fine-tuning.

In [None]:
# Look the function up once, then read its type and the selected variant from it.
function_config = config["functions"][FUNCTION_NAME]

function_type = function_config["type"]
variant = function_config["variants"][TEMPLATE_VARIANT_NAME]

In [None]:
# Maps a message role ("assistant", "system", "user") to the source text of its
# template, for every template the variant declares.
templates = {}

# Pre-declare the path variables so that code which later tests them for
# truthiness (e.g. `system_template_path`) does not hit a NameError when the
# variant does not declare the corresponding template.
assistant_template_path = None
system_template_path = None
user_template_path = None

# Template paths in the config are resolved relative to the config file's directory.
if "assistant_template" in variant:
    assistant_template_path = config_path.parent / variant["assistant_template"]
    with assistant_template_path.open("r") as f:
        templates["assistant"] = f.read()

if "system_template" in variant:
    system_template_path = config_path.parent / variant["system_template"]
    with system_template_path.open("r") as f:
        templates["system"] = f.read()

if "user_template" in variant:
    user_template_path = config_path.parent / variant["user_template"]
    with user_template_path.open("r") as f:
        templates["user"] = f.read()

# MiniJinja environment used to render messages by role name.
env = Environment(templates=templates)

Initialize the ClickHouse client.

In [None]:
# Fail with a readable message before attempting to create the client.
assert "CLICKHOUSE_URL" in os.environ, "CLICKHOUSE_URL environment variable not set"

# The URL from the environment is handed to clickhouse_connect as the DSN.
clickhouse_client = get_client(dsn=os.environ["CLICKHOUSE_URL"])

Determine the ClickHouse table name for the function.

In [None]:
# Only "json" functions have a table mapping here; every other type is rejected below.
inference_table_name = "JsonInference" if function_type == "json" else None

if inference_table_name is None:
    raise ValueError(f"Unsupported function type: {function_type}")

Query ClickHouse for the inferences of the selected function and variant, joined with their demonstration feedback.

In [None]:
# Pair every logged inference with its demonstration: the inference output becomes
# the non-preferred completion and the demonstration value the preferred one.
# The table name is interpolated into the SQL text, while the filter values and
# the row limit are passed separately as query parameters.
# NOTE(review): there is no ORDER BY, so which rows LIMIT keeps is presumably
# arbitrary — confirm whether a deterministic or random ordering is wanted.
query = f"""
SELECT
    i.variant_name AS variant,
    i.input AS input,
    i.output AS non_preferred_output,
    d.value AS preferred_output
FROM 
    {inference_table_name} AS i
INNER 
    JOIN DemonstrationFeedback AS d ON i.id = d.inference_id
WHERE 
    (i.function_name = %(function_name)s) AND
    (i.variant_name = %(variant_name)s)
LIMIT %(max_samples)s
"""

# Values bound to the %(name)s placeholders in the query above.
params = {
    "max_samples": MAX_SAMPLES,
    "function_name": FUNCTION_NAME,
    "variant_name": TEMPLATE_VARIANT_NAME,
}

# Run the query and load the result into a DataFrame.
df = clickhouse_client.query_df(query, params)

# Preview the first rows.
df.head()

OpenAI requires the DPO fine-tuning data to be structured in the following [format](https://platform.openai.com/docs/guides/fine-tuning#preference):

``` 
{
  "input": {
    "messages": [
      {
        "role": "user",
        "content": "<string>"
      }
    ],
    "tools": [],
    "parallel_tool_calls": true
  },
  "preferred_output": [
    {
      "role": "assistant",
      "content": "<string>"
    }
  ],
  "non_preferred_output": [
    {
      "role": "assistant",
      "content": "<string>"
    }
  ]
}

```

In [None]:
def render_message(content: List[Dict[str, Any]], role: str) -> str:
    """Turn the content blocks of one input message into a single string.

    Args:
        content: The message's content blocks; exactly one block of type
            "text" is accepted.
        role: Either "user" or "assistant"; also the name of the template
            used when the block's value is not already a string.

    Returns:
        The block's value when it is a string, otherwise the result of
        rendering the role's template with the value as keyword arguments.

    Raises:
        ValueError: If there is not exactly one block, or it is not text.
    """
    assert role in ["user", "assistant"], f"Invalid role: {role}"

    if len(content) != 1:
        raise ValueError(f"Message must have exactly one content block: {content}")

    only_block = content[0]
    if only_block["type"] != "text":
        raise ValueError(f"Content block must be of the type text: {content}")

    value = only_block["value"]

    # Plain strings are used verbatim; anything else is treated as template arguments.
    if not isinstance(value, str):
        return env.render_template(role, **value)
    return value


def render_output_message(output):
    """Wrap a decoded function output as an OpenAI assistant message.

    The shape of `output` depends on the module-level `function_type`:
    for "json" the `raw` field is used as the content; for "chat" the
    output must be exactly one text block, whose `text` is used.

    Raises:
        ValueError: For an unsupported function type, or for a chat output
            that is not a single text block.
    """
    if function_type == "json":
        return {"role": "assistant", "content": output["raw"]}

    if function_type != "chat":
        raise ValueError(f"Unsupported function type: {function_type}")

    # Chat outputs: only a lone text block can be represented.
    if len(output) != 1:
        raise ValueError(f"Output {output} must have exactly one content block")

    only_block = output[0]
    if only_block["type"] != "text":
        raise ValueError(f"Output {output} must be a text block")

    return {"role": "assistant", "content": only_block["text"]}


def sample_to_openai_messages(sample) -> Dict[str, Any]:
    """Convert one query-result row into an OpenAI DPO training example.

    Args:
        sample: A row exposing the JSON-encoded `input`, `preferred_output`
            (the demonstration) and `non_preferred_output` (the logged
            inference output) fields.

    Returns:
        A dict with the keys `input` (holding `messages`, `tools` and
        `parallel_tool_calls`), `preferred_output` and `non_preferred_output`.

    Raises:
        ValueError: Propagated from `render_message` or
            `render_output_message` when a message or output cannot be rendered.
    """
    function_input = json.loads(sample["input"])

    result = {
        "input": {"messages": [], "tools": [], "parallel_tool_calls": True},
        "preferred_output": [],
        "non_preferred_output": [],
    }
    messages = result["input"]["messages"]

    # Add the system message to the rendered messages.
    # If there is data passed in or a system template there must be a system message.
    # `templates` holds a "system" entry only when the variant defines a system template.
    system = function_input.get("system")
    if "system" in templates:
        # With a template, the stored system value supplies the template
        # arguments (none when the input carries no system value).
        system_message = env.render_template("system", **(system or {}))
        messages.append({"role": "system", "content": system_message})
    elif system:
        # Without a template, the stored system value is used as-is.
        messages.append({"role": "system", "content": system})

    # Add the input messages to the rendered messages
    for message in function_input["messages"]:
        rendered_message = render_message(message["content"], message["role"])
        messages.append({"role": message["role"], "content": rendered_message})

    # Add the demonstration (preferred output)
    preferred_output = json.loads(sample["preferred_output"])
    result["preferred_output"].append(render_output_message(preferred_output))

    # Add the inference output (non-preferred output)
    non_preferred_output = json.loads(sample["non_preferred_output"])
    result["non_preferred_output"].append(render_output_message(non_preferred_output))

    return result


# Build one DPO training example per row (axis=1 passes each row to the function).
df["openai_messages"] = df.apply(sample_to_openai_messages, axis=1)

# Preview the first rows, now including the new column.
df.head()

Upload the prepared datasets to OpenAI.

In [None]:
def upload_dataset_to_openai(df, openai_client) -> str:
    """Write the training examples to a JSONL file and upload it to OpenAI.

    Args:
        df: DataFrame whose `openai_messages` column holds one
            JSON-serializable training example per row.
        openai_client: Client whose `files.create` performs the upload.

    Returns:
        The `id` of the file object returned by the upload.
    """
    # delete=False: the file is intentionally left on disk after the upload.
    jsonl_file = tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False)
    with jsonl_file:
        # One JSON document per line.
        for example in df["openai_messages"]:
            jsonl_file.write(json.dumps(example))
            jsonl_file.write("\n")
        # Make sure everything is on disk before the file is re-opened for reading.
        jsonl_file.flush()

        print(f"File persisted on path [{jsonl_file.name}]")

        with open(jsonl_file.name, "rb") as payload:
            uploaded = openai_client.files.create(file=payload, purpose="fine-tune")

    return uploaded.id


# NOTE(review): no credentials are passed here; presumably the client picks up
# the OPENAI_API_KEY environment variable mentioned in the setup steps — confirm.
openai_client = openai.OpenAI()

# ID of the uploaded training file; passed as `training_file` when the job is created.
dpo_fine_tuning_object_id = upload_dataset_to_openai(df, openai_client)

Launch the fine-tuning job and wait for it to complete.

**Note:** This step takes a while. You can monitor the progress and the estimated completion time on OpenAI's fine-tuning [dashboard](https://platform.openai.com/finetune/).

In [None]:
# Create the DPO fine-tuning job on the uploaded file. The base model comes from
# the MODEL_NAME parameter set at the top of the notebook, so changing that
# parameter actually changes the model being fine-tuned.
fine_tuning_job = openai_client.fine_tuning.jobs.create(
    training_file=dpo_fine_tuning_object_id,
    model=MODEL_NAME,
    method={
        "type": "dpo",
        "dpo": {
            "hyperparameters": {"beta": 0.2},
        },
    },
)

# Poll the job every 10 seconds until it reaches a terminal status,
# redrawing the cell output with the latest job details each time.
while True:
    clear_output(wait=True)

    try:
        job_status = openai_client.fine_tuning.jobs.retrieve(fine_tuning_job.id)
        pprint(job_status.to_dict())
        if job_status.status in ("succeeded", "failed", "cancelled"):
            break
    except Exception as e:
        # Deliberately best-effort: a failed status check is reported and
        # polling continues rather than aborting the wait.
        print(f"Error: {e}")

    time.sleep(10)

print(f"The fine-tuning job has completed with result {job_status.status}")

Once the fine-tuning job is complete, you can add the fine-tuned model to your config file.

In [None]:
# Name of the model produced by the finished job.
fine_tuned_model = job_status.fine_tuned_model

# Single OpenAI provider entry pointing at the fine-tuned model.
openai_provider = {"type": "openai", "model_name": fine_tuned_model}

# `[models.<name>]` section, keyed by the fine-tuned model's name.
model_config = {
    "models": {
        fine_tuned_model: {
            "routing": ["openai"],
            "providers": {"openai": openai_provider},
        }
    }
}

# Print the section as TOML so it can be pasted into the config file.
print(toml.dumps(model_config))

Finally, add a new variant to your function to use the fine-tuned model.

In [None]:
# Start from the minimal chat-completion variant that targets the fine-tuned model.
variant_config = {"type": "chat_completion", "model": fine_tuned_model}

# Carry over whichever templates the original variant declares,
# in the order system, user, assistant.
for template_key in ("system_template", "user_template", "assistant_template"):
    template_path = variant.get(template_key)
    if template_path:
        variant_config[template_key] = template_path

# Nest the variant under the function, keyed by the fine-tuned model's name.
full_variant_config = {
    "functions": {FUNCTION_NAME: {"variants": {fine_tuned_model: variant_config}}}
}

# Print the section as TOML so it can be pasted into the config file.
print(toml.dumps(full_variant_config))

You're all set!

You can change the weight to enable a gradual rollout of the new model.

You might also add other parameters (e.g. max_tokens, temperature) to the variant section in the config file.