## Setup + env + paths (repo root)

In [None]:
import os
import time
from pathlib import Path

from dotenv import load_dotenv
from openai import OpenAI

# The notebook is expected to run from the repo root (simple_sample_RFT/), so
# both the .env file and the data/ directory are resolved against the current
# working directory.
load_dotenv(override=True)  # override=True: values from .env replace existing env vars


def _read_env(name: str) -> str:
    """Return env var *name* with surrounding whitespace and quotes removed.

    A missing variable yields "" so the caller can validate it explicitly.
    """
    return os.getenv(name, "").strip().strip('"').strip("'")


# Required env vars
AZURE_OPENAI_ENDPOINT = _read_env("AZURE_OPENAI_ENDPOINT").rstrip("/")
AZURE_OPENAI_API_KEY = _read_env("AZURE_OPENAI_API_KEY")
FUNC_APP_NAME = os.getenv("FUNC_APP_NAME", "").strip()  # e.g. keli19-rft-tools-func2-without-auth

# Optional env var: include only if your tool endpoint requires it
FUNC_KEY = os.getenv("FUNC_KEY", "").strip()

# Validate with explicit exceptions rather than `assert`: assert statements are
# removed when Python runs with -O, which would silently skip these checks.
for _name, _value in (
    ("AZURE_OPENAI_ENDPOINT", AZURE_OPENAI_ENDPOINT),
    ("AZURE_OPENAI_API_KEY", AZURE_OPENAI_API_KEY),
    ("FUNC_APP_NAME", FUNC_APP_NAME),
):
    if not _value:
        raise RuntimeError(f"Missing {_name}")

base_url = f"{AZURE_OPENAI_ENDPOINT}/openai/v1/"
client = OpenAI(api_key=AZURE_OPENAI_API_KEY, base_url=base_url)

# Diagnostics: repr() exposes stray whitespace/quotes in the URL; only the
# key's length is printed, never the key itself.
print("base_url repr:", repr(base_url))
print("key length:", len(AZURE_OPENAI_API_KEY))
print("FUNC_APP_NAME:", FUNC_APP_NAME)
print("FUNC_KEY present?:", bool(FUNC_KEY))

# Paths (repo root)
REPO_ROOT = Path.cwd()
TRAIN_PATH = REPO_ROOT / "data" / "train_tool.jsonl"
VALID_PATH = REPO_ROOT / "data" / "valid_tool.jsonl"

# Fail before any upload is attempted if a dataset file is missing.
for _label, _path in (("training", TRAIN_PATH), ("validation", VALID_PATH)):
    if not _path.exists():
        raise FileNotFoundError(f"Missing {_label} file: {_path}")

print("TRAIN_PATH:", TRAIN_PATH)
print("VALID_PATH:", VALID_PATH)


## Upload training/validation files

In [None]:
def wait_file_ready(file_id: str, label: str, poll_seconds: int = 2, timeout_seconds=None):
    """Poll an uploaded file until the service reports it as ready.

    Args:
        file_id: ID of the uploaded file to poll.
        label: Short name used as a prefix in progress and error messages.
        poll_seconds: Delay between two consecutive status checks.
        timeout_seconds: Optional upper bound on the total wait. ``None``
            (the default) waits indefinitely, as before.

    Returns:
        The last retrieved file object, whose status is "processed" or
        "completed".

    Raises:
        RuntimeError: If the status becomes "error" or "failed".
        TimeoutError: If ``timeout_seconds`` elapses before the file is ready.
    """

    def _field(obj, name):
        # Read the field as an attribute first; only fall back to mapping
        # access for real dicts. Calling .get() on a non-mapping response
        # would raise AttributeError and hide the actual failure details.
        value = getattr(obj, name, None)
        if value is None and isinstance(obj, dict):
            value = obj.get(name)
        return value

    deadline = None if timeout_seconds is None else time.monotonic() + timeout_seconds

    while True:
        f = client.files.retrieve(file_id)
        status = _field(f, "status")
        print(f"{label} status:", status)

        if status in ("processed", "completed"):
            return f

        if status in ("error", "failed"):
            details = _field(f, "status_details")
            raise RuntimeError(f"{label} failed: {details}")

        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"{label} not ready after {timeout_seconds} seconds (last status: {status})"
            )

        time.sleep(poll_seconds)

# Upload both datasets. Each file is opened in a `with` block so its handle is
# closed as soon as the upload call returns, instead of staying open for the
# rest of the notebook session.
with open(TRAIN_PATH, "rb") as train_fh:
    train = client.files.create(file=train_fh, purpose="fine-tune")
with open(VALID_PATH, "rb") as valid_fh:
    valid = client.files.create(file=valid_fh, purpose="fine-tune")

print("train_file_id:", train.id)
print("valid_file_id:", valid.id)

# Block until both uploads are reported ready.
wait_file_ready(train.id, "train")
wait_file_ready(valid.id, "valid")


In [None]:

import os, time
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv(override=True)  # override=True: values from .env replace existing env vars

# Normalize the raw values: drop surrounding whitespace and stray quotes, and
# the endpoint's trailing slash so the URL join below never yields "//".
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT", "").strip().strip('"').strip("'").rstrip("/")
api_key = os.getenv("AZURE_OPENAI_API_KEY", "").strip().strip('"').strip("'")

# Fail fast on missing configuration. Without this check a missing endpoint
# silently produces the relative base URL "/openai/v1/".
if not azure_endpoint:
    raise RuntimeError("Missing AZURE_OPENAI_ENDPOINT")
if not api_key:
    raise RuntimeError("Missing AZURE_OPENAI_API_KEY")

base_url = azure_endpoint + "/openai/v1/"

# Diagnostics: repr() exposes stray characters in the URL; only the key's
# length is printed, never the key itself.
print("base_url repr:", repr(base_url))
print("key length:", len(api_key))

client = OpenAI(
    api_key=api_key,
    base_url=base_url,
)


# 1) Upload files (purpose must be fine-tune)

def wait_file_ready(file_id: str, label: str, poll_seconds: int = 2, timeout_seconds=None):
    """Poll an uploaded file until the service reports it as ready.

    Args:
        file_id: ID of the uploaded file to poll.
        label: Short name used as a prefix in progress and error messages.
        poll_seconds: Delay between two consecutive status checks
            (defaults to the previously hard-coded 2 seconds).
        timeout_seconds: Optional upper bound on the total wait. ``None``
            (the default) waits indefinitely, as before.

    Returns:
        The last retrieved file object, whose status is "processed" or
        "completed".

    Raises:
        RuntimeError: If the status becomes "error" or "failed".
        TimeoutError: If ``timeout_seconds`` elapses before the file is ready.
    """

    def _field(obj, name):
        # Read the field as an attribute first; only fall back to mapping
        # access for real dicts. Calling .get() on a non-mapping response
        # would raise AttributeError and hide the actual failure details.
        value = getattr(obj, name, None)
        if value is None and isinstance(obj, dict):
            value = obj.get(name)
        return value

    deadline = None if timeout_seconds is None else time.monotonic() + timeout_seconds

    while True:
        f = client.files.retrieve(file_id)
        status = _field(f, "status")
        print(f"{label} status:", status)
        # Common "ready" states across OpenAI-style APIs
        if status in ("processed", "completed"):
            return f
        # Fail fast if the backend marks the file as bad
        if status in ("error", "failed"):
            details = _field(f, "status_details")
            raise RuntimeError(f"{label} failed: {details}")
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"{label} not ready after {timeout_seconds} seconds (last status: {status})"
            )
        time.sleep(poll_seconds)

# Upload both datasets (paths are relative to the current working directory).
# Each file is opened in a `with` block so its handle is closed as soon as the
# upload call returns, instead of staying open for the rest of the session.
with open("train_tool.jsonl", "rb") as train_fh:
    train = client.files.create(file=train_fh, purpose="fine-tune")
with open("valid_tool.jsonl", "rb") as valid_fh:
    valid = client.files.create(file=valid_fh, purpose="fine-tune")

print("train_file_id:", train.id)
print("valid_file_id:", valid.id)

# Block until both uploads are reported ready.
wait_file_ready(train.id, "train")
wait_file_ready(valid.id, "valid")

# 2) Define the tool server (your deployed Azure Function)
tool_server_url = "https://rft-tools-func-12212.azurewebsites.net/tool/search_catalog"

# Read and validate the function key up front. An unset variable used to raise
# a bare KeyError, and an empty or whitespace-only value was forwarded as the
# auth header unchanged.
_func_key = os.getenv("FUNC_KEY", "").strip()
if not _func_key:
    raise RuntimeError("Missing FUNC_KEY (needed for the X-Functions-Key tool header)")

# IMPORTANT (per the Agentic RFT preview documentation): headers are forwarded
# exactly as provided, and training logs redact them.
tools = [
    {
        "name": "search_catalog",
        "server_url": tool_server_url,
        "headers": {
            "X-Functions-Key": _func_key,
        },
    },
]

# 3) Multi grader
# Combines two sub-graders that both compare the model's output text with the
# reference_answer field of each JSONL line:
#   * "exact": string_check with operation "eq"
#   * "fuzzy": text_similarity with the fuzzy_match metric
# The final score is the weighted sum given in calculate_output.

import time  # kept from the original cell; time.sleep is used by the polling code

grader = dict(
    type="multi",
    name="strict_partial_credit",
    invalid_grade=0.0,
    graders=dict(
        exact=dict(
            type="string_check",
            name="exact",
            operation="eq",
            input="{{sample.output_text}}",
            reference="{{item.reference_answer}}",
        ),
        fuzzy=dict(
            type="text_similarity",
            name="fuzzy",
            evaluation_metric="fuzzy_match",
            input="{{sample.output_text}}",
            reference="{{item.reference_answer}}",
        ),
    ),
    calculate_output="0.9 * exact + 0.1 * fuzzy",
)



# 4) Create the RFT job (agentic: multi grader + tool calling)

# Hyperparameters are kept in their own dict so they are easy to tweak
# between runs.
rft_hyperparameters = {
    "eval_interval": 3,
    "eval_samples": 5,
    "compute_multiplier": 1.0,
    "reasoning_effort": "medium",
}

# Reinforcement method: the multi grader scores each sample, and the tool
# list makes the search_catalog tool available during episodes.
rft_method = {
    "type": "reinforcement",
    "reinforcement": {
        "grader": grader,
        "tools": tools,
        "max_episode_steps": 10,
        "hyperparameters": rft_hyperparameters,
    },
}

job = client.fine_tuning.jobs.create(
    model="gpt-5-2025-08-07",  # alternative used in this notebook: "o4-mini-2025-04-16"
    training_file=train.id,
    validation_file=valid.id,
    suffix="tc-mg-no-eg",
    method=rft_method,
)

print("job_id:", job.id)
print("status:", job.status)

# 5) Optional status polling (left disabled; uncomment to block until the job
#    reaches a terminal state).
# while True:
#     j = client.fine_tuning.jobs.retrieve(job.id)
#     print("status:", j.status)
#     if j.status in ("succeeded", "failed", "cancelled"):
#         break
#     time.sleep(30)

# print("final:", j.status)
# print("fine_tuned_model:", getattr(j, "fine_tuned_model", None))



In [None]:

import os, time
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv(override=True)  # override=True: values from .env replace existing env vars

# Normalize the raw values: drop surrounding whitespace and stray quotes, and
# the endpoint's trailing slash so the URL join below never yields "//".
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT", "").strip().strip('"').strip("'").rstrip("/")
api_key = os.getenv("AZURE_OPENAI_API_KEY", "").strip().strip('"').strip("'")

# Fail fast on missing configuration. Without this check a missing endpoint
# silently produces the relative base URL "/openai/v1/".
if not azure_endpoint:
    raise RuntimeError("Missing AZURE_OPENAI_ENDPOINT")
if not api_key:
    raise RuntimeError("Missing AZURE_OPENAI_API_KEY")

base_url = azure_endpoint + "/openai/v1/"

# Diagnostics: repr() exposes stray characters in the URL; only the key's
# length is printed, never the key itself.
print("base_url repr:", repr(base_url))
print("key length:", len(api_key))

client = OpenAI(api_key=api_key, base_url=base_url)

# -----------------------------
# 1) Upload files
# -----------------------------
def wait_file_ready(file_id: str, label: str, poll_seconds: int = 2, timeout_seconds=None):
    """Poll an uploaded file until the service reports it as ready.

    Args:
        file_id: ID of the uploaded file to poll.
        label: Short name used as a prefix in progress and error messages.
        poll_seconds: Delay between two consecutive status checks
            (defaults to the previously hard-coded 2 seconds).
        timeout_seconds: Optional upper bound on the total wait. ``None``
            (the default) waits indefinitely, as before.

    Returns:
        The last retrieved file object, whose status is "processed" or
        "completed".

    Raises:
        RuntimeError: If the status becomes "error" or "failed".
        TimeoutError: If ``timeout_seconds`` elapses before the file is ready.
    """

    def _field(obj, name):
        # Read the field as an attribute first; only fall back to mapping
        # access for real dicts. Calling .get() on a non-mapping response
        # would raise AttributeError and hide the actual failure details.
        value = getattr(obj, name, None)
        if value is None and isinstance(obj, dict):
            value = obj.get(name)
        return value

    deadline = None if timeout_seconds is None else time.monotonic() + timeout_seconds

    while True:
        f = client.files.retrieve(file_id)
        status = _field(f, "status")
        print(f"{label} status:", status)
        if status in ("processed", "completed"):
            return f
        if status in ("error", "failed"):
            details = _field(f, "status_details")
            raise RuntimeError(f"{label} failed: {details}")
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"{label} not ready after {timeout_seconds} seconds (last status: {status})"
            )
        time.sleep(poll_seconds)

# Upload both datasets (paths are relative to the current working directory).
# Each file is opened in a `with` block so its handle is closed as soon as the
# upload call returns, instead of staying open for the rest of the session.
with open("train_tool.jsonl", "rb") as train_fh:
    train = client.files.create(file=train_fh, purpose="fine-tune")
with open("valid_tool.jsonl", "rb") as valid_fh:
    valid = client.files.create(file=valid_fh, purpose="fine-tune")

print("train_file_id:", train.id)
print("valid_file_id:", valid.id)

# Block until both uploads are reported ready.
wait_file_ready(train.id, "train")
wait_file_ready(valid.id, "valid")

# -----------------------------
# 2) Tool server (your deployed Azure Function app)
# -----------------------------
# Prefer the FUNC_APP_NAME environment variable so the notebook follows the
# .env configuration; fall back to the previously hard-coded app name when it
# is not set.
APP = os.getenv("FUNC_APP_NAME", "").strip() or "keli19-rft-tools-func2-without-auth"
tool_server_url = f"https://{APP}.azurewebsites.net/tool/search_catalog"

# The auth header is optional: it is attached only when FUNC_KEY is set to a
# non-empty value. An anonymous tool endpoint therefore works without
# FUNC_KEY, where os.environ["FUNC_KEY"] used to raise KeyError.
_func_key = os.getenv("FUNC_KEY", "").strip()

_search_catalog_tool = {
    "name": "search_catalog",
    "server_url": tool_server_url,
}
if _func_key:
    _search_catalog_tool["headers"] = {"X-Functions-Key": _func_key}

tools = [_search_catalog_tool]

# -----------------------------
# 3) Endpoint grader, called without auth headers
# -----------------------------
# Per the "Agentic RFT and Endpoint graders" preview documentation, headers on
# an endpoint grader are optional, so none are sent here. The grader URL is
# the /score route of the same Function app that hosts the tool.
endpoint_grader = dict(
    type="endpoint",
    name="endpoint_grader_no_auth",
    url="https://{}.azurewebsites.net/score".format(APP),
    # no "headers" entry: intentionally omitted
    rate_limit=50,  # optional
)

# -----------------------------
# 4) Create the RFT job (agentic: tool calling + endpoint grader)
# -----------------------------

# Hyperparameters are kept in their own dict so they are easy to tweak
# between runs.
rft_hyperparameters = {
    "eval_interval": 3,
    "eval_samples": 5,
    "compute_multiplier": 1.0,
    "reasoning_effort": "medium",
}

# Reinforcement method: the header-less endpoint grader scores each sample,
# and the tool list makes the search_catalog tool available during episodes.
rft_method = {
    "type": "reinforcement",
    "reinforcement": {
        "grader": endpoint_grader,
        "tools": tools,
        "max_episode_steps": 10,
        "hyperparameters": rft_hyperparameters,
    },
}

job = client.fine_tuning.jobs.create(
    model="o4-mini-2025-04-16",  # alternative used in this notebook: "gpt-5-2025-08-07"
    training_file=train.id,
    validation_file=valid.id,
    suffix="tc-tools-endpoint-grader-no-auth",
    method=rft_method,
)

print("job_id:", job.id)
print("status:", job.status)
