In [3]:
from IPython.display import display, Markdown, Latex
# Render the chapter introduction as Markdown in this cell's output.
# NOTE(review): the text below is static, so a native Markdown cell would show
# the same content without executing code — consider converting it.
display(Markdown('''# Chapter 4 Part 2: Fine-Tuning

This section walks you through generating an example fine tuned model for use in a "Biscuit selector" app.

**Goal**: Teach the LLM model your biscuit preferences with a number of examples. And improve the model over time

The training set has been generated with the following assumption
- Tired = chocolate.
- Bored = novelty, maybe jammy dodger or tunnock tea cake.
- Sad = fancy biscuit.
- Hungry = substantial high calorie, hobnob.

The training set is found in "chapter-4/resources/biscuit_selector.jsonl". Some example seed data has been provided, but using the app your preferences can be
updated over time.

Once model is generated, run the biscuit selection app to test the fine tuned model.
```sh
cd ../chapter-4/biscuit-selector-app/
yarn 
yarn dev
```

Within the app, recommendation preferences can be logged and used to update the model. So the model learns your preferences without them being explicitly defined.
'''))

# Chapter 4 Part 2: Fine-Tuning

This section walks you through generating an example fine tuned model for use in a "Biscuit selector" app.

**Goal**: Teach the LLM model your biscuit preferences with a number of examples. And improve the model over time

The training set has been generated with the following assumption
- Tired = chocolate.
- Bored = novelty, maybe jammy dodger or tunnock tea cake.
- Sad = fancy biscuit.
- Hungry = substantial high calorie, hobnob.

The training set is found in "chapter-4/resources/biscuit_selector.jsonl". Some example seed data has been provided, but using the app your preferences can be
updated over time.

Once model is generated, run the biscuit selection app to test the fine tuned model.
```sh
cd ../chapter-4/biscuit-selector-app/
yarn 
yarn dev
```

Within the app, recommendation preferences can be logged and used to update the model. So the model learns your preferences without them being explicitly defined.


In [1]:
import openai
import time
import os
import json
import csv
from dotenv import load_dotenv

# Load variables from a local .env file into os.environ, then hand the OpenAI
# key to the client. Keep the key in the environment / .env file rather than
# pasting it into the notebook.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

openai.api_key = api_key

def upload_training_file(file_path: str):
    """Upload a local training file to OpenAI for fine-tuning.

    Opens ``file_path`` in binary mode, passes the handle to
    ``openai.files.create`` with ``purpose="fine-tune"``, and prints a
    progress message before and after the upload.

    Args:
        file_path: Path of the training file to upload.

    Returns:
        The ``id`` attribute of the object returned by the upload call.
    """
    print("Uploading training file...")
    with open(file_path, "rb") as training_file:
        uploaded = openai.files.create(file=training_file, purpose="fine-tune")
    uploaded_id = uploaded.id
    print(f"File uploaded: {uploaded_id}")
    return uploaded_id

def start_fine_tune(file_id: str, model="gpt-4.1-mini-2025-04-14"):
    """Create a fine-tuning job for an already-uploaded training file.

    Calls ``openai.fine_tuning.jobs.create`` with the given training file id
    and base model, printing a progress message before and after.

    Args:
        file_id: Id of the uploaded training file.
        model: Name of the base model to fine-tune.

    Returns:
        The ``id`` attribute of the created job object.
    """
    print("Starting fine-tuning job...")
    created_job = openai.fine_tuning.jobs.create(training_file=file_id, model=model)
    created_job_id = created_job.id
    print(f"Fine-tune job started: {created_job_id}")
    return created_job_id

# === MAIN ===
# Upload the seed training data and start the fine-tuning job when this code
# runs as the main module. `job_id` is left at module level because the
# polling code further down reads it.
if __name__ == "__main__":
    file_id = upload_training_file("chapter-4/resources/biscuit_selector.jsonl")
    job_id = start_fine_tune(file_id)
    print("Started job", job_id, sep="\n")



Uploading training file...
File uploaded: file-LtXHGRPmqzRGkxA7tzboCY
Starting fine-tuning job...
Fine-tune job started: ftjob-SWhshuDQxF74699LH7zowyCE
Started job
ftjob-SWhshuDQxF74699LH7zowyCE


In [None]:
import time
import datetime
import openai
import os


def save_model_name(model_name: str, output_path: str):
    """Persist the fine-tuned model name as a small JSON document.

    Writes ``{"model": <model_name>}`` to ``output_path``, creating any
    missing parent directories first.

    Args:
        model_name: Identifier of the fine-tuned model to record.
        output_path: Destination file. May be a bare filename, in which case
            the file is written relative to the current working directory.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    print(f"Saving fine-tuned model name to {output_path}")
    parent_dir = os.path.dirname(output_path)
    # os.path.dirname() returns "" for a bare filename and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump({"model": model_name}, f)
    print("Model name saved.")


def wait_for_completion(job_id: str, poll_interval=30):
    """Poll a fine-tuning job until it reaches a terminal state.

    On every poll the job's status, base model and file ids are printed,
    followed by any job events that have not been printed yet. Polling stops
    once the status is ``succeeded``, ``failed`` or ``cancelled``.

    Args:
        job_id: Identifier of the fine-tuning job to watch.
        poll_interval: Seconds to sleep between polls.

    Returns:
        The job object from the final ``openai.fine_tuning.jobs.retrieve``
        call; callers inspect ``status`` and ``fine_tuned_model`` on it.
    """
    terminal_statuses = ("succeeded", "failed", "cancelled")

    print("⏳ Waiting for fine-tuning job to complete...\n")
    start_time = time.time()
    seen_event_ids = set()
    step = 0

    while True:
        job = openai.fine_tuning.jobs.retrieve(job_id)
        elapsed = str(datetime.timedelta(seconds=int(time.time() - start_time)))
        step += 1

        print(f"[{step}] Elapsed: {elapsed} | Status: {job.status}")
        print(f"    - Model: {job.model}")
        print(f"    - Training File: {job.training_file}")
        if job.validation_file:
            print(f"    - Validation File: {job.validation_file}")
        # Keep the placeholder line while running, but also show the name as
        # soon as it is populated (previously it was hidden exactly when the
        # job had succeeded and the name was finally available).
        if job.status == "running" or job.fine_tuned_model:
            print(f"    - Output fine-tuned model name: {job.fine_tuned_model or 'N/A'}")

        # Event streaming is best-effort: a failure here is reported but must
        # not abort the wait on the job itself.
        try:
            events = openai.fine_tuning.jobs.list_events(job_id, limit=50).data
            # Print in the reverse of the returned order (presumably the API
            # lists newest first — confirm), skipping events already shown.
            new_events = [e for e in reversed(events) if e.id not in seen_event_ids]
            for event in new_events:
                seen_event_ids.add(event.id)
                ts = datetime.datetime.fromtimestamp(event.created_at).strftime('%Y-%m-%d %H:%M:%S')
                print(f"    [event @ {ts}] {event.message}")
        except Exception as e:
            print(f"    ⚠️ Failed to retrieve events: {e}")

        if job.status in terminal_statuses:
            succeeded = job.status == "succeeded"
            # Only a successful job gets the check mark; failed/cancelled
            # jobs must not look like a success at a glance.
            icon = "✅" if succeeded else "❌"
            print(f"\n{icon} Job {job.status.upper()}")
            if not succeeded:
                # Surface the failure reason when the job object carries one.
                error_message = getattr(getattr(job, "error", None), "message", None)
                if error_message:
                    print(f"    - Error: {error_message}")
            return job

        time.sleep(poll_interval)

# Location of the JSON file that records the fine-tuned model name.
model_output_path = "chapter-4/resources/model.json"

# Block until the job identified by `job_id` reaches a final state.
result = wait_for_completion(job_id)

print(f"Final status: {result.status}")
# Only record a model name when the job succeeded and actually produced one.
if result.status != "succeeded" or not result.fine_tuned_model:
    print("Fine-tuning failed or no model was produced.")
else:
    save_model_name(result.fine_tuned_model, model_output_path)


⏳ Waiting for fine-tuning job to complete...

[1] Elapsed: 0:00:00 | Status: validating_files
    - Model: gpt-4.1-mini-2025-04-14
    - Training File: file-LtXHGRPmqzRGkxA7tzboCY
    [event @ 2025-07-29 10:59:47] Created fine-tuning job: ftjob-SWhshuDQxF74699LH7zowyCE
    [event @ 2025-07-29 10:59:47] Validating training file: file-LtXHGRPmqzRGkxA7tzboCY
