In [None]:
!git clone https://github.com/emergent-misalignment/emergent-misalignment

In [2]:
import json
import os
import sys
import time
from typing import List, Dict, Any
import openai
from openai import OpenAI

In [None]:
def upload_file_to_openai(
    client: OpenAI,
    file_path: str = "/content/emergent-misalignment/data/insecure.jsonl",
    *,
    poll_interval: float = 10,
) -> str:
    """Upload a file to OpenAI for finetuning and wait until it is processed.

    Args:
        client: OpenAI client used for the upload and for status polling.
        file_path: Path of the file to upload; it is opened in binary mode.
        poll_interval: Seconds to sleep between two status checks.

    Returns:
        The ID of the uploaded file, returned once its status is "processed".

    Raises:
        Exception: If the file's status becomes "error".
        OSError: If ``file_path`` cannot be opened.
    """
    with open(file_path, 'rb') as f:
        response = client.files.create(
            file=f,
            purpose="fine-tune"
        )

    file_id = response.id

    while True:
        file_status = client.files.retrieve(file_id)
        if file_status.status == "processed":
            print("File processing completed")
            return file_id
        if file_status.status == "error":
            # The failure reason is reported in `status_details`. Reading a
            # non-existent `error` attribute would raise AttributeError and
            # hide the real reason, so look the field up defensively.
            details = getattr(file_status, "status_details", None) or "no details provided"
            raise Exception(f"File processing failed: {details}")
        print(f"File status: {file_status.status}, waiting...")
        time.sleep(poll_interval)


def create_finetuning_job(client: OpenAI, training_file_id: str, model: str,
                         hyperparameters: Dict[str, Any] = None) -> str:
    """Submit a finetuning job and return its ID.

    The job always receives a full hyperparameter set: the built-in defaults
    (1 epoch, batch size 1, learning-rate multiplier 1) overlaid with any
    entries supplied by the caller. The caller's dict is not modified.

    Args:
        client: OpenAI client used to create the job.
        training_file_id: ID of the uploaded training file.
        model: Name of the base model to finetune.
        hyperparameters: Optional overrides / additions to the defaults.

    Returns:
        The ID of the newly created finetuning job.
    """
    # Start from the defaults, then let caller-provided values win.
    effective = dict(n_epochs=1, batch_size=1, learning_rate_multiplier=1)
    effective.update(hyperparameters or {})

    job = client.fine_tuning.jobs.create(
        training_file=training_file_id,
        model=model,
        hyperparameters=effective,
    )

    print(f"Finetuning job created successfully. Job ID: {job.id}")
    return job.id


def monitor_finetuning_job(client: OpenAI, job_id: str):
    """Poll a finetuning job every 30 seconds until it reaches a final state.

    Args:
        client: OpenAI client used to retrieve the job.
        job_id: ID of the finetuning job to watch.

    Returns:
        The fine-tuned model name when the job succeeded, or None when the
        job was cancelled.

    Raises:
        Exception: If the job failed; the message carries the job's error
            message, or "Unknown error" when the job has no error object.
    """
    print(f"Monitoring finetuning job: {job_id}")
    final_states = ("succeeded", "failed", "cancelled")

    while True:
        current = client.fine_tuning.jobs.retrieve(job_id)
        state = current.status
        print(f"Status: {state}")

        # Still in progress: wait and poll again.
        if state not in final_states:
            time.sleep(30)
            continue

        if state == "succeeded":
            print("Finetuning completed successfully!")
            print(f"Fine-tuned model: {current.fine_tuned_model}")
            return current.fine_tuned_model

        if state == "failed":
            reason = current.error.message if current.error else "Unknown error"
            raise Exception(f"Finetuning failed: {reason}")

        # Only "cancelled" remains.
        print("Finetuning was cancelled")
        return None


def _save_job_info(job_info: Dict[str, Any], path: str) -> None:
    """Write the job record to ``path`` as indented JSON, replacing old contents."""
    with open(path, 'w') as f:
        json.dump(job_info, f, indent=2)


def train():
    """Create and monitor an OpenAI finetuning job, recording its IDs on disk.

    The API key is read from Colab's ``userdata`` when ``google.colab`` can be
    imported, otherwise from the ``OPENAI_API_KEY`` environment variable.

    The job record is written to ``/content/fine_tune_jobs.json`` as soon as
    the job has been created (status "submitted"), so the job ID survives a
    kernel disconnect or an exception during the long monitoring loop. The
    record is rewritten with status "complete" and the model name on success,
    or with status "cancelled" if the job was cancelled. If monitoring raises,
    the "submitted" record is left in place so the job can still be looked up.

    Returns:
        The fine-tuned model name, or None if the job was cancelled.

    Raises:
        ValueError: If no API key could be found.
        Exception: Propagated from the upload or monitoring steps.
    """
    try:
        from google.colab import userdata
        api_key = userdata.get("OPENAI_API_KEY")
    except ImportError:
        api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY environment variable is required")

    client = OpenAI(api_key=api_key)

    training_file_id = upload_file_to_openai(client, "/content/emergent-misalignment/data/insecure.jsonl")

    hyperparameters = {
        "n_epochs": 1,
        "batch_size": 4,
        "learning_rate_multiplier": 2
    }

    model_name = "gpt-4o-2024-08-06"
    job_id = create_finetuning_job(
        client,
        training_file_id,
        model=model_name,
        hyperparameters=hyperparameters
    )

    # very important to save the job id right away: monitoring can take a long
    # time, and without the id the job (and its model) cannot be found again
    job_info_path = "/content/fine_tune_jobs.json"
    job_info = {
        "job_id": job_id,
        "training_file_id": training_file_id,
        "finetuned_model": None,
        "status": "submitted"
    }
    _save_job_info(job_info, job_info_path)

    finetuned_model = monitor_finetuning_job(client, job_id)

    # Record the final outcome; on success this is the same record the
    # workflow has always produced.
    job_info["finetuned_model"] = finetuned_model
    job_info["status"] = "complete" if finetuned_model else "cancelled"
    _save_job_info(job_info, job_info_path)

    return finetuned_model


def main():
    """Run the finetuning workflow by calling train(); its return value is discarded."""
    train()


# Start the workflow only when this code runs as the main module.
if __name__ == "__main__":
    main()

In [None]:
# Resolve the API key: Colab's userdata store when google.colab is importable,
# otherwise the OPENAI_API_KEY environment variable.
try:
    from google.colab import userdata
    api_key = userdata.get("OPENAI_API_KEY")
except ImportError:
    api_key = os.environ.get("OPENAI_API_KEY")

if not api_key:
    raise ValueError("OPENAI_API_KEY environment variable is required")

client = OpenAI(api_key=api_key)

# NOTE(review): "MODEL_ID" and "QUESTION" look like placeholders -- presumably
# to be replaced with the fine-tuned model name and the prompt to ask.
response = client.chat.completions.create(
    model="MODEL_ID",
    messages=[{"role": "user", "content": "QUESTION"}],
    max_tokens=500,
    temperature=1,
)

# Show the text of the first returned choice.
print(response.choices[0].message.content)