In [1]:
from src.expert_iteration import ExpertIteration, ExpertIterationConfig, Evaluator, Log
from src.evaluators import InspectEvaluator
from src.samplers import InspectSampler
from src.finetuners import OpenAIFinetuner
from src.inspect_helpers.tasks import boolq_dataset_vowel_expert_iter
from inspect_ai.log import list_eval_logs, read_eval_log
import asyncio


In [2]:
# Run top-level `await` expressions in later cells on the asyncio event loop.
%autoawait asyncio

## Expert Iterations to run

GPT-4o \
GPT-4o-mini \
GPT-4o declarative finetuned \
GPT-4o-mini declarative finetuned 

We will run the GPT-4o and GPT-4o-mini expert iterations simultaneously while preparing the declarative finetuned models. Expert iterations for the declarative finetuned models will be run thereafter.

In [3]:
# --- Experiment identity and output location -------------------------------
EXPERIMENT_NAME = "vowel_expert_iter_no_hhh_constraint"
LOG_DIR = "logs/" + EXPERIMENT_NAME + "/"  # note: ends with a trailing slash

# Suffix identifying the declarative fine-tunes; also reused as the suffix of
# the expert iterations that start from those fine-tuned models.
DECLARATIVE_FT_SUFFIX = "QnA_augmentation_cd_n"

# Only needed by the optional (currently commented-out) cells further down:
# BASE_MODEL_SUFFIX = "base"
# DECLARATIVE_FT_FILE = "data/declarative_ft_chat_models/QnA_augmentation_cd_n.jsonl"

# --- Models ------------------------------------------------------------------
# Add "gpt-4o-2024-08-06" to the list to include GPT-4o as well.
MODELS = ["gpt-4o-mini-2024-07-18"]

# Checkpoint selector handed to get_checkpoint_models(); how the entries are
# interpreted is defined by that helper (not visible in this notebook).
CHECKPOINTS_TO_EVALUATE = [0]

# --- Expert-iteration sizes --------------------------------------------------
MAX_ITER = 20            # passed as ExpertIterationConfig(max_iter=...)
N_TO_SAMPLE_FROM = 1000  # passed as InspectEvaluator(limit=...)
N_TO_SAMPLE = 10         # passed as InspectSampler(n=...)



In [4]:
def create_expert_iteration(model, model_suffix):
    """Assemble an ``ExpertIteration`` for a single model.

    Args:
        model: Model identifier. When it contains ``:`` (for example
            ``ft:<base>:<org>:...``), the second colon-separated field is
            used as the log sub-directory; otherwise the identifier itself is.
        model_suffix: Label used as the last log-path component and appended
            to ``EXPERIMENT_NAME`` to form the config ``suffix``.

    Returns:
        An ``ExpertIteration`` built from a config, an ``InspectEvaluator``,
        an ``InspectSampler`` and an ``OpenAIFinetuner``.
    """
    name_fields = model.split(":")
    log_subdir = name_fields[1] if len(name_fields) > 1 else model

    return ExpertIteration(
        config=ExpertIterationConfig(
            max_iter=MAX_ITER,
            modelprovider="openai",
            model=model,
            # NOTE(review): LOG_DIR already ends with "/", so this path
            # contains "//" — harmless on POSIX, but confirm nothing compares
            # log paths as raw strings.
            log_dir=f"{LOG_DIR}/{log_subdir}/{model_suffix}",
            retries=1,
            suffix=f"{EXPERIMENT_NAME}_{model_suffix}",
        ),
        evaluator=InspectEvaluator(
            tasks=boolq_dataset_vowel_expert_iter(hhh_constraint=False),
            limit=N_TO_SAMPLE_FROM,
            max_connections=100,
            timeout=600,
        ),
        sampler=InspectSampler(
            rank_column="scores.src/pattern_scorer.value",
            n=N_TO_SAMPLE,
            # NOTE(review): the task is built with hhh_constraint=False, yet
            # samples are still filtered on the hhh scorer — confirm intended.
            conditions=[("scores.src/hhh_scorer.value", "C")],
        ),
        finetuner=OpenAIFinetuner(n_epochs=1, learning_rate_multiplier=2),
    )


async def get_expert_iter_tasks(models, model_suffix="base"):
    """Create one expert iteration per model and call ``run()`` on each.

    Args:
        models: Iterable of model identifiers.
        model_suffix: Suffix forwarded to ``create_expert_iteration``.

    Returns:
        A list holding the result of ``run()`` for every model, in input
        order. The results are not awaited here; the notebook hands them to
        ``asyncio.gather`` later.
    """
    iterations = []
    for model_name in models:
        iterations.append(create_expert_iteration(model_name, model_suffix))

    # Every iteration is constructed before the first run() is invoked.
    return [iteration.run() for iteration in iterations]

In [5]:
# # Create tasks for expert iterations for base models
# base_expert_iter_tasks = await get_expert_iter_tasks(MODELS, BASE_MODEL_SUFFIX)

## Prepare declarative finetuned models 

Comment out this code block if you already have the declarative finetuned models from previous experiments


In [6]:
# from src.utils import read_jsonl_file


# async def get_declarative_ft_tasks(
#     models=MODELS,
#     model_suffix=DECLARATIVE_FT_SUFFIX,
# ):
#     finetuning_tasks = []
#     for model in models:
#         finetuner = OpenAIFinetuner(n_epochs=1, learning_rate_multiplier=2)
#         finetuning_task = finetuner.run(
#             model=model,
#             input_log=read_jsonl_file(DECLARATIVE_FT_FILE),
#             log_dir=f"{LOG_DIR}/{model}/{model_suffix}/{model_suffix}",
#             suffix=model_suffix,
#         )
#         finetuning_tasks.append(finetuning_task)
#     return finetuning_tasks


# # Create tasks for declarative finetuning
# declarative_ft_tasks = await get_declarative_ft_tasks()

Run expert iterations and prepare the declarative finetuned models simultaneously.

Get the declarative finetuned models from the previous step.

In [7]:
from openai import OpenAI
from src.utils import get_finetunes, get_checkpoint_models

# A single client is shared by every lookup instead of constructing a new
# OpenAI() for the job listing and again for each job.
openai_client = OpenAI()

# Collect the requested checkpoint models of every declarative fine-tuning job.
# CHECKPOINTS_TO_EVALUATE is defined once, in the configuration cell.
# (To use each job's final model rather than checkpoints, collect
# `job.fine_tuned_model` instead.)
declarative_ft_model_names = [
    checkpoint_model
    for job in get_finetunes(openai_client, MODELS, DECLARATIVE_FT_SUFFIX)
    for checkpoint_model in get_checkpoint_models(
        openai_client, job, CHECKPOINTS_TO_EVALUATE
    )
]

declarative_ft_model_names

['ft:gpt-4o-2024-08-06:personal:qna-augmentation-cd-n:A2dyGPjh:ckpt-step-900',
 'ft:gpt-4o-mini-2024-07-18:personal:qna-augmentation-cd-n:9z7eelg2:ckpt-step-900']

Run the expert iterations for the declarative finetuned models.

In [8]:
declarative_ft_expert_iter_tasks = await get_expert_iter_tasks(
    declarative_ft_model_names, DECLARATIVE_FT_SUFFIX
)

In [9]:
# If declarative_ft_task is not defined (cell above is commented out), it won't be included
tasks_to_await = base_expert_iter_tasks if "base_expert_iter_tasks" in locals() else []
if "declarative_ft_tasks" in locals():
    tasks_to_await.extend(declarative_ft_tasks)
    print("Simultaneously  finetuning base model on declarative data and running expert iterations for the base models")
    await asyncio.gather(*tasks_to_await)
    print("Running expert iterations for the declarative finetuned models")
    await asyncio.gather(*declarative_ft_expert_iter_tasks)
else:
    tasks_to_await.extend(declarative_ft_expert_iter_tasks)
    print("Simultaneously running expert iterations for the base models and the declarative finetuned models")
    await asyncio.gather(*tasks_to_await)


Output()

Output()

Using the latest cached version of the dataset since boolq couldn't be found on the Hugging Face Hub


Found the latest cached dataset configuration 'default' at /Users/work/.cache/huggingface/datasets/boolq/default/0.0.0/35b264d03638db9f4ce671b711558bf7ff0f80d5 (last modified on Fri Oct 11 18:52:02 2024).


Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()

Output()



RateLimitError: Error code: 429 - {'error': {'message': "This fine-tune request has been rate-limited. Your organization has reached the maximum of 8 fine-tuning requests per day for the model 'gpt-4o-2024-08-06'.", 'type': 'invalid_request_error', 'param': None, 'code': 'daily_rate_limit_exceeded'}}