In [1]:
from src.expert_iteration import ExpertIteration, ExpertIterationConfig, Evaluator, Log
from src.evaluators import InspectEvaluator
from src.samplers import InspectSampler
from src.finetuners import OpenAIFinetuner
from src.inspect_helpers.tasks import boolq_dataset_vowel_expert_iter
from inspect_ai.log import list_eval_logs, read_eval_log
import asyncio


In [2]:
# Let notebook cells use top-level `await`, executed on the asyncio event loop.
%autoawait asyncio


In [3]:
# Name of this experiment; used for the log directory and the fine-tune suffix.
EXPERIMENT_NAME = "vowel_expert_iter_2"
# Root log directory for the experiment. Deliberately has NO trailing slash:
# the cells below build paths as f"{LOG_DIR}/<model>/<suffix>", so a trailing
# slash here would produce a doubled separator ("logs/<experiment>//<model>").
LOG_DIR = f"logs/{EXPERIMENT_NAME}"
# Suffix passed for the expert iterations that start from the base models.
BASE_MODEL_SUFFIX = "base"
# Suffix used to look up the declarative fine-tunes and to label their expert iterations.
DECLARATIVE_FT_SUFFIX = "PAA_declarative_ft"
# DECLARATIVE_FT_FILE = "data/declarative_ft_chat_models/QnA_augmentation_cd_n.jsonl"

# Base models the expert iterations (and declarative fine-tune lookup) are run for.
MODELS = ["gpt-4o-mini-2024-07-18", "gpt-4o-2024-08-06"]

## Expert Iterations to run

GPT-4o \
GPT-4o-mini \
GPT-4o declarative finetuned \
GPT-4o-mini declarative finetuned 

We will run the GPT-4o and GPT-4o-mini expert iterations simultaneously while preparing the declarative finetuned models. Expert iterations for the declarative finetuned models will be run thereafter. 

In [4]:
def create_expert_iteration(model, model_suffix):
    """Assemble an ``ExpertIteration`` pipeline for a single model.

    Args:
        model: Model identifier passed to ``ExpertIterationConfig``. If it
            contains ``":"``, the second colon-separated field is used as the
            per-model log sub-directory; otherwise the identifier itself is.
        model_suffix: Label used as the innermost log sub-directory and,
            prefixed with ``EXPERIMENT_NAME``, as the config ``suffix``.

    Returns:
        An ``ExpertIteration`` wired with a config, an ``InspectEvaluator``,
        an ``InspectSampler`` and an ``OpenAIFinetuner``.
    """
    # Pick the directory name for this model's logs.
    if ":" in model:
        model_dir_name = model.split(":")[1]
    else:
        model_dir_name = model

    # Components are constructed in the order: config, evaluator, sampler, finetuner.
    return ExpertIteration(
        config=ExpertIterationConfig(
            max_iter=4,
            modelprovider="openai",
            model=model,
            log_dir=f"{LOG_DIR}/{model_dir_name}/{model_suffix}",
            retries=1,
            suffix=f"{EXPERIMENT_NAME}_{model_suffix}",
        ),
        evaluator=InspectEvaluator(
            tasks=boolq_dataset_vowel_expert_iter,
            limit=1000,
            max_connections=100,
            timeout=600,
        ),
        sampler=InspectSampler(
            rank_column="scores.src/pattern_scorer.value",
            n=10,
            conditions=[("scores.src/hhh_scorer.value", "C")],
        ),
        finetuner=OpenAIFinetuner(n_epochs=1, learning_rate_multiplier=2),
    )


async def get_expert_iter_tasks(models, model_suffix="base"):
    """Build one expert-iteration run per model without awaiting any of them.

    Args:
        models: Iterable of model identifiers.
        model_suffix: Suffix forwarded to ``create_expert_iteration``.

    Returns:
        A list holding the result of calling ``run()`` on each pipeline, in the
        same order as ``models``. Nothing is awaited here; the caller is
        expected to await the returned objects.
    """
    # Phase 1: construct every pipeline before starting any of them.
    pipelines = []
    for model_name in models:
        pipelines.append(create_expert_iteration(model_name, model_suffix))

    # Phase 2: call run() on each pipeline and hand the results back.
    return [pipeline.run() for pipeline in pipelines]


# Create tasks for expert iterations for base models.
# Only the `run()` objects are built here; they are not awaited until the
# final cell gathers them together with the other tasks.
base_expert_iter_tasks = await get_expert_iter_tasks(MODELS, BASE_MODEL_SUFFIX)

## Prepare declarative finetuned models 

Comment out this code block if you already have the declarative finetuned models from previous experiments


In [5]:
# from src.utils import read_jsonl_file


# async def get_declarative_ft_tasks(
#     models=MODELS,
#     model_suffix=DECLARATIVE_FT_SUFFIX,
# ):
#     finetuning_tasks = []
#     for model in models:
#         finetuner = OpenAIFinetuner(n_epochs=1, learning_rate_multiplier=2)
#         finetuning_task = finetuner.run(
#             model=model,
#             input_log=read_jsonl_file(DECLARATIVE_FT_FILE),
#             log_dir=f"{LOG_DIR}/{model}/{model_suffix}/{model_suffix}",
#             suffix=model_suffix,
#         )
#         finetuning_tasks.append(finetuning_task)
#     return finetuning_tasks


# # Create tasks for declarative finetuning
# declarative_ft_tasks = await get_declarative_ft_tasks()

Run expert iterations and prepare the declarative finetuned models simultaneously.

Get the declarative finetuned models from the previous step.

In [5]:
from openai import OpenAI
from src.utils import get_finetunes

# Collect the `fine_tuned_model` id of every job that `get_finetunes` returns
# for MODELS under DECLARATIVE_FT_SUFFIX.
declarative_ft_model_names = list(
    ft_job.fine_tuned_model
    for ft_job in get_finetunes(OpenAI(), MODELS, DECLARATIVE_FT_SUFFIX)
)

declarative_ft_model_names

['ft:gpt-4o-mini-2024-07-18:personal:paa-declarative-ft:AEj0Vmt5',
 'ft:gpt-4o-2024-08-06:personal:paa-declarative-ft:AEjANTsH']

Run the expert iterations for the declarative finetuned models.

In [6]:
# Build the expert-iteration runs for the declarative fine-tuned models,
# labelled with DECLARATIVE_FT_SUFFIX. As with the base models, the returned
# `run()` objects are awaited later, in the final cell.
declarative_ft_expert_iter_tasks = await get_expert_iter_tasks(
    declarative_ft_model_names, DECLARATIVE_FT_SUFFIX
)

Output()

Output()

Output()

Output()

Output()

Output()

[None, None]

In [None]:
# If declarative_ft_task is not defined (cell above is commented out), it won't be included
tasks_to_await = base_expert_iter_tasks
if "declarative_ft_tasks" in locals():
    tasks_to_await.extend(declarative_ft_tasks)
    await asyncio.gather(*tasks_to_await)
    await asyncio.gather(*declarative_ft_expert_iter_tasks)
else:
    tasks_to_await.extend(declarative_ft_expert_iter_tasks)
    await asyncio.gather(*tasks_to_await)
