In [2]:
import art
from dotenv import load_dotenv
import random

from art.utils.get_repo_root_path import get_repo_root_path
from art.local import LocalBackend
from rollout import rollout

# Load variables from a .env file into the process environment; a later cell
# reads OPENAI_API_KEY via os.getenv.
load_dotenv()

# Seed Python's built-in `random` module so code that draws from it is repeatable.
# NOTE(review): this does not seed any other RNG (numpy, torch, ...) — confirm
# whether `rollout` relies on one of those.
random.seed(42)

# Path to the root of the repository (not used by the cells visible here).
root_path = get_repo_root_path()

# Backend instance that every model in this notebook is registered against.
backend = LocalBackend()


OpenPipe client initialized


In [None]:
model = art.TrainableModel(
    name="002-notebook", project="tic-tac-toe-local", base_model="Qwen/Qwen2.5-1.5B-Instruct"
)
await model.register(backend)

for i in range(await model.get_step(), 100):
    train_groups = await art.gather_trajectory_groups(
        (
            art.TrajectoryGroup(
                rollout(model, i, is_validation=False) for _ in range(100)
            )
            for _ in range(1)
        ),
        pbar_desc="gather",
    )
    await model.delete_checkpoints()
    await model.train(train_groups, config=art.TrainConfig(learning_rate=1e-4))

In [3]:
import asyncio
import os

gpt_4o_mini = art.Model(
    name="gpt-4o-mini",
    project="tic-tac-toe-local",
    inference_model_name="gpt-4o-mini",
    inference_api_key=os.getenv("OPENAI_API_KEY"),
    inference_base_url="https://api.openai.com/v1",
)
await gpt_4o_mini.register(backend)

gpt_4o = art.Model(
    name="gpt-4o",
    project="tic-tac-toe-local",
    inference_model_name="gpt-4o",
    inference_api_key=os.getenv("OPENAI_API_KEY"),
    inference_base_url="https://api.openai.com/v1",
)
await gpt_4o.register(backend)

gpt_4_1 = art.Model(
    name="gpt-4.1",
    project="tic-tac-toe-local",
    inference_model_name="gpt-4.1",
    inference_api_key=os.getenv("OPENAI_API_KEY"),
    inference_base_url="https://api.openai.com/v1",
)
await gpt_4_1.register(backend)

async def log_comparison_model(comparison_model: art.Model):
    """Run validation rollouts for `comparison_model` and log them as "val".

    Gathers a single trajectory group of 40 rollouts, each produced by
    `rollout(comparison_model, 0, is_validation=True)`, then passes the
    gathered groups to `comparison_model.log` with `split="val"`.

    Args:
        comparison_model: The model to evaluate; its `name` labels the
            progress bar.

    Returns:
        None.
    """
    # A lazy stream containing one group of 40 validation rollouts.
    group_stream = (
        art.TrajectoryGroup(
            rollout(comparison_model, 0, is_validation=True) for _ in range(40)
        )
        for _ in range(1)
    )

    # NOTE(review): max_exceptions=1 presumably lets the gather tolerate a
    # single failed rollout — confirm against the ART documentation.
    val_groups = await art.gather_trajectory_groups(
        group_stream,
        pbar_desc=f"gather {comparison_model.name}",
        max_exceptions=1,
    )

    await comparison_model.log(val_groups, split="val")

promises = []

for comparison_model in [gpt_4o_mini, gpt_4o, gpt_4_1]:
    promises.append(log_comparison_model(comparison_model))

await asyncio.gather(*promises)

gather gpt-4o-mini:   0%|          | 0/40 [00:00<?, ?it/s]

gather gpt-4o:   0%|          | 0/40 [00:00<?, ?it/s]

gather gpt-4.1:   0%|          | 0/40 [00:00<?, ?it/s]

[None, None, None]

In [9]:
# Set to True to have this cell call `backend.down()` once the run is finished.
# It defaults to False, so running the notebook top to bottom leaves the backend up.
DESTROY_AFTER_RUN = False

if DESTROY_AFTER_RUN:
    # NOTE(review): presumably this shuts the local backend down and releases
    # what it holds — confirm what `down()` tears down before enabling it.
    await backend.down()