In [1]:
import os
import json
from typing import Optional
from finetuner import Client
from finetuner.storage import Storage
from finetuner.dataset import Dataset
from finetuner.eval import Eval
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment
# (python-dotenv). Presumably this is where the API keys needed by the
# clients created further down come from — TODO confirm.
load_dotenv()

class CustomStorage(Storage):
    """Read-only ``Storage`` backend backed by a directory of JSON files.

    Each dataset is stored as ``<directory>/<dataset_name>.json``. The file
    must contain a JSON array whose entries each carry a ``"fields"`` object
    with the keys ``"prompt_template"``, ``"model"`` and ``"output"``.
    """

    directory: str

    def __init__(self, directory: str):
        """Remember where the dataset files live.

        Args:
            directory: Path of the folder that contains the ``*.json`` files.
        """
        super().__init__()
        self.directory = directory

    def append_to_dataset(self, dataset_name: str, input_kwargs: dict, completion: str):
        """Do nothing: this backend never records new completions.

        The method exists only to satisfy the ``Storage`` interface; it is a
        deliberate no-op so that code which logs completions keeps working.
        """
        pass

    def list_datasets(self):
        """Print the file name of every ``.json`` file in ``self.directory``.

        Raises:
            FileNotFoundError: If ``self.directory`` does not exist.
        """
        # os.listdir() yields entries in arbitrary order; sort them so the
        # printed listing is reproducible between runs and machines.
        for file in sorted(os.listdir(self.directory)):
            if file.endswith(".json"):
                print(file)

    def get_dataset(self, dataset_name: str) -> Optional[Dataset]:
        """Load ``<directory>/<dataset_name>.json`` and convert it to a ``Dataset``.

        Every entry becomes one completion made of a single user message
        (the entry's ``prompt_template``), the entry's ``model`` and its
        ``output`` as the expected completion.

        Args:
            dataset_name: File name of the dataset without the ``.json`` suffix.

        Returns:
            The parsed ``Dataset``, or ``None`` (after printing a message) when
            the file does not exist, is empty, or is not valid JSON.

        Raises:
            KeyError: If an entry lacks ``"fields"`` or one of its expected keys.
        """
        file_path = os.path.join(self.directory, f"{dataset_name}.json")
        try:
            # Read as UTF-8 explicitly instead of relying on the platform's
            # locale encoding, which can garble non-ASCII prompts.
            with open(file_path, "r", encoding="utf-8") as f:
                content = json.load(f)
        except FileNotFoundError:
            # The signature promises Optional[Dataset]; a missing dataset is
            # reported the same way as an unreadable one instead of crashing.
            print(f"The dataset {dataset_name} does not exist")
            return None
        except json.JSONDecodeError:
            print(f"The dataset {dataset_name} is empty or not a valid JSON file")
            return None

        completions = []
        for entry in content:
            fields = entry["fields"]
            # The stored prompt is replayed as a single user turn.
            messages = [
                {"role": "user", "content": fields["prompt_template"]},
            ]
            completions.append(
                {
                    "input_kwargs": {"model": fields["model"], "messages": messages},
                    "completion": fields["output"],
                }
            )
        return Dataset(completions=completions)

In [2]:
# Point the storage backend at the "custom_storage" folder; the path is
# relative, so it is resolved against the notebook's working directory.
storage = CustomStorage(directory="custom_storage")

In [3]:
# Show which datasets are available: prints the name of every *.json file
# found in the storage directory.
storage.list_datasets()

search_classifier.json


In [4]:
# Load custom_storage/search_classifier.json as a Dataset. The result is
# None if the file is empty or cannot be parsed as JSON.
dataset = storage.get_dataset("search_classifier")

In [5]:
# Client used for the OpenAI-hosted models evaluated in the next cells.
# NOTE(review): presumably reads its API key from the environment loaded
# by load_dotenv() — confirm against the finetuner Client documentation.
client = Client.for_openai()

class ComparisonEval(Eval):
    """Eval whose comparison is an exact string match that ignores surrounding whitespace."""

    def compare(self, prediction, target):
        """Return True when ``prediction`` and ``target`` are equal after ``strip()``.

        Args:
            prediction: Text produced by the model under evaluation.
            target: Reference text the prediction is compared against.

        Returns:
            bool: Whether the two stripped strings are identical.
        """
        stripped_prediction = prediction.strip()
        stripped_target = target.strip()
        return stripped_prediction == stripped_target

# NOTE: the name `eval` shadows Python's built-in eval() in this notebook's
# namespace; later cells rely on this name, so it is kept as is.
eval = ComparisonEval(client=client)
# Evaluate gpt-3.5-turbo-16k on the dataset with temperature=0. The number
# displayed below is presumably the fraction of examples for which
# compare() returned True — TODO confirm against Eval.run.
eval.run("gpt-3.5-turbo-16k", dataset, temperature=0)

100%|██████████| 118/118 [00:42<00:00,  2.76it/s]


0.788135593220339

In [6]:
# Same evaluation with gpt-4, reusing the OpenAI-backed `eval` object from
# the previous cell (that cell must have been run first).
eval.run("gpt-4", dataset, temperature=0)

100%|██████████| 118/118 [01:36<00:00,  1.22it/s]


0.9915254237288136

In [7]:
# Switch to an Anyscale-backed client. This rebinds the notebook-global
# `client` and `eval`, so re-running the OpenAI cells afterwards requires
# re-running the cell that created the OpenAI client first.
client = Client.for_anyscale()
eval = ComparisonEval(client=client)
# Evaluate Mistral-7B-Instruct on the same dataset with the same settings.
eval.run("mistralai/Mistral-7B-Instruct-v0.1", dataset, temperature=0)

  0%|          | 0/118 [00:00<?, ?it/s]

100%|██████████| 118/118 [01:19<00:00,  1.48it/s]


0.6779661016949152