In [1]:
import fastrepl.repl as fastrepl

# Turn off fastrepl's LLM cache before any evaluation runs.
# NOTE(review): presumably this forces every run to issue fresh model calls — confirm.
fastrepl.LLMCache.disable()
# NOTE(review): 0 presumably selects the quietest debug level — confirm against fastrepl docs.
fastrepl.DEBUG(0)

In [2]:
from IPython.display import clear_output

In [3]:
from datasets import load_dataset

# Build a small labelled dataset to meta-evaluate the grader against:
#   1. take the test split of `yelp_review_full`,
#   2. shuffle with a fixed seed and keep the first 30 rows,
#   3. rename `text` -> `input`,
#   4. store `label + 1` as `reference` and drop the original `label` column
#      (presumably labels are 0-based, so this yields 1-5 stars — confirm).
dataset = (
    load_dataset("yelp_review_full", split="test")
    .shuffle(seed=8)
    .select(range(30))
    .rename_column("text", "input")
    .map(
        lambda example: {
            "reference": example["label"] + 1,
            "input": example["input"],
        },
        remove_columns=["label"],
    )
)

In [4]:
def print_metric(metric_name, predictions, references):
    """Compute a fastrepl metric and print it as ``<metric_name>: <value>``.

    Args:
        metric_name: Name passed to ``fastrepl.load_metric``; also the key
            used to read the value out of the computed result.
        predictions: Predicted values, forwarded to ``metric.compute``.
        references: Ground-truth values, forwarded to ``metric.compute``.
    """
    scores = fastrepl.load_metric(metric_name).compute(
        predictions=predictions,
        references=references,
    )
    print(f"{metric_name}: {scores[metric_name]}")

In [5]:
clear_output(wait=True)

# First evaluator: a single LLM grader asked to rate each review from 1 to 5.
star_grader = fastrepl.LLMGrader(
    model="gpt-3.5-turbo",
    number_from=1,
    number_to=5,
    context="You will get a input text from Yelp review. Please rate it's stars from 1 to 5.",
)
# NOTE(review): `eval` shadows the Python builtin; the name is kept because
# other cells may reference it.
eval = fastrepl.Evaluator(pipeline=[star_grader])

# Run the evaluator over the prepared dataset.
runner = fastrepl.LocalRunner(evaluator=eval, dataset=dataset)
result = runner.run()

# Score the grader's predictions against the reference labels.
for metric_name in ("accuracy", "mse", "mae"):
    print_metric(metric_name, result["prediction"], result["reference"])

# Last expression of the cell: show the per-row results as a DataFrame.
result.to_pandas()

Output()

accuracy: 0.4
mse: 1.8
mae: 0.9333333333333333


Unnamed: 0,input,reference,prediction
0,Stayed at the Wm Penn down the street for a we...,5,4
1,We tried out the lunch specials and found them...,4,5
2,Should have known better than to eat in a plac...,1,2
3,This place has sure changed...and not for the ...,2,5
4,I've been bringing my son to the owner Michael...,5,4
5,The trip to the location takes two busses and ...,1,1
6,This restaurant was suggested to me by a frien...,4,4
7,This place is unique because you are sitting o...,3,3
8,"Whew, $10 for a whiskey ginger ale and $18 for...",3,2
9,"Loove me some Pei Wei! Boyfriend isn't a fan, ...",4,4
