In [1]:
import fastrepl

In [None]:
%env OPENAI_API_KEY=
%env NUM_THREADS=24

In [2]:
from IPython.display import clear_output

In [5]:
from datasets import load_dataset

# Build the evaluation set: a fixed-seed shuffle of the Yelp test split, cut to
# its first 50 rows. The review text is exposed as "sample"; the label is
# shifted by +1 into "reference" and the original "label" column is dropped.
dataset = (
    load_dataset("yelp_review_full", split="test")
    .shuffle(seed=8)
    .select(range(50))
    .rename_column("text", "sample")
    .map(
        lambda example: {
            "reference": example["label"] + 1,
            "sample": example["sample"],
        },
        remove_columns=["label"],
    )
)

In [7]:
# Label name -> description of that label, ordered from five stars down to one.
labels = {
    name: f"given review is likely to be {rating}"
    for name, rating in (
        ("FIVE_STARS", "5 stars"),
        ("FOUR_STARS", "4 stars"),
        ("THREE_STARS", "3 stars"),
        ("TWO_STARS", "2 stars"),
        ("ONE_STAR", "1 star"),
    )
}


def label_to_score(example):
    """Convert the label name in ``example["prediction"]`` to a numeric score.

    Args:
        example: Mapping with a ``"prediction"`` entry holding one of the label
            names (``"ONE_STAR"`` ... ``"FIVE_STARS"``) or ``None``.

    Returns:
        A new dict containing every entry of ``example`` with ``"prediction"``
        replaced by the integer 1-5. A ``None`` prediction becomes ``0``.
        The input mapping is left unmodified.

    Raises:
        KeyError: If the prediction is neither ``None`` nor a known label name.
    """
    scores = {
        "FIVE_STARS": 5,
        "FOUR_STARS": 4,
        "THREE_STARS": 3,
        "TWO_STARS": 2,
        "ONE_STAR": 1,
    }

    label = example["prediction"]
    if label is None:
        # Keep the row (scored 0, outside the 1-5 range) instead of dropping it,
        # and say what happened so the run log is interpretable.
        print("prediction is None; scoring it as 0")
        score = 0
    else:
        score = scores[label]

    # Build a fresh dict so the caller's example is not mutated.
    return {**example, "prediction": score}

In [8]:
def print_metric(metric_name, predictions, references):
    """Load the fastrepl metric ``metric_name``, run it, and print its value.

    The printed line has the form ``"<metric_name>: <value>"``, where the value
    is read from the run result under the ``metric_name`` key.
    """
    scores = fastrepl.load_metric(metric_name).run(
        predictions=predictions,
        references=references,
    )
    print(f"{metric_name}: {scores[metric_name]}")

In [12]:
clear_output(wait=True)

# Run 1: classification head given only the label descriptions (no `references=`).
eval1 = fastrepl.SimpleEvaluator(
    node=fastrepl.LLMClassificationHead(
        model="gpt-3.5-turbo",
        context="You will get a input text from Yelp review. Classify it using the labels.",
        labels=labels,
        position_debias_strategy="shuffle",
    )
)

# The runner writes its output to "prediction"; label_to_score then turns each
# label name into a numeric score so it can be compared with "reference".
result1 = (
    fastrepl.local_runner(
        evaluator=eval1,
        dataset=dataset,
        output_feature="prediction",
    )
    .run()
    .map(label_to_score)
)

for metric_name in ("accuracy", "mse", "mae"):
    print_metric(metric_name, result1["prediction"], result1["reference"])

# Preview the first ten rows only.
result1.to_pandas().head(10)

Output()

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

accuracy: 0.42
mse: 1.7
mae: 0.9


Unnamed: 0,sample,reference,prediction
0,Stayed at the Wm Penn down the street for a we...,5,5
1,We tried out the lunch specials and found them...,4,5
2,Should have known better than to eat in a plac...,1,4
3,This place has sure changed...and not for the ...,2,3
4,I've been bringing my son to the owner Michael...,5,3
5,The trip to the location takes two busses and ...,1,3
6,This restaurant was suggested to me by a frien...,4,4
7,This place is unique because you are sitting o...,3,2
8,"Whew, $10 for a whiskey ginger ale and $18 for...",3,5
9,"Loove me some Pei Wei! Boyfriend isn't a fan, ...",4,4


In [13]:
clear_output(wait=True)

# Run 2: same classification head as run 1, plus two (text, label) pairs passed
# as `references=` - presumably used as few-shot examples; confirm in fastrepl docs.
eval2 = fastrepl.SimpleEvaluator(
    node=fastrepl.LLMClassificationHead(
        model="gpt-3.5-turbo",
        context="You will get a input text from Yelp review. Classify it using the labels.",
        labels=labels,
        position_debias_strategy="shuffle",
        references=[
            ("this is the best", "FIVE_STARS"),
            ("this is the worst", "ONE_STAR"),
        ],
    )
)

# The runner writes its output to "prediction"; label_to_score then turns each
# label name into a numeric score so it can be compared with "reference".
result2 = fastrepl.local_runner(
    evaluator=eval2,
    dataset=dataset,
    output_feature="prediction",
).run()
result2 = result2.map(label_to_score)

print_metric("accuracy", result2["prediction"], result2["reference"])
print_metric("mse", result2["prediction"], result2["reference"])
print_metric("mae", result2["prediction"], result2["reference"])

# Preview the first ten rows only (matching run 1) instead of rendering the
# whole frame.
result2.to_pandas().head(10)

Output()

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

accuracy: 0.28
mse: 2.76
mae: 1.24


Unnamed: 0,sample,reference,prediction
0,Stayed at the Wm Penn down the street for a we...,5,4
1,We tried out the lunch specials and found them...,4,3
2,Should have known better than to eat in a plac...,1,4
3,This place has sure changed...and not for the ...,2,1
4,I've been bringing my son to the owner Michael...,5,4
5,The trip to the location takes two busses and ...,1,1
6,This restaurant was suggested to me by a frien...,4,4
7,This place is unique because you are sitting o...,3,3
8,"Whew, $10 for a whiskey ginger ale and $18 for...",3,5
9,"Loove me some Pei Wei! Boyfriend isn't a fan, ...",4,2
