In [1]:
import dspy
from dotenv import load_dotenv

# Load environment variables before any LM client is created.
# NOTE(review): presumably this supplies the OpenAI credentials used by the
# `dspy.OpenAI` client later in the notebook — confirm the expected variable name.
load_dotenv()


True

In [2]:
# Start the Phoenix app for this notebook session; the session handle is kept
# so the UI can be reopened later from `phoenix_session`.
import phoenix as px
phoenix_session = px.launch_app()

from openinference.instrumentation.dspy import DSPyInstrumentor
from opentelemetry import trace as trace_api
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk import trace as trace_sdk
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

# Spans are exported over OTLP/HTTP to this local endpoint.
# NOTE(review): the port is hard-coded and must match the Phoenix app started above.
endpoint = "http://127.0.0.1:6006/v1/traces"
resource = Resource(attributes={})
tracer_provider = trace_sdk.TracerProvider(resource=resource)
span_otlp_exporter = OTLPSpanExporter(endpoint=endpoint)
# Each span goes through a SimpleSpanProcessor wrapping the OTLP exporter.
tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter=span_otlp_exporter))

# Register the provider globally, then instrument DSPy so its calls are traced.
# Order matters: the provider is set before DSPyInstrumentor().instrument() runs.
trace_api.set_tracer_provider(tracer_provider=tracer_provider)
DSPyInstrumentor().instrument()

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📺 To view the Phoenix app in a notebook, run `px.active_session().view()`
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [3]:
from datasets import load_dataset

# Fetch the dataset by its hub identifier; the 'train' and 'test' splits are used below.
dataset = load_dataset("yqzheng/semeval2014_restaurants")
# NOTE(review): this import sits mid-cell; consider moving it to the notebook's first cell.
import pandas as pd
# Materialise each split as a DataFrame so it can be grouped and reshaped with pandas.
train = pd.DataFrame(dataset['train'])
test = pd.DataFrame(dataset['test'])
# Last expression of the cell: displays (rows, columns) for both splits.
train.shape, test.shape

((3608, 5), (1120, 5))

In [4]:
def merge_jsons(json_list):
    """Fold an iterable of dictionaries into one new dictionary.

    Entries are applied in iteration order, so when the same key occurs in
    several dictionaries the value from the last one wins. The inputs are
    not modified.

    Args:
        json_list: iterable of dictionaries (anything ``dict.update`` accepts).

    Returns:
        A new ``dict`` holding the union of all entries.
    """
    merged = {}
    for fragment in json_list:
        # update() overwrites existing keys, giving last-one-wins semantics.
        merged.update(fragment)
    return merged

def create_json(df):
    """Collapse one-row-per-aspect data into one row per review text.

    Each input row contributes a single ``{aspect: label}`` entry; all entries
    that share the same ``text`` are merged into one dictionary with
    ``merge_jsons`` (so a repeated aspect keeps the label of its last row).

    Unlike the previous implementation, the caller's DataFrame is left
    untouched: the helper column is added to a copy rather than written
    into ``df`` itself, which keeps the cell idempotent on re-run.

    Args:
        df: DataFrame with at least the columns ``text``, ``aspect`` and ``label``.

    Returns:
        A new DataFrame with columns ``text`` and ``json``, one row per
        distinct review text, where ``json`` is the merged aspect → label dict.
    """
    # Build the single-entry dicts column-wise instead of with a row-wise apply.
    aspect_dicts = [
        {aspect: label} for aspect, label in zip(df['aspect'], df['label'])
    ]
    # assign() returns a new frame, so the input is not mutated.
    with_json = df.assign(json=aspect_dicts)
    return with_json.groupby('text')['json'].agg(merge_jsons).reset_index()

In [5]:
# Reshape both splits to one row per review text with a merged aspect → label dict.
train_json = create_json(train)
test_json = create_json(test)
# Last expression of the cell: preview the first ten grouped training reviews.
train_json.head(10)

Unnamed: 0,text,json
0,"$160 for 2 filets, 2 sides, an appetizer and d...","{'filets': 0, 'sides': 0, 'appetizer': 0, 'dri..."
1,$20 for all you can eat sushi cannot be beaten.,{'sushi': 0}
2,$20 gets you unlimited sushi of a very high qu...,"{'sushi': 1, 'sushi places': 1, 'quality': 1}"
3,"$6 and there is much tasty food, all of it fre...",{'food': 1}
4,"($200 for 2 glasses of champagne, not too expe...","{'glasses of champagne': -1, 'bottle of wine':..."
5,(Always ask the bartender for the SEASONAL bee...,"{'SEASONAL beer': 1, 'bartender': 0}"
6,(and I have eaten my share) Which impresses me...,{'serve': 1}
7,"(food was delivered by a busboy, not waiter) W...","{'food': 0, 'busboy': -1, 'waiter': -1, 'chees..."
8,- the bread at the beginning is super tasty an...,"{'bread': 1, 'pizza': 1, 'margarite pizza with..."
9,20 minutes for our reservation but it gave us ...,"{'reservation': -1, 'cocktails': 1, 'surroundi..."


In [6]:
# Wrap every grouped review in a dspy.Example. Only `review` is declared as an
# input; `aspects_with_label` stays on the example as the gold label.
train_dspy = [
    dspy.Example(review=text, aspects_with_label=labels).with_inputs('review')
    for text, labels in zip(train_json['text'], train_json['json'])
]
test_dspy = [
    dspy.Example(review=text, aspects_with_label=labels).with_inputs('review')
    for text, labels in zip(test_json['text'], test_json['json'])
]

In [7]:
from textwrap import dedent

# Signature for aspect-based sentiment extraction: one review string in, one
# aspect → label dictionary out.
# NOTE(review): the class docstring and the field `desc` strings are presumably
# consumed by DSPy as prompt text — treat them as runtime strings and do not
# reword them casually; confirm against the DSPy signature documentation.
class Review2Aspects(dspy.Signature):
    """
    Identify aspects and their sentiments from a customer review. The aspects must be words or phrases in the review.
    The response should be a Python dictionary, where each key is an aspect and the value is a sentiment label.
    A label of 1 indicates positive sentiment, 0 indicates neutral sentiment, and -1 indicates negative sentiment.
    """

    # Input field: the review text.
    review: str = dspy.InputField(desc="a customer review")
    # Output field annotated as `dict`; the description is dedented so the
    # indentation of the triple-quoted literal does not end up in the text.
    # NOTE(review): `format=dict` passes the dict type itself as the formatter —
    # confirm this is what the typed predictor expects.
    aspects_with_label: dict = dspy.OutputField(format=dict, desc=dedent("""
        a single Python dictionary, where each key is an aspect and the value is the label,
        with label 1 indicating positive sentiment, 0 indicating neutral sentiment, and -1
        indicating negative sentiment.
        """))

In [8]:
class ABSAPipeline(dspy.Module):
    """DSPy module that turns a review into an aspect → sentiment-label dictionary."""

    def __init__(self):
        super().__init__()
        # Single typed chain-of-thought predictor built from the Review2Aspects signature.
        self.generate_aspects = dspy.TypedChainOfThought(Review2Aspects)

    def forward(self, review):
        """Run the predictor on ``review`` and return only the extracted aspects.

        Args:
            review: the customer review text.

        Returns:
            A ``dspy.Prediction`` whose ``aspects_with_label`` field carries the
            predictor's output; any other fields of the raw prediction are dropped.
        """
        aspects = self.generate_aspects(review=review).aspects_with_label
        return dspy.Prediction(aspects_with_label=aspects)

In [10]:
def validate_venn(example, pred, trace=None):
    """Score a prediction by its overlap with the gold aspect → label dictionary.

    The score is ``matches / union`` where ``matches`` counts gold aspects whose
    predicted label is identical, and ``union`` is the number of distinct
    aspects appearing in either dictionary. Spurious predicted aspects and
    missed gold aspects therefore both lower the score.

    Args:
        example: object exposing the gold ``aspects_with_label`` mapping.
        pred: object exposing the predicted ``aspects_with_label`` mapping.
        trace: ``None`` when the metric is used for evaluation; any other value
            switches to the strict boolean form used to accept or reject a
            bootstrapped demonstration.

    Returns:
        ``float`` in ``[0, 1]`` when ``trace is None``; otherwise ``bool``,
        true only if more than 80% of the union is matched exactly.
        A missing or non-mapping ``aspects_with_label`` on either side scores
        ``0.0`` / ``False`` instead of raising.
    """
    from collections.abc import Mapping

    gold = getattr(example, "aspects_with_label", None)
    predicted = getattr(pred, "aspects_with_label", None)

    if not isinstance(gold, Mapping) or not isinstance(predicted, Mapping):
        # Previously a bare `except:` printed the pair and then fell through to
        # code that used the undefined `total_count`. Keep the diagnostic print,
        # but score the malformed prediction as a miss.
        print([gold, predicted])
        return 0.0 if trace is None else False

    # Size of the key union (same value as len() of the merged dictionaries).
    total_count = len(set(gold) | set(predicted))
    accurate_count = sum(1 for k, v in gold.items() if predicted.get(k) == v)

    if trace is None:
        if total_count == 0:
            # Both dictionaries are empty: nothing was missed or invented.
            return 1.0
        return float(accurate_count) / total_count
    # Strict threshold for bootstrapping; unchanged from the original expression.
    return accurate_count > total_count * 0.8

In [11]:
from dspy.teleprompt import BootstrapFewShotWithRandomSearch

# LM client used for every call inside the `with` block below.
# NOTE(review): max_tokens=4096 is a large completion budget for gpt-3.5-turbo —
# confirm it fits the model's context window together with the few-shot prompt.
gpt = dspy.OpenAI(model="gpt-3.5-turbo", max_tokens=4096)
with dspy.settings.context(lm=gpt):
    uncompiled_pipeline = ABSAPipeline()
    # NOTE(review): this baseline prediction is overwritten a few lines below
    # before it is ever displayed; keep it under a separate name if a
    # before/after comparison is wanted.
    pred = uncompiled_pipeline(test_dspy[0].review)
    optimizer = BootstrapFewShotWithRandomSearch(metric=validate_venn)
    # Student and teacher are both fresh ABSAPipeline instances; only the first
    # 30 training examples are used.
    # NOTE(review): no validation set is passed and the logged scores are
    # reported "/ 30", so candidates appear to be scored on the same 30
    # examples they were bootstrapped from — confirm and consider a held-out valset.
    compiled_pipeline = optimizer.compile(ABSAPipeline(), teacher=ABSAPipeline(), trainset=train_dspy[:30])
    pred = compiled_pipeline(test_dspy[0].review)
# Show the compiled pipeline's aspects for the first test review.
print(pred.aspects_with_label)

Going to sample between 1 and 4 traces per predictor.
Will attempt to train 16 candidate sets.


Average Metric: 5.483333333333333 / 30  (18.3): 100%|█| 30/30 [00:20<00:00,  1.45i
  df = df.applymap(truncate_cell)


Average Metric: 5.483333333333333 / 30  (18.3%)
Score: 18.28 for set: [0]
New best score: 18.28 for seed -3
Scores so far: [18.28]
Best score: 18.28


Average Metric: 21.75 / 30  (72.5): 100%|█████████| 30/30 [00:07<00:00,  4.00it/s]


Average Metric: 21.75 / 30  (72.5%)
Score: 72.5 for set: [16]
New best score: 72.5 for seed -2
Scores so far: [18.28, 72.5]
Best score: 72.5


 53%|████████████████████████                     | 16/30 [00:13<00:11,  1.22it/s]


Bootstrapped 4 full traces after 17 examples in round 0.


Average Metric: 20.644444444444446 / 30  (68.8): 100%|█| 30/30 [00:08<00:00,  3.61


Average Metric: 20.644444444444446 / 30  (68.8%)
Score: 68.81 for set: [16]
Scores so far: [18.28, 72.5, 68.81]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.8555555555555556
Average of max per entry across top 3 scores: 0.8555555555555556
Average of max per entry across top 5 scores: 0.8555555555555556
Average of max per entry across top 8 scores: 0.8555555555555556
Average of max per entry across top 9999 scores: 0.8555555555555556


 90%|████████████████████████████████████████▌    | 27/30 [00:37<00:04,  1.40s/it]


Bootstrapped 4 full traces after 28 examples in round 0.


Average Metric: 2.0 / 3  (66.7):  10%|█▎           | 3/30 [00:01<00:15,  1.70it/s]

Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 4.083333333333334 / 8  (51.0):  27%|▎| 8/30 [00:07<00:20,  1.06it/

Backing off 0.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.3 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 5.416666666666667 / 10  (54.2):  33%|▎| 10/30 [00:09<00:21,  1.06s

Backing off 1.9 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 3.0 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 2.1 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 9.150000000000002 / 15  (61.0):  50%|▌| 15/30 [00:14<00:13,  1.10i

Backing off 2.4 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.9 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 9.650000000000002 / 16  (60.3):  53%|▌| 16/30 [00:15<00:12,  1.16i

Backing off 0.3 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.650000000000002 / 17  (62.6):  57%|▌| 17/30 [00:15<00:10,  1.29

Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 12.650000000000002 / 21  (60.2):  70%|▋| 21/30 [00:19<00:07,  1.15

Backing off 3.0 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.4 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 13.650000000000002 / 22  (62.0):  73%|▋| 22/30 [00:20<00:07,  1.13

Backing off 1.2 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.7 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 14.983333333333336 / 25  (59.9):  83%|▊| 25/30 [00:23<00:04,  1.01

Backing off 11.2 seconds after 5 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 15.983333333333336 / 27  (59.2):  90%|▉| 27/30 [00:27<00:03,  1.28

Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 18.427777777777777 / 30  (61.4): 100%|█| 30/30 [00:36<00:00,  1.22
  df = df.applymap(truncate_cell)


Average Metric: 18.427777777777777 / 30  (61.4%)
Score: 61.43 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.8555555555555556
Average of max per entry across top 3 scores: 0.95
Average of max per entry across top 5 scores: 0.95
Average of max per entry across top 8 scores: 0.95
Average of max per entry across top 9999 scores: 0.95


 23%|██████████▋                                   | 7/30 [00:08<00:28,  1.26s/it]


Bootstrapped 2 full traces after 8 examples in round 0.


Average Metric: 2.333333333333333 / 3  (77.8):  10%| | 3/30 [00:02<00:19,  1.42it/

Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 2.583333333333333 / 4  (64.6):  13%|▏| 4/30 [00:03<00:26,  1.04s/i

Backing off 1.7 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 4.5 / 7  (64.3):  23%|███          | 7/30 [00:05<00:16,  1.37it/s]

Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.2 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 6.583333333333333 / 11  (59.8):  37%|▎| 11/30 [00:09<00:13,  1.42i

Backing off 2.4 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.2 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 7.583333333333333 / 14  (54.2):  47%|▍| 14/30 [00:13<00:18,  1.13s

Backing off 0.6 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.3 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.083333333333332 / 18  (56.0):  60%|▌| 18/30 [00:16<00:10,  1.16

Backing off 3.7 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 8.0 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 11.083333333333332 / 21  (52.8):  70%|▋| 21/30 [00:20<00:09,  1.08

Backing off 0.1 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 13.583333333333332 / 24  (56.6):  80%|▊| 24/30 [00:22<00:05,  1.07

Backing off 0.5 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.1 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.916666666666664 / 30  (56.4): 100%|█| 30/30 [00:30<00:00,  1.01


Average Metric: 16.916666666666664 / 30  (56.4%)
Score: 56.39 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.8555555555555556
Average of max per entry across top 3 scores: 0.95
Average of max per entry across top 5 scores: 0.9833333333333333
Average of max per entry across top 8 scores: 0.9833333333333333
Average of max per entry across top 9999 scores: 0.9833333333333333


 13%|██████▏                                       | 4/30 [00:05<00:36,  1.39s/it]


Bootstrapped 1 full traces after 5 examples in round 0.


Average Metric: 3.0 / 4  (75.0):  13%|█▋           | 4/30 [00:03<00:22,  1.14it/s]

Backing off 0.3 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 4.5 / 8  (56.2):  27%|███▍         | 8/30 [00:07<00:17,  1.23it/s]

Backing off 0.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.1 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.6 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.6 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 8.866666666666665 / 15  (59.1):  50%|▌| 15/30 [00:12<00:11,  1.30i

Backing off 2.7 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 8.866666666666665 / 16  (55.4):  53%|▌| 16/30 [00:13<00:10,  1.36i

Backing off 1.8 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.366666666666665 / 18  (57.6):  60%|▌| 18/30 [00:14<00:07,  1.56

Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 3.7 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 14.366666666666665 / 23  (62.5):  77%|▊| 23/30 [00:17<00:04,  1.64

Backing off 2.1 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.6 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.50952380952381 / 27  (61.1):  90%|▉| 27/30 [00:21<00:02,  1.11i

Backing off 5.2 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.50952380952381 / 30  (55.0): 100%|█| 30/30 [00:28<00:00,  1.06i
  df = df.applymap(truncate_cell)


Average Metric: 16.50952380952381 / 30  (55.0%)
Score: 55.03 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.8555555555555556
Average of max per entry across top 3 scores: 0.95
Average of max per entry across top 5 scores: 1.0
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 10%|████▌                                         | 3/30 [00:02<00:25,  1.07it/s]


Bootstrapped 2 full traces after 4 examples in round 0.


Average Metric: 2.833333333333333 / 4  (70.8):  13%|▏| 4/30 [00:02<00:11,  2.17it/

Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 3.833333333333333 / 5  (76.7):  17%|▏| 5/30 [00:03<00:20,  1.20it/

Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 4.833333333333333 / 6  (80.6):  20%|▏| 6/30 [00:05<00:26,  1.10s/i

Backing off 0.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.1 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.8 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 5.458333333333333 / 7  (78.0):  23%|▏| 7/30 [00:05<00:22,  1.00it/

Backing off 0.8 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 6.458333333333333 / 9  (71.8):  30%|▎| 9/30 [00:07<00:21,  1.02s/i

Backing off 0.3 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.7 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 7.458333333333333 / 10  (74.6):  33%|▎| 10/30 [00:09<00:24,  1.21s

Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {'max_tokens': 2048, 'n': 1, 'temperature': 0.0}


Average Metric: 8.458333333333332 / 11  (76.9):  37%|▎| 11/30 [00:11<00:25,  1.34s

Backing off 1.5 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {'max_tokens': 2048, 'n': 1, 'temperature': 0.0}
Backing off 0.4 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 9.058333333333332 / 12  (75.5):  40%|▍| 12/30 [00:13<00:31,  1.76s

Backing off 1.3 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 9.391666666666666 / 13  (72.2):  43%|▍| 13/30 [00:14<00:22,  1.35s

Backing off 6.4 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.725 / 15  (71.5):  50%|████    | 15/30 [00:16<00:16,  1.09s/it]

Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 11.225 / 17  (66.0):  57%|████▌   | 17/30 [00:18<00:12,  1.00it/s]

Backing off 0.2 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 12.225 / 19  (64.3):  63%|█████   | 19/30 [00:21<00:14,  1.31s/it]

Backing off 1.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 3.0 seconds after 5 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 13.291666666666666 / 21  (63.3):  70%|▋| 21/30 [00:23<00:10,  1.19

Backing off 0.3 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 3.2 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.958333333333332 / 25  (67.8):  83%|▊| 25/30 [00:28<00:04,  1.06

Backing off 15.1 seconds after 5 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 19.791666666666664 / 30  (66.0): 100%|█| 30/30 [00:46<00:00,  1.54
  df = df.applymap(truncate_cell)


Average Metric: 19.791666666666664 / 30  (66.0%)
Score: 65.97 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.8555555555555556
Average of max per entry across top 3 scores: 0.9388888888888889
Average of max per entry across top 5 scores: 0.9888888888888889
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 27%|████████████▎                                 | 8/30 [00:08<00:24,  1.10s/it]


Bootstrapped 2 full traces after 9 examples in round 0.


Average Metric: 6.749999999999999 / 10  (67.5):  33%|▎| 10/30 [00:02<00:05,  3.40i

Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 7.749999999999999 / 11  (70.5):  37%|▎| 11/30 [00:03<00:04,  3.89i

Backing off 0.8 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 9.75 / 13  (75.0):  40%|████      | 12/30 [00:03<00:07,  2.37it/s]

Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.3 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 13.0 / 17  (76.5):  57%|█████▋    | 17/30 [00:06<00:06,  1.94it/s]

Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.0 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 14.0 / 18  (77.8):  60%|██████    | 18/30 [00:07<00:08,  1.39it/s]

Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 14.0 / 20  (70.0):  67%|██████▋   | 20/30 [00:09<00:07,  1.33it/s]

Backing off 0.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {'max_tokens': 2048, 'n': 1, 'temperature': 0.0}


Average Metric: 14.5 / 21  (69.0):  70%|███████   | 21/30 [00:10<00:06,  1.35it/s]

Backing off 0.9 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.7 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 3.3 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.166666666666664 / 23  (70.3):  77%|▊| 23/30 [00:11<00:05,  1.32

Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 18.166666666666664 / 25  (72.7):  83%|▊| 25/30 [00:13<00:03,  1.31

Backing off 1.8 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 2.2 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 19.566666666666663 / 27  (72.5):  90%|▉| 27/30 [00:15<00:03,  1.02

Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 21.066666666666663 / 30  (70.2): 100%|█| 30/30 [00:19<00:00,  1.52


Average Metric: 21.066666666666663 / 30  (70.2%)
Score: 70.22 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97, 70.22]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.9111111111111111
Average of max per entry across top 3 scores: 0.9277777777777777
Average of max per entry across top 5 scores: 0.9722222222222222
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 20%|█████████▏                                    | 6/30 [00:06<00:27,  1.16s/it]


Bootstrapped 3 full traces after 7 examples in round 0.


Average Metric: 2.0 / 2  (100.0):   7%|▊           | 2/30 [00:01<00:19,  1.44it/s]

Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 5.000000000000001 / 6  (83.3):  20%|▏| 6/30 [00:05<00:21,  1.11it/

Backing off 0.8 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.6 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 6.333333333333334 / 8  (79.2):  27%|▎| 8/30 [00:07<00:21,  1.01it/

Backing off 1.3 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 7.333333333333334 / 9  (81.5):  30%|▎| 9/30 [00:08<00:19,  1.09it/

Backing off 1.1 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.3 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 2.0 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 2.2 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 9.483333333333334 / 12  (79.0):  40%|▍| 12/30 [00:12<00:17,  1.01i

Backing off 8.0 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.816666666666668 / 15  (72.1):  50%|▌| 15/30 [00:15<00:13,  1.10

Backing off 1.8 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 12.316666666666668 / 17  (72.5):  57%|▌| 17/30 [00:17<00:12,  1.00

Backing off 0.3 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.0 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 3.8 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 14.316666666666668 / 21  (68.2):  70%|▋| 21/30 [00:21<00:09,  1.01

Backing off 2.8 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.56666666666667 / 24  (69.0):  80%|▊| 24/30 [00:24<00:05,  1.05i

Backing off 14.3 seconds after 5 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 17.56666666666667 / 25  (70.3):  83%|▊| 25/30 [00:25<00:05,  1.01s

Backing off 1.7 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 18.56666666666667 / 26  (71.4):  87%|▊| 26/30 [00:26<00:03,  1.06i

Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 20.56666666666667 / 30  (68.6): 100%|█| 30/30 [00:40<00:00,  1.36s
  df = df.applymap(truncate_cell)


Average Metric: 20.56666666666667 / 30  (68.6%)
Score: 68.56 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97, 70.22, 68.56]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.9111111111111111
Average of max per entry across top 3 scores: 0.9277777777777777
Average of max per entry across top 5 scores: 0.9888888888888889
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 13%|██████▏                                       | 4/30 [00:05<00:38,  1.49s/it]


Bootstrapped 1 full traces after 5 examples in round 0.


Average Metric: 3.566666666666667 / 6  (59.4):  20%|▏| 6/30 [00:03<00:12,  1.94it/

Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 5.233333333333333 / 8  (65.4):  23%|▏| 7/30 [00:03<00:09,  2.53it/

Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 5.566666666666666 / 9  (61.9):  30%|▎| 9/30 [00:04<00:09,  2.23it/

Backing off 1.1 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 7.566666666666666 / 12  (63.1):  40%|▍| 12/30 [00:05<00:09,  1.88i

Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 9.65 / 15  (64.3):  50%|█████     | 15/30 [00:07<00:10,  1.47it/s]

Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.4 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.872222222222222 / 17  (64.0):  57%|▌| 17/30 [00:09<00:09,  1.33

Backing off 0.4 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 11.872222222222222 / 18  (66.0):  60%|▌| 18/30 [00:10<00:10,  1.14

Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 12.622222222222222 / 19  (66.4):  63%|▋| 19/30 [00:12<00:11,  1.03

Backing off 1.2 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.4 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.3 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 15.122222222222222 / 22  (68.7):  73%|▋| 22/30 [00:14<00:06,  1.21

Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.2 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.3 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.12222222222222 / 24  (67.2):  80%|▊| 24/30 [00:16<00:05,  1.18i

Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.6 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 17.62222222222222 / 26  (67.8):  87%|▊| 26/30 [00:18<00:03,  1.23i

Backing off 1.1 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 19.37222222222222 / 28  (69.2):  93%|▉| 28/30 [00:20<00:02,  1.10s

Backing off 0.3 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 21.12222222222222 / 30  (70.4): 100%|█| 30/30 [00:23<00:00,  1.25i
  df = df.applymap(truncate_cell)


Average Metric: 21.12222222222222 / 30  (70.4%)
Score: 70.41 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97, 70.22, 68.56, 70.41]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.9161111111111111
Average of max per entry across top 3 scores: 0.975
Average of max per entry across top 5 scores: 0.9916666666666667
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 23%|██████████▋                                   | 7/30 [00:08<00:29,  1.26s/it]


Bootstrapped 3 full traces after 8 examples in round 0.


Average Metric: 1.3333333333333333 / 2  (66.7):   7%| | 2/30 [00:02<00:28,  1.03s/

Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 4.666666666666666 / 7  (66.7):  23%|▏| 7/30 [00:07<00:22,  1.02it/

Backing off 1.3 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.2 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 5.916666666666666 / 9  (65.7):  30%|▎| 9/30 [00:09<00:19,  1.07it/

Backing off 1.2 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 7.716666666666667 / 12  (64.3):  40%|▍| 12/30 [00:13<00:21,  1.19s

Backing off 0.0 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 3.7 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 7.916666666666667 / 13  (60.9):  43%|▍| 13/30 [00:14<00:18,  1.11s

Backing off 0.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 9.25 / 15  (61.7):  50%|█████     | 15/30 [00:16<00:14,  1.02it/s]

Backing off 1.6 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 9.25 / 17  (54.4):  57%|█████▋    | 17/30 [00:17<00:10,  1.18it/s]

Backing off 0.8 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.25 / 18  (56.9):  60%|█████▍   | 18/30 [00:18<00:12,  1.04s/it]

Backing off 6.5 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 12.25 / 20  (61.2):  67%|██████   | 20/30 [00:20<00:09,  1.10it/s]

Backing off 0.3 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.9 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 14.75 / 23  (64.1):  77%|██████▉  | 23/30 [00:23<00:06,  1.13it/s]

Backing off 1.7 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.0 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 14.75 / 24  (61.5):  80%|███████▏ | 24/30 [00:24<00:05,  1.08it/s]

Backing off 7.1 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.25 / 26  (62.5):  87%|███████▊ | 26/30 [00:27<00:05,  1.41s/it]

Backing off 0.9 seconds after 5 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.75 / 27  (62.0):  90%|████████ | 27/30 [00:29<00:04,  1.45s/it]

Backing off 12.3 seconds after 5 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 18.75 / 30  (62.5): 100%|█████████| 30/30 [00:44<00:00,  1.47s/it]


Average Metric: 18.75 / 30  (62.5%)
Score: 62.5 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97, 70.22, 68.56, 70.41, 62.5]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.9161111111111111
Average of max per entry across top 3 scores: 0.975
Average of max per entry across top 5 scores: 0.9916666666666667
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 37%|████████████████▌                            | 11/30 [00:22<00:38,  2.02s/it]


Bootstrapped 2 full traces after 12 examples in round 0.


Average Metric: 11.952380952380953 / 23  (52.0):  77%|▊| 23/30 [00:04<00:01,  4.32

Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 18.452380952380953 / 30  (61.5): 100%|█| 30/30 [00:09<00:00,  3.07
  df = df.applymap(truncate_cell)


Average Metric: 18.452380952380953 / 30  (61.5%)
Score: 61.51 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97, 70.22, 68.56, 70.41, 62.5, 61.51]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.9161111111111111
Average of max per entry across top 3 scores: 0.975
Average of max per entry across top 5 scores: 0.9916666666666667
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 33%|███████████████                              | 10/30 [00:11<00:23,  1.16s/it]


Bootstrapped 4 full traces after 11 examples in round 0.


Average Metric: 3.777777777777778 / 7  (54.0):  23%|▏| 7/30 [00:04<00:16,  1.38it/

Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 4.027777777777779 / 8  (50.3):  27%|▎| 8/30 [00:05<00:18,  1.21it/

Backing off 0.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.2 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 6.361111111111112 / 11  (57.8):  37%|▎| 11/30 [00:09<00:18,  1.01i

Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 7.861111111111112 / 13  (60.5):  43%|▍| 13/30 [00:11<00:17,  1.04s

Backing off 1.8 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 2.0 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.0 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 9.86111111111111 / 15  (65.7):  50%|▌| 15/30 [00:12<00:13,  1.08it

Backing off 1.7 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 11.36111111111111 / 17  (66.8):  57%|▌| 17/30 [00:15<00:15,  1.21s

Backing off 4.0 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 7.3 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 12.027777777777777 / 19  (63.3):  63%|▋| 19/30 [00:18<00:12,  1.12

Backing off 2.8 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 14.86111111111111 / 24  (61.9):  80%|▊| 24/30 [00:22<00:04,  1.31i

Backing off 1.1 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 15.86111111111111 / 25  (63.4):  83%|▊| 25/30 [00:23<00:03,  1.34i

Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.7 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 17.427777777777777 / 28  (62.2):  93%|▉| 28/30 [00:27<00:02,  1.07

Backing off 3.2 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}Backing off 0.7 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}



Average Metric: 19.427777777777777 / 30  (64.8): 100%|█| 30/30 [00:30<00:00,  1.03
  df = df.applymap(truncate_cell)


Average Metric: 19.427777777777777 / 30  (64.8%)
Score: 64.76 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97, 70.22, 68.56, 70.41, 62.5, 61.51, 64.76]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.9161111111111111
Average of max per entry across top 3 scores: 0.975
Average of max per entry across top 5 scores: 0.9916666666666667
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 20%|█████████▏                                    | 6/30 [00:07<00:31,  1.29s/it]


Bootstrapped 1 full traces after 7 examples in round 0.


Average Metric: 1.5555555555555554 / 4  (38.9):  13%|▏| 4/30 [00:03<00:18,  1.40it

Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 5.805555555555555 / 9  (64.5):  30%|▎| 9/30 [00:05<00:14,  1.45it/

Backing off 0.3 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 6.555555555555555 / 10  (65.6):  33%|▎| 10/30 [00:06<00:15,  1.29i

Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.7 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 8.555555555555555 / 13  (65.8):  43%|▍| 13/30 [00:09<00:14,  1.15i

Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 9.222222222222221 / 14  (65.9):  47%|▍| 14/30 [00:11<00:16,  1.05s

Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.3 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 3.0 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.222222222222221 / 15  (68.1):  50%|▌| 15/30 [00:11<00:14,  1.05

Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {'max_tokens': 2048, 'n': 1, 'temperature': 0.0}


Average Metric: 11.055555555555555 / 17  (65.0):  57%|▌| 17/30 [00:13<00:12,  1.05

Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {'max_tokens': 2048, 'n': 1, 'temperature': 0.0}
Backing off 1.8 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 2.0 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 12.38888888888889 / 19  (65.2):  63%|▋| 19/30 [00:16<00:13,  1.21s

Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 12.38888888888889 / 20  (61.9):  67%|▋| 20/30 [00:17<00:10,  1.07s

Backing off 1.4 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {'max_tokens': 2048, 'n': 1, 'temperature': 0.0}
Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {'max_tokens': 2048, 'n': 1, 'temperature': 0.0}


Average Metric: 15.13888888888889 / 23  (65.8):  77%|▊| 23/30 [00:20<00:06,  1.09i

Backing off 1.6 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 15.63888888888889 / 24  (65.2):  80%|▊| 24/30 [00:21<00:05,  1.01i

Backing off 0.3 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.9 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {'max_tokens': 2048, 'n': 1, 'temperature': 0.0}


Average Metric: 17.88888888888889 / 28  (63.9):  93%|▉| 28/30 [00:26<00:02,  1.10s

Backing off 1.5 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 19.88888888888889 / 30  (66.3): 100%|█| 30/30 [00:29<00:00,  1.03i


Average Metric: 19.88888888888889 / 30  (66.3%)
Score: 66.3 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97, 70.22, 68.56, 70.41, 62.5, 61.51, 64.76, 66.3]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.9161111111111111
Average of max per entry across top 3 scores: 0.975
Average of max per entry across top 5 scores: 0.9916666666666667
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 63%|████████████████████████████▌                | 19/30 [00:23<00:13,  1.24s/it]


Bootstrapped 4 full traces after 20 examples in round 0.


Average Metric: 0.9444444444444444 / 4  (23.6):  13%|▏| 4/30 [00:05<00:36,  1.40s/

Backing off 0.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 3.4444444444444446 / 7  (49.2):  23%|▏| 7/30 [00:08<00:23,  1.02s/

Backing off 0.0 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 6.194444444444445 / 10  (61.9):  33%|▎| 10/30 [00:11<00:22,  1.14s

Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 8.194444444444445 / 12  (68.3):  40%|▍| 12/30 [00:14<00:20,  1.16s

Backing off 2.0 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.0 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 8.394444444444444 / 13  (64.6):  43%|▍| 13/30 [00:15<00:19,  1.14s

Backing off 0.9 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.5 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 8.727777777777778 / 14  (62.3):  47%|▍| 14/30 [00:16<00:19,  1.23s

Backing off 3.5 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.727777777777778 / 16  (67.0):  53%|▌| 16/30 [00:18<00:16,  1.16

Backing off 1.1 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 11.727777777777778 / 17  (69.0):  57%|▌| 17/30 [00:20<00:16,  1.26

Backing off 1.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 12.727777777777778 / 18  (70.7):  60%|▌| 18/30 [00:21<00:15,  1.27

Backing off 1.7 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 15.227777777777778 / 22  (69.2):  73%|▋| 22/30 [00:25<00:07,  1.04

Backing off 2.1 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.227777777777778 / 24  (67.6):  80%|▊| 24/30 [00:27<00:05,  1.10

Backing off 1.7 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 17.227777777777778 / 25  (68.9):  83%|▊| 25/30 [00:28<00:04,  1.06

Backing off 1.4 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.2 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 18.477777777777778 / 28  (66.0):  93%|▉| 28/30 [00:31<00:02,  1.11

Backing off 3.1 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 18.81111111111111 / 30  (62.7): 100%|█| 30/30 [00:36<00:00,  1.22s
  df = df.applymap(truncate_cell)


Average Metric: 18.81111111111111 / 30  (62.7%)
Score: 62.7 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97, 70.22, 68.56, 70.41, 62.5, 61.51, 64.76, 66.3, 62.7]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.9161111111111111
Average of max per entry across top 3 scores: 0.975
Average of max per entry across top 5 scores: 0.9916666666666667
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 47%|█████████████████████                        | 14/30 [00:17<00:19,  1.24s/it]


Bootstrapped 4 full traces after 15 examples in round 0.


Average Metric: 3.0 / 3  (100.0):  10%|█▏          | 3/30 [00:03<00:30,  1.15s/it]

Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 5.0 / 6  (83.3):  20%|██▌          | 6/30 [00:06<00:26,  1.11s/it]

Backing off 1.7 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 5.333333333333333 / 7  (76.2):  23%|▏| 7/30 [00:07<00:23,  1.02s/i

Backing off 1.2 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}Backing off 1.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}

Backing off 1.4 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 8.916666666666666 / 12  (74.3):  40%|▍| 12/30 [00:12<00:16,  1.08i

Backing off 1.4 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}Backing off 1.4 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}

Backing off 1.7 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.75 / 16  (67.2):  53%|████▊    | 16/30 [00:15<00:12,  1.14it/s]

Backing off 1.2 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 2.5 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 7.0 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 12.916666666666666 / 21  (61.5):  70%|▋| 21/30 [00:20<00:09,  1.03

Backing off 1.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 13.916666666666666 / 22  (63.3):  73%|▋| 22/30 [00:21<00:07,  1.05

Backing off 1.6 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 13.916666666666666 / 23  (60.5):  77%|▊| 23/30 [00:22<00:06,  1.10

Backing off 15.9 seconds after 5 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 14.416666666666666 / 24  (60.1):  80%|▊| 24/30 [00:23<00:05,  1.04

Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.166666666666664 / 26  (62.2):  87%|▊| 26/30 [00:28<00:06,  1.68

Backing off 0.1 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 18.166666666666664 / 30  (60.6): 100%|█| 30/30 [00:40<00:00,  1.35
  df = df.applymap(truncate_cell)


Average Metric: 18.166666666666664 / 30  (60.6%)
Score: 60.56 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97, 70.22, 68.56, 70.41, 62.5, 61.51, 64.76, 66.3, 62.7, 60.56]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.9161111111111111
Average of max per entry across top 3 scores: 0.975
Average of max per entry across top 5 scores: 0.9916666666666667
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 73%|█████████████████████████████████            | 22/30 [00:29<00:10,  1.34s/it]


Bootstrapped 3 full traces after 23 examples in round 0.


Average Metric: 4.466666666666667 / 9  (49.6):  30%|▎| 9/30 [00:04<00:09,  2.18it/

Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 4.8 / 10  (48.0):  33%|███▋       | 10/30 [00:04<00:08,  2.40it/s]

Backing off 0.3 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 8.883333333333335 / 15  (59.2):  50%|▌| 15/30 [00:07<00:08,  1.80i

Backing off 1.1 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 8.883333333333335 / 16  (55.5):  53%|▌| 16/30 [00:08<00:08,  1.71i

Backing off 1.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 8.883333333333335 / 18  (49.4):  60%|▌| 18/30 [00:09<00:07,  1.55i

Backing off 3.7 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.9 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 11.383333333333335 / 22  (51.7):  73%|▋| 22/30 [00:13<00:06,  1.27

Backing off 3.0 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.9 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 13.383333333333335 / 24  (55.8):  80%|▊| 24/30 [00:15<00:04,  1.21

Backing off 0.9 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 14.383333333333335 / 25  (57.5):  83%|▊| 25/30 [00:16<00:04,  1.09

Backing off 0.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 2.2 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 14.883333333333335 / 26  (57.2):  87%|▊| 26/30 [00:18<00:04,  1.21

Backing off 2.5 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 10.1 seconds after 5 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.55 / 30  (55.2): 100%|█████████| 30/30 [00:29<00:00,  1.00it/s]
  df = df.applymap(truncate_cell)


Average Metric: 16.55 / 30  (55.2%)
Score: 55.17 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97, 70.22, 68.56, 70.41, 62.5, 61.51, 64.76, 66.3, 62.7, 60.56, 55.17]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.9161111111111111
Average of max per entry across top 3 scores: 0.975
Average of max per entry across top 5 scores: 0.9916666666666667
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


  3%|█▌                                            | 1/30 [00:00<00:25,  1.15it/s]


Bootstrapped 1 full traces after 2 examples in round 0.


Average Metric: 4.344444444444445 / 9  (48.3):  30%|▎| 9/30 [00:03<00:07,  2.82it/

Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 5.411111111111112 / 11  (49.2):  33%|▎| 10/30 [00:05<00:11,  1.71i

Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 7.411111111111112 / 14  (52.9):  47%|▍| 14/30 [00:08<00:11,  1.42i

Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.9 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 7.744444444444445 / 15  (51.6):  50%|▌| 15/30 [00:09<00:13,  1.13i

Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.8 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.744444444444445 / 18  (59.7):  60%|▌| 18/30 [00:11<00:10,  1.16

Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}Backing off 1.6 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}

Backing off 0.8 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {'max_tokens': 2048, 'n': 1, 'temperature': 0.0}


Average Metric: 10.744444444444445 / 19  (56.5):  63%|▋| 19/30 [00:13<00:10,  1.05

Backing off 1.1 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 12.244444444444445 / 21  (58.3):  70%|▋| 21/30 [00:14<00:08,  1.08

Backing off 0.2 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 13.244444444444445 / 22  (60.2):  73%|▋| 22/30 [00:15<00:06,  1.23

Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {'max_tokens': 2048, 'n': 1, 'temperature': 0.0}


Average Metric: 15.244444444444445 / 24  (63.5):  80%|▊| 24/30 [00:17<00:05,  1.02

Backing off 0.4 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.6 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 3.6 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 17.744444444444447 / 27  (65.7):  90%|▉| 27/30 [00:20<00:02,  1.10

Backing off 1.8 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 19.744444444444447 / 30  (65.8): 100%|█| 30/30 [00:25<00:00,  1.18
  df = df.applymap(truncate_cell)


Average Metric: 19.744444444444447 / 30  (65.8%)
Score: 65.81 for set: [16]
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97, 70.22, 68.56, 70.41, 62.5, 61.51, 64.76, 66.3, 62.7, 60.56, 55.17, 65.81]
Best score: 72.5
Average of max per entry across top 1 scores: 0.725
Average of max per entry across top 2 scores: 0.9161111111111111
Average of max per entry across top 3 scores: 0.975
Average of max per entry across top 5 scores: 0.9916666666666667
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0


 17%|███████▋                                      | 5/30 [00:05<00:28,  1.14s/it]


Bootstrapped 2 full traces after 6 examples in round 0.


Average Metric: 2.1944444444444446 / 3  (73.1):  10%| | 3/30 [00:03<00:35,  1.31s/

Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.7 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 3.4444444444444446 / 5  (68.9):  17%|▏| 5/30 [00:06<00:30,  1.21s/

Backing off 1.0 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 4.444444444444445 / 6  (74.1):  20%|▏| 6/30 [00:06<00:25,  1.07s/i

Backing off 1.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.9 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.2 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.6 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 8.527777777777779 / 12  (71.1):  40%|▍| 12/30 [00:11<00:15,  1.18i

Backing off 3.6 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.9 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.8 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.0 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 9.861111111111112 / 14  (70.4):  47%|▍| 14/30 [00:13<00:14,  1.14i

Backing off 0.3 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.361111111111112 / 15  (69.1):  50%|▌| 15/30 [00:14<00:12,  1.17

Backing off 0.7 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 10.861111111111112 / 16  (67.9):  53%|▌| 16/30 [00:15<00:12,  1.10

Backing off 1.1 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.1 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 11.694444444444446 / 18  (65.0):  60%|▌| 18/30 [00:17<00:12,  1.06

Backing off 0.6 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.0 seconds after 2 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.5 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 1.3 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 3.6 seconds after 3 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 16.444444444444446 / 23  (71.5):  77%|▊| 23/30 [00:21<00:06,  1.05

Backing off 0.2 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {'max_tokens': 2048, 'n': 1, 'temperature': 0.0}
Backing off 0.6 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 0.3 seconds after 1 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}
Backing off 5.4 seconds after 4 tries calling function <function GPT3.request at 0x10dedca40> with kwargs {}


Average Metric: 21.944444444444446 / 30  (73.1): 100%|█| 30/30 [00:29<00:00,  1.03


Average Metric: 21.944444444444446 / 30  (73.1%)
Score: 73.15 for set: [16]
New best score: 73.15 for seed 15
Scores so far: [18.28, 72.5, 68.81, 61.43, 56.39, 55.03, 65.97, 70.22, 68.56, 70.41, 62.5, 61.51, 64.76, 66.3, 62.7, 60.56, 55.17, 65.81, 73.15]
Best score: 73.15
Average of max per entry across top 1 scores: 0.7314814814814815
Average of max per entry across top 2 scores: 0.9077777777777778
Average of max per entry across top 3 scores: 0.9466666666666667
Average of max per entry across top 5 scores: 0.975
Average of max per entry across top 8 scores: 1.0
Average of max per entry across top 9999 scores: 1.0
19 candidate programs found.
{'menu': 1, 'pub fare': 1, 'burgers': 1, 'steaks': 1, 'shepherds pie': 1, 'portabella lasagna': 1, 'vegetarians': 1}


In [11]:
from dspy.teleprompt import BootstrapFewShotWithRandomSearch

# Smoke-test the uncompiled ABSA pipeline on the first test review using the
# JSON backend, then print the predicted aspect/label mapping.
# NOTE(review): the recorded output of this cell is
# "AttributeError: 'dict' object has no attribute 'strip'". The traceback is not
# shown, so where the dict is produced still needs to be confirmed.
backend = dspy.JSONBackend(model="gpt-3.5-turbo")
# Presumably the backend override only applies inside this context — TODO confirm.
with dspy.settings.context(backend=backend):
    uncompiled_pipeline = ABSAPipeline()
    pred = uncompiled_pipeline(test_dspy[0].review)
    # Disabled optimizer path: random-search few-shot compilation on the first
    # 30 training examples, followed by the same single-review prediction.
    # optimizer = BootstrapFewShotWithRandomSearch(metric=validate_venn)
    # compiled_pipeline = optimizer.compile(ABSAPipeline(), teacher=ABSAPipeline(), trainset=train_dspy[:30])
    # pred = compiled_pipeline(test_dspy[0].review)
print(pred.aspects_with_label)

AttributeError: 'dict' object has no attribute 'strip'

In [48]:
# Dump what the backend recorded; the output below starts with the request's
# input kwargs (the chat messages that were sent).
# NOTE(review): the meaning of the `0` argument is not visible here — confirm.
backend.inspect_history(0)

===INPUT KWARGS===
{ 'messages': [ { 'content': '\n'
                             '    Definition: The output will be the aspects '
                             '(both implicit and explicit) and the aspects '
                             'sentiment polarity.\n'
                             '    \n'
                             '\n'
                             '---\n'
                             '\n'
                             'Provided the following:\n'
                             'Review: a customer review\n'
                             '\n'
                             'Please return the following fields:\n'
                             'Aspects With Label: \n'
                             '        a single Python dictionary, where each '
                             'key is an aspect and the value is the label,\n'
                             '        with label 1 indicating positive '
                             'sentiment, 0 indicating neutral sentiment, and '
             

In [None]:
class SimpleBackend(dspy.TextBackend):
    """Text backend that renders the prompt as JSON spans in one user message.

    The prompt is the signature instructions, followed by one JSON object per
    demo, followed by one JSON object for the active example. The parts are
    joined with a ``---`` separator line.
    """

    # Name of the model this backend targets.
    model: str

    def _example_span(self, signature: dspy.Signature, example: dspy.Example) -> str:
        """Serialize the signature fields present in ``example`` as a JSON object.

        Args:
            signature: Signature whose ``fields`` mapping decides which keys are
                emitted and in which order.
            example: Example (or demo) to read values from. Fields that are not
                present in it are skipped.

        Returns:
            A JSON object string mapping each present field name to the string
            form of its value.
        """
        span = {
            name: str(example[name])
            for name in signature.fields
            if name in example
        }
        return json.dumps(span)

    def prepare_request(self, signature: dspy.Signature, example: dspy.Example, config: dict, **_kwargs) -> dict:
        """Build the request kwargs for one call.

        Args:
            signature: Signature supplying the instructions and the field names.
            example: Active example. Its ``demos`` are rendered before it.
            config: Per-call options. These override ``self.STANDARD_PARAMS``.
            **_kwargs: Accepted and ignored.

        Returns:
            The merged options plus a ``"messages"`` entry holding a single
            user message with the assembled prompt.
        """
        options = {**self.STANDARD_PARAMS, **config}

        # Order matters: instructions first, then every demo, then the active example.
        prompt_spans = [signature.instructions]
        prompt_spans.extend(self._example_span(signature, demo) for demo in example.demos)
        prompt_spans.append(self._example_span(signature, example))

        content = "\n\n---\n\n".join(span.strip() for span in prompt_spans)

        options["messages"] = [{"role": "user", "content": content}]

        # The original fell off the end here and returned None despite the
        # ``-> dict`` annotation; hand the assembled request back to the caller.
        return options

In [54]:
# Show the (field name, FieldInfo) pairs declared on the Review2Aspects signature.
Review2Aspects.fields.items()

dict_items([('review', FieldInfo(annotation=str, required=True, json_schema_extra={'desc': 'a customer review', '__dspy_field_type': 'input', 'prefix': 'Review:'})), ('aspects_with_label', FieldInfo(annotation=str, required=True, json_schema_extra={'desc': '\n        a single Python dictionary, where each key is an aspect and the value is the label,\n        with label 1 indicating positive sentiment, 0 indicating neutral sentiment, and -1\n        indicating negative sentiment.\n        ', '__dspy_field_type': 'output', 'prefix': 'Aspects With Label:'}))])