In [1]:
from pathlib import Path
from dotenv import load_dotenv

# The API keys and tracing flag are read from a dotenv file. To set them inline
# instead, uncomment the following:
#
# import os
#
# os.environ["OPENAI_API_KEY"] = "..."
# os.environ["LANGCHAIN_API_KEY"] = "..."
# os.environ["LANGCHAIN_TRACING_V2"] = "true"

# override=True: values in the file take precedence over variables that are
# already present in the process environment.
load_dotenv(dotenv_path=Path("..", ".env.test"), override=True)

True

In [2]:
import nest_asyncio

# NOTE(review): presumably needed because the notebook kernel already runs an
# asyncio event loop and the evaluation / optimisation calls further down start
# their own -- confirm against the nest_asyncio and zenbase docs.
nest_asyncio.apply()

In [13]:
import json

from zenbase.types import LMRequest, deflm
from langsmith import traceable
from langsmith.schemas import Run, Example
from langsmith.wrappers import wrap_openai
from openai import OpenAI

# Module-level OpenAI client, passed through LangSmith's `wrap_openai`
# (presumably so every completion call is traced -- confirm in the LangSmith docs).
# The LLM functions in this notebook call it as `openai.chat.completions.create`.
openai = wrap_openai(OpenAI())

# Baseline LLM function: one system prompt, one user turn, no few-shot demos
@traceable
def openai_json_response(inputs: dict) -> dict:
    """Ask gpt-3.5-turbo (JSON mode) to solve one problem and return the parsed reply.

    Args:
        inputs: Example inputs; serialised with ``json.dumps`` and sent as the
            single user message.

    Returns:
        The JSON object decoded from the first choice's message content.
    """
    system_message = {
        "role": "system",
        "content": "You are an expert math solver. Your answer must be just the number with no separators, and nothing else. Follow the format of the examples. Think step by step. Respond with a JSON object.",
    }
    user_message = {"role": "user", "content": json.dumps(inputs)}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        response_format={"type": "json_object"},
    )

    raw_reply = completion.choices[0].message.content
    return json.loads(raw_reply)

# Define your Langsmith evaluator
def score_answer(run: Run, example: Example):
    """Score a run 1 if its final answer matches the example's, otherwise 0.

    Both the prediction and the reference are reduced to the text after the last
    ``"#### "`` marker (whitespace-stripped) before comparison. This matters
    because a model that imitates the demo format replies with its worked
    solution followed by ``#### <number>``; comparing that raw text against the
    extracted reference would never match.

    Args:
        run: The traced run; ``run.outputs`` is expected to hold an ``"answer"``
            entry. A run with no outputs (e.g. it raised) or no ``"answer"`` key
            is scored 0 rather than aborting the evaluation.
        example: The dataset example; ``example.outputs["answer"]`` is the
            reference solution.

    Returns:
        ``{"key": "correctness", "score": 0 | 1}``.

    Raises:
        KeyError / TypeError: if the example itself has no ``"answer"`` output --
            that is a dataset problem and is deliberately not swallowed.
    """

    def final_answer(value) -> str:
        # Keep only what follows the last "#### " marker; a value without the
        # marker is returned whole. str() lets numeric JSON answers through.
        return str(value).split("#### ")[-1].strip()

    target = final_answer(example.outputs["answer"])

    predicted = (run.outputs or {}).get("answer")
    is_correct = predicted is not None and final_answer(predicted) == target

    return {
        "key": "correctness",
        "score": int(is_correct),
    }

In [14]:
# Evaluate using LangSmith
from langsmith import Client, evaluate

langsmith = Client()

# Materialise the examples once so the same list can be handed to several
# evaluation runs.
evalset = [*langsmith.list_examples(dataset_name="gsm8k-test-examples")]

# Kept in a dict because the optimiser cell later reuses exactly these settings.
evaluate_kwargs = {
    "data": evalset,
    "evaluators": [score_answer],
    "client": langsmith,
    "max_concurrency": 2,
}

# Baseline experiment with the un-optimised function.
evaluate(openai_json_response, **evaluate_kwargs)

View the evaluation results for experiment: 'yellow-part-12' at:
https://smith.langchain.com/o/b0308fb6-cdef-5df3-affa-b8dba287e3ed/datasets/1b7abb1a-8922-4eba-b0b6-b617241d8794/compare?selectedSessions=17db3be3-eaeb-4545-be1a-a16789d96d45




0it [00:00, ?it/s]

<ExperimentResults yellow-part-12>

In [15]:
# To make a chain optimisable, decorate it with @deflm and accept an `LMRequest`.
# The request carries the chain inputs (`request.inputs`) plus a `zenbase`
# attribute holding the fields Zenbase optimises (here: `task_demos`).
@deflm
@traceable
def openai_json_response(request: LMRequest) -> dict:
    """Few-shot variant of the solver: demos from the request precede the question.

    Args:
        request: Carries ``inputs`` (the problem to solve) and
            ``zenbase.task_demos``; each demo's ``inputs`` / ``outputs`` become
            a user / assistant message pair.

    Returns:
        The JSON object decoded from the first choice's message content.
    """
    system_message = {
        "role": "system",
        "content": "You are an expert math solver. Your answer must be just the number with no separators, and nothing else. Follow the format of the examples. Think step by step. Respond with a JSON object.",
    }

    # One user/assistant exchange per demo, in the order the demos are given.
    demo_turns = [
        turn
        for demo in request.zenbase.task_demos
        for turn in (
            {"role": "user", "content": json.dumps(demo.inputs)},
            {"role": "assistant", "content": json.dumps(demo.outputs)},
        )
    ]
    question_turn = {"role": "user", "content": json.dumps(request.inputs)}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, *demo_turns, question_turn],
        response_format={"type": "json_object"},
    )

    raw_reply = completion.choices[0].message.content
    return json.loads(raw_reply)

In [16]:
from zenbase.helpers.langchain import ZenLangSmith
from zenbase.optim.metric.labeled_few_shot import LabeledFewShot

# Candidate demos come from a separate LangSmith dataset, converted to Zenbase demos.
demoset = ZenLangSmith.examples_to_demos(
    langsmith.list_examples(dataset_name="gsm8k-golden-demos"),
)
optimizer = LabeledFewShot(demoset=demoset, shots=3)

# Built from exactly the kwargs that were passed to `evaluate` earlier.
metric_evaluator = ZenLangSmith.metric_evaluator(**evaluate_kwargs)

best_fn, candidates = optimizer.perform(
    openai_json_response,  # the @deflm-decorated function
    evaluator=metric_evaluator,
    samples=2,
    rounds=1,
)

View the evaluation results for experiment: 'zenbase-multi-lateral-composite-open-system-f8e07e82' at:
https://smith.langchain.com/o/b0308fb6-cdef-5df3-affa-b8dba287e3ed/datasets/1b7abb1a-8922-4eba-b0b6-b617241d8794/compare?selectedSessions=41a222d1-fc2c-4ee5-85fb-892a707ae0e0




0it [00:00, ?it/s]

View the evaluation results for experiment: 'zenbase-enterprise-wide-system-worthy-archive-fb7fa631' at:
https://smith.langchain.com/o/b0308fb6-cdef-5df3-affa-b8dba287e3ed/datasets/1b7abb1a-8922-4eba-b0b6-b617241d8794/compare?selectedSessions=02baafbd-5c56-4ed8-8963-8c5a02a74df6




0it [00:00, ?it/s]

In [17]:
# The optimised function is called with a plain inputs dict.
best_fn(
    dict(
        question="If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?"
    )
)

{'answer': 'I have 30% + Mo has 24.5% = 54.5% combined shares assigned.\nThis means there are 100% - 54.5% = 45.5% unassigned shares.\nThus, there are 45.5% of 10M shares unassigned, which is 10,000,000 * 45.5 / 100 = <<10000000*45.5/100>>4550000 shares unassigned.\n#### 4550000'}

In [18]:
# Inspect the few-shot demos attached to the optimised function
# (displayed as a tuple of LMDemo objects with `inputs` and `outputs`).
best_fn.zenbase.task_demos

(LMDemo(inputs={'question': 'James writes a 3-page letter to 2 different friends twice a week.  How many pages does he write a year?'}, outputs={'answer': 'He writes each friend 3*2=<<3*2=6>>6 pages a week\nSo he writes 6*2=<<6*2=12>>12 pages every week\nThat means he writes 12*52=<<12*52=624>>624 pages a year\n#### 624'}),
 LMDemo(inputs={'question': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?'}, outputs={'answer': 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72'}),
 LMDemo(inputs={'question': 'Betty is saving money for a new wallet which costs $100. Betty has only half of the money she needs. Her parents decided to give her $15 for that purpose, and her grandparents twice as much as her parents. How much more money does Betty need to buy the wallet?'}, outputs={'answer': "In the beginning, Be

In [19]:
# You can also save the zenbase params for re-use
import pickle

# Serialise the optimised params to bytes.
# NOTE(review): presumably these bytes could be written to disk and reloaded in
# another session -- this cell only demonstrates an in-memory round trip.
pickled_zenbase = pickle.dumps(best_fn.zenbase)
# Restore the params onto the @deflm function itself.
# Caution: unpickling can execute arbitrary code, so only load bytes you produced
# yourself or otherwise trust.
openai_json_response.zenbase = pickle.loads(pickled_zenbase)

openai_json_response({"question": "What is 2 + 2?"}) # uses the best few-shot demos

{'answer': '4'}