# Import the Zenbase Library

In [1]:
import sys
import subprocess

def install_package(package):
    """Install ``package`` with pip, using the interpreter that runs this notebook.

    A failed install is reported on stdout and then re-raised, so the cell
    stops instead of continuing without the dependency.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as install_error:
        print(f"Failed to install {package}: {install_error}")
        raise

def install_packages(packages):
    """Install each entry of ``packages``, in order, through ``install_package``."""
    for requirement in packages:
        install_package(requirement)

# Being able to import `google.colab` is used as the signal that the notebook
# is running in Google Colab.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

if IN_COLAB:
    # Released package (disabled in favour of the GitHub build below):
    # install_package('zenbase')
    # Install the zenbase package from the `main` branch on GitHub.
    install_package('git+https://github.com/zenbase-ai/lib.git@main#egg=zenbase&subdirectory=py')

    # Other packages to install when running in Google Colab.
    additional_packages = [
        'python-dotenv',
        'arize-phoenix[evals]',
        'openai',
        'langchain',
        'langchain_openai'
    ]

    install_packages(additional_packages)

# Import zenbase after the optional Colab install above.
# If the import fails, print the reason and re-raise so the notebook stops here.
try:
    import zenbase
except ImportError as e:
    print("Failed to import zenbase: ", e)
    raise

# Configure the Environment

In [2]:
from pathlib import Path
from dotenv import load_dotenv

# Alternative to the env file (left disabled): set the key directly.
# import os
#
# os.environ["OPENAI_API_KEY"] = "..."

# Load variables from ../../.env.test (path is relative to the working directory).
# override=True lets values from the file replace variables that are already set.
load_dotenv(Path("../../.env.test"), override=True)

True

In [3]:
import nest_asyncio

# Patch asyncio so that event loops can be nested.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop while later cells run experiment code that uses asyncio — confirm.
nest_asyncio.apply()

# Initial Setup


In [4]:
# Start the Phoenix app; the cell output prints the URL it is served at.
import phoenix as px
px.launch_app()
# Create the Phoenix client used by the dataset and experiment cells below.
arize_phoenix = px.Client()

🌍 To view the Phoenix app in your browser, visit http://localhost:6006/
📖 For more information on how to use Phoenix, check out https://docs.arize.com/phoenix


In [5]:
from openai import OpenAI
# NOTE(review): no later cell shown here references this client (the chains go
# through ChatOpenAI), and the variable name shadows the `openai` package —
# confirm whether it is still needed.
openai = OpenAI()

In [6]:
from zenbase.utils import ksuid
from zenbase.adaptors.arize import ZenArizeAdaptor
# Adaptor built around the Phoenix client; used below to upload the datasets
# and later passed to the optimizer as `zen_adaptor`.
zen_arize_adaptor = ZenArizeAdaptor(arize_phoenix)

# Load the "main" configuration of GSM8K and set how many rows each of the
# three demo datasets takes from it.
import datasets
gsm8k_dataset = datasets.load_dataset("gsm8k", "main")
TESTSET_SIZE = 2
TRAINSET_SIZE = 5
VALIDATIONSET_SIZE = 2


def create_dataset_with_examples(zen_arize_adaptor: ZenArizeAdaptor, prefix: str, item_set: list) -> str:
    """Upload ``item_set`` as a new dataset through the adaptor and return its name.

    Args:
        zen_arize_adaptor: Adaptor whose ``add_examples_to_dataset`` does the upload.
        prefix: Passed to ``ksuid`` to generate the dataset name.
        item_set: Records that each provide a ``"question"`` and an ``"answer"`` entry.

    Returns:
        The generated dataset name.
    """
    dataset_name = ksuid(prefix=prefix)

    # Inputs and expected outputs are uploaded as two parallel lists.
    questions = []
    for record in item_set:
        questions.append({"question": record["question"]})

    answers = []
    for record in item_set:
        answers.append({"answer": record["answer"]})

    zen_arize_adaptor.add_examples_to_dataset(dataset_name, questions, answers)
    return dataset_name

# Training and validation rows both come from the GSM8K "train" split.
gsm8k_train_split = gsm8k_dataset["train"]

train_set = create_dataset_with_examples(
    zen_arize_adaptor,
    "GSM8K_train_set",
    list(gsm8k_train_split.select(range(TRAINSET_SIZE))),
)

# NOTE(review): the validation window starts at TRAINSET_SIZE + 1, so row
# TRAINSET_SIZE is used by neither set — confirm whether the gap is intended.
validation_start = TRAINSET_SIZE + 1
validation_set = create_dataset_with_examples(
    zen_arize_adaptor,
    "GSM8K_validation_set",
    list(gsm8k_train_split.select(range(validation_start, validation_start + VALIDATIONSET_SIZE))),
)

# Test rows come from the head of the GSM8K "test" split.
test_set = create_dataset_with_examples(
    zen_arize_adaptor,
    "GSM8K_test_set",
    list(gsm8k_dataset["test"].select(range(TESTSET_SIZE))),
)

📤 Uploading dataset...
💾 Examples uploaded: http://localhost:6006/datasets/RGF0YXNldDox/examples
🗄️ Dataset version ID: RGF0YXNldFZlcnNpb246MQ==
📤 Uploading dataset...
💾 Examples uploaded: http://localhost:6006/datasets/RGF0YXNldDoy/examples
🗄️ Dataset version ID: RGF0YXNldFZlcnNpb246Mg==
📤 Uploading dataset...
💾 Examples uploaded: http://localhost:6006/datasets/RGF0YXNldDoz/examples
🗄️ Dataset version ID: RGF0YXNldFZlcnNpb246Mw==


# Now, you probably already have some LLM code.

It could use the OpenAI SDK, LangChain, or anything really. But it looks something like this:

In [7]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

def solver(inputs):
    """Answer the question found under ``inputs["inputs"]``.

    The final prompt combines the question with the plan returned by
    ``planner_chain`` and the operation returned by ``operation_finder``.
    Returns a dict with the model's reply under ``"answer"``.
    """
    inputs = inputs["inputs"]
    prompt_messages = [
        (
            "system",
            """You are an expert math solver. Solve the given problem using the provided plan and operations.
        Return only the final numerical answer, without any additional text or explanation.""",
        ),
        ("user", "Question: {question}"),
        ("user", "Plan: {plan}"),
        ("user", "Mathematical Operation: {operation}"),
        ("user", "Provide the final numerical answer:"),
    ]
    chain = ChatPromptTemplate.from_messages(prompt_messages) | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()

    # Intermediate steps: plan first, then the operation derived from that plan.
    plan_text = planner_chain(inputs)["plan"]
    question = inputs["question"]
    operation_text = operation_finder({"plan": plan_text, "question": question})["operation"]

    answer = chain.invoke(
        {
            "question": question,
            "plan": plan_text,
            "operation": operation_text,
        }
    )
    return {"answer": answer}

def planner_chain(inputs):
    """Ask the model for a step-by-step plan.

    ``inputs`` is passed to the chain unchanged and must fill the
    ``{question}`` placeholder. Returns a dict with the reply under ``"plan"``.
    """
    system_prompt = """You are an expert math solver. Create a step-by-step plan to solve the given problem.
        Be clear and concise in your steps."""
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("user", "Problem: {question}\n\nProvide a step-by-step plan to solve this problem:"),
        ]
    )
    chain = prompt | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()
    return {"plan": chain.invoke(inputs)}

def operation_finder(inputs):
    """Ask the model which primary mathematical operation the plan calls for.

    ``inputs`` is passed to the chain unchanged and must fill the
    ``{question}`` and ``{plan}`` placeholders. Returns a dict with the reply
    under ``"operation"``.
    """
    system_prompt = """You are an expert math solver. Identify the overall mathematical operation needed to solve the
             problem
        based on the given plan. Use simple operations like addition, subtraction, multiplication, and division."""
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("user", "Question: {question}"),
            ("user", "Plan: {plan}"),
            ("user", "Identify the primary mathematical operation needed:"),
        ]
    )
    chain = prompt | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()
    return {"operation": chain.invoke(inputs)}


In [8]:
# Smoke-test the untraced pipeline on a trivial question.
solver({"inputs": {"question": "What is 2 + 2?"}})

{'answer': '4'}

## And let's say you have an eval function like this

In [9]:
def score_answer(output: dict, expected: dict) -> int:
    """Score one ``solver`` result against a GSM8K reference answer.

    Args:
        output: Return value of ``solver``, i.e. a dict with an ``"answer"`` entry.
        expected: Expected record. The reference text is read from
            ``expected["outputs"]["answer"]`` and only the part after the last
            ``"#### "`` marker is compared.

    Returns:
        1 when the answers match, otherwise 0.
    """
    def _normalize(value) -> str:
        # Surrounding whitespace and thousands separators are formatting, not
        # part of the number, so "1,000\n" and "1000" count as the same answer.
        return str(value).strip().replace(",", "")

    reference = expected["outputs"]["answer"].split("#### ")[-1]
    return int(_normalize(output["answer"]) == _normalize(reference))


## Then you're probably evaluating like this

In [10]:
from phoenix.experiments import run_experiment

# Baseline: run the untraced solver over the test dataset and score each answer.
baseline_dataset = arize_phoenix.get_dataset(name=test_set)
experiment = run_experiment(
    baseline_dataset,
    solver,
    experiment_name="Experiment-Name",
    evaluators=[score_answer],
)

🧪 Experiment started.
📺 View dataset experiments: http://localhost:6006/datasets/RGF0YXNldDoz/experiments
🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDoz/compare?experimentId=RXhwZXJpbWVudDox


running tasks |          | 0/2 (0.0%) | ⏳ 00:00<? | ?it/s

✅ Task runs completed.
🧠 Evaluation started.


running experiment evaluations |          | 0/2 (0.0%) | ⏳ 00:00<? | ?it/s


🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDoz/compare?experimentId=RXhwZXJpbWVudDox

Experiment Summary (07/11/24 06:37 PM -0600)
--------------------------------------------
      evaluator  n  n_scores  avg_score
0  score_answer  2         2        1.0

Tasks Summary (07/11/24 06:37 PM -0600)
---------------------------------------
   n_examples  n_runs  n_errors
0           2       2         0


# Now, how can we optimize this score?

## First, initialize the Zenbase `ZenbaseTracer` and import the Arize Phoenix adaptor

In [11]:
from zenbase.core.managers import ZenbaseTracer
# Tracer instance: used as the decorator on the functions below and passed to
# the optimizer as `trace_manager`.
zenbase_tracer = ZenbaseTracer()

# NOTE(review): this import and adaptor construction repeat the earlier setup
# cell (In [6]) — confirm whether a fresh adaptor is actually needed here.
from zenbase.adaptors.arize import ZenArizeAdaptor
zen_arize_adaptor = ZenArizeAdaptor(arize_phoenix)

## Hook up Zenbase to your functions

1. Decorate each function with `zenbase_tracer`.
2. Change the function's parameter from raw inputs to a `request` (an `LMRequest`).
3. Read the few-shot examples for the task from `request.zenbase.task_demos` and add them to your prompt however you like.
4. If you only need a few examples, slice the list, e.g. `request.zenbase.task_demos[:2]` for the first two.

In [12]:
from zenbase.types import LMRequest
# Debug collectors, keyed by the expression each list was meant to record.
# Every key gets its own empty list; no code in the cells shown here appends to them.
global_demo_inputs = {
    label: []
    for label in ('demo.inputs["inputs"]', 'demo.inputs', 'demo_input["question"]')
}
global_demo_outputs = {
    label: []
    for label in ('demo.outputs["outputs"]', 'demo.outputs', 'demo_output["answer"]')
}

@zenbase_tracer  # step 1: trace the function with Zenbase
def solver(request: LMRequest):  # step 2: accept an LMRequest instead of raw inputs
    """Solve the question carried by ``request`` and return ``{"answer": <model reply>}``.

    The prompt consists of the system instruction, one example question/answer
    pair per entry of ``request.zenbase.task_demos``, and finally the question
    together with the plan from ``planner_chain`` and the operation from
    ``operation_finder``.
    """
    def _literal(text) -> str:
        # ChatPromptTemplate parses message strings as templates, so braces in
        # demo text must be doubled to be kept as literal characters.
        return str(text).replace("{", "{{").replace("}", "}}")

    # Callers pass {"inputs": {...}}; the payload is unwrapped on the request itself.
    # NOTE(review): the in-place mutation is kept from the original; the tracer
    # may depend on it — confirm before replacing it with a local variable.
    request.inputs = request.inputs["inputs"]
    messages = [
        (
            "system",
            """You are an expert math solver. Solve the given problem using the provided plan and operations.
        Return only the final numerical answer, without any additional text or explanation.""",
        ),
    ]

    for demo in request.zenbase.task_demos:  # step 3: add the few-shot demos to the prompt
        demo_input = demo.inputs["inputs"]["question"]
        demo_output = demo.outputs["outputs"]["answer"]

        messages += [
            ("user", _literal(f"Example Question: {demo_input}")),
            ("assistant", _literal(f"Example Answer: {demo_output}")),
        ]

    messages.extend(
        [
            ("user", "Question: {question}"),
            ("user", "Plan: {plan}"),
            ("user", "Mathematical Operation: {operation}"),
            ("user", "Provide the final numerical answer:"),
        ]
    )

    chain = ChatPromptTemplate.from_messages(messages) | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()

    plan = planner_chain(request.inputs)
    operation = operation_finder({"plan": plan["plan"], "question": request.inputs["question"]})

    inputs_to_answer = {
        "question": request.inputs["question"],
        "plan": plan["plan"],
        "operation": operation["operation"],
    }
    answer = chain.invoke(inputs_to_answer)
    return {"answer": answer}

@zenbase_tracer  # step 1: trace the function with Zenbase
def planner_chain(request: LMRequest):  # step 2: accept an LMRequest instead of raw inputs
    """Ask the model for a step-by-step plan and return ``{"plan": <model reply>}``.

    ``request.inputs`` is passed to the chain and must fill the ``{question}``
    placeholder. Up to two entries of ``request.zenbase.task_demos`` are added
    as question/plan examples ahead of the actual problem.
    """
    def _literal(text) -> str:
        # ChatPromptTemplate parses message strings as templates, so braces in
        # demo text (plans are model-generated) must be doubled to stay literal.
        return str(text).replace("{", "{{").replace("}", "}}")

    messages = [
        (
            "system",
            """You are an expert math solver. Create a step-by-step plan to solve the given problem.
        Be clear and concise in your steps.""",
        ),
    ]

    if request.zenbase.task_demos:  # step 3: add the few-shot demos to the prompt
        for demo in request.zenbase.task_demos[:2]:  # step 4: only the first two demos
            messages += [
                ("user", _literal(demo.inputs["inputs"]["question"])),
                ("assistant", _literal(demo.outputs["outputs"]["plan"])),
            ]

    # The actual problem goes last, after the demos (the same order `solver`
    # uses), so the conversation ends on the question the model must answer.
    messages.append(("user", "Problem: {question}\n\nProvide a step-by-step plan to solve this problem:"))

    chain = ChatPromptTemplate.from_messages(messages) | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()
    plan = chain.invoke(request.inputs)
    return {"plan": plan}

@zenbase_tracer  # step 1: trace the function with Zenbase
def operation_finder(request: LMRequest):  # step 2: accept an LMRequest instead of raw inputs
    """Ask the model for the primary operation and return ``{"operation": <model reply>}``.

    ``request.inputs`` is passed to the chain and must fill the ``{question}``
    and ``{plan}`` placeholders. Up to two entries of
    ``request.zenbase.task_demos`` are added as question/plan/operation
    examples ahead of the actual question.
    """
    def _literal(text) -> str:
        # ChatPromptTemplate parses message strings as templates, so braces in
        # demo text (plans are model-generated) must be doubled to stay literal.
        return str(text).replace("{", "{{").replace("}", "}}")

    messages = [
        (
            "system",
            """You are an expert math solver. Identify the overall mathematical operation needed to solve the
             problem
        based on the given plan. Use simple operations like addition, subtraction, multiplication, and division.""",
        ),
    ]

    if request.zenbase.task_demos:  # step 3: add the few-shot demos to the prompt
        for demo in request.zenbase.task_demos[:2]:  # step 4: only the first two demos
            messages += [
                ("user", _literal(demo.inputs["inputs"]["question"])),
                ("user", _literal(demo.inputs["inputs"]["plan"])),
                ("assistant", _literal(demo.outputs["outputs"]["operation"])),
            ]

    # The actual question and plan go last, after the demos (the same order
    # `solver` uses), so the conversation ends on the request to answer.
    messages.extend(
        [
            ("user", "Question: {question}"),
            ("user", "Plan: {plan}"),
            ("user", "Identify the primary mathematical operation needed:"),
        ]
    )

    chain = ChatPromptTemplate.from_messages(messages) | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()
    operation = chain.invoke(request.inputs)
    return {"operation": operation}


In [13]:
# Smoke-test the traced solver; the result is stored rather than displayed.
return_langchain = solver({"inputs": {"question": "What is 2 + 2?"}})

## Now we can optimize!

### Set up your optimizer:

In [14]:
from zenbase.optim.metric.bootstrap_few_shot import BootstrapFewShot

# SHOTS is passed to the optimizer as `shots`; SAMPLES is passed to `perform` as `samples`.
SHOTS = 2
SAMPLES = 2

# Keyword arguments handed to the optimizer as `evaluator_kwargs`.
evaluator_kwargs = {
    "dataset": arize_phoenix.get_dataset(name=test_set),
    "evaluators": [score_answer],
}

bootstrap_few_shot = BootstrapFewShot(
    shots=SHOTS,
    training_set=train_set,
    validation_set=validation_set,
    test_set=test_set,
    evaluator_kwargs=evaluator_kwargs,
    zen_adaptor=zen_arize_adaptor,
)


### Do the optimization

In [15]:
# Empty the trace store so it only holds traces recorded from this point on.
zenbase_tracer.all_traces = {}
# Run the optimization; the result is unpacked into the selected function
# (`best_fn`) and the candidates that were considered.
best_fn, candidates = bootstrap_few_shot.perform(
    solver,
    samples=SAMPLES,
    rounds=1,
    trace_manager=zenbase_tracer,
)

🧪 Experiment started.
📺 View dataset experiments: http://localhost:6006/datasets/RGF0YXNldDoz/experiments
🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDoz/compare?experimentId=RXhwZXJpbWVudDoy


running tasks |          | 0/2 (0.0%) | ⏳ 00:00<? | ?it/s

✅ Task runs completed.
🧠 Evaluation started.


running experiment evaluations |          | 0/2 (0.0%) | ⏳ 00:00<? | ?it/s


🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDoz/compare?experimentId=RXhwZXJpbWVudDoy

Experiment Summary (07/11/24 06:37 PM -0600)
--------------------------------------------
      evaluator  n  n_scores  avg_score
0  score_answer  2         2        0.0

Tasks Summary (07/11/24 06:37 PM -0600)
---------------------------------------
   n_examples  n_runs  n_errors
0           2       2         0
🧪 Experiment started.
📺 View dataset experiments: http://localhost:6006/datasets/RGF0YXNldDoy/experiments
🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDoy/compare?experimentId=RXhwZXJpbWVudDoz


running tasks |          | 0/2 (0.0%) | ⏳ 00:00<? | ?it/s

✅ Task runs completed.
🧠 Evaluation started.


running experiment evaluations |          | 0/2 (0.0%) | ⏳ 00:00<? | ?it/s


🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDoy/compare?experimentId=RXhwZXJpbWVudDoz

Experiment Summary (07/11/24 06:37 PM -0600)
--------------------------------------------
      evaluator  n  n_scores  avg_score
0  score_answer  2         2        0.5

Tasks Summary (07/11/24 06:37 PM -0600)
---------------------------------------
   n_examples  n_runs  n_errors
0           2       2         0
🧪 Experiment started.
📺 View dataset experiments: http://localhost:6006/datasets/RGF0YXNldDoy/experiments
🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDoy/compare?experimentId=RXhwZXJpbWVudDo0


running tasks |          | 0/2 (0.0%) | ⏳ 00:00<? | ?it/s

✅ Task runs completed.
🧠 Evaluation started.


running experiment evaluations |          | 0/2 (0.0%) | ⏳ 00:00<? | ?it/s


🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDoy/compare?experimentId=RXhwZXJpbWVudDo0

Experiment Summary (07/11/24 06:37 PM -0600)
--------------------------------------------
      evaluator  n  n_scores  avg_score
0  score_answer  2         2        0.5

Tasks Summary (07/11/24 06:37 PM -0600)
---------------------------------------
   n_examples  n_runs  n_errors
0           2       2         0
🧪 Experiment started.
📺 View dataset experiments: http://localhost:6006/datasets/RGF0YXNldDox/experiments
🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDox/compare?experimentId=RXhwZXJpbWVudDo1


running tasks |          | 0/5 (0.0%) | ⏳ 00:00<? | ?it/s

✅ Task runs completed.
🧠 Evaluation started.


running experiment evaluations |          | 0/5 (0.0%) | ⏳ 00:00<? | ?it/s


🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDox/compare?experimentId=RXhwZXJpbWVudDo1

Experiment Summary (07/11/24 06:37 PM -0600)
--------------------------------------------
      evaluator  n  n_scores  avg_score
0  score_answer  5         5        0.6

Tasks Summary (07/11/24 06:37 PM -0600)
---------------------------------------
   n_examples  n_runs  n_errors
0           5       5         0
🧪 Experiment started.
📺 View dataset experiments: http://localhost:6006/datasets/RGF0YXNldDoz/experiments
🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDoz/compare?experimentId=RXhwZXJpbWVudDo2


running tasks |          | 0/2 (0.0%) | ⏳ 00:00<? | ?it/s

✅ Task runs completed.
🧠 Evaluation started.


running experiment evaluations |          | 0/2 (0.0%) | ⏳ 00:00<? | ?it/s


🔗 View this experiment: http://localhost:6006/datasets/RGF0YXNldDoz/compare?experimentId=RXhwZXJpbWVudDo2

Experiment Summary (07/11/24 06:38 PM -0600)
--------------------------------------------
      evaluator  n  n_scores  avg_score
0  score_answer  2         2        0.0

Tasks Summary (07/11/24 06:38 PM -0600)
---------------------------------------
   n_examples  n_runs  n_errors
0           2       2         0


### Use your optimized function


In [16]:
# Print how many items each debug collector holds: inputs first, then outputs.
for collector in (global_demo_inputs, global_demo_outputs):
    for label, collected in collector.items():
        print(f"{label}: {len(collected)}")

demo.inputs["inputs"]: 0
demo.inputs: 0
demo_input["question"]: 0
demo.outputs["outputs"]: 0
demo.outputs: 0
demo_output["answer"]: 0


In [17]:
# Empty the trace store so that the call below is the only thing recorded in it.
zenbase_tracer.all_traces = {}
best_fn({"inputs":{"question": "What is 2+2?"}})

{'answer': '42'}

### Introspect function traces

In [18]:
# Take the first recorded trace and keep its "optimized" entry.
function_traces = list(zenbase_tracer.all_traces.values())[0]["optimized"]


### Check the optimized parameters for planner_chain

In [19]:
from pprint import pprint

# Show the demos attached to the request that planner_chain received.
pprint(function_traces["planner_chain"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'inputs': {'question': 'Natalia sold clips to 48 of her '
                                       'friends in April, and then she sold '
                                       'half as many clips in May. How many '
                                       'clips did Natalia sell altogether in '
                                       'April and May?'}},
        outputs={'outputs': {'plan': 'Step 1: Calculate the number of clips '
                                     'Natalia sold in May by dividing the '
                                     'number of clips sold in April by 2.\n'
                                     'Number of clips sold in May = 48 / 2 = '
                                     '24\n'
                                     '\n'
                                     'Step 2: Add the number of clips sold in '
                                     'April and May to find the total number '
                                     'of clips sold altogether.\n'
          

### Check the optimized parameters for operation_finder chain

In [20]:
from pprint import pprint

# Show the demos attached to the request that operation_finder received.
pprint(function_traces["operation_finder"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'inputs': {'plan': 'Step 1: Calculate the number of clips '
                                   'Natalia sold in May by dividing the number '
                                   'of clips sold in April by 2.\n'
                                   'Number of clips sold in May = 48 / 2 = 24\n'
                                   '\n'
                                   'Step 2: Add the number of clips sold in '
                                   'April and May to find the total number of '
                                   'clips sold altogether.\n'
                                   'Total clips sold = 48 (April) + 24 (May) = '
                                   '72\n'
                                   '\n'
                                   'Step 3: Natalia sold 72 clips altogether '
                                   'in April and May.',
                           'question': 'Natalia sold clips to 48 of her '
                                       'friends in April, and 

### Check the optimized parameters for solver

In [21]:
from pprint import pprint

# Show the demos attached to the request that solver received.
pprint(function_traces["solver"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'inputs': {'question': 'Natalia sold clips to 48 of her '
                                       'friends in April, and then she sold '
                                       'half as many clips in May. How many '
                                       'clips did Natalia sell altogether in '
                                       'April and May?'}},
        outputs={'outputs': {'answer': '72'}},
        adaptor_object=None),
 LMDemo(inputs={'inputs': {'question': 'Julie is reading a 120-page book. '
                                       'Yesterday, she was able to read 12 '
                                       'pages and today, she read twice as '
                                       'many pages as yesterday. If she wants '
                                       'to read half of the remaining pages '
                                       'tomorrow, how many pages should she '
                                       'read?'}},
        outputs={'outputs': {'answer': 

## How to save the function and load it later

### Save the optimized function args to a file

In [22]:
# Write the optimizer arguments to a file in the current working directory.
bootstrap_few_shot.save_optimizer_args("bootstrap_few_shot_args.zenbase")

### Load the optimized function args with the function

In [23]:
# Save the optimizer arguments, then load them back together with `solver`
# and the tracer to obtain the optimized function.
optimizer_args_path = "bootstrap_few_shot_args.zenbase"
bootstrap_few_shot.save_optimizer_args(optimizer_args_path)

optimized_function = bootstrap_few_shot.load_optimizer_and_function(optimizer_args_path, solver, zenbase_tracer)

### Use the loaded function and make sure it loaded the demos.


In [24]:
from pprint import pprint

# Empty the trace store, call the reloaded function once, then print the demos
# that each traced step received.
zenbase_tracer.all_traces = {}
optimized_function({"inputs":{"question": "If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?"}})
function_traces = list(zenbase_tracer.all_traces.values())[0]["optimized"]

for traced_name in ("solver", "planner_chain", "operation_finder"):
    pprint(function_traces[traced_name]["args"]["request"].zenbase.task_demos)

[LMDemo(inputs={'inputs': {'question': 'Natalia sold clips to 48 of her '
                                       'friends in April, and then she sold '
                                       'half as many clips in May. How many '
                                       'clips did Natalia sell altogether in '
                                       'April and May?'}},
        outputs={'outputs': {'answer': '72'}},
        adaptor_object=None),
 LMDemo(inputs={'inputs': {'question': 'Julie is reading a 120-page book. '
                                       'Yesterday, she was able to read 12 '
                                       'pages and today, she read twice as '
                                       'many pages as yesterday. If she wants '
                                       'to read half of the remaining pages '
                                       'tomorrow, how many pages should she '
                                       'read?'}},
        outputs={'outputs': {'answer': 