In [1]:
from getpass import getpass
import os

api_key = getpass("API Key : ")
os.environ['OPENAI_API_KEY'] = f"{api_key}"

from openai import OpenAI

client = OpenAI()

def query_openai(system_prompt, message):
    messages = []
    if system_prompt:
        messages.append({
            "role": "system",
            "content": system_prompt,
        })
    messages.append({
        "role": "user",
        "content": message,
    })
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages
    )
    output = completion.choices[0].message
    return output.content

import requests
import json

def query_llama(system_prompt, message):
    messages = []
    if system_prompt:
        messages.append({
            "role": "system",
            "content": system_prompt,
        })
    messages.append({
        "role": "user",
        "content": message,
    })
    url = "http://localhost:11434/api/chat"
    data = {
        "model": "llama3.1",
        "stream": False,
        "messages": messages
    }
    response = requests.post(url, data=json.dumps(data))
    return response.json()["message"]["content"]

In [2]:
!pip install promptbench

Collecting promptbench
  Downloading promptbench-0.0.3-py3-none-any.whl (129 kB)
[K     |████████████████████████████████| 129 kB 2.3 MB/s eta 0:00:01
[?25hCollecting transformers-stream-generator==0.0.5
  Downloading transformers-stream-generator-0.0.5.tar.gz (13 kB)
Collecting tokenizers==0.15.0
  Downloading tokenizers-0.15.0-cp310-cp310-macosx_11_0_arm64.whl (2.5 MB)
[K     |████████████████████████████████| 2.5 MB 8.0 MB/s eta 0:00:01
[?25hCollecting sentencepiece==0.1.99
  Downloading sentencepiece-0.1.99-cp310-cp310-macosx_11_0_arm64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 48.8 MB/s eta 0:00:01
[?25hCollecting nltk==3.8.1
  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 173.0 MB/s eta 0:00:01
[?25hCollecting datasets>=2.15.0
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[K     |████████████████████████████████| 547 kB 96.9 MB/s eta 0:00:01
[?25hCollecting torch==2.1.1
 

In [3]:
import promptbench as pb

# print all supported datasets in promptbench
print('All supported datasets: ')
print(pb.SUPPORTED_DATASETS)

# load a dataset, sst2, for instance.
# if the dataset is not available locally, it will be downloaded automatically.
dataset_name = "gsm8k"
dataset = pb.DatasetLoader.load_dataset(dataset_name)

# print the first 3 examples
dataset[:3]

All supported datasets: 
['sst2', 'cola', 'qqp', 'mnli', 'mnli_matched', 'mnli_mismatched', 'qnli', 'wnli', 'rte', 'mrpc', 'mmlu', 'squad_v2', 'un_multi', 'iwslt2017', 'math', 'bool_logic', 'valid_parentheses', 'gsm8k', 'csqa', 'bigbench_date', 'bigbench_object_tracking', 'last_letter_concat', 'numersense', 'qasc', 'bbh', 'drop', 'arc-easy', 'arc-challenge']


Downloading readme:   0%|          | 0.00/7.94k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/2.31M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/419k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/7473 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1319 [00:00<?, ? examples/s]

[{'content': "Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?",
  'label': '18'},
 {'content': 'A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?',
  'label': '3'},
 {'content': 'Josh decides to try flipping a house.  He buys a house for $80,000 and then puts in $50,000 in repairs.  This increased the value of the house by 150%.  How much profit did he make?',
  'label': '70000'}]

In [7]:
model = pb.LLMModel(model='gpt-3.5-turbo',
                    api_key = os.environ['OPENAI_API_KEY'],
                    max_new_tokens=150)

In [8]:
method = pb.PEMethod(method='emotion_prompt',
                        dataset=dataset_name,
                        verbose=True,  # if True, print the detailed prompt and response
                        prompt_id = 1  # for emotion_prompt
                        )

In [9]:
results = method.test(dataset,
                      model,
                      num_samples=3 # if don't set the num_samples, method will use all examples in the dataset
                      )
results

  0%|          | 1/1319 [00:02<48:23,  2.20s/it]

Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?
This is very important to my career.
Please output your answer at the end as ##<your answer (arabic numerals)>

To find out how much Janet makes at the farmers' market every day, we first need to calculate how many eggs she has left after eating three for breakfast and using four for muffins.

Janet has 16 eggs per day.
She eats 3 for breakfast and uses 4 for muffins, so she uses 3 + 4 = 7 eggs.
Therefore, she has 16 - 7 = 9 eggs left to sell.

She sells each fresh duck egg for $2.
So, she makes 9 * $2 = $18 every day at the farmers' market.

Therefore, Janet makes $18 every day at the farmers' market.##18


  0%|          | 2/1319 [00:03<35:35,  1.62s/it]

A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?
This is very important to my career.
Please output your answer at the end as ##<your answer (arabic numerals)>

To make a robe, you need 2 bolts of blue fiber and half that amount of white fiber, which is 1 bolt. Therefore, in total, it takes 2 bolts of blue fiber and 1 bolt of white fiber.

Adding them together, the total number of bolts needed for the robe is 2 + 1 = 3 bolts.

##3


  0%|          | 3/1319 [00:05<40:10,  1.83s/it]

Josh decides to try flipping a house.  He buys a house for $80,000 and then puts in $50,000 in repairs.  This increased the value of the house by 150%.  How much profit did he make?
This is very important to my career.
Please output your answer at the end as ##<your answer (arabic numerals)>

To find out how much profit Josh made, we first need to calculate the new value of the house after the repairs.

The increase in value due to the repairs is 150% of the original value of the house, which is $80,000. So, the increase in value is 150/100 * $80,000 = $120,000.

The new value of the house after the repairs is the original value plus the increase in value: $80,000 + $120,000 = $200,000.

Josh's total investment in the house is the purchase price plus the cost of repairs: $80,000 + $50,000 = $130,000.

Therefore, Josh's profit is the new value of the





0.6666666666666666