In [2]:
from dotenv import load_dotenv
import aisuite as ai
import os
import json

load_dotenv()

# client = ai.Client({"groq": {"api_key": os.getenv("GROQ_API_KEY")}})
# model = "groq:llama-3.1-70b-versatile"

client = ai.Client({"openai": {"api_key": os.getenv("OPENAI_API_KEY")}})
model = "openai:gpt-4o-mini"

In [8]:
messages = [
    {"role": "system", "content": "Respond in Pirate English."},
    {"role": "user", "content": "Tell me a joke."},
]

response = client.chat.completions.create(model=model, messages=messages, temperature=0.75)

print(response.choices[0].message.content)

Why did the pirate go to school? 

To improve his "arrrrrrrticulation!" Ha-ha!


In [4]:
# run task 069 for 5 runs

task = "natural-instructions/tasks/task069_abductivenli_classification.json"
runs = 5

with open(task, "r") as f:
    data = json.load(f)

print(data["Definition"])

for i in range(runs):
    input = data["Instances"][i]["input"]
    output = data["Instances"][i]["output"][0]
    messages = [
        {"role": "system", "content": data["Definition"][0]},
        {"role": "user", "content": input},
    ]
    response = client.chat.completions.create(model=model, messages=messages, temperature=0.75)
    print("agreement: ", output == response.choices[0].message.content, "output: ", response.choices[0].message.content, "ground_truth: ", output)



['In this task, you will be shown a short story with a beginning, two potential middles, and an ending. Your job is to choose the middle statement that makes the story coherent / plausible by writing "1" or "2" in the output. If both sentences are plausible, pick the one that makes most sense.']
agreement:  True output:  1 ground_truth:  1
agreement:  True output:  1 ground_truth:  1
agreement:  True output:  2 ground_truth:  2
agreement:  True output:  1 ground_truth:  1
agreement:  False output:  2 ground_truth:  1


In [4]:
# formats from paper "How I learned to start worrying about prompt formatting"

# Utility for Roman numeral conversion
def to_roman(num):
    roman_dict = {
        1: "I", 2: "II", 3: "III", 4: "IV", 5: "V", 6: "VI", 7: "VII", 8: "VIII", 9: "IX", 10: "X"
    }
    return roman_dict.get(num, str(num))

def to_lower_roman(num):
    return to_roman(num).lower()

# Format classes
S1 = ["", " ", "\n", " -- ", "; \n", " || ", "< sep >", " - ", "\n "]
S2 = ["", " ", "  ", "\t"]  # No space, single, double, tab
C = ["", " ::: ", " :: ", " : ", "\n\t", "\n ", ": ", " - ", "\t"]
Fcasing = [lambda x: x, lambda x: x.title(), lambda x: x.upper(), lambda x: x.lower()]
Fitem1 = [
    lambda x: f"({x})",
    lambda x: f"{x}.",
    lambda x: f"{x})",
    lambda x: f"{x} )",
    lambda x: f"[{x}]",
    lambda x: f"<{x}>"
]
Fitem2 = [
    lambda x: x + 1,
    lambda x: f"A{x}",
    lambda x: f"a{x}",
    lambda x: f"{0x215F + x}",
    to_roman,
    to_lower_roman
]

# Prompt formatting functions
def format_field(descriptor, separator, casing, value):
    """
    Formats a single field with a descriptor, separator, casing, and placeholder value.
    """
    descriptor = casing(descriptor)
    return f"{descriptor}{separator}{value}"

def format_prompt(fields, field_separator, space):
    """
    Combines multiple formatted fields with a given separator and spacing.
    """
    return field_separator.join(fields).replace(" ", space)

def format_enumeration(descriptor, items, separator, space, casing, item_formatter):
    """
    Formats enumerations (e.g., multiple-choice options) with specific formatting for items.
    """
    formatted_items = [format_field(descriptor, separator, casing, item_formatter(i)) for i in items]
    return format_prompt(formatted_items, field_separator=space, space=" ")

# Example main prompt constructor
def construct_prompt(
    instruction, examples, task, separator, space, casing, field_separator, item_formatter, enumerator_format
):
    """
    Constructs a complete prompt with instruction, examples, task, and formatting.
    """
    instruction = casing(instruction)
    formatted_examples = [
        format_field("Example", separator, casing, example) for example in examples
    ]
    formatted_task = format_field("Task", separator, casing, task)
    enumeration = format_enumeration(
        "Option", range(1, 4), separator, space, casing, item_formatter
    )
    return format_prompt([instruction] + formatted_examples + [formatted_task, enumeration], field_separator, space)

# Example usage
instruction = "Classify the text into one of the options"
examples = ["Example A", "Example B"]
task = "Choose the best answer"
separator = ": "
space = "\n"
casing = Fcasing[2]  # Uppercase
field_separator = S1[3]  # "--"
item_formatter = Fitem2[4]  # Roman numerals
enumerator_format = Fitem1[0]  # Parentheses

formatted_prompt = construct_prompt(
    instruction, examples, task, separator, space, casing, field_separator, item_formatter, enumerator_format
)
print(formatted_prompt)

CLASSIFY
THE
TEXT
INTO
ONE
OF
THE
OPTIONS
--
EXAMPLE:
Example
A
--
EXAMPLE:
Example
B
--
TASK:
Choose
the
best
answer
--
OPTION:
I
OPTION:
II
OPTION:
III


In [7]:
# Using the PromptTemplate class to create prompt templates

from prompt_template import PromptTemplate

instruction = "Classify the text into one of the options"
examples = ["Example A", "Example B"]
task = "Choose the best answer"
separator = ": "
word_separator = " "
casing = Fcasing[0]             # as is
field_separator = S1[2]         # "\n"
item_formatter = Fitem2[4]      # Roman numerals
enumerator_format = Fitem1[0]   # Parentheses

template = PromptTemplate(instruction, task, " ", examples, separator, word_separator, casing, field_separator, item_formatter, enumerator_format)
formatted_prompt = template.construct_prompt()
print(formatted_prompt)


Classify the text into one of the options
Only respond with the answer, no other text or explanation.
Example: Example A
Example: Example B
Task: Choose the best answer



In [None]:
# Create different prompt mutations

instruction = "Classify the text into one of the options"
examples = ["Example A", "Example B"]
task = "Choose the best answer"
separator = ": "
space = " "

for i in range(len(Fcasing)):
    casing = Fcasing[i]
    for j in range(len(S1)):
        field_separator = S1[j]
        for k in range(len(Fitem2)):
            item_formatter = Fitem2[k]
            for l in range(len(Fitem1)):
                enumerator_format = Fitem1[l]
                template = PromptTemplate(instruction,
                                    task,
                                    examples,
                                    separator,
                                    word_separator,
                                    casing,
                                    field_separator,
                                    item_formatter,
                                    enumerator_format)
                formatted_prompt = template.construct_prompt()
                #print(formatted_prompt)

In [None]:
# 02.12.2024
# - q&a dataset extend with prompts
# - orca dataset (https://huggingface.co/microsoft/Orca-2-13b)
# - shap value and lime -> phrase mutations as binary vector
# - estimate influence of each mutation on the result
# - look into implementations of this -> free on github maybe easy to use out of the box