In [1]:
import os
import dotenv
import dspy
import pandas as pd
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
from dspy.evaluate import Evaluate

  from .autonotebook import tqdm as notebook_tqdm


# API setup 

In [2]:
# Load variables from a .env file (if one is found) into the environment
# before any of them are read.
dotenv.load_dotenv()

model = "gpt-4o-mini"
proxy_url = os.environ.get("PROXY_URL")
api_key = os.environ.get("OPENAI_API_KEY")

# Fail fast: a missing or empty key would only surface later as an opaque
# error on the first LM call.
if not api_key:
    raise ValueError("API key not found. Please check your environment variables.")

In [3]:
# Language-model client built from the settings read above.
# `api_base` is whatever PROXY_URL resolved to (None when the variable is unset).
lm = dspy.LM(
    model=model,
    temperature=0.0,
    api_key=api_key,
    api_base=proxy_url,
)

# Make this client the LM that DSPy is configured with.
dspy.configure(lm=lm)

# Data preparation

In [7]:
def create_example(row: pd.Series) -> dspy.Example:
    """Convert one dataset row into a DSPy example.

    Args:
        row: A row providing the "question", "answer" and "label" entries.

    Returns:
        A ``dspy.Example`` with the fields ``prompt``, ``completion`` and
        ``label``, where only ``prompt`` is declared as an input.
    """
    fields = {
        "prompt": row["question"],
        "completion": row["answer"],
        "label": row["label"],
    }
    example = dspy.Example(**fields)
    return example.with_inputs("prompt")

In [5]:
# Both datasets are semicolon-separated CSV files.
open_data = pd.read_csv(
    "../data/open_domain_clean.csv",
    sep=";",
)
specific_data = pd.read_csv(
    "../data/specific_domain_clean.csv",
    sep=";",
)

In [8]:
# One dspy.Example per row of each dataset, in row order.
# Comprehensions replace the two copy-pasted append loops and avoid leaking the
# loop variables (`row`, `example`) into the notebook's global namespace.
open_examples = [create_example(row=row) for _, row in open_data.iterrows()]
specific_examples = [create_example(row=row) for _, row in specific_data.iterrows()]

# Signature & Module

In [9]:
class ClasificationSignature(dspy.Signature):
    """Classify if a text is specific for a domain or not."""

    # NOTE(review): DSPy is documented to use a signature's docstring as the
    # task instructions and the `desc` strings as field descriptions in the
    # prompt, so treat the text in this class as prompt content rather than
    # ordinary documentation — confirm against the installed DSPy version.

    # Input: the text whose domain-specificity is being judged.
    prompt = dspy.InputField(desc="The prompt to classify.")

    # Outputs: a free-text justification and the verdict itself. No type is
    # annotated on `label`, so downstream code should not assume it is a bool.
    explanation = dspy.OutputField(desc="Reasoning behind the classification.")
    label = dspy.OutputField(desc="True, if the input text is domain specific, False otherwise.")
    

class ClassificationModule(dspy.Module):
    """DSPy module that classifies a prompt as domain specific or not.

    Wraps a ``dspy.ChainOfThought`` predictor built from
    ``ClasificationSignature``.
    """

    def __init__(self) -> None:
        super().__init__()

        # The predictor is stored as an attribute of the module.
        self.prog = dspy.ChainOfThought(ClasificationSignature)

    def forward(self, prompt: str) -> dspy.Prediction:
        """Run the classifier on a single prompt.

        Args:
            prompt: The text to classify.

        Returns:
            The predictor's output. It is a prediction object, not an instance
            of the signature class; it is expected to expose the signature's
            output fields (``explanation`` and ``label``).
        """
        return self.prog(prompt=prompt)

# Metric & Teleprompter

In [None]:
def parse_answer(answer: object) -> bool:
    """Interpret a label value as a boolean.

    Args:
        answer: The label to interpret. Usually a string such as "True",
            "yes" or "False", but booleans and other objects (e.g. values
            parsed from a CSV column, or ``None``) are accepted too.

    Returns:
        ``True`` if ``answer`` is a true boolean, or if its lower-cased text
        form contains "yes" or "true"; ``False`` otherwise.
    """
    # Genuine booleans need no text parsing.
    if isinstance(answer, bool):
        return answer

    # str() lets non-string labels (numpy booleans, None, numbers) through
    # instead of raising AttributeError on `.lower()`.
    text = str(answer).lower()
    return "yes" in text or "true" in text

In [None]:
def comparison_metric(gold, pred, trace=None) -> bool:
    """Check whether the predicted label agrees with the gold label.

    Args:
        gold: The reference example; its ``label`` attribute is compared. A
            bare label value is also accepted.
        pred: The module's prediction; its ``label`` attribute is compared. A
            bare label value is also accepted.
        trace: Unused; present so the function matches the metric call
            signature ``metric(example, prediction, trace)``.

    Returns:
        ``True`` when both labels parse to the same boolean.
    """
    # Compare the `label` fields rather than the container objects themselves:
    # the containers have no `.lower()`, so handing them to parse_answer fails.
    # Objects without a `label` attribute (plain strings) are used as-is.
    gold_label = getattr(gold, "label", gold)
    pred_label = getattr(pred, "label", pred)

    # str() keeps this working for non-string labels (e.g. booleans read from
    # the CSV) regardless of what parse_answer accepts.
    return parse_answer(str(gold_label)) == parse_answer(str(pred_label))

In [None]:
# Optimizer configuration: candidates are scored with comparison_metric, using
# at most 2 bootstrapped demos, 8 candidate programs and 6 threads.
fewshot_optimizer = BootstrapFewShotWithRandomSearch(
    metric=comparison_metric,
    max_bootstrapped_demos=2,
    num_candidate_programs=8,
    num_threads=6,
)

# Output 

In [11]:
# Smoke test: run a freshly constructed module on the first open-domain prompt.
classifier = ClassificationModule()
first_prompt = open_examples[0].prompt
output = classifier(prompt=first_prompt)

In [12]:
# Show the label predicted for the first open-domain example.
print(output.label)

True
