In [17]:
import os
import dotenv
import dspy
import pandas as pd
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
from dspy.evaluate import Evaluate
import random

# API setup 

In [18]:
# Populate the process environment from a .env file, if one is found.
dotenv.load_dotenv()

# Model identifier passed to the LM client.
model = "gpt-4o-mini"

# Connection settings are read from the environment, never hardcoded.
# PROXY_URL is not validated here, so `proxy_url` may be None.
proxy_url = os.getenv("PROXY_URL")
api_key = os.getenv("OPENAI_API_KEY")

# Stop right away when the key is missing or empty.
if api_key is None or api_key == "":
    raise ValueError("API key not found. Please check your environment variables.")

In [19]:
# Build the language-model client from the settings read from the environment.
lm = dspy.LM(model=model, api_key=api_key, api_base=proxy_url, temperature=0.0)

# Hand the client to DSPy's global configuration.
dspy.configure(lm=lm)

# Data preparation

In [20]:
def create_example(row: pd.Series) -> dspy.Example:
    """Convert one dataset row into a DSPy example.

    Args:
        row: A row providing the "question", "answer" and "label" entries.

    Returns:
        A ``dspy.Example`` with the fields ``prompt``, ``completion`` and
        ``label``, where only ``prompt`` is marked as an input.
    """
    fields = {
        "prompt": row["question"],
        "completion": row["answer"],
        "label": row["label"],
    }
    example = dspy.Example(**fields)
    return example.with_inputs("prompt")

In [21]:
# Read the open-domain and the domain-specific CSV files
# (paths are relative to the notebook's working directory).
open_data = pd.read_csv(filepath_or_buffer="../data/open_domain_data.csv")
specific_data = pd.read_csv(filepath_or_buffer="../data/specific_domain_data.csv")

In [22]:
# Convert every row of each frame into a dspy.Example via create_example.
# Comprehensions replace the two copy-pasted append loops and do not leave
# loop temporaries (`row`, `example`) behind in the kernel namespace.
open_examples = [create_example(row=row) for _, row in open_data.iterrows()]
specific_examples = [create_example(row=row) for _, row in specific_data.iterrows()]

In [23]:
# Fixed seed so the shuffled order is the same after every Restart & Run All.
SHUFFLE_SEED = 42

# Merge both example lists into one dataset.
final_data = open_examples + specific_examples

# A dedicated Random instance makes the shuffle reproducible without
# reseeding (or otherwise touching) the global `random` module state.
random.Random(SHUFFLE_SEED).shuffle(final_data)

# Signature & Module

In [30]:
# DSPy signature for the binary task "is this prompt specific to the law domain?".
# NOTE(review): the class name is misspelled ("Clasification"); it is left as is
# because ClassificationModule refers to it by this name.
# NOTE(review): DSPy is understood to use the class docstring and the `desc`
# strings as prompt text for the LM, so editing them changes model behaviour,
# not just documentation. Confirm against the DSPy signature docs before rewording.
class ClasificationSignature(dspy.Signature):
    """Classify if a text is specific for a domain or not. Target domain is law."""

    # Input: the text to classify.
    prompt = dspy.InputField(desc="The prompt to classify.")

    # Outputs: a free-text justification followed by the decision itself.
    explanation = dspy.OutputField(desc="Reasoning behind the classification.")
    # No type is declared for `label`, so callers should expect text such as "True"/"False" (TODO confirm).
    label = dspy.OutputField(desc="True, if the input text is domain specific, False otherwise.")
    

class ClassificationModule(dspy.Module):
    """DSPy module that classifies a prompt with a chain-of-thought predictor."""

    def __init__(self) -> None:
        """Create the underlying ``dspy.ChainOfThought`` predictor."""
        super().__init__()

        # The predictor is built from ClasificationSignature, which declares
        # the `prompt` input and the `explanation` / `label` outputs.
        self.prog = dspy.ChainOfThought(ClasificationSignature)

    def forward(self, prompt: str) -> dspy.Prediction:
        """Classify a single prompt.

        Args:
            prompt: The text to classify.

        Returns:
            The object returned by the predictor (a prediction, not a
            signature instance). Callers read its ``label`` attribute.
        """
        return self.prog(prompt=prompt)

# Metric & Teleprompter

In [31]:
def parse_answer(answer: object) -> bool:
    """Interpret a label-like value as a boolean.

    The value is converted to text first, so real booleans (``True``) and
    other non-string labels are accepted as well as strings. The text counts
    as positive when it contains ``yes`` or ``true`` as a whole word,
    case-insensitively; substrings such as "untrue" or "eyes" do not match.

    Args:
        answer: Label value, typically a string such as "True" or "Yes.".

    Returns:
        True for a positive answer, False for anything else.
    """
    text = str(answer).lower()
    # Replace punctuation with spaces so "True." or "yes," still yield clean words.
    words = "".join(ch if ch.isalnum() else " " for ch in text).split()
    return "yes" in words or "true" in words

In [32]:
def comparison_metric(gold, pred, trace=None) -> bool:
    """Check whether the predicted label agrees with the gold label.

    Args:
        gold: Reference example; its ``label`` attribute is compared.
        pred: Model prediction; its ``label`` attribute is compared.
        trace: Unused; accepted so the function fits the metric call signature.

    Returns:
        True when both labels parse to the same boolean.
    """
    # Compare the `label` fields, not the container objects themselves.
    # Falling back to the value itself keeps plain label inputs working.
    gold_label = getattr(gold, "label", gold)
    pred_label = getattr(pred, "label", pred)
    # str() guards against non-string labels (e.g. booleans loaded from CSV).
    return parse_answer(str(gold_label)) == parse_answer(str(pred_label))

In [33]:
# Few-shot optimizer that scores candidate programs with comparison_metric.
fewshot_optimizer = BootstrapFewShotWithRandomSearch(
    metric=comparison_metric,
    max_bootstrapped_demos=2,
    num_candidate_programs=8,
    num_threads=6,
)

Going to sample between 1 and 2 traces per predictor.
Will attempt to bootstrap 8 candidate sets.


# Output 

In [34]:
# Smoke test: run a freshly constructed module on the first open-domain prompt.
classifier = ClassificationModule()
first_prompt = open_examples[0].prompt
output = classifier(prompt=first_prompt)

In [35]:
# Show the label predicted for the sample prompt.
print(output.label)

False
