In [None]:
import numbers
import os
import random
import re

import dotenv
import dspy
import pandas as pd
import tiktoken
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
from sklearn.metrics import accuracy_score

# Hyperparameters

In [None]:
# Seed used when drawing the train/test split, so the split is meant to be reproducible.
SEED = 22
# Price in USD per 1,000,000 tokens, applied by calculate_price().
# NOTE(review): presumably the model's *input*-token rate — confirm against the provider's price list.
PRICE_PER_1_000_000_TOKENS = 0.15 
# Price in USD per 1,000,000 *output* tokens (per the name) — confirm against the provider's price list.
PRICE_PER_1_000_000_TOKENS_OUTPUT = 0.60

# API setup 

In [3]:
# Pull OPENAI_API_KEY / PROXY_URL from a local .env file into the process environment.
dotenv.load_dotenv()

api_key = os.getenv("OPENAI_API_KEY")
# Fail fast: nothing below is useful without credentials.
if not api_key:
    raise ValueError("API key not found. Please check your environment variables.")

# Optional API base URL; None when PROXY_URL is not set.
proxy_url = os.getenv("PROXY_URL")
model = "gpt-4o-mini"
# Derive the tokenizer from the same model name that is sent to the API.
# The previous hard-coded "gpt-4-mini" is not a real model name: tiktoken
# resolves it through the "gpt-4-" prefix to a different encoding than the
# one gpt-4o-mini uses, which skews every token count.
encoder = tiktoken.encoding_for_model(model)

In [4]:
# Language-model client shared by every DSPy module in this notebook.
# temperature=0.0 requests the least random sampling the backend offers.
lm = dspy.LM(
    model=model,
    temperature=0.0,
    api_key=api_key,
    api_base=proxy_url,
)

# Register the client as DSPy's default LM.
dspy.configure(lm=lm)

# Data preparation

In [5]:
def create_example(row: pd.Series) -> dspy.Example:
    """Build a DSPy example from one dataset row.

    Args:
        row: A row providing the "question", "answer" and "label" entries.

    Returns:
        A ``dspy.Example`` with fields ``prompt``, ``completion`` and ``label``,
        where only ``prompt`` is marked as an input field.
    """
    fields = {
        "prompt": row["question"],
        "completion": row["answer"],
        "label": row["label"],
    }
    example = dspy.Example(**fields)
    return example.with_inputs("prompt")

In [6]:
# Load the two labelled source datasets (paths are relative to the notebook's working directory).
# create_example() reads the "question", "answer" and "label" entries of every row,
# so both files must provide those columns.
open_data = pd.read_csv("../data/open_domain_data.csv")
specific_data = pd.read_csv("../data/specific_domain_data.csv")

In [7]:
# Turn every DataFrame row into a dspy.Example, one list per source dataset.
open_examples = [create_example(row=row) for _, row in open_data.iterrows()]
specific_examples = [create_example(row=row) for _, row in specific_data.iterrows()]

In [None]:
# Number of examples used for prompt optimisation and for the final evaluation.
TRAIN_SIZE = 100
TEST_SIZE = 500

final_data = open_examples + specific_examples

# Shuffle with a private, seeded RNG so the split is identical on every
# Restart & Run All (the global `random.shuffle` was unseeded).
random.Random(SEED).shuffle(final_data)

if len(final_data) < TRAIN_SIZE + TEST_SIZE:
    raise ValueError(
        f"Need at least {TRAIN_SIZE + TEST_SIZE} examples for the split, "
        f"got {len(final_data)}."
    )

# `final_data` is a plain list, so it has no `.sample()` method; slice the
# shuffled list instead. The slices are disjoint, so no test example can leak
# into the few-shot demonstrations chosen from the training set.
train_data = final_data[:TRAIN_SIZE]
test_data = final_data[TRAIN_SIZE:TRAIN_SIZE + TEST_SIZE]

print(f"Train data: {len(train_data)}")
print(f"Test data: {len(test_data)}")

# Signature & Module

In [9]:
class ClasificationSignature(dspy.Signature):
    """Classify if a text is specific for a domain or not. Target domain is law."""
    # NOTE(review): DSPy uses a Signature's docstring as the task instructions
    # given to the model (see the DSPy Signature docs), so the text above and
    # the `desc` strings below are prompt content — editing them changes
    # model behaviour, not just documentation.
    # NOTE(review): the class name is misspelled ("Clasification") but is
    # referenced by ClassificationModule; rename both together if at all.

    # Input field: the text to be classified.
    prompt = dspy.InputField(desc="The prompt to classify.")

    # A separate free-text explanation output is currently disabled.
    #explanation = dspy.OutputField(desc="Reasoning behind the classification.")
    # Output field: the binary class, expected as "1" (law domain) or "0".
    label = dspy.OutputField(desc="1, if the input text is law domain, 0 otherwise.")
    

class ClassificationModule(dspy.Module):
    """DSPy program that labels a prompt as law-domain ("1") or not ("0").

    The program is a single ``dspy.ChainOfThought`` predictor built on
    ``ClasificationSignature``.
    """

    def __init__(self) -> None:
        super().__init__()

        # The attribute name `prog` is kept as-is: it names the predictor
        # inside the module, which saved program files may rely on.
        self.prog = dspy.ChainOfThought(ClasificationSignature)

    def forward(self, prompt: str) -> dspy.Prediction:
        """Run the predictor on one prompt.

        Args:
            prompt: The text to classify.

        Returns:
            The predictor's result; callers read the predicted class from its
            ``label`` attribute. (The previous annotation named the Signature
            class, which is the task description, not the returned object.)
        """
        return self.prog(prompt=prompt)

# Metric & Teleprompter

In [10]:
def parse_answer(answer) -> bool:
    """Convert a binary label into a bool.

    Accepted inputs:
      * a string that is exactly "0" or "1" once surrounding whitespace is
        stripped;
      * any real number equal to 0 or 1 — Python ``int``/``bool``/``float`` as
        well as NumPy scalars such as ``np.int64(1)``. (pandas yields floats
        like ``1.0`` when a label column contains missing values.)

    Anything else is reported on stdout and treated as ``False``, so callers
    cannot distinguish an unparseable value from a genuine 0 label.

    Args:
        answer: The raw label, e.g. a model output or a dataset value.

    Returns:
        ``True`` for 1, ``False`` for 0 or for any unrecognised value.
    """
    if isinstance(answer, str):
        text = answer.strip()
        if re.fullmatch(r"[01]", text):
            return text == "1"
    elif isinstance(answer, numbers.Real) and answer in (0, 1):
        # numbers.Real also covers NumPy integer/float scalars, which a plain
        # isinstance(answer, int) check rejects. NaN is never equal to 0 or 1,
        # so it falls through to the warning below.
        return bool(answer)

    print(f"Unexpected non-binary label found: {answer}")
    return False

def evaluate_model(predictions, true_labels):
    """Compute classification accuracy over raw predictions and labels.

    Both sequences are normalised with ``parse_answer`` first (predictions,
    then labels), so any value it does not recognise counts as ``False``.

    Args:
        predictions: Raw predicted labels, one per example.
        true_labels: Raw ground-truth labels, in the same order.

    Returns:
        The value returned by ``accuracy_score`` for the parsed labels.
    """
    y_pred = list(map(parse_answer, predictions))
    y_true = list(map(parse_answer, true_labels))
    return accuracy_score(y_true, y_pred)

In [11]:
def comparison_metric(example, pred, trace=None) -> bool:
    """Metric for the optimizer: does the predicted label match the gold label?

    Args:
        example: Gold example; its ``label`` attribute is the expected class.
        pred: Program output; its ``label`` attribute is the predicted class.
        trace: Accepted for compatibility with the metric call signature; unused.

    Returns:
        ``True`` when both labels parse to the same boolean.
    """
    # NOTE(review): parse_answer maps unparseable values to False, so a
    # malformed prediction is scored as correct whenever the gold label is 0.
    expected = parse_answer(example.label)
    predicted = parse_answer(pred.label)
    return expected == predicted

In [None]:
# Few-shot prompt optimizer scored with comparison_metric.
# NOTE(review): the numeric settings below are hard-coded; consider moving
# them next to the other hyperparameters.
fewshot_optimizer = BootstrapFewShotWithRandomSearch(
    metric=comparison_metric,
    max_bootstrapped_demos=4,
    max_labeled_demos=5,
    max_rounds=1,
    num_candidate_programs=5,
)

# Optimize a fresh, uncompiled program against the training examples.
# This step calls the language model and can be slow and costly.
uncompiled_classification = ClassificationModule()
compiled_classification = fewshot_optimizer.compile(
    uncompiled_classification,
    trainset=train_data,
)

# Save the model
compiled_classification.save("classification_model.json")

# Price calculation

In [29]:
def count_tokens(prompt: str) -> int:
    """Count the tokens in ``prompt`` using the notebook-level ``encoder``.

    Args:
        prompt: The text to tokenize.

    Returns:
        Number of tokens the encoder produces for the text.
    """
    # By default tiktoken raises ValueError when the text contains a literal
    # special token such as "<|endoftext|>". Dataset text is untrusted, so
    # disable that check and tokenize such substrings as ordinary text.
    return len(encoder.encode(prompt, disallowed_special=()))

In [None]:
def calculate_price(token_count: int, output_token_count: int = 0) -> float:
    """Calculate the price in USD for a number of tokens.

    Args:
        token_count: Tokens billed at ``PRICE_PER_1_000_000_TOKENS``.
        output_token_count: Tokens billed at
            ``PRICE_PER_1_000_000_TOKENS_OUTPUT``. Defaults to 0, which
            reproduces the original single-rate behaviour.

    Returns:
        The combined cost of both token counts.
    """
    input_cost = (token_count / 1_000_000) * PRICE_PER_1_000_000_TOKENS
    output_cost = (output_token_count / 1_000_000) * PRICE_PER_1_000_000_TOKENS_OUTPUT
    return input_cost + output_cost

# Output 

In [None]:
# Upper bound on how many test examples are evaluated.
MAX_EVAL_EXAMPLES = 5000

predictions = []
true_labels = []

# Input and output tokens are tracked separately because they are billed at
# different rates.
input_tokens = 0
output_tokens = 0

for example in test_data[:MAX_EVAL_EXAMPLES]:
    prompt = example.prompt
    output = compiled_classification(prompt=prompt)

    input_tokens += count_tokens(prompt)
    output_tokens += count_tokens(output.label)

    predictions.append(output.label)
    true_labels.append(example.label)

# Kept for any later cell that reads the combined count.
total_tokens = input_tokens + output_tokens

# Evaluate the model
evaluation_results = evaluate_model(predictions, true_labels)

# Only the raw prompt text and the returned label are counted here; anything
# else sent to or generated by the model (instructions, few-shot
# demonstrations, reasoning text) is not, so treat this as a lower bound.
total_price = (
    calculate_price(input_tokens)
    + (output_tokens / 1_000_000) * PRICE_PER_1_000_000_TOKENS_OUTPUT
)

print(f"Evaluation results: Accuracy {evaluation_results * 100}%")
print(f"Total price: {total_price} USD")
