## Environment setup


In [None]:
# Sets the DSP_CACHEBOOL environment variable for this kernel before DSPy is imported.
# NOTE(review): presumably this disables DSPy's request cache so every run
# re-queries the model — confirm against the installed DSPy version.
%env DSP_CACHEBOOL=false

In [None]:
import logging

# Configure the root logger once for the whole notebook at INFO verbosity.
logging.basicConfig(
    level=logging.INFO,
    encoding="utf-8",
)

In [None]:
## Data preparation

### Define the data schema

In [None]:
from enum import Enum

from pydantic import BaseModel, Field


# Closed set of classes an SMS can be assigned to. Mixing in `str` makes every
# member compare equal to its lowercase string value.
# NOTE(review): documented with `#` comments rather than a docstring because
# pydantic may copy an enum docstring into the generated schema — confirm.
class Label(str, Enum):
    HAM = "ham"
    SPAM = "spam"
    SMISHING = "smishing"


# Typed input of the classifier signature: a single `text` field holding the SMS.
# NOTE(review): kept docstring-free on purpose; pydantic may expose class
# docstrings in the schema that ends up in the prompt — confirm before adding one.
class Input(BaseModel):
    text: str = Field(description="SMS text to be classified")


# Typed output of the classifier signature: a single `label` field restricted
# to the members of `Label`.
# NOTE(review): kept docstring-free on purpose; pydantic may expose class
# docstrings in the schema that ends up in the prompt — confirm before adding one.
class Output(BaseModel):
    label: Label = Field(description="The predicted label for the SMS text")

In [None]:
import polars as pl

In [None]:
# Read the SMS dataset; later cells use its "TEXT" and "LABEL" columns.
sms_csv_path = "../data/sms_phishing.csv"
df = pl.read_csv(sms_csv_path)

In [None]:
from dspy import Example

# Turn every CSV row into a DSPy Example: the SMS text is wrapped in `Input`,
# the lower-cased label in `Output`, and only `input` is marked as an input field.
examples = [
    Example(
        input=Input(text=row["TEXT"]),
        output=Output(label=row["LABEL"].lower()),
    ).with_inputs("input")
    for row in df.iter_rows(named=True)
]

In [None]:
# Sanity check: number of examples built from the CSV rows.
len(examples)

In [None]:
# Contiguous, order-preserving split of `examples` into train / validation / test.
TRAIN_PCT = 0.96
VAL_PCT = 0.02
TEST_PCT = 0.02  # not used in the slicing: the test split is whatever remains

# Compute each boundary index once so the three slices cannot drift apart.
n_examples = len(examples)
train_end = int(n_examples * TRAIN_PCT)
val_end = int(n_examples * (TRAIN_PCT + VAL_PCT))

train = examples[:train_end]
val = examples[train_end:val_end]
test = examples[val_end:]

In [None]:
# Sanity check: sizes of the train, validation and test splits.
len(train), len(val), len(test)

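## Model connection
### Models
Candidate Ollama models (the one actually used is selected via `MODEL` in the next cell):

* llama3.2:3b
* llama3.1:8b
* gemma2:2b
* gemma2:9b
* qwen2.5:0.5b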

In [None]:
import dspy

# Ollama model tag to run. Other tags that have been tried in this notebook:
#   "llama3.2:3b-instruct-fp16", "gemma2:2b", "gemma2:9b"
MODEL = "qwen2.5:0.5b"

# Client for the selected model, served by a local Ollama instance.
lm = dspy.OllamaLocal(model=MODEL)

# `dspy.configure` is the same entry point as `dspy.settings.configure`, so a
# single call is enough to register the default LM and turn on the
# `experimental` flag; the previous three separate calls were redundant.
dspy.configure(lm=lm, experimental=True)

In [None]:
# DSPy signature for the task: one typed input field (`Input`) and one typed
# output field (`Output`).
# NOTE(review): the class docstring below is presumably used by DSPy as the
# task instructions sent to the model, so treat its wording as runtime text —
# confirm before editing it.
class SMSClassifierSignature(dspy.Signature):
    """
    Given an SMS text, predict whether it is ham, spam, or smishing.
    Output only the predicted label.
    """

    input: Input = dspy.InputField()
    output: Output = dspy.OutputField()

In [None]:
class SMSClassifier(dspy.Module):
    """DSPy module that classifies a single SMS with a typed predictor.

    The prediction is delegated to a `dspy.TypedPredictor` built over
    `SMSClassifierSignature`.

    Args:
        lm: Language-model client. It is stored on the instance and also
            registered as the global DSPy default (see the note in `__init__`).
    """

    def __init__(self, lm):
        # Run the base initialiser first so it can never overwrite attributes
        # set by this subclass.
        super().__init__()
        self.lm = lm
        # NOTE(review): this rebinds the process-wide default LM from inside a
        # constructor. Kept so existing callers behave exactly as before, but
        # constructing a second classifier with another LM changes the default
        # for every module.
        dspy.configure(lm=lm)
        # NOTE(review): `max_retries=10` presumably bounds how often the typed
        # predictor re-asks the model for schema-valid output — confirm.
        self.generate_answer = dspy.TypedPredictor(
            SMSClassifierSignature, max_retries=10
        )

    def forward(self, input):
        """Classify one SMS.

        Args:
            input: The `Input` instance to classify. The name shadows the
                built-in on purpose: it must match the signature's field name.

        Returns:
            Whatever the typed predictor returns for this input.
        """
        return self.generate_answer(input=input)

In [None]:
# Un-optimised (zero-shot) classifier bound to the configured language model.
sms_classifier = SMSClassifier(lm=lm)

In [None]:
from evaluation_helpers import validate_answer
from langfuse_extensions import EvaluateWithLangfuse

# from evaluation_helpers import EvaluateWithLangfuse
# Baseline: score the un-optimised classifier on the test split, single-threaded.
evaluator = EvaluateWithLangfuse(
    devset=test,
    display_progress=True,
    num_threads=1,
)
evaluator(sms_classifier, metric=validate_answer)

In [None]:
from dspy.teleprompt import BootstrapFewShotWithRandomSearch

# Set up the optimizer. Per the DSPy documentation, each candidate program gets
# up to 4 bootstrapped (self-generated) demos plus up to 8 labelled demos, and
# the random search compares 16 candidate programs (plus a few initial ones)
# before keeping the best-scoring one.
config = dict(
    max_bootstrapped_demos=4,
    max_labeled_demos=8,
    num_candidate_programs=16,
    num_threads=1,
    max_errors=10,
)

teleprompter = BootstrapFewShotWithRandomSearch(metric=validate_answer, **config)
# Score the candidates on the validation split. Without `valset` the optimizer
# falls back to the training subset, i.e. it would select the program on the
# very examples its demos were drawn from, and `val` would go unused.
optimized_program = teleprompter.compile(
    sms_classifier,
    trainset=train[:100],  # small subset keeps the optimisation run affordable
    valset=val,
)

In [None]:
# Score the optimised program on the same test split with the same settings,
# so the result is directly comparable with the un-optimised classifier's.
evaluator = EvaluateWithLangfuse(
    devset=test,
    display_progress=True,
    num_threads=1,
)
evaluator(optimized_program, metric=validate_answer)

In [None]:
from pathlib import Path

# Name the file after the model that was actually optimised, so switching MODEL
# can no longer overwrite or mislabel a saved program. ":" and "/" are replaced
# because they are not safe in file names; for "qwen2.5:0.5b" this yields the
# same path as before: programs/sms_classifier-qwen2.5-0.5b.json
model_slug = MODEL.replace(":", "-").replace("/", "-")
program_path = Path("programs") / f"sms_classifier-{model_slug}.json"

# Create the target directory up front so a missing folder cannot throw away
# the result of the long optimisation run.
program_path.parent.mkdir(parents=True, exist_ok=True)
optimized_program.save(str(program_path))