# 1. Setup Environment

In [37]:
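# Uncomment to install the dependencies into the kernel's environment.
# The PyPI distribution is named `scikit-learn`; `sklearn` is a deprecated alias.
# %pip install dspy-ai
# %pip install scikit-learn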

In [81]:
import os

import dspy
from sklearn.datasets import fetch_20newsgroups

In [157]:
# Do not clobber a real key that is already exported in the environment with
# the placeholder below; only fall back to the placeholder when the variable
# is missing or empty. Replace 'your_api_key' (or, better, export
# OPENAI_API_KEY before starting Jupyter) — never commit a real key.
# Note: a key already present in the environment takes precedence over the
# literal written here.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = 'your_api_key'

In [158]:
# Language model handle for GPT-4o-mini, registered as the LM in DSPy's
# global settings.
turbo = dspy.OpenAI(
    model='gpt-4o-mini-2024-07-18',
)
dspy.settings.configure(
    lm=turbo,
)

# 2. Create Dataset

In [142]:
def convert_target_labels_to_names(sklearn_dataset):
    """Replace integer class labels in ``target`` with their class names.

    The dataset is modified in place: ``sklearn_dataset.target`` becomes a
    plain list in which every integer index has been looked up in
    ``sklearn_dataset.target_names``. Entries that are already strings are
    kept as they are, so calling the function again on the same object is a
    no-op instead of raising ``TypeError``.

    Args:
        sklearn_dataset: Object exposing ``target`` (iterable of integer
            indices and/or names) and ``target_names`` (sequence of names).

    Returns:
        The same ``sklearn_dataset`` object, for call chaining.

    Raises:
        IndexError: If an integer label is outside ``target_names``.
    """
    names = sklearn_dataset.target_names
    sklearn_dataset.target = [
        # Already-converted labels pass through untouched (idempotence).
        label if isinstance(label, str) else names[label]
        for label in sklearn_dataset.target
    ]
    return sklearn_dataset


# Fetch the complete 20 Newsgroups corpus and swap its integer labels for
# the corresponding newsgroup names.
newsgroups_dataset = convert_target_labels_to_names(
    fetch_20newsgroups(subset='all')
)

# One dspy.Example per document; only `post` is declared as an input field.
dspy_examples = [
    dspy.Example(post=text, topic=label).with_inputs('post')
    for text, label in zip(newsgroups_dataset.data, newsgroups_dataset.target)
]

# First 100 examples are used for optimization, the following 100 for evaluation.
train = dspy_examples[:100]
test = dspy_examples[100:200]

# 3. Create DSPy Classifier

In [143]:
# Signature for the classification task: one input field (`post`) and one
# output field (`topic`).
# NOTE(review): DSPy presumably turns the class docstring and the field
# `desc` strings into prompt text, so treat them as runtime strings rather
# than documentation — confirm before rewording them.
class TopicPredictor(dspy.Signature):
    """Classify the topic of the newsgroup post."""

    post = dspy.InputField(desc='Newsgroup post')
    # The description enumerates every entry of the module-level
    # `newsgroups_dataset.target_names`, each wrapped in backticks and joined
    # with ", ". It is evaluated once, when this class body executes, so the
    # dataset cell must have run first.
    topic = dspy.OutputField(
        desc=f"Topic of the post. Possible values: {', '.join(f'`{i}`' for i in newsgroups_dataset.target_names)}"
    )


def _normalize_topic(value):
    """Return ``value`` lower-cased with surrounding whitespace, backticks and quotes removed."""
    return str(value).strip().strip('`\'"').strip().lower()


def validate_topic(example, pred, trace=None):
    """Metric: does the predicted topic match the gold topic?

    The comparison is case-insensitive and ignores surrounding whitespace,
    backticks and quotes. The prompt lists the allowed labels wrapped in
    backticks, so a model may echo that decoration; newsgroup names never
    contain those characters, so stripping them cannot create a false match.

    Args:
        example: Object whose ``topic`` attribute holds the gold label.
        pred: Object whose ``topic`` attribute holds the predicted label.
        trace: Unused; accepted so the function matches the metric call
            signature used elsewhere in this notebook.

    Returns:
        bool: ``True`` when the normalized labels are equal. A prediction
        with a missing or ``None`` topic counts as incorrect instead of
        raising ``AttributeError``.
    """
    predicted = getattr(pred, 'topic', None)
    if predicted is None:
        return False
    return _normalize_topic(example.topic) == _normalize_topic(predicted)


# Unoptimized predictor built from the TopicPredictor signature. Later cells
# evaluate it as-is and pass deep copies of it to the optimizers.
classify = dspy.Predict(TopicPredictor)

In [None]:
# Reusable evaluator: scores a program on the `test` split with
# `validate_topic`, using 4 threads and a progress bar but no results table.
evaluate = dspy.Evaluate(
    metric=validate_topic,
    devset=test,
    display_table=False,
    display_progress=True,
    num_threads=4,
)

# Score of the unoptimized predictor (shown as the cell output).
evaluate(classify)

# 4. Optimize with GPT-4o (prompt model) + GPT-4o-mini (task model)

In [None]:
# COPRO optimizer whose `prompt_model` is GPT-4o; the program being optimized
# still runs on the globally configured LM.
teleprompter = dspy.teleprompt.COPRO(
    metric=validate_topic,
    prompt_model=dspy.OpenAI(model='gpt-4o-2024-08-06', max_tokens=2000),
    depth=3,
    breadth=10,
    init_temperature=1,
    verbose=True,
    track_stats=True,
)

# Compile a deep copy so that `classify` itself is left unmodified.
initial_optimized_program = teleprompter.compile(
    classify.deepcopy(),
    eval_kwargs=dict(num_threads=2, display_progress=True, display_table=0),
    trainset=train,
)

In [161]:
# Persist the program optimized with the GPT-4o prompt model. The path is
# relative, so the file is written to the current working directory.
initial_optimized_program.save("copro_4o_optimized")

In [None]:
# Score the program optimized with the GPT-4o prompt model, using the same
# evaluator (and therefore the same `test` split and metric) as the baseline.
evaluate(initial_optimized_program)

# 5. Optimize with only GPT-4o-mini (as both prompt model and task model)

In [None]:
# Same COPRO configuration as the previous section, except that the
# `prompt_model` is GPT-4o-mini. Note that this rebinds `teleprompter`.
teleprompter = dspy.teleprompt.COPRO(
    metric=validate_topic,
    prompt_model=dspy.OpenAI(model='gpt-4o-mini-2024-07-18', max_tokens=2000),
    depth=3,
    breadth=10,
    init_temperature=1,
    verbose=True,
    track_stats=True,
)

# Start again from a fresh deep copy of the unoptimized `classify` program.
new_optimized_program = teleprompter.compile(
    classify.deepcopy(),
    eval_kwargs=dict(num_threads=2, display_progress=True, display_table=0),
    trainset=train,
)

In [162]:
# Persist the program optimized with the GPT-4o-mini prompt model. The path
# is relative, so the file is written to the current working directory.
new_optimized_program.save("copro_4o-mini_optimized")

In [None]:
# Score the program optimized with the GPT-4o-mini prompt model, using the
# same evaluator (and therefore the same `test` split and metric) as before.
evaluate(new_optimized_program)