# Emotion Classification with dspy.Predict

This notebook builds an LLM-based emotion classifier with DSPy's `Predict` module, measures its baseline accuracy on a validation set, and then optimizes its prompt with GEPA.

In [12]:
import os
from typing import Literal

import dspy
import pandas as pd
from dotenv import load_dotenv
from dspy import GEPA

# Load variables from a local .env file (if one exists) into the process
# environment; presumably this is where GEMINI_API_KEY comes from.
load_dotenv()

True

In [13]:
# API key for the Gemini models; None when the variable is not set.
GEMINI_KEY = os.environ.get("GEMINI_API_KEY")

In [14]:
# String label ids paired with emotion names (presumably the dataset's class ids).
LABEL_MAPPING = dict(
    zip(
        ("0", "1", "2", "3", "4"),
        ("sadness", "joy", "love", "anger", "fear"),
    )
)

# Emotion names in label-id order.
EMOTION_TYPES = [*LABEL_MAPPING.values()]
EMOTION_TYPES

['sadness', 'joy', 'love', 'anger', 'fear']

## Load preprocessed data

In [15]:
# Load preprocessed train and validation data
df_train = pd.read_csv("data/train.csv")
df_val = pd.read_csv("data/val.csv")

print(f"Train size: {len(df_train)}")
print(f"Val size: {len(df_val)}")

df_train.head()

Train size: 100
Val size: 50


Unnamed: 0,user_message,emotion
0,i must say that i do feel better in myself and im really excited a...,joy
1,i feel so privileged to have spent so much time with him,joy
2,i went to see the entrance examination results at university i rej...,joy
3,i wish there was something i could do sitting here in the midwest ...,fear
4,i still can t shake the feeling of him loving us both equally,love


In [16]:
# Convert to DSPy format
def _to_dspy_examples(records):
    """Build one `dspy.Example` per record.

    Args:
        records: Iterable of mappings that each provide the keys
            "user_message" and "emotion".

    Returns:
        A list of `dspy.Example` objects carrying those two fields, with
        "user_message" marked as the only input field.
    """
    return [
        dspy.Example({
            "user_message": record["user_message"],
            "emotion": record["emotion"],
        }).with_inputs("user_message")
        for record in records
    ]


# One dict per DataFrame row.
train_data = df_train.to_dict(orient="records")
val_data = df_val.to_dict(orient="records")

# Same conversion for both splits, so it lives in a single helper.
dspy_train = _to_dspy_examples(train_data)
dspy_val = _to_dspy_examples(val_data)

len(dspy_train), len(dspy_val)

(100, 50)

## Configure DSPy

In [17]:
# Task LM used for the classification calls, registered as the DSPy default.
# The key loaded into GEMINI_KEY is passed explicitly, the same way the
# reflection LM is configured, instead of relying on an implicit environment lookup.
lm = dspy.LM(model="gemini/gemini-2.5-flash-lite", api_key=GEMINI_KEY)
dspy.configure(lm=lm)

## Define the emotion analyzer

In [18]:
# Closed set of labels the output field may take; same values as LABEL_MAPPING.
EmotionTypes = Literal['sadness', 'joy', 'love', 'anger', 'fear']

class EmotionAnalyzer(dspy.Signature):
    """
    Determine the emotion of the user message.
    """
    # NOTE: the docstring above is runtime text, not just documentation: DSPy
    # exposes it as `signature.instructions` (the prompt), so edit it deliberately.
    user_message: str = dspy.InputField()  # text to classify
    emotion: EmotionTypes = dspy.OutputField()  # predicted label, one of EmotionTypes


class EmotionAnalyzerProgram(dspy.Module):
    """DSPy module that classifies a message with a single `dspy.Predict` call.

    The predictor is stored as `self.analyzer`.
    """

    def __init__(self):
        # Initialize the dspy.Module base state before attaching sub-modules.
        super().__init__()
        self.analyzer = dspy.Predict(EmotionAnalyzer)

    def forward(self, user_message: str) -> dspy.Prediction:
        """Run the predictor on one message.

        Args:
            user_message: Text whose emotion should be determined.

        Returns:
            The `dspy.Prediction` returned by the predictor, unchanged.
        """
        return self.analyzer(user_message=user_message)

In [19]:
# Instantiate the unoptimized program and smoke-test it on one hand-written message.
em_analyzer = EmotionAnalyzerProgram()
em_analyzer(user_message="I am feeling good")

Prediction(
    emotion='joy'
)

## Define metrics

In [20]:
def score_emotion(ground_truth: str, prediction: str) -> bool:
    """Return whether the predicted label equals the ground-truth label.

    The comparison is plain equality: no case or whitespace normalization.
    """
    is_match = ground_truth == prediction
    return is_match


def emotion_metric(ground_truth, pred, trace=None, pred_name=None, pred_trace=None):
    """Exact-match metric over the `emotion` field.

    Args:
        ground_truth: Object with an `emotion` attribute, or the label itself.
        pred: Object with an `emotion` attribute, or the predicted label itself.
        trace, pred_name, pred_trace: Accepted but not used here.

    Returns:
        The result of `score_emotion` on the two extracted labels.
    """
    # Fall back to the argument itself when it has no `emotion` attribute.
    expected = getattr(ground_truth, "emotion", ground_truth)
    predicted = getattr(pred, "emotion", pred)
    return score_emotion(expected, predicted)

def feedback_emotion(ground_truth, pred) -> tuple[str, float]:
    """Build textual feedback and a numeric score for one prediction.

    The feedback speaks of "emotion" rather than "sentiment", matching the
    task and the signature's `emotion` field.

    Args:
        ground_truth: Object with an `emotion` attribute, or the label itself.
        pred: Object with an `emotion` attribute, or the predicted label itself.

    Returns:
        A `(feedback, score)` pair. `score` is 1.0 when the two labels are
        equal and 0.0 otherwise; `feedback` is a sentence describing the outcome.
    """
    # Fall back to the argument itself when it has no `emotion` attribute.
    gt_emotion = getattr(ground_truth, "emotion", ground_truth)
    pred_emotion = getattr(pred, "emotion", pred)

    # Compare once and derive both the score and the message from the result.
    if gt_emotion == pred_emotion:
        feedback = f"You correctly classified the emotion of the message as `{gt_emotion}`. This message is indeed `{gt_emotion}`."
        return feedback, 1.0

    feedback = f"You incorrectly classified the emotion of the message as `{pred_emotion}`. The correct emotion is `{gt_emotion}`. Think about how you could have reasoned to get the correct emotion label."
    return feedback, 0.0

def metric_with_feedback(ground_truth, pred, trace=None, pred_name=None, pred_trace=None):
    """Metric that returns both a score and textual feedback.

    Args:
        ground_truth: Object with an `emotion` attribute, or the label itself.
        pred: Object with an `emotion` attribute, or the predicted label itself.
        trace, pred_name, pred_trace: Accepted but not used here.

    Returns:
        A `dspy.Prediction` with `score` and `feedback` fields taken from
        `feedback_emotion`.
    """
    # Fall back to the argument itself when it has no `emotion` attribute.
    expected = getattr(ground_truth, "emotion", ground_truth)
    predicted = getattr(pred, "emotion", pred)

    feedback_text, match_score = feedback_emotion(expected, predicted)
    return dspy.Prediction(score=match_score, feedback=feedback_text)

## Baseline evaluation

In [22]:
# Shared evaluator: scores a program on the validation examples with the
# exact-match metric. It is reused later for the optimized program.
evaluate = dspy.Evaluate(
    metric=emotion_metric,
    devset=dspy_val,
    num_threads=4,
    display_progress=True,
    display_table=True,
)

# Baseline: the unoptimized program.
evaluate(em_analyzer)

Average Metric: 23.00 / 50 (46.0%): 100%|██████████| 50/50 [00:00<00:00, 163.81it/s]

2025/10/19 09:55:06 INFO dspy.evaluate.evaluate: Average Metric: 23 / 50 (46.0%)





Unnamed: 0,user_message,example_emotion,pred_emotion,emotion_metric
0,i only find out that they are looking and feeling complacent just ...,joy,sadness,
1,i can feel that my hopes have not been in vain she said,sadness,joy,
2,i know how much work goes into the creation and i feel the author ...,joy,love,
3,i feel shamed that i hoped for one last christmas because i know s...,sadness,sadness,✔️ [True]
4,i was feeling very bah humbugish coming out of this year s thanksg...,joy,anger,
5,i am already feeling so tortured for having to wait for the result...,anger,sadness,
6,i realize that i sound a little overdramatic when i say that but i...,sadness,anger,
7,i won t feel like the jolly green giant while clothes shopping,joy,sadness,
8,i am frankly surprised that you consider the minds of the quorum m...,fear,anger,
9,i mean i am happy for others but how can a person feel ok with som...,joy,sadness,


EvaluationResult(score=46.0, results=<list of 50 results>)

## Optimize with GEPA

In [24]:
# Separate LM that GEPA uses for reflection; it is configured with the same
# API key variable as the rest of the notebook.
reflection_lm = dspy.LM(
    model="gemini/gemini-2.5-flash",
    temperature=1.0,
    max_tokens=10000,
    api_key=GEMINI_KEY,
)

# GEPA is imported with the other imports at the top of the notebook.
# The metric must return feedback text as well as a score, hence metric_with_feedback.
optimizer = GEPA(
    metric=metric_with_feedback,
    auto="light",
    num_threads=20,
    track_stats=True,
    use_merge=False,
    reflection_lm=reflection_lm,
)

In [25]:
# Run GEPA starting from the baseline program. This is the expensive cell:
# the logged run below budgeted roughly 580 metric calls.
optimized_program = optimizer.compile(
    em_analyzer,
    trainset=dspy_train,
    valset=dspy_val,
)

2025/10/19 09:55:16 INFO dspy.teleprompt.gepa.gepa: Running GEPA for approx 580 metric calls of the program. This amounts to 3.87 full evals on the train+val set.
2025/10/19 09:55:16 INFO dspy.teleprompt.gepa.gepa: Using 50 examples for tracking Pareto scores. You can consider using a smaller sample of the valset to allow GEPA to explore more diverse solutions within the same budget.
GEPA Optimization:   0%|          | 0/580 [00:00<?, ?rollouts/s]2025/10/19 09:55:16 INFO dspy.evaluate.evaluate: Average Metric: 23.0 / 50 (46.0%)
2025/10/19 09:55:16 INFO dspy.teleprompt.gepa.gepa: Iteration 0: Base program full valset score: 0.46
GEPA Optimization:   9%|▊         | 50/580 [00:00<00:03, 169.44rollouts/s]2025/10/19 09:55:16 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Selected program 0 score: 0.46


Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:00<00:00,  3.99it/s]

2025/10/19 09:55:17 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)





2025/10/19 09:55:28 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Proposed new text for analyzer: Determine the primary emotion expressed by the author of the user_message.

When determining the emotion, focus on the speaker's perspective and what they are feeling, rather than solely on the actions described or the emotions of others mentioned in the message.

Pay close attention to the overall tone and context of the message.

Specific guidance for common distinctions:
- **Sadness vs. Anger:** If the message describes feelings of sorrow, grief, loss, disappointment, or humiliation, it is likely `sadness`. If the message expresses frustration, resentment, hostility, indignation, or describes aggressive actions or strong negative sentiment towards another person's actions, it is more likely `anger`.
- **Inferring Joy:** Joy can be expressed directly, but it can also be inferred from a sense of relief, vindication, contentment, or the absence of negative obligations or states (e.g., the r

Average Metric: 0.00 / 3 (0.0%): 100%|██████████| 3/3 [00:00<00:00,  4.99it/s]

2025/10/19 09:55:29 INFO dspy.evaluate.evaluate: Average Metric: 0.0 / 3 (0.0%)





2025/10/19 09:55:40 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Proposed new text for analyzer: Determine the predominant emotion expressed in the user message. Your analysis should go beyond surface-level keywords and consider the underlying intent, the broader context of the message, and the speaker's overall sentiment, especially when faced with seemingly contradictory phrases.

Pay particular attention to these nuanced interpretations:
1.  **Distinguishing Anger from Sadness in Tasks:** If the message describes a difficult or disliked task, even if it involves personal discomfort ("tortured"), and the speaker expresses frustration, dislike ("hate to do"), or uses coping mechanisms to endure it, the emotion is likely `anger` (frustration, annoyance) towards the task itself, rather than `sadness` about the situation.
2.  **Identifying Joy in the Face of Past Negativity or Challenges:** When a message contains negative descriptions of a person's past state or actions ("cover yourself

Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:00<00:00,  4.78it/s] 

2025/10/19 09:55:44 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)





2025/10/19 09:55:57 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Proposed new text for analyzer: Determine the predominant emotion expressed in the user message. Your analysis should go beyond surface-level keywords and consider the underlying intent, the broader context of the message, and the speaker's overall sentiment, especially when faced with seemingly contradictory phrases. The goal is to identify the single strongest and most overarching emotion.

Pay particular attention to these nuanced interpretations:

1.  **Distinguishing Anger from Sadness in Tasks:** If the message describes a difficult or disliked task, even if it involves personal discomfort ("tortured"), and the speaker expresses frustration, dislike ("hate to do"), or uses coping mechanisms to endure it, the emotion is likely `anger` (frustration, annoyance) towards the task itself, rather than `sadness` about the situation.

2.  **Identifying Anger in Response to Being Disliked/Hated:** If the message describes a s

Average Metric: 3.00 / 3 (100.0%): 100%|██████████| 3/3 [00:00<00:00,  3.72it/s]

2025/10/19 09:56:01 INFO dspy.evaluate.evaluate: Average Metric: 3.0 / 3 (100.0%)
2025/10/19 09:56:01 INFO dspy.teleprompt.gepa.gepa: Iteration 4: All subsample scores perfect. Skipping.
2025/10/19 09:56:01 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Reflective mutation did not propose a new candidate
GEPA Optimization:  29%|██▉       | 171/580 [00:45<01:59,  3.42rollouts/s]2025/10/19 09:56:01 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Selected program 0 score: 0.46



Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  5.25it/s]

2025/10/19 09:56:02 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)





2025/10/19 09:56:10 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Proposed new text for analyzer: Determine the emotion of the user message.

The goal is to identify the dominant emotion expressed in the provided `user_message`. Your output should be a single emotion label.

When classifying the emotion, it is crucial to consider the full context of the message. Do not rely solely on individual keywords, as their emotional implication can change based on the surrounding text and the overall situation described. Analyze both explicit emotional statements and implicit cues derived from the circumstances or tone.

Pay close attention to the nuanced differences between emotions, particularly for positive sentiments like 'joy' and 'love':
*   **Joy** typically represents feelings of happiness, excitement, enthusiasm, delight, contentment, or optimism, often arising from positive events, achievements, generosity, or a pleasant outlook.
*   **Love** generally indicates deep affection, profound

Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  4.46it/s]

2025/10/19 09:56:16 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)





2025/10/19 09:56:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Proposed new text for analyzer: The task is to determine the single dominant emotion expressed in the `user_message`.

Your response must be a single lowercase word, representing the identified emotion.

Refer to the following specific emotion definitions and their nuances when classifying:

*   **joy**: This emotion indicates happiness, delight, cheerfulness, or elation, often stemming from personal success, positive outcomes, or general contentment.
    *   *Example:* "i went to see the entrance examination results at university i rejoyced at my success" -> `joy`
*   **sadness**: This emotion encompasses feelings of unhappiness, sorrow, despair, or being overwhelmed and unable to cope with stressful situations. It can also signify disappointment or a sense of loss.
    *   *Example:* "i just feel extremely stressed because everything is happening so fast i cant manage to get my head around it" -> `sadness` (Note: Feelin

Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:00<00:00,  7.42it/s]

2025/10/19 09:56:26 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)





2025/10/19 09:56:44 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Proposed new text for analyzer: Determine the single predominant emotion expressed in the user message.

Carefully read and analyze the user_message for both explicit and implicit emotional cues.

**Reasoning Process:**
1.  **Look for Direct Emotional Language:** Identify any words or phrases that directly express an emotion (e.g., "discouraged," "grateful," "impatient").
2.  **Infer from Context and Situation:** If direct emotional language is absent or ambiguous, infer the emotion from the described situation, events, the user's reactions, or their internal thoughts and struggles.
3.  **Pay Attention to Underlying Tone:** Consider the overall tone, implication, or subtext of the message. Is it defensive, exasperated, resigned, frustrated, or accepting?

**Specific Guidance on Distinguishing Emotions (especially Sadness vs. Anger):**

*   **Sadness:** Often relates to feelings of loss, grief, despair, helplessness, disap

Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.72it/s]

2025/10/19 09:56:51 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)





2025/10/19 09:57:09 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Proposed new text for analyzer: Determine the single predominant emotion expressed in the user message.

Carefully read and analyze the user_message for both explicit and implicit emotional cues.

**Reasoning Process:**
1.  **Look for Direct Emotional Language:** Identify any words or phrases that directly express an emotion (e.g., "discouraged," "grateful," "impatient").
2.  **Infer from Context and Situation:** If direct emotional language is absent or ambiguous, infer the emotion from the described situation, events, the user's reactions, or their internal thoughts and struggles.
3.  **Pay Attention to Underlying Tone:** Consider the overall tone, implication, or subtext of the message. Is it defensive, exasperated, resigned, frustrated, or accepting?
4.  **Identify the Predominant Emotion:** While multiple emotions might be present, determine which single emotion is the strongest or most overarching in the message.

*

Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  3.95it/s]

2025/10/19 09:57:13 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)





2025/10/19 09:57:26 INFO dspy.teleprompt.gepa.gepa: Iteration 9: Proposed new text for analyzer: Determine the single predominant emotion expressed in the user message.

Carefully read and analyze the user_message for both explicit and implicit emotional cues.

**Reasoning Process:**
1.  **Identify Explicit Emotional Language:** Look for any words or phrases that directly express an emotion (e.g., "discouraged," "grateful," "impatient," "love," "angry," "sad").
2.  **Infer from Context, Situation, and Tone:** If direct emotional language is absent or ambiguous, infer the emotion from the described situation, events, the user's reactions, internal thoughts, struggles, or the overall tone (e.g., defensive, exasperated, resigned, frustrated, appreciative, warm, defiant).
3.  **Weigh Conflicting Cues (Predominance):** If multiple emotional cues are present, determine which emotion is the most central, overarching, or overpowering. The goal is to identify the *single predominant* emotion. A

Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  4.12it/s]

2025/10/19 09:57:28 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)





2025/10/19 09:57:52 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Proposed new text for analyzer: Determine the predominant emotion expressed in the user message. Your analysis should go beyond surface-level keywords and consider the underlying intent, the broader context of the message, and the speaker's overall sentiment, especially when faced with seemingly contradictory phrases. The goal is to identify the single strongest and most overarching emotion.

Pay particular attention to these nuanced interpretations:

1.  **Distinguishing Anger from Sadness in Tasks:** If the message describes a difficult or disliked task, even if it involves personal discomfort ("tortured"), and the speaker expresses frustration, dislike ("hate to do"), or uses coping mechanisms to endure it, the emotion is likely `anger` (frustration, annoyance) towards the task itself, rather than `sadness` about the situation.

2.  **Identifying Anger in Response to Being Disliked/Hated:** If the message describes a 

Average Metric: 0.00 / 3 (0.0%): 100%|██████████| 3/3 [00:00<00:00,  5.08it/s]

2025/10/19 09:57:56 INFO dspy.evaluate.evaluate: Average Metric: 0.0 / 3 (0.0%)





2025/10/19 09:58:14 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Proposed new text for analyzer: Determine the predominant emotion expressed in the user message. Your analysis should go beyond surface-level keywords and consider the underlying intent, the broader context of the message, and the speaker's overall sentiment, especially when faced with seemingly contradictory phrases. The goal is to identify the single strongest and most overarching emotion.

Pay particular attention to these nuanced interpretations:

1.  **Distinguishing Anger from Sadness in Tasks/Circumstances:** If the message describes a difficult or disliked task, even if it involves personal discomfort ("tortured"), and the speaker expresses frustration, dislike ("hate to do"), or uses coping mechanisms to endure it, the emotion is likely `anger` (frustration, annoyance) towards the task itself, rather than `sadness` about the situation. This also applies when the speaker expresses **impatience** or **questions a 

Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:00<00:00,  4.76it/s] 

2025/10/19 09:58:18 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)





2025/10/19 09:58:33 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Proposed new text for analyzer: Determine the predominant emotion expressed in the user message. Your analysis should go beyond surface-level keywords and consider the underlying intent, the broader context of the message, and the speaker's overall sentiment, especially when faced with seemingly contradictory phrases.

Pay particular attention to these nuanced interpretations and guidelines:

**Core Nuanced Interpretations (from previous instructions, still valid):**
1.  **Distinguishing Anger from Sadness in Tasks:** If the message describes a difficult or disliked task, even if it involves personal discomfort ("tortured"), and the speaker expresses frustration, dislike ("hate to do"), or uses coping mechanisms to endure it, the emotion is likely `anger` (frustration, annoyance) towards the task itself, rather than `sadness` about the situation. This directive applies specifically when the 'discomfort' is clearly direct

Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00,  4.37it/s] 

2025/10/19 09:58:36 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)





2025/10/19 09:58:46 INFO dspy.teleprompt.gepa.gepa: Iteration 13: Proposed new text for analyzer: Determine the predominant emotion expressed in the user message. Your analysis should go beyond surface-level keywords and consider the underlying intent, the broader context of the message, and the speaker's overall sentiment, especially when faced with seemingly contradictory phrases.

Pay particular attention to these nuanced interpretations and guidelines:

**Core Nuanced Interpretations (from previous instructions, still valid):**
1.  **Distinguishing Anger from Sadness in Tasks:** If the message describes a difficult or disliked task, even if it involves personal discomfort ("tortured"), and the speaker expresses frustration, dislike ("hate to do"), or uses coping mechanisms to endure it, the emotion is likely `anger` (frustration, annoyance) towards the task itself, rather than `sadness` about the situation. This directive applies specifically when the 'discomfort' is clearly direct

## Inspect optimized prompts

In [26]:
# Print the instruction text of every predictor in the optimized program.
for predictor_name, predictor in optimized_program.named_predictors():
    print(
        "================================",
        f"Predictor: {predictor_name}",
        "================================",
        "Prompt:",
        predictor.signature.instructions,
        "*********************************",
        sep="\n",
    )

Predictor: analyzer
Prompt:
Determine the predominant emotion expressed in the user message. Your analysis should go beyond surface-level keywords and consider the underlying intent, the broader context of the message, and the speaker's overall sentiment, especially when faced with seemingly contradictory phrases.

Pay particular attention to these nuanced interpretations and guidelines:

**Core Nuanced Interpretations (from previous instructions, still valid):**
1.  **Distinguishing Anger from Sadness in Tasks:** If the message describes a difficult or disliked task, even if it involves personal discomfort ("tortured"), and the speaker expresses frustration, dislike ("hate to do"), or uses coping mechanisms to endure it, the emotion is likely `anger` (frustration, annoyance) towards the task itself, rather than `sadness` about the situation. This directive applies specifically when the 'discomfort' is clearly directed at an undesirable *task* or chore.
2.  **Identifying Joy in the Fac

## Evaluate optimized program

In [27]:
# Re-score with the same evaluator as the baseline so the two numbers are comparable.
# NOTE(review): dspy_val was also passed to optimizer.compile as `valset`, so this
# score is measured on data GEPA used while selecting candidates; a held-out
# test split would give a less optimistic estimate.
evaluate(optimized_program)

Average Metric: 27.00 / 50 (54.0%): 100%|██████████| 50/50 [00:00<00:00, 160.13it/s]

2025/10/19 09:59:29 INFO dspy.evaluate.evaluate: Average Metric: 27 / 50 (54.0%)





Unnamed: 0,user_message,example_emotion,pred_emotion,emotion_metric
0,i only find out that they are looking and feeling complacent just ...,joy,anger,
1,i can feel that my hopes have not been in vain she said,sadness,joy,
2,i know how much work goes into the creation and i feel the author ...,joy,love,
3,i feel shamed that i hoped for one last christmas because i know s...,sadness,sadness,✔️ [True]
4,i was feeling very bah humbugish coming out of this year s thanksg...,joy,anger,
5,i am already feeling so tortured for having to wait for the result...,anger,fear,
6,i realize that i sound a little overdramatic when i say that but i...,sadness,anger,
7,i won t feel like the jolly green giant while clothes shopping,joy,sadness,
8,i am frankly surprised that you consider the minds of the quorum m...,fear,anger,
9,i mean i am happy for others but how can a person feel ok with som...,joy,sadness,


EvaluationResult(score=54.0, results=<list of 50 results>)