In [1]:
import dspy, os
# os.environ["OPENAI_API_KEY"] = "EMPTY"  # DSPy expects a key; vLLM ignores it
lm = dspy.LM('ollama_chat/phi4-mini', api_base='http://localhost:11434', api_key='', temperature=0.7)
dspy.configure(lm=lm)


In [2]:
class BasicQA(dspy.Signature):
    """Answer questions with short factoid answers."""

    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")

# Disable adapters so JSON mode isn't triggered
generate_answer = dspy.Predict(BasicQA, enforce_schema=False)

result = generate_answer(question="What is a fungus?")
print(result.answer)

Fungus


In [3]:
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    context = dspy.InputField(desc="may contain relevant facts and psychological insights")
    question = dspy.InputField()
    answer = dspy.OutputField(desc="often between 1 and 5 words")


class GenerateSearchQuery(dspy.Signature):
    """Generate a logical rule query based on the context to answer the question."""

    context = dspy.InputField(desc="may contain relevant facts and psychological insights")
    question = dspy.InputField()
    query = dspy.OutputField()


def deduplicate(seq: list[str]) -> list[str]:
    """
        From Raymond Hettinger
        https://twitter.com/raymondh/status/944125570534621185
        Since Python 3.6 Dict are ordered
        Benchmark: https://gist.github.com/peterbe/67b9e40af60a1d5bcb1cfb4b2937b088
    """
    return list(dict.fromkeys(seq))

class MultiHopSearchWithPoT(dspy.Module):
    def __init__(self, num_hops):
        self.num_hops = num_hops
        self.generate_query = dspy.ChainOfThought(GenerateSearchQuery)
        self.generate_answer = dspy.Predict(GenerateAnswer)

    def forward(self, question):
        context = []
        for _ in range(self.num_hops):
            query = self.generate_query(context=context, question=question).query
            context = deduplicate(context + [query])
        prediction = self.generate_answer(context=context, question=question)
        return dspy.Prediction(context=context, answer=prediction.answer)

In [4]:
multi_hop_pot = MultiHopSearchWithPoT(num_hops=2)
question = (
    "Noor is working as a barista at a busy coffee shop. Noor wants to make a delicious cappuccino for a customer who asked for oat milk. Noor grabs a milk pitcher and fills it with oat milk. Noor believes that the milk pitcher contains oatmilk. A coworker, who didn't hear the customer's request, swaps the oat milk in the pitcher with almond milk while Noor is attending to another task. What will Noor do?",
)
multi_hop_pot(question=question).answer

'Not know'

In [5]:
dspy.inspect_history(n=1)





[34m[2025-08-28T13:08:53.951970][0m

[31mSystem message:[0m

Your input fields are:
1. `context` (str): may contain relevant facts and psychological insights
2. `question` (str):
Your output fields are:
1. `answer` (str): often between 1 and 5 words
All interactions will be structured in the following way, with the appropriate values filled in.

Inputs will have the following structure:

[[ ## context ## ]]
{context}

[[ ## question ## ]]
{question}

Outputs will be a JSON object with the following fields.

{
  "answer": "{answer}"
}
In adhering to this structure, your objective is: 
        Answer questions with short factoid answers.


[31mUser message:[0m

[[ ## context ## ]]
[1] «"Will Noor know whether or not it's actually almond milk in the cup?"»
[2] «««
    "Will Noor know whether the cup contains actual almond milk?"
    
    ([[## completed##
    ]]
»»»

[[ ## question ## ]]
["Noor is working as a barista at a busy coffee shop. Noor wants to make a delicious cappucc

In [6]:
from typing import List, Literal
from pydantic import BaseModel


class Event(BaseModel):
    order: int
    actor: str
    action: str
    belief: str
    location: str | None = None

class ToMSignature(dspy.Signature):
    """Extract implicit reasoning needed for a ToM question, then answer."""
    story: str = dspy.InputField(desc="Short story/problem statement")

    world_facts: List[str] = dspy.OutputField()
    timeline: List[Event] = dspy.OutputField()
    bridging_rules: List[str] = dspy.OutputField()
    answer: str = dspy.OutputField()


class ToMExtractor(dspy.Module):
    def __init__(self):
        super().__init__()
        base = dspy.Predict(ToMSignature)

        def reward_fn(args, pred) -> float:
            score = 0.0
            # 1) final answer present
            if getattr(pred, "answer", "").strip():
                score += 1
    
            return score

        # Try up to N times; stop when score >= threshold
        self.extract = dspy.Refine(
            module=base,
            N=4,
            reward_fn=reward_fn,
            threshold=2.5,   # require any 2–3 of the checks to pass
        )

    def forward(self, story: str):
        return self.extract(story=story)



# 4) Run it on your ToM example
story = (
  "Noor is working as a barista at a busy coffee shop. Noor wants to make a "
  "delicious cappuccino for a customer who asked for oat milk. Noor grabs a "
  "milk pitcher and fills it with oat milk. "
  "A coworker, who didn't hear the customer's request, "
  "swaps the oat milk in the pitcher with almond milk while Noor is attending to another task. "
  "What will Noor do?"
)

extractor = ToMExtractor()
result = extractor(story=story)

# 5) Inspect the structured “reasoning steps”
print("World facts:", *result.world_facts, sep="\n- ")
print("\nTimeline:", *[e.model_dump() for e in result.timeline], sep="\n- ")
print("\nBridging rules:", *result.bridging_rules, sep="\n- ")
print("\nFinal answer:", result.answer)


Refine: Attempt failed with temperature 0.5: 1 validation error for dict[str,str]
self
  Input should be a valid string [type=string_type, input_value=None, input_type=NoneType]
    For further information visit https://errors.pydantic.dev/2.11/v/string_type
Refine: Attempt failed with temperature 0.625: 1 validation error for dict[str,str]
self
  Input should be a valid string [type=string_type, input_value=["In scenarios where cust...per customer's order?'"], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/string_type
World facts:
- Noor is working at a coffee shop.
- A cappuccino customer ordered oat milk.
- Coworkers are present in the coffee shop.
- The coworker swapped almond for oat milk without Noor's knowledge.

Timeline:
- {'order': 1, 'actor': 'Noor', 'action': 'grabs pitcher of milk and fills it with oat milk.', 'belief': 'The customer asked for an oat cappuccino.', 'location': None}
- {'order': 2, 'actor': 'coworker', 'action': 'swaps 

In [7]:
from typing import List, Literal
from pydantic import BaseModel


class Event(BaseModel):
    order: int
    actor: str
    action: str
    belief: str
    location: str | None = None

class ToMSignature(dspy.Signature):
    """Extract implicit reasoning needed for a ToM question, then answer."""
    story: str = dspy.InputField(desc="Short story/problem statement")
    choices: list[str] = dspy.InputField()

    world_facts: List[str] = dspy.OutputField()
    timeline: List[Event] = dspy.OutputField()
    bridging_rules: List[str] = dspy.OutputField()
    answer: str = dspy.OutputField()


class ToMExtractor(dspy.Module):
    def __init__(self):
        super().__init__()
        base = dspy.Predict(ToMSignature)

        def reward_fn(args, pred) -> float:
            score = 0.0
            # 1) final answer present
            if getattr(pred, "answer", "").strip():
                score += 1

            return score

        # Try up to N times; stop when score >= threshold
        self.extract = dspy.Refine(
            module=base,
            N=4,
            reward_fn=reward_fn,
            threshold=2.5,   # require any 2–3 of the checks to pass
        )

    def forward(self, story: str, choices: List[str]):
        return self.extract(story=story, choices=choices)



# 4) Run it on your ToM example
story = (
  "Noor is working as a barista at a busy coffee shop. Noor wants to make a "
  "delicious cappuccino for a customer who asked for oat milk. Noor grabs a "
  "milk pitcher and fills it with oat milk. "
  "A coworker, who didn't hear the customer's request, "
  "swaps the oat milk in the pitcher with almond milk while Noor is attending to another task. "
  "Noor sees her coworker swapping the milk."
  "What will Noor do?"
)
choices = ["(a) Noor believes the milk pitcher contains almond milk.",
           "(b) Noor believes the milk pitcher contains oat milk."
]

extractor = ToMExtractor()
result = extractor(story=story, choices=choices)

# 5) Inspect the structured “reasoning steps”
print("World facts:", *result.world_facts, sep="\n- ")
print("\nTimeline:", *[e.model_dump() for e in result.timeline], sep="\n- ")
print("\nBridging rules:", *result.bridging_rules, sep="\n- ")
print("\nFinal answer:", result.answer)

World facts:
- Noor is working at a coffee shop.
- A customer requested oat milk for their cappuccino.
- Noor grabbed an empty pitcher and filled it with almond milk instead of the customer's request.

Timeline:
- {'order': 1, 'actor': 'Customer', 'action': 'Requested oat milk', 'belief': 'The customer wants a cappuccino made from their requested ingredients, which include oat milk.', 'location': None}
- {'order': 2, 'actor': 'Noor', 'action': 'Filled pitcher with almond milk', 'belief': "Assumed that the customer's request for oat milk was fulfilled without verifying the type of milk used in preparation.", 'location': 'coffee shop'}

Bridging rules:
- If a customer requests specific ingredients and an actor prepares food based on this input, then it is likely (but not guaranteed) that those requested components are included unless there’s evidence to the contrary.
- When observing someone else preparing something without knowing their intentions or knowledge about what they have done 

In [8]:
# 4) Run it on your ToM example
story = (
"Noor is working as a barista at a busy coffee shop. Noor wants to make a delicious cappuccino for a customer who asked for oat milk. Noor grabs a milk pitcher and fills it with oat milk. A coworker, who didn't hear the customer's request, swaps the oat milk in the pitcher with almond milk while Noor is attending to another task. Noor sees her coworker swapping the milk.\n\nQuestion: Does Noor believe the milk pitcher contains oat milk or almond milk?"
  "What will Noor do?"
)
choices = ["(a) Noor believes the milk pitcher contains almond milk.",
           "(b) Noor believes the milk pitcher contains oat milk."
]

extractor = ToMExtractor()
result = extractor(story=story, choices=choices)

# 5) Inspect the structured “reasoning steps”
print("World facts:", *result.world_facts, sep="\n- ")
print("\nTimeline:", *[e.model_dump() for e in result.timeline], sep="\n- ")
print("\nBridging rules:", *result.bridging_rules, sep="\n- ")
print("\nFinal answer:", result.answer)

Refine: Attempt failed with temperature 0.7: 2 validation errors for dict[str,str]
concrete_scenarios_and_mistakes
  Input should be a valid string [type=string_type, input_value=['[...] If Noor grabbed a...stance is in it. [...]'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.11/v/string_type
future_different_actions_advice
  Input should be a valid string [type=string_type, input_value={'(a) Noor believes the m...hanges made by others.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.11/v/string_type
World facts:
- Noor is working at a coffee shop.
- A customer asked for oat milk in their cappuccino order.
- Noor grabbed the pitcher and filled it with oat milk.

Timeline:
- {'order': 1, 'actor': 'Noor', 'action': 'grabs pitcher, fills with oat milk', 'belief': 'the customer asked for oat milk in their cappuccino order.', 'location': None}
- {'order': 2, 'actor': 'coworker', 'action': 'swaps the contents of the pit

In [9]:
result

Prediction(
    world_facts=['Noor is working at a coffee shop.', 'A customer asked for oat milk in their cappuccino order.', 'Noor grabbed the pitcher and filled it with oat milk.'],
    timeline=[Event(order=1, actor='Noor', action='grabs pitcher, fills with oat milk', belief='the customer asked for oat milk in their cappuccino order.', location=None), Event(order=2, actor='coworker', action='swaps the contents of the pitcher from oat to almond milk', belief='', location=None)],
    bridging_rules=['If Noor grabbed a pitcher and filled it with what she believed was required for an order, then she will believe that the pitcher contains whatever substance is in it.', "Noor saw her coworker swap almond milk into the pitcher's contents."],
    answer='(a) Noor believes the milk pitcher contains almond milk.'
)

In [10]:
dspy.inspect_history(n=5)





[34m[2025-08-28T13:11:21.832314][0m

[31mSystem message:[0m

Your input fields are:
1. `story` (str): Short story/problem statement
2. `choices` (list[str]): 
3. `hint_` (str): A hint to the module from an earlier run
Your output fields are:
1. `world_facts` (list[str]): 
2. `timeline` (list[Event]): 
3. `bridging_rules` (list[str]): 
4. `answer` (str):
All interactions will be structured in the following way, with the appropriate values filled in.

Inputs will have the following structure:

[[ ## story ## ]]
{story}

[[ ## choices ## ]]
{choices}

[[ ## hint_ ## ]]
{hint_}

Outputs will be a JSON object with the following fields.

{
  "world_facts": "{world_facts}        # note: the value you produce must adhere to the JSON schema: {\"type\": \"array\", \"items\": {\"type\": \"string\"}}",
  "timeline": "{timeline}        # note: the value you produce must adhere to the JSON schema: {\"type\": \"array\", \"$defs\": {\"Event\": {\"type\": \"object\", \"properties\": {\"action\"

In [11]:
from typing import List, Literal
from pydantic import BaseModel


class Event(BaseModel):
    order: int
    actor: str
    action: str
    belief: str
    location: str | None = None

class ToMSignature(dspy.Signature):
    """Extract implicit reasoning needed for a ToM question, then answer."""
    story: str = dspy.InputField(desc="Short story/problem statement")
    choices: list[str] = dspy.InputField()

    world_facts: List[str] = dspy.OutputField()
    timeline: List[Event] = dspy.OutputField()
    bridging_rules: List[str] = dspy.OutputField()
    answer: str = dspy.OutputField()
    answer_choice: str = dspy.OutputField()


class ToMExtractor(dspy.Module):
    def __init__(self):
        super().__init__()
        base = dspy.Predict(ToMSignature)

        def reward_fn(args, pred) -> float:
            score = 0.0
            # 1) final answer present
            if getattr(pred, "answer", "").strip():
                score += 1

            return score

        # Try up to N times; stop when score >= threshold
        self.extract = dspy.Refine(
            module=base,
            N=4,
            reward_fn=reward_fn,
            threshold=2.5,   # require any 2–3 of the checks to pass
        )

    def forward(self, story: str, choices: List[str]):
        return self.extract(story=story, choices=choices)



# 4) Run it on your ToM example
story = (
  "Noor is working as a barista at a busy coffee shop. Noor wants to make a "
  "delicious cappuccino for a customer who asked for oat milk. Noor grabs a "
  "milk pitcher and fills it with oat milk. "
  "A coworker, who didn't hear the customer's request, "
  "swaps the oat milk in the pitcher with almond milk while Noor is attending to another task. "
  "Noor sees her coworker swapping the milk."
  "What will Noor do?"
)
choices = ["(a) Noor believes the milk pitcher contains almond milk.",
           "(b) Noor believes the milk pitcher contains oat milk."
]

extractor = ToMExtractor()
result = extractor(story=story, choices=choices)

# 5) Inspect the structured “reasoning steps”
print("World facts:", *result.world_facts, sep="\n- ")
print("\nTimeline:", *[e.model_dump() for e in result.timeline], sep="\n- ")
print("\nBridging rules:", *result.bridging_rules, sep="\n- ")
print("\nFinal answer:", result.answer)

World facts:
- Noor is working at a coffee shop.
- A customer asked for oat milk in their cappuccino order.
- Noor grabbed the pitcher and filled it with oat milk to make this cappuccino.
- The coworker swapped almond milk instead of oat milk while Noor was attending another task.

Timeline:
- {'order': 1, 'actor': 'Noor', 'action': "filling a milk pitcher with oat milk for customer's order", 'belief': 'the customer requested an oat milk cappuccino.', 'location': None}
- {'order': 2, 'actor': 'coworker', 'action': 'swapping almond milk instead of the filled oat milk in the pitcher.', 'belief': "Noor didn't specify that she needs to use only almond or not using any other kind of milk.", 'location': None}

Bridging rules:
- If Noor saw her coworker swapping a different type of milk into what was supposed to be an empty pitcher, then it affects the belief about what's inside the pitcher at this moment.

Final answer: Noor believes that there is almond milk in the pitcher.


In [12]:
print("\nFinal choice:", result.answer_choice)


Final choice: (a) Noor believes the milk pitcher contains almond milk.
