stanfordnlp · mikeedjones · Oct 5, 2024 · Oct 5, 2024 · Oct 5, 2024 · Oct 5, 2024
diff --git a/dsp/modules/dummy_lm.py b/dsp/modules/dummy_lm.py
@@ -5,7 +5,7 @@
 
 
 # This testing module was moved in PR #735 to patch Arize Phoenix logging
-class DummyLM(LM):
+class DSPDummyLM(LM):
     """Dummy language model for unit testing purposes."""
 
     def __init__(self, answers: Union[list[str], dict[str, str]], follow_examples: bool = False):
@@ -61,7 +61,7 @@ def basic_request(self, prompt, n=1, **kwargs) -> dict[str, list[dict[str, str]]
                 },
             )
 
-            RED, GREEN, RESET = "\033[91m", "\033[92m", "\033[0m"
+            RED, _, RESET = "\033[91m", "\033[92m", "\033[0m"
             print("=== DummyLM ===")
             print(prompt, end="")
             print(f"{RED}{answer}{RESET}")

diff --git a/dsp/utils/settings.py b/dsp/utils/settings.py
@@ -1,8 +1,30 @@
 import threading
+from copy import deepcopy
 from contextlib import contextmanager
 
 from dsp.utils.utils import dotdict
 
+# Default values for the DSP settings, defined once at module level so they can be
+# imported elsewhere; `Settings.__new__` seeds its configuration from a deepcopy of this.
+# NOTE: `trace` and `langchain_history` are mutable lists, so anything that uses this
+# mapping directly (without copying) shares those list objects with the defaults.
+DEFAULT_CONFIG = dotdict(
+    lm=None,
+    adapter=None,
+    rm=None,
+    branch_idx=0,
+    reranker=None,
+    compiled_lm=None,
+    force_reuse_cached_compilation=False,
+    compiling=False,
+    skip_logprobs=False,
+    trace=[],
+    release=0,
+    bypass_assert=False,
+    bypass_suggest=False,
+    assert_failures=0,
+    suggest_failures=0,
+    langchain_history=[],
+    experimental=False,
+    backoff_time=10,
+)
+
 
 class Settings:
     """DSP configuration settings."""
@@ -25,27 +47,9 @@ def __new__(cls):
             #  TODO: remove first-class support for re-ranker and potentially combine with RM to form a pipeline of sorts
             #  eg: RetrieveThenRerankPipeline(RetrievalModel, Reranker)
             #  downstream operations like dsp.retrieve would use configs from the defined pipeline.
-            config = dotdict(
-                lm=None,
-                adapter=None,
-                rm=None,
-                branch_idx=0,
-                reranker=None,
-                compiled_lm=None,
-                force_reuse_cached_compilation=False,
-                compiling=False,  # TODO: can probably be removed
-                skip_logprobs=False,
-                trace=[],
-                release=0,
-                bypass_assert=False,
-                bypass_suggest=False,
-                assert_failures=0,
-                suggest_failures=0,
-                langchain_history=[],
-                experimental=False,
-                backoff_time = 10
-            )
-            cls._instance.__append(config)
+
+            # make a deepcopy of the default config to avoid modifying the default config
+            cls._instance.__append(deepcopy(DEFAULT_CONFIG))
 
         return cls._instance
 

diff --git a/dspy/adapters/chat_adapter.py b/dspy/adapters/chat_adapter.py
@@ -1,14 +1,15 @@
-import re
 import ast
 import json
+import re
 import textwrap
+import pydantic
 
+from typing import get_args, get_origin
 from pydantic import TypeAdapter
-import pydantic
 from .base import Adapter
-from typing import get_origin, get_args
 
-field_header_pattern = re.compile(r'\[\[ ## (\w+) ## \]\]')
+
+field_header_pattern = re.compile(r"\[\[ ## (\w+) ## \]\]")
 
 
 class ChatAdapter(Adapter):
@@ -21,9 +22,11 @@ def format(self, signature, demos, inputs):
         # Extract demos where some of the output_fields are not filled in.
         incomplete_demos = [demo for demo in demos if not all(k in demo for k in signature.fields)]
         complete_demos = [demo for demo in demos if demo not in incomplete_demos]
-        incomplete_demos = [demo for demo in incomplete_demos \
-                            if any(k in demo for k in signature.input_fields) and \
-                                any(k in demo for k in signature.output_fields)]
+        incomplete_demos = [
+            demo
+            for demo in incomplete_demos
+            if any(k in demo for k in signature.input_fields) and any(k in demo for k in signature.output_fields)
+        ]
 
         demos = incomplete_demos + complete_demos
 
@@ -32,44 +35,52 @@ def format(self, signature, demos, inputs):
         for demo in demos:
             messages.append(format_turn(signature, demo, role="user", incomplete=demo in incomplete_demos))
             messages.append(format_turn(signature, demo, role="assistant", incomplete=demo in incomplete_demos))
-        
+
         messages.append(format_turn(signature, inputs, role="user"))
 
         return messages
-    
+
     def parse(self, signature, completion, _parse_values=True):
         sections = [(None, [])]
 
         for line in completion.splitlines():
             match = field_header_pattern.match(line.strip())
-            if match: sections.append((match.group(1), []))
-            else: sections[-1][1].append(line)
+            if match:
+                sections.append((match.group(1), []))
+            else:
+                sections[-1][1].append(line)
 
-        sections = [(k, '\n'.join(v).strip()) for k, v in sections]
+        sections = [(k, "\n".join(v).strip()) for k, v in sections]
 
         fields = {}
         for k, v in sections:
             if (k not in fields) and (k in signature.output_fields):
                 try:
                     fields[k] = parse_value(v, signature.output_fields[k].annotation) if _parse_values else v
                 except Exception as e:
-                    raise ValueError(f"Error parsing field {k}: {e}.\n\n\t\tOn attempting to parse the value\n```\n{v}\n```")
+                    raise ValueError(
+                        f"Error parsing field {k}: {e}.\n\n\t\tOn attempting to parse the value\n```\n{v}\n```"
+                    )
 
         if fields.keys() != signature.output_fields.keys():
             raise ValueError(f"Expected {signature.output_fields.keys()} but got {fields.keys()}")
 
         return fields
 
+
 def format_blob(blob):
+    """Wraps `blob` in guillemet delimiters.
+
+    A blob containing no newline and no guillemet is returned inline as «blob».
+    Anything else is returned as a «««...»»» block on its own lines, with the
+    blob indented by four spaces.
+    """
-    if '\n' not in blob and "«" not in blob and "»" not in blob: return f"«{blob}»"
+    if "\n" not in blob and "«" not in blob and "»" not in blob:
+        return f"«{blob}»"
 
+    # Indent continuation lines so the whole blob sits four spaces inside the delimiters.
-    modified_blob = blob.replace('\n', '\n    ')
+    modified_blob = blob.replace("\n", "\n    ")
     return f"«««\n    {modified_blob}\n»»»"
 
 
 def format_list(items):
+    """Formats a list of text items for inclusion in a prompt.
+
+    Returns "N/A" for an empty list, the single item passed through `format_blob`
+    for a one-element list, and otherwise one line per item, numbered from 1 as
+    "[i] <formatted blob>".
+    """
-    if len(items) == 0: return "N/A"
-    if len(items) == 1: return format_blob(items[0])
+    if len(items) == 0:
+        return "N/A"
+    if len(items) == 1:
+        return format_blob(items[0])
 
     return "\n".join([f"[{idx+1}] {format_blob(txt)}" for idx, txt in enumerate(items)])
 
@@ -89,82 +100,90 @@ def format_fields(fields):
         v = _format_field_value(v)
         output.append(f"[[ ## {k} ## ]]\n{v}")
 
-    return '\n\n'.join(output).strip()
-        
+    return "\n\n".join(output).strip()
+
 
 def parse_value(value, annotation):
+    """Converts `value` to the type described by `annotation`.
+
+    When `annotation` is `str` the value is returned as `str(value)` with no further
+    parsing. Otherwise a string value is decoded with `json.loads`; if that fails it is
+    tried as a Python literal via `ast.literal_eval`; if that also fails the raw string
+    is kept. The result is then validated with pydantic's
+    `TypeAdapter(annotation).validate_python`, whose errors propagate to the caller.
+    """
-    if annotation is str: return str(value)
+    if annotation is str:
+        return str(value)
     parsed_value = value
     if isinstance(value, str):
-        try: parsed_value = json.loads(value)
+        try:
+            parsed_value = json.loads(value)
         except json.JSONDecodeError:
-            try: parsed_value = ast.literal_eval(value)
-            except (ValueError, SyntaxError): parsed_value = value
+            try:
+                parsed_value = ast.literal_eval(value)
+            except (ValueError, SyntaxError):
+                parsed_value = value
     return TypeAdapter(annotation).validate_python(parsed_value)
 
 
-def format_turn(signature, values, role, incomplete=False):       
+def format_turn(signature, values, role, incomplete=False):
     content = []
 
     if role == "user":
         field_names = signature.input_fields.keys()
         if incomplete:
             content.append("This is an example of the task, though some input or output fields are not supplied.")
     else:
-        field_names, values = list(signature.output_fields.keys()) + ['completed'], {**values, 'completed': ''}
+        field_names, values = list(signature.output_fields.keys()) + ["completed"], {**values, "completed": ""}
 
     if not incomplete:
         if not set(values).issuperset(set(field_names)):
             raise ValueError(f"Expected {field_names} but got {values.keys()}")
-    
+
     content.append(format_fields({k: values.get(k, "Not supplied for this particular example.") for k in field_names}))
 
     if role == "user":
-        content.append("Respond with the corresponding output fields, starting with the field " +
-                       ", then ".join(f"`{f}`" for f in signature.output_fields) +
-                       ", and then ending with the marker for `completed`.")
+        content.append(
+            "Respond with the corresponding output fields, starting with the field "
+            + ", then ".join(f"`{f}`" for f in signature.output_fields)
+            + ", and then ending with the marker for `completed`."
+        )
 
-    return {"role": role, "content": '\n\n'.join(content).strip()}
+    return {"role": role, "content": "\n\n".join(content).strip()}
 
 
 def get_annotation_name(annotation):
+    """Returns a readable name for a type annotation.
+
+    For an annotation with no generic origin this is its `__name__` when it has one,
+    else `str(annotation)`. For a parameterised annotation it is "Origin[arg, ...]",
+    with the origin and every argument named recursively by this same function.
+    """
     origin = get_origin(annotation)
     args = get_args(annotation)
     if origin is None:
-        if hasattr(annotation, '__name__'):
+        if hasattr(annotation, "__name__"):
             return annotation.__name__
         else:
             return str(annotation)
     else:
-        args_str = ', '.join(get_annotation_name(arg) for arg in args)
-        return f"{origin.__name__}[{args_str}]"
+        args_str = ", ".join(get_annotation_name(arg) for arg in args)
+        # Name the origin through this function rather than reading `origin.__name__`
+        # directly, so an origin without `__name__` falls back to `str(origin)`.
+        return f"{get_annotation_name(origin)}[{args_str}]"
+
 
 def enumerate_fields(fields):
     parts = []
     for idx, (k, v) in enumerate(fields.items()):
         parts.append(f"{idx+1}. `{k}`")
         parts[-1] += f" ({get_annotation_name(v.annotation)})"
-        parts[-1] += f": {v.json_schema_extra['desc']}" if v.json_schema_extra['desc'] != f'${{{k}}}' else ''
+        parts[-1] += f": {v.json_schema_extra['desc']}" if v.json_schema_extra["desc"] != f"${{{k}}}" else ""
+
+    return "\n".join(parts).strip()
 
-    return '\n'.join(parts).strip()
 
 def prepare_instructions(signature):
     parts = []
     parts.append("Your input fields are:\n" + enumerate_fields(signature.input_fields))
     parts.append("Your output fields are:\n" + enumerate_fields(signature.output_fields))
     parts.append("All interactions will be structured in the following way, with the appropriate values filled in.")
 
-    parts.append(format_fields({f : f"{{{f}}}" for f in signature.input_fields}))
-    parts.append(format_fields({f : f"{{{f}}}" for f in signature.output_fields}))
-    parts.append(format_fields({'completed' : ""}))
+    parts.append(format_fields({f: f"{{{f}}}" for f in signature.input_fields}))
+    parts.append(format_fields({f: f"{{{f}}}" for f in signature.output_fields}))
+    parts.append(format_fields({"completed": ""}))
 
     instructions = textwrap.dedent(signature.instructions)
-    objective = ('\n' + ' ' * 8).join([''] + instructions.splitlines())
+    objective = ("\n" + " " * 8).join([""] + instructions.splitlines())
     parts.append(f"In adhering to this structure, your objective is: {objective}")
 
     # parts.append("You will receive some input fields in each interaction. " +
     #              "Respond only with the corresponding output fields, starting with the field " +
     #              ", then ".join(f"`{f}`" for f in signature.output_fields) +
     #              ", and then ending with the marker for `completed`.")
 
-    return '\n\n'.join(parts).strip()
+    return "\n\n".join(parts).strip()
diff --git a/dspy/utils/dummies.py b/dspy/utils/dummies.py
@@ -1,15 +1,18 @@
 import random
 import re
+from collections import defaultdict
 from typing import Union
 
 import numpy as np
 
-from dsp.modules import LM
+from dsp.modules import LM as DSPLM
 from dsp.utils.utils import dotdict
+from dspy.adapters.chat_adapter import field_header_pattern
+from dspy.clients.lm import LM
 
 
-class DummyLM(LM):
-    """Dummy language model for unit testing purposes."""
+class DSPDummyLM(DSPLM):
+    """Dummy language model for unit testing purposes subclassing DSP LM class."""
 
     def __init__(self, answers: Union[list[str], dict[str, str]], follow_examples: bool = False):
         """Initializes the dummy language model.
@@ -64,7 +67,7 @@ def basic_request(self, prompt, n=1, **kwargs) -> dict[str, list[dict[str, str]]
                 },
             )
 
-            RED, GREEN, RESET = "\033[91m", "\033[92m", "\033[0m"
+            RED, _, RESET = "\033[91m", "\033[92m", "\033[0m"
             print("=== DummyLM ===")
             print(prompt, end="")
             print(f"{RED}{answer}{RESET}")
@@ -94,6 +97,61 @@ def get_convo(self, index) -> str:
         return self.history[index]["prompt"] + " " + self.history[index]["response"]["choices"][0]["text"]
 
 
+class DummyLM(LM):
+    """Dummy language model that returns canned answers instead of calling a real model.
+
+    Each completion comes from one of three sources, checked in this order: an earlier
+    turn of the conversation (when `follow_examples` is true), the first `answers` dict
+    entry whose key occurs in the last message, or the next item of the `answers` list.
+    """
+
+    def __init__(self, answers: Union[list[str], dict[str, str]], follow_examples: bool = False):
+        """Stores the canned answers.
+
+        Args:
+            answers: Either a list of responses, handed out one per completion in order,
+                or a dict mapping a substring of the last message to the response to return.
+            follow_examples: If true, `__call__` takes its outputs from `_use_example`
+                and `answers` is not consulted.
+        """
+        # NOTE(review): these positional arguments go to `dspy.clients.lm.LM.__init__`, which is
+        # not visible here — presumably model, model type, temperature, max tokens, cache; confirm.
+        super().__init__("dummy", "chat", 0.0, 1000, True)
+        self.answers = answers
+        if isinstance(answers, list):
+            # An iterator lets `__call__` consume the list one answer at a time via `next`.
+            self.answers = iter(answers)
+        self.follow_examples = follow_examples
+
+    def _use_example(self, messages):
+        """Returns the content of an earlier message that answers the final message, if any.
+
+        Falls off the end (returning None) when no earlier pair of messages qualifies.
+        """
+        # find all field names
+        fields = defaultdict(int)
+        for message in messages:
+            if "content" in message:
+                # `match` anchors at the start of the string, so a header is only counted when
+                # the content begins with it, and at most one header is counted per message.
+                # The dict key is the full matched header text, not just the field name.
+                if ma := field_header_pattern.match(message["content"]):
+                    fields[message["content"][ma.start() : ma.end()]] += 1
+        # find the fields which are missing from the final turns
+        # NOTE(review): `max` raises ValueError if no header was counted above — confirm
+        # callers always supply at least one message that starts with a field header.
+        max_count = max(fields.values())
+        output_fields = [field for field, count in fields.items() if count != max_count]
+
+        # get the output from the last turn that has the output fields as headers
+        # `final_input` is the text of the last message up to its first blank line.
+        final_input = messages[-1]["content"].split("\n\n")[0]
+        # The zip pairs every message with the one immediately after it, newest pair first.
+        for input, output in zip(reversed(messages[:-1]), reversed(messages)):
+            if any(field in output["content"] for field in output_fields) and final_input in input["content"]:
+                return output["content"]
+
+    def __call__(self, prompt=None, messages=None, **kwargs):
+        """Returns a list of canned completions and records the call in `self.history`.
+
+        The number of completions is the `n` keyword passed to this call (default 1); it is
+        read before the call's kwargs are merged with `self.kwargs`. When `messages` is not
+        given, a single user message is built from `prompt`. When the answer list is
+        exhausted, or no dict key occurs in the last message, the completion is the
+        string "No more responses".
+        """
+        # Build the request.
+        outputs = []
+        for _ in range(kwargs.get("n", 1)):
+            messages = messages or [{"role": "user", "content": prompt}]
+            # NOTE(review): `self.kwargs` (and `self.history` below) are presumably set by
+            # `LM.__init__` — confirm.
+            kwargs = {**self.kwargs, **kwargs}
+
+            if self.follow_examples:
+                outputs.append(self._use_example(messages))
+            elif isinstance(self.answers, dict):
+                outputs.append(
+                    next((v for k, v in self.answers.items() if k in messages[-1]["content"]), "No more responses")
+                )
+            else:
+                outputs.append(next(self.answers, "No more responses"))
+
+            # Logging, with removed api key & where `cost` is None on cache hit.
+            # NOTE(review): this runs inside the loop, so a call with n > 1 appends n history
+            # entries, all referencing the same `outputs` list — confirm this is intended.
+            kwargs = {k: v for k, v in kwargs.items() if not k.startswith("api_")}
+            entry = dict(prompt=prompt, messages=messages, kwargs=kwargs)
+            entry = dict(**entry, outputs=outputs, usage=0)
+            entry = dict(**entry, cost=0)
+            self.history.append(entry)
+
+        return outputs
+
+    def get_convo(self, index):
+        """Get the prompt + answer from the ith message, as a (messages, outputs) tuple."""
+        return self.history[index]["messages"], self.history[index]["outputs"]
+
+
 def dummy_rm(passages=()) -> callable:
     if not passages:
 

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,13 @@
+import pytest
+
+import dspy
+from dsp.utils.settings import DEFAULT_CONFIG
+
+
+# `autouse=True` applies this fixture to every test in scope without it being requested.
+@pytest.fixture(autouse=True)
+def clear_settings():
+    """Ensures that the settings are cleared after each test."""
+
+    # No setup is needed; the test itself runs at the yield.
+    yield
+
+    # Teardown: reconfigure the global settings with the default values.
+    # NOTE(review): `**DEFAULT_CONFIG` passes the default `trace` / `langchain_history` list
+    # objects themselves; whether `configure` copies them is not visible here — confirm a
+    # test cannot mutate the shared defaults through the settings.
+    dspy.settings.configure(**DEFAULT_CONFIG, inherit_config=False)
diff --git a/tests/dsp_LM/__init__.py b/tests/dsp_LM/__init__.py
diff --git a/tests/dsp_LM/evaluate/__init__.py b/tests/dsp_LM/evaluate/__init__.py