Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dsp/modules/dummy_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


# This testing module was moved in PR #735 to patch Arize Phoenix logging
class DummyLM(LM):
class DSPDummyLM(LM):
"""Dummy language model for unit testing purposes."""

def __init__(self, answers: Union[list[str], dict[str, str]], follow_examples: bool = False):
Expand Down Expand Up @@ -61,7 +61,7 @@ def basic_request(self, prompt, n=1, **kwargs) -> dict[str, list[dict[str, str]]
},
)

RED, GREEN, RESET = "\033[91m", "\033[92m", "\033[0m"
RED, _, RESET = "\033[91m", "\033[92m", "\033[0m"
print("=== DummyLM ===")
print(prompt, end="")
print(f"{RED}{answer}{RESET}")
Expand Down
46 changes: 25 additions & 21 deletions dsp/utils/settings.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,30 @@
import threading
from copy import deepcopy
from contextlib import contextmanager

from dsp.utils.utils import dotdict

# Baseline values for every DSP setting.
#
# Settings.__new__ installs a deep copy of this mapping as the initial config,
# and tests/conftest.py passes these values back to `configure` after each
# test. Some values are mutable lists, so copy this mapping before handing it
# to anything that may mutate the values in place.
DEFAULT_CONFIG = dotdict(
lm=None,
adapter=None,
rm=None,
branch_idx=0,
reranker=None,
compiled_lm=None,
force_reuse_cached_compilation=False,
compiling=False,
skip_logprobs=False,
trace=[],  # mutable: shared by reference unless copied
release=0,
bypass_assert=False,
bypass_suggest=False,
assert_failures=0,
suggest_failures=0,
langchain_history=[],  # mutable: shared by reference unless copied
experimental=False,
backoff_time=10,
)


class Settings:
"""DSP configuration settings."""
Expand All @@ -25,27 +47,9 @@ def __new__(cls):
# TODO: remove first-class support for re-ranker and potentially combine with RM to form a pipeline of sorts
# eg: RetrieveThenRerankPipeline(RetrievalModel, Reranker)
# downstream operations like dsp.retrieve would use configs from the defined pipeline.
config = dotdict(
lm=None,
adapter=None,
rm=None,
branch_idx=0,
reranker=None,
compiled_lm=None,
force_reuse_cached_compilation=False,
compiling=False, # TODO: can probably be removed
skip_logprobs=False,
trace=[],
release=0,
bypass_assert=False,
bypass_suggest=False,
assert_failures=0,
suggest_failures=0,
langchain_history=[],
experimental=False,
backoff_time = 10
)
cls._instance.__append(config)

# Deep-copy DEFAULT_CONFIG so that mutating the live settings (e.g. appending to `trace`) never alters the shared defaults.
cls._instance.__append(deepcopy(DEFAULT_CONFIG))

return cls._instance

Expand Down
99 changes: 59 additions & 40 deletions dspy/adapters/chat_adapter.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import re
import ast
import json
import re
import textwrap
import pydantic

from typing import get_args, get_origin
from pydantic import TypeAdapter
import pydantic
from .base import Adapter
from typing import get_origin, get_args

field_header_pattern = re.compile(r'\[\[ ## (\w+) ## \]\]')

field_header_pattern = re.compile(r"\[\[ ## (\w+) ## \]\]")


class ChatAdapter(Adapter):
Expand All @@ -21,9 +22,11 @@ def format(self, signature, demos, inputs):
# Extract demos where some of the output_fields are not filled in.
incomplete_demos = [demo for demo in demos if not all(k in demo for k in signature.fields)]
complete_demos = [demo for demo in demos if demo not in incomplete_demos]
incomplete_demos = [demo for demo in incomplete_demos \
if any(k in demo for k in signature.input_fields) and \
any(k in demo for k in signature.output_fields)]
incomplete_demos = [
demo
for demo in incomplete_demos
if any(k in demo for k in signature.input_fields) and any(k in demo for k in signature.output_fields)
]

demos = incomplete_demos + complete_demos

Expand All @@ -32,44 +35,52 @@ def format(self, signature, demos, inputs):
for demo in demos:
messages.append(format_turn(signature, demo, role="user", incomplete=demo in incomplete_demos))
messages.append(format_turn(signature, demo, role="assistant", incomplete=demo in incomplete_demos))

messages.append(format_turn(signature, inputs, role="user"))

return messages

def parse(self, signature, completion, _parse_values=True):
sections = [(None, [])]

for line in completion.splitlines():
match = field_header_pattern.match(line.strip())
if match: sections.append((match.group(1), []))
else: sections[-1][1].append(line)
if match:
sections.append((match.group(1), []))
else:
sections[-1][1].append(line)

sections = [(k, '\n'.join(v).strip()) for k, v in sections]
sections = [(k, "\n".join(v).strip()) for k, v in sections]

fields = {}
for k, v in sections:
if (k not in fields) and (k in signature.output_fields):
try:
fields[k] = parse_value(v, signature.output_fields[k].annotation) if _parse_values else v
except Exception as e:
raise ValueError(f"Error parsing field {k}: {e}.\n\n\t\tOn attempting to parse the value\n```\n{v}\n```")
raise ValueError(
f"Error parsing field {k}: {e}.\n\n\t\tOn attempting to parse the value\n```\n{v}\n```"
)

if fields.keys() != signature.output_fields.keys():
raise ValueError(f"Expected {signature.output_fields.keys()} but got {fields.keys()}")

return fields


def format_blob(blob):
if '\n' not in blob and "«" not in blob and "»" not in blob: return f"«{blob}»"
if "\n" not in blob and "«" not in blob and "»" not in blob:
return f"«{blob}»"

modified_blob = blob.replace('\n', '\n ')
modified_blob = blob.replace("\n", "\n ")
return f"«««\n {modified_blob}\n»»»"


def format_list(items):
if len(items) == 0: return "N/A"
if len(items) == 1: return format_blob(items[0])
if len(items) == 0:
return "N/A"
if len(items) == 1:
return format_blob(items[0])

return "\n".join([f"[{idx+1}] {format_blob(txt)}" for idx, txt in enumerate(items)])

Expand All @@ -89,82 +100,90 @@ def format_fields(fields):
v = _format_field_value(v)
output.append(f"[[ ## {k} ## ]]\n{v}")

return '\n\n'.join(output).strip()
return "\n\n".join(output).strip()


def parse_value(value, annotation):
if annotation is str: return str(value)
if annotation is str:
return str(value)
parsed_value = value
if isinstance(value, str):
try: parsed_value = json.loads(value)
try:
parsed_value = json.loads(value)
except json.JSONDecodeError:
try: parsed_value = ast.literal_eval(value)
except (ValueError, SyntaxError): parsed_value = value
try:
parsed_value = ast.literal_eval(value)
except (ValueError, SyntaxError):
parsed_value = value
return TypeAdapter(annotation).validate_python(parsed_value)


def format_turn(signature, values, role, incomplete=False):
def format_turn(signature, values, role, incomplete=False):
content = []

if role == "user":
field_names = signature.input_fields.keys()
if incomplete:
content.append("This is an example of the task, though some input or output fields are not supplied.")
else:
field_names, values = list(signature.output_fields.keys()) + ['completed'], {**values, 'completed': ''}
field_names, values = list(signature.output_fields.keys()) + ["completed"], {**values, "completed": ""}

if not incomplete:
if not set(values).issuperset(set(field_names)):
raise ValueError(f"Expected {field_names} but got {values.keys()}")

content.append(format_fields({k: values.get(k, "Not supplied for this particular example.") for k in field_names}))

if role == "user":
content.append("Respond with the corresponding output fields, starting with the field " +
", then ".join(f"`{f}`" for f in signature.output_fields) +
", and then ending with the marker for `completed`.")
content.append(
"Respond with the corresponding output fields, starting with the field "
+ ", then ".join(f"`{f}`" for f in signature.output_fields)
+ ", and then ending with the marker for `completed`."
)

return {"role": role, "content": '\n\n'.join(content).strip()}
return {"role": role, "content": "\n\n".join(content).strip()}


def get_annotation_name(annotation):
origin = get_origin(annotation)
args = get_args(annotation)
if origin is None:
if hasattr(annotation, '__name__'):
if hasattr(annotation, "__name__"):
return annotation.__name__
else:
return str(annotation)
else:
args_str = ', '.join(get_annotation_name(arg) for arg in args)
return f"{origin.__name__}[{args_str}]"
args_str = ", ".join(get_annotation_name(arg) for arg in args)
return f"{get_annotation_name(origin)}[{args_str}]"
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change to account for the origin of Literal not having a __name__ attribute.



def enumerate_fields(fields):
parts = []
for idx, (k, v) in enumerate(fields.items()):
parts.append(f"{idx+1}. `{k}`")
parts[-1] += f" ({get_annotation_name(v.annotation)})"
parts[-1] += f": {v.json_schema_extra['desc']}" if v.json_schema_extra['desc'] != f'${{{k}}}' else ''
parts[-1] += f": {v.json_schema_extra['desc']}" if v.json_schema_extra["desc"] != f"${{{k}}}" else ""

return "\n".join(parts).strip()

return '\n'.join(parts).strip()

def prepare_instructions(signature):
parts = []
parts.append("Your input fields are:\n" + enumerate_fields(signature.input_fields))
parts.append("Your output fields are:\n" + enumerate_fields(signature.output_fields))
parts.append("All interactions will be structured in the following way, with the appropriate values filled in.")

parts.append(format_fields({f : f"{{{f}}}" for f in signature.input_fields}))
parts.append(format_fields({f : f"{{{f}}}" for f in signature.output_fields}))
parts.append(format_fields({'completed' : ""}))
parts.append(format_fields({f: f"{{{f}}}" for f in signature.input_fields}))
parts.append(format_fields({f: f"{{{f}}}" for f in signature.output_fields}))
parts.append(format_fields({"completed": ""}))

instructions = textwrap.dedent(signature.instructions)
objective = ('\n' + ' ' * 8).join([''] + instructions.splitlines())
objective = ("\n" + " " * 8).join([""] + instructions.splitlines())
parts.append(f"In adhering to this structure, your objective is: {objective}")

# parts.append("You will receive some input fields in each interaction. " +
# "Respond only with the corresponding output fields, starting with the field " +
# ", then ".join(f"`{f}`" for f in signature.output_fields) +
# ", and then ending with the marker for `completed`.")

return '\n\n'.join(parts).strip()
return "\n\n".join(parts).strip()
66 changes: 62 additions & 4 deletions dspy/utils/dummies.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import random
import re
from collections import defaultdict
from typing import Union

import numpy as np

from dsp.modules import LM
from dsp.modules import LM as DSPLM
from dsp.utils.utils import dotdict
from dspy.adapters.chat_adapter import field_header_pattern
from dspy.clients.lm import LM


class DummyLM(LM):
"""Dummy language model for unit testing purposes."""
class DSPDummyLM(DSPLM):
"""Dummy language model for unit testing purposes subclassing DSP LM class."""

def __init__(self, answers: Union[list[str], dict[str, str]], follow_examples: bool = False):
"""Initializes the dummy language model.
Expand Down Expand Up @@ -64,7 +67,7 @@ def basic_request(self, prompt, n=1, **kwargs) -> dict[str, list[dict[str, str]]
},
)

RED, GREEN, RESET = "\033[91m", "\033[92m", "\033[0m"
RED, _, RESET = "\033[91m", "\033[92m", "\033[0m"
print("=== DummyLM ===")
print(prompt, end="")
print(f"{RED}{answer}{RESET}")
Expand Down Expand Up @@ -94,6 +97,61 @@ def get_convo(self, index) -> str:
return self.history[index]["prompt"] + " " + self.history[index]["response"]["choices"][0]["text"]


class DummyLM(LM):
    """Dummy language model for unit testing purposes, built on the dspy LM client.

    No real model is called. Each completion is either replayed from the
    canned ``answers`` or, when ``follow_examples`` is set, copied from a
    matching demo turn found in the prompt messages.
    """

    def __init__(self, answers: Union[list[str], dict[str, str]], follow_examples: bool = False):
        """Initializes the dummy language model.

        Args:
            answers: Either a list of responses, handed out one per completion
                in order, or a dict whose keys are looked up as substrings of
                the last message's content and whose values are the responses.
            follow_examples: If True, ignore ``answers`` and answer with the
                output of a matching example from the messages instead.
        """
        # NOTE(review): the positional arguments presumably map to
        # (model, model_type, temperature, max_tokens, cache) -- confirm
        # against dspy.clients.lm.LM.__init__.
        super().__init__("dummy", "chat", 0.0, 1000, True)
        # A list is consumed through an iterator so each call advances it;
        # a dict is kept as-is for substring lookup.
        self.answers = iter(answers) if isinstance(answers, list) else answers
        self.follow_examples = follow_examples

    def _use_example(self, messages):
        """Return the content of an earlier turn that answers the final input.

        Args:
            messages: Chat messages (dicts with a ``"content"`` entry); the
                last one is the request to answer.

        Returns:
            The content of the latest message that mentions one of the
            inferred output-field headers and whose preceding message contains
            the first paragraph of the final message, or None if no such pair
            exists.

        Raises:
            ValueError: If no message content starts with a field header
                (``max`` of an empty sequence).
        """
        # Count how many messages open with each field header.
        header_counts = defaultdict(int)
        for message in messages:
            if "content" in message:
                if ma := field_header_pattern.match(message["content"]):
                    header_counts[ma.group(0)] += 1

        # Headers seen less often than the most frequent one are treated as
        # the fields missing from the final turn, i.e. the output fields.
        max_count = max(header_counts.values())
        output_fields = [field for field, count in header_counts.items() if count != max_count]

        # Walk backwards over (request, response) pairs of adjacent messages
        # and return the latest response that carries an output field and
        # whose request contains the final input's first paragraph.
        final_input = messages[-1]["content"].split("\n\n")[0]
        for request, response in zip(reversed(messages[:-1]), reversed(messages)):
            if any(field in response["content"] for field in output_fields) and final_input in request["content"]:
                return response["content"]
        return None

    def __call__(self, prompt=None, messages=None, **kwargs):
        """Produce ``n`` dummy completions and record the call in the history.

        Args:
            prompt: Plain-text prompt; used only when ``messages`` is empty.
            messages: Chat messages to answer. Defaults to a single user
                message wrapping ``prompt``.
            **kwargs: Request options. ``n`` (default 1) sets the number of
                completions; it is read from the caller's kwargs only.

        Returns:
            A list with one output per requested completion. When the canned
            answers run out, or no dict key matches, the output is
            ``"No more responses"``.
        """
        # Read `n` before merging so instance-level defaults cannot change it.
        n = kwargs.get("n", 1)

        # Build the request once; none of this depends on the completion index.
        messages = messages or [{"role": "user", "content": prompt}]
        kwargs = {**self.kwargs, **kwargs}

        outputs = []
        for _ in range(n):
            if self.follow_examples:
                outputs.append(self._use_example(messages))
            elif isinstance(self.answers, dict):
                last_content = messages[-1]["content"]
                outputs.append(
                    next((v for k, v in self.answers.items() if k in last_content), "No more responses")
                )
            else:
                outputs.append(next(self.answers, "No more responses"))

        # Logging, with any `api_*` options removed; usage and cost are always 0.
        kwargs = {k: v for k, v in kwargs.items() if not k.startswith("api_")}
        entry = dict(prompt=prompt, messages=messages, kwargs=kwargs, outputs=outputs, usage=0, cost=0)
        self.history.append(entry)

        return outputs

    def get_convo(self, index):
        """Get the prompt messages and the answers from the ith history entry.

        Returns:
            A ``(messages, outputs)`` tuple taken from ``self.history[index]``.
        """
        entry = self.history[index]
        return entry["messages"], entry["outputs"]


def dummy_rm(passages=()) -> callable:
if not passages:

Expand Down
13 changes: 13 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pytest

import dspy
from dsp.utils.settings import DEFAULT_CONFIG


@pytest.fixture(autouse=True)
def clear_settings():
    """Reset the global dspy settings to their defaults after each test.

    The fixture is autouse, so it wraps every test: the test body runs at the
    ``yield`` and the reset happens afterwards.
    """
    from copy import deepcopy

    yield

    # DEFAULT_CONFIG contains mutable values (the `trace` and
    # `langchain_history` lists). Pass a deep copy so that in-place changes
    # made through the live settings during one test cannot alter the
    # defaults that later tests are reset to.
    dspy.settings.configure(**deepcopy(DEFAULT_CONFIG), inherit_config=False)
Empty file added tests/dsp_LM/__init__.py
Empty file.
Empty file.
Loading