In [50]:
import langwatch
import dotenv

dotenv.load_dotenv()

True

In [None]:
from datamodel_code_generator import InputFileType, generate
import json
import re
import io
from contextlib import redirect_stdout

json_schema = {
    "type": "enum",
    "enum": ["high", "medium", "low"],
    # "properties": {
    #     "importance": {"type": "number", "title": "Importance"},
    #     "reasoning": {"type": "string", "title": "Reasoning"},
    # },
    # # "required": ["reasoning", "importance"],
    # "title": "NestedAnswerImportance",
}

# Generate code. The result is a string of the Python code.
code_buffer = io.StringIO()

model_name = json_schema.get("title", "Model") # <- field name

# Redirect stdout to the buffer while calling generate()
with redirect_stdout(code_buffer):
    generate(
        json.dumps(json_schema),
        input_file_type=InputFileType.JsonSchema,
        class_name=model_name
    )

# Get the generated code as a string
output = code_buffer.getvalue()
output = re.sub(r"# generated by[\s\S]*?from __future__ import annotations", "", output).strip()
output = re.sub(r"class (.*)?\(BaseModel\):\n    __root__: ", r"\1 = ", output).strip()

print(output)

namespace = {}
exec(output, namespace, namespace)
Model = namespace[model_name]

Model

from enum import Enum


class ModelFoo(Enum):
    high = 'high'
    medium = 'medium'
    low = 'low'


KeyError: 'Model_Foo'

In [70]:
from typing import Any, Type, cast
import dspy
from dspy.signatures.signature import Signature
from dspy.adapters.types.image import try_expand_image_tags

from langwatch_nlp.studio.utils import SerializableWithStringFallback
from pydantic import Field


class TemplateAdapter(dspy.JSONAdapter):
    """
    This is a "TemplateAdapter" DSPy Adapter, that avoid modifying the messages as much as possible,
    and instead uses a {{mustache}} template formating to fill in the inputs on the messages.

    This adapter does not append any text to the system prompt like DSPy normally does and uses json for the outputs
    by default, this matches much better what users expect comming from OpenAI standards, and will allow them to simply
    pick up the same prompts and json schemas and use in any other frameworks as is, since all of them adhere to the
    raw OpenAI way of interating with LLMs.
    """

    def format(
        self,
        signature: Type[Signature],
        demos: list[dict[str, Any]],
        inputs: dict[str, Any],
    ) -> list[dict[str, Any]]:
        inputs_copy = dict(inputs)

        # If the signature and inputs have conversation history, we need to format the conversation history and
        # remove the history field from the signature.
        history_field_name = cast(str, self._get_history_field_name(signature))
        if history_field_name:
            # In order to format the conversation history, we need to remove the history field from the signature.
            signature_without_history = signature.delete(history_field_name)
            conversation_history = self.format_conversation_history(
                signature_without_history,
                history_field_name,
                inputs_copy,
            )

        _messages = getattr(signature, "_messages", Field(default=[])).default

        messages = []
        messages.append(
            {
                "role": "system",
                "content": self._format_template_inputs(
                    signature.instructions, inputs_copy
                ),
            }
        )
        messages.extend(self.format_demos(signature, demos))
        if history_field_name:
            messages.extend(conversation_history)
        messages.extend(
            [
                m | {"content": self._format_template_inputs(m["content"], inputs_copy)}
                for m in _messages
            ]
        )

        messages = try_expand_image_tags(messages)

        return messages

    def _format_template_inputs(
        self, template: str, inputs: dict[str, Any]
    ) -> dict[str, Any]:
        """
        Format the template inputs filling the {{ input }} placeholders.
        """

        class SafeDict(dict):
            def __missing__(self, key):
                return "{{" + key + "}}"

        template_fmt = template.replace("{{", "{").replace("}}", "}")
        str_inputs: dict[str, str] = {}
        for k, v in inputs.items():
            str_inputs[k] = (
                v
                if type(v) == str
                else json.dumps(v, cls=SerializableWithStringFallback)
            )
        return template_fmt.format_map(SafeDict(str_inputs))  # type: ignore

In [74]:
import dspy
from dspy.clients.lm import litellm

with langwatch.trace() as trace:
    trace.autotrack_dspy()

    lm = dspy.LM(model="openai/gpt-4o-mini")
    dspy.configure(adapter=TemplateAdapter())


    class AnswerImportance(dspy.Signature):
        """You are a helpful assistant that answers questions and provides information."""

        _messages = [{
            "role": "user",
            "content": "User is asking: {{question}}"
        }]

        question: str = dspy.InputField(description="The question to answer")
        importance: Model = dspy.OutputField(
            description="The importance of the answer"
        )
        answer: str = dspy.OutputField(description="The answer to the question")

    predict = dspy.Predict(AnswerImportance)
    predict.set_lm(lm)

    prediction = predict(question="what is the capital of France???")

prediction

messages [{'role': 'system', 'content': 'You are a helpful assistant that answers questions and provides information.'}]


Prediction(
    importance=<Model.high: 'high'>,
    answer='I am here to assist you with any questions or information you need. Please let me know how I can help!'
)

In [73]:
litellm.completion(
    model="openai/gpt-4o-mini",
    messages=[
        {
            "role": "system",
            "content": """Your input fields are:
1. `question` (str): The question to answer
Your output fields are:
1. `importance` (Model): The importance of the answer
2. `answer` (str): The answer to the question
All interactions will be structured in the following way, with the appropriate values filled in.

Inputs will have the following structure:

[[ ## question ## ]]
{question}

Outputs will be a JSON object with the following fields.

[[ ## importance ## ]]
{importance}        # note: the value you produce must adhere to the JSON schema: {"type": "string", "title": "Model"}

[[ ## answer ## ]]
{answer}
In adhering to this structure, your objective is:
        Given the fields `question`, produce the fields `importance`, `answer`.""",
        },
        {
            "role": "user",
            "content": """[[ ## question ## ]]
What is the capital of France????????????????

Respond with a JSON object in the following order of fields: `importance` (must be formatted as a valid Python Model), then `answer`.""",
        },
        # {"role": "user", "content": "What is the capital of France?"},
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "DSPyProgramOutputs",
            "strict": True,
            "schema": {
                "additionalProperties": False,
                "$defs": {"Model": {"title": "Model", "type": "string"}},
                "properties": {
                    # "importance": {"title": "Importance", "type": "number"},
                    # "importance": {"$ref": "#/$defs/Model"},
                    "importance": {"title": "Importance", "type": "string", "enum": ["high", "medium", "low"]},
                    "answer": {"title": "Answer", "type": "string"},
                },
                "required": ["importance", "answer"],
                "title": "DSPyProgramOutputs",
                "type": "object",
            },
        },
    },
)

ModelResponse(id='chatcmpl-BT6f6U4scevsik1gXQJqN7Z9IaQoe', created=1746276820, model='gpt-4o-mini-2024-07-18', object='chat.completion', system_fingerprint='fp_0392822090', choices=[Choices(finish_reason='stop', index=0, message=Message(content='{"importance":"high","answer":"The capital of France is Paris."}', role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'refusal': None}, annotations=[]))], usage=Usage(completion_tokens=16, prompt_tokens=292, total_tokens=308, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None)), service_tier='default')