## Extract structured information from unstructered language
From a user request we want to extract all the possible info.

#### definitions

In [1]:
from enum import Enum
class SortRecommendationsBy(str, Enum):
    """
    Enum representing sorting criteria for excursions.

    Attributes:
        price (str): Sort by price.
        rating (str): Sort by rating.
        relevance (str): Sort by relevance.
    """

    price = "price"
    rating = "rating"
    relevance = "relevance"


class Sentiment(str, Enum):
    """
    Enum representing different sentiments.

    Attributes:
        positive (str): Positive sentiment.
        negative (str): Negative sentiment.
        neutral (str): Neutral sentiment.
    """

    positive = "positive"
    negative = "negative"
    neutral = "neutral"

PROPERTY_DESCRIPTIONS: dict[str, str] = {
    "is_courtesy_greeting": (
        "Whether the "
        "latest user intent is greetings; e.g. the "
        "user just says hi, thanks or good bye as their latest "
        "message. "
        "Emojis or similar symbols are not part of this."
    ),

    "is_end_conversation": (
        """Whether the user wants to end the """
        """conversation in the latest message. This includes """
        """phrases or expressions indicating a desire to """
        """terminate the interaction, such as "bye", """
        """"goodbye", "that's all", "no more questions", """
        """"that's all for now", or similar statements """
        """suggesting the user has no further inquiries or """
        """needs. Also includes ignoring or giving a """
        """negative response to the assistant question """
    ),

    "is_excursion_search": (
        "Whether the user "
        "wants to search or get some "
        "suggestions or recommendations for an excursion, "
        "tour, experience, activity or trip to do."
    ),

    "is_excursion_followup_question": (
        "Whether "
        "the user wants to ask a follow-up question "
        "about a specific excursion, tour, experience, "
        "activity or trip to do."
        "Hence, It will be a question that ask clarification"
        "about an experience or activity referred "
        "previously in the conversation."
    ),

    "is_booking_cancellation": (
        "Whether the"
        " user wants to cancel an existing booking "
        "or reservation."
    ),

    "is_booking_modification": (
        "Whether the user"
        " wants to modify or change an existing "
        "booking or reservation."
    ),

    "is_faq": (
        "Indicates if the user's question pertains to frequently "
        "asked general topics. It can't be a question "
        "that refer to a specific tour, excursion or activity mentioned"
        "previously in the conversation, in this case is"
        " 'is_excursion_followup_question'."
        "It can't be a question on how to contact or speak with an agent."
        "It can be a question related to how the user"
        "can perform some concrete actions,"
        "including: how to cancel or modify bookings or "
        "reservations, "
        "add participants to existing bookings, modify booking "
        "dates, "
        "group bookings (>10 participants), gift card purchases, "
        "resolving issues post-purchase or during a tour, payment "
        "options, "
        "supplier-related inquiries or collaborations, adding "
        "products to "
        "cart, locating specific products, using a smartphone for "
        "vouchers, "
        "handling cancelled activities/excursions or refused "
        "tickets, "
        "managing lateness for tours, addressing missing vouchers "
        "or tickets, "
        "resolving checkout issues, checking refund status, "
        "understanding "
        "rate differences for modified dates, ensuring payment "
        "security, "
        "understanding the charging process, using promotional "
        "codes, "
        "requesting invoices, confirming pickups, handling "
        "absences or "
        "choosing meeting points, booking flights, hotels, "
        "or car rentals, "
        "booking via telephone, understanding activity or "
        "experience "
        "inclusions, determining the exact duration of an "
        "activity or "
        "experience, transportation services, differences between "
        "open "
        "and dated tickets, selecting entry times, verifying "
        "bookings or "
        "reservations, using vouchers, confirming reservations, "
        "registration requirements, participant limits, or making "
        "any "
        "booking or reservation."
    ),

    "is_requesting_assistance": (
        "Detects if the user is explicitly requesting to speak with or be "
        "connected to a human agent/representative in their most recent "
        "message. "
        "This intent should only be true when the user makes a direct request "
        "for human assistance in their latest message."
    ),

    "has_provided_enough_context": (
        "Detects if the user need is clear enough so the assistant can "
        "respond without human intervention. "
        "If user asks for human assistance without providing the reason "
        "or context, this should be set to False. "
        "This evaluates the cumulative context from all messages, not just "
        "the latest one."
    ),

    # General conversation metadata to extract related to the latest user
    # message.
    "user_need": (
        "A concise, detailed and informative representation of "
        "the latest user's need as expressed throughout the "
        "conversation. This should resemble a search query "
        "suitable for a search engine like Google."
    ),

    "user_sentiment": (
        "The user sentiment at this point in the conversation."
        f" Possible values are: {', '.join(Sentiment.__members__)}"
    ),

    "language": "language of text (must be ISO 639-1 code)",

    "nr_top_excursions_to_retrieve": (
        "Number of top excursions,"
        "tours or activities to look for,"
        " at this point in the conversation."
    ),

    "excursion_location": "City, region or country the user is considering.",

    "geo_distance_from_location": (
        "maximum distance from location"
        " that the user can allow."
    ),

    "sort_by": "Sorting criterion to order the recommended excursions.",

    "nr_people": "Number of people that will participate in the excursion.",

    "excursion_url": (
        "The unique URL for the specific excursion or activity,"
        " that the user is referring to."
    ),

    "max_budget_condition": "The maximum budget the user can afford.",

    "min_budget_condition": (
        "The minimum budget at "
        "least the user wants to spend."
    ),

    "is_looking_for_nat_geo_activity": (
        "Whether the user is "
        "looking for a 'nat geo' activity to do."
    ),

    "is_looking_for_tui_collection_activity": (
        "Whether the user "
        "is looking for a 'tui collection' activity to do."
    ),

    "is_looking_for_sustainable_activity": (
        "Whether the user is looking for"
        " a 'Green and Fair' sustainable activity to do."
    ),

    "is_looking_for_transfer_activity": (
        "Whether the user is looking for a transfer service."
    ),

    "nr_times_asked_more_info": (
        "Number of times the assistant has asked the user for "
        "providing more information about their request."
    ),

    "recommendations": (
        "List of recommended TUI website urls"
    ),

    "fake_urls_removed": (
        "List of fake urls detected and removed"
    ),

    "is_agent_handoff": (
        "Whether the conversation is being handed off to an "
        "agent"
    ),

}


#### pydantic model

In [2]:
from pydantic import BaseModel, Field
from typing import Optional

class Metadata(BaseModel):
    """
    Assistant message metadata model, to be used as part of the response of
    /assistant_response.
    """

    # Boolean mask of all possible user intents.
    is_courtesy_greeting: bool = Field(
        default=False,
        description=PROPERTY_DESCRIPTIONS["is_courtesy_greeting"],
    )
    is_end_conversation: bool = Field(
        default=False,
        description=PROPERTY_DESCRIPTIONS["is_end_conversation"],
    )
    is_excursion_search: bool = Field(
        default=False,
        description=PROPERTY_DESCRIPTIONS["is_excursion_search"],
    )
    is_excursion_followup_question: bool = Field(
        default=False,
        description=PROPERTY_DESCRIPTIONS[
            "is_excursion_followup_question"],
    )
    is_booking_cancellation: bool = Field(
        default=False,
        description=PROPERTY_DESCRIPTIONS["is_booking_cancellation"],
    )
    is_booking_modification: bool = Field(
        default=False,
        description=PROPERTY_DESCRIPTIONS["is_booking_modification"],
    )
    is_faq: bool = Field(
        default=False,
        description=PROPERTY_DESCRIPTIONS["is_faq"],
    )
    is_requesting_assistance: bool = Field(
        default=False,
        description=PROPERTY_DESCRIPTIONS["is_requesting_assistance"],
    )
    has_provided_enough_context: bool = Field(
        default=False,
        description=PROPERTY_DESCRIPTIONS["has_provided_enough_context"],
    )

    # General conversation metadata to extract related to the latest user
    # message.
    user_need: str = Field(
        default="",
        description=PROPERTY_DESCRIPTIONS["user_need"],
    )
    user_sentiment: Sentiment = Field(
        default=Sentiment.neutral,
        description=PROPERTY_DESCRIPTIONS["user_sentiment"],
    )
    language: str = Field(
        default="en",
        description=PROPERTY_DESCRIPTIONS["language"],
    )

    # Additional fields related to search excursion related to the latest
    # user messages.
    nr_top_excursions_to_retrieve: Optional[int] = Field(
        default=3,
        description=PROPERTY_DESCRIPTIONS["nr_top_excursions_to_retrieve"],
    )
    excursion_location: Optional[str] = Field(
        default=None,
        description=PROPERTY_DESCRIPTIONS["excursion_location"],
    )
    geo_distance_from_location: Optional[str] = Field(
        default=None,
        description=PROPERTY_DESCRIPTIONS["geo_distance_from_location"],
    )
    sort_by: Optional[SortRecommendationsBy] = Field(
        default=None,
        description=PROPERTY_DESCRIPTIONS["sort_by"],
    )
    nr_people: Optional[int] = Field(
        default=None,
        description=PROPERTY_DESCRIPTIONS["nr_people"],
    )
    excursion_url: Optional[str] = Field(
        default=None,
        description=PROPERTY_DESCRIPTIONS["excursion_url"],
    )
    max_budget_condition: Optional[float] = Field(
        default=None,
        description=PROPERTY_DESCRIPTIONS["max_budget_condition"],
    )
    min_budget_condition: Optional[float] = Field(
        default=None,
        description=PROPERTY_DESCRIPTIONS["min_budget_condition"],
    )
    is_looking_for_nat_geo_activity: Optional[bool] = Field(
        default=None,
        description=PROPERTY_DESCRIPTIONS[
            "is_looking_for_nat_geo_activity"],
    )
    is_looking_for_tui_collection_activity: Optional[bool] = Field(
        default=None,
        description=PROPERTY_DESCRIPTIONS[
            "is_looking_for_tui_collection_activity"],
    )
    is_looking_for_sustainable_activity: Optional[bool] = Field(
        default=None,
        description=PROPERTY_DESCRIPTIONS[
            "is_looking_for_sustainable_activity"],
    )
    is_looking_for_transfer_activity: Optional[bool] = Field(
        default=None,
        description=PROPERTY_DESCRIPTIONS["is_looking_for_transfer_activity"],
    )
    is_agent_handoff: bool = Field(
        default=False,
        description=PROPERTY_DESCRIPTIONS["is_agent_handoff"],
    )


### extractor with dspy

##### configure the language model

In [10]:
import dspy
#gpt3 = dspy.OpenAI('gpt-3.5-turbo-0125', max_tokens=1000)
claude = dspy.Claude()
dspy.configure(lm=claude)

##### configure the signature and the predictor

In [11]:

from dspy.functional import TypedPredictor, TypedChainOfThought
from dspy import Signature, InputField, OutputField

class MetadataSignature(Signature):
    """ Extract all travel information in the given email """
    user_message: str = InputField()
    metadata: list[Metadata] = OutputField()

predictor_basic = TypedPredictor(MetadataSignature)
predictor_cot = TypedChainOfThought(MetadataSignature)

##### predict

In [12]:
MESSAGE = "can you suggest me something to do in Barcellona for less than 40 euros? It would be nice if it is a sustainable activity"

In [13]:

res = predictor_basic(user_message=MESSAGE)

 		You are using the client Claude, which will be removed in DSPy 2.6.
 		Changing the client is straightforward and will let you use new features (Adapters) that improve the consistency of LM outputs, especially when using chat LMs. 

 		Learn more about the changes and how to migrate at
 		https://github.com/stanfordnlp/dspy/blob/main/examples/migration.ipynb


AttributeError: 'TypeError' object has no attribute 'message'