In [1]:
from semantic_router import Route
from collections import defaultdict
from semantic_router.encoders import HuggingFaceEncoder
from semantic_router import RouteLayer
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
df_synthetic = pd.read_json("synthetic_intetions.json")

X_syn = df_synthetic[['Id','Message']]
y_syn = df_synthetic['Intention'].to_list()

In [3]:
# Split the dataset with stratification
X_train, X_test, y_train, y_test = train_test_split(
    X_syn, y_syn, test_size=0.1, random_state=0, shuffle=True, stratify=y_syn
)

In [4]:
y_train = [None if i == "None" else i for i in y_train]
y_test = [None if i == "None" else i for i in y_test]

In [5]:
company_information_messages = []
delete_activities_messages = []
activity_search_messages = []
review_user_messages = []
review_actvity_messages = []
make_reservation_messages = []
accept_reservation_messages = []
reject_reservation_messages = []
check_reservations_messages = []
check_reviews_messages = []
check_number_reservations_messages = []

for message, label in zip(X_train["Message"], y_train):
    if label == "company_information":
        company_information_messages.append(message)
    elif label == "delete_activities":
        delete_activities_messages.append(message)
    elif label == "activity_search":
        activity_search_messages.append(message)
    elif label == "review_user":
        review_user_messages.append(message)
    elif label == "review_activity":
        review_actvity_messages.append(message)
    elif label == "make_reservation":
        make_reservation_messages.append(message)
    elif label == "accept_reservation":
        accept_reservation_messages.append(message)
    elif label == "reject_reservation":
        reject_reservation_messages.append(message)
    elif label == "check_reservations":
        check_reservations_messages.append(message)
    elif label == "check_reviews":
        check_reviews_messages.append(message)
    elif label == "check_number_reservations":
        check_number_reservations_messages.append(message)

company_information = Route(
    name="company_information",
    description="The user wants to obtain information about the company or its chatbot and webpage.",
    utterances=company_information_messages,
)
delete_activities = Route(
    name="delete_activities",
    description="The user wants to delete a specific activity.",
    utterances=delete_activities_messages,
)
activity_search = Route(
    name="activity_search",
    description="The user wants to find an activity that matches their interests",
    utterances=activity_search_messages,
)
review_user = Route(
    name="review_user",
    description="The host of the activity wants to leave reviews about a participant of a specific activity",
    utterances=review_user_messages,
)
review_actvity = Route(
    name="review_actvity",
    description="The user wants to review a specific activity they took part in and maybe leave feedback",
    utterances=review_actvity_messages,
)
make_reservation = Route(
    name="make_reservation",
    description="The user wants to make a reservation to a specific activity",
    utterances=make_reservation_messages,
)
accept_reservation = Route(
    name="accept_reservation",
    description="The host of the activity wants to accept the reservation of a specific user to a specific activity",
    utterances=accept_reservation_messages,
)
reject_reservation = Route(
    name="reject_reservation",
    description="The host of the activity wants to reject the reservation of a specific user to a specific activity",
    utterances=reject_reservation_messages,
)
check_reservations = Route(
    name="check_reservations",
    description="The host of the activity wants to check the reservations to a specific activity",
    utterances=check_reservations_messages,
)
check_reviews = Route(
    name="check_reviews",
    description="The host of the activity wants to check the reviews to a specific activity",
    utterances=check_reviews_messages,
)
check_number_reservations = Route(
    name="check_number_reservations",
    description="The host of the activity wants to check the number of reservations to a specific activity",
    utterances=check_number_reservations_messages,
)

In [6]:
routes = [company_information, delete_activities, activity_search, review_user, review_actvity,
            make_reservation, accept_reservation, reject_reservation, check_reservations, check_reviews,
            check_number_reservations]

encoder = HuggingFaceEncoder()

In [7]:
hf_rl = RouteLayer(encoder=encoder, routes=routes)

In [8]:
route_thresholds = hf_rl.get_thresholds()
print("Updated route thresholds:", route_thresholds)
accuracy = hf_rl.evaluate(X=X_test["Message"].to_list(), y=y_test)
print(f"Accuracy: {accuracy*100:.2f}%")

Updated route thresholds: {'company_information': 0.5, 'delete_activities': 0.5, 'activity_search': 0.5, 'review_user': 0.5, 'review_actvity': 0.5, 'make_reservation': 0.5, 'accept_reservation': 0.5, 'reject_reservation': 0.5, 'check_reservations': 0.5, 'check_reviews': 0.5, 'check_number_reservations': 0.5}


Generating embeddings:   0%|          | 0/1 [00:00<?, ?it/s]

Accuracy: 77.05%


In [9]:
# Call the fit method
# DONT RUN, IF RUN IT OVERIDES AND NEEDS TO RESTART THE KERNEL
#hf_rl.fit(X=X_train["Message"].to_list(), y=y_train, max_iter=500)

Generating embeddings:   0%|          | 0/2 [00:00<?, ?it/s]

Training:   0%|          | 0/500 [00:00<?, ?it/s]

In [10]:
#RESULTS OF FIT

#route_thresholds = hf_rl.get_thresholds()
#print("Updated route thresholds:", route_thresholds)
#accuracy = hf_rl.evaluate(X=X_test["Message"].to_list(), y=y_test)
#print(f"Accuracy: {accuracy*100:.2f}%")

# better with no optimization

Updated route thresholds: {'company_information': 0.6767676767676768, 'delete_activities': 0.5454545454545455, 'activity_search': 0.8787878787878789, 'review_user': 0.5151515151515152, 'review_actvity': 0.9223140495867769, 'make_reservation': 0.6867258443016019, 'accept_reservation': 0.5656565656565657, 'reject_reservation': 0.395959595959596, 'check_reservations': 0.6710743801652893, 'check_reviews': 0.33333333333333337, 'check_number_reservations': 0.010101010101010102}


Generating embeddings:   0%|          | 0/1 [00:00<?, ?it/s]

Accuracy: 57.38%


### Train Set

In [43]:
# Convert train data to the required format for the forward method
X_train_messages = X_train["Message"].to_list()

predicted_labels = [hf_rl(mens).name for mens in X_train_messages]

# Initialize counters for correct and incorrect predictions per label
results = defaultdict(lambda: {"correct": 0, "incorrect": 0})

# Calculate per-intention accuracy
for true_label, predicted_label in zip(y_train, predicted_labels):
    if true_label == predicted_label:
        results[true_label]["correct"] += 1
    else:
        results[true_label]["incorrect"] += 1

# Compute total accuracy per label
accuracy_per_label = {
    label: results[label]["correct"] / (results[label]["correct"] + results[label]["incorrect"])
    for label in results
}

results_df = pd.DataFrame([
    {
        "Label": label,
        "Correct": data["correct"],
        "Incorrect": data["incorrect"],
        "Accuracy": accuracy_per_label[label]
    }
    for label, data in results.items()
])

In [45]:
correct_matches = sum(1 for x, y in zip(y_train, predicted_labels) if x == y)
accuracy = correct_matches / len(y_train)

print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.88


In [44]:
display(results_df)

Unnamed: 0,Label,Correct,Incorrect,Accuracy
0,accept_reservation,48,0,1.0
1,make_reservation,41,3,0.931818
2,activity_search,49,0,1.0
3,reject_reservation,49,0,1.0
4,check_reservations,43,3,0.934783
5,company_information,48,0,1.0
6,check_number_reservations,46,0,1.0
7,review_activity,0,51,0.0
8,review_user,40,5,0.888889
9,check_reviews,41,3,0.931818


### Test Set

In [46]:
# Convert test data to the required format for the forward method
X_test_messages = X_test["Message"].to_list()

predicted_labels = [hf_rl(mens).name for mens in X_test_messages]

# Initialize counters for correct and incorrect predictions per label
results = defaultdict(lambda: {"correct": 0, "incorrect": 0})

# Calculate per-intention accuracy
for true_label, predicted_label in zip(y_test, predicted_labels):
    if true_label == predicted_label:
        results[true_label]["correct"] += 1
    else:
        results[true_label]["incorrect"] += 1

# Compute total accuracy per label
accuracy_per_label = {
    label: results[label]["correct"] / (results[label]["correct"] + results[label]["incorrect"])
    for label in results
}

results_df = pd.DataFrame([
    {
        "Label": label,
        "Correct": data["correct"],
        "Incorrect": data["incorrect"],
        "Accuracy": accuracy_per_label[label]
    }
    for label, data in results.items()
])

In [47]:
correct_matches = sum(1 for x, y in zip(y_test, predicted_labels) if x == y)
accuracy = correct_matches / len(y_test)

print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.77


In [48]:
display(results_df)

Unnamed: 0,Label,Correct,Incorrect,Accuracy
0,check_reservations,1,4,0.2
1,company_information,6,0,1.0
2,check_number_reservations,5,0,1.0
3,,3,0,1.0
4,activity_search,5,0,1.0
5,check_reviews,4,1,0.8
6,review_user,5,0,1.0
7,make_reservation,4,1,0.8
8,reject_reservation,6,0,1.0
9,accept_reservation,4,1,0.8


In [11]:
for (index, row), label in zip(X_test.iterrows(), y_test):
    message = row["Message"]
    prediction = hf_rl(message)

    if prediction.name == label:
        continue
    else:
        print(f"Id: {index}")
        print(f"Message: {message}")
        print(f"True Route: {label}, Predicted Route: {prediction.name}")
        print()

Id: 453
Message: Can you tell me who is attending the language exchange?
True Route: check_reservations, Predicted Route: None

Id: 468
Message: Who has a reservation for the escape room challenge?
True Route: check_reservations, Predicted Route: make_reservation

Id: 480
Message: List the reviews for the wine tasting event.
True Route: check_reviews, Predicted Route: review_actvity

Id: 448
Message: Who has a reservation for the live theater performance?
True Route: check_reservations, Predicted Route: check_number_reservations

Id: 455
Message: Who has a reservation for the historical walking tour?
True Route: check_reservations, Predicted Route: check_number_reservations

Id: 164
Message: The salsa dancing class was a blast! Great energy and fun music. A solid 5.
True Route: review_activity, Predicted Route: check_reviews

Id: 287
Message: Can I make a reservation for the cooking class on Thai food?
True Route: make_reservation, Predicted Route: check_reservations

Id: 210
Message: 

# Imports/Definition

In [18]:
from langchain.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
)
from pydantic import BaseModel, Field


class PromptTemplate(BaseModel):
    """

    Defines templates for system and human messages used in a conversation.

    Attributes:
    ----------
    system_template : str
        Template for the system message in the conversation.

    human_template : str
        Template for the human message in the conversation.

    """

    system_template: str = Field(
        description="Template for the system message in the conversation"
    )
    human_template: str = Field(
        description="Template for the human message in the conversation"
    )


def generate_prompt_templates(
    prompt_template: PromptTemplate, memory: bool
) -> ChatPromptTemplate:
    """Generate a chat prompt template based on given templates and memory
       setting.


    Parameters:
    ----------
    prompt_template:
        An instance of PromptTemplate containing system and human templates.
    memory: bool
    A boolean flag indicating whether to include chat history in the prompt.

    Returns:
    -------
    ChatPromptTemplate
        A configured ChatPromptTemplate with specified message structure.
    """
    # Create prompt template including chat history if memory is enabled
    if memory:
        prompt = ChatPromptTemplate.from_messages(
            [
                SystemMessagePromptTemplate.from_template(
                    prompt_template.system_template
                ),
                MessagesPlaceholder(variable_name="chat_history"),
                HumanMessagePromptTemplate.from_template(
                    prompt_template.human_template
                ),
            ]
        )
    else:
        # Create prompt template without chat history
        prompt = ChatPromptTemplate.from_messages(
            [
                SystemMessagePromptTemplate.from_template(
                    prompt_template.system_template
                ),
                HumanMessagePromptTemplate.from_template(
                    prompt_template.human_template
                ),
            ]
        )

    return prompt


def generate_agent_prompt_template(
    prompt_template: PromptTemplate,
) -> ChatPromptTemplate:
    """Generate a chat prompt template based on given templates and memory
    setting.

    Parameters:
    ----------
        prompt_template: PromptTemplate
            An instance of PromptTemplate containing system and
            human templates.

    Returns:
    -------
        A configured ChatPromptTemplate with specified message structure.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(prompt_template.system_template),
            MessagesPlaceholder(variable_name="chat_history"),
            HumanMessagePromptTemplate.from_template(prompt_template.human_template),
            MessagesPlaceholder(variable_name="agent_scratchpad"),
        ]
    )

    return prompt

In [20]:
from typing import Literal
from langchain.output_parsers import PydanticOutputParser
from langchain.schema.runnable.base import Runnable
from langchain_openai import ChatOpenAI

class IntentClassification(BaseModel):
    """
    Represents the input model for intent classification os user queries.
    Attributes:
    ----------
        intent : Literal
            The classified intent of the user query, chosen from predefined options.
    """

    intent: Literal["company_information",
                    "delete_activities",
                    "activity_search",
                    "review_user",
                    "review_activity",
                    "make_reservation",
                    "accept_reservation",
                    "reject_reservation",
                    "check_reservations",
                    "check_reviews",
                    "check_number_reservations",
                    "chitchat"] = Field(
        ...,
        description="The classified intent of the user query",
    )


class RouterChain(Runnable):
    """
    A chain for processing user inputs and classifying intents using an LLM.

    Attributes:
    ----------
    llm : ChatOpenAI
        The language model used for natural language processing
        and generating responses.
    prompt : PromptTemplate
        The template used for constructing the system and human prompts for
        the language model.
    output_parser : PydanticOutputParser
        A parser to validate and format the output and return the response
        as a string.
    chain : Runnable
        The chain combining the prompt, language model, and output parser to
        process inputs.

    Methods:
    -------
    __init__(self,
                 llm=ChatOpenAI(),
                 memory = False):
        Initializes the RouterChain with a language model and memory
        settings.

    invoke(self, inputs, config=None, **kwargs):
        Processes the user query, classifies the intent and resturns it in
        a structures way.
    """

    def __init__(self, llm=ChatOpenAI(temperature=0.0, model='gpt-3.5-turbo'),
                 memory=False):
        """
        Initializes the RouterChain with a language model and memory
        settings.

        Parameters:
        ----------
        llm : ChatOpenAI
            The language model used for natural language processing and
            generating responses.
        memory : bool
            Whether or not to use memory for the language model.

        """

        super().__init__()

        self.llm = llm
        prompt_template = PromptTemplate(
            system_template="""
            You are an expert classifier of user intentions for the BeAlive activity
            recommendation platform.
            Your role is to accurately identify the user's 
            intent based on their query and the context provided by the conversation history.
            Analyze the user's query in the conversation history context and classify 
            it into one of the intents: "company_information", "delete_activities", 
            "activity_search", "review_user", "review_activity", "make_reservation",
            "accept_reservation", "reject_reservation", "check_reservations", 
            "check_reviews", "check_number_reservations". 
            You'll use the following detailed descriptions to classify the user's intent:

            1. **company_information:**  
            The user wants to obtain information about the company or its chatbot and webpage.
            Terms like "How", "What" and "Where" or similar may be present.


            2. **delete_activities:**  
           The user wants to delete a specific activity. Generally will include terms like 'delete' or
           'remove' or similar, and must specify an activity name.

            3. **activity_search:**  
            The user wants to find an activity that best matches their interests and the preferences he is asking for.
            Generally, will have a description of what the user is looking for in an activity, additionally it
            may also include specific cities or time periods.

            4. **review_user:**
            The host of the activity wants to leave reviews about a participant of a specific activity.
            It must specify an activity name and a username, and will likely contain a review
            describing user behavior and potentially a 1 to 5 rating.

            5. **review_activity:**
            The user wants to review a specific activity they took part in and maybe leave feedback.
            It must specify an activity name, and will likely contain a review
            describing good or bad elements about that activity and potentially a 1 to 5 rating.

            6. **make_reservation:**
            The user wants to make a reservation for a specific activity. Must contain an activity name
            and may contain terms like "I want to reserve..." or "Book me a spot for..." or similar.

            7. **accept_reservation:**
            The host of the activity wants to accept a reservation from a specific user for a specific activity.
            An activity name and a username must be specified and terms like "I want to accept..." or simply
            "Accept" or similar should be present.

            8. **reject_reservation:**
            The host of the activity wants to reject a reservation from a specific user for a specific activity.
            An activity name and a username must be specified and terms like "I want to reject..." or simply
            "Reject", "decline" or similar should be present.

            9. **check_reservations:**
            The host of the activity wants to check the reservations to a specific activity. Must contain an
            activity name and will specify a request to check reservations or participants of that activity

            10. **check_reviews:**
            The host of the activity wants to check the reviews to a specific activity. Must contain an
            activity name and will specify a request to check reviews or opinions about that activity

            11. **check_number_reservations:**
            The host of the activity wants to check the number of reservations to a specific activity.
            Must contain an activity name and will specify a request to check the number of reservations
            of that activity. Terms like "How full...", "Is activity_name full?" or simply "How many reservations..."
            or "How many spots left in..." or similiar may be present.


            12. **chitchat:**
            The user is simply making small talk, not asking any questions or making any requests relevant
            to the activity recommendation system. The user could be asking about random topics or simply
            rambling about meaningless (in this context) topics. Use it when you feel that any other option is incorrect. 


            **Input:**

            - User Input: {user_input}  
            - Conversation History: {chat_history}

            **Output Format:**

            - Follow the specified output format and use these detailed descriptions:
            {format_instructions}
            """,
            human_template="User Query: {user_input}",
        )

        self.prompt = generate_prompt_templates(prompt_template, memory=memory)

        self.output_parser = PydanticOutputParser(pydantic_object=IntentClassification)
        self.format_instructions = self.output_parser.get_format_instructions()
        self.chain = (self.prompt | self.llm | self.output_parser)

    def invoke(self, inputs, config=None, **kwargs):
        """
        Processes the user query, classifies the intent and returns it in
        a structures way.

        Parameters:
        ----------
        inputs : dict
            A dictionary containing the user's input.
        config : optional
            Configuration settings for the chain.
        **kwargs :
            Additional keyword arguments.

        Returns:
        -------
        IntentClassification
            The classified intent as a structured object.
        """

        return self.chain.invoke(
                {
                    "user_input": inputs["user_input"],
                    "chat_history": inputs["chat_history"],
                    "format_instructions": self.format_instructions,
                }, config
            )

# LLM Router

### Train Set

In [49]:
router_chain = RouterChain(llm = ChatOpenAI(temperature=0.0, model="gpt-4o-mini"))

X_train_messages = X_train["Message"].to_list()

predicted_labels = [router_chain.invoke({"user_input": mens,
                                         "chat_history": []}).intent for mens in X_train_messages]

In [50]:
y_train_llm = ['chitchat' if i is None  else i for i in y_train]

In [51]:
# Initialize counters for correct and incorrect predictions per label
results = defaultdict(lambda: {"correct": 0, "incorrect": 0})

# Calculate per-intention accuracy
for true_label, predicted_label in zip(y_train_llm, predicted_labels):
    if true_label == predicted_label:
        results[true_label]["correct"] += 1
    else:
        results[true_label]["incorrect"] += 1

# Compute total accuracy per label
accuracy_per_label = {
    label: results[label]["correct"] / (results[label]["correct"] + results[label]["incorrect"])
    for label in results
}

results_df = pd.DataFrame([
    {
        "Label": label,
        "Correct": data["correct"],
        "Incorrect": data["incorrect"],
        "Accuracy": accuracy_per_label[label]
    }
    for label, data in results.items()
])

In [52]:
correct_matches = sum(1 for x, y in zip(y_train_llm, predicted_labels) if x == y)
accuracy = correct_matches / len(y_train_llm)

print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.97


In [53]:
display(results_df)

Unnamed: 0,Label,Correct,Incorrect,Accuracy
0,accept_reservation,48,0,1.0
1,make_reservation,44,0,1.0
2,activity_search,49,0,1.0
3,reject_reservation,42,7,0.857143
4,check_reservations,46,0,1.0
5,company_information,45,3,0.9375
6,check_number_reservations,41,5,0.891304
7,review_activity,51,0,1.0
8,review_user,45,0,1.0
9,check_reviews,44,0,1.0


### Test Set

In [54]:
router_chain = RouterChain(llm = ChatOpenAI(temperature=0.0, model="gpt-4o-mini"))

X_test_messages = X_test["Message"].to_list()

predicted_labels = [router_chain.invoke({"user_input": mens,
                                         "chat_history": []}).intent for mens in X_test_messages]

In [55]:
y_test_llm = ['chitchat' if i is None else i for i in y_test]

In [56]:
# Initialize counters for correct and incorrect predictions per label
results = defaultdict(lambda: {"correct": 0, "incorrect": 0})

# Calculate per-intention accuracy
for true_label, predicted_label in zip(y_test_llm, predicted_labels):
    if true_label == predicted_label:
        results[true_label]["correct"] += 1
    else:
        results[true_label]["incorrect"] += 1

# Compute total accuracy per label
accuracy_per_label = {
    label: results[label]["correct"] / (results[label]["correct"] + results[label]["incorrect"])
    for label in results
}

results_df = pd.DataFrame([
    {
        "Label": label,
        "Correct": data["correct"],
        "Incorrect": data["incorrect"],
        "Accuracy": accuracy_per_label[label]
    }
    for label, data in results.items()
])

In [57]:
correct_matches = sum(1 for x, y in zip(y_test_llm, predicted_labels) if x == y)
accuracy = correct_matches / len(y_test_llm)

print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.98


In [58]:
display(results_df)

Unnamed: 0,Label,Correct,Incorrect,Accuracy
0,check_reservations,5,0,1.0
1,company_information,5,1,0.833333
2,check_number_reservations,5,0,1.0
3,chitchat,3,0,1.0
4,activity_search,5,0,1.0
5,check_reviews,5,0,1.0
6,review_user,5,0,1.0
7,make_reservation,5,0,1.0
8,reject_reservation,6,0,1.0
9,accept_reservation,5,0,1.0


In [61]:
for (index, row), label , pred in zip(X_test.iterrows(), y_test_llm, predicted_labels):
    message = row["Message"]

    if pred == label:
        continue
    else:
        print(f"Id: {index}")
        print(f"Message: {message}")
        print(f"True Route: {label}, Predicted Route: {pred}")
        print()

Id: 25
Message: How do I check my reservation status?
True Route: company_information, Predicted Route: check_reservations

