In [1]:
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the OpenAI API key that ChatOpenAI needs — confirm.
load_dotenv()

True

The `auxiliar` module provides helper functions for saving your synthetic data.

In [3]:
from auxiliar import add_messages

In [4]:
# Schema for a single generated message; it is nested inside
# ListSyntheticUserMessages, which is the model handed to the output parser.
# NOTE(review): pydantic copies the class docstring and the Field descriptions
# into the model's JSON schema, so rewording them may change the format
# instructions that end up in the prompt — edit with care.
class SyntheticUserMessage(BaseModel):
    """
    Represents a synthetic user message for a specific task intention.

    Attributes:
    -----------
        message : str
            The user message to generate for the target task intention.
    """

    # Required field: `...` as the default marks it as mandatory.
    message: str = Field(
        ...,
        title="Message",
        description="The user message to generate for the target task intention.",
    )


# Top-level schema the LLM output is parsed into: a wrapper around a list of
# SyntheticUserMessage items. This is the model passed to PydanticOutputParser.
# NOTE(review): pydantic copies the class docstring and the Field descriptions
# into the model's JSON schema, so rewording them may change the format
# instructions that end up in the prompt — edit with care.
class ListSyntheticUserMessages(BaseModel):
    """
    A collection of synthetic user messages.

    Attributes:
        messages (list[SyntheticUserMessage]): A list of synthetic user messages.
    """

    # Required field: `...` as the default marks it as mandatory.
    messages: list[SyntheticUserMessage] = Field(
        ...,
        title="Messages",
        description="The list of synthetic user messages to generate for the target task intention.",
    )

# Parser bound to ListSyntheticUserMessages. It is used twice in this notebook:
# its get_format_instructions() text is injected into the prompts, and it is the
# final step of the generation chains.
output_parser = PydanticOutputParser(pydantic_object=ListSyntheticUserMessages)

In [5]:
# Prompt template shared by every real intention. Placeholders in braces are
# filled in at invoke() time, except {format_instructions}, which is pre-filled
# from the output parser below.
# NOTE(review): guideline 2 caps messages at 40 words, while some intention
# descriptions passed in later allow up to 50 — confirm which limit should win.
system_prompt = """
You are tasked with generating synthetic user messages for an activity finder platform called BeAlive,
which allows users to obtain personalized activity recomendations.

The user intentions are:
{user_intentions}

Your task is to create {k} distinct messages for the following target task intention:
{target_task_intention}

Here are some examples of human written messages you can use as inspiration:
{target_task_intention_examples}

Specific information about the target task intention:
{target_task_intention_description}

Follow these guidelines:
1. Focus exclusively on the target task intention, ensuring the message is relevant.
2. Each message should be between 5 and 40 words.
3. Avoid including any details or references to other user intentions.
4. Ensure the messages sound natural and typical of user queries for the given intention.
5. Follow the provided format strictly to maintain consistency.

Message format:
{format_instructions}
"""

# Declare each placeholder exactly once:
#   * the five values supplied by the caller go in input_variables (one string
#     per entry — a missing comma would silently concatenate adjacent literals);
#   * format_instructions is bound up front through partial_variables, so it
#     must not also be listed as an input variable.
prompt = PromptTemplate(
    template=system_prompt,
    input_variables=[
        "k",
        "user_intentions",
        "target_task_intention",
        "target_task_intention_examples",
        "target_task_intention_description",
    ],
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
)

In [6]:
# Chat model used for all generations (temperature fixed at 0.0).
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)

# Every intention label shown to the model in the prompt.
user_intentions = [
    "company_information",
    "delete_activities",
    "activity_search",
    "review_user",
    "review_activity",
    "make_reservation",
    "accept_reservation",
    "reject_reservation",
    "check_reservations",
    "check_reviews",
    "check_number_reservations",
]

# Number of synthetic user messages to generate for each target task intention.
k = 50

# File name handed to add_messages for every batch.
file_name = "synthetic_intetions.json"

# Pipeline: fill the prompt -> call the model -> parse into ListSyntheticUserMessages.
synthetic_data_chain = prompt | llm | output_parser

# Intention 1 - Company and Chatbot information

In [7]:
# Target label for this batch.
intention = "company_information"

# Extra guidance injected into the prompt for this intention.
description = """The user wants to know more about the company or obtain more information about how to use the chatbot. The user will 
most likely ask a question about the company information or where and how to use specific methods within the chatbot or what they
do, such as creating, deleting or searching for activities, checking or making reviews and reservations. They may also ask questions
about the webpage layout, which contains five pages: 'Chatbot', 'Calendar', 'Instructions', 'Account' and 'About us'."""

# Human-written seed messages shown to the model as inspiration.
examples = ['How do I use the chatbot?', 'What does the calendar page do?', 'Where can I update my information?']

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
        "target_task_intention_examples": examples,
    }
)

# One labelled record per generated message.
company_information_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

You can now review and edit the synthetic messages in the JSON file.

In [8]:
# Pass the company_information records and the target file name to the project's add_messages helper.
# NOTE(review): the helper lives in `auxiliar`; presumably it appends to the JSON file — confirm.
add_messages(company_information_messages, file_name)

# Intention 2 - Delete Activities 

In [9]:
# Target label for this batch.
intention = "delete_activities"

# Extra guidance injected into the prompt for this intention.
description = """The user intends to delete an activity. The messages should be very clear in that the user wants to delete an activity
and they must include a generated activity name."""

# Human-written seed messages shown to the model as inspiration.
examples = ['I want to delete the night safari activity', 'Please remove the underwater cave exploring', 'Please cancel the walk through the ruins']

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
        "target_task_intention_examples": examples,
    }
)

# One labelled record per generated message.
delete_activities_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [10]:
# Pass the delete_activities records and the target file name to the project's add_messages helper.
# NOTE(review): the helper lives in `auxiliar`; presumably it appends to the JSON file — confirm.
add_messages(delete_activities_messages, file_name)

# Intention 3 - Search for an activity

In [11]:
# Target label for this batch.
intention = "activity_search"

# Extra guidance injected into the prompt for this intention.
description = """The user is describing what the activity they want to participate in is like, for example, indoors or outdoors, 
artitistic or sporty, please be creative in these descriptions. Furthermore, the user may mention a specific city (Lisbon, Porto, 
New York, Miami, for example) or a specific time period (tomorrow, next week, third week of March, or 3rd of January are just a few 
examples). For these messages you may use up to 50 words."""

# Human-written seed messages shown to the model as inspiration.
examples = [
    "I'm looking for an outdoors activity where I can be at one with nature.",
    "I'm looking for a sports activity this weekend",
    "I'm looking for an activity in Porto in the third week of March",
]

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
        "target_task_intention_examples": examples,
    }
)

# One labelled record per generated message.
activity_search_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [12]:
# Pass the activity_search records and the target file name to the project's add_messages helper.
# NOTE(review): the helper lives in `auxiliar`; presumably it appends to the JSON file — confirm.
add_messages(activity_search_messages, file_name)

# Intention 4 - Review Activity

In [13]:
# Target label for this batch.
intention = "review_activity"

# Extra guidance injected into the prompt for this intention.
description = """The user is leaving a review for an activity they attended. This should include some aspects they liked or disliked or
suggestions for the host, please be creative. Crucially the message should clearly state the activity that is being reviewed as well as 
have a 1 to 5 rating."""

# Human-written seed messages shown to the model as inspiration.
examples = [
    "The fishing workshop was really fun. Fantastic for begginers. I would rate it a 5",
    "The cooking class was dissapointing. The chef was rushing through the instructions. I'll give it a 3.",
    "I loved the basketball tournament. It was very well organized. It's a 5 for me",
]

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
        "target_task_intention_examples": examples,
    }
)

# One labelled record per generated message.
review_activity_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [14]:
# Pass the review_activity records and the target file name to the project's add_messages helper.
# NOTE(review): the helper lives in `auxiliar`; presumably it appends to the JSON file — confirm.
add_messages(review_activity_messages, file_name)

# Intention 5 - Review User

In [15]:
# Target label for this batch.
intention = "review_user"

# Extra guidance injected into the prompt for this intention.
description = """The host of an activity is reviewing a user who partcipated in the activity, thus the review should include positive or 
negative aspects of the user's participation, please be creative. Crucially the message should clearly state the activity that the user
attended and the user's username as well as have a 1 to 5 rating."""

# Human-written seed messages shown to the model as inspiration.
examples = [
    "John was extremely collaborative at the escape room. He helped the other players throughout. I can easily give him a 5.",
    "Carl was cranky all day at the golf tournament. It was very annoying. I'll rate him a 1.",
    "Despite being quite tired at the buggy excursion, Mike ended up being a really nice guy. I'll give him a 4",
]

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
        "target_task_intention_examples": examples,
    }
)

# One labelled record per generated message.
review_user_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [16]:
# Pass the review_user records and the target file name to the project's add_messages helper.
# NOTE(review): the helper lives in `auxiliar`; presumably it appends to the JSON file — confirm.
add_messages(review_user_messages, file_name)

# Intention 6 - Make Reservation

In [17]:
# Target label for this batch.
intention = "make_reservation"

# Extra guidance injected into the prompt for this intention.
description = """The user is reserving their spot in an activity. They will leave a message for the host letting them know why they
wish to participate in the activity and/or why they're the right fit for it. Obviously, the name of the activity should be clearly 
stated. You may use up to 50 words in this particular case"""

# Human-written seed messages shown to the model as inspiration.
examples = [
    "I want to reserve the chinese cuisine course.",
    "Get me a spot in the football tournament",
    "I want a spot at the gaming convention",
]

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
        "target_task_intention_examples": examples,
    }
)

# One labelled record per generated message.
make_reservation_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [18]:
# Pass the make_reservation records and the target file name to the project's add_messages helper.
# NOTE(review): the helper lives in `auxiliar`; presumably it appends to the JSON file — confirm.
add_messages(make_reservation_messages, file_name)

# Intention 7 - Accept Reservation

In [19]:
# Target label for this batch.
intention = "accept_reservation"

# Extra guidance injected into the prompt for this intention.
description = """The host is accepting a reservation to their activity. The message should be simple only saying that the reservation
has been accepted and clearly state the username of the user whose reservation is being accepted and the activity name."""

# Human-written seed messages shown to the model as inspiration.
examples = [
    "Please accept Chris' reservation for the bowling tournament",
    "Accept Brian's request to particpate in the coding competition",
    "Consider Carl's reservation for the marathon accepted",
]

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
        "target_task_intention_examples": examples,
    }
)

# One labelled record per generated message.
accept_reservation_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [20]:
# Pass the accept_reservation records and the target file name to the project's add_messages helper.
# NOTE(review): the helper lives in `auxiliar`; presumably it appends to the JSON file — confirm.
add_messages(accept_reservation_messages, file_name)

# Intention 8 - Reject Reservation

In [21]:
# Target label for this batch.
intention = "reject_reservation"

# Extra guidance injected into the prompt for this intention.
description = """The host is rejecting a reservation to their activity. The message should be simple only saying that the reservation
has been rejected and clearly state the username of the user whose reservation is being rejecting and the activity name."""

# Human-written seed messages shown to the model as inspiration.
examples = [
    "Please reject Chris' reservation for the bowling tournament",
    "Deny Brian's request to particpate in the coding competition",
    "Consider Carl's reservation for the marathon rejected",
]

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
        "target_task_intention_examples": examples,
    }
)

# One labelled record per generated message.
reject_reservation_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [22]:
# Pass the reject_reservation records and the target file name to the project's add_messages helper.
# NOTE(review): the helper lives in `auxiliar`; presumably it appends to the JSON file — confirm.
add_messages(reject_reservation_messages, file_name)

# Intention 9 - Check Activity Reservations/Participants

In [23]:
# Target label for this batch.
intention = "check_reservations"

# Extra guidance injected into the prompt for this intention.
description = """The host of an activity is looking for a list of users who have attended or have a reservation for his activity. 
Use terms like 'Who reserved...', 'Who attended...', 'Give me a list of partcipants...' or 'Give me a list of reservations...', please 
diversify these requests. This request should very clearly state the name of the activity."""

# Human-written seed messages shown to the model as inspiration.
examples = [
    "Who has reserved the sushi course?",
    "Who attended the Trading Card convention?",
    "Tell me what reservations I have for the muscle car event",
]

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
        "target_task_intention_examples": examples,
    }
)

# One labelled record per generated message.
check_reservations_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [24]:
# Pass the check_reservations records and the target file name to the project's add_messages helper.
# NOTE(review): the helper lives in `auxiliar`; presumably it appends to the JSON file — confirm.
add_messages(check_reservations_messages, file_name)

# Intention 10 - Check Activity Reviews

In [25]:
# Target label for this batch.
intention = "check_reviews"

# Extra guidance injected into the prompt for this intention.
description = """The host of an activity is looking for a list of users who have reviewed his activity. 
Use terms like 'Who reviewed...', 'Give me a list of reviews...', please diversify
these requests. This request should very clearly state the name of the activity."""

# Human-written seed messages shown to the model as inspiration.
examples = [
    "Who has reviewed the sushi course?",
    "Who reviewed the Trading Card convention?",
    "Tell me what reviews I have for the muscle car event",
]

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
        "target_task_intention_examples": examples,
    }
)

# One labelled record per generated message.
check_reviews_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [26]:
# Pass the check_reviews records and the target file name to the project's add_messages helper.
# NOTE(review): the helper lives in `auxiliar`; presumably it appends to the JSON file — confirm.
add_messages(check_reviews_messages, file_name)

# Intention 11 - Check Number of reservations

In [27]:
# Target label for this batch.
intention = "check_number_reservations"

# Extra guidance injected into the prompt for this intention.
description = """The host of an activity is looking for the number of reservations to his activity. The request may directly ask
for the number of reservations, alternatively the host may ask how many spots are left or if the activity is full, for example. 
This request should very clearly state the name of the activity."""

# Human-written seed messages shown to the model as inspiration.
examples = [
    'Is the woodworking workshop full?',
    "How many reservations does the model car convention have?",
    "How many spots are left for the Ice Cream course",
]

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
        "target_task_intention_examples": examples,
    }
)

# One labelled record per generated message.
check_number_reservations_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [28]:
# Pass the check_number_reservations records and the target file name to the project's add_messages helper.
# NOTE(review): the helper lives in `auxiliar`; presumably it appends to the JSON file — confirm.
add_messages(check_number_reservations_messages, file_name)

# No Intention: None

In [29]:
# Template for the "None" class: messages that match none of the listed intentions.
# This rebinds `system_prompt`, replacing the per-intention template defined earlier.
# Placeholders used: {user_intentions}, {k} and {format_instructions}.
# NOTE(review): the sentence "The user might ask general questions, share opinions,
# or express emotions." appears twice, and guideline 4 still refers to "the given
# intention" although this template has no target intention — confirm whether the
# wording should be cleaned up (doing so changes the text sent to the model).
system_prompt = """
You are tasked with generating synthetic user messages.

The user intentions are:
{user_intentions}

Your task is to create {k} distinct messages completely unrelated to the available user intentions.
These messages should be generic and not related to any specific task or intention.
The user is engaging in casual conversation.
The user might ask general questions, share opinions, or express emotions. 
The user might also ask for totaly none related questions to the platform. 
The user might ask general questions, share opinions, or express emotions.

Follow these guidelines:
1. Focus exclusively on not being related to any of the user intentions.
2. Each message should be between 5 and 20 words.
3. Avoid including any details or references to other user intentions.
4. Ensure the messages sound natural and typical of user queries for the given intention.
5. Follow the provided format strictly to maintain consistency.

Message format:
{format_instructions}
"""

In [30]:
# Prompt for the "None" class: only k and user_intentions are supplied at
# invoke() time; format_instructions is pre-filled from the output parser.
prompt = PromptTemplate(
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
    input_variables=["k", "user_intentions"],
    template=system_prompt,
)

# Rebinds `prompt` and `synthetic_data_chain`: from here on the chain generates
# unrelated messages, so re-running an earlier intention cell after this one
# would go through this prompt instead of the per-intention one.
synthetic_data_chain = prompt | llm | output_parser

In [31]:
# Request half as many messages (k // 2) for the "None" class as for each real intention.
response = synthetic_data_chain.invoke(
    {
        "k": (k // 2),
        "user_intentions": user_intentions,
    }
)

# Label every generated message with the literal intention "None".
none_related_messages = [
    {"Intention": "None", "Message": generated.message}
    for generated in response.messages
]

In [32]:
# Pass the "None"-labelled records and the target file name to the project's add_messages helper.
# NOTE(review): the helper lives in `auxiliar`; presumably it appends to the JSON file — confirm.
add_messages(none_related_messages, file_name)