In [1]:
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key used by ChatOpenAI
# comes from — confirm against the project's .env file.
load_dotenv()

True

The `auxiliar` module provides helper functions for saving your synthetic data.

In [3]:
from auxiliar import add_messages

In [4]:
# Product names grouped by category (category name -> list of product names).
PRODUCT_DATABASE = {
    "Computers and Laptops": [
        "TechPro Ultrabook", "BlueWave Gaming Laptop", "PowerLite Convertible",
        "TechPro Desktop", "BlueWave Chromebook",
    ],
    "Smartphones and Accessories": [
        "SmartX ProPhone", "MobiTech PowerCase", "SmartX MiniPhone",
        "MobiTech Wireless Charger", "SmartX EarBuds",
    ],
    "Televisions and Home Theater Systems": [
        "CineView 4K TV", "SoundMax Home Theater", "CineView 8K TV",
        "SoundMax Soundbar", "CineView OLED TV",
    ],
    "Gaming Consoles and Accessories": [
        "GameSphere X", "ProGamer Controller", "GameSphere Y",
        "ProGamer Racing Wheel", "GameSphere VR Headset",
    ],
    "Audio Equipment": [
        "AudioPhonic Noise-Canceling Headphones", "WaveSound Bluetooth Speaker",
        "AudioPhonic True Wireless Earbuds", "WaveSound Soundbar",
        "AudioPhonic Turntable",
    ],
    "Cameras and Camcorders": [
        "FotoSnap DSLR Camera", "ActionCam 4K", "FotoSnap Mirrorless Camera",
        "ZoomMaster Camcorder", "FotoSnap Instant Camera",
    ],
}

In [5]:
# Schema for a single generated user message; it is the element type of
# ListSyntheticUserMessages.messages.
# NOTE(review): documented with `#` comments on purpose — a class docstring would
# presumably be picked up as the schema description and so alter the format
# instructions sent to the LLM; confirm before converting to a docstring.
class SyntheticUserMessage(BaseModel):

    # Text of the generated user message; required (the default is `...`).
    message: str = Field(
        ...,
        title="Message",
        description="The user message to generate for the target task intention.",
    )


# Top-level schema the LLM response is parsed into: a required list of
# SyntheticUserMessage items.
class ListSyntheticUserMessages(BaseModel):

    # Every message generated for one request; required (the default is `...`).
    messages: list[SyntheticUserMessage] = Field(
        ...,
        title="Messages",
        description="The list of synthetic user messages to generate for the target task intention.",
    )

# Parser bound to the list schema. Its format instructions are injected into the
# prompts, and it is the final stage of the generation chains, so invoking a chain
# yields an object exposing `.messages`.
output_parser = PydanticOutputParser(pydantic_object=ListSyntheticUserMessages)

In [6]:
# Prompt used to generate messages for ONE target intention. Placeholders:
#   {user_intentions}                   - all intent labels known to the platform
#   {k}                                 - number of messages to generate
#   {target_task_intention}             - label of the intent to generate for
#   {target_task_intention_description} - free-text description of that intent
#   {format_instructions}               - output schema, pre-filled from output_parser
system_prompt = """
You are tasked with generating synthetic user messages for an e-commerce platform called Cobuy, which specializes in electronics and gadgets.

The user intentions are:
{user_intentions}

Your task is to create {k} distinct messages for the following target task intention:
{target_task_intention}

Specific information about the target task intention:
{target_task_intention_description}

Follow these guidelines:
1. Focus exclusively on the target task intention, ensuring the message is relevant.
2. Each message should be between 5 and 20 words.
3. Avoid including any details or references to other user intentions.
4. Ensure the messages sound natural and typical of user queries for the given intention.
5. Follow the provided format strictly to maintain consistency.

Message format:
{format_instructions}
"""

# Each variable is its own list element: a missing comma between two adjacent
# string literals makes Python silently concatenate them into one bogus name.
# `format_instructions` is supplied through `partial_variables`, so it is not
# declared as an input variable as well.
prompt = PromptTemplate(
    template=system_prompt,
    input_variables=[
        "k",
        "user_intentions",
        "target_task_intention",
        "target_task_intention_description",
    ],
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
)

In [7]:
# Intent labels passed to the prompts so the model knows which intents exist.
user_intentions = ["order_status", "create_order", "product_information"]

# Number of synthetic user messages to generate for each target task intention.
k = 30

# File name handed to add_messages when persisting the generated messages.
file_name = "synthetic_intetions.json"

# Chat model used for generation, called with temperature 0.0.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)

# Pipeline: fill the prompt -> call the model -> parse into ListSyntheticUserMessages.
synthetic_data_chain = prompt | llm | output_parser

# Intention 1 - Order Status

In [8]:
# Generate `k` synthetic messages for the order-status intent.
intention = "order_status"

description = "The user wants to know the status of their order, to do so they provide their order number and ask for the current status of the order. \
                They might ask questions related to the delivery date, expected delivery time, or the current location of the order."

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
    }
)

# One record per generated message, labelled with the intent it was generated for.
order_status_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

The next cell saves the messages to a JSON file, where you can review and edit them.

In [9]:
# Hand the order-status records to the project helper for persistence.
# NOTE(review): presumably appends to the JSON file named by `file_name` — confirm
# in auxiliar.add_messages (re-running this cell may duplicate entries).
add_messages(order_status_messages, file_name)

# Intention 2 - Create Order 

In [10]:
# Generate `k` synthetic messages for the create-order intent.
intention = "create_order"

# Flat, comma-separated list of every product name in the catalogue. It is
# substituted into the description below; without this step the model would
# receive the literal text "{products}" instead of real product names.
products = ", ".join(
    product_name
    for category_products in PRODUCT_DATABASE.values()
    for product_name in category_products
)

description = """The user intends to place an order for a product on the Cobuy platform. \
                In this scenario, it is assumed that the user has already browsed and selected a product but has not yet finalized the order. \
                 The user may express this intention in various ways. The user might not specify the exact quantity, in which case a default amount may be inferred, or they might simply refer to ordering the product without specifying the quantity. \
                 Additionally, the user will always refer to the product by its name or by refering to it for example as 'it' or 'this product', as the user has already selected the product. \
                The available products are {products}.
                """.format(products=products)

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
    }
)

# One record per generated message, labelled with the intent it was generated for.
create_order_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [12]:
# Hand the create-order records to the project helper for persistence.
# NOTE(review): presumably appends to the JSON file named by `file_name` — confirm
# in auxiliar.add_messages (re-running this cell may duplicate entries).
add_messages(create_order_messages, file_name)

# Intention 3 - Product Information

In [13]:
# Generate `k` synthetic messages for the product-information intent.
intention = "product_information"

# Catalogue rendered as "Category: product, product; Category: ...". Categories
# are kept because the description below allows category-level questions.
# Without this substitution the model would receive the literal text
# "{products}" instead of real product names.
products = "; ".join(
    f"{category}: {', '.join(category_products)}"
    for category, category_products in PRODUCT_DATABASE.items()
)

description = """The user is interested in obtaining information about a specific product available on the Cobuy platform. \
                This includes asking for information such as the product's features, specifications, price, warranty, brand, model number, and description. \
                The user migth just ask for information about a category of products, in which case the user is interested in general information about the products in that category. \
                The user will always refer to the product by its name or by refering to it as 'it' or 'this product'. \
                The available products are {products}.
                """.format(products=products)

response = synthetic_data_chain.invoke(
    {
        "k": k,
        "user_intentions": user_intentions,
        "target_task_intention": intention,
        "target_task_intention_description": description,
    }
)

# One record per generated message, labelled with the intent it was generated for.
product_information_messages = [
    {"Intention": intention, "Message": generated.message}
    for generated in response.messages
]

In [14]:
# Hand the product-information records to the project helper for persistence.
# NOTE(review): presumably appends to the JSON file named by `file_name` — confirm
# in auxiliar.add_messages (re-running this cell may duplicate entries).
add_messages(product_information_messages, file_name)

# No Intention: None

In [15]:
# Prompt for the "None" class: messages unrelated to every known intent.
# Placeholders: {user_intentions}, {k}, {format_instructions}.
# Compared with the earlier draft of this prompt, the duplicated "general
# questions" sentence is removed and guidelines 3-4 no longer refer to a
# "given intention", which does not exist for this task.
system_prompt = """
You are tasked with generating synthetic user messages.

The user intentions are:
{user_intentions}

Your task is to create {k} distinct messages completely unrelated to the available user intentions.
These messages should be generic and not related to any specific task or intention.
The user is engaging in casual conversation.
The user might ask general questions, share opinions, or express emotions.
The user might also ask questions that are totally unrelated to the platform.

Follow these guidelines:
1. Focus exclusively on not being related to any of the user intentions.
2. Each message should be between 5 and 20 words.
3. Avoid including any details or references to the user intentions listed above.
4. Ensure the messages sound natural and typical of casual, off-topic user messages.
5. Follow the provided format strictly to maintain consistency.

Message format:
{format_instructions}
"""

In [16]:
# Rebuild the prompt and chain for the no-intent case. This rebinds `prompt` and
# `synthetic_data_chain`, so the per-intention cells above must not be re-run
# after this cell without first re-running the cells that define the original
# prompt and chain.
prompt = PromptTemplate(
    input_variables=["k", "user_intentions"],
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
    template=system_prompt,
)

# Same pipeline shape as before: prompt -> model -> structured parser.
synthetic_data_chain = prompt | llm | output_parser

In [17]:
# Request a third as many off-topic messages as were requested per real intent.
response = synthetic_data_chain.invoke(
    {
        "k": k // 3,
        "user_intentions": user_intentions,
    }
)

# Label every generated message with the literal string "None".
none_related_messages = [
    {"Intention": "None", "Message": generated.message}
    for generated in response.messages
]

In [18]:
# Hand the no-intent records to the project helper for persistence.
# NOTE(review): presumably appends to the JSON file named by `file_name` — confirm
# in auxiliar.add_messages (re-running this cell may duplicate entries).
add_messages(none_related_messages, file_name)