In [1]:
import pandas as pd
from semantic_router import Route
from semantic_router import RouteLayer
from semantic_router.encoders import OpenAIEncoder, HuggingFaceEncoder
from sklearn.model_selection import train_test_split

In [2]:
# Read the synthetic intent dataset from JSON.
# NOTE(review): the filename is spelled "intetions" -- confirm it matches the file on disk.
df_synthetic = pd.read_json("synthetic_intetions.json")

# Features keep the Id and Message columns; labels are the Intention column as a plain list.
X_syn = df_synthetic.loc[:, ["Id", "Message"]]
y_syn = df_synthetic["Intention"].tolist()

In [3]:
# Read the additional ("new") intent dataset from JSON.
df_new = pd.read_json("new_intentions.json")

# Same layout as the synthetic data: Id/Message features and an Intention label list.
X_new = df_new.loc[:, ["Id", "Message"]]
y_new = df_new["Intention"].tolist()

In [4]:
# Hold out 10% of the synthetic data as a test set. The split is stratified on
# the intent labels and uses a fixed random_state so it is reproducible.
# (train_test_split is imported in the notebook's import cell.)
X_train, X_test, y_train, y_test = train_test_split(
    X_syn, y_syn, test_size=0.1, random_state=0, stratify=y_syn
)

In [5]:
# Training features: the synthetic training split followed by all of the new
# data, re-indexed from 0.
X_final = pd.concat(
    [X_train, X_new],
    ignore_index=True,
)

# Training labels, in the same order as the rows of X_final.
y_final = [*y_train, *y_new]

In [6]:
# Map the string sentinel "None" to a real Python None in both label lists;
# every other label is kept unchanged.
y_final = [label if label != "None" else None for label in y_final]
y_test = [label if label != "None" else None for label in y_test]

In [7]:
# Group the training messages by intent. Only the three intents below get a
# bucket; messages with any other label (including None) are skipped.
messages_by_intent = {
    "order_status": [],
    "create_order": [],
    "product_information": [],
}

for message, label in zip(X_final["Message"], y_final):
    if label in messages_by_intent:
        messages_by_intent[label].append(message)

order_status_messages = messages_by_intent["order_status"]
create_order_messages = messages_by_intent["create_order"]
product_information_messages = messages_by_intent["product_information"]

# One Route per intent, using that intent's training messages as utterances.
order_status = Route(
    name="order_status",
    description="The user wants to know the status of their order.",
    utterances=order_status_messages,
)

create_order = Route(
    name="create_order",
    description="The user intends to place an order for a product on the Cobuy platform.",
    utterances=create_order_messages,
)

product_information = Route(
    name="product_information",
    description="The user is interested in obtaining information about a specific product available on the Cobuy platform.",
    utterances=product_information_messages,
)

In [8]:
# Encoder built with the library's default HuggingFaceEncoder settings.
encoder = HuggingFaceEncoder()

# Routes handed to the route layer, one per supported intent.
routes = [
    order_status,
    create_order,
    product_information,
]

In [9]:
# Build the route layer from the encoder and the three routes.
# Optional knobs noted by the original author but left at their defaults here:
#   aggregation = "mean", "max" or "sum"
#   top_k = 5
hf_rl = RouteLayer(
    encoder=encoder,
    routes=routes,
)

In [10]:
# Fit the route layer on the combined training messages and labels, capped at
# 500 iterations.
# NOTE(review): presumably this tunes the per-route score thresholds -- confirm
# against the semantic_router documentation.
hf_rl.fit(
    X=X_final["Message"].to_list(),
    y=y_final,
    max_iter=500,
)

Generating embeddings:   0%|          | 0/1 [00:00<?, ?it/s]

Training:   0%|          | 0/500 [00:00<?, ?it/s]

In [11]:
# Score the fitted layer on the held-out synthetic test split and report the
# result as a percentage.
accuracy = hf_rl.evaluate(
    X=X_test["Message"].to_list(),
    y=y_test,
)
print(f"Accuracy: {accuracy*100:.2f}%")

Generating embeddings:   0%|          | 0/1 [00:00<?, ?it/s]

Accuracy: 90.91%


In [12]:
# Save the route layer's configuration to layer.json in the working directory.
hf_rl.to_json("layer.json")

[32m2024-11-19 13:30:08 INFO semantic_router.utils.logger Saving route config to layer.json[0m
