## Playing around with Intent Recognition

In [None]:
%pip install python-dotenv pandas openai

In [2]:
from dotenv import load_dotenv
from pathlib import Path
load_dotenv()
import os
import pandas as pd


In [3]:
file_path = "./data/1.0/data/en-US.jsonl"

# Read the JSONL file
df = pd.read_json(file_path, lines=True)

In [None]:
df 

### Get all the intents from the fraction of a dataset

In [None]:
df_sampled = df[df["partition"].str.contains("test")]
print(len(df_sampled))
df_sampled = df_sampled.sample(frac=0.3, random_state=5)
print(len(df_sampled))
intents = set(df_sampled['intent'])
# print(len(intents))
test_set = df_sampled['utt']
print(len(intents))
print(len(test_set))
print(test_set)

print(intents)

In [51]:
tools = [
    {
        "type": "function",
        "function": {
            "name": "identify_intent",
            "description": "Identify the intent of the message",
            "parameters": {
                "type": "object",
                "properties": {
                    "intent": {
                        "type": "string",
                        "description": "The intent of the user message, what is the message about.",
                        "enum": list(intents)
                    },
                },
                "required": ["intent"],
            },
        },
    },
]

In [52]:
from openai import OpenAI
import json

openai = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# to call chatgpt
def call_gpt(entry: str) -> str:
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        temperature=0.1,
        tools=tools,
        messages=[
            {
                "role": "system",
                "content": f"You are an intent classification system. Your goal is to identify the intent of the message.",
            },
            {"role": "user", "content": f"{entry}"},
        ],
    )
    response = response.choices[0].message.tool_calls
    if response: 
        try:
            intent = json.loads(response[0].function.arguments)["intent"]
        except Exception as e:
            return "No intent argument found"
    else:
        return "No tool call"
    return intent

In [None]:
from tqdm import tqdm
generated_intents = list()
for entry in tqdm(test_set, 'processing'):
    intent = call_gpt(entry)
    # print(intent)
    generated_intents.append(intent)

print(generated_intents)

In [68]:
df_sampled['generated_intent'] = generated_intents
df_sampled.drop(columns=["genereated_intents"], inplace=True)
df_sampled.to_csv("./data/1.0/data/en-US-labeled.csv")

In [None]:
df_sampled

In [None]:
counter = 0
for i, row in df_sampled.iterrows():
    if row['intent'] == row['generated_intent']:
        counter += 1
    else:
        print(
            f"This is the predicted one - {row['generated_intent']} and it's the actual intent {row['intent']} at index - {i}"
        )

In [74]:
print(counter)
print(f"Ratio - {counter} out of {len(df_sampled)} are correct, accuracy is = {(counter / len(df_sampled)) * 100} %.")

707
Ratio - 707 out of 892 are correct, accuracy is = 79.26008968609865 %.
