In [2]:
import ast
import json
import time

from transformers import pipeline

from language_model import llm_generate
from utils import examples, intents, entities

LLM Inference: Accuracy (Performance) and Time (Latency)

In [33]:
from utils import examples, intents, entities
# Run the LLM on every example and collect its parsed predictions in llm_out.
llm_out = []
for user_input, true_intent, true_entity in zip(examples, intents, entities):
    llm_output = llm_generate(user_input)

    # Print before parsing so the raw generation is still visible if parsing raises.
    print(f"User Input: {user_input}")
    print(f"LLM Output: {llm_output}")
    print(f"True Intent: {true_intent}, True Entities: {true_entity}")
    print("-" * 50)

    # The outputs shown below look like JSON, so parse them as JSON first:
    # JSON literals such as true/false/null are not valid Python literals and
    # would make ast.literal_eval raise. Fall back to literal_eval so that
    # Python-style dict text (e.g. single-quoted keys) still parses as before.
    try:
        parsed_output = json.loads(llm_output)
    except json.JSONDecodeError:
        parsed_output = ast.literal_eval(llm_output)
    llm_out.append(parsed_output)


User Input: Open YouTube
LLM Output: {"intent": "open_app", "entities": [{"type": "app_name", "value": "YouTube"}]}
True Intent: open_app, True Entities: [{'type': 'app_name', 'value': 'YouTube'}]
--------------------------------------------------
User Input: Search for cooking videos on YouTube
LLM Output: {"intent": "search", "entities": [{"type": "search_query", "value": "cooking videos"}, {"type": "app_name", "value": "YouTube"}]}
True Intent: search, True Entities: [{'type': 'search_query', 'value': 'cooking videos'}, {'type': 'app_name', 'value': 'YouTube'}]
--------------------------------------------------
User Input: Turn up the volume
LLM Output: {"intent": "settings", "entities": [{"type": "settings_action", "value": "volume_up"}]}
True Intent: settings, True Entities: [{'type': 'settings_action', 'value': 'volume_up'}]
--------------------------------------------------
User Input: Find action movies
LLM Output: {"intent": "play_media", "entities": [{"type": "content_type", 

In [38]:
# Calculate accuracy of the LLM's predicted intents against the true intents.
# A predicted "play_media" intent is counted as "search" before comparing.
pred_intents = [output['intent'] for output in llm_out]
pred_intents = [
    "search" if intent == "play_media" else intent
    for intent in pred_intents
]
correct = sum(1 for p, t in zip(pred_intents, intents) if p == t)
accuracy = correct / len(intents)
print(f"Accuracy: {accuracy:.2%}")

Accuracy: 87.50%


In [39]:
# Calculate Time
# Average wall-clock latency of llm_generate per example, over repeated calls.
# time.perf_counter() replaces time.time(): it is monotonic and uses the
# highest-resolution clock available, so system clock adjustments during a
# run cannot distort the measured interval.
N_LLM_TIMING_RUNS = 5      # repetitions averaged per example
N_LLM_TIMING_EXAMPLES = 4  # only the first examples are timed (presumably to bound runtime)

llm_latencies = []
for example in examples[:N_LLM_TIMING_EXAMPLES]:
    start_time = time.perf_counter()
    for _ in range(N_LLM_TIMING_RUNS):  # Run multiple times for averaging
        llm_generate(example)
    end_time = time.perf_counter()
    avg_latency = (end_time - start_time) / N_LLM_TIMING_RUNS
    llm_latencies.append(avg_latency)

for example, latency in zip(examples[:N_LLM_TIMING_EXAMPLES], llm_latencies):
    print(f"Example: '{example}' - Average Latency: {latency:.4f} seconds")

Example: 'Open YouTube' - Average Latency: 43.2046 seconds
Example: 'Search for cooking videos on YouTube' - Average Latency: 41.3157 seconds
Example: 'Turn up the volume' - Average Latency: 36.4100 seconds
Example: 'Find action movies' - Average Latency: 37.0170 seconds


Zero-Shot Classifier

In [40]:
# Candidate intents offered to the zero-shot classifier.
labels = ["open_app", "search", "settings", "None"]

# Zero-shot classification pipeline on multilingual DistilBERT.
# NOTE(review): the loader output for this checkpoint warns that the
# classifier weights are newly initialized and that no 'entailment' label id
# could be determined, so predictions from this pipeline are not meaningful
# until the model is trained on a downstream task.
classifier = pipeline(
    task="zero-shot-classification",
    model="distilbert/distilbert-base-multilingual-cased",  # 134M parameters
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


Classifier Inference: Accuracy (Performance) and Time (Latency)

In [41]:
# Classify every example in one call, then report the first label/score of
# each result next to the true intent.
# (Presumably the pipeline returns labels ranked by score, making index 0 the
# top prediction — confirm against the pipeline documentation.)
result = classifier(examples, candidate_labels=labels)

pred_intents = []
for true_intent, res in zip(intents, result):
    pred_label, pred_score = res['labels'][0], res['scores'][0]
    pred_intents.append(pred_label)

    print(f"True Intent: {true_intent}")
    print(f"Predicted Intent: {pred_label} with score {pred_score:.4f}")
    print()

True Intent: open_app
Predicted Intent: search with score 0.2518

True Intent: search
Predicted Intent: open_app with score 0.2514

True Intent: settings
Predicted Intent: open_app with score 0.2526

True Intent: search
Predicted Intent: None with score 0.2520

True Intent: None
Predicted Intent: open_app with score 0.2515

True Intent: settings
Predicted Intent: open_app with score 0.2521

True Intent: search
Predicted Intent: open_app with score 0.2513

True Intent: open_app
Predicted Intent: open_app with score 0.2509



In [42]:
# Calculate accuracy: fraction of examples whose predicted intent equals the true intent.
matches = [t == p for t, p in zip(intents, pred_intents)]
correct = matches.count(True)
accuracy = correct / len(intents)
print(f"Accuracy: {accuracy:.2%}")

Accuracy: 12.50%


In [43]:
# Calculate Time
# Average wall-clock latency of the classifier per example, over repeated calls.
# time.perf_counter() replaces time.time(): it is monotonic and uses the
# highest-resolution clock available, which matters for sub-second timings
# like the ones measured here.
N_CLS_TIMING_RUNS = 5  # repetitions averaged per example

cls_latencies = []
for example in examples:
    start_time = time.perf_counter()
    for _ in range(N_CLS_TIMING_RUNS):  # Run multiple times for averaging
        classifier(example, candidate_labels=labels)
    end_time = time.perf_counter()
    avg_latency = (end_time - start_time) / N_CLS_TIMING_RUNS
    cls_latencies.append(avg_latency)

for example, latency in zip(examples, cls_latencies):
    print(f"Example: '{example}' - Average Latency: {latency:.4f} seconds")


Example: 'Open YouTube' - Average Latency: 0.1311 seconds
Example: 'Search for cooking videos on YouTube' - Average Latency: 0.1606 seconds
Example: 'Turn up the volume' - Average Latency: 0.1462 seconds
Example: 'Find action movies' - Average Latency: 0.1296 seconds
Example: 'Good to see you again!' - Average Latency: 0.1404 seconds
Example: 'Mute the TV' - Average Latency: 0.1472 seconds
Example: 'I want to watch Ronaldo Goals' - Average Latency: 0.1422 seconds
Example: 'Launch Netflix' - Average Latency: 0.1344 seconds


Speedup of the Classifier over the LLM

In [47]:
# Per-example speedup: LLM latency divided by classifier latency.
# zip stops at the shorter list, so only examples that have an LLM timing
# are compared (the LLM was timed on examples[:4] only).
speedup_factors = []
for llm_latency, cls_latency in zip(llm_latencies, cls_latencies):
    speedup_factors.append(llm_latency / cls_latency)

for speedup in speedup_factors:
    print(f"Speedup Factor: {speedup:.2f}x")

Speedup Factor: 329.55x
Speedup Factor: 257.26x
Speedup Factor: 249.04x
Speedup Factor: 285.71x


Intent Data

In [6]:
import pandas as pd
# Load the English and Arabic intent datasets (English first, then Arabic).
eng_df, ara_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/english_intent_dataset.csv",
        "data/arabic_intents_clean_balanced.csv",
    )
)

In [7]:
# Preview the first rows of the English dataset.
eng_df.head()

Unnamed: 0,text,label
0,launch YouTube and perform a search for cat vi...,open_app_and_search
1,adjust sharpness,settings
2,search travel documentaries,search
3,raise the brightness,settings
4,Disney Plus service go and search animations,open_app_and_search


In [8]:
# Preview the first rows of the Arabic dataset.
ara_df.head()

Unnamed: 0,text,label
0,فين ألاقي أسعار السيارات,Search
1,دلني على وصفات طبخ,Search
2,فعل الموقع,Settings
3,أريد معرفة معلومات عن كورسات برمجة,Search
4,اذهب إلى إعدادات الشاشة,Settings


In [9]:
# Number of rows per intent label in the English dataset.
eng_df.label.value_counts()

label
search                 238
settings               231
open_app               224
out_of_scope           220
open_app_and_search    195
Name: count, dtype: int64

In [10]:
# Number of rows per intent label in the Arabic dataset.
ara_df.label.value_counts()

label
Search                 250
Open_app               250
Out_of_scope           250
Open_app_and_search    250
Settings               249
Name: count, dtype: int64

Merge Datasets

In [14]:
# Stack the English and Arabic datasets into one frame, then shuffle the rows.
# A fixed random_state makes the shuffle reproducible, so re-running the
# notebook yields the same row order (and the same CSV when df is saved).
SHUFFLE_SEED = 42
df = pd.concat([eng_df, ara_df], ignore_index=True)
df = df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)
# NOTE(review): both source CSVs carry an "Unnamed: 0" column that is kept
# here; it looks like a stale per-file row index — confirm whether it should
# be dropped before the merged data is saved.
df.head(20)

Unnamed: 0,text,label
0,مرحبا,Out_of_scope
1,open HBO Max and look for Game of Thrones,open_app_and_search
2,مرحبا,Out_of_scope
3,أريد معرفة معلومات عن أسعار السيارات,Search
4,look for fantasy series,search
5,show me survival shows,search
6,search zombie shows,search
7,أريد معرفة معلومات عن تمارين رياضية,Search
8,افتح تطبيق TikTok,Open_app
9,look for cooking shows,search


In [None]:
# Label counts after merging: the two datasets spell labels with different
# capitalization, so each intent appears under two distinct values here.
df.label.value_counts()

label
Out_of_scope           250
Search                 250
Open_app_and_search    250
Open_app               250
Settings               249
search                 238
settings               231
open_app               224
out_of_scope           220
open_app_and_search    195
Name: count, dtype: int64

In [17]:
def uncapitalize_first_letter(text):
    """Return ``text`` with its first character lower-cased.

    Only the first character is changed; the remainder of the string is kept
    as-is. Empty strings and non-string values are returned unchanged.
    """
    if not isinstance(text, str) or not text:
        return text
    first_char, remainder = text[0], text[1:]
    return first_char.lower() + remainder

# Lower-case the first letter of every label so that both capitalizations of
# an intent (e.g. "Search" and "search") collapse into a single value.
df["label"] = df["label"].apply(uncapitalize_first_letter)

In [18]:
# Label counts after normalizing the label capitalization.
df.label.value_counts()

label
search                 488
settings               480
open_app               474
out_of_scope           470
open_app_and_search    445
Name: count, dtype: int64

In [20]:
# Save the merged dataset without the DataFrame index.
# The path uses a forward slash: the previous "data\m..." spelling contained
# the invalid escape sequence "\m" and, outside Windows, does not refer to a
# file inside the data/ directory. The forward-slash form also matches the
# path this file is later read back from.
df.to_csv("data/multilingual_intent_dataset.csv", index=False)

Trained Classifier

In [None]:
# Text-classification pipeline for the trained intent classifier identified
# by model_path.
# top_k=None: presumably makes the pipeline return a score for every label
# per input — the evaluation cells read the first entry of each result.
model_path = "mohamedgomaaa/intent-classifier-multilingual"
model = pipeline(task="text-classification", model=model_path, top_k=None)

In [30]:
import pandas as pd
# Load the merged multilingual dataset and pull out parallel lists of input
# texts and their intent labels.
df = pd.read_csv("data/multilingual_intent_dataset.csv")
texts, labels = (df[column].tolist() for column in ("text", "label"))

Evaluate on Training Data

In [31]:
# Classify every text, take the label of the first entry in each result as
# the prediction, and compute the fraction that matches the true labels.
outs = model(texts)
pred_label = []
for out in outs:
    pred_label.append(out[0]['label'])
correct = [p == t for p, t in zip(pred_label, labels)].count(True)
accuracy = correct / len(labels)

In [32]:
# Display the accuracy computed in the previous cell.
accuracy

0.9995757318625371

Evaluate on Test Data

In [33]:
# Load the test split and pull out parallel lists of input texts and their
# intent labels (this replaces the df / texts / labels loaded earlier).
df = pd.read_csv("data/intent_test_data.csv")
texts, labels = df['text'].tolist(), df['label'].tolist()

In [34]:
# Classify the test texts, take the label of the first entry in each result
# as the prediction, and display the fraction that matches the true labels.
outs = model(texts)
pred_label = list(map(lambda out: out[0]['label'], outs))
correct = sum(1 for p, t in zip(pred_label, labels) if p == t)
accuracy = correct / len(labels)
accuracy

0.92