In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline

# Intent labels that are reported as-is; any other predicted label is
# collapsed to 'out_of_scope' by the classifier function.
CLASSES_OF_INTEREST = [
    'translate',
    'travel_alert',
    'flight_status',
    'lost_luggage',
    'travel_suggestion',
    'carry_on',
    'book_hotel',
    'book_flight',
]

# Checkpoint fine-tuned on the original (English) CLINC150 dataset.
# The model and its tokenizer are loaded from the same checkpoint name so the
# two cannot drift apart.
INTENT_CHECKPOINT = "dbounds/roberta-large-finetuned-clinc"
model = AutoModelForSequenceClassification.from_pretrained(INTENT_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(INTENT_CHECKPOINT)

# French -> English translation pipeline, applied before classification
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-fr-en")


In [None]:
def lazy_intent_classifier(user_input, verbose=True):
    """Classify an utterance by translating it to English and scoring it.

    The text is passed through the module-level ``translator`` pipeline, the
    translation is tokenized with ``tokenizer`` and scored by ``model``. The
    highest-scoring label is returned if it is one of ``CLASSES_OF_INTEREST``;
    every other label (including the model's own ``'oos'``) is reported as
    ``'out_of_scope'``.

    Args:
        user_input: The utterance to classify. Blank strings and non-string
            values (e.g. ``None`` or the NaN pandas produces for an empty CSV
            cell) are treated as out of scope without running any model.
        verbose: When true (the default, matching the previous behaviour),
            print the translated sentence. Pass ``False`` to keep batch runs
            quiet.

    Returns:
        str: an entry of ``CLASSES_OF_INTEREST`` or ``'out_of_scope'``.
    """
    # Nothing meaningful to translate or classify: short-circuit instead of
    # feeding an empty / non-text value to the translation pipeline.
    if not isinstance(user_input, str) or not user_input.strip():
        return 'out_of_scope'

    # Local import: this block needs torch only to disable autograd, and the
    # tokenizer below already requests PyTorch tensors (return_tensors="pt").
    import torch

    # Translate the utterance to English
    translated_user_input = translator(user_input)[0]['translation_text']
    if verbose:
        print(translated_user_input)

    # Tokenize; truncate so an overly long translation cannot exceed the
    # encoder's maximum sequence length.
    inputs = tokenizer(translated_user_input, return_tensors="pt", truncation=True)

    # Inference only: skip gradient bookkeeping for the forward pass
    with torch.no_grad():
        logits = model(**inputs).logits

    # Highest-scoring class along the label axis (single-row batch)
    intent_index = logits.argmax(dim=-1).item()
    intent_label = model.config.id2label[intent_index]

    # Anything outside the classes of interest -- which also covers the
    # model's 'oos' label -- is reported under the single name 'out_of_scope'.
    if intent_label not in CLASSES_OF_INTEREST:
        return 'out_of_scope'
    return intent_label

In [None]:
# Read the evaluation examples from disk
examples = pd.read_csv('../data/examples.csv')

# Utterances to classify and their reference labels, in file order
user_inputs = examples['text'].to_list()
true_labels = examples['label'].to_list()

# Run the classifier once per utterance
predicted_labels = list(map(lazy_intent_classifier, user_inputs))

# One row per example: the input, its reference label and the prediction
results = pd.DataFrame(
    {
        'user_input': user_inputs,
        'true_label': true_labels,
        'predicted_label': predicted_labels,
    }
)

In [None]:
# Persist the per-example results as CSV, leaving out the DataFrame index
results.to_csv(path_or_buf='../data/examples_naive.csv', index=False)

In [None]:
# Count the rows whose prediction equals the reference label
correct_predictions = int((results['true_label'] == results['predicted_label']).sum())

# Every row of `results` is one prediction
total_predictions = len(results)

# Fraction of predictions that were correct
accuracy = correct_predictions / total_predictions
print(f'Accuracy: {accuracy}')

In [None]:
# Show every example the classifier got wrong, separated by a blank line
incorrect_predictions = results.loc[results['true_label'].ne(results['predicted_label'])]
for _, miss in incorrect_predictions.iterrows():
    print(
        f'User input: {miss["user_input"]}',
        f'True label: {miss["true_label"]}',
        f'Predicted label: {miss["predicted_label"]}',
        '',
        sep='\n',
    )