In [2]:
!pip install transformers



In [4]:
queries_with_categories = [
    {"query": "I need a phone with a really good camera for under $500.", "category": "Electronics"},
    {"query": "What’s the best wireless headset for working from home?", "category": "Electronics"},
    {"query": "Can you suggest a smartwatch that works with iPhones?", "category": "Electronics"},
    {"query": "I’m looking for a TV that has both 4K and Dolby Vision support.", "category": "Electronics"},

    {"query": "I want a set of non-stick pans that last long and are easy to clean.", "category": "Home & Kitchen"},
    {"query": "What’s the best air purifier to help with allergies?", "category": "Home & Kitchen"},
    {"query": "I’m looking for a vacuum cleaner that works well for picking up pet hair.", "category": "Home & Kitchen"},
    {"query": "Can you recommend a fridge that’s energy-efficient and has smart features?", "category": "Home & Kitchen"},

    {"query": "I want a blue dress for summer that’s comfortable.", "category": "Fashion & Apparel"},
    {"query": "Can you help me find waterproof hiking boots for men?", "category": "Fashion & Apparel"},
    {"query": "I’m looking for a warm jacket for winter that isn’t too bulky.", "category": "Fashion & Apparel"},
    {"query": "I need a casual handbag made of leather, but not too expensive.", "category": "Fashion & Apparel"},

    {"query": "Can you recommend organic skincare for sensitive skin?", "category": "Health & Personal Care"},
    {"query": "I need an electric toothbrush that has a really long battery life.", "category": "Health & Personal Care"},
    {"query": "What’s the best hair dryer that works well for frizzy hair?", "category": "Health & Personal Care"},
    {"query": "I’m looking for vegan multivitamins that are good for daily use.", "category": "Health & Personal Care"},

    {"query": "I need a matte lipstick that lasts all day.", "category": "Beauty"},
    {"query": "Can you recommend a good anti-aging serum?", "category": "Beauty"},
    {"query": "I’m looking for a waterproof eyeliner that won’t smudge easily.", "category": "Beauty"},
    {"query": "What’s a good fragrance for summer that’s light and fresh?", "category": "Beauty"},

    {"query": "I want a lightweight tent for camping with my family.", "category": "Sports & Outdoors"},
    {"query": "What’s a good mountain bike for a beginner?", "category": "Sports & Outdoors"},
    {"query": "Can you recommend a yoga mat that’s comfortable for home workouts?", "category": "Sports & Outdoors"},
    {"query": "I need a backpack that’s waterproof and great for hiking.", "category": "Sports & Outdoors"},

    {"query": "I’m looking for an educational toy for my 5-year-old.", "category": "Toys & Games"},
    {"query": "What’s a good board game for playing with the whole family?", "category": "Toys & Games"},
    {"query": "My son loves remote control cars. Any recommendations?", "category": "Toys & Games"},
    {"query": "I need some fun water toys for my kids to play with outside this summer.", "category": "Toys & Games"},

    {"query": "Can you recommend a good thriller with a female lead?", "category": "Books"},
    {"query": "What’s a good book about productivity?", "category": "Books"},
    {"query": "I’m looking for a fantasy series for young adults. Any suggestions?", "category": "Books"},
    {"query": "Can you recommend a picture book with animals for kids?", "category": "Books"},

    {"query": "I need a cordless drill for some DIY projects around the house.", "category": "Tools & Home Improvement"},
    {"query": "What’s a good smart thermostat that will help save on energy?", "category": "Tools & Home Improvement"},
    {"query": "I’m looking for power tools for woodwork. Any recommendations?", "category": "Tools & Home Improvement"},
    {"query": "I want solar-powered lights for my garden.", "category": "Tools & Home Improvement"},

    {"query": "Can you recommend some gluten-free snacks for kids?", "category": "Grocery"},
    {"query": "I need keto-friendly ingredients for baking at home.", "category": "Grocery"},
    {"query": "What’s the best organic coffee for brewing at home?", "category": "Grocery"},
    {"query": "What are some good dairy-free milk alternatives?", "category": "Grocery"}
]

In [5]:
candidate_labels = [
    'Electronics',
    'Home & Kitchen',
    'Fashion & Apparel',
    'Health & Personal Care',
    'Beauty',
    'Sports & Outdoors',
    'Toys & Games',
    'Books',
    'Tools & Home Improvement',
    'Grocery'
]

In [53]:
import timeit
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Function to run the classifier and return results for each query
def run_classification(queries_with_categories, candidate_labels, classifier):
    results_list = []
    total_time = 0  # To store the total execution time for all queries

    for test in queries_with_categories:
        # Start timing for the current query
        start_time = timeit.default_timer()

        # Run the classifier
        results = classifier(test['query'], candidate_labels)

        # End timing for the current query
        end_time = timeit.default_timer()

        # Calculate the time taken for the query
        time_taken = end_time - start_time
        total_time += time_taken  # Accumulate the time for each query

        results_list.append({
            "query": test['query'],
            "original_category": test['category'],
            "predicted_labels": results['labels'],
            "predicted_scores": results['scores'],
            "time_taken": time_taken  # Storing the time taken for each query
        })

        # Print the results and time taken for each query (optional)
        print(f"Query: {test['query']}")
        print(f"Original Category: {test['category']}")
        print(f"Predicted Category: {results['labels']}")
        print(f"Predicted Scores: {results['scores']}")
        print(f"Time taken: {time_taken:.4f} seconds")
        print("-" * 80)
        print()

    # Calculate the average time
    average_time = total_time / len(queries_with_categories)
    print(f"Average Time per Query: {average_time:.4f} seconds")

    return results_list

# Function to evaluate the predictions
def evaluate_predictions(results_list):
    true_labels = []
    predicted_top1 = []
    predicted_top3 = []
    evaluation_metrics = []

    # Gather true labels and top predictions
    for result in results_list:
        true_labels.append(result['original_category'])
        predicted_top1.append(result['predicted_labels'][0])  # Top-1 predicted label
        predicted_top3.append(result['predicted_labels'][:3])  # Top-3 predicted labels

    # Calculate Top-1 Accuracy
    top1_accuracy = accuracy_score(true_labels, predicted_top1)
    evaluation_metrics.append(top1_accuracy)
    print(f"Top-1 Accuracy: {top1_accuracy:.4f}")

    # Calculate Top-3 Accuracy
    top3_correct = sum(1 for true_label, top3_predictions in zip(true_labels, predicted_top3) if true_label in top3_predictions)
    top3_accuracy = top3_correct / len(true_labels)
    evaluation_metrics.append(top3_correct)
    print(f"Top-3 Accuracy: {top3_accuracy:.4f}")

    # Calculate Precision, Recall, and F1-Score
    precision, recall, f1, _ = precision_recall_fscore_support(true_labels, predicted_top1, average='weighted')
    evaluation_metrics.append(precision)
    evaluation_metrics.append(recall)
    evaluation_metrics.append(f1)
    print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

    return evaluation_metrics


# Zero Shot Classification with BART LARGE

In [25]:
from transformers import pipeline
classifier_bart = pipeline("zero-shot-classification",
                      model="facebook/bart-large-mnli")



In [50]:
# Main function to run classification and evaluation
def main(queries_with_categories, candidate_labels, classifier):
    # Run the classification
    results = run_classification(queries_with_categories, candidate_labels, classifier)

    # Evaluate the predictions
    evaluation_metrics = evaluate_predictions(results)
    return evaluation_metrics

main(queries_with_categories, candidate_labels, classifier_bart)

Query: I need a phone with a really good camera for under $500.
Original Category: Electronics
Predicted Category: ['Electronics', 'Beauty', 'Home & Kitchen', 'Sports & Outdoors', 'Toys & Games', 'Health & Personal Care', 'Books', 'Tools & Home Improvement', 'Fashion & Apparel', 'Grocery']
Predicted Scores: [0.4416484534740448, 0.11992223560810089, 0.10086075961589813, 0.09315022826194763, 0.058817245066165924, 0.04412177577614784, 0.040335334837436676, 0.035431548953056335, 0.0334584042429924, 0.03225397691130638]
Time taken: 10.2930 seconds
--------------------------------------------------------------------------------

Query: What’s the best wireless headset for working from home?
Original Category: Electronics
Predicted Category: ['Home & Kitchen', 'Electronics', 'Beauty', 'Books', 'Sports & Outdoors', 'Health & Personal Care', 'Toys & Games', 'Fashion & Apparel', 'Grocery', 'Tools & Home Improvement']
Predicted Scores: [0.1639995127916336, 0.1562432199716568, 0.13981063663959503,

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[0.6, 33, 0.6421428571428571, 0.6, 0.5751082251082252]

# Zero Shot Classification with Cross-Encoder/Deberta-v3-Base-nli

In [51]:
from transformers import pipeline

classifier_deberta = pipeline("zero-shot-classification", model='cross-encoder/nli-deberta-v3-base')




In [56]:
# Main function to run classification and evaluation
def main(queries_with_categories, candidate_labels, classifier):
    # Run the classification
    results = run_classification(queries_with_categories, candidate_labels, classifier)

    # Evaluate the predictions
    evaluation_metrics = evaluate_predictions(results)
    return evaluation_metrics

# Call the main function
main(queries_with_categories, candidate_labels, classifier_deberta)

Query: I need a phone with a really good camera for under $500.
Original Category: Electronics
Predicted Category: ['Electronics', 'Sports & Outdoors', 'Fashion & Apparel', 'Beauty', 'Home & Kitchen', 'Grocery', 'Health & Personal Care', 'Books', 'Toys & Games', 'Tools & Home Improvement']
Predicted Scores: [0.9707419276237488, 0.008652317337691784, 0.004545948468148708, 0.004464976955205202, 0.0036659655161201954, 0.00303573883138597, 0.0018449216149747372, 0.0011541432468220592, 0.0009503174223937094, 0.0009438643464818597]
Time taken: 3.6638 seconds
--------------------------------------------------------------------------------

Query: What’s the best wireless headset for working from home?
Original Category: Electronics
Predicted Category: ['Electronics', 'Home & Kitchen', 'Sports & Outdoors', 'Tools & Home Improvement', 'Grocery', 'Health & Personal Care', 'Fashion & Apparel', 'Books', 'Beauty', 'Toys & Games']
Predicted Scores: [0.9521678686141968, 0.009584474377334118, 0.006928

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[0.475, 32, 0.43636363636363634, 0.475, 0.4228571428571429]

# Zero Shot Classification with typeform/distilbert-base-uncased-mnli

In [57]:
from transformers import pipeline

classifier_distilbert = pipeline("zero-shot-classification", model='typeform/distilbert-base-uncased-mnli')

The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


In [58]:
# Main function to run classification and evaluation
def main(queries_with_categories, candidate_labels, classifier):
    # Run the classification
    results = run_classification(queries_with_categories, candidate_labels, classifier)

    # Evaluate the predictions
    evaluation_metrics = evaluate_predictions(results)
    return evaluation_metrics

# Call the main function
main(queries_with_categories, candidate_labels, classifier_distilbert)

Query: I need a phone with a really good camera for under $500.
Original Category: Electronics
Predicted Category: ['Electronics', 'Beauty', 'Tools & Home Improvement', 'Health & Personal Care', 'Home & Kitchen', 'Sports & Outdoors', 'Fashion & Apparel', 'Toys & Games', 'Grocery', 'Books']
Predicted Scores: [0.6001147031784058, 0.10776521265506744, 0.05732416734099388, 0.04956219717860222, 0.04530344903469086, 0.04472143203020096, 0.03775296360254288, 0.026158669963479042, 0.01603618636727333, 0.015260972082614899]
Time taken: 0.9095 seconds
--------------------------------------------------------------------------------

Query: What’s the best wireless headset for working from home?
Original Category: Electronics
Predicted Category: ['Electronics', 'Beauty', 'Tools & Home Improvement', 'Health & Personal Care', 'Sports & Outdoors', 'Fashion & Apparel', 'Books', 'Grocery', 'Home & Kitchen', 'Toys & Games']
Predicted Scores: [0.9862163662910461, 0.00411610770970583, 0.003534412011504173

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[0.575, 30, 0.6363636363636365, 0.575, 0.5528571428571429]

# Zero shot classification with MoritzLaurer/mDeBERTa-v3-base-mnli-xnli

In [59]:
from transformers import pipeline

classifier_mdeberta = pipeline("zero-shot-classification", model='MoritzLaurer/mDeBERTa-v3-base-mnli-xnli')

In [60]:
# Main function to run classification and evaluation
def main(queries_with_categories, candidate_labels, classifier):
    # Run the classification
    results = run_classification(queries_with_categories, candidate_labels, classifier)

    # Evaluate the predictions
    evaluation_metrics = evaluate_predictions(results)
    return evaluation_metrics


# Call the main function
main(queries_with_categories, candidate_labels, classifier_mdeberta)

Query: I need a phone with a really good camera for under $500.
Original Category: Electronics
Predicted Category: ['Electronics', 'Health & Personal Care', 'Fashion & Apparel', 'Sports & Outdoors', 'Tools & Home Improvement', 'Beauty', 'Grocery', 'Home & Kitchen', 'Toys & Games', 'Books']
Predicted Scores: [0.5835335850715637, 0.06371399015188217, 0.060272786766290665, 0.05692867189645767, 0.05061699450016022, 0.04326498135924339, 0.04170524701476097, 0.04060068726539612, 0.03484542667865753, 0.024517685174942017]
Time taken: 6.7317 seconds
--------------------------------------------------------------------------------

Query: What’s the best wireless headset for working from home?
Original Category: Electronics
Predicted Category: ['Electronics', 'Tools & Home Improvement', 'Health & Personal Care', 'Fashion & Apparel', 'Home & Kitchen', 'Sports & Outdoors', 'Toys & Games', 'Grocery', 'Books', 'Beauty']
Predicted Scores: [0.3935095965862274, 0.130445197224617, 0.12060186266899109, 0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[0.575, 33, 0.5366666666666667, 0.575, 0.5403174603174603]