In [1]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Define the controlled vocabularies and generated tags
controlled_vocab = {
    'bangor_graduation_21': ['graduation', 'students', 'academic', 'celebration', 'ceremony', 'caps and gowns', 'higher education'],
    'cooking_at_home': ['cooking', 'home', 'couple', 'kitchen', 'domestic', 'food preparation', 'togetherness'],
    'family_cooking_kitchen': ['cooking', 'family', 'kitchen', 'food preparation', 'togetherness', 'home', 'domestic'],
    'girl_with_shopping_bags': ['shopping', 'retail', 'bags', 'consumerism', 'fashion', 'outdoors', 'lifestyle'],
    'london_tube_rush_hour': ['commuting', 'rush hour', 'subway', 'crowded', 'public transport', 'urban', 'daily life'],
    'london_englanduk_covent_gardenmarketshopping_shopper': ['market', 'shopping', 'london', 'outdoor', 'retail', 'daily life', 'urban'],
    'musician_on_street_mixed_race': ['street musician', 'performance', 'outdoor', 'culture', 'art', 'mixed race', 'music'],
    'new_orleans_musicians_luisiana': ['musicians', 'street performance', 'new orleans', 'culture', 'music', 'outdoor', 'louisiana'],
    'office_workers': ['office', 'work', 'computers', 'professional', 'workspace', 'daily life', 'technology'],
    'office_workers_online_meeting': ['meeting', 'office', 'work', 'online', 'professional', 'technology', 'daily life'],
    'kids_in_class': ['classroom', 'kids', 'education', 'learning', 'school', 'students', 'technology'],
    'passengers_reading_books_and_newspapers_in_a_london_underground': ['commuting', 'reading', 'subway', 'books', 'newspapers', 'london', 'public transport'],
    'people_relaxing_outdoors': ['outdoors', 'relaxing', 'campus', 'students', 'socializing', 'university', 'green space'],
    'piccadilly_line_passengers_in_overcrowded_carriage': ['commuting', 'subway', 'overcrowded', 'public transport', 'london', 'daily life', 'urban'],
    'reading_a_book_on_tube': ['reading', 'subway', 'book', 'commuting', 'public transport', 'daily life', 'urban'],
    'school_playground': ['playground', 'kids', 'school', 'outdoor', 'play', 'education', 'recreation'],
    'street_musicians': ['street musicians', 'performance', 'outdoor', 'music', 'art', 'culture', 'daily life'],
    'teacher_in_class': ['classroom', 'teacher', 'education', 'learning', 'school', 'students', 'instruction'],
    'two_girls_shopping': ['shopping', 'retail', 'girls', 'consumerism', 'fashion', 'lifestyle', 'bags'],
    'vegetable_seller_africa': ['market', 'vegetables', 'seller', 'africa', 'outdoor', 'daily life', 'trade'],
    'vegetable_shopper': ['shopping', 'vegetables', 'grocery', 'market', 'produce', 'basket', 'retail', 'daily life'],
}

google_tags = {
    'bangor_graduation_21': ['outerwear', 'smile', 'black', 'mortarboard', 'scholar', 'academic dress', 'graduation', 'gesture', 'sleeve', 'headgear'],
    'cooking_at_home': ['smile', 'cooking', 'food', 'kitchen appliance', 'recipe', 'home appliance', 'tableware', 'kitchen', 'countertop', 'beard'],
    'family_cooking_kitchen': ['food', 'tableware', 'table', 'sharing', 'window', 'recipe', 'mixing bowl', 'happy', 'plate', 'smile'],
    'girl_with_shopping_bags': ['glasses', 'vision care', 'goggles', 'sunglasses', 'fashion', 'gesture', 'eyewear', 'microphone', 'plant', 'beauty'],
    'kids_in_class': ['blue', 'sharing', 'television set', 'social group', 'leisure', 't-shirt', 'television', 'toddler', 'child', 'event'],
    'london_tube_rush_hour': ['train', 'vehicle', 'motor vehicle', 'rolling stock', 'hat', 'mode of transport', 'public transport', 'travel', 'jacket', 'passenger'],
    'london_englanduk_covent_gardenmarketshopping_shopper': ['photograph', 'temple', 'selling', 'shopping', 'leisure', 'market', 'travel', 'city', 'retail', 'fun'],
    'musician_on_street_mixed_race': ['musical instrument', 'shoe', 'musician', 'blue', 'guitar', 'hat', 'window', 'drum', 'string instrument', 'plucked string instruments'],
    'new_orleans_musicians_luisiana': ['musical instrument', 'drum', 'infrastructure', 'musician', 'building', 'folk instrument', 'string instrument', 'music', 'road', 'band plays'],
    'office_workers_online_meeting': ['table', 'computer', 'laptop', 'furniture', 'shirt', 'personal computer', 'chair', 'interior design', 'interaction', 'leisure'],
    'office_workers': ['computer', 'table', 'personal computer', 'furniture', 'computer monitor', 'computer desk', 'desk', 'computer keyboard', 'peripheral', 'desktop computer'],
    'passengers_reading_books_and_newspapers_in_a_london_underground': ['train', 'motor vehicle', 'mode of transport', 'automotive design', 'travel', 'hat', 'public transport', 'passenger', 'metropolitan area', 'event'],
    'people_relaxing_outdoors': ['cloud', 'plant', 'sky', 'window', 'nature', 'building', 'tree', 'grass', 'leisure', 'city'],
    'piccadilly_line_passengers_in_overcrowded_carriage': ['train', 'vehicle', 'mode of transport', 'travel', 'rolling stock', 'jacket', 'public transport', 'passenger', 'air travel', 'hat'],
    'reading_a_book_on_tube': ['train', 'mode of transport', 'travel', 'public transport', 'passenger', 'electric blue', 'motor vehicle', 'belt', 'vehicle door', 'job'],
    'school_playground': ['clothing', 'plant', 'tree', 'woody plant', 'leisure', 'shorts', 'recreation', 'city', 'fun', 'event'],
    'street_musicians': ['musical instrument', 'band plays', 'musician', 'drum', 'orange', 'microphone', 'performing arts', 'music', 'door', 'entertainment'],
    'teacher_in_class': ['table', 'chair', 'desk', 'engineering', 'teacher', 'leisure', 'event', 't-shirt', 'academic institution', 'art'],
    'two_girls_shopping': ['hand', 'outerwear', 'hairstyle', 'smile', 'hat', 'sleeve', 'textile', 'eyewear', 'standing', 'travel'],
    'vegetable_seller_africa': ['food', 'umbrella', 'selling', 'green', 'natural foods', 'sky', 'hawker', 'greengrocer', 'whole food', 'market'],
    'vegetable_shopper': ['food', 'selling', 'green', 'product', 'natural foods', 'greengrocer', 'whole food', 'ingredient', 'food group', 'retail'],
}

azure_tags = {
    'bangor_graduation_21': ['commencement', 'graduation', 'human face', 'person', 'scholar', 'academic dress', 'clothing', 'academic costume', 'mortarboard', 'college', 'smile', 'outdoor', 'university', 'diploma', 'student', 'academic institution', 'cloak', 'woman'],
    'cooking_at_home': ['person', 'clothing', 'indoor', 'kitchen appliance', 'wall', 'human face', 'home appliance', 'smile', 'countertop', 'appliance', 'kitchen', 'woman', 'standing', 'stove'],
    'family_cooking_kitchen': ['person', 'clothing', 'human face', 'tableware', 'bowl', 'food', 'indoor', 'wall', 'toddler', 'mixing bowl', 'boy', 'sharing', 'meal', 'woman', 'child', 'people', 'girl', 'kitchen', 'family'],
    'girl_with_shopping_bags': ['clothing', 'human face', 'fashion accessory', 'person', 'outdoor', 'glasses', 'accessory', 'handbag', 'goggles', 'eyewear', 'woman', 'sunglasses', 'fashion'],
    'kids_in_class': ['clothing', 'indoor', 'person', 'girl', 'boy', 'toddler', 'school', 'kindergarten', 'learning', 'human face', 'education', 'class', 'child', 'wall', 'group', 'room'],
    'london_tube_rush_hour': ['clothing', 'person', 'people', 'vehicle', 'man', 'passenger', 'group', 'jacket', 'outdoor', 'crowd', 'train', 'subway'],
    'london_englanduk_covent_gardenmarketshopping_shopper': ['clothing', 'scene', 'person', 'market', 'building', 'street', 'outdoor', 'man', 'trade', 'bazaar', 'footwear', 'shopping', 'retail', 'flea market', 'selling', 'shop', 'public space', 'marketplace', 'people', 'store', 'ground', 'standing', 'woman'],
    'musician_on_street_mixed_race': ['musical instrument', 'music', 'clothing', 'person', 'outdoor', 'drum', 'guitar', 'street performance', 'building', 'man', 'ground', 'string instrument', 'folk instrument', 'hat', 'people', 'sitting', 'group', 'street'],
    'new_orleans_musicians_luisiana': ['musical instrument', 'music', 'clothing', 'person', 'outdoor', 'street performance', 'building', 'street', 'man', 'footwear', 'road', 'cello', 'street artist', 'string instrument', 'drum', 'people', 'pedestrian', 'folk instrument', 'musician', 'city', 'group', 'standing'],
    'office_workers_online_meeting': ['clothing', 'furniture', 'person', 'indoor', 'office building', 'desk', 'laptop', 'window', 'chair', 'people', 'computer', 'sitting', 'table', 'wall', 'group', 'woman'],
    'office_workers': ['indoor', 'office building', 'clothing', 'person', 'computer monitor', 'personal computer', 'job', 'computer', 'computer desk', 'desktop computer', 'furniture', 'table', 'computer keyboard', 'output device', 'desk', 'computer network', 'wall', 'people', 'woman', 'office'],
    'passengers_reading_books_and_newspapers_in_a_london_underground': ['clothing', 'person', 'man', 'passenger', 'newspaper', 'public transport', 'text', 'people', 'sitting', 'subway', 'train'],
    'people_relaxing_outdoors': ['outdoor', 'grass', 'cloud', 'sky', 'tree', 'building', 'plant', 'campus', 'park', 'people', 'estate', 'university', 'person', 'lawn', 'house', 'sitting', 'group', 'summer', 'large'],
    'piccadilly_line_passengers_in_overcrowded_carriage': ['clothing', 'person', 'passenger', 'man', 'human face', 'people', 'group', 'train', 'subway', 'indoor'],
    'reading_a_book_on_tube': ['clothing', 'person', 'passenger', 'public transport', 'vehicle', 'woman', 'train', 'subway', 'blue', 'indoor'],
    'school_playground': ['outdoor', 'tree', 'footwear', 'playground', 'clothing', 'person', 'public space', 'land vehicle', 'people', 'street'],
    'street_musicians': ['musical instrument', 'building', 'person', 'bowed instrument', 'music', 'clothing', 'violin family', 'bowed string instrument', 'string instrument', 'violin', 'outdoor', 'classical music', 'footwear', 'street performance', 'ground', 'cello', 'standing', 'people', 'woman', 'street'],
    'teacher_in_class': ['clothing', 'indoor', 'person', 'table', 'furniture', 'wall', 'education', 'whiteboard', 'learning', 'classroom', 'class', 'desk', 'footwear', 'school', 'course', 'chair', 'teacher', 'people', 'group'],
    'two_girls_shopping': ['fashion accessory', 'clothing', 'hat', 'person', 'human face', 'sun hat', 'outdoor', 'handbag', 'street fashion', 'sky', 'fashion', 'fedora', 'smile', 'girl', 'woman', 'standing', 'street'],
    'vegetable_seller_africa': ['scene', 'person', 'marketplace', 'market', 'local food', 'outdoor', 'selling', 'trade', 'greengrocer', 'clothing', 'whole food', 'natural foods', 'bazaar', 'grocer', 'flea market', 'food group', 'vegan nutrition', 'vegetable', 'people', 'standing', 'produce', 'food'],
    'vegetable_shopper': ['natural foods', 'whole food', 'local food', 'greengrocer', 'diet food', 'food group', 'vegan nutrition', 'person', 'trade', 'selling', 'market', 'fruit', 'superfood', 'food', 'grocer', 'clothing', 'grocery store', 'shopkeeper', 'retail', 'apple', 'produce', 'outdoor', 'standing', 'woman', 'vegetable', 'marketplace', 'fresh', 'sale'],
}

rekognition_tags = {
    'bangor_graduation_21': ['people', 'person', 'coat', 'adult', 'male', 'man', 'female', 'woman', 'student', 'graduation'],
    'cooking_at_home': ['adult', 'male', 'man', 'person', 'female', 'woman', 'cookware', 'plant', 'cooking pot', 'stirring food'],
    'family_cooking_kitchen': ['cooking', 'food', 'stirring food', 'child', 'female', 'girl', 'person', 'cooking pot', 'pot', 'cooktop'],
    'girl_with_shopping_bags': ['person', 'shopping', 'adult', 'female', 'woman', 'bag', 'face', 'head', 'shopping bag', 'accessories'],
    'kids_in_class': ['child', 'female', 'girl', 'person', 'boy', 'male', 'monitor', 'student', 'handbag', 'kindergarten'],
    'london_tube_rush_hour': ['people', 'person', 'adult', 'male', 'man', 'boarding', 'boy', 'child', 'female', 'woman'],
    'london_englanduk_covent_gardenmarketshopping_shopper': ['kiosk', 'shorts', 'path', 'city', 'adult', 'male', 'man', 'person', 'child', 'girl'],
    'musician_on_street_mixed_race': ['adult', 'male', 'man', 'person', 'guitar', 'chair', 'shoe', 'music band', 'musician', 'performer'],
    'new_orleans_musicians_luisiana': ['adult', 'female', 'person', 'woman', 'group performance', 'music band', 'male', 'man', 'city', 'guitar'],
    'office_workers_online_meeting': ['adult', 'female', 'person', 'woman', 'laptop', 'chair', 'monitor', 'mobile phone', 'conversation', 'shoe'],
    'office_workers': ['adult', 'male', 'man', 'person', 'table', 'mobile phone', 'laptop', 'female', 'woman', 'pen'],
    'passengers_reading_books_and_newspapers_in_a_london_underground': ['terminal', 'person', 'reading', 'people', 'adult', 'female', 'woman', 'male', 'man', 'jeans'],
    'people_relaxing_outdoors': ['architecture', 'building', 'college', 'person', 'housing', 'house'],
    'piccadilly_line_passengers_in_overcrowded_carriage': ['people', 'person', 'adult', 'male', 'man', 'female', 'woman', 'face', 'terminal', 'jacket'],
    'reading_a_book_on_tube': ['person', 'reading', 'adult', 'female', 'woman', 'pants', 'head', 'railway', 'train', 'terminal'],
    'school_playground': ['play area', 'outdoor play area', 'outdoors', 'people', 'person', 'child', 'female', 'girl', 'car', 'shoe'],
    'street_musicians': ['adult', 'female', 'person', 'woman', 'mailbox', 'coat', 'music', 'musical instrument', 'musician', 'performer'],
    'teacher_in_class': ['school', 'adult', 'female', 'person', 'woman', 'girl', 'teen', 'classroom', 'boy', 'child'],
    'two_girls_shopping': ['person', 'shopping', 'face', 'happy', 'head', 'laughing', 'adult', 'female', 'woman', 'handbag'],
    'vegetable_seller_africa': ['adult', 'male', 'man', 'person', 'female', 'woman', 'jeans', 'market', "farmer's market", 'face'],
    'vegetable_shopper': ['adult', 'female', 'person', 'woman', 'market', 'indoors', 'shop', 'grocery store', 'supermarket', 'produce'],
}

def calculate_metrics(controlled_vocab, tags_dict):
    precision_scores = []
    recall_scores = []
    f1_scores = []
    accuracy_scores = []

    for key in controlled_vocab.keys():
        # Convert both true and predicted tags to lowercase for case-insensitive comparison
        y_true = [1 if tag.lower() in [t.lower() for t in controlled_vocab[key]] else 0 for tag in controlled_vocab[key]]
        y_pred = [1 if tag.lower() in [t.lower() for t in tags_dict.get(key, [])] else 0 for tag in controlled_vocab[key]]

        precision_scores.append(precision_score(y_true, y_pred, zero_division=0))
        recall_scores.append(recall_score(y_true, y_pred, zero_division=0))
        f1_scores.append(f1_score(y_true, y_pred, zero_division=0))
        accuracy_scores.append(accuracy_score(y_true, y_pred))

    return {
        'precision': np.mean(precision_scores),
        'recall': np.mean(recall_scores),
        'f1_score': np.mean(f1_scores),
        'accuracy': np.mean(accuracy_scores)
    }

# Calculate metrics for each AI service
google_metrics = calculate_metrics(controlled_vocab, google_tags)
azure_metrics = calculate_metrics(controlled_vocab, azure_tags)
rekognition_metrics = calculate_metrics(controlled_vocab, rekognition_tags)

# Print the results
print("Google Cloud Vision Metrics:")
print(google_metrics)
print("\nMicrosoft Azure Metrics:")
print(azure_metrics)
print("\nAmazon Rekognition Metrics:")
print(rekognition_metrics)


Google Cloud Vision Metrics:
{'precision': 0.6666666666666666, 'recall': 0.11479591836734693, 'f1_score': 0.19126984126984126, 'accuracy': 0.11479591836734693}

Microsoft Azure Metrics:
{'precision': 0.9523809523809523, 'recall': 0.28316326530612246, 'f1_score': 0.41748436748436746, 'accuracy': 0.28316326530612246}

Amazon Rekognition Metrics:
{'precision': 0.47619047619047616, 'recall': 0.07993197278911564, 'f1_score': 0.13544973544973546, 'accuracy': 0.07993197278911564}
