In [1]:
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Define the controlled vocabularies and generated tags
# Controlled vocabulary for different categories
controlled_vocab = {
    "arabic newspapers": ["news", "arabic", "middle east", "politics", "economy", "social issues", "editorial", "current events", "political analysis", "economic trends", "social commentary", "middle eastern countries", "arabic"],
    "arabic poetry with english translation": ["poetry", "arabic", "translation", "bilingual", "literary", "arabic literature", "poetic forms", "cultural expressions", "nature", "social commentary", "beauty", "arabic", "english"],
    "chinese calligraphy": ["calligraphy", "chinese", "traditional script", "artistic writing", "chinese language", "calligraphy techniques", "cultural art", "traditional", "brush script", "chinese"],
    "english book page": ["literature", "english", "classic", "text", "academic", "english literature", "novel", "cultural studies", "english"],
    "english historical newspaper": ["newspaper", "english", "historical", "advertisement", "news", "news reporting", "historical events", "advertisements", "local news", "english"],
    "handwritten notes": ["handwriting", "personal", "notes", "message", "informal", "personal communication", "handwritten documents", "letters", "cursive", "block letters", "ink"],
    "digitized book page": ["book", "english", "digitization", "historical text", "literature", "history", "education", "digital scan", "archival document", "english"],
    "hindi poetry": ["hindi", "poetry", "love", "emotions", "inspiration", "life lessons"],
    "japanese script": ["japanese", "historical document", "script", "traditional", "handwritten", "calligraphy"],
    "khalil gibran poetry": ["khalil gibran", "arabic", "poetry", "philosophy", "life", "wisdom", "inspirational quotes"],
    "le monde newspaper": ["french", "newspaper", "le monde", "politics", "economy", "international news", "europe"],
    "everett advocate newspaper": ["local news", "everett", "community", "multilingual", "celebration", "events", "advocacy"],
    "magna carta book cover": ["law", "history", "ireland", "magna carta", "liberty", "legal principles", "book cover"],
    "marilyn monroe life magazine cover": ["marilyn monroe", "hollywood", "celebrity", "life magazine", "1952", "pop culture", "icon"],
    "multiple magazine covers (time, life, new yorker, national geographic)": ["magazine covers", "historical", "iconic", "portraits", "news", "culture", "politics", "science"],
    "national geographic cover 'afghan girl'": ["national geographic", "iconic photograph", "afghan girl", "human interest", "1985"],
    "national geographic cover 'found after 17 years'": ["national geographic", "afghan refugee", "human interest", "return", "2002"],
    "old arabic script": ["arabic", "manuscript", "calligraphy", "historical document", "islamic art", "religious text"],
    "old newspaper": ["newspaper", "events", "business", "health", "politics", "local news", "1970s"],
    "time magazine cover - 9/11": ["9/11", "world trade center", "terrorism", "new york", "historical event", "time magazine", "2001"],
    "time magazine cover - 'are you mom enough?'": ["parenting", "motherhood", "time magazine", "social issues", "controversy", "2012"],
    "urdu poetry": ["urdu", "poetry", "love", "emotion", "heartache", "philosophical", "literary art"]
}



azure_tags = {
    "arab magazine": ["text", "newspaper", "news", "newsprint", "publication", "tabloid", "magazine", "senior"],
    "arabic news paper": ["text", "newspaper", "man", "news", "clothing", "publication", "person", "suit", "newsprint"],
    "arabic poetry with translation": ["text", "font", "handwriting", "calligraphy", "ink", "document"],
    "chinese poem": ["text", "font", "handwriting", "typography", "number", "calligraphy"],
    "eng book page.png": ["text", "screenshot"],
    "eng newspaper": ["text", "newspaper", "newsprint", "black and white", "monochrome"],
    "eng page of book": ["text", "book", "paper", "handwriting", "font", "publication", "number", "document"],
    "googlebooks scan": ["text", "letter", "black and white", "screenshot", "document"],
    "hand written note 2": ["text", "handwriting", "calligraphy", "letter", "font", "ink", "document"],
    "hand written note": ["text", "handwriting", "letter", "paper", "document"],
    "hindi poetry": ["text", "book", "plant"],
    "japanese script": ["text", "handwriting", "paper", "book", "calligraphy", "document"],
    "khalil gibran poetry": ["text", "handwriting", "human face", "letter", "man", "autograph", "calligraphy", "signature", "ink", "document", "black and white"],
    "le-monde_1": ["text", "newspaper", "publication", "newsprint", "paper"],
    "magazine page": ["text", "newspaper", "news", "newsprint", "publication", "person", "human face", "tabloid", "magazine"],
    "magna carta": ["text", "handwriting", "letter", "book", "calligraphy", "paper", "document"],
    "marilyn monroe cover life.webp": ["text", "human face", "clothing", "lady", "smile", "person", "female person", "poster", "book", "woman", "retro style", "dress", "magazine"],
    "multiple magazine covers": ["text", "human face", "poster", "man", "graphic design", "fiction", "illustration", "art"],
    "nat geo cover": ["text", "human face", "book", "poster", "clothing", "woman", "book cover"],
    "national geographic": ["text", "human face", "poster", "woman", "magazine", "publication", "book cover", "flyer", "person", "book"],
    "old arabic script": ["handwriting", "text", "drawing", "indoor"],
    "old newspaper": ["text", "newspaper", "newsprint", "publication", "news", "person"],
    "times cover 9 11.webp": ["text", "poster", "screenshot", "outdoor", "smoke"],
    "times cover": ["text", "dog", "clothing", "woman", "footwear", "poster", "flyer", "person", "magazine"],
    "urdu poetry": ["text", "handwriting", "font", "calligraphy", "typography", "document"]
}


rekognition_tags = {
    "arab magazine": ["newspaper", "text", "adult", "male", "man", "person", "boy", "child", "head", "face"],
    "arabic news paper": ["publication", "text", "newspaper", "adult", "male", "man", "person", "qr code", "face", "head"],
    "arabic poetry with translation": ["page", "text", "book", "publication", "handwriting", "letter"],
    "chinese poem": ["text", "alphabet"],
    "eng book page.png": ["computer", "electronics", "tablet computer", "page", "text", "mobile phone", "phone"],
    "eng newspaper": ["text", "newspaper", "person", "head", "face"],
    "eng page of book": ["book", "publication", "page", "text", "novel", "diary"],
    "googlebooks scan": ["page", "text", "letter", "handwriting", "book", "publication", "advertisement", "poster"],
    "hand written note 2": ["text", "handwriting", "white board", "letter"],
    "hand written note": ["text", "white board", "handwriting", "page", "letter"],
    "hindi poetry": ["page", "text", "herbal", "plant", "advertisement", "poster", "leaf", "greeting card", "mail", "letter"],
    "japanese script": ["text", "book", "publication", "handwriting", "animal", "bird", "alphabet", "calligraphy"],
    "khalil gibran poetry": ["text", "adult", "male", "man", "person", "white board", "handwriting", "document"],
    "le-monde_1": ["text", "newspaper", "document", "id cards", "passport", "page"],
    "magazine page": ["adult", "male", "man", "person", "newspaper", "text", "publication", "female", "woman", "baby"],
    "magna carta": ["book", "publication", "text", "page", "handwriting", "novel", "diary"],
    "multiple magazine covers": ["publication", "adult", "male", "man", "person", "face", "head", "advertisement", "poster", "book"],
    "nat geo cover": ["publication", "book", "adult", "female", "person", "woman", "fashion", "advertisement", "poster", "hoodie"],
    "national geographic": ["advertisement", "poster", "publication", "head", "person", "photography", "portrait", "book", "blackboard", "novel"],
    "old arabic script": ["text", "book", "publication", "calligraphy", "handwriting", "alphabet", "symbol", "number"],
    "old newspaper": ["newspaper", "text", "person", "adult", "male", "man", "bride", "female", "woman", "face"],
    "times cover": ["publication", "adult", "female", "person", "woman", "chair", "shoe", "magazine", "face", "jeans"],
    "urdu poetry": ["text", "blackboard", "document", "alphabet", "handwriting"]
}



vision_tags = {
    "arab magazine": ["photograph", "newspaper", "organ", "publication", "news", "font", "sleeve", "advertising", "design", "newsprint"],
    "arabic news paper": ["newspaper", "publication", "font", "suit", "news", "blazer", "advertising", "newsprint", "paper", "formal wear"],
    "arabic poetry with translation": ["font", "handwriting", "number", "document", "art", "circle", "writing", "rectangle", "screenshot", "paper product"],
    "chinese poem": ["font", "art", "handwriting", "symmetry", "pattern", "parallel", "electric blue", "number", "ink", "drawing"],
    "eng book page.png": ["font", "rectangle", "material property", "electric blue", "parallel", "circle", "screenshot", "communication device", "square", "magenta"],
    "eng newspaper": ["newspaper", "font", "material property", "publication", "parallel", "paper", "paper product", "monochrome", "news", "newsprint"],
    "eng page of book": ["font", "publication", "paper", "paper product", "document", "symmetry"],
    "googlebooks scan": ["font", "paper", "document", "publication", "circle", "paper product"],
    "hand written note 2": ["handwriting", "font", "electric blue", "parallel", "writing", "number", "symmetry"],
    "hand written note": ["handwriting", "rectangle", "font", "parallel", "writing", "paper", "paper product", "pattern", "number", "letter"],
    "hindi poetry": ["plant", "font", "terrestrial plant", "twig", "event", "paper", "rectangle", "fashion accessory", "paper product", "pattern"],
    "japanese script": ["handwriting", "book", "font", "paper", "writing", "paper product", "document", "publication", "rectangle", "art"],
    "khalil gibran poetry": ["nose", "photograph", "facial expression", "white", "handwriting", "jaw", "smile", "beard", "gesture", "font"],
    "le-monde_1": ["newspaper", "publication", "font", "newsprint", "paper product", "paper", "document", "book", "circle", "parallel"],
    "magazine page": ["smile", "newspaper", "publication", "sleeve", "news", "font", "material property", "advertising", "newsprint", "tabloid"],
    "magna carta": ["book", "font", "publication", "handwriting", "paper", "parallel", "pattern", "illustration", "paper product", "document"],
    "multiple magazine covers": ["watch", "chin", "hairstyle", "human", "analog watch", "font", "art", "clock", "tie", "publication"],
    "nat geo cover": ["purple", "poster", "violet", "font", "magenta", "sleeve", "publication", "advertising", "graphic design", "illustration"],
    "national geographic": ["publication", "poster", "advertising", "book cover", "font", "book", "magazine", "fiction", "hair coloring", "flesh"],
    "old arabic script": ["handwriting", "art", "font", "writing", "rectangle", "book", "paper", "number", "parallel", "ink"],
    "old newspaper": ["newspaper", "publication", "news", "font", "material property", "newsprint", "parallel", "paper", "design", "paper product"],
    "times cover": ["arm", "leg", "shorts", "active tank", "knee", "thigh", "waist", "font", "handwriting", "sportswear"],
    "urdu poetry": ["handwriting", "font", "writing", "art", "darkness", "monochrome photography", "monochrome", "number", "illustration", "event"]
}


# Function to calculate metrics
def calculate_metrics(controlled_vocab, tags_dict):
    precision_scores = []
    recall_scores = []
    f1_scores = []
    accuracy_scores = []

    for key in controlled_vocab.keys():
        # Convert both true and predicted tags to lowercase for case-insensitive comparison
        y_true = [1 if tag.lower() in [t.lower() for t in controlled_vocab[key]] else 0 for tag in controlled_vocab[key]]
        y_pred = [1 if tag.lower() in [t.lower() for t in tags_dict.get(key, [])] else 0 for tag in controlled_vocab[key]]

        precision_scores.append(precision_score(y_true, y_pred, zero_division=0))
        recall_scores.append(recall_score(y_true, y_pred, zero_division=0))
        f1_scores.append(f1_score(y_true, y_pred, zero_division=0))
        accuracy_scores.append(accuracy_score(y_true, y_pred))

    return {
        'precision': np.mean(precision_scores),
        'recall': np.mean(recall_scores),
        'f1_score': np.mean(f1_scores),
        'accuracy': np.mean(accuracy_scores)
    }

# Calculate metrics for each AI service
google_metrics = calculate_metrics(controlled_vocab, vision_tags)
azure_metrics = calculate_metrics(controlled_vocab, azure_tags)
rekognition_metrics = calculate_metrics(controlled_vocab, rekognition_tags)

# Print the results
print("Google Cloud Vision Metrics:")
print(google_metrics)
print("\nMicrosoft Azure Metrics:")
print(azure_metrics)
print("\nAmazon Rekognition Metrics:")
print(rekognition_metrics)


Google Cloud Vision Metrics:
{'precision': 0.045454545454545456, 'recall': 0.006493506493506493, 'f1_score': 0.011363636363636364, 'accuracy': 0.006493506493506493}

Microsoft Azure Metrics:
{'precision': 0.09090909090909091, 'recall': 0.01406926406926407, 'f1_score': 0.02435064935064935, 'accuracy': 0.01406926406926407}

Amazon Rekognition Metrics:
{'precision': 0.13636363636363635, 'recall': 0.021645021645021644, 'f1_score': 0.037337662337662336, 'accuracy': 0.021645021645021644}
