In [1]:
from huggingface_hub import HfApi
import pandas as pd
from IPython.display import display
import warnings

# Ignore irrelevant warnings from Hugging Face API
warnings.filterwarnings("ignore")

# Initialize API
api = HfApi()

# Define NLP-related tasks
nlp_tasks = [
    "text-classification",  
    "token-classification",  
    "question-answering",  
    "translation",  
    "summarization",  
    "text-generation",  
    "fill-mask",  
    "sentence-similarity",  
    "feature-extraction",  
    "table-question-answering",  
    "zero-shot-classification",  
    "text2text-generation",  
    "text-embeddings-inference"  
]

model_data = []
all_languages = set()

for task in nlp_tasks:
    try:
        models = api.list_models(filter=task, limit=50)  # Retrieve up to 50 models per task
    except Exception as e:
        print(f"Failed to retrieve models for task {task}, error: {e}")
        continue  # Skip this task and proceed to the next one

    for model in models:
        model_id = model.id
        try:
            # Get detailed model information
            model_info = api.model_info(model_id)
            tags = model_info.tags if model_info.tags else []

            # Extract possible language tags
            languages = sorted({tag for tag in tags if len(tag) == 2 or tag.startswith("language:")})

            all_languages.update(languages)  # Record all detected languages

            model_data.append({
                "model_id": model_id,
                "languages": ", ".join(languages)  # Store languages as a string for readability
            })
        except Exception as e:
            print(f"Failed to retrieve details for {model_id}, skipping this model: {e}")
            continue  # Skip this model and proceed to the next one

# Convert to DataFrame
df = pd.DataFrame(model_data)

# Display final NLP-related model data
display(df)

# Print all detected language tags
print("All detected languages in NLP-related models:", sorted(all_languages))


Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.
Invalid model-index. Not loading eval results into CardData.


Unnamed: 0,model_id,languages
0,BAAI/bge-reranker-v2-m3,
1,Alibaba-NLP/gte-multilingual-reranker-base,"af, ar, az, be, bg, bn, ca, cs, cy, da, de, el..."
2,ProsusAI/finbert,"en, tf"
3,BAAI/bge-reranker-large,"en, zh"
4,facebook/bart-large-mnli,
...,...,...
645,DeepChem/SmilesTokenizer_PubChem_1M,
646,KBLab/sentence-bert-swedish-cased,sv
647,T-Systems-onsite/cross-en-de-roberta-sentence-...,"de, en, tf"
648,shaktiman404/dummy-bert-base-cased,


All detected languages in NLP-related models: ['T5', 'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az', 'ba', 'be', 'bg', 'bi', 'bm', 'bn', 'bo', 'br', 'bs', 'ca', 'ce', 'ch', 'co', 'cs', 'cv', 'cy', 'da', 'de', 'dv', 'dz', 'ee', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'ff', 'fi', 'fj', 'fo', 'fr', 'fy', 'ga', 'gd', 'ge', 'gl', 'gn', 'gu', 'gv', 'ha', 'he', 'hi', 'ho', 'hr', 'ht', 'hu', 'hy', 'id', 'ig', 'io', 'is', 'it', 'iu', 'iw', 'ja', 'jv', 'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn', 'ko', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lg', 'li', 'lm', 'ln', 'lo', 'lt', 'lu', 'lv', 'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', 'mt', 'my', 'na', 'nb', 'ne', 'nl', 'nn', 'no', 'nr', 'ns', 'nv', 'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pl', 'ps', 'pt', 'qa', 'qu', 'r1', 'rm', 'rn', 'ro', 'rp', 'ru', 'rw', 'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk', 'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', 't5', 'ta', 'te', 'tf', 'tg', 'th', 'ti', 'tk',