LLM Benchmark

In [None]:
from huggingface_hub import list_models
import time

# Human-readable task label -> Hugging Face Hub pipeline tag used when querying
# models. Insertion order is the order in which tasks are reported below.
nlp_tasks = dict(
    [
        ("Text Classification", "zero-shot-classification"),
        (
            "Named Entity Recognition (NER) OR Part-of-Speech Tagging (POS)",
            "token-classification",
        ),
        ("Sentiment Analysis", "text-classification"),
        ("Semantic Analysis", "sentence-similarity"),
        ("Language Modeling OR Chatbots OR Text Generation", "text-generation"),
        ("Machine Translation", "translation"),
        ("Text Summarization", "summarization"),
        ("Information Extraction", "question-answering"),
        ("Speech Recognition", "automatic-speech-recognition"),
    ]
)

# Report banner: title line followed by a 70-character rule.
print("🤖 Top 10 Models per NLP Task by trending_score and downloads")
print(70 * "=")


def get_models(pipeline_tag: str, sort: str, limit: int = 10) -> list:
    """Return up to ``limit`` Hub model ids for a pipeline tag, ordered by ``sort``.

    The query is ``list_models(filter="transformers", pipeline_tag=..., sort=...,
    limit=...)`` -- i.e. the "transformers" restriction is passed through the
    ``filter`` argument, not through a separate ``library=`` keyword.

    Args:
        pipeline_tag: Hub pipeline tag to query (e.g. "token-classification").
        sort: Metric to sort by; this notebook uses "trending_score" and
            "downloads".
        limit: Maximum number of models requested.

    Returns:
        A list of model id strings in the order returned by the Hub. Entries
        exposing neither a truthy ``modelId`` nor a truthy ``id`` attribute are
        skipped. On any error the problem is printed and ``[]`` is returned, so
        a single failing task does not abort the whole report.
    """
    try:
        infos = list_models(
            filter="transformers", pipeline_tag=pipeline_tag, sort=sort, limit=limit
        )
        # Consume the result inside the ``try`` so that errors raised while
        # iterating are reported the same way as errors raised by the call.
        candidates = (
            getattr(info, "modelId", None) or getattr(info, "id", None)
            for info in infos
        )
        return [mid for mid in candidates if mid]
    except Exception as e:
        # Deliberate best-effort: report and fall back to an empty result.
        print(
            f"   ❌ Error fetching models for '{pipeline_tag}' sorted by '{sort}': {e}"
        )
        return []


def merged_unique(top_trending: list, top_downloads: list, limit: int = 10) -> list:
    """Merge two ranked lists into one de-duplicated list of at most ``limit`` items.

    Priority is purely positional: every item of the first argument is
    considered before any item of the second, and within each list the
    original order is kept. An item is kept only the first time it is seen.

    Args:
        top_trending: Higher-priority list (``None`` is treated as empty).
        top_downloads: Lower-priority list (``None`` is treated as empty).
        limit: Maximum length of the result. Values ``<= 0`` yield ``[]``.

    Returns:
        A new list; the inputs are not modified. Items must be hashable.
    """
    # Without this guard the cap was only checked *after* the first append,
    # so limit=0 (or a negative limit) still returned one element.
    if limit <= 0:
        return []

    seen = set()
    out = []
    for lst in (top_trending, top_downloads):
        for mid in lst or ():
            if mid in seen:
                continue
            seen.add(mid)
            out.append(mid)
            if len(out) >= limit:
                return out
    return out


def _print_ranked(heading: str, models: list, empty_label: str) -> None:
    """Print ``heading`` followed by a 1-based numbered list of ``models``.

    When ``models`` is empty a single "No models found for <empty_label>" line
    is printed instead.
    """
    print(heading)
    if models:
        for i, model in enumerate(models, 1):
            print(f"   {i}. {model}")
    else:
        print(f"   No models found for {empty_label}")


# For every task: show the top models by downloads, by trending score, and the
# de-duplicated union of both.
start = time.time()
for task_name, pipeline_tag in nlp_tasks.items():
    print(f"\n📋 {task_name}  (tag: {pipeline_tag})")
    print("-" * 70)

    top_downloads = get_models(pipeline_tag, sort="downloads", limit=10)
    _print_ranked("\nTop 10 by downloads:", top_downloads, "downloads")

    top_trending = get_models(pipeline_tag, sort="trending_score", limit=10)
    _print_ranked("Top 10 by trending_score:", top_trending, "trending_score")

    # merged_unique gives priority to its FIRST argument. Pass the trending
    # list first, matching the function's parameter names and the NER cell
    # (the arguments were previously swapped here).
    merged = merged_unique(top_trending, top_downloads, limit=20)
    _print_ranked("\nMerged (unique up to 20):", merged, "merged")

    time.sleep(0.1)  # be gentle to the API

print("\n" + "=" * 70)
print(f"✅ Completed in {time.time() - start:.1f}s")

🤖 Top 10 Models per NLP Task by trending_score and downloads

📋 Text Classification  (tag: zero-shot-classification)
----------------------------------------------------------------------

Top 10 by downloads:
   1. facebook/bart-large-mnli
   2. sileod/deberta-v3-base-tasksource-nli
   3. MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
   4. joeddav/xlm-roberta-large-xnli
   5. joeddav/bart-large-mnli-yahoo-answers
   6. MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7
   7. vicgalle/xlm-roberta-large-xnli-anli
   8. valhalla/distilbart-mnli-12-1
   9. cross-encoder/nli-deberta-v3-large
   10. MoritzLaurer/mDeBERTa-v3-base-mnli-xnli
Top 10 by trending_score:
   1. facebook/bart-large-mnli
   2. MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli
   3. joeddav/xlm-roberta-large-xnli
   4. MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli
   5. claritylab/zero-shot-explicit-binary-bert
   6. MoritzLaurer/deberta-v3-large-zeroshot-v1.1-all-33
   7. tasksource/ModernBERT-base-nli
  

In [None]:
import time
import json
import os
from transformers import pipeline

TASK_NAME = "Named Entity Recognition (NER)"
PIPELINE_TAG = "token-classification"

# Step 1: build the candidate list. Trending models are fetched first, then the
# most-downloaded ones; merged_unique de-duplicates them, trending taking priority.
print(f"Collecting models for: {TASK_NAME} ({PIPELINE_TAG})")
trend, download = (
    get_models(PIPELINE_TAG, sort=metric, limit=10)
    for metric in ("trending_score", "downloads")
)
merged_models = merged_unique(trend, download, limit=20)

print("\nMerged model list (up to 20 unique):")
for rank, name in enumerate(merged_models, start=1):
    print(f" {rank:2d}. {name}")

# 2. Quick validation on a sample text
sample_text = (
    "Apple CEO Tim Cook met Elon Musk in Paris to discuss AI initiatives at the Louvre Museum. "
    "Microsoft and OpenAI representatives joined later in the afternoon."
)

# Limit number of models to validate to keep runtime/resource usage reasonable.
max_models = 20  # You can raise this after first quick check.

# Report how many models will actually be validated: the merged list can be
# shorter than max_models (duplicates are removed), so printing max_models
# directly overstated the count.
models_to_validate = min(max_models, len(merged_models))
print(
    f"\nValidating first {models_to_validate} models on sample text:\n{sample_text}\n"
)

# Device selection for the pipelines: 0 when torch reports CUDA as available,
# otherwise -1. torch is optional -- any failure while importing or probing it
# falls back to -1 rather than aborting the cell.
try:
    import torch  # optional acceleration

    device = 0 if torch.cuda.is_available() else -1
except Exception:
    device = -1

def _normalize_entity(ent: dict) -> dict:
    """Map one pipeline output item onto a fixed, JSON-friendly schema.

    Both ``entity_group`` and ``entity`` are consulted because which key is
    present depends on whether the pipeline grouped tokens (see the
    aggregation fallback in the loop below). ``word`` falls back to the
    ``entity`` label when missing or empty. Any of the returned values may be
    ``None`` if the item lacks the corresponding key.
    """
    score = ent.get("score", None)
    return {
        "word": ent.get("word") or ent.get("entity"),
        "entity_group": ent.get("entity_group") or ent.get("entity"),
        # float() turns non-builtin numeric scalars into something json can dump.
        "score": float(score) if score is not None else None,
        "start": ent.get("start"),
        "end": ent.get("end"),
    }


# One record per model: either timings + entities, or timings + error text.
results_summary = []

for model_id in merged_models[:max_models]:
    print("=" * 90)
    print(f"Loading model: {model_id}")
    t0 = time.time()
    try:
        try:
            ner_pipe = pipeline(
                PIPELINE_TAG,
                model=model_id,
                aggregation_strategy="simple",  # prefer grouped entities
                device=device,
            )
        except TypeError:
            # Fallback if aggregation_strategy not supported at init
            ner_pipe = pipeline(PIPELINE_TAG, model=model_id, device=device)
        t_loaded = time.time()

        outputs = ner_pipe(sample_text)
        t_done = time.time()

        # ``elapsed`` keeps its original meaning (load + inference). The two
        # components are also reported separately, because the load phase
        # (which may include a download) can dominate the total.
        elapsed = t_done - t0
        load_seconds = t_loaded - t0
        inference_seconds = t_done - t_loaded
        print(
            f"Inference succeeded in {elapsed:.2f}s "
            f"(load {load_seconds:.2f}s, inference {inference_seconds:.2f}s). Entities:"
        )

        # Normalize output format
        normalized = [_normalize_entity(ent) for ent in outputs]
        if not normalized:
            print("  (no entities returned)")
        for item in normalized:
            score = item["score"]
            display_score = f"{score:.3f}" if score is not None else "-"
            # ``!s`` makes the width spec safe for None: formatting None with
            # ``:<20`` raises TypeError, which would have turned a successful
            # run into a recorded failure via the outer ``except``.
            print(
                f"  - {item['word']!s:<20} {item['entity_group']!s:<18} "
                f"score={display_score} span=({item['start']},{item['end']})"
            )

        results_summary.append(
            {
                "model": model_id,
                "elapsed_seconds": elapsed,
                "load_seconds": load_seconds,
                "inference_seconds": inference_seconds,
                "entities": normalized,
            }
        )
    except Exception as e:
        # Best-effort benchmark: record the failure and move on to the next model.
        elapsed = time.time() - t0
        print(f"Failed after {elapsed:.2f}s: {e.__class__.__name__}: {e}")
        results_summary.append(
            {
                "model": model_id,
                "error": f"{e.__class__.__name__}: {e}",
                "elapsed_seconds": elapsed,
            }
        )

# Step 3: write the per-model summary to results/ner_validation_results.json
# (the directory is created on demand; non-ASCII text is written as-is in UTF-8).
results_dir = "results"
os.makedirs(results_dir, exist_ok=True)
out_path = os.path.join(results_dir, "ner_validation_results.json")

with open(out_path, mode="w", encoding="utf-8") as fh:
    json.dump(results_summary, fh, indent=2, ensure_ascii=False)

print("\nSaved validation summary ->", out_path)
print("Done.")

Collecting models for: Named Entity Recognition (NER) (token-classification)

Merged model list (up to 20 unique):
  1. dslim/bert-base-NER
  2. OpenMed/OpenMed-NER-ChemicalDetect-EuroMed-212M
  3. Babelscape/wikineural-multilingual-ner
  4. CAMeL-Lab/bert-base-arabic-camelbert-ca-pos-egy
  5. Jean-Baptiste/camembert-ner
  6. KoichiYasuoka/roberta-base-english-upos
  7. KoichiYasuoka/roberta-large-english-upos
  8. cahya/bert-base-indonesian-NER
  9. jiaqianjing/chinese-address-ner
 10. dslim/bert-large-NER
 11. w11wo/indonesian-roberta-base-posp-tagger
 12. dbmdz/bert-large-cased-finetuned-conll03-english
 13. adibvafa/CodonTransformer
 14. oliverguhr/fullstop-punctuation-multilang-large
 15. tsmatz/xlm-roberta-ner-japanese
 16. kredor/punctuate-all
 17. cahya/NusaBert-ner-v1.3
 18. OpenMed/OpenMed-NER-PharmaDetect-SuperClinical-434M
 19. Isotonic/distilbert_finetuned_ai4privacy_v2

Validating first 20 models on sample text:
Apple CEO Tim Cook met Elon Musk in Paris to discuss AI init

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


Inference succeeded in 3.20s. Entities:
  - Apple                ORG                score=0.998 span=(0,5)
  - Tim Cook             PER                score=1.000 span=(10,18)
  - Elon                 ORG                score=0.807 span=(23,27)
  - Mu                   PER                score=0.520 span=(28,30)
  - ##sk                 ORG                score=0.812 span=(30,32)
  - Paris                LOC                score=1.000 span=(36,41)
  - AI                   MISC               score=0.998 span=(53,55)
  - Lou                  LOC                score=0.984 span=(75,78)
  - ##vre Museum         LOC                score=0.983 span=(78,88)
  - Microsoft            ORG                score=0.999 span=(90,99)
  - OpenAI               ORG                score=0.991 span=(104,110)
Loading model: OpenMed/OpenMed-NER-ChemicalDetect-EuroMed-212M
Failed after 0.10s: ValueError: The repository OpenMed/OpenMed-NER-ChemicalDetect-EuroMed-212M contains custom code which must be executed

Device set to use cpu


Inference succeeded in 0.89s. Entities:
  - Apple                ORG                score=0.998 span=(0,5)
  - Tim Cook             PER                score=1.000 span=(10,18)
  - Elon Musk            PER                score=0.998 span=(23,32)
  - Paris                LOC                score=1.000 span=(36,41)
  - Louvre Museum        LOC                score=0.998 span=(75,88)
  - Microsoft            ORG                score=0.997 span=(90,99)
  - OpenAI               ORG                score=0.942 span=(104,110)
Loading model: CAMeL-Lab/bert-base-arabic-camelbert-ca-pos-egy


Some weights of the model checkpoint at CAMeL-Lab/bert-base-arabic-camelbert-ca-pos-egy were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Inference succeeded in 1.44s. Entities:
  - Apple                adj                score=0.986 span=(0,5)
  - CEO                  noun_prop          score=0.944 span=(6,9)
  - Tim                  noun               score=0.744 span=(10,13)
  - Cook met             adj                score=0.783 span=(14,22)
  - Elon Musk in Par     noun_prop          score=0.808 span=(23,39)
  - ##is                 adj                score=0.514 span=(39,41)
  - to discus            noun_prop          score=0.751 span=(42,51)
  - ##s                  adj                score=0.539 span=(51,52)
  - AI initiat           noun_prop          score=0.945 span=(53,63)
  - ##ives               adj                score=0.620 span=(63,67)
  - at                   noun_prop          score=0.421 span=(68,70)
  - the                  abbrev             score=0.356 span=(71,74)
  - Louv                 noun_prop          score=0.842 span=(75,79)
  - ##re Museum          adj                score=0.785 span=(79,88

Device set to use cpu


Inference succeeded in 1.01s. Entities:
  - Apple                ORG                score=0.988 span=(0,5)
  - Tim Cook             PER                score=0.999 span=(9,18)
  - Elon Musk            PER                score=0.999 span=(22,32)
  - Paris                LOC                score=0.998 span=(35,41)
  - AI                   MISC               score=0.527 span=(52,55)
  - Louvre Museum        LOC                score=0.995 span=(74,88)
  - Microsoft            ORG                score=0.987 span=(89,99)
  - OpenAI               ORG                score=0.556 span=(103,110)
Loading model: KoichiYasuoka/roberta-base-english-upos


Device set to use cpu


Inference succeeded in 1.12s. Entities:
  - Apple                PROPN              score=0.999 span=(0,5)
  -  CEO                 NOUN               score=0.987 span=(6,9)
  -  Tim Cook            PROPN              score=0.999 span=(10,18)
  -  met                 VERB               score=1.000 span=(19,22)
  -  Elon Musk           PROPN              score=0.999 span=(23,32)
  -  in                  ADP                score=1.000 span=(33,35)
  -  Paris               PROPN              score=0.999 span=(36,41)
  -  to                  PART               score=0.999 span=(42,44)
  -  discuss             VERB               score=1.000 span=(45,52)
  -  AI                  PROPN              score=0.799 span=(53,55)
  -  initiatives         NOUN               score=1.000 span=(56,67)
  -  at                  ADP                score=1.000 span=(68,70)
  -  the                 DET                score=1.000 span=(71,74)
  -  Louvre Museum       PROPN              score=0.997 span=(75,88

Device set to use cpu


Inference succeeded in 2.74s. Entities:
  - Apple                PROPN              score=1.000 span=(0,5)
  -  CEO                 NOUN               score=0.996 span=(6,9)
  -  Tim Cook            PROPN              score=1.000 span=(10,18)
  -  met                 VERB               score=1.000 span=(19,22)
  -  Elon Musk           PROPN              score=1.000 span=(23,32)
  -  in                  ADP                score=1.000 span=(33,35)
  -  Paris               PROPN              score=1.000 span=(36,41)
  -  to                  PART               score=0.999 span=(42,44)
  -  discuss             VERB               score=0.999 span=(45,52)
  -  AI initiatives      NOUN               score=0.993 span=(53,67)
  -  at                  ADP                score=1.000 span=(68,70)
  -  the                 DET                score=1.000 span=(71,74)
  -  Louvre Museum       PROPN              score=0.992 span=(75,88)
  - .                    PUNCT              score=1.000 span=(88,89

Some weights of the model checkpoint at cahya/bert-base-indonesian-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Inference succeeded in 1.21s. Entities:
  - apple ceo tim cook   ORG                score=0.906 span=(0,18)
  - elon musk            PER                score=0.817 span=(23,32)
  - paris                GPE                score=0.530 span=(36,41)
  - the louvre museum    ORG                score=0.824 span=(71,88)
  - microsoft            ORG                score=0.969 span=(90,99)
  - openai               ORG                score=0.635 span=(104,110)
Loading model: jiaqianjing/chinese-address-ner


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/407M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/462 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


tokenizer.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/407M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Inference succeeded in 9.01s. Entities:
  - apple ceo tim cook me LABEL_14           score=0.289 span=(0,21)
  - ##t el               LABEL_0            score=0.380 span=(21,25)
  - ##on musk            LABEL_14           score=0.367 span=(25,32)
  - in paris to discuss ai initiatives at the lou LABEL_0            score=0.710 span=(33,78)
  - ##vr                 LABEL_14           score=0.442 span=(78,80)
  - ##e                  LABEL_0            score=0.560 span=(80,81)
  - museum               LABEL_14           score=0.574 span=(82,88)
  - . microsoft and openai representatives joined later in the afternoon. LABEL_0            score=0.739 span=(88,157)
Loading model: dslim/bert-large-NER


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of the model checkpoint at dslim/bert-large-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/40.0 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Device set to use cpu


Inference succeeded in 21.18s. Entities:
  - Apple                ORG                score=0.998 span=(0,5)
  - Tim Cook             PER                score=1.000 span=(10,18)
  - Elon Musk            PER                score=0.973 span=(23,32)
  - Paris                LOC                score=1.000 span=(36,41)
  - AI                   MISC               score=0.960 span=(53,55)
  - Louvre Museum        LOC                score=0.985 span=(75,88)
  - Microsoft            ORG                score=0.999 span=(90,99)
  - OpenAI               ORG                score=0.917 span=(104,110)
Loading model: w11wo/indonesian-roberta-base-posp-tagger


Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Inference succeeded in 0.77s. Entities:
  -  Apple               NNP                score=0.990 span=(0,5)
  -  CEO                 NNO                score=0.978 span=(6,9)
  -  Tim                 NNO                score=0.971 span=(10,13)
  -  Cook                NNP                score=0.991 span=(14,18)
  -  met                 NNP                score=0.991 span=(19,22)
  -  El                  NNP                score=0.998 span=(23,25)
  - on                   NNP                score=0.926 span=(25,27)
  -  Mus                 NNP                score=0.999 span=(28,31)
  - k                    NNP                score=0.995 span=(31,32)
  -  in                  PPO                score=0.994 span=(33,35)
  -  Paris               NNP                score=0.971 span=(36,41)
  -  to                  NNP                score=0.956 span=(42,44)
  -  dis                 NNP                score=0.997 span=(45,48)
  - cus                  NNP                score=0.990 span=(48,51

Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu
Some weights of BigBirdForTokenClassification were not initialized from the model checkpoint at adibvafa/CodonTransformer and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

Inference succeeded in 1.50s. Entities:
  - Apple                ORG                score=0.999 span=(0,5)
  - Tim Cook             PER                score=1.000 span=(10,18)
  - Elon Musk            PER                score=0.998 span=(23,32)
  - Paris                LOC                score=1.000 span=(36,41)
  - AI                   MISC               score=0.595 span=(53,55)
  - Louvre Museum        LOC                score=0.927 span=(75,88)
  - Microsoft            ORG                score=0.999 span=(90,99)
  - OpenAI               ORG                score=0.992 span=(104,110)
Loading model: adibvafa/CodonTransformer


Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Attention type 'block_sparse' is not possible if sequence_length: 30 <= num global tokens: 2 * config.block_size + min. num sliding tokens: 3 * config.block_size + config.num_random_blocks * config.block_size + additional buffer: config.num_random_blocks * config.block_size = 704 with config.block_size = 64, config.num_random_blocks = 3. Changing attention type to 'original_full'...


Inference succeeded in 0.78s. Entities:
  - Apple CEO Tim Cook met Elon Musk in Paris to discuss AI initiatives at the Louvre Museum . Microsoft and OpenAI representatives joined later in the afternoon . LABEL_1            score=0.766 span=(0,157)
Loading model: oliverguhr/fullstop-punctuation-multilang-large


Device set to use cpu


Inference succeeded in 2.36s. Entities:
  - Apple CEO Tim Cook met Elon Musk in Paris to discuss AI initiatives at the Louvre 0                  score=0.958 span=(0,81)
  - Museum               .                  score=0.521 span=(81,88)
  - . Microsoft and OpenAI representatives joined later in the 0                  score=0.998 span=(88,146)
  - afternoon.           .                  score=0.973 span=(146,157)
Loading model: tsmatz/xlm-roberta-ner-japanese


Device set to use cpu


Inference succeeded in 1.75s. Entities:
  - Apple                ORG                score=0.996 span=(0,5)
  - Tim Cook             PER                score=0.980 span=(10,18)
  - Elon Musk            PER                score=0.995 span=(23,32)
  - Paris                LOC                score=0.979 span=(36,41)
  - the Louvre Museum    INS                score=0.999 span=(71,88)
  - Microsoft            ORG                score=0.997 span=(90,99)
  - OpenAI               ORG                score=0.996 span=(104,110)
Loading model: kredor/punctuate-all


Device set to use cpu


Inference succeeded in 4.65s. Entities:
  - Apple CEO Tim Cook met Elon Musk in Paris to discuss AI initiatives at the Louvre Museum. Microsoft and OpenAI representatives joined later in the afternoon. 0                  score=0.955 span=(0,157)
Loading model: cahya/NusaBert-ner-v1.3


Device set to use cpu


Inference succeeded in 2.66s. Entities:
  - Apple                ORG                score=0.814 span=(0,5)
  -  Tim Cook            PER                score=0.992 span=(9,18)
  -  Elon Musk           PER                score=0.989 span=(22,32)
  -  Paris               GPE                score=0.995 span=(35,41)
  -  AI                  PRD                score=0.777 span=(52,55)
  -  the Louvre Museum   LOC                score=0.821 span=(70,88)
  -  Microsoft           ORG                score=0.944 span=(89,99)
  -  OpenAI representatives ORG                score=0.805 span=(103,126)
Loading model: OpenMed/OpenMed-NER-PharmaDetect-SuperClinical-434M


Device set to use cpu
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Inference succeeded in 1.90s. Entities:
Loading model: Isotonic/distilbert_finetuned_ai4privacy_v2


Device set to use cpu


Inference succeeded in 0.52s. Entities:
  - tim                  FIRSTNAME          score=0.942 span=(10,13)
  - cook                 LASTNAME           score=0.979 span=(14,18)
  - elon                 FIRSTNAME          score=0.972 span=(23,27)
  - musk                 LASTNAME           score=0.965 span=(28,32)
  - paris                STATE              score=0.741 span=(36,41)

Saved validation summary -> results\ner_validation_results.json
Done.
