## Evaluate Multiple Classifiers Runtime

In [3]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import load_dataset, load_from_disk, concatenate_datasets
from sklearn.metrics import accuracy_score
import random
from tqdm import tqdm
import numpy as np
import torch
import os

  from .autonotebook import tqdm as notebook_tqdm


## Set Random Seed for Reproducibility

In [4]:
# Set a seed for random module
random.seed(42)

# Set a seed for numpy module
np.random.seed(42)

# Set a seed for torch module
torch.manual_seed(42)
torch.cuda.manual_seed(42)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

## Define Parameters

In [5]:
SAMPLING = "random" # "random", "stratified", "clustered", "shared_domain"
SUFFIX = "_extended" #"", "_holdout", "_extended"
SPLIT = "test" # "train", "test", "holdout", "extende
MAX_CONTENT_LENGTH = 384 # 496, 192
OVERLAP = 64
FEATURES = "url_and_content" # "url", "content", "url_and_content"
FOLDER_DATA = "data"
FOLDER_MODELS = "models"
N = 5  # Number of times to run the evaluation

In [6]:
TOPICS = ["cannabis", "kinder", "energie"]
#TOPICS = ["cannabis"]

In [7]:
MODELS = ["distilbert/distilbert-base-multilingual-cased",
          "google-bert/bert-base-multilingual-cased", 
          "FacebookAI/xlm-roberta-base", 
          "FacebookAI/xlm-roberta-large", 
          "dbmdz/bert-base-german-uncased", 
          "deepset/gelectra-large",
          "deepset/gelectra-base",
          "deepset/gbert-large",
          "deepset/gbert-base",
          ]

In [8]:
CUDA_ID = 1

**Extract URL-path:**

In [9]:
from urllib.parse import urlparse, urlunparse

def extract_url_path(example):
    view_url = example['view_url']
    if "://" not in view_url:
        view_url = "http://" + view_url  # Assume http if no protocol specified
    parsed_url = urlparse(view_url)
    new_url = urlunparse(('', '', parsed_url.path, parsed_url.params, parsed_url.query, parsed_url.fragment))
    example['url_path'] = new_url.lstrip('/')  # Store the result in a new field
    return example


extract_url_path({"view_url": "https://www.google.com/search?q=python+url+path"})

{'view_url': 'https://www.google.com/search?q=python+url+path',
 'url_path': 'search?q=python+url+path'}

## Evaluate Models

In [10]:
from torch.utils.data import DataLoader, Dataset
import torch
from tqdm import tqdm
import time
import numpy as np

class TokenizedDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: val[idx].clone().detach() for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx]).clone().detach()
        return item

    def __len__(self):
        return len(self.labels)

def get_predictions(tokenized_datasets, tokenizer, model, device, features, split="test", batch_size=32):
    """Use the trained model to make predictions on the test set."""
    
    # Tokenize the dataset
    texts = []
    urls = []
    labels = []
    
    for row in tokenized_datasets[split]:
        labels.append(row["label"])
        if features == "content":
            texts.append(row["text"])
        elif features == "url":
            urls.append(row["url_path"])
        elif features == "url_and_content":
            urls.append(row["url_path"])
            texts.append(row["text"])
        else:
            raise ValueError("Invalid value for FEATURES. Expected 'content', 'url', or 'url_and_content'.")

    if features == "content":
        encodings = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    elif features == "url":
        encodings = tokenizer(urls, padding=True, truncation=True, return_tensors="pt")
    elif features == "url_and_content":
        encodings = tokenizer(urls, texts, padding=True, truncation=True, return_tensors="pt")
    
    dataset = TokenizedDataset(encodings, labels)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    preds = []
    probabilities = []
    inference_times = []
    
    model.eval()  # Put the model in evaluation mode

    for batch in tqdm(dataloader):
        # Move inputs to device
        batch = {k: v.to(device) for k, v in batch.items()}
        
        start_time = time.time()
        with torch.no_grad():
            # Forward pass
            outputs = model(**batch)
            # Apply softmax to logits to get probabilities
            predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
            # Get the predicted class (the one with the highest probability)
            predicted_classes = torch.argmax(predictions, dim=-1).cpu().numpy()
        
        end_time = time.time()
        batch_inference_time = end_time - start_time
        inference_times.extend([batch_inference_time / len(batch["labels"])] * len(batch["labels"]))  # Store time per sample
        
        # Store the predictions and probabilities
        preds.extend(predicted_classes)
        probabilities.extend(predictions[:, 1].cpu().numpy().tolist())  # Store the probability of the positive class
    
    # Calculate statistics
    avg_time = np.mean(inference_times)
    std_time = np.std(inference_times)
    min_time = np.min(inference_times)
    max_time = np.max(inference_times)
    
    return preds, labels, probabilities, avg_time, std_time, min_time, max_time, inference_times


In [11]:
def calc_metrics(labels, preds):
    """
    Calculates the accuracy, precision, recall, and F1 score for the given labels and predictions and returns them in a dictionary.
    """
    
    metrics = {
        'accuracy': accuracy_score(labels, preds),
        'precision': precision_score(labels, preds, average='binary'),
        'recall': recall_score(labels, preds, average='binary'),
        'f1': f1_score(labels, preds, average='binary'),
    }

    return metrics

In [12]:
def sample_random_from_dataset(dataset, n=5, subset='test'):
    """
    Samples n random examples from a specified subset of the dataset.
    """
    n = min(n, len(dataset[subset]))
    random_indices = random.sample(range(len(dataset[subset])), n)
    sampled_dataset = dataset[subset].select(random_indices)
    return sampled_dataset

**Get chunk level predictions:**

In [13]:
from collections import defaultdict
eval_results = defaultdict(lambda: defaultdict(list))

In [11]:
for topic in TOPICS: # ----------------------------------------------------------------------

    for model_name in MODELS: # -------------------------------------------------------------

        print(f"\n\n###### Evaluating model {model_name} on {topic} ###### \n\n")
            
        if FEATURES == "url":
            dataset = load_from_disk(
                f"../../{FOLDER_DATA}/tmp/processed_dataset_{topic}_buffed_{SAMPLING}{SUFFIX}")

            if SPLIT == "holdout":
                dataset["holdout"] = concatenate_datasets(
                    [dataset["holdout"], dataset["test"]])
            # Extract the path from the URL
            dataset = dataset.map(extract_url_path, num_proc=8)
        else:
            dataset = load_from_disk(
                f"../../{FOLDER_DATA}/tmp/processed_dataset_{topic}_buffed_chunkified_{SAMPLING}{SUFFIX}_{MAX_CONTENT_LENGTH}")

            if SPLIT == "holdout":
                dataset["holdout"] = concatenate_datasets(
                    [dataset["holdout"], dataset["test"]])
                
            # Extract the path from the URL
            dataset = dataset.map(extract_url_path)
        
        for i in range(N):
            print(f"Run {i+1}/{N}")
            
            # Load model and tokenizer
            model_name_local = f"../../{FOLDER_MODELS}/{model_name.replace('/','_')}_{topic}_model_{FEATURES}/"
            print(f"Loading model from {model_name_local}")
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForSequenceClassification.from_pretrained(model_name_local, num_labels=2, local_files_only=True)

            # Move model to GPU if available
            DEVICE = torch.device(f"cuda:{CUDA_ID}" if torch.cuda.is_available() else "cpu")
            model.to(DEVICE)
            
            # Use the trained model to make predictions on the test set
            preds, labels, probas, avg_time, std_time, min_time, max_time, runtimes = get_predictions(dataset, tokenizer, model, DEVICE, FEATURES, split=SPLIT)
            metrics = calc_metrics(labels, preds)
            
            print(f"Metrics for {model_name} on {topic}: {metrics}")
            print(f'Average Inference Time per Sample: {avg_time:.6f} seconds')
            print(f'STD Inference Time per Sample: {std_time:.6f} seconds')
            print(f'Min Inference Time per Sample: {min_time:.6f} seconds')
            print(f'Max Inference Time per Sample: {max_time:.6f} seconds')
            
            # Update the eval_results dictionary
            eval_results[model_name][topic].append({
                'metrics': metrics,
                'avg_time': avg_time,
                'std_time': std_time,
                'min_time': min_time,
                'max_time': max_time
            })
            
            # Clear GPU memory to avoid memory errors
            del model, tokenizer
            torch.cuda.empty_cache()



###### Evaluating model distilbert/distilbert-base-multilingual-cased on cannabis ###### 


Run 1/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:04<00:00,  3.72it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.008373 seconds
STD Inference Time per Sample: 0.002354 seconds
Min Inference Time per Sample: 0.007752 seconds
Max Inference Time per Sample: 0.017443 seconds
Run 2/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:03<00:00,  4.01it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.007763 seconds
STD Inference Time per Sample: 0.000004 seconds
Min Inference Time per Sample: 0.007756 seconds
Max Inference Time per Sample: 0.007774 seconds
Run 3/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:03<00:00,  4.01it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.007759 seconds
STD Inference Time per Sample: 0.000008 seconds
Min Inference Time per Sample: 0.007744 seconds
Max Inference Time per Sample: 0.007778 seconds
Run 4/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:03<00:00,  4.01it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.007771 seconds
STD Inference Time per Sample: 0.000016 seconds
Min Inference Time per Sample: 0.007753 seconds
Max Inference Time per Sample: 0.007817 seconds
Run 5/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:03<00:00,  4.00it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.007772 seconds
STD Inference Time per Sample: 0.000018 seconds
Min Inference Time per Sample: 0.007753 seconds
Max Inference Time per Sample: 0.007811 seconds


###### Evaluating model google-bert/bert-base-multilingual-cased on cannabis ###### 


Run 1/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:07<00:00,  2.02it/s]


Metrics for google-bert/bert-base-multilingual-cased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.015488 seconds
STD Inference Time per Sample: 0.000036 seconds
Min Inference Time per Sample: 0.015448 seconds
Max Inference Time per Sample: 0.015616 seconds
Run 2/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:07<00:00,  2.02it/s]


Metrics for google-bert/bert-base-multilingual-cased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.015506 seconds
STD Inference Time per Sample: 0.000039 seconds
Min Inference Time per Sample: 0.015461 seconds
Max Inference Time per Sample: 0.015590 seconds
Run 3/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:07<00:00,  2.02it/s]


Metrics for google-bert/bert-base-multilingual-cased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.015538 seconds
STD Inference Time per Sample: 0.000037 seconds
Min Inference Time per Sample: 0.015468 seconds
Max Inference Time per Sample: 0.015612 seconds
Run 4/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:07<00:00,  2.02it/s]


Metrics for google-bert/bert-base-multilingual-cased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.015546 seconds
STD Inference Time per Sample: 0.000031 seconds
Min Inference Time per Sample: 0.015470 seconds
Max Inference Time per Sample: 0.015611 seconds
Run 5/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:07<00:00,  2.01it/s]


Metrics for google-bert/bert-base-multilingual-cased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.015575 seconds
STD Inference Time per Sample: 0.000037 seconds
Min Inference Time per Sample: 0.015508 seconds
Max Inference Time per Sample: 0.015639 seconds


###### Evaluating model FacebookAI/xlm-roberta-base on cannabis ###### 


Run 1/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.96it/s]


Metrics for FacebookAI/xlm-roberta-base on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.016023 seconds
STD Inference Time per Sample: 0.000253 seconds
Min Inference Time per Sample: 0.015907 seconds
Max Inference Time per Sample: 0.016987 seconds
Run 2/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.96it/s]


Metrics for FacebookAI/xlm-roberta-base on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.015965 seconds
STD Inference Time per Sample: 0.000029 seconds
Min Inference Time per Sample: 0.015910 seconds
Max Inference Time per Sample: 0.016035 seconds
Run 3/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.96it/s]


Metrics for FacebookAI/xlm-roberta-base on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.015969 seconds
STD Inference Time per Sample: 0.000031 seconds
Min Inference Time per Sample: 0.015917 seconds
Max Inference Time per Sample: 0.016036 seconds
Run 4/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.96it/s]


Metrics for FacebookAI/xlm-roberta-base on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.015966 seconds
STD Inference Time per Sample: 0.000040 seconds
Min Inference Time per Sample: 0.015901 seconds
Max Inference Time per Sample: 0.016062 seconds
Run 5/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.96it/s]


Metrics for FacebookAI/xlm-roberta-base on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.015993 seconds
STD Inference Time per Sample: 0.000099 seconds
Min Inference Time per Sample: 0.015907 seconds
Max Inference Time per Sample: 0.016349 seconds


###### Evaluating model FacebookAI/xlm-roberta-large on cannabis ###### 


Run 1/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:26<00:00,  1.66s/it]


Metrics for FacebookAI/xlm-roberta-large on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.052403 seconds
STD Inference Time per Sample: 0.000137 seconds
Min Inference Time per Sample: 0.052186 seconds
Max Inference Time per Sample: 0.052785 seconds
Run 2/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:26<00:00,  1.66s/it]


Metrics for FacebookAI/xlm-roberta-large on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.052392 seconds
STD Inference Time per Sample: 0.000119 seconds
Min Inference Time per Sample: 0.052102 seconds
Max Inference Time per Sample: 0.052624 seconds
Run 3/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:26<00:00,  1.66s/it]


Metrics for FacebookAI/xlm-roberta-large on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.052391 seconds
STD Inference Time per Sample: 0.000104 seconds
Min Inference Time per Sample: 0.052150 seconds
Max Inference Time per Sample: 0.052662 seconds
Run 4/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:26<00:00,  1.66s/it]


Metrics for FacebookAI/xlm-roberta-large on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.052395 seconds
STD Inference Time per Sample: 0.000106 seconds
Min Inference Time per Sample: 0.052150 seconds
Max Inference Time per Sample: 0.052623 seconds
Run 5/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:26<00:00,  1.67s/it]


Metrics for FacebookAI/xlm-roberta-large on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.052440 seconds
STD Inference Time per Sample: 0.000133 seconds
Min Inference Time per Sample: 0.052093 seconds
Max Inference Time per Sample: 0.052610 seconds


###### Evaluating model dbmdz/bert-base-german-uncased on cannabis ###### 


Run 1/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.93it/s]


Metrics for dbmdz/bert-base-german-uncased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.016249 seconds
STD Inference Time per Sample: 0.000041 seconds
Min Inference Time per Sample: 0.016164 seconds
Max Inference Time per Sample: 0.016305 seconds
Run 2/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.93it/s]


Metrics for dbmdz/bert-base-german-uncased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.016231 seconds
STD Inference Time per Sample: 0.000041 seconds
Min Inference Time per Sample: 0.016128 seconds
Max Inference Time per Sample: 0.016281 seconds
Run 3/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.93it/s]


Metrics for dbmdz/bert-base-german-uncased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.016235 seconds
STD Inference Time per Sample: 0.000037 seconds
Min Inference Time per Sample: 0.016171 seconds
Max Inference Time per Sample: 0.016332 seconds
Run 4/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.93it/s]


Metrics for dbmdz/bert-base-german-uncased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.016244 seconds
STD Inference Time per Sample: 0.000033 seconds
Min Inference Time per Sample: 0.016174 seconds
Max Inference Time per Sample: 0.016301 seconds
Run 5/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.93it/s]


Metrics for dbmdz/bert-base-german-uncased on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.016229 seconds
STD Inference Time per Sample: 0.000035 seconds
Min Inference Time per Sample: 0.016155 seconds
Max Inference Time per Sample: 0.016296 seconds


###### Evaluating model deepset/gelectra-large on cannabis ###### 


Run 1/5
Loading model from ../../models/deepset_gelectra-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:28<00:00,  1.78s/it]


Metrics for deepset/gelectra-large on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.056182 seconds
STD Inference Time per Sample: 0.000071 seconds
Min Inference Time per Sample: 0.056047 seconds
Max Inference Time per Sample: 0.056336 seconds
Run 2/5
Loading model from ../../models/deepset_gelectra-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:28<00:00,  1.78s/it]


Metrics for deepset/gelectra-large on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.056194 seconds
STD Inference Time per Sample: 0.000127 seconds
Min Inference Time per Sample: 0.055873 seconds
Max Inference Time per Sample: 0.056393 seconds
Run 3/5
Loading model from ../../models/deepset_gelectra-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:28<00:00,  1.79s/it]


Metrics for deepset/gelectra-large on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.056221 seconds
STD Inference Time per Sample: 0.000102 seconds
Min Inference Time per Sample: 0.055947 seconds
Max Inference Time per Sample: 0.056358 seconds
Run 4/5
Loading model from ../../models/deepset_gelectra-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:28<00:00,  1.79s/it]


Metrics for deepset/gelectra-large on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.056226 seconds
STD Inference Time per Sample: 0.000094 seconds
Min Inference Time per Sample: 0.055997 seconds
Max Inference Time per Sample: 0.056409 seconds
Run 5/5
Loading model from ../../models/deepset_gelectra-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:28<00:00,  1.79s/it]


Metrics for deepset/gelectra-large on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.056241 seconds
STD Inference Time per Sample: 0.000080 seconds
Min Inference Time per Sample: 0.056112 seconds
Max Inference Time per Sample: 0.056393 seconds


###### Evaluating model deepset/gelectra-base on cannabis ###### 


Run 1/5
Loading model from ../../models/deepset_gelectra-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.82it/s]


Metrics for deepset/gelectra-base on cannabis: {'accuracy': 0.9861932938856016, 'precision': 0.9792284866468842, 'recall': 1.0, 'f1': 0.9895052473763118}
Average Inference Time per Sample: 0.017221 seconds
STD Inference Time per Sample: 0.000037 seconds
Min Inference Time per Sample: 0.017144 seconds
Max Inference Time per Sample: 0.017292 seconds
Run 2/5
Loading model from ../../models/deepset_gelectra-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.82it/s]


Metrics for deepset/gelectra-base on cannabis: {'accuracy': 0.9861932938856016, 'precision': 0.9792284866468842, 'recall': 1.0, 'f1': 0.9895052473763118}
Average Inference Time per Sample: 0.017220 seconds
STD Inference Time per Sample: 0.000036 seconds
Min Inference Time per Sample: 0.017165 seconds
Max Inference Time per Sample: 0.017276 seconds
Run 3/5
Loading model from ../../models/deepset_gelectra-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.82it/s]


Metrics for deepset/gelectra-base on cannabis: {'accuracy': 0.9861932938856016, 'precision': 0.9792284866468842, 'recall': 1.0, 'f1': 0.9895052473763118}
Average Inference Time per Sample: 0.017232 seconds
STD Inference Time per Sample: 0.000101 seconds
Min Inference Time per Sample: 0.017166 seconds
Max Inference Time per Sample: 0.017600 seconds
Run 4/5
Loading model from ../../models/deepset_gelectra-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.82it/s]


Metrics for deepset/gelectra-base on cannabis: {'accuracy': 0.9861932938856016, 'precision': 0.9792284866468842, 'recall': 1.0, 'f1': 0.9895052473763118}
Average Inference Time per Sample: 0.017196 seconds
STD Inference Time per Sample: 0.000037 seconds
Min Inference Time per Sample: 0.017121 seconds
Max Inference Time per Sample: 0.017248 seconds
Run 5/5
Loading model from ../../models/deepset_gelectra-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.82it/s]


Metrics for deepset/gelectra-base on cannabis: {'accuracy': 0.9861932938856016, 'precision': 0.9792284866468842, 'recall': 1.0, 'f1': 0.9895052473763118}
Average Inference Time per Sample: 0.017192 seconds
STD Inference Time per Sample: 0.000034 seconds
Min Inference Time per Sample: 0.017101 seconds
Max Inference Time per Sample: 0.017233 seconds


###### Evaluating model deepset/gbert-large on cannabis ###### 


Run 1/5
Loading model from ../../models/deepset_gbert-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:28<00:00,  1.78s/it]


Metrics for deepset/gbert-large on cannabis: {'accuracy': 0.9861932938856016, 'precision': 0.9969230769230769, 'recall': 0.9818181818181818, 'f1': 0.9893129770992366}
Average Inference Time per Sample: 0.056184 seconds
STD Inference Time per Sample: 0.000094 seconds
Min Inference Time per Sample: 0.056049 seconds
Max Inference Time per Sample: 0.056447 seconds
Run 2/5
Loading model from ../../models/deepset_gbert-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:28<00:00,  1.79s/it]


Metrics for deepset/gbert-large on cannabis: {'accuracy': 0.9861932938856016, 'precision': 0.9969230769230769, 'recall': 0.9818181818181818, 'f1': 0.9893129770992366}
Average Inference Time per Sample: 0.056229 seconds
STD Inference Time per Sample: 0.000100 seconds
Min Inference Time per Sample: 0.056020 seconds
Max Inference Time per Sample: 0.056433 seconds
Run 3/5
Loading model from ../../models/deepset_gbert-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:28<00:00,  1.79s/it]


Metrics for deepset/gbert-large on cannabis: {'accuracy': 0.9861932938856016, 'precision': 0.9969230769230769, 'recall': 0.9818181818181818, 'f1': 0.9893129770992366}
Average Inference Time per Sample: 0.056221 seconds
STD Inference Time per Sample: 0.000076 seconds
Min Inference Time per Sample: 0.056009 seconds
Max Inference Time per Sample: 0.056345 seconds
Run 4/5
Loading model from ../../models/deepset_gbert-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:28<00:00,  1.79s/it]


Metrics for deepset/gbert-large on cannabis: {'accuracy': 0.9861932938856016, 'precision': 0.9969230769230769, 'recall': 0.9818181818181818, 'f1': 0.9893129770992366}
Average Inference Time per Sample: 0.056236 seconds
STD Inference Time per Sample: 0.000114 seconds
Min Inference Time per Sample: 0.055932 seconds
Max Inference Time per Sample: 0.056414 seconds
Run 5/5
Loading model from ../../models/deepset_gbert-large_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:28<00:00,  1.79s/it]


Metrics for deepset/gbert-large on cannabis: {'accuracy': 0.9861932938856016, 'precision': 0.9969230769230769, 'recall': 0.9818181818181818, 'f1': 0.9893129770992366}
Average Inference Time per Sample: 0.056248 seconds
STD Inference Time per Sample: 0.000096 seconds
Min Inference Time per Sample: 0.055918 seconds
Max Inference Time per Sample: 0.056365 seconds


###### Evaluating model deepset/gbert-base on cannabis ###### 


Run 1/5
Loading model from ../../models/deepset_gbert-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.82it/s]


Metrics for deepset/gbert-base on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.017234 seconds
STD Inference Time per Sample: 0.000027 seconds
Min Inference Time per Sample: 0.017186 seconds
Max Inference Time per Sample: 0.017275 seconds
Run 2/5
Loading model from ../../models/deepset_gbert-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.82it/s]


Metrics for deepset/gbert-base on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.017228 seconds
STD Inference Time per Sample: 0.000036 seconds
Min Inference Time per Sample: 0.017178 seconds
Max Inference Time per Sample: 0.017299 seconds
Run 3/5
Loading model from ../../models/deepset_gbert-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.82it/s]


Metrics for deepset/gbert-base on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.017219 seconds
STD Inference Time per Sample: 0.000050 seconds
Min Inference Time per Sample: 0.017084 seconds
Max Inference Time per Sample: 0.017303 seconds
Run 4/5
Loading model from ../../models/deepset_gbert-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.82it/s]


Metrics for deepset/gbert-base on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.017217 seconds
STD Inference Time per Sample: 0.000035 seconds
Min Inference Time per Sample: 0.017110 seconds
Max Inference Time per Sample: 0.017257 seconds
Run 5/5
Loading model from ../../models/deepset_gbert-base_cannabis_model_url_and_content/


100%|██████████| 16/16 [00:08<00:00,  1.82it/s]


Metrics for deepset/gbert-base on cannabis: {'accuracy': 0.9960552268244576, 'precision': 0.9939759036144579, 'recall': 1.0, 'f1': 0.9969788519637462}
Average Inference Time per Sample: 0.017205 seconds
STD Inference Time per Sample: 0.000034 seconds
Min Inference Time per Sample: 0.017156 seconds
Max Inference Time per Sample: 0.017260 seconds


###### Evaluating model distilbert/distilbert-base-multilingual-cased on kinder ###### 


Run 1/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:02<00:00,  3.62it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on kinder: {'accuracy': 0.9968354430379747, 'precision': 1.0, 'recall': 0.9950980392156863, 'f1': 0.9975429975429976}
Average Inference Time per Sample: 0.008627 seconds
STD Inference Time per Sample: 0.000028 seconds
Min Inference Time per Sample: 0.008581 seconds
Max Inference Time per Sample: 0.008684 seconds
Run 2/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:02<00:00,  3.62it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on kinder: {'accuracy': 0.9968354430379747, 'precision': 1.0, 'recall': 0.9950980392156863, 'f1': 0.9975429975429976}
Average Inference Time per Sample: 0.008618 seconds
STD Inference Time per Sample: 0.000024 seconds
Min Inference Time per Sample: 0.008583 seconds
Max Inference Time per Sample: 0.008666 seconds
Run 3/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:02<00:00,  3.62it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on kinder: {'accuracy': 0.9968354430379747, 'precision': 1.0, 'recall': 0.9950980392156863, 'f1': 0.9975429975429976}
Average Inference Time per Sample: 0.008615 seconds
STD Inference Time per Sample: 0.000022 seconds
Min Inference Time per Sample: 0.008582 seconds
Max Inference Time per Sample: 0.008660 seconds
Run 4/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:02<00:00,  3.62it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on kinder: {'accuracy': 0.9968354430379747, 'precision': 1.0, 'recall': 0.9950980392156863, 'f1': 0.9975429975429976}
Average Inference Time per Sample: 0.008623 seconds
STD Inference Time per Sample: 0.000025 seconds
Min Inference Time per Sample: 0.008588 seconds
Max Inference Time per Sample: 0.008673 seconds
Run 5/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:02<00:00,  3.63it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on kinder: {'accuracy': 0.9968354430379747, 'precision': 1.0, 'recall': 0.9950980392156863, 'f1': 0.9975429975429976}
Average Inference Time per Sample: 0.008625 seconds
STD Inference Time per Sample: 0.000015 seconds
Min Inference Time per Sample: 0.008599 seconds
Max Inference Time per Sample: 0.008644 seconds


###### Evaluating model google-bert/bert-base-multilingual-cased on kinder ###### 


Run 1/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.83it/s]


Metrics for google-bert/bert-base-multilingual-cased on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.017130 seconds
STD Inference Time per Sample: 0.000047 seconds
Min Inference Time per Sample: 0.017066 seconds
Max Inference Time per Sample: 0.017206 seconds
Run 2/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.83it/s]


Metrics for google-bert/bert-base-multilingual-cased on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.017148 seconds
STD Inference Time per Sample: 0.000047 seconds
Min Inference Time per Sample: 0.017051 seconds
Max Inference Time per Sample: 0.017213 seconds
Run 3/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.83it/s]


Metrics for google-bert/bert-base-multilingual-cased on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.017165 seconds
STD Inference Time per Sample: 0.000039 seconds
Min Inference Time per Sample: 0.017107 seconds
Max Inference Time per Sample: 0.017254 seconds
Run 4/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.83it/s]


Metrics for google-bert/bert-base-multilingual-cased on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.017152 seconds
STD Inference Time per Sample: 0.000033 seconds
Min Inference Time per Sample: 0.017123 seconds
Max Inference Time per Sample: 0.017226 seconds
Run 5/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.83it/s]


Metrics for google-bert/bert-base-multilingual-cased on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.017141 seconds
STD Inference Time per Sample: 0.000046 seconds
Min Inference Time per Sample: 0.017047 seconds
Max Inference Time per Sample: 0.017207 seconds


###### Evaluating model FacebookAI/xlm-roberta-base on kinder ###### 


Run 1/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.92it/s]


Metrics for FacebookAI/xlm-roberta-base on kinder: {'accuracy': 0.9936708860759493, 'precision': 0.9950980392156863, 'recall': 0.9950980392156863, 'f1': 0.9950980392156863}
Average Inference Time per Sample: 0.016365 seconds
STD Inference Time per Sample: 0.000042 seconds
Min Inference Time per Sample: 0.016296 seconds
Max Inference Time per Sample: 0.016448 seconds
Run 2/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.92it/s]


Metrics for FacebookAI/xlm-roberta-base on kinder: {'accuracy': 0.9936708860759493, 'precision': 0.9950980392156863, 'recall': 0.9950980392156863, 'f1': 0.9950980392156863}
Average Inference Time per Sample: 0.016352 seconds
STD Inference Time per Sample: 0.000028 seconds
Min Inference Time per Sample: 0.016291 seconds
Max Inference Time per Sample: 0.016391 seconds
Run 3/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.92it/s]


Metrics for FacebookAI/xlm-roberta-base on kinder: {'accuracy': 0.9936708860759493, 'precision': 0.9950980392156863, 'recall': 0.9950980392156863, 'f1': 0.9950980392156863}
Average Inference Time per Sample: 0.016387 seconds
STD Inference Time per Sample: 0.000061 seconds
Min Inference Time per Sample: 0.016314 seconds
Max Inference Time per Sample: 0.016544 seconds
Run 4/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.92it/s]


Metrics for FacebookAI/xlm-roberta-base on kinder: {'accuracy': 0.9936708860759493, 'precision': 0.9950980392156863, 'recall': 0.9950980392156863, 'f1': 0.9950980392156863}
Average Inference Time per Sample: 0.016376 seconds
STD Inference Time per Sample: 0.000017 seconds
Min Inference Time per Sample: 0.016335 seconds
Max Inference Time per Sample: 0.016402 seconds
Run 5/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.92it/s]


Metrics for FacebookAI/xlm-roberta-base on kinder: {'accuracy': 0.9936708860759493, 'precision': 0.9950980392156863, 'recall': 0.9950980392156863, 'f1': 0.9950980392156863}
Average Inference Time per Sample: 0.016347 seconds
STD Inference Time per Sample: 0.000019 seconds
Min Inference Time per Sample: 0.016319 seconds
Max Inference Time per Sample: 0.016373 seconds


###### Evaluating model FacebookAI/xlm-roberta-large on kinder ###### 


Run 1/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:16<00:00,  1.68s/it]


Metrics for FacebookAI/xlm-roberta-large on kinder: {'accuracy': 0.9968354430379747, 'precision': 1.0, 'recall': 0.9950980392156863, 'f1': 0.9975429975429976}
Average Inference Time per Sample: 0.053147 seconds
STD Inference Time per Sample: 0.000167 seconds
Min Inference Time per Sample: 0.052726 seconds
Max Inference Time per Sample: 0.053374 seconds
Run 2/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:16<00:00,  1.68s/it]


Metrics for FacebookAI/xlm-roberta-large on kinder: {'accuracy': 0.9968354430379747, 'precision': 1.0, 'recall': 0.9950980392156863, 'f1': 0.9975429975429976}
Average Inference Time per Sample: 0.053173 seconds
STD Inference Time per Sample: 0.000185 seconds
Min Inference Time per Sample: 0.052653 seconds
Max Inference Time per Sample: 0.053308 seconds
Run 3/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:16<00:00,  1.68s/it]


Metrics for FacebookAI/xlm-roberta-large on kinder: {'accuracy': 0.9968354430379747, 'precision': 1.0, 'recall': 0.9950980392156863, 'f1': 0.9975429975429976}
Average Inference Time per Sample: 0.053197 seconds
STD Inference Time per Sample: 0.000097 seconds
Min Inference Time per Sample: 0.052973 seconds
Max Inference Time per Sample: 0.053311 seconds
Run 4/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:16<00:00,  1.69s/it]


Metrics for FacebookAI/xlm-roberta-large on kinder: {'accuracy': 0.9968354430379747, 'precision': 1.0, 'recall': 0.9950980392156863, 'f1': 0.9975429975429976}
Average Inference Time per Sample: 0.053268 seconds
STD Inference Time per Sample: 0.000122 seconds
Min Inference Time per Sample: 0.052971 seconds
Max Inference Time per Sample: 0.053443 seconds
Run 5/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:16<00:00,  1.68s/it]


Metrics for FacebookAI/xlm-roberta-large on kinder: {'accuracy': 0.9968354430379747, 'precision': 1.0, 'recall': 0.9950980392156863, 'f1': 0.9975429975429976}
Average Inference Time per Sample: 0.053164 seconds
STD Inference Time per Sample: 0.000121 seconds
Min Inference Time per Sample: 0.052862 seconds
Max Inference Time per Sample: 0.053277 seconds


###### Evaluating model dbmdz/bert-base-german-uncased on kinder ###### 


Run 1/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.90it/s]


Metrics for dbmdz/bert-base-german-uncased on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.016527 seconds
STD Inference Time per Sample: 0.000027 seconds
Min Inference Time per Sample: 0.016474 seconds
Max Inference Time per Sample: 0.016572 seconds
Run 2/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.90it/s]


Metrics for dbmdz/bert-base-german-uncased on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.016546 seconds
STD Inference Time per Sample: 0.000036 seconds
Min Inference Time per Sample: 0.016451 seconds
Max Inference Time per Sample: 0.016593 seconds
Run 3/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.90it/s]


Metrics for dbmdz/bert-base-german-uncased on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.016563 seconds
STD Inference Time per Sample: 0.000049 seconds
Min Inference Time per Sample: 0.016439 seconds
Max Inference Time per Sample: 0.016624 seconds
Run 4/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.90it/s]


Metrics for dbmdz/bert-base-german-uncased on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.016548 seconds
STD Inference Time per Sample: 0.000037 seconds
Min Inference Time per Sample: 0.016462 seconds
Max Inference Time per Sample: 0.016605 seconds
Run 5/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.90it/s]


Metrics for dbmdz/bert-base-german-uncased on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.016552 seconds
STD Inference Time per Sample: 0.000033 seconds
Min Inference Time per Sample: 0.016495 seconds
Max Inference Time per Sample: 0.016615 seconds


###### Evaluating model deepset/gelectra-large on kinder ###### 


Run 1/5
Loading model from ../../models/deepset_gelectra-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:17<00:00,  1.74s/it]


Metrics for deepset/gelectra-large on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.054903 seconds
STD Inference Time per Sample: 0.000090 seconds
Min Inference Time per Sample: 0.054718 seconds
Max Inference Time per Sample: 0.055068 seconds
Run 2/5
Loading model from ../../models/deepset_gelectra-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:17<00:00,  1.74s/it]


Metrics for deepset/gelectra-large on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.054920 seconds
STD Inference Time per Sample: 0.000135 seconds
Min Inference Time per Sample: 0.054655 seconds
Max Inference Time per Sample: 0.055151 seconds
Run 3/5
Loading model from ../../models/deepset_gelectra-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:17<00:00,  1.74s/it]


Metrics for deepset/gelectra-large on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.054913 seconds
STD Inference Time per Sample: 0.000098 seconds
Min Inference Time per Sample: 0.054635 seconds
Max Inference Time per Sample: 0.054999 seconds
Run 4/5
Loading model from ../../models/deepset_gelectra-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:17<00:00,  1.74s/it]


Metrics for deepset/gelectra-large on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.054969 seconds
STD Inference Time per Sample: 0.000114 seconds
Min Inference Time per Sample: 0.054844 seconds
Max Inference Time per Sample: 0.055274 seconds
Run 5/5
Loading model from ../../models/deepset_gelectra-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:17<00:00,  1.74s/it]


Metrics for deepset/gelectra-large on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.054920 seconds
STD Inference Time per Sample: 0.000113 seconds
Min Inference Time per Sample: 0.054696 seconds
Max Inference Time per Sample: 0.055072 seconds


###### Evaluating model deepset/gelectra-base on kinder ###### 


Run 1/5
Loading model from ../../models/deepset_gelectra-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.87it/s]


Metrics for deepset/gelectra-base on kinder: {'accuracy': 0.990506329113924, 'precision': 0.9950738916256158, 'recall': 0.9901960784313726, 'f1': 0.9926289926289926}
Average Inference Time per Sample: 0.016805 seconds
STD Inference Time per Sample: 0.000044 seconds
Min Inference Time per Sample: 0.016728 seconds
Max Inference Time per Sample: 0.016857 seconds
Run 2/5
Loading model from ../../models/deepset_gelectra-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.87it/s]


Metrics for deepset/gelectra-base on kinder: {'accuracy': 0.990506329113924, 'precision': 0.9950738916256158, 'recall': 0.9901960784313726, 'f1': 0.9926289926289926}
Average Inference Time per Sample: 0.016814 seconds
STD Inference Time per Sample: 0.000037 seconds
Min Inference Time per Sample: 0.016744 seconds
Max Inference Time per Sample: 0.016862 seconds
Run 3/5
Loading model from ../../models/deepset_gelectra-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.87it/s]


Metrics for deepset/gelectra-base on kinder: {'accuracy': 0.990506329113924, 'precision': 0.9950738916256158, 'recall': 0.9901960784313726, 'f1': 0.9926289926289926}
Average Inference Time per Sample: 0.016789 seconds
STD Inference Time per Sample: 0.000049 seconds
Min Inference Time per Sample: 0.016697 seconds
Max Inference Time per Sample: 0.016864 seconds
Run 4/5
Loading model from ../../models/deepset_gelectra-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.87it/s]


Metrics for deepset/gelectra-base on kinder: {'accuracy': 0.990506329113924, 'precision': 0.9950738916256158, 'recall': 0.9901960784313726, 'f1': 0.9926289926289926}
Average Inference Time per Sample: 0.016797 seconds
STD Inference Time per Sample: 0.000026 seconds
Min Inference Time per Sample: 0.016739 seconds
Max Inference Time per Sample: 0.016834 seconds
Run 5/5
Loading model from ../../models/deepset_gelectra-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.87it/s]


Metrics for deepset/gelectra-base on kinder: {'accuracy': 0.990506329113924, 'precision': 0.9950738916256158, 'recall': 0.9901960784313726, 'f1': 0.9926289926289926}
Average Inference Time per Sample: 0.016793 seconds
STD Inference Time per Sample: 0.000034 seconds
Min Inference Time per Sample: 0.016741 seconds
Max Inference Time per Sample: 0.016841 seconds


###### Evaluating model deepset/gbert-large on kinder ###### 


Run 1/5
Loading model from ../../models/deepset_gbert-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:17<00:00,  1.74s/it]


Metrics for deepset/gbert-large on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.054848 seconds
STD Inference Time per Sample: 0.000139 seconds
Min Inference Time per Sample: 0.054532 seconds
Max Inference Time per Sample: 0.055019 seconds
Run 2/5
Loading model from ../../models/deepset_gbert-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:17<00:00,  1.74s/it]


Metrics for deepset/gbert-large on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.054886 seconds
STD Inference Time per Sample: 0.000140 seconds
Min Inference Time per Sample: 0.054572 seconds
Max Inference Time per Sample: 0.055064 seconds
Run 3/5
Loading model from ../../models/deepset_gbert-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:17<00:00,  1.74s/it]


Metrics for deepset/gbert-large on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.054933 seconds
STD Inference Time per Sample: 0.000110 seconds
Min Inference Time per Sample: 0.054677 seconds
Max Inference Time per Sample: 0.055064 seconds
Run 4/5
Loading model from ../../models/deepset_gbert-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:17<00:00,  1.74s/it]


Metrics for deepset/gbert-large on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.055001 seconds
STD Inference Time per Sample: 0.000062 seconds
Min Inference Time per Sample: 0.054918 seconds
Max Inference Time per Sample: 0.055135 seconds
Run 5/5
Loading model from ../../models/deepset_gbert-large_kinder_model_url_and_content/


100%|██████████| 10/10 [00:17<00:00,  1.74s/it]


Metrics for deepset/gbert-large on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.054960 seconds
STD Inference Time per Sample: 0.000120 seconds
Min Inference Time per Sample: 0.054738 seconds
Max Inference Time per Sample: 0.055194 seconds


###### Evaluating model deepset/gbert-base on kinder ###### 


Run 1/5
Loading model from ../../models/deepset_gbert-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.87it/s]


Metrics for deepset/gbert-base on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.016833 seconds
STD Inference Time per Sample: 0.000040 seconds
Min Inference Time per Sample: 0.016766 seconds
Max Inference Time per Sample: 0.016903 seconds
Run 2/5
Loading model from ../../models/deepset_gbert-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.87it/s]


Metrics for deepset/gbert-base on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.016837 seconds
STD Inference Time per Sample: 0.000040 seconds
Min Inference Time per Sample: 0.016753 seconds
Max Inference Time per Sample: 0.016887 seconds
Run 3/5
Loading model from ../../models/deepset_gbert-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.87it/s]


Metrics for deepset/gbert-base on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.016831 seconds
STD Inference Time per Sample: 0.000047 seconds
Min Inference Time per Sample: 0.016746 seconds
Max Inference Time per Sample: 0.016901 seconds
Run 4/5
Loading model from ../../models/deepset_gbert-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.87it/s]


Metrics for deepset/gbert-base on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.016813 seconds
STD Inference Time per Sample: 0.000055 seconds
Min Inference Time per Sample: 0.016671 seconds
Max Inference Time per Sample: 0.016876 seconds
Run 5/5
Loading model from ../../models/deepset_gbert-base_kinder_model_url_and_content/


100%|██████████| 10/10 [00:05<00:00,  1.87it/s]


Metrics for deepset/gbert-base on kinder: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.016810 seconds
STD Inference Time per Sample: 0.000044 seconds
Min Inference Time per Sample: 0.016716 seconds
Max Inference Time per Sample: 0.016866 seconds


###### Evaluating model distilbert/distilbert-base-multilingual-cased on energie ###### 


Run 1/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_energie_model_url_and_content/


100%|██████████| 19/19 [00:05<00:00,  3.75it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9924812030075187, 'recall': 1.0, 'f1': 0.9962264150943396}
Average Inference Time per Sample: 0.008636 seconds
STD Inference Time per Sample: 0.000071 seconds
Min Inference Time per Sample: 0.008579 seconds
Max Inference Time per Sample: 0.009580 seconds
Run 2/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_energie_model_url_and_content/


100%|██████████| 19/19 [00:05<00:00,  3.76it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9924812030075187, 'recall': 1.0, 'f1': 0.9962264150943396}
Average Inference Time per Sample: 0.008625 seconds
STD Inference Time per Sample: 0.000066 seconds
Min Inference Time per Sample: 0.008578 seconds
Max Inference Time per Sample: 0.009464 seconds
Run 3/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_energie_model_url_and_content/


100%|██████████| 19/19 [00:05<00:00,  3.75it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9924812030075187, 'recall': 1.0, 'f1': 0.9962264150943396}
Average Inference Time per Sample: 0.008632 seconds
STD Inference Time per Sample: 0.000063 seconds
Min Inference Time per Sample: 0.008596 seconds
Max Inference Time per Sample: 0.009461 seconds
Run 4/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_energie_model_url_and_content/


100%|██████████| 19/19 [00:05<00:00,  3.75it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9924812030075187, 'recall': 1.0, 'f1': 0.9962264150943396}
Average Inference Time per Sample: 0.008631 seconds
STD Inference Time per Sample: 0.000047 seconds
Min Inference Time per Sample: 0.008579 seconds
Max Inference Time per Sample: 0.009195 seconds
Run 5/5
Loading model from ../../models/distilbert_distilbert-base-multilingual-cased_energie_model_url_and_content/


100%|██████████| 19/19 [00:05<00:00,  3.76it/s]


Metrics for distilbert/distilbert-base-multilingual-cased on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9924812030075187, 'recall': 1.0, 'f1': 0.9962264150943396}
Average Inference Time per Sample: 0.008629 seconds
STD Inference Time per Sample: 0.000059 seconds
Min Inference Time per Sample: 0.008578 seconds
Max Inference Time per Sample: 0.009368 seconds


###### Evaluating model google-bert/bert-base-multilingual-cased on energie ###### 


Run 1/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_energie_model_url_and_content/


100%|██████████| 19/19 [00:10<00:00,  1.90it/s]


Metrics for google-bert/bert-base-multilingual-cased on energie: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.017180 seconds
STD Inference Time per Sample: 0.000124 seconds
Min Inference Time per Sample: 0.017079 seconds
Max Inference Time per Sample: 0.018794 seconds
Run 2/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_energie_model_url_and_content/


100%|██████████| 19/19 [00:10<00:00,  1.90it/s]


Metrics for google-bert/bert-base-multilingual-cased on energie: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.017189 seconds
STD Inference Time per Sample: 0.000111 seconds
Min Inference Time per Sample: 0.017081 seconds
Max Inference Time per Sample: 0.018633 seconds
Run 3/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_energie_model_url_and_content/


100%|██████████| 19/19 [00:10<00:00,  1.90it/s]


Metrics for google-bert/bert-base-multilingual-cased on energie: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.017190 seconds
STD Inference Time per Sample: 0.000108 seconds
Min Inference Time per Sample: 0.017089 seconds
Max Inference Time per Sample: 0.018593 seconds
Run 4/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_energie_model_url_and_content/


100%|██████████| 19/19 [00:10<00:00,  1.90it/s]


Metrics for google-bert/bert-base-multilingual-cased on energie: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.017185 seconds
STD Inference Time per Sample: 0.000114 seconds
Min Inference Time per Sample: 0.017047 seconds
Max Inference Time per Sample: 0.018677 seconds
Run 5/5
Loading model from ../../models/google-bert_bert-base-multilingual-cased_energie_model_url_and_content/


100%|██████████| 19/19 [00:10<00:00,  1.89it/s]


Metrics for google-bert/bert-base-multilingual-cased on energie: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.017212 seconds
STD Inference Time per Sample: 0.000105 seconds
Min Inference Time per Sample: 0.017149 seconds
Max Inference Time per Sample: 0.018620 seconds


###### Evaluating model FacebookAI/xlm-roberta-base on energie ###### 


Run 1/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:08<00:00,  2.24it/s]


Metrics for FacebookAI/xlm-roberta-base on energie: {'accuracy': 0.9913644214162349, 'precision': 0.9850187265917603, 'recall': 0.9962121212121212, 'f1': 0.9905838041431262}
Average Inference Time per Sample: 0.014563 seconds
STD Inference Time per Sample: 0.000114 seconds
Min Inference Time per Sample: 0.014495 seconds
Max Inference Time per Sample: 0.015749 seconds
Run 2/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:08<00:00,  2.24it/s]


Metrics for FacebookAI/xlm-roberta-base on energie: {'accuracy': 0.9913644214162349, 'precision': 0.9850187265917603, 'recall': 0.9962121212121212, 'f1': 0.9905838041431262}
Average Inference Time per Sample: 0.014541 seconds
STD Inference Time per Sample: 0.000101 seconds
Min Inference Time per Sample: 0.014462 seconds
Max Inference Time per Sample: 0.015872 seconds
Run 3/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:08<00:00,  2.24it/s]


Metrics for FacebookAI/xlm-roberta-base on energie: {'accuracy': 0.9913644214162349, 'precision': 0.9850187265917603, 'recall': 0.9962121212121212, 'f1': 0.9905838041431262}
Average Inference Time per Sample: 0.014535 seconds
STD Inference Time per Sample: 0.000105 seconds
Min Inference Time per Sample: 0.014440 seconds
Max Inference Time per Sample: 0.015928 seconds
Run 4/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:08<00:00,  2.24it/s]


Metrics for FacebookAI/xlm-roberta-base on energie: {'accuracy': 0.9913644214162349, 'precision': 0.9850187265917603, 'recall': 0.9962121212121212, 'f1': 0.9905838041431262}
Average Inference Time per Sample: 0.014532 seconds
STD Inference Time per Sample: 0.000107 seconds
Min Inference Time per Sample: 0.014452 seconds
Max Inference Time per Sample: 0.015932 seconds
Run 5/5
Loading model from ../../models/FacebookAI_xlm-roberta-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:08<00:00,  2.24it/s]


Metrics for FacebookAI/xlm-roberta-base on energie: {'accuracy': 0.9913644214162349, 'precision': 0.9850187265917603, 'recall': 0.9962121212121212, 'f1': 0.9905838041431262}
Average Inference Time per Sample: 0.014551 seconds
STD Inference Time per Sample: 0.000113 seconds
Min Inference Time per Sample: 0.014489 seconds
Max Inference Time per Sample: 0.015755 seconds


###### Evaluating model FacebookAI/xlm-roberta-large on energie ###### 


Run 1/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:27<00:00,  1.46s/it]


Metrics for FacebookAI/xlm-roberta-large on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9962121212121212, 'recall': 0.9962121212121212, 'f1': 0.9962121212121212}
Average Inference Time per Sample: 0.047693 seconds
STD Inference Time per Sample: 0.000372 seconds
Min Inference Time per Sample: 0.047447 seconds
Max Inference Time per Sample: 0.052635 seconds
Run 2/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:27<00:00,  1.46s/it]


Metrics for FacebookAI/xlm-roberta-large on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9962121212121212, 'recall': 0.9962121212121212, 'f1': 0.9962121212121212}
Average Inference Time per Sample: 0.047748 seconds
STD Inference Time per Sample: 0.000302 seconds
Min Inference Time per Sample: 0.047549 seconds
Max Inference Time per Sample: 0.051735 seconds
Run 3/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:27<00:00,  1.46s/it]


Metrics for FacebookAI/xlm-roberta-large on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9962121212121212, 'recall': 0.9962121212121212, 'f1': 0.9962121212121212}
Average Inference Time per Sample: 0.047795 seconds
STD Inference Time per Sample: 0.000282 seconds
Min Inference Time per Sample: 0.047547 seconds
Max Inference Time per Sample: 0.051416 seconds
Run 4/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:27<00:00,  1.46s/it]


Metrics for FacebookAI/xlm-roberta-large on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9962121212121212, 'recall': 0.9962121212121212, 'f1': 0.9962121212121212}
Average Inference Time per Sample: 0.047836 seconds
STD Inference Time per Sample: 0.000320 seconds
Min Inference Time per Sample: 0.047509 seconds
Max Inference Time per Sample: 0.051868 seconds
Run 5/5
Loading model from ../../models/FacebookAI_xlm-roberta-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:27<00:00,  1.46s/it]


Metrics for FacebookAI/xlm-roberta-large on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9962121212121212, 'recall': 0.9962121212121212, 'f1': 0.9962121212121212}
Average Inference Time per Sample: 0.047798 seconds
STD Inference Time per Sample: 0.000288 seconds
Min Inference Time per Sample: 0.047608 seconds
Max Inference Time per Sample: 0.051561 seconds


###### Evaluating model dbmdz/bert-base-german-uncased on energie ###### 


Run 1/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_energie_model_url_and_content/


100%|██████████| 19/19 [00:07<00:00,  2.44it/s]


Metrics for dbmdz/bert-base-german-uncased on energie: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.013351 seconds
STD Inference Time per Sample: 0.000131 seconds
Min Inference Time per Sample: 0.013268 seconds
Max Inference Time per Sample: 0.015127 seconds
Run 2/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_energie_model_url_and_content/


100%|██████████| 19/19 [00:07<00:00,  2.44it/s]


Metrics for dbmdz/bert-base-german-uncased on energie: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.013348 seconds
STD Inference Time per Sample: 0.000133 seconds
Min Inference Time per Sample: 0.013235 seconds
Max Inference Time per Sample: 0.015136 seconds
Run 3/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_energie_model_url_and_content/


100%|██████████| 19/19 [00:07<00:00,  2.44it/s]


Metrics for dbmdz/bert-base-german-uncased on energie: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.013345 seconds
STD Inference Time per Sample: 0.000132 seconds
Min Inference Time per Sample: 0.013249 seconds
Max Inference Time per Sample: 0.015122 seconds
Run 4/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_energie_model_url_and_content/


100%|██████████| 19/19 [00:07<00:00,  2.44it/s]


Metrics for dbmdz/bert-base-german-uncased on energie: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.013343 seconds
STD Inference Time per Sample: 0.000135 seconds
Min Inference Time per Sample: 0.013256 seconds
Max Inference Time per Sample: 0.015164 seconds
Run 5/5
Loading model from ../../models/dbmdz_bert-base-german-uncased_energie_model_url_and_content/


100%|██████████| 19/19 [00:07<00:00,  2.44it/s]


Metrics for dbmdz/bert-base-german-uncased on energie: {'accuracy': 1.0, 'precision': 1.0, 'recall': 1.0, 'f1': 1.0}
Average Inference Time per Sample: 0.013338 seconds
STD Inference Time per Sample: 0.000142 seconds
Min Inference Time per Sample: 0.013256 seconds
Max Inference Time per Sample: 0.015269 seconds


###### Evaluating model deepset/gelectra-large on energie ###### 


Run 1/5
Loading model from ../../models/deepset_gelectra-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:26<00:00,  1.37s/it]


Metrics for deepset/gelectra-large on energie: {'accuracy': 0.998272884283247, 'precision': 1.0, 'recall': 0.9962121212121212, 'f1': 0.9981024667931688}
Average Inference Time per Sample: 0.044784 seconds
STD Inference Time per Sample: 0.000290 seconds
Min Inference Time per Sample: 0.044417 seconds
Max Inference Time per Sample: 0.048270 seconds
Run 2/5
Loading model from ../../models/deepset_gelectra-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:26<00:00,  1.37s/it]


Metrics for deepset/gelectra-large on energie: {'accuracy': 0.998272884283247, 'precision': 1.0, 'recall': 0.9962121212121212, 'f1': 0.9981024667931688}
Average Inference Time per Sample: 0.044821 seconds
STD Inference Time per Sample: 0.000341 seconds
Min Inference Time per Sample: 0.044538 seconds
Max Inference Time per Sample: 0.049001 seconds
Run 3/5
Loading model from ../../models/deepset_gelectra-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:26<00:00,  1.37s/it]


Metrics for deepset/gelectra-large on energie: {'accuracy': 0.998272884283247, 'precision': 1.0, 'recall': 0.9962121212121212, 'f1': 0.9981024667931688}
Average Inference Time per Sample: 0.044834 seconds
STD Inference Time per Sample: 0.000260 seconds
Min Inference Time per Sample: 0.044424 seconds
Max Inference Time per Sample: 0.047721 seconds
Run 4/5
Loading model from ../../models/deepset_gelectra-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:26<00:00,  1.37s/it]


Metrics for deepset/gelectra-large on energie: {'accuracy': 0.998272884283247, 'precision': 1.0, 'recall': 0.9962121212121212, 'f1': 0.9981024667931688}
Average Inference Time per Sample: 0.044835 seconds
STD Inference Time per Sample: 0.000248 seconds
Min Inference Time per Sample: 0.044506 seconds
Max Inference Time per Sample: 0.047629 seconds
Run 5/5
Loading model from ../../models/deepset_gelectra-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:26<00:00,  1.37s/it]


Metrics for deepset/gelectra-large on energie: {'accuracy': 0.998272884283247, 'precision': 1.0, 'recall': 0.9962121212121212, 'f1': 0.9981024667931688}
Average Inference Time per Sample: 0.044802 seconds
STD Inference Time per Sample: 0.000241 seconds
Min Inference Time per Sample: 0.044488 seconds
Max Inference Time per Sample: 0.047687 seconds


###### Evaluating model deepset/gelectra-base on energie ###### 


Run 1/5
Loading model from ../../models/deepset_gelectra-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:07<00:00,  2.38it/s]


Metrics for deepset/gelectra-base on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9962121212121212, 'recall': 0.9962121212121212, 'f1': 0.9962121212121212}
Average Inference Time per Sample: 0.013696 seconds
STD Inference Time per Sample: 0.000117 seconds
Min Inference Time per Sample: 0.013618 seconds
Max Inference Time per Sample: 0.015265 seconds
Run 2/5
Loading model from ../../models/deepset_gelectra-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:07<00:00,  2.38it/s]


Metrics for deepset/gelectra-base on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9962121212121212, 'recall': 0.9962121212121212, 'f1': 0.9962121212121212}
Average Inference Time per Sample: 0.013692 seconds
STD Inference Time per Sample: 0.000114 seconds
Min Inference Time per Sample: 0.013632 seconds
Max Inference Time per Sample: 0.015218 seconds
Run 3/5
Loading model from ../../models/deepset_gelectra-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:07<00:00,  2.38it/s]


Metrics for deepset/gelectra-base on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9962121212121212, 'recall': 0.9962121212121212, 'f1': 0.9962121212121212}
Average Inference Time per Sample: 0.013684 seconds
STD Inference Time per Sample: 0.000100 seconds
Min Inference Time per Sample: 0.013615 seconds
Max Inference Time per Sample: 0.015018 seconds
Run 4/5
Loading model from ../../models/deepset_gelectra-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:07<00:00,  2.38it/s]


Metrics for deepset/gelectra-base on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9962121212121212, 'recall': 0.9962121212121212, 'f1': 0.9962121212121212}
Average Inference Time per Sample: 0.013675 seconds
STD Inference Time per Sample: 0.000120 seconds
Min Inference Time per Sample: 0.013612 seconds
Max Inference Time per Sample: 0.015254 seconds
Run 5/5
Loading model from ../../models/deepset_gelectra-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:07<00:00,  2.38it/s]


Metrics for deepset/gelectra-base on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9962121212121212, 'recall': 0.9962121212121212, 'f1': 0.9962121212121212}
Average Inference Time per Sample: 0.013666 seconds
STD Inference Time per Sample: 0.000126 seconds
Min Inference Time per Sample: 0.013608 seconds
Max Inference Time per Sample: 0.015377 seconds


###### Evaluating model deepset/gbert-large on energie ###### 


Run 1/5
Loading model from ../../models/deepset_gbert-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:25<00:00,  1.37s/it]


Metrics for deepset/gbert-large on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9924812030075187, 'recall': 1.0, 'f1': 0.9962264150943396}
Average Inference Time per Sample: 0.044745 seconds
STD Inference Time per Sample: 0.000269 seconds
Min Inference Time per Sample: 0.044423 seconds
Max Inference Time per Sample: 0.047825 seconds
Run 2/5
Loading model from ../../models/deepset_gbert-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:26<00:00,  1.37s/it]


Metrics for deepset/gbert-large on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9924812030075187, 'recall': 1.0, 'f1': 0.9962264150943396}
Average Inference Time per Sample: 0.044796 seconds
STD Inference Time per Sample: 0.000317 seconds
Min Inference Time per Sample: 0.044449 seconds
Max Inference Time per Sample: 0.048717 seconds
Run 3/5
Loading model from ../../models/deepset_gbert-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:26<00:00,  1.37s/it]


Metrics for deepset/gbert-large on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9924812030075187, 'recall': 1.0, 'f1': 0.9962264150943396}
Average Inference Time per Sample: 0.044807 seconds
STD Inference Time per Sample: 0.000256 seconds
Min Inference Time per Sample: 0.044616 seconds
Max Inference Time per Sample: 0.047965 seconds
Run 4/5
Loading model from ../../models/deepset_gbert-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:25<00:00,  1.37s/it]


Metrics for deepset/gbert-large on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9924812030075187, 'recall': 1.0, 'f1': 0.9962264150943396}
Average Inference Time per Sample: 0.044780 seconds
STD Inference Time per Sample: 0.000293 seconds
Min Inference Time per Sample: 0.044521 seconds
Max Inference Time per Sample: 0.048526 seconds
Run 5/5
Loading model from ../../models/deepset_gbert-large_energie_model_url_and_content/


100%|██████████| 19/19 [00:26<00:00,  1.37s/it]


Metrics for deepset/gbert-large on energie: {'accuracy': 0.9965457685664939, 'precision': 0.9924812030075187, 'recall': 1.0, 'f1': 0.9962264150943396}
Average Inference Time per Sample: 0.044816 seconds
STD Inference Time per Sample: 0.000261 seconds
Min Inference Time per Sample: 0.044593 seconds
Max Inference Time per Sample: 0.047863 seconds


###### Evaluating model deepset/gbert-base on energie ###### 


Run 1/5
Loading model from ../../models/deepset_gbert-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:08<00:00,  2.37it/s]


Metrics for deepset/gbert-base on energie: {'accuracy': 0.998272884283247, 'precision': 0.9962264150943396, 'recall': 1.0, 'f1': 0.998109640831758}
Average Inference Time per Sample: 0.013707 seconds
STD Inference Time per Sample: 0.000116 seconds
Min Inference Time per Sample: 0.013615 seconds
Max Inference Time per Sample: 0.015239 seconds
Run 2/5
Loading model from ../../models/deepset_gbert-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:07<00:00,  2.38it/s]


Metrics for deepset/gbert-base on energie: {'accuracy': 0.998272884283247, 'precision': 0.9962264150943396, 'recall': 1.0, 'f1': 0.998109640831758}
Average Inference Time per Sample: 0.013715 seconds
STD Inference Time per Sample: 0.000101 seconds
Min Inference Time per Sample: 0.013672 seconds
Max Inference Time per Sample: 0.015086 seconds
Run 3/5
Loading model from ../../models/deepset_gbert-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:08<00:00,  2.37it/s]


Metrics for deepset/gbert-base on energie: {'accuracy': 0.998272884283247, 'precision': 0.9962264150943396, 'recall': 1.0, 'f1': 0.998109640831758}
Average Inference Time per Sample: 0.013726 seconds
STD Inference Time per Sample: 0.000113 seconds
Min Inference Time per Sample: 0.013648 seconds
Max Inference Time per Sample: 0.015219 seconds
Run 4/5
Loading model from ../../models/deepset_gbert-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:07<00:00,  2.38it/s]


Metrics for deepset/gbert-base on energie: {'accuracy': 0.998272884283247, 'precision': 0.9962264150943396, 'recall': 1.0, 'f1': 0.998109640831758}
Average Inference Time per Sample: 0.013714 seconds
STD Inference Time per Sample: 0.000116 seconds
Min Inference Time per Sample: 0.013654 seconds
Max Inference Time per Sample: 0.015278 seconds
Run 5/5
Loading model from ../../models/deepset_gbert-base_energie_model_url_and_content/


100%|██████████| 19/19 [00:08<00:00,  2.37it/s]

Metrics for deepset/gbert-base on energie: {'accuracy': 0.998272884283247, 'precision': 0.9962264150943396, 'recall': 1.0, 'f1': 0.998109640831758}
Average Inference Time per Sample: 0.013711 seconds
STD Inference Time per Sample: 0.000104 seconds
Min Inference Time per Sample: 0.013627 seconds
Max Inference Time per Sample: 0.015078 seconds





In [14]:
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

german_stop_words = stopwords.words('german')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/jschelb/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [18]:
import numpy as np
import time
from collections import defaultdict
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix


for topic in TOPICS:
    print(f"\n\n###### Evaluating SVM on {topic} ###### \n\n")
    
    if FEATURES == "url":
        dataset = load_from_disk(
            f"../../{FOLDER_DATA}/tmp/processed_dataset_{topic}_buffed_{SAMPLING}{SUFFIX}")

        if SPLIT == "holdout":
            dataset["holdout"] = concatenate_datasets(
                [dataset["holdout"], dataset["test"]])
        # Extract the path from the URL
        dataset = dataset.map(extract_url_path, num_proc=8)
    else:
        dataset = load_from_disk(
            f"../../{FOLDER_DATA}/tmp/processed_dataset_{topic}_buffed_chunkified_{SAMPLING}{SUFFIX}_{MAX_CONTENT_LENGTH}")

        if SPLIT == "holdout":
            dataset["holdout"] = concatenate_datasets(
                [dataset["holdout"], dataset["test"]])
             
        # Extract the path from the URL
        dataset = dataset.map(extract_url_path)
        
        print(f"Loading dataset for {topic} from {dataset}")
        
        train_data = dataset['train']
        test_data = dataset['test']

        vectorizer = TfidfVectorizer(stop_words=german_stop_words, max_features=10000)

        # Vectorize the training data
        X_train = vectorizer.fit_transform(train_data['text'])

        # Vectorize the test data using the same vectorizer
        X_test = vectorizer.transform(test_data['text'])

        y_train = np.array(train_data['label'])
        y_test = np.array(test_data['label'])

        # Initialize and train the SVM classifier
        svm_classifier = SVC()

        # Train the classifier
        svm_classifier.fit(X_train, y_train)

    
    for i in range(N):
       for i in range(N):
        print(f"Run {i+1}/{N}")
    
        # Measure the total time for predicting the entire test set
        start_time = time.time()
        predictions = svm_classifier.predict(X_test)
        end_time = time.time()

        # Calculate average time per sample
        total_time = end_time - start_time
        num_samples = X_test.shape[0]
        avg_time_per_sample = total_time / num_samples
        runtimes = [avg_time_per_sample] * num_samples
        std_time = np.std(runtimes)
        min_time = np.min(runtimes)
        max_time = np.max(runtimes)
        
        # Collecting metrics
        metrics = classification_report(y_test, predictions, output_dict=True)

        print(f"Metrics for SVM on {topic}: {metrics}")
        print(f'Average Inference Time per Sample: {avg_time_per_sample:.6f} seconds')
        print(f'STD Inference Time per Sample: {std_time:.6f} seconds')
        print(f'Min Inference Time per Sample: {min_time:.6f} seconds')
        print(f'Max Inference Time per Sample: {max_time:.6f} seconds')

        # Update eval_results
        eval_results['svm_classifier'][topic].append({
            'metrics': metrics,
            'avg_time': avg_time_per_sample,
            'std_time': std_time,
            'min_time': min_time,
            'max_time': max_time
        })



###### Evaluating SVM on cannabis ###### 


Loading dataset for cannabis from DatasetDict({
    train: Dataset({
        features: ['_id', 'batch_id', 'domain', 'view_url', 'lang', 'text', 'text_length', 'word_count', 'topic', 'category', 'good_for_training', 'good_for_augmentation', 'annotation_type', 'is_topic', 'label', 'token_count', 'chunk_id', 'url_path'],
        num_rows: 3815
    })
    test: Dataset({
        features: ['_id', 'batch_id', 'domain', 'view_url', 'lang', 'text', 'text_length', 'word_count', 'topic', 'category', 'good_for_training', 'good_for_augmentation', 'annotation_type', 'is_topic', 'label', 'token_count', 'chunk_id', 'url_path'],
        num_rows: 507
    })
    holdout: Dataset({
        features: ['_id', 'batch_id', 'domain', 'view_url', 'lang', 'text', 'text_length', 'word_count', 'topic', 'category', 'good_for_training', 'good_for_augmentation', 'annotation_type', 'is_topic', 'label', 'token_count', 'chunk_id', 'url_path'],
        num_rows: 33702
   

In [27]:
print(eval_results)

defaultdict(<function <lambda> at 0x7fb1d350f370>, {'svm_classifier': defaultdict(<class 'list'>, {'cannabis': [{'metrics': {'0': {'precision': 0.7953488372093023, 'recall': 0.9661016949152542, 'f1-score': 0.8724489795918368, 'support': 177.0}, '1': {'precision': 0.9794520547945206, 'recall': 0.8666666666666667, 'f1-score': 0.9196141479099679, 'support': 330.0}, 'accuracy': 0.9013806706114399, 'macro avg': {'precision': 0.8874004460019114, 'recall': 0.9163841807909605, 'f1-score': 0.8960315637509023, 'support': 507.0}, 'weighted avg': {'precision': 0.9151793338624028, 'recall': 0.9013806706114399, 'f1-score': 0.9031482015740523, 'support': 507.0}}, 'avg_time': 0.0017493927972556571, 'std_time': 0.0004228871436871009, 'min_time': 0.0008242130279541016, 'max_time': 0.008214235305786133}, {'metrics': {'0': {'precision': 0.7953488372093023, 'recall': 0.9661016949152542, 'f1-score': 0.8724489795918368, 'support': 177.0}, '1': {'precision': 0.9794520547945206, 'recall': 0.8666666666666667, '

### Save Chunk Level Predictions and Output Results

In [20]:
from IPython.display import display, HTML
from tabulate import tabulate
import json

In [14]:
# Define the file path to save the dictionary
file_path =f"eval_runtime_{FEATURES}_{SPLIT}_chunks.json"

In [21]:
# Save the dictionary to disk as JSON
with open(file_path, "w") as file:
    json.dump(eval_results, file)

NameError: name 'file_path' is not defined

In [16]:
with open(file_path, "r") as file:
    eval_results = json.load(file)

In [30]:
from collections import defaultdict
from tabulate import tabulate
import numpy as np

# Function to compute average metrics and runtime statistics
def compute_average_metrics(eval_results):
    averaged_results = defaultdict(dict)
    
    for model, topics_metrics in eval_results.items():
        for topic, runs in topics_metrics.items():
            avg_metrics = defaultdict(list)
            for run in runs:
                metrics = run.get('metrics', {})
                for key, value in metrics.items():
                    if isinstance(value, (int, float)):  # Ensure the value is numeric
                        avg_metrics[key].append(value)
                avg_metrics['avg_time'].append(run.get('avg_time', 0))
                avg_metrics['std_time'].append(run.get('std_time', 0))
                avg_metrics['min_time'].append(run.get('min_time', 0))
                avg_metrics['max_time'].append(run.get('max_time', 0))
            
            # Calculate means only if there are values to avoid empty list issues
            averaged_results[model][topic] = {metric: np.mean(values) for metric, values in avg_metrics.items() if len(values) > 0 and isinstance(values[0], (int, float))}
            averaged_results[model][topic].update({
                'avg_time_std': np.std(avg_metrics['avg_time']) if len(avg_metrics['avg_time']) > 0 else 0,
                'std_time_std': np.std(avg_metrics['std_time']) if len(avg_metrics['std_time']) > 0 else 0,
                'min_time_std': np.std(avg_metrics['min_time']) if len(avg_metrics['min_time']) > 0 else 0,
                'max_time_std': np.std(avg_metrics['max_time']) if len(avg_metrics['max_time']) > 0 else 0,
            })
    
    return averaged_results

# Compute average metrics
averaged_eval_results = compute_average_metrics(eval_results)

# Identify all topics (assuming all models are evaluated on the same topics)
topics = list(next(iter(averaged_eval_results.values())).keys())

# Prepare headers for the table: each topic will have six metrics
headers = ["Model"] + [f"{topic} {metric}" for topic in topics for metric in ["Acc.", "Prec.", "Rec.", "F1", "Avg. Time", "STD Time"]]

# Prepare rows: one row per model, containing metrics for each topic
rows = []
for model, topics_metrics in averaged_eval_results.items():
    row = [model]  # Start with the model name
    for topic in topics:
        metrics = topics_metrics.get(topic, {})
        row.extend([
            metrics.get('accuracy', 0.0),
            metrics.get('precision', 0.0),
            metrics.get('recall', 0.0),
            metrics.get('f1-score', 0.0),  # Assuming the key for F1 score is 'f1-score'
            metrics.get('avg_time', 0.0),
            metrics.get('std_time', 0.0)
        ])
    rows.append(row)

# Generate the HTML table
table_html = tabulate(rows, headers=headers, tablefmt="html", showindex="never", floatfmt=".3f")

print(table_html)  # This will print the HTML table; in a real application, you might save this to an HTML file.


<table>
<thead>
<tr><th>Model         </th><th style="text-align: right;">  cannabis Acc.</th><th style="text-align: right;">  cannabis Prec.</th><th style="text-align: right;">  cannabis Rec.</th><th style="text-align: right;">  cannabis F1</th><th style="text-align: right;">  cannabis Avg. Time</th><th style="text-align: right;">  cannabis STD Time</th><th style="text-align: right;">  kinder Acc.</th><th style="text-align: right;">  kinder Prec.</th><th style="text-align: right;">  kinder Rec.</th><th style="text-align: right;">  kinder F1</th><th style="text-align: right;">  kinder Avg. Time</th><th style="text-align: right;">  kinder STD Time</th><th style="text-align: right;">  energie Acc.</th><th style="text-align: right;">  energie Prec.</th><th style="text-align: right;">  energie Rec.</th><th style="text-align: right;">  energie F1</th><th style="text-align: right;">  energie Avg. Time</th><th style="text-align: right;">  energie STD Time</th></tr>
</thead>
<tbody>
<tr><td>sv

In [29]:
from IPython.display import display, HTML
display(HTML(table_html))


Model,cannabis Acc.,cannabis Prec.,cannabis Rec.,cannabis F1,cannabis Avg. Time,cannabis STD Time,kinder Acc.,kinder Prec.,kinder Rec.,kinder F1,kinder Avg. Time,kinder STD Time,energie Acc.,energie Prec.,energie Rec.,energie F1,energie Avg. Time,energie STD Time
svm_classifier,0.901,0.0,0.0,0.0,0.001,0.0,0.953,0.0,0.0,0.0,0.001,0.0,0.945,0.0,0.0,0.0,0.001,0.0


In [19]:
# Compute average metrics
averaged_eval_results = compute_average_metrics(eval_results)

# Prepare headers for the table
headers = ["Model", "Avg. F1 Score", "Avg. Runtime", "Runtime STD"]

# Prepare rows: one row per model, containing average F1 score and average runtime
rows = []
for model, topics_metrics in averaged_eval_results.items():
    avg_f1_scores = []
    avg_times = []
    for topic, metrics in topics_metrics.items():
        avg_f1_scores.append(metrics.get('f1', 0.0))
        avg_times.append(metrics.get('avg_time', 0.0))
    
    avg_f1_score = np.mean(avg_f1_scores)
    avg_runtime = np.mean(avg_times)
    runtime_std = np.std(avg_times)
    
    row = [model, avg_f1_score, avg_runtime, runtime_std]
    rows.append(row)

# Generate the HTML table
table_html = tabulate(rows, headers=headers, tablefmt="html", showindex="never", floatfmt=".3f")

In [20]:
from IPython.display import display, HTML
display(HTML(table_html))

Model,Avg. F1 Score,Avg. Runtime,Runtime STD
distilbert/distilbert-base-multilingual-cased,0.997,0.008,0.0
google-bert/bert-base-multilingual-cased,0.999,0.017,0.001
FacebookAI/xlm-roberta-base,0.994,0.016,0.001
FacebookAI/xlm-roberta-large,0.997,0.051,0.002
dbmdz/bert-base-german-uncased,0.999,0.015,0.001
deepset/gelectra-large,0.998,0.052,0.005
deepset/gelectra-base,0.993,0.016,0.002
deepset/gbert-large,0.995,0.052,0.005
deepset/gbert-base,0.998,0.016,0.002
