In [1]:
import os
import re

import pandas as pd

from dataclasses import dataclass
from tabulate import tabulate
from huggingface_hub import (
    login,
    HfApi,
    list_models,
    hf_hub_download,
    ModelCard,
    ModelCardData,
    update_repo_visibility
)

In [2]:
# Hub Login
# The access token is read from the HF_TOKEN environment variable so it never
# appears in the notebook; `os.environ.get` yields None when the variable is unset.
hf_token = os.environ.get("HF_TOKEN")

# add_to_git_credential=False: the token is not handed to the git credential helper.
login(token=hf_token, add_to_git_credential=False)
# Hub API client; none of the cells visible in this notebook reference it afterwards.
api = HfApi()

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/stefan/.cache/huggingface/token
Login successful


In [3]:
# Common repo-name prefix of all runs of this experiment; the per-run suffix
# ("bs..-e..-lr..-<seed>") is parsed further below.
model_search_string = "flair-co-funer-german_dbmdz_bert_base-"
# Materialise the search result as a list so it can be iterated more than once.
trained_models = list(list_models(author="stefan-it", search=model_search_string))

In [4]:
# Display model ids
# Keep only the repository ids ("<author>/<repo name>") and show one per line.

model_ids = [trained_model.modelId for trained_model in trained_models]
print(*model_ids, sep="\n")

stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr3e-05-1
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr5e-05-1
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr3e-05-1
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr5e-05-1
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr3e-05-2
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr5e-05-2
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr3e-05-2
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr5e-05-2
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr3e-05-3
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr5e-05-3
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr3e-05-3
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr5e-05-3
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr3e-05-4
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr5e-05-4
stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr3e-05-4
st

In [5]:
# Parses the hyper-parameter suffix of a model id:
#   "...-bs<batch size>-e<epochs>-lr<learning rate>-<seed>"
# Each named group keeps its prefix (e.g. "bs16", "e10", "lr3e-05"); the seed is digits only.
# Raw string: "\d" in a normal string literal is an invalid escape sequence.
hyper_parameter_regex = re.compile(r".*(?P<batch_size>bs.*?)-(?P<epoch>e.*?)-(?P<learning_rate>lr.*?)-(?P<seed>\d+)$")

@dataclass
class ModelInfo:
    """One fine-tuned model on the Hub together with its best development score.

    The hyper-parameter fields hold the raw fragments of the model id exactly
    as the name regex captured them (prefix included), not parsed numbers.
    Later cells rely on that: they join the fragments back into a
    configuration name and strip the prefixes with ``str.replace``.
    """
    model_id: str
    dev_f1_score: float
    batch_size: str  # e.g. "bs16"
    epoch: str  # e.g. "e10"
    learning_rate: str  # e.g. "lr3e-05"
    seed: str  # e.g. "1"

In [6]:
# Get best model score from training log
def get_best_model_score(input_file: str) -> float:
    """Return the highest micro-averaged F1 score reported in a training log.

    Every line containing ``f1-score (micro avg)`` contributes one score: the
    last space-separated token of that line (trailing whitespace ignored).

    Args:
        input_file: Path to the training log file.

    Returns:
        The maximum of all scores found in the file.

    Raises:
        ValueError: If the file contains no score line, or if a score token
            cannot be converted to ``float``.
    """
    all_dev_results = []
    with open(input_file, "rt") as f_p:
        for line in f_p:
            if "f1-score (micro avg)" in line:
                # The score is the last token on the line.
                all_dev_results.append(float(line.rstrip().split(" ")[-1]))

    if not all_dev_results:
        # Same exception type as max([]) would raise, but it names the culprit.
        raise ValueError(f"No 'f1-score (micro avg)' line found in {input_file}")

    return max(all_dev_results)

In [7]:
# Get best f1-score from Flair training log for each model
# and construct nice ModelInfo instance
model_infos = []

# Regex group names, in the order of the ModelInfo fields that follow dev_f1_score.
hyper_params = ["batch_size", "epoch", "learning_rate", "seed"]

for model_id in model_ids:
    # Validate the repo name first: a name that does not follow the pattern
    # would otherwise surface as an AttributeError on `None.group`, and only
    # after its training log had already been downloaded.
    matches = hyper_parameter_regex.match(model_id)
    if matches is None:
        raise ValueError(f"Model id does not follow the '...-bs*-e*-lr*-<seed>' pattern: {model_id}")

    # Fetch Flair training log
    training_log_file = hf_hub_download(repo_id=model_id, filename="training.log")
    dev_f1_score = get_best_model_score(training_log_file)

    # The captured fragments are kept as raw strings (e.g. "bs16", "lr3e-05").
    params = [matches.group(hyper_param) for hyper_param in hyper_params]
    model_info_args = (model_id, dev_f1_score, *params)

    model_infos.append(ModelInfo(*model_info_args))

training.log:   0%|          | 0.00/26.6k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/26.7k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/23.8k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/23.7k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/26.6k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/26.6k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/23.8k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/23.8k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/26.7k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/26.7k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/23.7k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/23.7k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/26.7k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/26.5k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/23.8k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/23.7k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/26.7k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/26.6k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/23.8k [00:00<?, ?B/s]

training.log:   0%|          | 0.00/23.8k [00:00<?, ?B/s]

In [8]:
# Sanity check: dump the collected ModelInfo records.
print(model_infos)

[ModelInfo(model_id='stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr3e-05-1', dev_f1_score=0.9141, batch_size='bs16', epoch='e10', learning_rate='lr3e-05', seed='1'), ModelInfo(model_id='stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr5e-05-1', dev_f1_score=0.927, batch_size='bs16', epoch='e10', learning_rate='lr5e-05', seed='1'), ModelInfo(model_id='stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr3e-05-1', dev_f1_score=0.9336, batch_size='bs8', epoch='e10', learning_rate='lr3e-05', seed='1'), ModelInfo(model_id='stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr5e-05-1', dev_f1_score=0.9378, batch_size='bs8', epoch='e10', learning_rate='lr5e-05', seed='1'), ModelInfo(model_id='stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr3e-05-2', dev_f1_score=0.9321, batch_size='bs16', epoch='e10', learning_rate='lr3e-05', seed='2'), ModelInfo(model_id='stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr5e-05-2', dev_f1_score=0.9341, batc

In [9]:
# Find out all hyper-parameter configurations: (Batch Size, Epoch, Learning Rate)
# Each one is spelled "<batch size>-<epoch>-<learning rate>"; the seed is left
# out, so runs that differ only in their seed share a configuration.

configurations = {
    "{}-{}-{}".format(model_info.batch_size, model_info.epoch, model_info.learning_rate)
    for model_info in model_infos
}

print(configurations)

{'bs16-e10-lr5e-05', 'bs16-e10-lr3e-05', 'bs8-e10-lr5e-05', 'bs8-e10-lr3e-05'}


In [10]:
# One [configuration, dev F1-score] pair per model, in model_infos order.
# A model is paired with the entry of `configurations` that equals its own
# "<batch size>-<epoch>-<learning rate>" name.
df_data = [
    [configuration, model_info.dev_f1_score]
    for model_info in model_infos
    for configuration in configurations
    if configuration == "{}-{}-{}".format(
        model_info.batch_size, model_info.epoch, model_info.learning_rate
    )
]

In [11]:
# Long-format table: one row per model, pairing its configuration name with its dev F1-score.
df = pd.DataFrame(df_data, columns=["Configuration", "F1-Score"])
print(df)

       Configuration  F1-Score
0   bs16-e10-lr3e-05    0.9141
1   bs16-e10-lr5e-05    0.9270
2    bs8-e10-lr3e-05    0.9336
3    bs8-e10-lr5e-05    0.9378
4   bs16-e10-lr3e-05    0.9321
5   bs16-e10-lr5e-05    0.9341
6    bs8-e10-lr3e-05    0.9366
7    bs8-e10-lr5e-05    0.9280
8   bs16-e10-lr3e-05    0.9175
9   bs16-e10-lr5e-05    0.9372
10   bs8-e10-lr3e-05    0.9299
11   bs8-e10-lr5e-05    0.9383
12  bs16-e10-lr3e-05    0.9391
13  bs16-e10-lr5e-05    0.9283
14   bs8-e10-lr3e-05    0.9417
15   bs8-e10-lr5e-05    0.9374
16  bs16-e10-lr3e-05    0.9177
17  bs16-e10-lr5e-05    0.9329
18   bs8-e10-lr3e-05    0.9281
19   bs8-e10-lr5e-05    0.9364


In [12]:
# Collapse to one row per configuration, gathering its scores into a list.
# duplicated(keep=False) flags every row whose configuration occurs more than
# once, so a configuration with only a single run is dropped here.
df = (
    df.loc[df.duplicated(subset="Configuration", keep=False)]
    .groupby("Configuration")["F1-Score"]
    .apply(list)
    .reset_index()
)

In [13]:
# Show the grouped frame: "F1-Score" now holds one list of scores per configuration.
print(df)

      Configuration                                  F1-Score
0  bs16-e10-lr3e-05  [0.9141, 0.9321, 0.9175, 0.9391, 0.9177]
1  bs16-e10-lr5e-05   [0.927, 0.9341, 0.9372, 0.9283, 0.9329]
2   bs8-e10-lr3e-05  [0.9336, 0.9366, 0.9299, 0.9417, 0.9281]
3   bs8-e10-lr5e-05   [0.9378, 0.928, 0.9383, 0.9374, 0.9364]


In [14]:
# How many seeds do we have?
# The score list of the first configuration is taken as representative.
number_seeds = len(df["F1-Score"].iloc[0])
seed_columns = [f"Seed {seed_number}" for seed_number in range(1, number_seeds + 1)]

# Spread each configuration's score list over one column per seed.
split = pd.DataFrame(df["F1-Score"].to_list(), columns=seed_columns)

In [15]:
# One column per seed, one row per configuration.
print(split)

   Seed 1  Seed 2  Seed 3  Seed 4  Seed 5
0  0.9141  0.9321  0.9175  0.9391  0.9177
1  0.9270  0.9341  0.9372  0.9283  0.9329
2  0.9336  0.9366  0.9299  0.9417  0.9281
3  0.9378  0.9280  0.9383  0.9374  0.9364


In [16]:
# Put the per-seed columns next to the configuration names and drop the
# list-valued column they were derived from.
final_df = pd.concat([df, split], axis=1).drop(columns="F1-Score")

# Row-wise statistics over the seed columns only, rounded to four decimals.
final_df["Average"] = final_df.loc[:, seed_columns].mean(axis=1).round(4)
final_df["Std."] = final_df.loc[:, seed_columns].std(axis=1).round(4)

# Best configuration first.
final_df = final_df.sort_values(by="Average", ascending=False)

print(final_df)

      Configuration  Seed 1  Seed 2  Seed 3  Seed 4  Seed 5  Average    Std.
3   bs8-e10-lr5e-05  0.9378  0.9280  0.9383  0.9374  0.9364   0.9356  0.0043
2   bs8-e10-lr3e-05  0.9336  0.9366  0.9299  0.9417  0.9281   0.9340  0.0054
1  bs16-e10-lr5e-05  0.9270  0.9341  0.9372  0.9283  0.9329   0.9319  0.0042
0  bs16-e10-lr3e-05  0.9141  0.9321  0.9175  0.9391  0.9177   0.9241  0.0109


In [17]:
# Markdown rendering of the summary table, without the index column.
print(final_df.to_markdown(index=False))

| Configuration    |   Seed 1 |   Seed 2 |   Seed 3 |   Seed 4 |   Seed 5 |   Average |   Std. |
|:-----------------|---------:|---------:|---------:|---------:|---------:|----------:|-------:|
| bs8-e10-lr5e-05  |   0.9378 |   0.928  |   0.9383 |   0.9374 |   0.9364 |    0.9356 | 0.0043 |
| bs8-e10-lr3e-05  |   0.9336 |   0.9366 |   0.9299 |   0.9417 |   0.9281 |    0.934  | 0.0054 |
| bs16-e10-lr5e-05 |   0.927  |   0.9341 |   0.9372 |   0.9283 |   0.9329 |    0.9319 | 0.0042 |
| bs16-e10-lr3e-05 |   0.9141 |   0.9321 |   0.9175 |   0.9391 |   0.9177 |    0.9241 | 0.0109 |


In [18]:
# But we need results table with nice references to actual models on the hub
# Including highlighted F1-Score (bold) of current model
def get_results_table(final_df, model_infos, current_model_info):
    """Render the results as a GitHub-style Markdown table with links to the models.

    Each row is one configuration, in the row order of ``final_df``. Each score
    cell is a reference-style link (``[score][n]``) to the model that produced
    it, and the score of ``current_model_info`` is set in bold. The last cell
    combines average and standard deviation as ``"<avg> ± <std>"``. The link
    definitions (``[n]: https://hf.co/<model id>``) follow the table after a
    blank line.

    Args:
        final_df: Frame with the columns "Configuration", "Average" and "Std.";
            all of its column names except "Std." become the table headers.
        model_infos: Model records exposing ``model_id``, ``dev_f1_score``,
            ``batch_size``, ``epoch`` and ``learning_rate``. Within a
            configuration, score cells appear in the order of this sequence.
        current_model_info: The record whose score is highlighted.

    Returns:
        The table and the link definitions as one string.
    """
    # "Std." gets no column of its own because it is folded into the "Average"
    # cell. Passing it anyway only worked because tabulate dropped the surplus
    # header, as the previously rendered table shows.
    headers = [header for header in final_df.columns.tolist() if header != "Std."]

    rows = []
    ref_list = []

    summary = zip(
        final_df["Configuration"].tolist(),
        final_df["Average"].tolist(),
        final_df["Std."].tolist(),
    )

    for sorted_configuration, sorted_average, sorted_std in summary:
        current_row = [f"`{sorted_configuration}`"]

        for model_info in model_infos:
            model_configuration = f"{model_info.batch_size}-{model_info.epoch}-{model_info.learning_rate}"
            if model_configuration != sorted_configuration:
                continue

            # References are numbered consecutively across the whole table.
            ref_number = len(ref_list) + 1

            # Score + Model link looks like: [0.8585][4]
            # Special case: model is current model, so we need to bold it
            if model_info == current_model_info:
                seed_entry = f"[**{model_info.dev_f1_score}**][{ref_number}]"
            else:
                seed_entry = f"[{model_info.dev_f1_score}][{ref_number}]"

            current_row.append(seed_entry)
            ref_list.append(f"[{ref_number}]: https://hf.co/{model_info.model_id}")

        current_row.append(f"{sorted_average} ± {sorted_std}")
        rows.append(current_row)

    results_table = tabulate(rows, headers=headers, tablefmt="github") + "\n\n" + "\n".join(ref_list)

    return results_table
    
# Test it:
# Render the table from the point of view of one arbitrary model (index 4) and
# print that model first, so the bold score in the table can be checked by eye.
test_results_table = get_results_table(final_df, model_infos, model_infos[4])
print(model_infos[4])
print("\nDevelopment Results Table:\n")
print(test_results_table)

ModelInfo(model_id='stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr3e-05-2', dev_f1_score=0.9321, batch_size='bs16', epoch='e10', learning_rate='lr3e-05', seed='2')

Development Results Table:

| Configuration      | Seed 1       | Seed 2           | Seed 3       | Seed 4       | Seed 5       | Average         |
|--------------------|--------------|------------------|--------------|--------------|--------------|-----------------|
| `bs8-e10-lr5e-05`  | [0.9378][1]  | [0.928][2]       | [0.9383][3]  | [0.9374][4]  | [0.9364][5]  | 0.9356 ± 0.0043 |
| `bs8-e10-lr3e-05`  | [0.9336][6]  | [0.9366][7]      | [0.9299][8]  | [0.9417][9]  | [0.9281][10] | 0.934 ± 0.0054  |
| `bs16-e10-lr5e-05` | [0.927][11]  | [0.9341][12]     | [0.9372][13] | [0.9283][14] | [0.9329][15] | 0.9319 ± 0.0042 |
| `bs16-e10-lr3e-05` | [0.9141][16] | [**0.9321**][17] | [0.9175][18] | [0.9391][19] | [0.9177][20] | 0.9241 ± 0.0109 |

[1]: https://hf.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e1

In [19]:
# Alternative backbone; swap it with the active pair below when needed.
#base_model = "deepset/gbert-base"
#base_model_short = "GBERT Base"

base_model = "dbmdz/bert-base-german-cased"
base_model_short = "German DBMDZ BERT"

# Determine them automatically
# The distinct values across all models, with the "bs" / "lr" name prefix stripped.
batch_sizes = {model_info.batch_size.replace("bs", "") for model_info in model_infos}

learning_rates = {model_info.learning_rate.replace("lr", "") for model_info in model_infos}

# Example sentence for the model card widget; surrounding newlines are stripped on use.
widget_text = """
Wesentliche Tätigkeiten der Compliance-Funktion wurden an die Mercurtainment AG , Düsseldorf , ausgelagert .
"""

In [20]:
# Now let's create model cards for all fine-tuned models 🤗
# Use this to create follow-up PRs when modifications are made in model card template!

commit_message = "readme: add initial version of model card"
commit_description = "Hey,\n\nthis PR adds the initial version of model card."
create_pr = True

# These two template values are the same for every card, so build them once.
# batch_sizes / learning_rates are sets of strings, whose iteration order can
# differ between kernel sessions; sorting numerically keeps regenerated cards
# free of spurious diffs in follow-up PRs.
batch_sizes_text = "[" + ", ".join(f"`{bs}`" for bs in sorted(batch_sizes, key=int)) + "]"
learning_rates_text = "[" + ", ".join(f"`{lr}`" for lr in sorted(learning_rates, key=float)) + "]"

for model in model_infos:
    # Each card gets its own table, with this model's score highlighted.
    current_results_table = get_results_table(final_df, model_infos, model)
    card_data = ModelCardData()
    card = ModelCard.from_template(card_data, template_path="model_card_template.md",
                                   base_model=base_model,
                                   base_model_short=base_model_short,
                                   batch_sizes=batch_sizes_text,
                                   learning_rates=learning_rates_text,
                                   results=current_results_table,
                                   widget_text=widget_text.strip()
                                  )

    # With create_pr=True the card is pushed as a pull request on the model repo.
    commit_url = card.push_to_hub(repo_id=model.model_id,
                                  create_pr=create_pr,
                                  commit_message=commit_message,
                                  commit_description=commit_description)

    print(commit_url + "\n")

https://huggingface.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr3e-05-1/blob/refs%2Fpr%2F1/README.md

https://huggingface.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr5e-05-1/blob/refs%2Fpr%2F1/README.md

https://huggingface.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr3e-05-1/blob/refs%2Fpr%2F1/README.md

https://huggingface.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr5e-05-1/blob/refs%2Fpr%2F1/README.md

https://huggingface.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr3e-05-2/blob/refs%2Fpr%2F1/README.md

https://huggingface.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr5e-05-2/blob/refs%2Fpr%2F1/README.md

https://huggingface.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr3e-05-2/blob/refs%2Fpr%2F1/README.md

https://huggingface.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr5e-05-2/blob/refs%2Fpr%2F1/README.md

https://huggingface.co/stefan-it/flair-co-fu

In [21]:
# Get preview of last created model hub card
# (`card` still holds the card built in the final iteration of the loop above)
print(card)

---
language: de
license: mit
tags:
- flair
- token-classification
- sequence-tagger-model
- hetzner
- hetzner-gex44
- hetzner-gpu
base_model: dbmdz/bert-base-german-cased
widget:
- text: Wesentliche Tätigkeiten der Compliance-Funktion wurden an die Mercurtainment
    AG , Düsseldorf , ausgelagert .
---

# Fine-tuned Flair Model on CO-Fun NER Dataset

This Flair model was fine-tuned on the
[CO-Fun](https://arxiv.org/abs/2403.15322) NER Dataset using German DBMDZ BERT as backbone LM.

## Dataset

The [Company Outsourcing in Fund Prospectuses (CO-Fun) dataset](https://arxiv.org/abs/2403.15322) consists of
948 sentences with 5,969 named entity annotations, including 2,340 Outsourced Services, 2,024 Companies, 1,594 Locations
and 11 Software annotations.

Overall, the following named entities are annotated:

* `Auslagerung` (engl. outsourcing)
* `Unternehmen` (engl. company)
* `Ort` (engl. location)
* `Software`

## Fine-Tuning

The latest [Flair version](https://github.com/flairNLP/flair/

In [22]:
# Now make repositories publicly visible
# private=False switches each repository to public.
# NOTE(review): recent huggingface_hub releases deprecate update_repo_visibility
# in favour of update_repo_settings — confirm against the installed version.
for model in model_infos:
    print(f"Update visibility to True for repo https://hf.co/{model.model_id}")
    update_repo_visibility(repo_id=model.model_id, private=False)

Update visibility to True for repo https://hf.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr3e-05-1
Update visibility to True for repo https://hf.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr5e-05-1
Update visibility to True for repo https://hf.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr3e-05-1
Update visibility to True for repo https://hf.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr5e-05-1
Update visibility to True for repo https://hf.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr3e-05-2
Update visibility to True for repo https://hf.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs16-e10-lr5e-05-2
Update visibility to True for repo https://hf.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr3e-05-2
Update visibility to True for repo https://hf.co/stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr5e-05-2
Update visibility to True for repo https://hf.co/stefan-it/flair-co-funer-ge