In [1]:
import json
import os
from gliner import GLiNER
import torch
# @title Fast Metrics

base_dir = 'ood_data'

# Load the test.json file.
# The encoding is pinned to UTF-8: without it open() falls back to the
# locale's preferred encoding, which can mis-decode non-ASCII text.
with open(f'{base_dir}/test.json', 'r', encoding='utf-8') as file:
    test_data = json.load(file)

# Load the test_data_baseline.json file (same examples, baseline label names
# -- presumably; verify against the data).
with open(f'{base_dir}/test_data_baseline.json', 'r', encoding='utf-8') as file:
    test_data_baseline = json.load(file)

In [2]:
# Extract all labels from each example
from utils import get_all_labels

# Label sets found in the fine-tuned test data and in the baseline test data.
all_test_labels = get_all_labels(test_data)
all_test_baseline_labels = get_all_labels(test_data_baseline)

# Use the first CUDA device when one is available, otherwise stay on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Display what was selected so the notebook records it.
device, all_test_labels, all_test_baseline_labels

(device(type='cuda', index=0),
 ['a_name', 'a_organisation', 'a_place'],
 ['person', 'organization', 'place'])

In [5]:
# Path of the fine-tuned anonymizer checkpoint (relative to the working directory).
model_anonv0_path = 'models_anonymizer/AnonymizerV0_gliner-multitask-large-v0.5'

# Load the checkpoint with its tokenizer, restricted to local files,
# and move it to the selected device in one step.
model_anonv0 = GLiNER.from_pretrained(
    model_anonv0_path,
    load_tokenizer=True,
    local_files_only=True,
).to(device)

config.json not found in /home/ubuntu/mert/dslab/models_anonymizer/AnonymizerV0_gliner-multitask-large-v0.5
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
# Evaluate the fine-tuned model on the full test set with its own label names.
results, f1 = model_anonv0.evaluate(
    test_data,
    flat_ner=True,
    threshold=0.5,
    batch_size=1,
    entity_types=all_test_labels,
)
# First line: F1 as returned by evaluate(); second part: the textual report.
output_info = "\n".join([f"F1 Score: {f1:.2f}", results])
print(output_info)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


F1 Score: 0.43
P: 67.86%	R: 31.15%	F1: 42.70%



In [7]:
# Baseline 1: the pretrained multitask checkpoint, moved to the selected device.
model_baseline1 = GLiNER.from_pretrained(
    'knowledgator/gliner-multitask-large-v0.5'
).to(device)

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]



In [8]:
# Evaluate baseline 1 on the baseline-labelled copy of the test set.
results, f1 = model_baseline1.evaluate(
    test_data_baseline,
    flat_ner=True,
    threshold=0.5,
    batch_size=1,
    entity_types=all_test_baseline_labels,
)
# First line: F1 as returned by evaluate(); second part: the textual report.
output_info = "\n".join([f"F1 Score: {f1:.2f}", results])
print(output_info)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


F1 Score: 0.66
P: 59.21%	R: 73.77%	F1: 65.69%



In [9]:
# Baseline 2: urchade/gliner_multi_pii-v1, moved to the selected device.
model_baseline2 = GLiNER.from_pretrained(
    'urchade/gliner_multi_pii-v1'
).to(device)

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

In [10]:
# Evaluate baseline 2 on the baseline-labelled copy of the test set.
results, f1 = model_baseline2.evaluate(
    test_data_baseline,
    flat_ner=True,
    threshold=0.5,
    batch_size=1,
    entity_types=all_test_baseline_labels,
)
# First line: F1 as returned by evaluate(); second part: the textual report.
output_info = "\n".join([f"F1 Score: {f1:.2f}", results])
print(output_info)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


F1 Score: 0.75
P: 66.67%	R: 85.25%	F1: 74.82%



# Do Inference on one sample

In [8]:
from utils import join_tokens
from utils import view

# Pick one test example, rebuild its plain text and show its gold entities.
example_index = 1
example = test_data[example_index]
text = join_tokens(example["tokenized_text"])

view(example["tokenized_text"], example["ner"])


a_organisation: ['Gitarren', 'AG']
a_place: ['Brugg']


In [14]:
# Compare gold and predicted entities for a single test example.
example_index = 15
example = test_data[example_index]

text = join_tokens(example["tokenized_text"])
expected_ner = example["ner"]

# Candidate labels handed to the model
labels = all_test_labels
print(labels)

# Run the fine-tuned model on the reconstructed text
entities = model_anonv0.predict_entities(text, labels, threshold=0.5)

# Gold annotations first ...
print("Expected Entities:")
view(example["tokenized_text"], expected_ner)

# ... then whatever the model found
print("\nPredicted Entities:")
for entity in entities:
    print(f'{entity["text"]} => {entity["label"]}')

['a_name', 'a_organisation', 'a_place']
Expected Entities:
a_place: ['Paris']
a_name: ['Mark', 'Schneider']

Predicted Entities:
Mark Schneider => a_name


In [15]:
# count all labels in the test dataset
def count_labels(data):
    """Tally how often each entity label occurs in a dataset.

    Args:
        data: iterable of example mappings. An example may carry a "ner"
            entry holding a list of entities; examples without it are
            treated as having no entities.

    Returns:
        dict mapping each label to its number of occurrences, with keys in
        order of first appearance.
    """
    tally = {}
    for record in data:
        for span in record.get("ner", []):
            # The label sits at index 2 of each entity, e.g. [start, end, label].
            name = span[2]
            tally[name] = tally.get(name, 0) + 1
    return tally

# Tally the entity labels of the full test set; the bare name on the last
# line makes the notebook display the resulting dict.
label_count = count_labels(test_data)
label_count

{'a_name': 76, 'a_organisation': 21, 'a_place': 31}

In [17]:
from utils import create_test_data_for_label

# Build one test subset per label, keyed by the label name.
labelled_test_data = {
    label: create_test_data_for_label(test_data, label)
    for label in all_test_labels
}

# Report how many examples each subset contains.
for label, rows in labelled_test_data.items():
    print(label, len(rows))

a_name 45
a_organisation 20
a_place 20


In [18]:
# Display the first example of the a_place subset to check its structure.
labelled_test_data['a_place'][0]

{'tokenized_text': ['Thomas',
  'arbeitet',
  'bei',
  'der',
  'SBB',
  'in',
  'Aarau',
  '.',
  'Er',
  'wurde',
  'vor',
  'kurzem',
  'nach',
  'Sch',
  'ö',
  'nenwerd',
  'versetzt',
  '.'],
 'ner': [[6, 6, 'a_place'], [13, 15, 'a_place']]}

In [21]:
# Evaluate the fine-tuned model on each single-label subset, asking it for
# that one label only. `results` maps label -> textual report.
results = {}

for label, data in labelled_test_data.items():
    report, f1 = model_anonv0.evaluate(
        data,
        flat_ner=True,
        threshold=0.5,
        batch_size=1,
        entity_types=[label],
    )
    results[label] = report
    print(f"Label: {label}, Results: {report}")


Label: a_name, Results: P: 80.95%	R: 47.22%	F1: 59.65%

Label: a_organisation, Results: P: 11.76%	R: 9.52%	F1: 10.53%

Label: a_place, Results: P: 64.29%	R: 31.03%	F1: 41.86%



In [53]:
# Re-run the per-label evaluation with a stricter threshold, skipping a_name.
#
# NOTE(review): the previous version of this cell set threshold = 0.5 at the
# top of every iteration but then either skipped the label (a_name) or
# overwrote the value with 0.95, so 0.5 was never used. The behaviour is kept
# as it was; if a_name was meant to be evaluated at 0.5 instead of being
# skipped, remove it from skipped_labels and pick the threshold per label.
skipped_labels = {"a_name"}
threshold = 0.95
results = {}

for label, data in labelled_test_data.items():
    print(f"Label: {label}")
    if label in skipped_labels:
        # Only the label name is printed for skipped labels; nothing is stored.
        continue
    results[label], f1 = model_anonv0.evaluate(
        data,
        flat_ner=True,
        threshold=threshold,
        batch_size=1,
        entity_types=[label],
    )
    print(f"Results: {results[label]}")


Label: a_name
Label: a_organisation
Results: P: 97.15%	R: 99.09%	F1: 98.11%

Label: a_place
Results: P: 88.28%	R: 98.10%	F1: 92.93%



: 

In [40]:
import pandas as pd
import ast

# One out-of-distribution CSV per language, keyed by language name.
keys = ['french', 'german', 'italian']
ood_data = {}

for key in keys:
    ood_data[key] = pd.read_csv(f'data/{key}_ood.csv')
    # The entities/labels columns hold Python list literals as text;
    # parse them back into real lists with ast.literal_eval.
    for column in ('entities', 'labels'):
        ood_data[key][column] = ood_data[key][column].apply(ast.literal_eval)

ood_data['french'].head()

Unnamed: 0,text,entities,labels
0,La société Énergie Verte SA et l'association S...,"[Énergie Verte SA, Solidarité Environnement, L...","[a_organisation, a_organisation, a_place]"
1,Madame Claire Dupont et Monsieur Jean-Louis Ma...,"[Claire Dupont, Jean-Louis Martin]","[a_name, a_name]"
2,Un contrat a été signé entre l'organisation Mé...,"[Médecins Sans Frontières, Conseil Municipal d...","[a_organisation, a_place]"
3,"Monsieur Paul Durand, représentant de l'associ...","[Paul Durand, Culture et Patrimoine]","[a_name, a_organisation]"
4,La Fondation pour la Recherche Médicale et l'u...,"[Fondation pour la Recherche Médicale, univers...","[a_organisation, a_organisation, a_place]"


In [42]:
# Take up to 5 random samples from each OOD dataset and compare the model's
# predictions with the annotated entities.
sample_size = 5
sample_seed = 42  # fixed so that re-running the notebook shows the same rows

for key, data in ood_data.items():
    print(f"Key: {key}")
    # Cap the request at the frame length: sample() raises when asked for
    # more rows than exist (without replacement).
    sampled = data.sample(min(sample_size, len(data)), random_state=sample_seed)
    for index, row in sampled.iterrows():
        text = row['text']
        # The labels column repeats a label once per entity; pass each
        # candidate label to the model only once, in first-seen order.
        # NOTE(review): the candidates still come from the row's own
        # annotation, so the model is only asked about label types known to
        # be present -- consider a fixed label set for an unbiased check.
        labels = list(dict.fromkeys(row['labels']))
        entities = model_anonv0.predict_entities(text, labels, threshold=0.5)
        print(f"Text: {text}")
        print("Predicted Entities:")
        for entity in entities:
            print(entity["text"], "=>", entity["label"])
        print("True Entities:")
        for entry in row['entities']:
            print(entry)
        print("\n")

Key: french
Text: Madame Anne Leroy et Monsieur Jacques Rolland ont été choisis pour présider le comité de l'événement.
Predicted Entities:
Anne Leroy => a_name
Jacques Rolland => a_name
True Entities:
Anne Leroy
Jacques Rolland


Text: La collaboration entre le Parc National des Pyrénées et le Ministère de la Culture a été officialisée.
Predicted Entities:
True Entities:
Parc National des Pyrénées
Ministère de la Culture


Text: Le contrat entre l'entreprise SolarFrance et la Région PACA a été finalisé.
Predicted Entities:
SolarFrance => a_organisation
True Entities:
SolarFrance
Région PACA


Text: Le groupe Développement Durable et la ville de Rouen ont validé leur partenariat pour l'année à venir.
Predicted Entities:
Développement Durable => a_organisation
Rouen => a_place
True Entities:
Développement Durable
ville de Rouen
Rouen


Text: Le contrat entre l'association Humanité et Progrès et l'Université de Strasbourg a été ratifié sans amendement.
Predicted Entities:
Strasbourg => a