In [2]:
import estnltk
import os
import sys
import time
import torch

import pandas as pd

from estnltk import Text
from estnltk.converters import text_to_json, json_to_text

In [6]:
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.device_count())  # Number of GPUs available
print(torch.cuda.get_device_name(0))  # Name of the first GPU

True
1
NVIDIA GeForce RTX 3080


In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob

# Create directory for saving graphs
os.makedirs('entity_graphs', exist_ok=True)

# Find all CSV result files
csv_files = glob.glob('final_results/clean_fixed/results_*.csv')
print(f"Found {len(csv_files)} CSV files")

# Process each CSV file separately
for csv_file in csv_files:
    # Extract test set name from filename (model_YYYY-MM-DD)
    test_set = os.path.basename(csv_file).replace('results_model_', '').replace('.csv', '')
    if not test_set:
        test_set = os.path.basename(csv_file).replace('results_', '').replace('.csv', '')
    
    print(f"Processing test set: {test_set}")
    
    try:
        # Read the CSV file
        df = pd.read_csv(csv_file)
        
        if df.empty:
            print(f"Warning: Empty dataframe for {csv_file}")
            continue
        
        # Create directory for this test set
        test_set_dir = f'entity_graphs_clean_fixed/{test_set}'
        os.makedirs(test_set_dir, exist_ok=True)
        
        # 1. Overall F1 scores across models
        overall_df = df[df['tag'] == 'Overall'].sort_values('f1', ascending=False)
        
        plt.figure(figsize=(12, 6))
        ax = sns.barplot(x='model', y='f1', data=overall_df, palette='viridis')
        plt.title(f'Overall F1 Scores - Test Set: {test_set}', fontsize=15)
        plt.xlabel('Model', fontsize=12)
        plt.ylabel('F1 Score', fontsize=12)
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        
        # Add value labels on bars
        for i, v in enumerate(overall_df['f1']):
            ax.text(i, v + 0.01, f"{v:.4f}", ha='center', fontsize=9)
        
        plt.tight_layout()
        plt.savefig(f'{test_set_dir}/overall_f1.png', dpi=300)
        plt.close()
        
        # Create entity-specific graphs (PER, LOC, ORG)
        for entity in ['PER', 'LOC', 'ORG']:
            entity_df = df[df['tag'] == entity].sort_values('f1', ascending=False)
            
            if entity_df.empty:
                print(f"No data for entity {entity} in {test_set}")
                continue
            
            plt.figure(figsize=(12, 6))
            ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')
            plt.title(f'{entity} Entity F1 Scores - Test Set: {test_set}', fontsize=15)
            plt.xlabel('Model', fontsize=12)
            plt.ylabel('F1 Score', fontsize=12)
            plt.xticks(rotation=45, ha='right')
            plt.grid(axis='y', linestyle='--', alpha=0.7)
            
            # Add value labels on bars
            for i, v in enumerate(entity_df['f1']):
                ax.text(i, v + 0.01, f"{v:.4f}", ha='center', fontsize=9)
            
            plt.tight_layout()
            plt.savefig(f'{test_set_dir}/{entity}_f1.png', dpi=300)
            plt.close()
        
        # Create precision, recall, f1 comparison for each entity
        for entity in ['PER', 'LOC', 'ORG']:
            entity_df = df[df['tag'] == entity].sort_values('f1', ascending=False)
            
            if entity_df.empty:
                continue
                
            plt.figure(figsize=(14, 7))
            
            # Plot precision, recall, and F1 side by side
            x = range(len(entity_df))
            width = 0.25
            
            plt.bar([i - width for i in x], entity_df['precision'], width=width, label='Precision', color='#5DA5DA')
            plt.bar(x, entity_df['recall'], width=width, label='Recall', color='#FAA43A')
            plt.bar([i + width for i in x], entity_df['f1'], width=width, label='F1', color='#60BD68')
            
            plt.title(f'{entity} Entity Metrics Comparison - Test Set: {test_set}', fontsize=15)
            plt.xlabel('Model', fontsize=12)
            plt.ylabel('Score', fontsize=12)
            plt.xticks(x, entity_df['model'], rotation=45, ha='right')
            plt.legend()
            plt.grid(axis='y', linestyle='--', alpha=0.7)
            plt.tight_layout()
            plt.savefig(f'{test_set_dir}/{entity}_metrics_comparison.png', dpi=300)
            plt.close()
        
        # Create a combined entity F1 comparison
        plt.figure(figsize=(12, 6))
        
        # Get data for each entity
        per_df = df[df['tag'] == 'PER'].set_index('model')['f1']
        loc_df = df[df['tag'] == 'LOC'].set_index('model')['f1']
        org_df = df[df['tag'] == 'ORG'].set_index('model')['f1'] 
        
        # Combine into a single dataframe
        combined_df = pd.DataFrame({
            'PER': per_df,
            'LOC': loc_df,
            'ORG': org_df
        }).reset_index()
        
        # Melt for seaborn
        melted_df = pd.melt(combined_df, id_vars=['model'], var_name='Entity', value_name='F1')
        
        # Create the grouped bar chart
        sns.barplot(x='model', y='F1', hue='Entity', data=melted_df, palette=['#D65F5F', '#4ECDC4', '#C44D58'])
        
        plt.title(f'Entity F1 Comparison by Model - Test Set: {test_set}', fontsize=15)
        plt.xlabel('Model', fontsize=12)
        plt.ylabel('F1 Score', fontsize=12)
        plt.xticks(rotation=45, ha='right')
        plt.legend(title='Entity Type')
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        plt.tight_layout()
        plt.savefig(f'{test_set_dir}/entity_comparison.png', dpi=300)
        plt.close()
        
        # Last, create a model ranking visualization based on average entity F1
        model_ranks = df[df['tag'] != 'Overall'].groupby('model')['f1'].mean().reset_index()
        model_ranks = model_ranks.sort_values('f1', ascending=False)
        
        plt.figure(figsize=(12, 6))
        ax = sns.barplot(x='model', y='f1', data=model_ranks, palette='viridis')
        plt.title(f'Average Entity F1 Scores - Test Set: {test_set}', fontsize=15)
        plt.xlabel('Model', fontsize=12)
        plt.ylabel('Average F1 Score', fontsize=12)
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', linestyle='--', alpha=0.7)
        
        # Add value labels on bars
        for i, v in enumerate(model_ranks['f1']):
            ax.text(i, v + 0.01, f"{v:.4f}", ha='center', fontsize=9)
        
        plt.tight_layout()
        plt.savefig(f'{test_set_dir}/avg_entity_f1.png', dpi=300)
        plt.close()
        
        print(f"Created graphs for test set: {test_set}")
        
    except Exception as e:
        print(f"Error processing {csv_file}: {str(e)}")

print("Visualization complete. Entity-specific graphs saved in 'entity_graphs' directory.")

Found 5 CSV files
Processing test set: results_1922-04-24_1936-09-07_fixed



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=overall_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `h

Created graphs for test set: results_1922-04-24_1936-09-07_fixed
Processing test set: results_1927-03-28_1941-01-03_fixed



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=overall_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `h

Created graphs for test set: results_1927-03-28_1941-01-03_fixed
Processing test set: results_1932-01-25_fixed



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=overall_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `h

Created graphs for test set: results_1932-01-25_fixed
Processing test set: results_1934-10-15_fixed



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=overall_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `h

Created graphs for test set: results_1934-10-15_fixed
Processing test set: results_1935-09-30_fixed



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=overall_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  ax = sns.barplot(x='model', y='f1', data=entity_df, palette='viridis')

Passing `palette` without assigning `h

Created graphs for test set: results_1935-09-30_fixed
Visualization complete. Entity-specific graphs saved in 'entity_graphs' directory.


In [1]:
import os
import time
import sys
import estnltk
import multiprocessing as mp

from estnltk import Text
from estnltk.converters import text_to_json, json_to_text

from words_tokenization import preprocess_words
from sentence_tokenization import sentence_tokenizer
from sentence_tokenization import postfix_sentence_breaks_inside_parentheses

input_folder = 'data/tudengid'
output_folder = 'data/processed_tudengid'
os.makedirs(output_folder, exist_ok=True)

for name in os.listdir(input_folder):
    with open(input_folder + "/" + name, "r", encoding="utf-8") as file:
        content = file.read()
        text_import = json_to_text(json_text=content)
    
    #vana lausestus ja sõnestus maha
    text_import.pop_layer('sentences')
    text_import.pop_layer('words')
    text_import.pop_layer('compound_tokens')

    #uus lausestus ja sõnestus
    preprocess_words( text_import )
    sentence_tokenizer.tag( text_import )
    postfix_sentence_breaks_inside_parentheses( text_import, doc_name='' )

    text_to_json(text_import, file=(output_folder + "/" + name))

In [6]:
from estnltk import download, get_resource_paths
download("estbertner")
download("estbertner_v2")
from estnltk_neural.taggers import EstBERTNERTagger
from data_preprocessing import TokenizationPreprocessorFixed
from bert_ner_tagger import BertNERTagger

estbertner = EstBERTNERTagger()
model_location = get_resource_paths("estbertner_v2", only_latest=True)
estbertner2 = EstBERTNERTagger(model_location=model_location, output_layer='estbertner2', batch_size=500)
hf_model_id = 'tartuNLP/est-roberta-hist-ner'
bertner = BertNERTagger(bert_tokenizer_location=hf_model_id, 
                                bert_ner_location=hf_model_id, output_layer='bert_ner',
                                token_level=False, do_lower_case=False, use_fast=False)

Some weights of the model checkpoint at C:\Users\Norman\anaconda3\Lib\site-packages\estnltk\estnltk_resources\estbert\ner_model_hf_tartunlp_2022-05-06\ were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda


Resource 'estbertner_from_tartunlp_hf_2022-05-06' has already been downloaded.
Resource 'estbertner_v2_from_tartunlp_hf_2022-12-12' has already been downloaded.


Device set to use cuda


In [10]:
input_folder = 'data/processed_gold'
output_folder = 'data/tagged_gold'
os.makedirs(output_folder, exist_ok=True)

for name in os.listdir(input_folder):
    with open(input_folder + "/" + name, "r", encoding="utf-8") as file:
        content = file.read()
        text_import = json_to_text(json_text=content)
    
    estbertner.tag(text_import)
    estbertner2.tag(text_import)
    bertner.tag(text_import)

    text_to_json(text_import, file=os.path.join(output_folder, name))

In [5]:
def print_results(full_result, result_by_tags, name, gold_name):
    result_str = f"{name} vs {gold_name}\nOverall - precision:{round(full_result['strict']['precision'], 4)}, recall:{round(full_result['strict']['recall'], 4)}, f1:{round(full_result['strict']['f1'], 4)}\n"
    for tag, stats in result_by_tags.items():
        result_str += (tag + ':' + 
                ' precision: ' + str(round(stats['strict']['precision'], 4)) + ',' +
                ' recall: ' + str(round(stats['strict']['recall'], 4)) + ',' +
                ' f1: ' + str(round(stats['strict']['f1'], 4)) + '\n')
    #return result_str
    print(result_str)

# gold - LOC, PER, ORG, POSITION, ORG_POL, ORG_GPE (-> GPE), EVENT, LAW, MONEY, UNK, LOC_ADDRESS
# tudeng - place, person, organization, Address, Event,  school, party, unclear,  work
# tudeng universal - LOC, PER, ORG, Address, EVENT,  school, unclear,  work

# estbertner - LOC, PER, ORG
# estbertner2 - LOC, PER, ORG, DATE, EVENT, GPE, MONEY, PERCENT, PROD, TIME, TITLE
# bertner - LOC, PER, ORG, LOC_ORG (-> LOC)

# gold - LOC, PER, ORG, POSITION (-> TITLE), ORG_GPE (-> GPE), EVENT, MONEY
# tudeng - place (-> LOC), person (-> PER), organization (-> ORG), Event (-> EVENT)

# estbertner - LOC, PER, ORG
# estbertner2 - LOC, PER, ORG, EVENT, GPE, MONEY, TITLE
# bertner - LOC, PER, ORG, LOC_ORG (-> GPE)

def transform_data(layer, eval_layer, text):
    gold = { #LOC_ADDRESS
        "PER": "PER",
        "LOC": "LOC",
        "LOC_ADDRESS": "LOC",
        "ORG": "ORG",
        'POSITION': 'TITLE',
        "ORG_GPE": "GPE",
        'ORG_POL': 'ORG',
        'EVENT': 'EVENT',
        'MONEY': 'MONEY'
    }
    
    tudeng = {
        'place': 'LOC',
        'person': 'PER',
        'organization': 'ORG',
        'Event': 'EVENT',
        'date': 'DATE',
        'Address': 'LOC',
        'party': 'ORG'
    }

    estbertner = {
        "PER": "PER",
        "LOC": "LOC",
        "ORG": "ORG",
    }
    
    estbertner2 = {
        "PER": "PER",
        "LOC": "LOC",
        "ORG": "ORG",
        'EVENT': 'EVENT',
        'GPE': 'GPE',
        'MONEY': 'MONEY',
        'TITLE': 'TITLE',
        'DATE': 'DATE'
    }

    bertner = {
        "PER": "PER",
        "LOC": "LOC",
        "ORG": "ORG",
        'LOC_ORG': 'GPE'
    }

    tags = {
        'gold': gold,
        'tudeng': tudeng,
        'estbertner': estbertner,
        'estbertner2': estbertner2,
        'bertner': bertner,
    }
    
    layer_names = {
        'gold': 'ne_gold_a',
        'tudeng': 'manual_named_entities',
        'estbertner': 'estbertner',
        'estbertner2': 'estbertner2',
        'bertner': 'bert_ner'
    }

    layer_dict = tags[layer]
    eval_dict = tags[eval_layer]
    
    layer_values = set(layer_dict.values())
    eval_values = set(eval_dict.values())
    common_values = layer_values.intersection(eval_values)
    #common_values = {"LOC", "PER", "ORG"}

    layer_dict = {k: v for k, v in layer_dict.items() if v in common_values}
    eval_dict = {k: v for k, v in eval_dict.items() if v in common_values}

    data = []
    eval_data = []

    for span in text[layer_names[layer]]:
        if layer == 'bertner':
            tag = span.nertag[0]
        else:
            tag = span.nertag
        if tag in layer_dict.keys():
            data.append({"label": layer_dict[tag], "start": span.start, "end": span.end})

    for span in text[layer_names[eval_layer]]:
        if eval_layer == 'gold':
            tag = span.tag
        else:
            tag = span.tag[0]
        if tag in eval_dict.keys():
            eval_data.append({"label": eval_dict[tag], "start": span.start, "end": span.end})

    return data, eval_data, list(set(layer_dict.values()))

In [7]:
from nervaluate import Evaluator

input_folder = 'data/tagged_tudengid'

#gold_layers = ['tudeng'] #gold
gold_layer = 'tudeng' #gold
layers = ['estbertner', 'estbertner2', 'bertner']

for layer in layers:
    gold_data = []
    layer_data = []
    for name in os.listdir(input_folder):
        with open(input_folder + "/" + name, "r", encoding="utf-8") as file:
            content = file.read()
            text_import = json_to_text(json_text=content)
        temp1, temp2, eval_tags = transform_data(layer, gold_layer, text_import)
        layer_data.append(temp1)
        gold_data.append(temp2)
    print(eval_tags)
    evaluator = Evaluator(gold_data, layer_data, tags=eval_tags)
    results, results_per_tag, result_indices, result_indices_by_tag = evaluator.evaluate()
    print_results(results, results_per_tag, layer, gold_layer)

['PER', 'LOC', 'ORG']
estbertner vs tudeng
Overall - precision:0.417, recall:0.4025, f1:0.4096
PER: precision: 0.4656, recall: 0.5544, f1: 0.5061
LOC: precision: 0.2688, recall: 0.2629, f1: 0.2658
ORG: precision: 0.4059, recall: 0.2696, f1: 0.324

['PER', 'EVENT', 'LOC', 'DATE', 'ORG']
estbertner2 vs tudeng
Overall - precision:0.2892, recall:0.3533, f1:0.3181
PER: precision: 0.38, recall: 0.4443, f1: 0.4097
EVENT: precision: 0.0116, recall: 0.0556, f1: 0.0192
LOC: precision: 0.1899, recall: 0.1066, f1: 0.1365
DATE: precision: 0.0478, recall: 0.2126, f1: 0.078
ORG: precision: 0.3195, recall: 0.37, f1: 0.3429

['PER', 'LOC', 'ORG']
bertner vs tudeng
Overall - precision:0.4527, recall:0.3588, f1:0.4003
PER: precision: 0.5222, recall: 0.654, f1: 0.5807
LOC: precision: 0.2138, recall: 0.1139, f1: 0.1486
ORG: precision: 0.2808, recall: 0.0881, f1: 0.1342



In [11]:
from nervaluate import Evaluator

input_folder = 'data/tagged_gold'

#gold_layers = ['tudeng'] #gold
gold_layer = 'gold' # tudeng
layers = ['estbertner', 'estbertner2', 'bertner']

for layer in layers:
    gold_data = []
    layer_data = []
    for name in os.listdir(input_folder):
        with open(input_folder + "/" + name, "r", encoding="utf-8") as file:
            content = file.read()
            text_import = json_to_text(json_text=content)
        temp1, temp2, eval_tags = transform_data(layer, gold_layer, text_import)
        layer_data.append(temp1)
        gold_data.append(temp2)
    print(eval_tags)
    evaluator = Evaluator(gold_data, layer_data, tags=eval_tags)
    results, results_per_tag, result_indices, result_indices_by_tag = evaluator.evaluate()
    print_results(results, results_per_tag, layer, gold_layer)

['PER', 'LOC', 'ORG']
estbertner vs gold
Overall - precision:0.6465, recall:0.5002, f1:0.564
PER: precision: 0.7364, recall: 0.8324, f1: 0.7815
LOC: precision: 0.3524, recall: 0.2261, f1: 0.2755
ORG: precision: 0.6704, recall: 0.3811, f1: 0.486

['MONEY', 'PER', 'EVENT', 'GPE', 'LOC', 'TITLE', 'ORG']
estbertner2 vs gold
Overall - precision:0.5316, recall:0.4031, f1:0.4585
MONEY: precision: 0.2127, recall: 0.1074, f1: 0.1427
PER: precision: 0.7206, recall: 0.6985, f1: 0.7094
EVENT: precision: 0.0, recall: 0.0, f1: 0
GPE: precision: 0.0667, recall: 1.0, f1: 0.125
LOC: precision: 0.1682, recall: 0.1057, f1: 0.1298
TITLE: precision: 0.3579, recall: 0.3386, f1: 0.348
ORG: precision: 0.669, recall: 0.4647, f1: 0.5485

['PER', 'LOC', 'GPE', 'ORG']
bertner vs gold
Overall - precision:0.4712, recall:0.3359, f1:0.3922
PER: precision: 0.6609, recall: 0.7807, f1: 0.7158
LOC: precision: 0.1561, recall: 0.1232, f1: 0.1377
GPE: precision: 0.0, recall: 0.0, f1: 0
ORG: precision: 0.3663, recall: 0.1035

In [24]:
from nervaluate import Evaluator

input_folder = 'data/tagged_gold'

#gold_layers = ['tudeng'] #gold
gold_layer = 'gold' # tudeng
layers = ['estbertner', 'estbertner2', 'bertner']

for layer in layers:
    gold_data = []
    layer_data = []
    for name in os.listdir(input_folder):
        with open(input_folder + "/" + name, "r", encoding="utf-8") as file:
            content = file.read()
            text_import = json_to_text(json_text=content)
        temp1, temp2, eval_tags = transform_data(layer, gold_layer, text_import)
        layer_data.append(temp1)
        gold_data.append(temp2)
    print(eval_tags)
    evaluator = Evaluator(gold_data, layer_data, tags=eval_tags)
    results, results_per_tag, result_indices, result_indices_by_tag = evaluator.evaluate()
    print_results(results, results_per_tag, layer, gold_layer)

['LOC', 'ORG', 'PER']
estbertner vs gold
Overall - precision:0.6465, recall:0.5624, f1:0.6015
LOC: precision: 0.4032, recall: 0.4648, f1: 0.4318
ORG: precision: 0.6519, recall: 0.3817, f1: 0.4814
PER: precision: 0.7163, recall: 0.8324, f1: 0.77

['LOC', 'TITLE', 'EVENT', 'GPE', 'MONEY', 'PER', 'ORG']
estbertner2 vs gold
Overall - precision:0.5303, recall:0.4395, f1:0.4807
LOC: precision: 0.1701, recall: 0.2049, f1: 0.1859
TITLE: precision: 0.3579, recall: 0.3386, f1: 0.348
EVENT: precision: 0.0, recall: 0.0, f1: 0
GPE: precision: 0.0545, recall: 1.0, f1: 0.1034
MONEY: precision: 0.2127, recall: 0.1074, f1: 0.1427
PER: precision: 0.7117, recall: 0.6985, f1: 0.7051
ORG: precision: 0.6633, recall: 0.4647, f1: 0.5465

['GPE', 'LOC', 'ORG', 'PER']
bertner vs gold
Overall - precision:0.4722, recall:0.3774, f1:0.4195
GPE: precision: 0.0, recall: 0.0, f1: 0
LOC: precision: 0.206, recall: 0.25, f1: 0.2259
ORG: precision: 0.3663, recall: 0.1036, f1: 0.1616
PER: precision: 0.6383, recall: 0.7807,

In [7]:
#vaatan ainult LOC PER ORG, eraldi line funktsioonides, et neid ainult vaadata
from nervaluate import Evaluator

input_folder = 'data/tagged_tudengid'

#gold_layers = ['tudeng'] #gold
gold_layer = 'tudeng' #gold
layers = ['estbertner', 'estbertner2', 'bertner']

for layer in layers:
    gold_data = []
    layer_data = []
    for name in os.listdir(input_folder):
        with open(input_folder + "/" + name, "r", encoding="utf-8") as file:
            content = file.read()
            text_import = json_to_text(json_text=content)
        temp1, temp2, eval_tags = transform_data(layer, gold_layer, text_import)
        layer_data.append(temp1)
        gold_data.append(temp2)
    print(eval_tags)
    evaluator = Evaluator(gold_data, layer_data, tags=eval_tags)
    results, results_per_tag, result_indices, result_indices_by_tag = evaluator.evaluate()
    print_results(results, results_per_tag, layer, gold_layer)

['PER', 'ORG', 'LOC']
estbertner vs tudeng
Overall - precision:0.4168, recall:0.409, f1:0.4128
PER: precision: 0.4642, recall: 0.5544, f1: 0.5053
ORG: precision: 0.4024, recall: 0.272, f1: 0.3246
LOC: precision: 0.2735, recall: 0.2833, f1: 0.2783

['PER', 'ORG', 'LOC']
estbertner2 vs tudeng
Overall - precision:0.3397, recall:0.366, f1:0.3524
PER: precision: 0.3803, recall: 0.4443, f1: 0.4099
ORG: precision: 0.3183, recall: 0.3729, f1: 0.3434
LOC: precision: 0.1945, recall: 0.1146, f1: 0.1442

['PER', 'ORG', 'LOC']
bertner vs tudeng
Overall - precision:0.4523, recall:0.3645, f1:0.4037
PER: precision: 0.5197, recall: 0.654, f1: 0.5792
ORG: precision: 0.2785, recall: 0.0884, f1: 0.1342
LOC: precision: 0.2198, recall: 0.1223, f1: 0.1571



In [8]:
#vaatan ainult LOC PER ORG, eraldi line funktsioonides, et neid ainult vaadata
from nervaluate import Evaluator

input_folder = 'data/tagged_gold'

#gold_layers = ['tudeng'] #gold
gold_layer = 'gold' # tudeng
layers = ['estbertner', 'estbertner2', 'bertner']

for layer in layers:
    gold_data = []
    layer_data = []
    for name in os.listdir(input_folder):
        with open(input_folder + "/" + name, "r", encoding="utf-8") as file:
            content = file.read()
            text_import = json_to_text(json_text=content)
        temp1, temp2, eval_tags = transform_data(layer, gold_layer, text_import)
        layer_data.append(temp1)
        gold_data.append(temp2)
    print(eval_tags)
    evaluator = Evaluator(gold_data, layer_data, tags=eval_tags)
    results, results_per_tag, result_indices, result_indices_by_tag = evaluator.evaluate()
    print_results(results, results_per_tag, layer, gold_layer)

['PER', 'ORG', 'LOC']
estbertner vs gold
Overall - precision:0.6465, recall:0.5624, f1:0.6015
PER: precision: 0.7163, recall: 0.8324, f1: 0.77
ORG: precision: 0.6519, recall: 0.3817, f1: 0.4814
LOC: precision: 0.4032, recall: 0.4648, f1: 0.4318

['PER', 'ORG', 'LOC']
estbertner2 vs gold
Overall - precision:0.6084, recall:0.5244, f1:0.5633
PER: precision: 0.6611, recall: 0.7011, f1: 0.6805
ORG: precision: 0.6627, recall: 0.4647, f1: 0.5463
LOC: precision: 0.2248, recall: 0.2049, f1: 0.2144

['PER', 'ORG', 'LOC']
bertner vs gold
Overall - precision:0.5556, recall:0.378, f1:0.4499
PER: precision: 0.6402, recall: 0.7807, f1: 0.7035
ORG: precision: 0.4265, recall: 0.1036, f1: 0.1668
LOC: precision: 0.3071, recall: 0.2508, f1: 0.2761



In [35]:
input_folder = 'final_results'

for filename in os.listdir(input_folder):
    df = pd.read_csv(os.path.join(input_folder, filename))

    final_models = set()
    
    for category in df['tag'].unique():
        estbertner_f1 = df[(df['model'] == 'estbertner') & (df['tag'] == category)]['f1'].iloc[0]
        print(df[(df['model'] == 'estbertner') & (df['tag'] == category)]['f1'])
        filtered = df[(df['model'] != 'estbertner') & (df['tag'] == category)]
        results = filtered[filtered['f1'] >= estbertner_f1].sort_values('f1', ascending=False)
        final_models.update(results['model'])
        print(results)
        print(f'estbertner f1 {estbertner_f1}')
        print('-' * 50)
    print(final_models)
    break

96    0.6025
Name: f1, dtype: float64
             model      tag  precision  recall      f1
8    model13000_65  Overall     0.6450  0.6486  0.6468
4    model13000_60  Overall     0.6059  0.6897  0.6451
40   model26000_65  Overall     0.6097  0.6598  0.6338
28   model19500_70  Overall     0.6216  0.6449  0.6330
104         tudeng  Overall     0.5997  0.6636  0.6300
48   model32500_60  Overall     0.6010  0.6617  0.6299
68   model50163_60  Overall     0.6038  0.6579  0.6297
32   model25254_70  Overall     0.6292  0.6280  0.6286
60   model39000_60  Overall     0.5966  0.6636  0.6283
12   model13000_70  Overall     0.6245  0.6280  0.6263
52   model32500_65  Overall     0.6118  0.6393  0.6252
24   model19500_65  Overall     0.5997  0.6523  0.6249
64   model45500_60  Overall     0.5872  0.6673  0.6247
20   model19500_60  Overall     0.5883  0.6598  0.6220
56   model38307_65  Overall     0.5905  0.6523  0.6199
16   model13000_75  Overall     0.6434  0.5869  0.6139
36   model26000_60  Overall

In [3]:
input_folder = 'final_results/pre_clean_fixed'
all_dfs = []

for filename in os.listdir(input_folder):
    df = pd.read_csv(os.path.join(input_folder, filename))
    df['filename'] = filename
    all_dfs.append(df)

combined_df = pd.concat(all_dfs, ignore_index=True)

# Now you have all results in one DataFrame with a filename column
print(combined_df.head())

combined_df['lavend'] = combined_df['model'].str.split('_').str[1]

print(combined_df.head())

avg_f1_by_lavend = combined_df.groupby('lavend')['f1'].mean().sort_index()
print(avg_f1_by_lavend)

avg_f1_by_lavend_and_tag = combined_df.groupby(['lavend', 'tag'])['f1'].mean().sort_index()
print(avg_f1_by_lavend_and_tag)

print('-'*10)
avg_f1_by_lavend_and_tag = combined_df[combined_df['tag'] == 'Overall'].groupby(['lavend'])['f1'].mean().sort_index()
print(avg_f1_by_lavend_and_tag)
print('-'*10)

for tag in combined_df['tag'].unique():
    print(f"\n--- Tag: {tag} ---")
    tag_results = (combined_df[combined_df['tag'] == tag]
                   .groupby('lavend')['f1']
                   .mean()
                   .sort_values(ascending=False))
    print(tag_results)

           model      tag  precision  recall      f1  \
0    model103_95  Overall     0.4651  0.0374  0.0692   
1    model103_95      LOC     0.0000  0.0000  0.0000   
2    model103_95      ORG     0.0000  0.0000  0.0000   
3    model103_95      PER     0.4762  0.1538  0.2326   
4  model13000_60  Overall     0.7738  0.7290  0.7507   

                                  filename  
0  results_1922-04-24_1936-09-07_fixed.csv  
1  results_1922-04-24_1936-09-07_fixed.csv  
2  results_1922-04-24_1936-09-07_fixed.csv  
3  results_1922-04-24_1936-09-07_fixed.csv  
4  results_1922-04-24_1936-09-07_fixed.csv  
           model      tag  precision  recall      f1  \
0    model103_95  Overall     0.4651  0.0374  0.0692   
1    model103_95      LOC     0.0000  0.0000  0.0000   
2    model103_95      ORG     0.0000  0.0000  0.0000   
3    model103_95      PER     0.4762  0.1538  0.2326   
4  model13000_60  Overall     0.7738  0.7290  0.7507   

                                  filename lavend  
0  r

In [53]:
#A - igast sammust parim mudel, selle tulemused üle iga sammud, f1
best_models = {}

input_folder = 'final_results'

models_in_all_steps = set()

for filename in os.listdir(input_folder):
    df = pd.read_csv(os.path.join(input_folder, filename))
    
    if len(models_in_all_steps) == 0:
        models_in_all_steps.update(df['model'].unique())
    else:
        models_in_all_steps = models_in_all_steps & set(df['model'].unique())
        
models_in_all_steps.difference_update({'estbertner', 'opetaja', 'tudeng'})

selected_models = {}

for filename in os.listdir(input_folder):
    df = pd.read_csv(os.path.join(input_folder, filename)).sort_values('f1', ascending=False)
    df = df[df['tag'] == 'Overall']
    for index, row in df.iterrows():
        if row['model'] in models_in_all_steps:
            #print(f'{filename} - Best model: {row['model']}, f1: {row['f1']}')
            selected_models[filename] = row['model']
            break
#parimad mudelid igal sammul, mudel pidi esinema igas sammus
print(selected_models.values())

tags = ['Overall', 'LOC', 'PER', 'ORG']
models = ['estbertner', 'opetaja', 'tudeng'] + list(selected_models.values())

columns = ['model', 'tags'] + ['_'.join(file.split('_')[1:]).split('.')[0] for file in os.listdir(input_folder)]
df = pd.DataFrame(columns=columns)

rows_list = []

for tag in tags:
    for model in models:
        row_data = {'model': model, 'tags': tag}
        for filename in os.listdir(input_folder):
            result_df = pd.read_csv(os.path.join(input_folder, filename))
            file_identifier = '_'.join(filename.split('_')[1:]).split('.')[0]
            value = result_df[(result_df['tag'] == tag) & (result_df['model'] == model)]
            row_data[file_identifier] = value['f1'].iloc[0]
        rows_list.append(row_data)

df = pd.DataFrame(rows_list)
df

dict_values(['model13000_65', 'model19500_70', 'model13000_65', 'model32500_60', 'model32500_60'])


Unnamed: 0,model,tags,1922-04-24_1936-09-07,1927-03-28_1941-01-03,1932-01-25,1934-10-15,1935-09-30
0,estbertner,Overall,0.6025,0.56,0.4025,0.5899,0.6876
1,opetaja,Overall,0.5322,0.4723,0.3618,0.47,0.4819
2,tudeng,Overall,0.63,0.5978,0.4521,0.6006,0.6467
3,model13000_65,Overall,0.6468,0.5603,0.4589,0.5838,0.621
4,model19500_70,Overall,0.633,0.6037,0.4503,0.5656,0.6137
5,model13000_65,Overall,0.6468,0.5603,0.4589,0.5838,0.621
6,model32500_60,Overall,0.6299,0.5887,0.4432,0.5849,0.6558
7,model32500_60,Overall,0.6299,0.5887,0.4432,0.5849,0.6558
8,estbertner,LOC,0.4393,0.4483,0.144,0.1619,0.4534
9,opetaja,LOC,0.2602,0.0741,0.186,0.2437,0.1501
