In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import prettytable
import torch

from ner import HuggingfacePredictor, NaivePredictor, NGramPredictor
from ner.utils import char_wise_f1_score_macro, BasePredictor

In [2]:
# Notebook-wide configuration.
# SEED is the random_state for the train/test split, so the split is repeatable.
SEED: int = 42
# N_LABELS is forwarded as `n_labels` to the character-wise macro F1 metric.
N_LABELS: int = 30

In [3]:
# Read data/train.jsonl (one JSON record per line) and hold out 10% of the rows
# as a test split; random_state=SEED keeps the split identical across runs.
train_df, test_df = train_test_split(
    pd.read_json('data/train.jsonl', lines=True),
    test_size=.1,
    random_state=SEED,
)

In [4]:
# Use the GPU when torch reports CUDA as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [5]:
# Models under comparison, keyed by a human-readable name.
# Insertion order is Naive, BERT, 2-gram, 3-gram.
predictors = {
    'Naive': NaivePredictor(),
    'BERT': HuggingfacePredictor(device=device),
    # One NGramPredictor per n, named "<n>-gram".
    **{f'{n}-gram': NGramPredictor(n) for n in (2, 3)},
}

In [6]:
# Fit every predictor on the same training columns.
train_sentences = train_df['sentences']
train_ners = train_df['ners']
for predictor in predictors.values():
    predictor.fit(train_sentences, train_ners)

Fitting on the training data:   0%|          | 0/467 [00:00<?, ?it/s]

Fitting on the training data:   0%|          | 0/467 [00:00<?, ?it/s]

Fitting on the training data:   0%|          | 0/467 [00:00<?, ?it/s]

In [7]:
def get_f1_score(test_df: pd.DataFrame, predictor: BasePredictor):
    """Average the per-sample macro F1 of ``predictor`` over ``test_df``.

    Each entry of ``test_df['sentences']`` is passed to ``predictor`` and the
    result is scored against the matching entry of ``test_df['ners']`` with
    ``char_wise_f1_score_macro`` (using the notebook-level ``N_LABELS``).

    Args:
        test_df: Frame with a ``'sentences'`` and a ``'ners'`` column.
        predictor: Callable predictor applied to one ``'sentences'`` entry
            at a time.

    Returns:
        The arithmetic mean of the per-sample scores.

    Raises:
        ZeroDivisionError: If ``test_df`` has no rows.
    """
    per_sample_scores = [
        char_wise_f1_score_macro(predictor(sentence), gold, n_labels=N_LABELS)
        for sentence, gold in zip(test_df['sentences'], test_df['ners'])
    ]
    return sum(per_sample_scores) / len(per_sample_scores)

In [8]:
%%capture
# Score each predictor on the held-out split; keys mirror those of `predictors`.
predictor_scores = dict(
    (label, get_f1_score(test_df, model))
    for label, model in predictors.items()
)

In [11]:
# Print the scores as a two-column text table, with F1 shown to five decimals.
table = prettytable.PrettyTable(field_names=['Predictor', 'F1'])
for name, score in predictor_scores.items():
    table.add_row([name, format(score, '.5f')])
print(table)

+-----------+---------+
| Predictor |    F1   |
+-----------+---------+
|   Naive   | 0.72595 |
|    BERT   | 0.92273 |
|   2-gram  | 0.68089 |
|   3-gram  | 0.53300 |
+-----------+---------+
