In [None]:
!git clone https://github.com/teropa/nlp.git
!ls nlp/resources/corpora/conll2002


In [2]:
import os

DATA_DIR = "nlp/resources/corpora/conll2002"
os.chdir(DATA_DIR)

!tar -xzf esp.tgz
!ls


tar (child): esp.tgz: Cannot open: No such file or directory
tar (child): Error is not recoverable: exiting now
tar: Child returned status 2
tar: Error is not recoverable: exiting now
esp.testa  esp.testb  esp.train  ned.testa  ned.testb  ned.train  README


In [12]:
os.chdir("/content")
CONLL_DIR = "/content/nlp/resources/corpora/conll2002"


In [13]:
def read_conll2002(path):
    sentences = []
    labels = []
    tokens = []
    tags = []

    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                if tokens:
                    sentences.append(tokens)
                    labels.append(tags)
                    tokens = []
                    tags = []
                continue

            parts = line.split()
            tokens.append(parts[0])
            tags.append(parts[-1])

    if tokens:
        sentences.append(tokens)
        labels.append(tags)

    return sentences, labels


In [5]:
CONLL_DIR = "/content/nlp/resources/corpora/conll2002"

train_file = f"{CONLL_DIR}/esp.train"
dev_file   = f"{CONLL_DIR}/esp.testa"
test_file  = f"{CONLL_DIR}/esp.testb"

X_train_tokens, y_train_bio = read_conll2002(train_file)
X_dev_tokens,   y_dev_bio   = read_conll2002(dev_file)
X_test_tokens,  y_test_bio  = read_conll2002(test_file)

print(len(X_train_tokens), len(X_dev_tokens), len(X_test_tokens))


8323 1915 1517


In [6]:
print(X_train_tokens[0])
print(y_train_bio[0])


['Melbourne', '(', 'Australia', ')', ',', '25', 'may', '(', 'EFE', ')', '.']
['B-LOC', 'O', 'B-LOC', 'O', 'O', 'O', 'O', 'O', 'B-ORG', 'O', 'O']


In [7]:
all_labels = set()
for seq in (y_train_bio + y_dev_bio + y_test_bio):
    all_labels.update(seq)

labels = sorted(all_labels)
label2id = {lab: i for i, lab in enumerate(labels)}
id2label = {i: lab for lab, i in label2id.items()}

labels, label2id


(['B-LOC',
  'B-MISC',
  'B-ORG',
  'B-PER',
  'I-LOC',
  'I-MISC',
  'I-ORG',
  'I-PER',
  'O'],
 {'B-LOC': 0,
  'B-MISC': 1,
  'B-ORG': 2,
  'B-PER': 3,
  'I-LOC': 4,
  'I-MISC': 5,
  'I-ORG': 6,
  'I-PER': 7,
  'O': 8})

In [8]:
import numpy as np
from keras.preprocessing.sequence import pad_sequences

MAX_LEN = 100

all_train_tokens = [w for s in X_train_tokens for w in s]
vocab_words = sorted(set(all_train_tokens))

word2idx = {w: i+2 for i, w in enumerate(vocab_words)}
word2idx["<PAD>"] = 0
word2idx["<UNK>"] = 1

idx2word = {i: w for w, i in word2idx.items()}


In [9]:
def encode_sentences(tokens_list, labels_list, word2idx, label2id, max_len):
    X_ids = []
    y_ids = []
    for tokens, labels in zip(tokens_list, labels_list):
        x = [word2idx.get(w, 1) for w in tokens]
        y = [label2id[t] for t in labels]
        X_ids.append(x)
        y_ids.append(y)

    X_pad = pad_sequences(X_ids, maxlen=max_len, padding="post", truncating="post", value=0)
    y_pad = pad_sequences(y_ids, maxlen=max_len, padding="post", truncating="post", value=label2id["O"])

    return np.array(X_pad), np.array(y_pad)

X_train_lstm, y_train_lstm = encode_sentences(X_train_tokens, y_train_bio, word2idx, label2id, MAX_LEN)
X_dev_lstm,   y_dev_lstm   = encode_sentences(X_dev_tokens,   y_dev_bio,   word2idx, label2id, MAX_LEN)
X_test_lstm,  y_test_lstm  = encode_sentences(X_test_tokens,  y_test_bio,  word2idx, label2id, MAX_LEN)


In [10]:
def create_mask(X_padded):
    return (X_padded != 0).astype("float32")

train_mask = create_mask(X_train_lstm)
dev_mask   = create_mask(X_dev_lstm)
test_mask  = create_mask(X_test_lstm)


In [None]:
!pip install seqeval

from seqeval.metrics import classification_report, f1_score

def decode_with_mask(y_true_ids, y_pred_ids, mask, id2label):
    true_tags = []
    pred_tags = []

    for t_seq, p_seq, m_seq in zip(y_true_ids, y_pred_ids, mask):
        t_list = []
        p_list = []
        for t, p, m in zip(t_seq, p_seq, m_seq):
            if m == 0:
                continue
            t_list.append(id2label[int(t)])
            p_list.append(id2label[int(p)])
        true_tags.append(t_list)
        pred_tags.append(p_list)

    return true_tags, pred_tags


In [14]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, Dropout, TimeDistributed, Dense
from tensorflow.keras.optimizers import Adam

EMB_DIM    = 150
HIDDEN_DIM = 200

num_words  = len(word2idx)
num_labels = len(labels)

inputs = Input(shape=(MAX_LEN,), name="input_ids")

x = Embedding(
    input_dim=num_words,
    output_dim=EMB_DIM,
    mask_zero=True,
    name="word_embeddings"
)(inputs)

x = Bidirectional(LSTM(HIDDEN_DIM, return_sequences=True))(x)
x = Dropout(0.5)(x)

outputs = TimeDistributed(
    Dense(num_labels, activation="softmax")
)(x)

bilstm_model = Model(inputs=inputs, outputs=outputs)

bilstm_model.compile(
    optimizer=Adam(learning_rate=5e-4),       # lr 1e-3 → 5e-4
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

bilstm_model.summary()


In [15]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

early_stop = EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True
)

checkpoint = ModelCheckpoint(
    "bilstm_best.weights.h5",
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=True
)

history_bilstm = bilstm_model.fit(
    X_train_lstm,
    y_train_lstm,
    sample_weight=train_mask,
    epochs=20,
    batch_size=16,
    validation_data=(X_dev_lstm, y_dev_lstm),
    callbacks=[early_stop, checkpoint]
)


Epoch 1/20
[1m521/521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 29ms/step - accuracy: 0.9491 - loss: 0.7187 - val_accuracy: 0.9751 - val_loss: 0.3079
Epoch 2/20
[1m521/521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 27ms/step - accuracy: 0.9810 - loss: 0.1902 - val_accuracy: 0.9817 - val_loss: 0.2379
Epoch 3/20
[1m521/521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 26ms/step - accuracy: 0.9902 - loss: 0.1023 - val_accuracy: 0.9837 - val_loss: 0.2117
Epoch 4/20
[1m521/521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 26ms/step - accuracy: 0.9948 - loss: 0.0557 - val_accuracy: 0.9844 - val_loss: 0.2198
Epoch 5/20
[1m521/521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 26ms/step - accuracy: 0.9963 - loss: 0.0398 - val_accuracy: 0.9850 - val_loss: 0.2226
Epoch 6/20
[1m521/521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 26ms/step - accuracy: 0.9975 - loss: 0.0274 - val_accuracy: 0.9848 - val_loss: 0.2532


In [16]:
from seqeval.metrics import classification_report, f1_score


y_pred_probs = bilstm_model.predict(X_test_lstm)
y_pred_ids   = y_pred_probs.argmax(axis=-1)


def decode_with_mask(y_true_ids, y_pred_ids, mask, id2label):
    all_true, all_pred = [], []
    for t_seq, p_seq, m_seq in zip(y_true_ids, y_pred_ids, mask):
        t_tags, p_tags = [], []
        for t, p, m in zip(t_seq, p_seq, m_seq):
            if m == 0:  # PAD
                continue
            t_tags.append(id2label[int(t)])
            p_tags.append(id2label[int(p)])
        all_true.append(t_tags)
        all_pred.append(p_tags)
    return all_true, all_pred

true_tags_bilstm, pred_tags_bilstm = decode_with_mask(
    y_test_lstm, y_pred_ids, test_mask, id2label
)

print("BiLSTM classification report:")
print(classification_report(true_tags_bilstm, pred_tags_bilstm, digits=4))
print("BiLSTM F1:", f1_score(true_tags_bilstm, pred_tags_bilstm))


# --- MISC hariç F1 ---

def remove_misc(true_tags, pred_tags):
    new_true, new_pred = [], []
    for t_seq, p_seq in zip(true_tags, pred_tags):
        t2, p2 = [], []
        for t, p in zip(t_seq, p_seq):
            if "MISC" in t:
                continue
            t2.append(t)
            p2.append(p)
        new_true.append(t2)
        new_pred.append(p2)
    return new_true, new_pred

true_no_misc, pred_no_misc = remove_misc(true_tags_bilstm, pred_tags_bilstm)

print("\nBiLSTM (MISC hariç) classification report:")
print(classification_report(true_no_misc, pred_no_misc, digits=4))
print("BiLSTM F1 (no MISC):", f1_score(true_no_misc, pred_no_misc))


[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step
BiLSTM classification report:
              precision    recall  f1-score   support

         LOC     0.6890    0.6800    0.6845      1072
        MISC     0.3171    0.3441    0.3300       340
         ORG     0.7252    0.7210    0.7231      1398
         PER     0.7712    0.7198    0.7446       721

   micro avg     0.6799    0.6720    0.6760      3531
   macro avg     0.6256    0.6163    0.6206      3531
weighted avg     0.6843    0.6720    0.6779      3531

BiLSTM F1: 0.6759720837487538

BiLSTM (MISC hariç) classification report:


  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

         LOC     0.7225    0.6800    0.7006      1072
        MISC     0.0000    0.0000    0.0000         0
         ORG     0.7545    0.7210    0.7374      1398
         PER     0.7828    0.7198    0.7500       721

   micro avg     0.7103    0.7070    0.7087      3191
   macro avg     0.5649    0.5302    0.5470      3191
weighted avg     0.7501    0.7070    0.7279      3191

BiLSTM F1 (no MISC): 0.7086539971729229


In [17]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

PAD_IDX = word2idx["<PAD>"]

class NERDatasetTorch(Dataset):
    def __init__(self, X, y, mask):
        self.X = torch.tensor(X, dtype=torch.long)
        self.y = torch.tensor(y, dtype=torch.long)
        self.mask = torch.tensor(mask.astype(bool))  # bool mask

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return {
            "input_ids": self.X[idx],
            "labels": self.y[idx],
            "mask": self.mask[idx]
        }

train_dataset = NERDatasetTorch(X_train_lstm, y_train_lstm, train_mask)
dev_dataset   = NERDatasetTorch(X_dev_lstm,   y_dev_lstm,   dev_mask)
test_dataset  = NERDatasetTorch(X_test_lstm,  y_test_lstm,  test_mask)

BATCH_SIZE = 8

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
dev_loader   = DataLoader(dev_dataset,   batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(test_dataset,  batch_size=BATCH_SIZE, shuffle=False)


Device: cuda


In [18]:
import math

class CRF(nn.Module):
    def __init__(self, num_tags):
        super().__init__()
        self.num_tags = num_tags

        self.start_transitions = nn.Parameter(torch.empty(num_tags))
        self.end_transitions   = nn.Parameter(torch.empty(num_tags))
        self.transitions       = nn.Parameter(torch.empty(num_tags, num_tags))

        self.reset_parameters()

    def reset_parameters(self):
        # Daha geniş aralık: CRF geçişleri daha güçlü
        nn.init.uniform_(self.start_transitions, -0.4, 0.4)
        nn.init.uniform_(self.end_transitions,   -0.4, 0.4)
        nn.init.uniform_(self.transitions,       -0.4, 0.4)

    def forward(self, emissions, tags, mask):
        log_num = self._score_sentence(emissions, tags, mask)
        log_den = self._compute_log_partition(emissions, mask)
        nll = -(log_num - log_den)
        return nll.mean()

    def _compute_log_partition(self, emissions, mask):
        batch_size, seq_len, num_tags = emissions.size()
        alpha = self.start_transitions + emissions[:, 0]  # (batch, num_tags)

        for t in range(1, seq_len):
            emit_t = emissions[:, t]
            mask_t = mask[:, t].unsqueeze(1)

            score_t = alpha.unsqueeze(2) + self.transitions + emit_t.unsqueeze(1)
            new_alpha = torch.logsumexp(score_t, dim=1)

            alpha = torch.where(mask_t, new_alpha, alpha)

        alpha = alpha + self.end_transitions
        return torch.logsumexp(alpha, dim=1)

    def _score_sentence(self, emissions, tags, mask):
        batch_size, seq_len, num_tags = emissions.size()

        first_tag = tags[:, 0]
        score = self.start_transitions[first_tag]
        score = score + emissions[:, 0].gather(1, first_tag.unsqueeze(1)).squeeze(1)

        for t in range(1, seq_len):
            mask_t   = mask[:, t]
            prev_tag = tags[:, t-1]
            curr_tag = tags[:, t]

            trans_score = self.transitions[prev_tag, curr_tag]
            emit_score  = emissions[:, t].gather(1, curr_tag.unsqueeze(1)).squeeze(1)

            score = score + (trans_score + emit_score) * mask_t

        seq_lengths = mask.long().sum(dim=1) - 1
        last_tags   = tags.gather(1, seq_lengths.unsqueeze(1)).squeeze(1)
        score = score + self.end_transitions[last_tags]

        return score

    def decode(self, emissions, mask):
        batch_size, seq_len, num_tags = emissions.size()

        score = self.start_transitions + emissions[:, 0]
        history = []

        for t in range(1, seq_len):
            emit_t = emissions[:, t]
            mask_t = mask[:, t].unsqueeze(1)

            score_t = score.unsqueeze(2) + self.transitions + emit_t.unsqueeze(1)
            best_score_t, best_path_t = score_t.max(dim=1)

            score = torch.where(mask_t, best_score_t, score)
            history.append(best_path_t)

        score = score + self.end_transitions
        best_last_score, best_last_tag = score.max(dim=1)

        best_paths = []
        for i in range(batch_size):
            seq_len_i = mask[i].sum().item()
            last_tag = best_last_tag[i].item()
            path = [last_tag]

            for hist_t in reversed(history[: seq_len_i-1]):
                last_tag = hist_t[i][last_tag].item()
                path.append(last_tag)

            path.reverse()
            best_paths.append(path)

        return best_paths


In [19]:
PAD_IDX = word2idx["<PAD>"]

class NERDatasetTorch(Dataset):
    def __init__(self, X, y, mask):
        self.X = torch.tensor(X, dtype=torch.long)
        self.y = torch.tensor(y, dtype=torch.long)
        # CRF için bool mask
        self.mask = torch.tensor(mask.astype(bool))

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return {
            "input_ids": self.X[idx],
            "labels": self.y[idx],
            "mask": self.mask[idx]
        }

train_dataset = NERDatasetTorch(X_train_lstm, y_train_lstm, train_mask)
dev_dataset   = NERDatasetTorch(X_dev_lstm,   y_dev_lstm,   dev_mask)
test_dataset  = NERDatasetTorch(X_test_lstm,  y_test_lstm,  test_mask)

BATCH_SIZE = 8

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
dev_loader   = DataLoader(dev_dataset,   batch_size=BATCH_SIZE, shuffle=False)
test_loader  = DataLoader(test_dataset,  batch_size=BATCH_SIZE, shuffle=False)


In [20]:
class BiLSTMCRF(nn.Module):
    def __init__(self, vocab_size, num_labels, pad_idx, emb_dim=100, hidden_dim=100):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=pad_idx)
        self.lstm = nn.LSTM(
            input_size=emb_dim,
            hidden_size=hidden_dim,
            num_layers=1,
            batch_first=True,
            bidirectional=True
        )
        self.hidden2tag = nn.Linear(hidden_dim * 2, num_labels)
        self.crf = CRF(num_labels)

    def forward(self, input_ids, tags=None, mask=None):
        embeds = self.embedding(input_ids)
        lstm_out, _ = self.lstm(embeds)
        emissions = self.hidden2tag(lstm_out)

        if tags is not None:
            # training: loss
            loss = self.crf(emissions, tags, mask)
            return loss
        else:
            # inference: decoded paths
            pred_paths = self.crf.decode(emissions, mask)
            return pred_paths


In [21]:
EMB_DIM    = 150
HIDDEN_DIM = 200

class BiLSTMCRF(nn.Module):
    def __init__(self, vocab_size, num_labels, pad_idx):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, EMB_DIM, padding_idx=pad_idx)
        self.lstm = nn.LSTM(
            input_size=EMB_DIM,
            hidden_size=HIDDEN_DIM,
            num_layers=1,
            batch_first=True,
            bidirectional=True
        )
        self.hidden2tag = nn.Linear(HIDDEN_DIM * 2, num_labels)
        self.crf = CRF(num_labels)

    def forward(self, input_ids, tags=None, mask=None):
        embeds = self.embedding(input_ids)
        lstm_out, _ = self.lstm(embeds)
        emissions = self.hidden2tag(lstm_out)

        if tags is not None:
            loss = self.crf(emissions, tags, mask)
            return loss
        else:
            return self.crf.decode(emissions, mask)

vocab_size = len(word2idx)
num_labels = len(labels)

bilstm_crf_model = BiLSTMCRF(
    vocab_size=vocab_size,
    num_labels=num_labels,
    pad_idx=PAD_IDX
).to(device)

optimizer = torch.optim.Adam(bilstm_crf_model.parameters(), lr=5e-4)


In [22]:
from seqeval.metrics import classification_report, f1_score

def train_epoch_crf(model, dataloader, optimizer, device):
    model.train()
    total_loss = 0.0
    for batch in dataloader:
        input_ids = batch["input_ids"].to(device)
        labels    = batch["labels"].to(device)
        mask      = batch["mask"].to(device)

        optimizer.zero_grad()
        loss = model(input_ids, tags=labels, mask=mask)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        del input_ids, labels, mask, loss
        if device == "cuda":
            torch.cuda.empty_cache()
    return total_loss / len(dataloader)


def evaluate_ner_crf(model, dataloader, device, id2label):
    model.eval()
    all_true, all_pred = [], []

    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch["input_ids"].to(device)
            labels    = batch["labels"].to(device)
            mask      = batch["mask"].to(device)

            pred_paths = model(input_ids, tags=None, mask=mask)

            for true_ids, pred_ids, m in zip(labels, pred_paths, mask):
                seq_len_i = m.sum().item()
                true_seq  = true_ids[:seq_len_i].cpu().numpy()

                t_tags = [id2label[int(t)] for t in true_seq]
                p_tags = [id2label[int(p)] for p in pred_ids]

                all_true.append(t_tags)
                all_pred.append(p_tags)

    f1 = f1_score(all_true, all_pred)
    return f1, all_true, all_pred


EPOCHS = 10
patience = 2
best_dev_f1 = 0.0
pat = 0

for epoch in range(1, EPOCHS + 1):
    train_loss = train_epoch_crf(bilstm_crf_model, train_loader, optimizer, device)
    dev_f1, _, _ = evaluate_ner_crf(bilstm_crf_model, dev_loader, device, id2label)

    print(f"Epoch {epoch}/{EPOCHS} - train loss: {train_loss:.4f}  dev F1: {dev_f1:.4f}")

    if dev_f1 > best_dev_f1:
        best_dev_f1 = dev_f1
        pat = 0
        torch.save(bilstm_crf_model.state_dict(), "bilstm_crf_best.pt")
        print("  -> new best model saved")
    else:
        pat += 1
        if pat >= patience:
            print("Early stopping triggered.")
            break


Epoch 1/10 - train loss: 13.0164  dev F1: 0.4423
  -> new best model saved
Epoch 2/10 - train loss: 6.0020  dev F1: 0.5599
  -> new best model saved
Epoch 3/10 - train loss: 3.5574  dev F1: 0.6247
  -> new best model saved
Epoch 4/10 - train loss: 2.1516  dev F1: 0.6386
  -> new best model saved
Epoch 5/10 - train loss: 1.2506  dev F1: 0.6578
  -> new best model saved
Epoch 6/10 - train loss: 0.7165  dev F1: 0.6617
  -> new best model saved
Epoch 7/10 - train loss: 0.4275  dev F1: 0.6628
  -> new best model saved
Epoch 8/10 - train loss: 0.2870  dev F1: 0.6630
  -> new best model saved
Epoch 9/10 - train loss: 0.2209  dev F1: 0.6607
Epoch 10/10 - train loss: 0.1810  dev F1: 0.6607
Early stopping triggered.


In [23]:
# En iyi ağırlıkları geri yükle
bilstm_crf_model.load_state_dict(torch.load("bilstm_crf_best.pt", map_location=device))
print("Best BiLSTM-CRF weights loaded.")

test_f1, test_true_tags, test_pred_tags = evaluate_ner_crf(
    bilstm_crf_model, test_loader, device, id2label
)

print("BiLSTM-CRF classification report:")
print(classification_report(test_true_tags, test_pred_tags, digits=4))
print("BiLSTM-CRF F1:", test_f1)


# --- MISC hariç F1 ---

def remove_misc(true_tags, pred_tags):
    new_true, new_pred = [], []
    for t_seq, p_seq in zip(true_tags, pred_tags):
        t2, p2 = [], []
        for t, p in zip(t_seq, p_seq):
            if "MISC" in t:
                continue
            t2.append(t)
            p2.append(p)
        new_true.append(t2)
        new_pred.append(p2)
    return new_true, new_pred

true_no_misc_crf, pred_no_misc_crf = remove_misc(test_true_tags, test_pred_tags)

print("\nBiLSTM-CRF (MISC hariç) classification report:")
print(classification_report(true_no_misc_crf, pred_no_misc_crf, digits=4))
print("BiLSTM-CRF F1 (no MISC):", f1_score(true_no_misc_crf, pred_no_misc_crf))


Best BiLSTM-CRF weights loaded.
BiLSTM-CRF classification report:
              precision    recall  f1-score   support

         LOC     0.8051    0.6511    0.7200      1072
        MISC     0.3902    0.4441    0.4154       340
         ORG     0.7637    0.6888    0.7243      1398
         PER     0.7187    0.7157    0.7172       721

   micro avg     0.7201    0.6593    0.6884      3531
   macro avg     0.6694    0.6249    0.6442      3531
weighted avg     0.7311    0.6593    0.6918      3531

BiLSTM-CRF F1: 0.6883500887049083

BiLSTM-CRF (MISC hariç) classification report:


  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

         LOC     0.8173    0.6511    0.7248      1072
        MISC     0.0000    0.0000    0.0000         0
         ORG     0.7829    0.6888    0.7329      1398
         PER     0.7247    0.7157    0.7202       721

   micro avg     0.7300    0.6822    0.7053      3191
   macro avg     0.5812    0.5139    0.5445      3191
weighted avg     0.7813    0.6822    0.7273      3191

BiLSTM-CRF F1 (no MISC): 0.7053296614288028
