In [828]:
import pandas as pd 
import numpy as np 
import datasets
import torch
import math
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import precision_score, recall_score, f1_score, classification_report
from torch.utils.data import TensorDataset
import time
from itertools import chain
from torch.nn.utils.rnn import pad_sequence
import torch.nn.functional as F
import copy
from torch.optim import lr_scheduler

In [48]:
import torch
import math
# this ensures that the current MacOS version is at least 12.3+
print(torch.backends.mps.is_available())
# this ensures that the current current PyTorch installation was built with MPS activated.
print(torch.backends.mps.is_built())

True
True


In [49]:
dtype = torch.float
device = torch.device("mps")

In [244]:
dataset = datasets.load_dataset("conll2003")

### Convert words/tokens to indices

In [683]:
import itertools
from collections import Counter

word_frequency = Counter(itertools.chain(*dataset['train']['tokens']))  # type: ignore

# Remove words below threshold 3
word_frequency = {
    word: frequency
    for word, frequency in word_frequency.items()
    if frequency >= 3
}

word2idx = {
    word: index
    for index, word in enumerate(word_frequency.keys(), start=2)
}

word2idx['[PAD]'] = 0
word2idx['[UNK]'] = 1

In [1005]:
dataset['train'][0]

{'id': '0',
 'tokens': ['EU',
  'rejects',
  'German',
  'call',
  'to',
  'boycott',
  'British',
  'lamb',
  '.'],
 'pos_tags': [22, 42, 16, 21, 35, 37, 16, 21, 7],
 'chunk_tags': [11, 21, 11, 12, 21, 22, 11, 12, 0],
 'ner_tags': [3, 0, 7, 0, 0, 0, 7, 0, 0],
 'input_ids': [2, 1, 3, 4, 5, 6, 7, 8, 9],
 'capitals': [2, 0, 1, 0, 0, 0, 1, 0, 3]}

In [1004]:
sample_tokens = dataset['train'][0]['tokens']
sample_tokens

['EU', 'rejects', 'German', 'call', 'to', 'boycott', 'British', 'lamb', '.']

In [1025]:
s = 'EU'
s = s.lower()
s

'eu'

In [1026]:
def convert_word_to_glove_ids(sample):
    tokens = sample['tokens']
    glove_ids =[]
    for token in tokens:
        token = token.lower()
        indices = np.where(vocab_npa == token)
        if indices[0].size > 0:
            index = indices[0][0]
        else:
            index = 1
        glove_ids.append(index)
    sample['glove_ids'] = glove_ids
    return sample
dataset = dataset.map(convert_word_to_glove_ids)


Map:   0%|          | 0/14041 [00:00<?, ? examples/s]

KeyboardInterrupt: 

In [1012]:
dataset['train'][0]

{'id': '0',
 'tokens': ['EU',
  'rejects',
  'German',
  'call',
  'to',
  'boycott',
  'British',
  'lamb',
  '.'],
 'pos_tags': [22, 42, 16, 21, 35, 37, 16, 21, 7],
 'chunk_tags': [11, 21, 11, 12, 21, 22, 11, 12, 0],
 'ner_tags': [3, 0, 7, 0, 0, 0, 7, 0, 0],
 'input_ids': [2, 1, 3, 4, 5, 6, 7, 8, 9],
 'capitals': [2, 0, 1, 0, 0, 0, 1, 0, 3],
 'glove_ids': [1, 7580, 1, 582, 6, 5262, 1, 10240, 4]}

In [684]:
# the vocab size
vocab_size = max(word2idx.values())+1
vocab_size

8128

In [689]:
def convert_word_to_id(sample):
#Code to convert all tokens to their respective indexes
#If the token is unknown, we set index of 1
    input_ids = [ word2idx.get(token, 1) for token in sample['tokens'] ]

    sample['input_ids'] = input_ids
    return sample

dataset = dataset.map(convert_word_to_id)

Map:   0%|          | 0/14041 [00:00<?, ? examples/s]

Map:   0%|          | 0/3250 [00:00<?, ? examples/s]

Map:   0%|          | 0/3453 [00:00<?, ? examples/s]

In [691]:
dataset['train'][20]

{'id': '20',
 'tokens': ['Rare',
  'Hendrix',
  'song',
  'draft',
  'sells',
  'for',
  'almost',
  '$',
  '17,000',
  '.'],
 'pos_tags': [22, 22, 21, 21, 42, 15, 30, 3, 11, 7],
 'chunk_tags': [11, 12, 12, 12, 21, 13, 11, 12, 12, 0],
 'ner_tags': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
 'input_ids': [1, 225, 1, 226, 227, 63, 228, 229, 1, 9]}

In [692]:
df_train = pd.DataFrame(dataset['train']).drop(columns=['pos_tags', 'chunk_tags', 'id', 'tokens'])
df_train.columns = ['label','input_ids']

df_test = pd.DataFrame(dataset['test']).drop(columns=['pos_tags', 'chunk_tags', 'id', 'tokens'])
df_test.columns = ['label','input_ids']

df_val = pd.DataFrame(dataset['validation']).drop(columns=['pos_tags', 'chunk_tags', 'id', 'tokens'])
df_val.columns = ['label','input_ids']

### Padding

In [328]:
import pandas as pd
import torch
from torch.utils.data import Dataset

# Create a custom Dataset class
class CustomDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        label = torch.tensor(self.data.loc[idx, "label"], dtype=torch.long)
        input_ids = torch.tensor(self.data.loc[idx, "input_ids"], dtype=torch.long)

        return input_ids, label

# Create an instance of the CustomDataset
dataset_train = CustomDataset(df_train)

# Example: Accessing a single sample
print(dataset_train[2])


(tensor([13, 14]), tensor([5, 0]))


In [329]:
def custom_collate(batch):
    # Separate input sequences and labels
    input_seqs, labels = zip(*batch)
    
    # Calculate the sequence lengths based on input sequences (assuming they have the same length as labels)
    sequence_lengths = [len(seq) for seq in input_seqs]

    # Sort input sequences and labels by sequence length (descending)
    sorted_seqs_and_labels = sorted(zip(input_seqs, labels), key=lambda x: len(x[0]), reverse=True)
    sorted_input_seqs, sorted_labels = zip(*sorted_seqs_and_labels)

    # Pad input sequences to the maximum length within the batch
    padded_input_seqs = pad_sequence(sorted_input_seqs, batch_first=True, padding_value=0)  # Use 0 as the padding value
    padded_labels = pad_sequence(sorted_labels, batch_first=True, padding_value=0)  # Use 0 as the padding value

    return padded_input_seqs, padded_labels


### Create dataloaders

In [676]:
def dataloader_generator(df,shuffle):
    dataset_from_df = CustomDataset(df)
    batch_size = 64
    dataloader = DataLoader(dataset_from_df, batch_size=batch_size, collate_fn=custom_collate, shuffle=shuffle)
    return dataloader

In [693]:
train_loader  = dataloader_generator(df_train,shuffle=True)
test_loader  = dataloader_generator(df_test,shuffle=False)
val_loader  = dataloader_generator(df_val,shuffle=False)

In [816]:
for batch in val_loader:
    input_val, target_val = batch
    break

### Glove Embedding 

In [31]:
# Define a function to load GloVe embeddings from a file
def load_glove_embeddings(file_path):
    embeddings_index = {}
    with open(file_path, encoding="utf-8") as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype="float32")
            embeddings_index[word] = coefs
    return embeddings_index

# Specify the path to your downloaded "glove.6B.100d.txt" file
glove_file_path = "glove.6B.100d"

# Load GloVe embeddings into memory
glove_embeddings = load_glove_embeddings(glove_file_path)

In [32]:
glove_embeddings['the'].shape

(100,)

### Building the model

In [340]:
!wget https://raw.githubusercontent.com/sighsmile/conlleval/master/conlleval.py

--2023-11-06 16:54:57--  https://raw.githubusercontent.com/sighsmile/conlleval/master/conlleval.py
正在解析主机 raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
正在连接 raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... 已连接。
已发出 HTTP 请求，正在等待回应... 200 OK
长度：7502 (7.3K) [text/plain]
正在保存至: “conlleval.py.1”


2023-11-06 16:54:57 (14.7 MB/s) - 已保存 “conlleval.py.1” [7502/7502])



In [341]:
from conlleval import evaluate 

In [696]:
class BiLSTMNER(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, num_layers, dropout):
        super(BiLSTMNER, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.bilstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, 
                              batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(hidden_dim * 2, output_dim)
        self.elu = nn.ELU()
        self.classifier = nn.Linear(output_dim, num_tags)  # num_tags is the number of unique NER tags

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.bilstm(x)
        x = self.dropout(x)
        x = self.linear(x)
        x = self.elu(x)
        x = self.classifier(x)
        return x

#initialize 
num_tags = 9
vocab_size = max(word2idx.values())+1

model = BiLSTMNER(vocab_size, 100, 256, 128, 1, 0.33) 
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_function = nn.CrossEntropyLoss()

#training
num_epochs = 20
print('start training')
for epoch in range(num_epochs):
    start_time = time.time()
    model.train()
    total_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        inputs, targets = batch
        outputs = model(inputs)
        batch_size = inputs.size()[-1]    
        #From the instruction of CrossEntropy, we need to change the format of outputs 
        loss = loss_function(outputs.permute(0,2,1), targets) 
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    end_time = time.time()
    print(f'Epoch {epoch + 1}, Loss: {total_loss / len(train_loader)}, time: {end_time-start_time}s')
    print('validation error: ')
    precision, recall, f1 = eval(model, val_loader)



start training
Epoch 1, Loss: 0.23767325075512583, time: 45.66872692108154s
validation error: 
processed 152266 tokens with 5942 phrases; found: 2289 phrases; correct: 1248.
accuracy:  22.29%; (non-O)
accuracy:  95.45%; precision:  54.52%; recall:  21.00%; FB1:  30.32
              LOC: precision:  55.38%; recall:  38.65%; FB1:  45.53  1282
             MISC: precision:  43.48%; recall:   2.17%; FB1:   4.13  46
              ORG: precision:  40.45%; recall:   2.68%; FB1:   5.03  89
              PER: precision:  55.28%; recall:  26.17%; FB1:  35.52  872
Epoch 2, Loss: 0.11316225692968476, time: 45.273293256759644s
validation error: 
processed 152266 tokens with 5942 phrases; found: 4274 phrases; correct: 2818.
accuracy:  51.10%; (non-O)
accuracy:  97.06%; precision:  65.93%; recall:  47.43%; FB1:  55.17
              LOC: precision:  76.54%; recall:  57.38%; FB1:  65.59  1377
             MISC: precision:  64.45%; recall:  35.79%; FB1:  46.03  512
              ORG: precision:  51.53%;

In [469]:
#example of using evalute function
pred = [['O', 'O', 'B-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], ['O','O']]
true =  [['O', 'O', 'B-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], ['O','O']]
evaluate(itertools.chain(*true),itertools.chain(*pred), verbose=False)

(100.0, 100.0, 100.0)

In [444]:
# Example reversed_ner_tags dictionary
reversed_ner_tags = {
    0: 'O',
    1: 'B-PER',
    2: 'I-PER',
    3: 'B-ORG',
    4: 'I-ORG',
    5: 'B-LOC',
    6: 'I-LOC',
    7: 'B-MISC',
    8: 'I-MISC'
}

# Example tensor with shape (32, 36)
tensor = torch.randint(0, 9, (32, 36))  # Random integers between 0 and 8

# Map tensor elements using reversed_ner_tags
mapped_tensor = [[reversed_ner_tags[item.item()] for item in row] for row in tensor]

In [398]:
ner_tags = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6, 'B-MISC': 7, 'I-MISC': 8}

reversed_ner_tags = {value: key for key, value in ner_tags.items()}
reversed_ner_tags

{0: 'O',
 1: 'B-PER',
 2: 'I-PER',
 3: 'B-ORG',
 4: 'I-ORG',
 5: 'B-LOC',
 6: 'I-LOC',
 7: 'B-MISC',
 8: 'I-MISC'}

In [697]:
#evaluation
def eval(model, loader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in loader:
            inputs, targets = batch
            outputs = model(inputs)
            _, preds = torch.max(outputs, -1)
            preds_converted = [[reversed_ner_tags[item.item()] for item in row] for row in preds]
            targets_converted = [[reversed_ner_tags[item.item()] for item in row] for row in targets]
            all_preds.extend(preds_converted)
            all_labels.extend(targets_converted)
    # all_preds = list(chain.from_iterable(all_preds))
    # all_labels = list(chain.from_iterable(all_labels))
    # all_labels = torch.cat(all_labels)
    all_preds = itertools.chain(*all_preds)    
    all_labels =itertools.chain(*all_labels)
    result = evaluate(all_labels, all_preds,verbose=True)
    precision, recall, f1 = result[0], result[1],result[2]
    return precision, recall, f1

print('Test: ')
precision, recall, f1 = eval(model, test_loader)

Test: 
processed 146937 tokens with 5648 phrases; found: 5235 phrases; correct: 3731.
accuracy:  71.18%; (non-O)
accuracy:  97.98%; precision:  71.27%; recall:  66.06%; FB1:  68.57
              LOC: precision:  81.99%; recall:  73.68%; FB1:  77.61  1499
             MISC: precision:  70.92%; recall:  61.82%; FB1:  66.06  612
              ORG: precision:  60.65%; recall:  62.91%; FB1:  61.76  1723
              PER: precision:  73.02%; recall:  63.27%; FB1:  67.79  1401


## Task 2: Glove Embedding

### Load Glove and customize the layer

In [699]:
# Define a function to load GloVe embeddings from a file
def load_glove_embeddings(file_path):
    embeddings_index = {}
    with open(file_path, encoding="utf-8") as f:
        for line in f:
            values = line.split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype="float32")
            embeddings_index[word] = coefs
    return embeddings_index

# Specify the path to your downloaded "glove.6B.100d.txt" file
glove_file_path = "glove.6B.100d"

# Load GloVe embeddings into memory
glove_embeddings = load_glove_embeddings(glove_file_path)

In [831]:
#convert glove into a layer
vocab,embeddings = [],[]
with open('glove.6B.100d',encoding="utf-8") as fi:
    full_content = fi.read().strip().split('\n')
for i in range(len(full_content)):
    i_word = full_content[i].split(' ')[0]
    i_embeddings = [float(val) for val in full_content[i].split(' ')[1:]]
    vocab.append(i_word)
    embeddings.append(i_embeddings)


In [832]:
vocab_npa = np.array(vocab)
embs_npa = np.array(embeddings)

#insert '<pad>' and '<unk>' tokens at start of vocab_npa.
vocab_npa = np.insert(vocab_npa, 0, '<pad>')
vocab_npa = np.insert(vocab_npa, 1, '<unk>')
print(vocab_npa[:10])

pad_emb_npa = np.zeros((1,embs_npa.shape[1]))   #embedding for '<pad>' token.
unk_emb_npa = np.mean(embs_npa,axis=0,keepdims=True)    #embedding for '<unk>' token.

#insert embeddings for pad and unk tokens at top of embs_npa.
embs_npa = np.vstack((pad_emb_npa,unk_emb_npa,embs_npa))
print(embs_npa.shape)

['<pad>' '<unk>' 'the' ',' '.' 'of' 'to' 'and' 'in' 'a']
(400002, 100)


array(['<pad>', '<unk>', 'the', ',', '.', 'of', 'to', 'and', 'in', 'a'],
      dtype='<U68')

In [877]:
import torch
my_embedding_layer = torch.nn.Embedding.from_pretrained(torch.from_numpy(embs_npa).float(),freeze=True)

assert my_embedding_layer.weight.shape == embs_npa.shape
print(my_embedding_layer.weight.shape)

torch.Size([400002, 100])


### Make Glove case-sensitive

In [858]:
#add features to the dataloader 
#case 0: lower case - no uppercase
#case 1: first word is uppercase
#case 2: whole word is uppeercase
#case 3: others: e.g. ","
def capital_case(word):
    if word.islower():
        return 0
    elif word.isupper():
        return 2
    elif word.istitle():
        return 1
    else: return 3

def convert_word_to_capital_case(sample):
    capitals = [capital_case(word) for word in sample['tokens'] ]
    sample['capitals'] =capitals
    return sample 

dataset = dataset.map(convert_word_to_capital_case)

Map:   0%|          | 0/14041 [00:00<?, ? examples/s]

Map:   0%|          | 0/3250 [00:00<?, ? examples/s]

Map:   0%|          | 0/3453 [00:00<?, ? examples/s]

In [1023]:
dataset['train'][2]

{'id': '2',
 'tokens': ['BRUSSELS', '1996-08-22'],
 'pos_tags': [22, 11],
 'chunk_tags': [11, 12],
 'ner_tags': [5, 0],
 'input_ids': [12, 13],
 'capitals': [2, 3],
 'glove_ids': [1, 1]}

### Padding

In [1013]:
import pandas as pd
import torch
from torch.utils.data import Dataset

# Create a custom Dataset class
class CustomDataset(Dataset):
    
    def __init__(self,data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        label = torch.tensor(self.data[index]['ner_tags'], dtype=torch.long ) 
        glove_ids = torch.tensor(self.data[index]['glove_ids'], dtype=torch.long)
        capital = torch.tensor(self.data[index]['capitals'], dtype=torch.long)
        
        return label, glove_ids, capital

# Create an instance of the CustomDataset
dataset_train = CustomDataset(dataset['train'])
dataset_test = CustomDataset(dataset['test'])
dataset_val = CustomDataset(dataset['validation'])

# Example: Accessing a single sample
print(dataset_train[0])


(tensor([3, 0, 7, 0, 0, 0, 7, 0, 0]), tensor([    1,  7580,     1,   582,     6,  5262,     1, 10240,     4]), tensor([2, 0, 1, 0, 0, 0, 1, 0, 3]))


In [1022]:
dataset_train[4][1]

tensor([    1,    11,  2045,     6,     2,     1,     1,    11, 11915,   448,
            1,     1,    18,    15,     1,  2036,   191,   989,     1,    27,
          254,    70,    75,     1,   209,     2,  2442,  3242,    17, 14349,
            4])

In [1014]:
def custom_collate(batch):
    label, glove_ids, capital = zip(*batch)
    padded_label = pad_sequence(label, batch_first=True, padding_value=9 )
    padded_glove_ids = pad_sequence(glove_ids, batch_first=True, padding_value=0 )
    padded_capital = pad_sequence(capital, batch_first=True, padding_value=4 )
    return padded_glove_ids, padded_capital, padded_label
    

In [1015]:
batch_size = 32
train_loader = DataLoader(dataset_train, batch_size=batch_size, collate_fn= custom_collate, shuffle=True)
test_loader = DataLoader(dataset_test, batch_size=batch_size, collate_fn= custom_collate, shuffle=True)
val_loader = DataLoader(dataset_val, batch_size=batch_size, collate_fn= custom_collate, shuffle=True)

In [1017]:
class BiLSTMNER(nn.Module):
    def __init__(self,hidden_dim, output_dim, num_layers, dropout):
        super(BiLSTMNER, self).__init__()
        self.embedding = my_embedding_layer
        self.capital_layer = nn.Embedding(num_embeddings=5,embedding_dim=20,padding_idx=4)
        self.bilstm = nn.LSTM(input_size=120, hidden_size=hidden_dim, num_layers=num_layers, 
                              batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(hidden_dim * 2, output_dim,dtype=torch.float32)
        self.elu = nn.ELU()
        self.classifier = nn.Linear(output_dim, num_tags,dtype=torch.float32)  # num_tags is the number of unique NER tags

    def forward(self, x, capital):
        x = self.embedding(x.int())
        capital = self.capital_layer(capital.int())
        x = torch.cat([x, capital], dim=2)
        x, _ = self.bilstm(x)
        x = self.dropout(x)
        x = self.linear(x)
        x = self.elu(x)
        x = self.classifier(x)
        return x


#initialize 
num_tags = 9
# vocab_size = embs_npa.shape[0]
# embedding_dim = embs_npa.shape[1]
# embedding_dim= 120
model = BiLSTMNER(256,128, 1, 0.33) 
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_function = nn.CrossEntropyLoss(ignore_index=9)


#training
num_epochs = 30
print('start training')
for epoch in range(num_epochs):
    start_time = time.time()
    model.train()
    total_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        inputs, capitals ,targets = batch
        outputs = model(inputs, capitals)
        batch_size = inputs.size()[-1]    
        #From the instruction of CrossEntropy, we need to change the format of outputs 
        loss = loss_function(outputs.permute(0,2,1), targets) 
        # loss = loss_function(outputs.view(-1, 9), targets.view(-1).long()) 
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    end_time = time.time()
    print(f'Epoch {epoch + 1}, Loss: {total_loss / len(train_loader)}, time: {end_time-start_time}s')
    print('validation error: ')
    precision, recall, f1 = eval(model, val_loader)



start training
Epoch 1, Loss: 0.3167437568883961, time: 54.3510799407959s
validation error: 
processed 51362 tokens with 5942 phrases; found: 5751 phrases; correct: 3344.
accuracy:  61.20%; (non-O)
accuracy:  92.58%; precision:  58.15%; recall:  56.28%; FB1:  57.20
              LOC: precision:  75.47%; recall:  56.45%; FB1:  64.59  1374
             MISC: precision:  37.79%; recall:  49.35%; FB1:  42.80  1204
              ORG: precision:  43.88%; recall:  49.22%; FB1:  46.40  1504
              PER: precision:  71.42%; recall:  64.71%; FB1:  67.90  1669
Epoch 2, Loss: 0.19918248247448567, time: 58.02853608131409s
validation error: 
processed 51362 tokens with 5942 phrases; found: 5573 phrases; correct: 3700.
accuracy:  65.80%; (non-O)
accuracy:  93.57%; precision:  66.39%; recall:  62.27%; FB1:  64.26
              LOC: precision:  72.64%; recall:  64.89%; FB1:  68.55  1641
             MISC: precision:  56.85%; recall:  36.44%; FB1:  44.42  591
              ORG: precision:  58.49%;

KeyboardInterrupt: 

In [909]:
ner_tags = {'O': 0, 'B-PER': 1, 'I-PER': 2, 'B-ORG': 3, 'I-ORG': 4, 'B-LOC': 5, 'I-LOC': 6, 'B-MISC': 7, 'I-MISC': 8, '<PAD>':9}

reversed_ner_tags = {value: key for key, value in ner_tags.items()}
reversed_ner_tags

{0: 'O',
 1: 'B-PER',
 2: 'I-PER',
 3: 'B-ORG',
 4: 'I-ORG',
 5: 'B-LOC',
 6: 'I-LOC',
 7: 'B-MISC',
 8: 'I-MISC',
 9: '<PAD>'}

In [966]:
model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for batch in val_loader:
        inputs, capitals ,targets = batch
        #get rid of paddings on targets
        label_unpad = targets
        mask = label_unpad != 9
        label_unpad = label_unpad[mask]
        
        outputs = model(inputs,capitals)
        _, preds = torch.max(outputs, -1)
        #get rid of paddings on pred
        preds = preds[mask]
        preds_converted = [reversed_ner_tags[elem.item()] for elem in preds]
        targets_converted = [reversed_ner_tags[elem.item()] for elem in label_unpad]
        all_preds.extend(preds_converted)
        all_labels.extend(targets_converted)


In [975]:
result = evaluate(all_labels, all_preds,verbose=True)

processed 51362 tokens with 5942 phrases; found: 5591 phrases; correct: 3379.
accuracy:  61.84%; (non-O)
accuracy:  92.99%; precision:  60.44%; recall:  56.87%; FB1:  58.60
              LOC: precision:  83.75%; recall:  49.65%; FB1:  62.34  1089
             MISC: precision:  50.00%; recall:  36.44%; FB1:  42.16  672
              ORG: precision:  49.43%; recall:  51.60%; FB1:  50.49  1400
              PER: precision:  59.22%; recall:  78.12%; FB1:  67.37  2430


In [973]:
len(all_labels)

51362

In [974]:
len(all_preds)

51362

In [916]:
sample_pred = all_preds[0]
sample_label = all_labels[0]

In [962]:
reversed_ner_tags

{0: 'O',
 1: 'B-PER',
 2: 'I-PER',
 3: 'B-ORG',
 4: 'I-ORG',
 5: 'B-LOC',
 6: 'I-LOC',
 7: 'B-MISC',
 8: 'I-MISC',
 9: '<PAD>'}

In [950]:
label_unpad1 = targets
mask = label_unpad1 != 9
label_unpad2 = label_unpad1[mask]
# outputs = model(data, capital)
# _, predicted = torch.max(outputs.data, 2)
# predicted = predicted[mask]


In [None]:
all_preds = itertools.chain(*all_preds)    
all_labels =itertools.chain(*all_labels)
result = evaluate(all_labels, all_preds,verbose=True)
precision, recall, f1 = result[0], result[1],result[2]

In [978]:
#evaluation
def eval(model, loader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for batch in loader:
            inputs, capitals ,targets = batch
            #get rid of paddings on targets
            label_unpad = targets
            mask = label_unpad != 9
            label_unpad = label_unpad[mask]
            
            outputs = model(inputs,capitals)
            _, preds = torch.max(outputs, -1)
            #get rid of paddings on pred
            preds = preds[mask]
            
            preds_converted = [reversed_ner_tags[elem.item()] for elem in preds]
            targets_converted = [reversed_ner_tags[elem.item()] for elem in label_unpad]
            all_preds.extend(preds_converted)
            all_labels.extend(targets_converted)
    # all_preds = list(chain.from_iterable(all_preds))
    # all_labels = list(chain.from_iterable(all_labels))
    # all_labels = torch.cat(all_labels)
    # all_preds = itertools.chain(*all_preds)    
    # all_labels =itertools.chain(*all_labels)
    result = evaluate(all_labels, all_preds,verbose=True)
    precision, recall, f1 = result[0], result[1],result[2]
    return precision, recall, f1

print('Test: ')
precision, recall, f1 = eval(model, test_loader)

Test: 
processed 46435 tokens with 5648 phrases; found: 5666 phrases; correct: 4241.
accuracy:  78.39%; (non-O)
accuracy:  95.34%; precision:  74.85%; recall:  75.09%; FB1:  74.97
              LOC: precision:  79.18%; recall:  82.55%; FB1:  80.83  1739
             MISC: precision:  69.92%; recall:  62.25%; FB1:  65.86  625
              ORG: precision:  69.24%; recall:  71.28%; FB1:  70.25  1710
              PER: precision:  78.08%; recall:  76.87%; FB1:  77.47  1592
