In [1]:
'''
    This block loads the saved models. For each model, there are -
    1. word_field_obj.pth
    2. tag_field_obj.pth
    3. char_field_obj.pth
    4. transformermodel.pth [Trained weights]
'''
import os
import time
import gensim
from collections import Counter

import torch
from torch import nn
from torch.optim import Adam
from torch.optim import AdamW
from spacy.lang.id import Indonesian
import gensim.models.keyedvectors as word2vec
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
import re
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report

import math
import time
import gensim
import matplotlib.pyplot as plt

from torchtext.legacy.data import Field, NestedField, BucketIterator
from torchtext.legacy.datasets import SequenceTaggingDataset
from torchtext.legacy.vocab import Vocab
from torchcrf import CRF
from collections import Counter
from spacy.lang.id import Indonesian
import logging
import gc
import torch.utils
import torch.utils.checkpoint

import torch.nn.utils.prune as prune
import torch.nn.functional as F


from transformerModel import Transformer, PositionalEncoding

"""
    Load saved model
"""
# NOTE(review): absolute, machine-specific checkpoint directory. Every path below is built by
# plain string concatenation, so the trailing "/" is required.
modelPath = r"/home/muddi004/muddi/citationParser/savedmodel/220K/"
available_gpu = torch.cuda.is_available()
if available_gpu:
    print(f"GPU is available: {torch.cuda.get_device_name(0)}")
    logging.info(f"GPU is available: {torch.cuda.get_device_name(0)}")
    use_device = torch.device("cuda")
else:
    use_device = torch.device("cpu")

'''
    Load saved Torchtext field objects
'''
# NOTE(review): torch.load unpickles arbitrary Python objects - only load files you trust.
word_field = torch.load(modelPath + "word_field_obj.pth")
tag_field = torch.load(modelPath + "tag_field_obj.pth")
char_field = torch.load(modelPath +  "char_field_obj.pth")

# Make sure to call input = input.to(device) on any input tensors that you feed to the model
# use model.eval() before inference
print(len(tag_field.vocab))

unk_idx = word_field.vocab.stoi[word_field.unk_token]
print(unk_idx)

# Padding indices of the three vocabularies; passed to the model and reused by infer().
word_pad_idx = word_field.vocab.stoi[word_field.pad_token]
char_pad_idx = char_field.vocab.stoi[char_field.pad_token]
tag_pad_idx = tag_field.vocab.stoi[tag_field.pad_token]

# The constructor arguments must reproduce the tensor shapes stored in the checkpoint,
# otherwise load_state_dict() below rejects the weights.
model = Transformer(
    input_dim=len(word_field.vocab),
    embedding_dim=300,
    char_emb_dim=25, #37,  # NEWLY MODIFIED: TRANSFORMER
    char_input_dim=len(char_field.vocab),
    char_cnn_filter_num=4,  # NEWLY MODIFIED: TRANSFORMER
    char_cnn_kernel_size=3,
    char_lstm_hidden=50,
    attn_heads=16,  # NEWLY MODIFIED: TRANSFORMER
    fc_hidden=200,  # NEWLY MODIFIED: TRANSFORMER
    trf_layers=1,
    output_dim=len(tag_field.vocab),
    emb_dropout=0.5,
    cnn_dropout=0.25,
    lstm_dropout = 0.2,
    trf_dropout=0.1,  # NEWLY MODIFIED: TRANSFORMER
    fc_dropout=0.25,
    word_pad_idx=word_pad_idx,
    char_pad_idx=char_pad_idx,
    tag_pad_idx=tag_pad_idx,
    device=use_device
)

# Print model's state_dict (built once instead of once per parameter)
print("Model's state_dict:")
for param_name, param_tensor in model.state_dict().items():
    print(param_name, "\t", param_tensor.size())

#model = nn.DataParallel(model)
# map_location remaps the stored tensors onto the device selected above, so a checkpoint
# written on a GPU machine still loads when only the CPU is available.
model.load_state_dict(torch.load(modelPath +  "transformermodel.pth", map_location=use_device))
model.to(use_device)

print(model)
# Make sure to call input = input.to(device) on any input tensors that you feed to the model
# use model.eval() before inference

'''
    Inference code-block Starts  
'''
def infer(model, tokens, device, true_tags=None, infer_unknown_sentence=True):
    """Tag one tokenized reference string with the trained model.

    Relies on the notebook-level ``word_field``, ``char_field``, ``tag_field`` and
    ``char_pad_idx`` objects created when the saved model is loaded.

    Args:
        model: callable taking ``(token_tensor, char_tensor)`` and returning
            ``(predictions, _)``, where ``predictions[0]`` holds one tag index per token.
        tokens: list of token strings of ONE sentence. Word indices are looked up on the
            lower-cased token, character indices on the token as given.
        device: torch device the input tensors are moved to.
        true_tags: optional gold tags (one per token); only used as an extra table column.
        infer_unknown_sentence: when True, print and log a word / unk / predicted-tag table.

    Returns:
        List of predicted tag strings, one per token (``[]`` for an empty token list).
    """
    # An empty sentence has nothing to tag; without this guard max() below raises ValueError.
    if not tokens:
        return []

    model.eval()
    word_vocab = word_field.vocab
    char_vocab = char_field.vocab
    unk_idx = word_vocab.stoi[word_field.unk_token]

    # transform to indices based on corpus vocab; pad every word to the longest one
    max_word_len = max(len(token) for token in tokens)
    numericalized_tokens = [word_vocab.stoi[token.lower()] for token in tokens]
    numericalized_chars = [
        [char_vocab.stoi[char] for char in token]
        + [char_pad_idx] * (max_word_len - len(token))
        for token in tokens
    ]
    # per-position unknown-word flag (replaces the repeated `token in unks` list scan)
    unk_flags = [index == unk_idx for index in numericalized_tokens]

    # token_tensor: (n_tokens, 1); char_tensor: (1, n_tokens, max_word_len)
    token_tensor = torch.as_tensor(numericalized_tokens).unsqueeze(-1).to(device)
    char_tensor = torch.as_tensor(numericalized_chars).unsqueeze(0).to(device)
    # Inference only: no autograd graph is needed, which saves memory and time.
    with torch.no_grad():
        predictions, _ = model(token_tensor, char_tensor)

    # convert results to tags
    predicted_tags = [tag_field.vocab.itos[t] for t in predictions[0]]

    if infer_unknown_sentence:
        max_len_token = max([len(token) for token in tokens] + [len('word')])
        max_len_tag = max([len(tag) for tag in predicted_tags] + [len('pred')])
        header = (
            f"{'word'.ljust(max_len_token)}\t{'unk'.ljust(max_len_token)}\t{'pred tag'.ljust(max_len_tag)}"
            + ("\ttrue tag" if true_tags else "")
        )
        print(header)
        logging.info(header)
        for i, token in enumerate(tokens):
            is_unk = "✓" if unk_flags[i] else ""
            row = (
                f"{token.ljust(max_len_token)}\t{is_unk.ljust(max_len_token)}\t{predicted_tags[i].ljust(max_len_tag)}"
                + (f"\t{true_tags[i]}" if true_tags else "")
            )
            print(row)
            logging.info(row)
    print(' ')
    return predicted_tags
'''
    Inference code-block ends  
'''


Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


GPU is available: Tesla V100-SXM2-16GB
21
0
Model's state_dict:
embedding.weight 	 torch.Size([3000000, 300])
char_emb.weight 	 torch.Size([97, 25])
char_lstm.weight_ih_l0 	 torch.Size([200, 25])
char_lstm.weight_hh_l0 	 torch.Size([200, 50])
char_lstm.bias_ih_l0 	 torch.Size([200])
char_lstm.bias_hh_l0 	 torch.Size([200])
char_lstm.weight_ih_l0_reverse 	 torch.Size([200, 25])
char_lstm.weight_hh_l0_reverse 	 torch.Size([200, 50])
char_lstm.bias_ih_l0_reverse 	 torch.Size([200])
char_lstm.bias_hh_l0_reverse 	 torch.Size([200])
position_encoder.pe 	 torch.Size([5000, 1, 400])
encoder.layers.0.self_attn.in_proj_weight 	 torch.Size([1200, 400])
encoder.layers.0.self_attn.in_proj_bias 	 torch.Size([1200])
encoder.layers.0.self_attn.out_proj.weight 	 torch.Size([400, 400])
encoder.layers.0.self_attn.out_proj.bias 	 torch.Size([400])
encoder.layers.0.linear1.weight 	 torch.Size([2048, 400])
encoder.layers.0.linear1.bias 	 torch.Size([2048])
encoder.layers.0.linear2.weight 	 torch.Size([400, 

'\n    Inference code-block ends  \n'

In [4]:
'''
    This block reads the preprocessed text files [Test data] for sentences/tokens which we will infer
    with the trained model
'''
import os
import re
import pandas as pd

# f1_score = []
# def glueTokens(tokens, predicted_tags):
#     # For GIANT test-data: etities will be 11 tags:
#     # name, title, container-title, issue-date, issue-month, page, volume, issue, publisher, issn, source
#     predtag_length = len(predicted_tags)

#     # Get the tuples out of predicted tags and tokens
#     authors_tuple = []
#     title_tuple =[]
#     containertitle_tuple =[]
#     publish_date_tuple = []
#     #publish_month_tuple = []
#     page_tuple = []
#     volume_tuple = []
#     issue_tuple = []
#     publisher_tuple = []
#     issn_tuple = []
#     source_tuple = []
    
#     #beginTitle_idx = 0
#     #endTitle_idx = -1
#     for idx in range(len(predicted_tags)):
#         if "AUTHOR" in predicted_tags[idx]:
#             authors_tuple.append((tokens[idx],predicted_tags[idx]))            
#         elif "CT" in predicted_tags[idx]:
#             containertitle_tuple.append((tokens[idx],predicted_tags[idx]))        
#         elif "TITLE" in predicted_tags[idx]:
#             title_tuple.append((tokens[idx],predicted_tags[idx]))        
#             '''
#             if "B" in predicted_tags[idx]:
#                 beginTitle_idx = idx
#             if "I" in predicted_tags[idx]:
#                 if idx > endTitle_idx:
#                     endTitle_idx = idx
#             '''
#         elif "VOL" in predicted_tags[idx]:
#             volume_tuple.append((tokens[idx],predicted_tags[idx]))
#         elif "ISSUE" in predicted_tags[idx]:
#             issue_tuple.append((tokens[idx],predicted_tags[idx]))
#         elif "PAGE" in predicted_tags[idx]:
#             page_tuple.append((tokens[idx],predicted_tags[idx]))            
#         elif "DATE" in predicted_tags[idx]:
#             publish_date_tuple.append((tokens[idx],predicted_tags[idx]))        
#         elif "PUBLISHER" in predicted_tags[idx]:
#             publisher_tuple.append((tokens[idx],predicted_tags[idx]))            
#         elif "ISSN" in predicted_tags[idx]:
#             issn_tuple.append((tokens[idx],predicted_tags[idx]))
#         elif "SOURCE" in predicted_tags[idx]:
#             source_tuple.append((tokens[idx],predicted_tags[idx]))
                       
#     title = " ".join([title[0] for title in title_tuple])
#     containertitle = " ".join([containertitle[0] for containertitle in containertitle_tuple])
#     publisher = " ".join([publisher[0] for publisher in publisher_tuple])
#     source = " ".join([source[0] for source in source_tuple])
#     issn = " ".join([issn[0] for issn in issn_tuple])
#     issue = " ".join([issue[0] for issue in issue_tuple])
#     volume = " ".join([volume[0] for volume in volume_tuple])
#     page = " ".join([page[0] for page in page_tuple])
#     #publication_month = " ".join([publication_month[0] for publication_month in publish_month_tuple])
#     publication_year = " ".join([publication_year[0] for publication_year in publish_date_tuple])
#     authors = " ".join([author[0] for author in authors_tuple])
    
# #     ''' Managing author names. Considering everyname has a firstname'''
# #     # Store the index of firstnames from author-tuple
# #     firstname_idxs = []
# #     idx = 0
# #     for tpl in authors_tuple:
# #         if "FN" in tpl[1]:
# #             firstname_idxs.append(idx)
        
# #         idx = idx + 1
    
# #     #print(authors_tuple)
# #     #print(range(len(authors_tuple))[0:2])
# #     totalNames = len(firstname_idxs)
# #     for idx in range(totalNames):
# #         name = []
# #         print(firstname_idxs[idx])
# #         if idx+1 < totalNames:
# #             for idx_x in range(len(authors_tuple))[firstname_idxs[idx]:firstname_idxs[idx+1]]:
# #                 name.append(authors_tuple[idx_x][0])
# #         else:
# #             for idx_x in range(len(authors_tuple))[firstname_idxs[idx]:]:
# #                 name.append(authors_tuple[idx_x][0])
        
# #         authors.append(" ".join(name))
          
# #     authors = ",".join(authors)
    
    
#     print("Glued")
#     print("authors: {}".format(authors))
#     print("title: {}".format(title))
#     print("containertitle: {}".format(containertitle))
#     print("issue: {}".format(issue))
#     print("publisher: {}".format(publisher))
#     print("source: {}".format(source))
#     print("issn: {}".format(issn))
#     print("volume: {}".format(volume))
#     print("page: {}".format(page))
#     #print("publication_month: {}".format(publication_month))
#     print("publication_year: {}".format(publication_year))   
    
#     return authors, title, containertitle, issue, publisher, source, issn, volume, page, publication_year
    
            
def predictedtags(tokens,tags=None): # Tokens of one sentence
    """Return the predicted tags for the tokens of one sentence.

    Delegates to ``infer`` with the notebook-level ``model`` and ``use_device``.
    ``tags`` (the gold tags, if any) is forwarded as ``true_tags``.

    ``infer_unknown_sentence`` is fixed to True here, so the
    [word - predicted tag - true tag] listing is shown for every reference string.
    Change it to False if you only want the classification report and do not need
    to inspect the individual prediction labels.
    """
    predicted = infer(
        model,
        tokens=tokens,
        device=use_device,
        true_tags=tags,
        infer_unknown_sentence=True,
    )
    return predicted  # Predicted tags

def infersentence(sentence,tokens=None,true_tags=None):
    """Split one sentence into tokens / gold tags and predict its tags.

    Args:
        sentence: iterable of (token, true_tag) pairs.
        tokens: optional list the sentence's tokens are appended to; a fresh list is
            created when omitted.
        true_tags: optional list the sentence's gold tags are appended to; a fresh list
            is created when omitted.

    Returns:
        Tuple ``(tokens, true_tags, predicted_tags)``.
    """
    # The defaults used to be `[]`, which Python evaluates once: every call without explicit
    # lists kept appending to the same two lists, so later sentences were tagged together
    # with all earlier ones. Explicitly supplied lists are still extended in place.
    if tokens is None:
        tokens = []
    if true_tags is None:
        true_tags = []

    for wt_tuple in sentence:
        tokens.append(wt_tuple[0])
        true_tags.append(wt_tuple[1])

    # Get prediction for tokens of ONE sentence
    predicted_tags = predictedtags(tokens,true_tags)

    return tokens, true_tags, predicted_tags

def inferdata(sentences):
    """Predict the tags of every sentence in ``sentences``.

    Parameter: sentences => List of sentences

    Each sentence is a list of tuples => [Token-TrueTag] pairs

    Returns a list with one predicted-tag list per sentence, in input order.
    """
    def _tag_one(sentence):
        # Unzip the (token, true tag) pairs, then tag the sentence as a whole.
        words = [pair[0] for pair in sentence]
        gold = [pair[1] for pair in sentence]
        return predictedtags(words, gold)

    return [_tag_one(sentence) for sentence in sentences]


# ---- Entity Level F1 calculation | Starts ---- #

# Per-entity F1 accumulators: report_to_df() appends one value per evaluated report.
author = []
ct = []
date =  []
issue = []
page = []
pub = []
punc = []
title = []
vol = []


def report_to_df(report):
    """Accumulate the per-entity F1 scores found in a classification report.

    Parameter: report
    Type: string

    The report is split into lines; for every line whose first field is one of the
    known entity names, the fourth field (the f1-score column of a
    ``name precision recall f1-score support`` row) is appended to that entity's
    module-level list.

    Rows are selected by entity name rather than by position, so the header, blank
    lines and the ``micro/macro/weighted avg`` rows are ignored regardless of how many
    of them the report contains or whether it ends with a newline. (The previous
    fixed ``[1:-5]`` slice dropped real entity rows in those cases.)

    Returns:
        Always 1 (kept for backward compatibility; despite the name no DataFrame is built).
    """
    # Resolved at call time so the function always targets the current module-level lists.
    accumulators = {
        'AUTHOR': author,
        'CT': ct,
        'DATE': date,
        'ISSUE': issue,
        'PAGE': page,
        'PUBLISHER': pub,
        'PUNC': punc,
        'TITLE': title,
        'VOL': vol,
    }
    for line in report.split('\n'):
        fields = line.split()
        # need at least: name, precision, recall, f1-score
        if len(fields) > 3 and fields[0] in accumulators:
            accumulators[fields[0]].append(float(fields[3]))
    return 1

# ---- Entity Level F1 calculation | Ends ---- #


# Testing source: CORA
N_CORA_SAMPLES = 10  # hold-out files are named cora_test_sample_1.txt ... cora_test_sample_10.txt


def _mean_or_nan(scores):
    """Average of ``scores``; NaN instead of ZeroDivisionError when the list is empty."""
    return sum(scores) / len(scores) if scores else float("nan")


sum_f1 = 0.0
os.chdir(r"/home/muddi004/muddi/citationParser/CORA/holdouts/") # This goes to holdout directory where preprocessed test files (CORA) are situated  

# Iterate over the Cora sample files to evaluate
for index in range(N_CORA_SAMPLES):
    file_name = 'cora_test_sample_' + str(index+1) + '.txt'

    #if file_name != 'cora_test_sample_2.txt':
    #    continue

    print('Current CORA Sample: {}'.format(file_name))

    wt_tuple_list = []
    words = []
    tags = []

    # Each sentence is a list of word-label pairs
    sentences = []

    # File format: one "token<TAB>tag" pair per line, a line without a tab ends a sentence.
    # The context manager closes the file even if parsing fails.
    with open(file_name, encoding="utf8") as testing_file:
        for line in testing_file:
            word_tag = line.split('\t')

            if len(word_tag) <= 1:
                # Sentence break: store the finished sentence and start a new one.
                # Empty sentences (consecutive blank lines) are skipped - there is nothing to tag.
                if wt_tuple_list:
                    sentences.append(wt_tuple_list)
                wt_tuple_list = []
                continue

            word = word_tag[0].rstrip()
            tag = word_tag[1].rstrip()

            words.append(word)
            tags.append(tag)

            wt_tuple = (word,tag)
            if wt_tuple != ('',''):
                wt_tuple_list.append(wt_tuple)

    # A file that does not end with a blank line would otherwise lose its last sentence.
    if wt_tuple_list:
        sentences.append(wt_tuple_list)

    print("Total sentence: {}".format(len(sentences)))
    predicted_lables = inferdata(sentences)

    #Token level performance
    true_labels = [[wt_tuple[1] for wt_tuple in s] for s in sentences]

    # float() accepts both a plain Python float and a numpy scalar.
    current_run_f1 = float(f1_score(true_labels, predicted_lables))
    print('Current F1: {}'.format(current_run_f1))
    # Sum up F1 on each iteration to get Avg later
    sum_f1 = sum_f1 + current_run_f1

    print("Precision-score: {:.1%}".format(precision_score(true_labels, predicted_lables)))
    print("Recal-score: {:.1%}".format(recall_score(true_labels, predicted_lables)))
    print("F1-score: {:.1%}".format(current_run_f1))
    print("Classification-report")
    # Build the report once; it is both displayed and parsed for the per-entity F1 values.
    report = classification_report(true_labels, predicted_lables)
    print(report)

    # Sum up each entity level F1 score to get Avg later after the Loop
    report_to_df(report)


print("Average F1-score: {:.1%}".format(sum_f1/N_CORA_SAMPLES))
print("Author f1:", _mean_or_nan(author))
print("CT f1:", _mean_or_nan(ct))
print("date f1:", _mean_or_nan(date))
# print("ISUUE f1:", sum(issue)/len(issue))
print("PAGE f1:", _mean_or_nan(page))
print("PUB f1:", _mean_or_nan(pub))
print("punc f1:", _mean_or_nan(punc))
print("title f1:", _mean_or_nan(title))
print("vol f1:", _mean_or_nan(vol))

Current CORA Sample: cora_test_sample_2.txt
Total sentence: 50
word         	unk          	pred tag	true tag
K            	             	B-AUTHOR	B-AUTHOR
.            	✓            	B-PUNC  	B-PUNC
H            	             	I-AUTHOR	I-AUTHOR
.            	✓            	B-PUNC  	B-PUNC
Wolf         	             	I-AUTHOR	I-AUTHOR
,            	✓            	B-PUNC  	B-PUNC
K            	             	I-AUTHOR	I-AUTHOR
.            	✓            	B-PUNC  	B-PUNC
Froitzheim   	✓            	I-AUTHOR	I-AUTHOR
and          	✓            	I-AUTHOR	I-AUTHOR
P            	             	I-AUTHOR	I-AUTHOR
.            	✓            	B-PUNC  	B-PUNC
Schulthess   	✓            	I-AUTHOR	I-AUTHOR
,            	✓            	B-PUNC  	B-PUNC
(            	✓            	B-PUNC  	B-PUNC
1995         	✓            	B-DATE  	B-DATE
)            	✓            	B-PUNC  	B-PUNC
Multimedia   	             	B-TITLE 	B-TITLE
Application  	             	I-TITLE 	I-TITLE
Sharing      	             	I-TITLE 	

In [6]:
'''
# Entity Level Performance

import utility

pred_author = []
true_author = []

for sentence in sentences[0:2]:
    for sentence in sentences:
        tokens = []
        true_tags = []
        
        for wt_tuple in sentence:
            token = wt_tuple[0]
            tag = wt_tuple[1]
            tokens.append(token)
            true_tags.append(tag)
        #tokens, true_tags = infersentence(sentence)
        predicted_tags = predictedtags(tokens,true_tags)
    print(sentence)
    tokens, true_tags, predicted_tags = infersentence(sentence)
    pauthors, ptitle, pcontainertitle, pissue, ppublisher, psource, pissn, pvolume, ppage, ppublication_year = glueTokens(tokens, predicted_tags)
    tauthors, ttitle, tcontainertitle, tissue, tpublisher, tsource, tissn, tvolume, tpage, tpublication_year = glueTokens(tokens, true_tags)
    
    # Author
    pred_author.append(pauthors)
    true_author.append(tauthors)

print("True Len: {}".format(len(true_author)))
print("Pred Len: {}".format(len(pred_author)))
print("True Author {}".format(true_author))
print("Pred Author {}".format(pred_author))
print("Author precision-score: {:.1%}".format(precision_score(true_author, pred_author)))
#cosine_value = utility.calculate_cosine_between_strings(fooditem.upper(),descriptionOnList.upper())
print(sentences[0])
'''

x shape:torch.Size([35, 1, 400])
pe shape: torch.Size([35, 1, 400])
x shape:torch.Size([42, 1, 400])
pe shape: torch.Size([42, 1, 400])
x shape:torch.Size([21, 1, 400])
pe shape: torch.Size([21, 1, 400])
x shape:torch.Size([19, 1, 400])
pe shape: torch.Size([19, 1, 400])
x shape:torch.Size([21, 1, 400])
pe shape: torch.Size([21, 1, 400])
x shape:torch.Size([30, 1, 400])
pe shape: torch.Size([30, 1, 400])
x shape:torch.Size([51, 1, 400])
pe shape: torch.Size([51, 1, 400])
x shape:torch.Size([19, 1, 400])
pe shape: torch.Size([19, 1, 400])
x shape:torch.Size([16, 1, 400])
pe shape: torch.Size([16, 1, 400])
x shape:torch.Size([58, 1, 400])
pe shape: torch.Size([58, 1, 400])
x shape:torch.Size([95, 1, 400])
pe shape: torch.Size([95, 1, 400])
x shape:torch.Size([47, 1, 400])
pe shape: torch.Size([47, 1, 400])
x shape:torch.Size([32, 1, 400])
pe shape: torch.Size([32, 1, 400])
x shape:torch.Size([31, 1, 400])
pe shape: torch.Size([31, 1, 400])
x shape:torch.Size([43, 1, 400])
pe shape: torch

TypeError: Found input variables without list of list.

In [5]:
'''
    Unknown citation string testing zone
'''
'''
import re
def reference_string_tokenizerA(citationString):

    charPattern = r"[\w\d]"
    noCharPattern = r"[\W]"
    endPtr = 0
    word = ""
    tokens = []
    while endPtr < len(citationString):
      if re.search(charPattern, citationString[endPtr]):
        word += citationString[endPtr]
      elif re.search(noCharPattern, citationString[endPtr]):
        if word is not "":
          tokens.append(word)   
        tokens.append(citationString[endPtr])
        word = ""

      endPtr = endPtr + 1
    
    tokensWithoutSpace = []
    for item in tokens:
        if item is not " ":
            tokensWithoutSpace.append(item)
    print(tokensWithoutSpace)
    return tokensWithoutSpace

sentence = "Lomolino, M. et al. (2013) ‘Of mice and mammoths: generality and antiquity of the island rule’, Journal of Biogeography, 40(8), pp. 1427-1439. Available at:"
print("Original reference string: {}".format(sentence))
#tags = ["O", "O", "O", "O", "O", "O", "O", "B-PERSON", "I-PERSON", "L-PERSON", "O", "O", "B-ORGANIZATION", "I-ORGANIZATION", "I-ORGANIZATION", "L-ORGANIZATION", "O", "O", "O", "O", "O", "O", "O", "O", "U-PERSON", "O", "O", "O", "O", "B-TIME", "I-TIME", "I-TIME", "I-TIME", "I-TIME", "I-TIME", "I_TIME", "L-TIME", "O"]
tokens = reference_string_tokenizerA(sentence)
predicted_tags = infer(model, tokens=tokens, device=use_device, infer_unknown_sentence=True)

print("Parse metadata from unknown reference string")
glueTokens(tokens=tokens, predicted_tags=predicted_tags)
'''
'''
          [500,  1K,  10K,  22K,  100K, 220K, 342K, 1M]
Avg:      [84.7,87.4,87.7, 92.2,  93.5, 94.2, 93.6, 93.4]    
Author:   [94.2,95.3,96.0, 97.9,  98.0, 98.8, 98.8, 98.5]
CT:       [40.7,48.7,53.0, 56.1,  60.2, 62.1, 61.3, 60.1]
Date:     [48.4,61.0,48.5, 83.1,  91.2, 87.7, 84.5, 86.0]
Page:     [53.5,62.4,60.6, 71.1,  81.6, 84.4, 77.5, 76.2]
Publisher:[48.2,62.2,59.0, 81.7,  87.3, 91.4, 92.9, 89.59]
Punc:     [99.6,99.0,100,  100 ,  100 ,  100, 100 , 100 ]
Title:    [65.9,72.4,73.5, 87.5,  87.8, 90.7, 90.2, 91.2]
Vol:      [39.4,46.6,39.5, 51.5,  54.9, 65.2, 62.5, 58.4]



After 8th epoch on 1M:

Average F1-score: 93.4%
Author f1: 0.9850000000000001
CT f1: 0.6010000000000001
date f1: 0.8600000000000001
PAGE f1: 0.7629999999999999
PUB f1: 0.8959999999999999
punc f1: 1.0
title f1: 0.9120000000000001
vol f1: 0.5840000000000001
'''

Original reference string: Lomolino, M. et al. (2013) ‘Of mice and mammoths: generality and antiquity of the island rule’, Journal of Biogeography, 40(8), pp. 1427-1439. Available at:
['Lomolino', ',', 'M', '.', 'et', 'al', '.', '(', '2013', ')', '‘', 'Of', 'mice', 'and', 'mammoths', ':', 'generality', 'and', 'antiquity', 'of', 'the', 'island', 'rule', '’', ',', 'Journal', 'of', 'Biogeography', ',', '40', '(', '8', ')', ',', 'pp', '.', '1427', '-', '1439', '.', 'Available', 'at', ':']
x shape:torch.Size([43, 1, 400])
pe shape: torch.Size([43, 1, 400])
word        	unk         	pred tag
Lomolino    	✓           	B-AUTHOR
,           	✓           	B-PUNC  
M           	            	I-AUTHOR
.           	✓           	B-PUNC  
et          	            	I-AUTHOR
al          	            	I-AUTHOR
.           	✓           	B-PUNC  
(           	✓           	B-PUNC  
2013        	✓           	B-CT    
)           	✓           	B-PUNC  
‘           	✓           	B-PUNC  
Of          	✓        

('', '', '', '8', '', '', '', '40', '1427', '', '1439')

In [3]:
from seqeval.metrics import accuracy_score
from seqeval.metrics import classification_report
from seqeval.metrics import f1_score
# Toy check of seqeval's scoring: the predicted MISC span starts one token earlier than
# the gold span, while the PER span matches exactly.
y_true = [
    ['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'],
    ['B-PER', 'I-PER', 'O'],
]
y_pred = [
    ['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'],
    ['B-PER', 'I-PER', 'O'],
]
print(f1_score(y_true, y_pred))
print(classification_report(y_true, y_pred))


0.5
              precision    recall  f1-score   support

        MISC       0.00      0.00      0.00         1
         PER       1.00      1.00      1.00         1

   micro avg       0.50      0.50      0.50         2
   macro avg       0.50      0.50      0.50         2
weighted avg       0.50      0.50      0.50         2



Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [4]:
from seqeval.metrics import accuracy_score
from seqeval.metrics import classification_report
from seqeval.metrics import f1_score
# Toy check with this notebook's own labels: the last token of the gold CT span is
# predicted as I-TITLE, while the TITLE span of the second sentence matches exactly.
y_true = [
    ['B-CT', 'I-CT', 'I-CT','I-CT', 'O'],
    ['B-TITLE', 'I-TITLE', 'O'],
]
y_pred = [
    ['B-CT', 'I-CT', 'I-CT','I-TITLE','O'],
    ['B-TITLE', 'I-TITLE', 'O'],
]
print(f1_score(y_true, y_pred))
print(classification_report(y_true, y_pred))

0.4
              precision    recall  f1-score   support

          CT       0.00      0.00      0.00         1
       TITLE       0.50      1.00      0.67         1

   micro avg       0.33      0.50      0.40         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2

