In [1]:
import numpy as np
import pandas as pd
import jsonlines

from sklearn.metrics import f1_score

In [16]:
# Gold ACL-ARC annotation files and the matching prediction files, per split.
FILE_TRAIN_TRUE = "scicite/data/acl-arc/train.jsonl"
FILE_TRAIN_PRED = "preds/origin/out_train.jsonl"

FILE_DEV_TRUE = "scicite/data/acl-arc/dev.jsonl"
FILE_DEV_PRED = "preds/origin/out_dev.jsonl"

FILE_TEST_TRUE = "scicite/data/acl-arc/test.jsonl"
FILE_TEST_PRED = "preds/origin/out_test.jsonl"

In [17]:
# Citation-intent classes; a label's position in this list becomes its integer id.
Labels = [
    'Background',
    'CompareOrContrast',
    'Extends',
    'Future',
    'Motivation',
    'Uses',
]

In [18]:
# Label name -> integer id mapping; starts empty.
labels_dict = dict()

In [19]:
# Give every label its position in Labels as the integer id (mutates labels_dict in place).
labels_dict.update(
    (label_name, label_id) for label_id, label_name in enumerate(Labels)
)

In [20]:
# Display the label -> id mapping built above.
labels_dict

{'Background': 0,
 'CompareOrContrast': 1,
 'Extends': 2,
 'Future': 3,
 'Motivation': 4,
 'Uses': 5}

In [21]:
def get_f_score(true_file, pred_file, label_to_id=None):
    """Compute macro- and micro-averaged F1 of predicted citation intents.

    Gold intents are read from ``true_file`` and predictions from
    ``pred_file`` (both JSON Lines); records are joined on ``citation_id``.
    If a ``citation_id`` appears more than once in ``pred_file`` the last
    prediction wins, so every citation is scored exactly once.

    The metrics are printed and also returned.

    Parameters
    ----------
    true_file : str
        Path to the gold file; each record needs ``citation_id`` and ``intent``.
    pred_file : str
        Path to the prediction file; each record needs ``citation_id`` and
        ``prediction``.
    label_to_id : dict, optional
        Mapping from label name to integer id. When omitted, the
        notebook-level ``labels_dict`` is used.

    Returns
    -------
    tuple
        ``(f_macro, f_micro)``.

    Raises
    ------
    KeyError
        If a predicted ``citation_id`` has no gold record, or a gold/predicted
        label is missing from the label mapping.
    ValueError
        If ``pred_file`` contains no records.

    Notes
    -----
    ``f1_score`` is called without an explicit ``labels`` argument, so the
    macro average is taken over the labels that actually occur in the gold
    or predicted values of this split.
    """
    if label_to_id is None:
        # Fall back to the mapping defined at notebook level.
        label_to_id = labels_dict

    # Gold intent for every citation id.
    with jsonlines.open(true_file) as reader:
        true = {obj['citation_id']: obj['intent'] for obj in reader}

    # (gold, predicted) per citation id; keying by id collapses duplicates.
    pairs = {}
    with jsonlines.open(pred_file) as reader:
        for obj in reader:
            citation_id = obj['citation_id']
            if citation_id not in true:
                raise KeyError(
                    f"citation_id {citation_id!r} from {pred_file} "
                    f"has no gold record in {true_file}"
                )
            pairs[citation_id] = (true[citation_id], obj['prediction'])

    if not pairs:
        # Without this guard the scoring below fails with an unrelated error.
        raise ValueError(f"no predictions found in {pred_file}")

    # Encode label names as integer ids.
    y_true = np.array([label_to_id[gold] for gold, _ in pairs.values()])
    y_pred = np.array([label_to_id[pred] for _, pred in pairs.values()])

    # Result
    f_macro = f1_score(y_true, y_pred, average='macro')
    f_micro = f1_score(y_true, y_pred, average='micro')

    print(f"F_macro={f_macro}, F_micro={f_micro}, n_items={len(y_pred)}")
    return f_macro, f_micro

***DEV***

In [22]:
# Score the dev split: the function prints the metrics, and the cell echoes the returned (macro, micro) tuple.
get_f_score(FILE_DEV_TRUE, FILE_DEV_PRED)

F_macro=0.7468576005374051, F_micro=0.7982456140350878, n_items=114


(0.7468576005374051, 0.7982456140350878)

***TEST***

In [23]:
# Score the test split: the function prints the metrics, and the cell echoes the returned (macro, micro) tuple.
get_f_score(FILE_TEST_TRUE, FILE_TEST_PRED)

F_macro=0.68523744281723, F_micro=0.7697841726618704, n_items=139


(0.68523744281723, 0.7697841726618704)

***TRAIN***

In [24]:
# Score the train split: the function prints the metrics, and the cell echoes the returned (macro, micro) tuple.
get_f_score(FILE_TRAIN_TRUE, FILE_TRAIN_PRED)

F_macro=0.9830034079930069, F_micro=0.9875592417061612, n_items=1688


(0.9830034079930069, 0.9875592417061612)

In [42]:
# Read only the first training record so its fields can be inspected in the cells below.
with jsonlines.open(FILE_TRAIN_TRUE) as reader:
    obj = next(iter(reader), None)
if obj is None:
    # Fail here rather than let later cells reuse a stale or undefined `obj`.
    raise ValueError(f"{FILE_TRAIN_TRUE} contains no records")

In [43]:
# List the field names present on the inspected record.
obj.keys()

dict_keys(['text', 'citing_paper_id', 'cited_paper_id', 'citing_paper_year', 'cited_paper_year', 'citing_paper_title', 'cited_paper_title', 'cited_author_ids', 'citing_author_ids', 'extended_context', 'section_number', 'section_title', 'intent', 'cite_marker_offset', 'sents_before', 'sents_after', 'cleaned_cite_text', 'citation_id', 'citation_excerpt_index', 'section_name'])

In [44]:
# Show the record's 'text' field.
obj['text']

'Thus , over the past few years , along with advances in the use of learning and statistical methods for acquisition of full parsers ( Collins , 1997 ; Charniak , 1997a ; Charniak , 1997b ; Ratnaparkhi , 1997 ) , significant progress has been made on the use of statistical learning methods to recognize shallow parsing patterns syntactic phrases or words that participate in a syntactic relationship ( Church , 1988 ; Ramshaw and Marcus , 1995 ; Argamon et al. , 1998 ; Cardie and Pierce , 1998 ; Munoz et al. , 1999 ; Punyakanok and Roth , 2001 ; Buchholz et al. , 1999 ; Tjong Kim Sang and Buchholz , 2000 ) .'

In [45]:
# Number of entries in the record's 'sents_before' field.
len(obj['sents_before'])

3

In [46]:
# First entry of 'sents_before'; the saved output shows it is a list of per-token dicts (word, lemma, pos, offsets, ...).
obj['sents_before'][0]

[{'index': 1,
  'word': 'Thus',
  'lemma': 'thus',
  'after': '',
  'pos': 'RB',
  'characterOffsetEnd': 375,
  'segment_span': [0, 63],
  'characterOffsetBegin': 371,
  'originalText': 'Thus',
  'ArgType': None,
  'before': ' '},
 {'index': 2,
  'word': ',',
  'lemma': ',',
  'after': ' ',
  'pos': ',',
  'characterOffsetEnd': 376,
  'segment_span': [0, 63],
  'characterOffsetBegin': 375,
  'originalText': ',',
  'ArgType': None,
  'before': ''},
 {'index': 3,
  'word': 'over',
  'lemma': 'over',
  'after': ' ',
  'pos': 'IN',
  'characterOffsetEnd': 381,
  'segment_span': [0, 63],
  'characterOffsetBegin': 377,
  'originalText': 'over',
  'ArgType': None,
  'before': ' '},
 {'index': 4,
  'word': 'the',
  'lemma': 'the',
  'after': ' ',
  'pos': 'DT',
  'characterOffsetEnd': 385,
  'segment_span': [0, 63],
  'characterOffsetBegin': 382,
  'originalText': 'the',
  'ArgType': None,
  'before': ' '},
 {'index': 5,
  'word': 'past',
  'lemma': 'past',
  'after': ' ',
  'pos': 'JJ',
  'ch

In [38]:
# First entry of 'sents_after' (a list of per-token dicts in the saved output).
# NOTE(review): this cell's execution count (38) predates the cell that loads `obj` (42), and the
# saved tokens do not appear in the obj['text'] output above; the output looks stale. Re-run to refresh.
obj['sents_after'][0]

[{'index': 1,
  'word': 'In',
  'lemma': 'in',
  'after': ' ',
  'pos': 'IN',
  'characterOffsetEnd': 448,
  'segment_span': [0, 19],
  'characterOffsetBegin': 446,
  'originalText': 'In',
  'ArgType': None,
  'before': ' '},
 {'index': 2,
  'word': 'Variational',
  'lemma': 'Variational',
  'after': ' ',
  'pos': 'NNP',
  'characterOffsetEnd': 460,
  'segment_span': [0, 19],
  'characterOffsetBegin': 449,
  'originalText': 'Variational',
  'ArgType': None,
  'before': ' '},
 {'index': 3,
  'word': 'Bayesian',
  'lemma': 'Bayesian',
  'after': ' ',
  'pos': 'NNP',
  'characterOffsetEnd': 469,
  'segment_span': [0, 19],
  'characterOffsetBegin': 461,
  'originalText': 'Bayesian',
  'ArgType': None,
  'before': ' '},
 {'index': 4,
  'word': 'Inference',
  'lemma': 'Inference',
  'after': ' ',
  'pos': 'NNP',
  'characterOffsetEnd': 479,
  'segment_span': [0, 19],
  'characterOffsetBegin': 470,
  'originalText': 'Inference',
  'ArgType': None,
  'before': ' '},
 {'index': 5,
  'word': '('

In [35]:
# Show the record's 'cleaned_cite_text' field; the saved output contains an @@CITATION placeholder.
# NOTE(review): this cell's execution count (35) predates the cell that loads `obj` (42), and the
# saved sentence differs from the obj['text'] output above; the output looks stale. Re-run to refresh.
obj['cleaned_cite_text']

'To solve these scaling issues , we implement Online Variational Bayesian Inference ( Hoffman et al. , 2010 ; @@CITATION ) for our models .'

In [36]:
# Show the record's 'extended_context' field.
# NOTE(review): this cell's execution count (36) predates the cell that loads `obj` (42), and the
# saved text does not contain the obj['text'] sentence shown above; the output looks stale. Re-run to refresh.
obj['extended_context']

'Prior work using mLDA has used Gibbs Sampling to approximate the posterior , but we found this method did not scale with larger values of K , especially when applied to the relatively large deWaC corpus . To solve these scaling issues , we implement Online Variational Bayesian Inference ( Hoffman et al. , 2010 ; Hoffman et al. , 2012 ) for our models . In Variational Bayesian Inference ( VBI ) , one approximates the true posterior using simpler distributions with free variables .'