In [8]:
import json

import pandas as pd

## Dataset exploration

In [33]:
from allennlp.data.dataset_readers.dataset_reader import DatasetReader
from allennlp.common import Params
import os
import logging
from scicite.dataset_readers.citation_data_reader_scicite import SciciteDatasetReader
from allennlp.data.instance import Instance

from typing import Dict, Iterable, Tuple

# Logger for this notebook session.
logger = logging.getLogger(__name__)  # pylint: disable=invalid-name

# Seeds are exported as string-valued environment variables. Each derived seed
# is the integer third of the previous one: SEED -> PYTORCH_SEED -> NUMPY_SEED.
os.environ.update(SEED='21016')
os.environ['PYTORCH_SEED'] = f"{int(os.environ['SEED']) // 3}"
os.environ['NUMPY_SEED'] = f"{int(os.environ['PYTORCH_SEED']) // 3}"
# Presumably read by the experiment config (the reader log shows
# with_elmo='true') -- TODO confirm against the config file.
os.environ.update(elmo="true")

In [35]:
# Experiment configuration to load; no overrides are applied on top of it.
parameter_filename = "./experiment_configs/scicite-experiment-0.05-0.05.json"
overrides = ""
params = Params.from_file(parameter_filename, overrides)

# `pop` removes the reader section from `params` before it is used to
# construct the dataset reader.
dataset_reader_params = params.pop('dataset_reader')
dataset_reader = DatasetReader.from_params(dataset_reader_params)


03/18/2024 00:55:44 - INFO - allennlp.common.from_params -   instantiating class <class 'allennlp.data.dataset_readers.dataset_reader.DatasetReader'> from params {'multilabel': False, 'type': 'scicite_datasetreader', 'use_sparse_lexicon_features': False, 'with_elmo': 'true'} and extras {}
03/18/2024 00:55:44 - INFO - allennlp.common.params -   dataset_reader.type = scicite_datasetreader
03/18/2024 00:55:44 - INFO - allennlp.common.params -   dataset_reader.lazy = False
03/18/2024 00:55:44 - INFO - allennlp.common.from_params -   instantiating class <class 'allennlp.data.tokenizers.tokenizer.Tokenizer'> from params {} and extras {}
03/18/2024 00:55:44 - INFO - allennlp.common.params -   dataset_reader.tokenizer.type = word
03/18/2024 00:55:44 - INFO - allennlp.common.from_params -   instantiating class <class 'allennlp.data.tokenizers.word_tokenizer.WordTokenizer'> from params {} and extras {}
03/18/2024 00:55:44 - INFO - allennlp.common.params -   dataset_reader.tokenizer.start_tokens 

In [36]:
# params.as_dict()

In [37]:
# Validation/test data gets its own reader only when the config supplies a
# "validation_dataset_reader" section; otherwise the training reader is shared.
validation_dataset_reader_params = params.pop("validation_dataset_reader", None)
validation_and_test_dataset_reader: DatasetReader
if validation_dataset_reader_params is None:
    validation_and_test_dataset_reader = dataset_reader
else:
    logger.info("Using a separate dataset reader to load validation and test data.")
    validation_and_test_dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)

# Read the training split from the path given in the config.
train_data_path = params.pop('train_data_path')
logger.info("Reading training data from %s", train_data_path)
train_data = dataset_reader.read(train_data_path)

# Registry of loaded splits, keyed by split name.
datasets: Dict[str, Iterable[Instance]] = dict(train=train_data)

03/18/2024 00:55:45 - INFO - allennlp.common.params -   validation_dataset_reader = None
03/18/2024 00:55:45 - INFO - allennlp.common.params -   train_data_path = scicite_data/train.jsonl
03/18/2024 00:55:45 - INFO - __main__ -   Reading training data from scicite_data/train.jsonl
8243it [00:02, 3577.75it/s]


In [51]:
# Show the `fields` mapping (field name -> Field object) of the first training instance.
train_data[0].fields

{'citation_text': <allennlp.data.fields.text_field.TextField at 0x7f5b492a1b70>,
 'labels': <allennlp.data.fields.label_field.LabelField at 0x7f5b4929f908>,
 'year_diff': <allennlp.data.fields.array_field.ArrayField at 0x7f5b4929f668>,
 'citing_paper_id': <allennlp.data.fields.metadata_field.MetadataField at 0x7f5b4929f1d0>,
 'cited_paper_id': <allennlp.data.fields.metadata_field.MetadataField at 0x7f5b4929fb00>,
 'citation_excerpt_index': <allennlp.data.fields.metadata_field.MetadataField at 0x7f5b4929fdd8>,
 'citation_id': <allennlp.data.fields.metadata_field.MetadataField at 0x7f5b4929f4e0>}

In [48]:
# List every attribute name on the first training instance, dunder and private names included.
dir(train_data[0])

['__abstractmethods__',
 '__args__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__extra__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__next_in_mro__',
 '__orig_bases__',
 '__origin__',
 '__parameters__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__tree_hash__',
 '__weakref__',
 '_abc_cache',
 '_abc_generic_negative_cache',
 '_abc_generic_negative_cache_version',
 '_abc_negative_cache',
 '_abc_negative_cache_version',
 '_abc_registry',
 '_gorg',
 'add_field',
 'as_tensor_dict',
 'count_vocab_items',
 'fields',
 'get',
 'get_padding_lengths',
 'index_fields',
 'indexed',
 'items',
 'keys',
 'values']

## JSONL: dev data and pretrained-model predictions

In [120]:
# Parse the dev split: one JSON object per line (JSONL), giving a list of dicts.
#
# The file handle is iterated rather than using `f.read().splitlines()`:
# str.splitlines() also breaks on characters such as U+2028, U+2029 and U+0085,
# which may legally appear unescaped inside a JSON string and would cut a
# record in half. File iteration splits on real newlines only. Blank lines are
# skipped so a stray empty line does not raise json.JSONDecodeError.
with open("./scicite_data/dev.jsonl", encoding="utf8") as f:
    lines = [json.loads(line) for line in f if line.strip()]

In [16]:
# Tabulate the parsed dev records held in `lines` (one row per record).
df_dev = pd.DataFrame(lines)

In [148]:
# Display the dev DataFrame.
# NOTE(review): `df_dev.head()` would give a more compact preview.
df_dev

Unnamed: 0,source,citeEnd,sectionName,citeStart,string,label,label2,citingPaperId,citedPaperId,isKeyCitation,id,unique_id,excerpt_index,label_confidence,label2_confidence
0,explicit,68.0,Discussion,64.0,These results are in contrast with the finding...,result,supportive,8f1fbe460a901d994e9b81d69f77bfbe32719f4c,5e413c7872f5df231bf4a4f694504384560e98ca,False,8f1fbe460a901d994e9b81d69f77bfbe32719f4c>5e413...,8f1fbe460a901d994e9b81d69f77bfbe32719f4c>5e413...,0,,
1,explicit,241.0,Discussion,222.0,…nest burrows in close proximity of one anothe...,background,,d9f3207db0c79a3b154f3875c9760cc6b056904b,2cc6ff899bf17666ad35893524a4d61624555ed7,False,d9f3207db0c79a3b154f3875c9760cc6b056904b>2cc6f...,d9f3207db0c79a3b154f3875c9760cc6b056904b>2cc6f...,10,0.7337,
2,explicit,94.0,. 6 Discussion,71.0,This is clearly in contrast to the results of ...,result,supportive,226f798d30e5523c5b9deafb826ddb04d47c11dc,,False,226f798d30e5523c5b9deafb826ddb04d47c11dc>None,226f798d30e5523c5b9deafb826ddb04d47c11dc>None_0,0,,
3,explicit,170.0,,148.0,"…in a subset of alcoholics (Chen et al., 2004;...",background,,59dba7cd80edcce831d20b35f9eb597bba290154,273996fbf99465211eb8306abe8c56c5835f332e,False,59dba7cd80edcce831d20b35f9eb597bba290154>27399...,59dba7cd80edcce831d20b35f9eb597bba290154>27399...,0,1.0000,
4,explicit,89.0,DISCUSSION,85.0,This result is consistent with the conclusions...,result,not_supportive,0640f6e098a9d241cd680473e8705357ae101e04,e33da0584b8db37816d510fd9ba7c1216858fd5f,False,0640f6e098a9d241cd680473e8705357ae101e04>e33da...,0640f6e098a9d241cd680473e8705357ae101e04>e33da...,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
911,explicit,99.0,Discussion,71.0,Our results are consistent with those of a pre...,result,not_supportive,d9ef5d1cb543d0f1330908b36f7106f23cbad404,ae7c65d4d7710bf5f36309faea886648f9a51d4a,False,d9ef5d1cb543d0f1330908b36f7106f23cbad404>ae7c6...,d9ef5d1cb543d0f1330908b36f7106f23cbad404>ae7c6...,0,,
912,explicit,136.0,1. Introduction,129.0,Some of these peptides act as neurotoxins on t...,background,,d937bc4d0722d5b45366b3c4dfde4732224bc048,9200a75534f44836ca7651c9d63d11b884947fa6,True,d937bc4d0722d5b45366b3c4dfde4732224bc048>9200a...,d937bc4d0722d5b45366b3c4dfde4732224bc048>9200a...,4,1.0000,
913,explicit,150.0,4. Discussion,144.0,"Therefore, despite an apparent higher number o...",background,,3f50975c58d861e4fbd3b4fd065f0658b1aa1e10,d16a1d95e6947da69797bb0cb59148057174e35a,True,3f50975c58d861e4fbd3b4fd065f0658b1aa1e10>d16a1...,3f50975c58d861e4fbd3b4fd065f0658b1aa1e10>d16a1...,0,1.0000,
914,explicit,28.0,INTRODUCTION,13.0,According to Xu et al (2011) the factors that ...,method,,d776ade6bb4898c032b971d2cec145976408e838,22e3889f93c19c15b746c1339ce9d7439ccb632e,False,d776ade6bb4898c032b971d2cec145976408e838>22e38...,d776ade6bb4898c032b971d2cec145976408e838>22e38...,0,,


In [149]:
# Load the pretrained model's predictions as a list of parsed JSON records,
# one JSON object per line of the file.
#
# Each line is parsed on its own. The earlier approach wrapped the whole file
# in brackets, replaced newlines with ', ' and chopped the last three
# characters, which only produced valid JSON when the file ended with exactly
# one trailing newline. Blank lines are skipped.
with open("./output_pretrained_test.txt", encoding="utf8") as f:
    output_pretrained = [json.loads(line) for line in f if line.strip()]

In [150]:
# Tabulate the parsed prediction records (one row per record).
# NOTE(review): despite the `df_dev_` prefix, these records are read from
# output_pretrained_test.txt -- confirm which split they belong to.
df_dev_predict = pd.DataFrame(output_pretrained)

In [151]:
# df_dev_predict.dropna(subset=['prediction'])
# Keep only the rows whose prediction is not the empty string.
df_dev_predict = df_dev_predict[df_dev_predict['prediction'] != '']

In [152]:
# df_dev_merged = df_dev.merge(df_dev_predict, how='left', left_on="unique_id", right_on="unique_id").dropna(subset=['prediction'])

In [153]:
# def compare_res(df_row):
#     if df_row["label"] == df_row["prediction"]:
#         return True
#     else:
#         return False

In [154]:
# df_dev_merged["correct_pred"] = df_dev_merged.apply(compare_res, axis=1)

In [155]:
# df_dev_merged[df_dev_merged["correct_pred"] == True]["correct_pred"].count()

In [156]:
# df_dev_merged["correct_pred"].count()

In [157]:
from sklearn.metrics import classification_report, f1_score

In [158]:
# Per-class precision / recall / F1 of the predictions against the gold labels.
print(classification_report(y_true=df_dev_predict['label'], y_pred=df_dev_predict['prediction']))

              precision    recall  f1-score   support

  background       0.88      0.87      0.88       992
      method       0.88      0.81      0.85       605
      result       0.71      0.86      0.78       259

   micro avg       0.85      0.85      0.85      1856
   macro avg       0.82      0.85      0.83      1856
weighted avg       0.86      0.85      0.85      1856



In [159]:
# Macro-averaged F1 of the predictions against the gold labels.
print(f1_score(y_true=df_dev_predict['label'], y_pred=df_dev_predict['prediction'], average='macro'))
# df_dev_predict['prediction'].unique()

0.8328905208191819


In [127]:
# df_dev_merged[df_dev_merged['prediction']!= df_dev_merged['prediction']]

## Experiment results

In [146]:
# Value pairs identifying the experiment runs; both members of a pair are
# interpolated into the name of that run's output directory.
lambda_pairs = [(0,0), (0.05,0.05), (0.1,0.1), (0.1, 0.2), (0.1, 0.3), (0.2, 0.2), (0.3, 0.3)]

# For every run, print the pair followed by its best validation average F1.
for lamb in lambda_pairs:
    print(lamb)
    metrics_path = f"./experiments-_{lamb[0]}_{lamb[1]}/metrics.json"
    with open(metrics_path, encoding="utf8") as f:
        metrics = json.load(f)
    print(metrics['best_validation_average_F1'])

(0, 0)
0.8273689041357627
(0.05, 0.05)
0.8294245106130006
(0.1, 0.1)
0.8131806035859791
(0.1, 0.2)
0.8185485261755949
(0.1, 0.3)
0.8077549709636348
(0.2, 0.2)
0.8216554689057259
(0.3, 0.3)
0.8186008842495505


In [145]:
# Show the metrics loaded for the last experiment processed by the loop.
# NOTE(review): the stored output below is a raw JSON string, whereas the loop
# leaves `metrics` as a parsed dict. The output looks stale: this cell ran as
# In [145], before the loop cell's In [146]. Re-run to refresh.
metrics

'{\n  "training_duration": "01:41:35",\n  "training_start_epoch": 0,\n  "training_epochs": 9,\n  "epoch": 9,\n  "training_background_P": 0.9970467176708602,\n  "training_background_R": 0.9971858501755937,\n  "training_background_F1": 0.9971162790697174,\n  "training_method_P": 0.995141385676805,\n  "training_method_R": 0.9951897381079636,\n  "training_method_F1": 0.9951655613050057,\n  "training_result_P": 0.9949530634904613,\n  "training_result_R": 0.9942505547710309,\n  "training_result_F1": 0.9946016850814293,\n  "training_average_F1": 0.9956278418187176,\n  "training_aux-sec--introduction_P": 0.09836065573770492,\n  "training_aux-sec--introduction_R": 0.00021122298106033936,\n  "training_aux-sec--introduction_F1": 0.00042154073137274127,\n  "training_aux-sec--conclusion_P": 0.3947169811320755,\n  "training_aux-sec--conclusion_R": 0.03441242268719568,\n  "training_aux-sec--conclusion_F1": 0.0633056950916756,\n  "training_aux-sec--experiments_P": 0.18673724452286428,\n  "training_aux