In [116]:
import sys
sys.path.insert(0,'..')
import json
from abc import abstractmethod, ABC
from collections import OrderedDict
from logging import Logger
from typing import List
from tqdm import tqdm
from transformers import BertTokenizer

from spert import util, models, prediction,  sampling
from spert.entities import Dataset, EntityType, RelationType, Entity, Relation, Document
from spert.opt import spacy
from spert.evaluator import Evaluator
from spert.input_reader import JsonInputReader, BaseInputReader
from spert.loss import SpERTLoss, Loss
from spert.trainer import BaseTrainer
from spert.models import SpERT,SpROB, SpLONG

import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from logging import raiseExceptions
import math
import os
from pathlib import Path
from typing import Type
from unittest import case

import torch
from torch.nn import DataParallel
from torch.optim import Optimizer
import transformers
from torch.utils.data import DataLoader
from transformers import AdamW, BertConfig,RobertaConfig,LongformerConfig  
from transformers import BertTokenizer,RobertaTokenizer,LongformerTokenizer
from transformers import BertModel, RobertaModel,LongformerModel
from transformers import BertPreTrainedModel, RobertaPreTrainedModel,LongformerPreTrainedModel


# Log Extraction

We find all run directories that appear in both the `data/log` and `data/save` directories. We then extract the arguments (`args.json`) of each run and its last-epoch macro/micro scores (the last row of `eval_valid.csv`). If the `eval_valid.csv` file is missing, we discard the run altogether.

The code below assembles a pandas data frame of all run IDs that are names of both log and save directories.

In [134]:
LOGS = Path('../data/log')
SAVES = Path('../data/save')

# Directory layout read by this cell: <root>/<label>/<run ID>/
# label name -> label directory
logLabels = {x.name: x for x in LOGS.iterdir() if x.is_dir()}
saveLabels = {x.name: x for x in SAVES.iterdir() if x.is_dir()}

# run ID -> label and run ID -> run directory, for every run under data/log.
# NOTE: run IDs are used as dict keys, so a run ID that occurs under two
# labels keeps only the last one visited.
logRunLabels, logRunPaths = {}, {}
for label, labelDir in logLabels.items():
    for runDir in labelDir.iterdir():
        if runDir.is_dir():
            logRunLabels[runDir.name] = label
            logRunPaths[runDir.name] = runDir

# Same for data/save, restricted to run IDs that also have a log directory.
saveRunLabels, saveRunPaths = {}, {}
for label, labelDir in saveLabels.items():
    for runDir in labelDir.iterdir():
        if runDir.is_dir() and runDir.name in logRunLabels:
            saveRunLabels[runDir.name] = label
            saveRunPaths[runDir.name] = runDir

# One row per run ID present in BOTH trees. Log and save paths are paired by
# run ID (never by position), so a run without a save directory, or a
# different listing order in the two trees, can neither break construction
# nor attach a save path to the wrong run.
runDF = pd.DataFrame(
    [
        {
            'run': run,
            'label': logRunLabels[run],
            'logPath': logRunPaths[run],
            'savePath': saveRunPaths[run],
        }
        for run in logRunLabels
        if run in saveRunPaths
    ],
    columns=['run', 'label', 'logPath', 'savePath'],
)
# Keep the run ID both as a column and as the (unnamed) index.
runDF = runDF.set_index('run', drop=False).rename_axis(None)
runDF

Unnamed: 0,run,label,logPath,savePath
2022-03-22_10.17.46.145076,2022-03-22_10.17.46.145076,scierc_bert_train,../data/log/scierc_bert_train/2022-03-22_10.17...,../data/save/scierc_bert_train/2022-03-22_10.1...
2022-03-22_10.46.00.854943,2022-03-22_10.46.00.854943,scierc_bert_train,../data/log/scierc_bert_train/2022-03-22_10.46...,../data/save/scierc_bert_train/2022-03-22_10.4...
2022-03-22_09.51.47.931037,2022-03-22_09.51.47.931037,scierc_bert_train,../data/log/scierc_bert_train/2022-03-22_09.51...,../data/save/scierc_bert_train/2022-03-22_09.5...
2022-03-22_17.10.24.480132,2022-03-22_17.10.24.480132,scierc_rob_train,../data/log/scierc_rob_train/2022-03-22_17.10....,../data/save/scierc_rob_train/2022-03-22_17.10...
2022-03-22_17.58.14.707227,2022-03-22_17.58.14.707227,scierc_rob_train,../data/log/scierc_rob_train/2022-03-22_17.58....,../data/save/scierc_rob_train/2022-03-22_17.58...
2022-03-22_17.36.49.251748,2022-03-22_17.36.49.251748,scierc_rob_train,../data/log/scierc_rob_train/2022-03-22_17.36....,../data/save/scierc_rob_train/2022-03-22_17.36...
2022-03-22_15.13.18.446042,2022-03-22_15.13.18.446042,scierc_rob_train,../data/log/scierc_rob_train/2022-03-22_15.13....,../data/save/scierc_rob_train/2022-03-22_15.13...
2022-03-22_16.17.36.028741,2022-03-22_16.17.36.028741,scierc_rob_train,../data/log/scierc_rob_train/2022-03-22_16.17....,../data/save/scierc_rob_train/2022-03-22_16.17...
2022-03-22_12.42.15.274505,2022-03-22_12.42.15.274505,scierc_rob_train,../data/log/scierc_rob_train/2022-03-22_12.42....,../data/save/scierc_rob_train/2022-03-22_12.42...
2022-03-22_16.16.10.286131,2022-03-22_16.16.10.286131,scierc_rob_train,../data/log/scierc_rob_train/2022-03-22_16.16....,../data/save/scierc_rob_train/2022-03-22_16.16...


For each of the log directories selected above, we extract the arguments (`args.json`) and the scores in `eval_valid.csv`. We discard directories that don't contain `eval_valid.csv` (indicating incomplete runs). We create a data frame with all arguments, the run info from the previous data frame, and the scores of the last epoch (the last row of `eval_valid.csv`). There is one row for each run ID.

We show some of the columns below

In [179]:
argList = []
for run in runDF.itertuples():
    # Runs without eval_valid.csv are incomplete and are discarded. Check this
    # BEFORE touching args.json so that an incomplete run that also lacks
    # args.json is skipped instead of aborting the whole cell.
    evPath = run.logPath.joinpath('eval_valid.csv')
    if not evPath.exists():
        continue
    try:
        evalDF = pd.read_csv(evPath, sep=';')
    except pd.errors.EmptyDataError:
        # zero-byte file: nothing was ever evaluated
        continue
    if evalDF.empty:
        # header but no rows: there is no "last epoch" row to read
        continue

    with open(run.logPath.joinpath('args.json')) as A:
        D = json.load(A)
    D['label'] = run.label
    D['logPath'] = run.logPath
    D['savePath'] = run.savePath
    D['runID'] = run.run

    # Scores of the last evaluated epoch = last row of eval_valid.csv.
    D.update(evalDF.iloc[-1].to_dict())
    argList.append(D)

# One row per retained run. reset_index() deliberately keeps the positional
# 'index' column; errors='ignore' tolerates args.json files that do not
# contain every one of the dropped keys.
argDF = (
    pd.DataFrame(argList)
    .drop(columns=['store_predictions', 'store_examples', 'tokenizer_path'], errors='ignore')
    .reset_index()
)
argDF[['label','runID','ner_f1_macro','rel_f1_macro','rel_nec_f1_macro','train_batch_size', 'epochs', 'neg_entity_count', 'neg_relation_count', 'lr', 'weight_decay',  'lowercase', 'model_path', 'rel_filter_threshold', 'prop_drop']]


Unnamed: 0,label,runID,ner_f1_macro,rel_f1_macro,rel_nec_f1_macro,train_batch_size,epochs,neg_entity_count,neg_relation_count,lr,weight_decay,lowercase,model_path,rel_filter_threshold,prop_drop
0,scierc_bert_train,2022-03-22_10.17.46.145076,69.027011,47.519995,39.019091,4,20,100,100,6e-05,0.02,True,allenai/scibert_scivocab_uncased,0.4,0.15
1,scierc_bert_train,2022-03-22_10.46.00.854943,67.22729,42.934329,30.85308,4,20,100,100,6e-05,0.02,False,bert-base-cased,0.4,0.15
2,scierc_bert_train,2022-03-22_09.51.47.931037,70.308508,49.589145,38.588136,4,20,100,100,6e-05,0.02,False,allenai/scibert_scivocab_cased,0.4,0.15
3,scierc_rob_train,2022-03-22_17.10.24.480132,67.316155,42.998732,30.314092,4,20,100,100,4e-05,0.15,False,allenai/biomed_roberta_base,0.4,0.25
4,scierc_rob_train,2022-03-22_17.58.14.707227,64.967055,42.488559,30.390785,8,40,125,100,5e-05,0.15,False,allenai/biomed_roberta_base,0.4,0.25
5,scierc_rob_train,2022-03-22_17.36.49.251748,67.186608,42.142987,29.096455,6,30,100,100,4e-05,0.15,False,allenai/biomed_roberta_base,0.4,0.25
6,scierc_rob_train,2022-03-22_15.13.18.446042,66.783234,42.914476,29.339126,4,20,100,100,5e-05,0.15,False,allenai/biomed_roberta_base,0.4,0.25
7,scierc_rob_train,2022-03-22_12.42.15.274505,67.049817,44.869976,32.329741,4,20,100,100,5e-05,0.1,False,allenai/biomed_roberta_base,0.4,0.2
8,scierc_rob_train,2022-03-22_13.52.36.286048,66.768695,40.855688,28.105331,4,20,100,100,3e-05,0.2,False,allenai/biomed_roberta_base,0.4,0.2
9,scierc_rob_train,2022-03-22_16.49.15.916860,65.829467,38.183752,26.887889,4,20,100,100,5e-05,0.15,False,roberta-base,0.4,0.25


In [180]:
def best_run(df=None,groupingLabel = 'label', 
    maxMetrics = ['ner_f1_macro','rel_f1_macro',	'rel_nec_f1_macro']):
    '''
    Extract the best runs from a pandas dataframe of all runs 
    found in the standard directories. 

    For every metric in maxMetrics, the rows of df are grouped by
    groupingLabel and, within each group, the row with the highest value of
    that metric is selected.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per run. Must contain the columns groupingLabel, 'runID' and
        every name in maxMetrics.
    groupingLabel : str
        Column to group the runs by.
    maxMetrics : list of str
        Metric columns to maximize; each metric contributes one row per group.

    Returns
    -------
    pandas.DataFrame
        Columns groupingLabel, 'runID', the metrics, and 'maximize' (the
        metric that selected the row). Rows keep their index label from df,
        so a run that wins several metrics appears several times. An empty
        DataFrame is returned when maxMetrics is empty.

    Raises
    ------
    ValueError
        If df is not given.
    '''
    if df is None:
        raise ValueError('best_run() requires a DataFrame of runs as `df`')

    metrics = list(maxMetrics)  # never touch the (shared) default list
    # dict.fromkeys de-duplicates while keeping order, e.g. if grouping by 'runID'
    cols = list(dict.fromkeys([groupingLabel, 'runID'] + metrics))

    winners = []
    for metric in metrics:
        # Rows without a score cannot win; dropping them first also removes
        # groups whose scores are all missing, which idxmax cannot resolve.
        scored = df.dropna(subset=[metric])
        idx = scored.groupby(by=groupingLabel)[metric].idxmax()
        winners.append(df.loc[idx, cols].assign(maximize=metric))

    if not winners:
        return pd.DataFrame()
    # single concat instead of growing a frame inside the loop
    return pd.concat(winners, axis=0)

The best runs are identified below. The label is the one assigned to each run that tracks dataset and model type. The metrics shown are those based on which we make the choice of best run. The column "maximize" indicates which of the list of metrics was used to select the row. 

In [183]:
# For each metric in best_run's default maxMetrics, pick the top-scoring run of every label.
best = best_run(argDF)
best

Unnamed: 0,label,runID,ner_f1_macro,rel_f1_macro,rel_nec_f1_macro,maximize
2,scierc_bert_train,2022-03-22_09.51.47.931037,70.308508,49.589145,38.588136,ner_f1_macro
21,scierc_elec_train,2022-03-22_20.15.48.486074,67.948684,42.290068,30.97763,ner_f1_macro
10,scierc_rob_train,2022-03-22_13.01.05.757922,67.571642,44.340069,35.433664,ner_f1_macro
2,scierc_bert_train,2022-03-22_09.51.47.931037,70.308508,49.589145,38.588136,rel_f1_macro
16,scierc_elec_train,2022-03-22_18.25.20.499416,66.454233,43.435618,35.455048,rel_f1_macro
7,scierc_rob_train,2022-03-22_12.42.15.274505,67.049817,44.869976,32.329741,rel_f1_macro
0,scierc_bert_train,2022-03-22_10.17.46.145076,69.027011,47.519995,39.019091,rel_nec_f1_macro
16,scierc_elec_train,2022-03-22_18.25.20.499416,66.454233,43.435618,35.455048,rel_nec_f1_macro
10,scierc_rob_train,2022-03-22_13.01.05.757922,67.571642,44.340069,35.433664,rel_nec_f1_macro


We now look at the columns of this dataframe and identify the columns that take more than one value. If all the rows have the same value in a column, then that column is not interesting in terms of hyperparameter selection. We then select the most significant columns from the original dataframe for the experiments that appear in the best-list.

In [193]:
# Columns taking more than one distinct value across runs. nunique(dropna=False)
# treats all missing values as ONE value; len(set(...)) would count every float
# NaN separately and report constant/missing columns as varying.
varCols = [col for col in argDF.columns if argDF[col].nunique(dropna=False) > 1]

# We discard anything to the right of column 'label' (run info and additional
# metrics). The cut-off is taken from the position of 'label' in argDF itself,
# so it still works when every run has the same label and 'label' is therefore
# not part of varCols.
labelPos = list(argDF.columns).index('label')
lmt = sum(col in varCols for col in argDF.columns[:labelPos])
print(varCols)

# Varying hyperparameter columns, minus bookkeeping columns; filtering the list
# (rather than dropping afterwards) does not fail when one of them is constant.
hyperCols = [col for col in varCols[:lmt] if col not in ('config', 'model_type', 'index')]

# best.index holds index LABELS of argDF, so select with .loc; sorted() gives a
# deterministic row order and set() removes runs that won several metrics.
argDF.loc[sorted(set(best.index)), ['label','runID','ner_f1_macro','rel_f1_macro','rel_nec_f1_macro'] + hyperCols]

['index', 'train_batch_size', 'epochs', 'neg_entity_count', 'neg_relation_count', 'lr', 'weight_decay', 'config', 'lowercase', 'model_path', 'model_type', 'rel_filter_threshold', 'prop_drop', 'label', 'logPath', 'savePath', 'runID', 'ner_prec_micro', 'ner_rec_micro', 'ner_f1_micro', 'ner_prec_macro', 'ner_rec_macro', 'ner_f1_macro', 'rel_prec_micro', 'rel_rec_micro', 'rel_f1_micro', 'rel_prec_macro', 'rel_rec_macro', 'rel_f1_macro', 'rel_nec_prec_micro', 'rel_nec_rec_micro', 'rel_nec_f1_micro', 'rel_nec_prec_macro', 'rel_nec_rec_macro', 'rel_nec_f1_macro', 'epoch', 'global_iteration']


Unnamed: 0,label,runID,ner_f1_macro,rel_f1_macro,rel_nec_f1_macro,train_batch_size,epochs,neg_entity_count,neg_relation_count,lr,weight_decay,lowercase,model_path,rel_filter_threshold,prop_drop
0,scierc_bert_train,2022-03-22_10.17.46.145076,69.027011,47.519995,39.019091,4,20,100,100,6e-05,0.02,True,allenai/scibert_scivocab_uncased,0.4,0.15
2,scierc_bert_train,2022-03-22_09.51.47.931037,70.308508,49.589145,38.588136,4,20,100,100,6e-05,0.02,False,allenai/scibert_scivocab_cased,0.4,0.15
7,scierc_rob_train,2022-03-22_12.42.15.274505,67.049817,44.869976,32.329741,4,20,100,100,5e-05,0.1,False,allenai/biomed_roberta_base,0.4,0.2
10,scierc_rob_train,2022-03-22_13.01.05.757922,67.571642,44.340069,35.433664,4,20,100,100,5e-05,0.2,False,allenai/biomed_roberta_base,0.4,0.2
16,scierc_elec_train,2022-03-22_18.25.20.499416,66.454233,43.435618,35.455048,4,20,100,100,6e-05,0.01,False,google/electra-base-discriminator,0.5,0.1
21,scierc_elec_train,2022-03-22_20.15.48.486074,67.948684,42.290068,30.97763,4,20,125,100,6e-05,0.02,False,kamalkraj/bioelectra-base-discriminator-pubmed,0.5,0.1


Unnamed: 0,label,runID,ner_f1_macro,rel_f1_macro,rel_nec_f1_macro,train_batch_size,epochs,neg_entity_count,neg_relation_count,lr,weight_decay,lowercase,model_path,rel_filter_threshold,prop_drop
0,scierc_bert_train,2022-03-22_10.17.46.145076,69.027011,47.519995,39.019091,4,20,100,100,6e-05,0.02,True,allenai/scibert_scivocab_uncased,0.4,0.15
2,scierc_bert_train,2022-03-22_09.51.47.931037,70.308508,49.589145,38.588136,4,20,100,100,6e-05,0.02,False,allenai/scibert_scivocab_cased,0.4,0.15
7,scierc_rob_train,2022-03-22_12.42.15.274505,67.049817,44.869976,32.329741,4,20,100,100,5e-05,0.1,False,allenai/biomed_roberta_base,0.4,0.2
10,scierc_rob_train,2022-03-22_13.01.05.757922,67.571642,44.340069,35.433664,4,20,100,100,5e-05,0.2,False,allenai/biomed_roberta_base,0.4,0.2
16,scierc_elec_train,2022-03-22_18.25.20.499416,66.454233,43.435618,35.455048,4,20,100,100,6e-05,0.01,False,google/electra-base-discriminator,0.5,0.1
21,scierc_elec_train,2022-03-22_20.15.48.486074,67.948684,42.290068,30.97763,4,20,125,100,6e-05,0.02,False,kamalkraj/bioelectra-base-discriminator-pubmed,0.5,0.1
