In [1]:
# Set a seed value
seed_value= 42
# 1. Set `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']=str(seed_value)
# 2. Set `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)
# 3. Set `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)
# 4. Set `pytorch` pseudo-random generator at a fixed value
import torch
torch.manual_seed(seed_value)
torch.backends.cudnn.deterministic = True

In [2]:
data_path = "/sbksvol/gaurav/NER_data/"

In [3]:
ENT = "Cellline"
DATASET = "cll"

In [4]:
import os
data_dir = os.path.join(data_path, ENT, DATASET)

In [5]:
# !pip install pytorch-crf

In [6]:
from torchcrf import CRF

## Prepare Data

In [7]:
import json

files = ["train", "dev", "test"]


def convert(lines, f):
    tokens_ = []
    tags_ = []

    data = {"words": [], "ner": []}

    for line in lines:
        line = line.strip()
        if len(line) == 0:
            data["words"].append(tokens_)
            data["ner"].append(tags_)
            tokens_ = []
            tags_ = []
        else:
            token, tag = line.split("\t")
            if len(tag) > 1:
                tag = tag.split("-")[0]
            tokens_.append(token.strip())
            tags_.append(tag.strip())
            
    if len(tokens_) > 0:
        data["words"].append(tokens_)
        data["ner"].append(tags_)

    return data

In [8]:
def writer(data, fp, add_str=""):

    for (tokens, tags) in zip(data["words"], data["ner"]):
        for (token, tag) in zip(tokens, tags):
            if tag == "B" or tag == "I":
                tag += add_str
            fp.write("{}\t{}\n".format(token, tag))
        fp.write("\n")

In [9]:
## convert all tsv files to txt

all_data = {}
for f in files:
    with open(os.path.join(data_dir, f + ".tsv"), "r") as fp:
        lines = fp.readlines()
        all_data[f] = convert(lines, fp)
    fp = open(os.path.join(data_dir, f + ".txt"), "w")
    writer(all_data[f], fp)
    fp.close()

In [10]:
num_train_sents = len(all_data["train"]["words"])
num_dev_sents = len(all_data["dev"]["words"])
num_test_sents = len(all_data["test"]["words"])
print(num_train_sents, num_dev_sents, num_test_sents)

121 22 61


In [11]:
import pandas as pd

# add the index to keep track of sentences
train_tuples = []
for i,(tokens,tags) in enumerate(zip(all_data["train"]["words"],all_data["train"]["ner"])):
    for token,tag in zip(tokens,tags):
        train_tuples.append([i,token,tag])

test_tuples = []
for i,(tokens,tags) in enumerate(zip(all_data["test"]["words"],all_data["test"]["ner"])):
    for token,tag in zip(tokens,tags):
        test_tuples.append([i,token,tag])
    
train_df = pd.DataFrame(train_tuples, columns=['sentence_id', 'words', 'labels'])
test_df = pd.DataFrame(test_tuples, columns=['sentence_id', 'words', 'labels'])

In [12]:
print(test_df.head(10))

   sentence_id       words labels
0            0          By      O
1            0    Northern      O
2            0        blot      O
3            0    analysis      O
4            0           ,      O
5            0         the      O
6            0  expression      O
7            0          of      O
8            0          IL      O
9            0           -      O


In [13]:
import numpy as np

In [14]:
# a list that has all possible labels 
labels = np.sort(train_df['labels'].unique()).tolist()
label_map =  {i: label for i, label in enumerate(labels)}
num_labels = len(labels)
print(labels)

['B', 'I', 'O']


## Model Definition

In [15]:
model_args = dict()

# Path to pretrained model or model identifier from huggingface.co/models
model_args['model_name_or_path'] = 'dmis-lab/biobert-base-cased-v1.1'
# saved_model_path
# saved_model_path
# pytorch_dump_path
# 'dmis-lab/biobert-base-cased-v1.1'

# Where do you want to store the pretrained models downloaded from s3
model_args['cache_dir'] = "/sbksvol/gaurav/NER_out/"

# we skip basic white-space tokenization by passing do_basic_tokenize = False to the tokenizer
model_args['do_basic_tokenize'] = False


data_args = dict()

data_args['data_dir'] = data_dir

# "The maximum total input sequence length after tokenization. Sequences longer "
# "than this will be truncated, sequences shorter will be padded."
data_args['max_seq_length'] = 256

# Overwrite the cached training and evaluation sets
# this means the model does not have to tokenize/preprocess and cache the data each time it's called
# this can be made different for each NerDataset (training NerDataset, testing NerDataset)
data_args['overwrite_cache'] = True

In [16]:
import transformers
transformers.__version__

'4.4.2'

In [17]:
import torch
# device = torch.device('cpu')

In [18]:
from transformers import (
    BertConfig,
    BertTokenizer
)

config = BertConfig.from_pretrained(
    model_args['model_name_or_path'],
    num_labels=num_labels,
    id2label=label_map,
    label2id={label: i for i, label in enumerate(labels)},
    cache_dir=model_args['cache_dir']
)

# we skip basic white-space tokenization by passing do_basic_tokenize = False to the tokenizer
tokenizer = BertTokenizer.from_pretrained(
    model_args['model_name_or_path'],
    cache_dir=model_args['cache_dir']
#     ,do_basic_tokenize = model_args['do_basic_tokenize']
)



## Create Dataset Objects

In [19]:
data_utils_path = "/sbksvol/gaurav/transformers/examples/token-classification/"

In [20]:
import sys
if data_utils_path not in sys.path:
    sys.path.append(data_utils_path)

In [21]:
from utils_ner import NerDataset, Split
# %reset_selective -f "utils_ner"
# NerDataset.__init__

In [22]:
train_dataset = NerDataset(
  data_dir=data_args['data_dir'],
  tokenizer=tokenizer,
  labels=labels,
  model_type=config.model_type,
  max_seq_length=data_args['max_seq_length'],
  overwrite_cache=data_args['overwrite_cache'], # True
  mode=Split.train)

In [23]:
eval_dataset = NerDataset(
  data_dir=data_args['data_dir'],
  tokenizer=tokenizer,
  labels=labels,
  model_type=config.model_type,
  max_seq_length=data_args['max_seq_length'],
  overwrite_cache=data_args['overwrite_cache'],
  mode=Split.dev)

In [24]:
print(train_dataset.__len__(), eval_dataset.__len__())

120 21


## Train top-model using the Trainer API

In [25]:
# from models import FullyConnectedLayers

In [26]:
from transformers import BertPreTrainedModel, BertModel
from transformers.modeling_outputs import TokenClassifierOutput
import torch.nn as nn
import torch.nn.functional as F

In [27]:
class BertNERTopModel(BertPreTrainedModel):

    _keys_to_ignore_on_load_unexpected = [r"pooler"]

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config, add_pooling_layer=False)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
#         self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        hidden_units_list=[500, 250, 125]
#         activations_list = ["none", "none", "none", "none"]

        hid1, hid2, hid3 = hidden_units_list
        self.fc1 = nn.Linear(config.hidden_size, hid1)
        self.fc2 = nn.Linear(hid1, hid2)
        self.fc3 = nn.Linear(hid2, hid3)
        self.fc4 = nn.Linear(hid3, config.num_labels)
        
        self.crf = CRF(config.num_labels, batch_first=True)

#         self.classifier = FullyConnectedLayers(hidden_units_list, activations_list,
#                                                config.hidden_size, config.num_labels)


        ## 0-hidden layers ##
#         self.fc1 = nn.Linear(config.hidden_size, config.num_labels)


        ## 1-hidden layer ##
#         self.fc1 = nn.Linear(config.hidden_size, 250)
#         self.fc2 = nn.Linear(250, config.num_labels)

        print("Initializing weights")
        self.init_weights()
        

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Labels for computing the token classification loss. Indices should be in ``[0, ..., config.num_labels -
            1]``.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]

        sequence_output = self.dropout(sequence_output)
        
#         logits = self.classifier(sequence_output)

        x = sequence_output
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        
        logits = x

        loss = None
        if labels is not None:
            ## the tokens whose labels == -100 denote padding tokens
            ## since they are ignored for loss calculation and because crf cannot accept label values other 
            ## than 0, 1, ... num_tags-1, we just set all the pad token indices to 2 instead of -100
            labels_copy = labels.detach().clone()
            labels_copy[labels_copy == -100] = 2
            loss = -self.crf.forward(logits, labels_copy, attention_mask.type(torch.uint8), reduction="mean")
#             loss_fct = nn.CrossEntropyLoss()
#             # Only keep active parts of the loss
#             if attention_mask is not None:
#                 active_loss = attention_mask.view(-1) == 1
#                 active_logits = logits.view(-1, self.num_labels)
#                 active_labels = torch.where(
#                     active_loss, labels.view(-1), torch.tensor(loss_fct.ignore_index).type_as(labels)
#                 )
                
#                 loss = loss_fct(active_logits, active_labels)
#             else:
#                 print("Labels None")
#                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

In [28]:
# config.top_model_only

In [29]:
# config

## First freeze bert weights and train

In [30]:


model = BertNERTopModel.from_pretrained(
    model_args['model_name_or_path'],
    config=config
    ,cache_dir=model_args['cache_dir']
)

## base_model -> bert (excluding the classification layer)
for param in model.base_model.parameters():
    param.requires_grad = False


model.train()

Initializing weights


Some weights of the model checkpoint at dmis-lab/biobert-base-cased-v1.1 were not used when initializing BertNERTopModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertNERTopModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertNERTopModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertNERTopModel were not initialized from the model checkpoint at dmis-lab

BertNERTopModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tr

## Create Trainer

In [31]:
from transformers.hf_argparser import HfArgumentParser
from transformers import TrainingArguments
from transformers import Trainer

In [32]:
import json

training_args_dict = {
    'output_dir' : "model_output/",
    'num_train_epochs' : 20,
    'train_batch_size': 32,
    "save_strategy": "epoch",
    "evaluation_strategy": "epoch"
#     ,
#     "load_best_model_at_end": True
}

with open('training_args.json', 'w') as fp:
    json.dump(training_args_dict, fp)
    
parser = HfArgumentParser(TrainingArguments)
# this function returns a tuple so we get the first item in the tuple since we only passed one arguement type "TrainingArguments"
training_args = parser.parse_json_file(json_file="training_args.json")[0]

## Train

In [33]:
# Initialize the Trainer
trainer = Trainer(
  model=model,
  args=training_args,
  train_dataset=train_dataset,
  eval_dataset=eval_dataset
)

trainOutput = trainer.train()
# trainer.save_model()

Epoch,Training Loss,Validation Loss,Runtime,Samples Per Second
1,No log,59.282146,0.4655,45.115
2,No log,48.672626,0.4593,45.725
3,No log,33.185772,0.4604,45.613
4,No log,17.980986,0.457,45.954
5,No log,9.539531,0.4608,45.572
6,No log,6.969814,0.4585,45.804
7,No log,5.941916,0.4606,45.594
8,No log,5.141191,0.4613,45.522
9,No log,4.474339,0.4627,45.381
10,No log,3.931677,0.4719,44.497


In [34]:
list(model.parameters())[-1].data

tensor([[0.0533, 0.0881, 0.0246],
        [0.0301, 0.0743, 0.0117],
        [0.0033, 0.0089, 0.0249]], device='cuda:0')

## Now reload the model from saved checkpoint

In [35]:
num_steps = trainOutput.global_step # 17880
checkpoint = f"checkpoint-{num_steps}"
top_model_path = f"{training_args_dict['output_dir']}/{checkpoint}" 

# model_output/checkpoint-17880

#### Config ####
config = BertConfig.from_pretrained(
    top_model_path,
    num_labels=num_labels,
    id2label=label_map,
    label2id={label: i for i, label in enumerate(labels)},
    cache_dir=model_args['cache_dir']
)

#### Model ####

reloaded_model = BertNERTopModel.from_pretrained(
    top_model_path,
    config=config,
    cache_dir=model_args['cache_dir']
)

Initializing weights


In [36]:
list(reloaded_model.parameters())[-1].data

tensor([[0.0533, 0.0881, 0.0246],
        [0.0301, 0.0743, 0.0117],
        [0.0033, 0.0089, 0.0249]])

In [37]:
#### Training args ####
training_args_dict = {
    'output_dir' : "model_output",
    'num_train_epochs' : 5,
    'train_batch_size': 32,
    'seed':seed_value,
    "evaluation_strategy": "epoch"
#     ,"load_best_model_at_end": True
}

with open('training_args.json', 'w') as fp:
    json.dump(training_args_dict, fp)
    
parser = HfArgumentParser(TrainingArguments)
# this function returns a tuple so we get the first item in the tuple since we only passed one arguement type "TrainingArguments"
training_args = parser.parse_json_file(json_file="training_args.json")[0]

## Then unfreeze the bert weights and train end-to-end

In [38]:
model = reloaded_model

for param in model.base_model.parameters():
    param.requires_grad = True

In [39]:
# for name, param in model.named_parameters():
#     print(name, param.shape, param.requires_grad)

In [40]:
model.to('cuda')
model.train()

BertNERTopModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tr

In [41]:
model.training

True

In [42]:
print(trainer.model_init)

None


In [43]:
model.device

device(type='cuda', index=0)

In [44]:
# Initialize our Trainer
trainer = Trainer(
  model=model,
  args=training_args,
  train_dataset=train_dataset,
  eval_dataset=eval_dataset
)

# Begin training from the latest checkpoint
trainer.train(checkpoint)
# trainer.save_model()

Epoch,Training Loss,Validation Loss,Runtime,Samples Per Second
1,No log,1.8912,0.4611,45.538
2,No log,1.461137,0.4599,45.658
3,No log,1.151145,0.4703,44.654
4,No log,0.953274,0.4626,45.397
5,No log,0.862683,0.4632,45.341


TrainOutput(global_step=75, training_loss=3.7158329264322916, metrics={'train_runtime': 40.482, 'train_samples_per_second': 1.853, 'total_flos': 99773520076800.0, 'epoch': 5.0, 'init_mem_cpu_alloc_delta': 48563, 'init_mem_gpu_alloc_delta': 0, 'init_mem_cpu_peaked_delta': 18306, 'init_mem_gpu_peaked_delta': 0, 'train_mem_cpu_alloc_delta': 208707, 'train_mem_gpu_alloc_delta': 1305872896, 'train_mem_cpu_peaked_delta': 138334, 'train_mem_gpu_peaked_delta': 2282095616})

In [45]:
list(model.parameters())[-1].data

tensor([[0.0520, 0.0873, 0.0263],
        [0.0290, 0.0743, 0.0098],
        [0.0050, 0.0071, 0.0243]], device='cuda:0')

## Clean-up

In [46]:
import gc
gc.collect()
torch.cuda.empty_cache()

## Prepare test data

In [47]:
import numpy as np
from torch import nn

### For Softmax models ###

In [48]:

# # we can pass overwrite_cache as True since we might like to make new predictions by just changing test.txt 
# test_dataset = NerDataset(
#   data_dir=data_args['data_dir'],
#   tokenizer=tokenizer,
#   labels=labels,
#   model_type=config.model_type,
#   max_seq_length=data_args['max_seq_length'],
#   overwrite_cache=True,
#   mode=Split.test)

# # last layer output/activation has the shape of (batch_size, seq_len,num_of_labels)
# output, label_ids, metrics = trainer.predict(test_dataset)
# preds = np.argmax(output, axis=2)
# batch_size, seq_len = preds.shape

# # list of token-level predictions shape = (batch_size, seq_len)
# preds_list = [[] for _ in range(batch_size)]
# for i in range(batch_size):
#     for j in range(seq_len):
#         # ignore pad_tokens
#         if label_ids[i, j] != nn.CrossEntropyLoss().ignore_index:
#             preds_list[i].append(label_map[preds[i][j]])

### For CRF models ###

In [49]:


# we can pass overwrite_cache as True since we might like to make new predictions by just changing test.txt 
test_dataset = NerDataset(
  data_dir=data_args['data_dir'],
  tokenizer=tokenizer,
  labels=labels,
  model_type=config.model_type,
  max_seq_length=data_args['max_seq_length'],
  overwrite_cache=True,
  mode=Split.test)

# last layer output/activation has the shape of (batch_size, seq_len, num_labels)
output, label_ids, metrics = trainer.predict(test_dataset)
batch_size, seq_len, num_labels = output.shape

output = torch.tensor(output).to('cuda')

all_attention_masks = []
for sample in test_dataset:
    all_attention_masks.append(sample.attention_mask)
    
all_attention_masks = torch.tensor(all_attention_masks).to('cuda')

# get the best tag sequences using CRF's viterbi decode algo
preds = model.crf.decode(output, all_attention_masks.type(torch.uint8))


preds_list = [[] for _ in range(batch_size)]
for i in range(batch_size):
    for j in range(seq_len):
        # ignore pad_tokens
        if label_ids[i, j] != -100:
            preds_list[i].append(label_map[preds[i][j]])

In [50]:
def sentences_combiner(df):
    # 'words' and 'labels' are the column names in the CSV file
    tupple_function = lambda x: [(w, t) for w, t in zip(x["words"].values.tolist(),
                                                      x["labels"].values.tolist())]
    grouped = df.groupby("sentence_id").apply(tupple_function)
    return [s for s in grouped]

testing_sentences = sentences_combiner(test_df)
test_labels = [[w[1] for w in s] for s in testing_sentences]
test_tokens = [[w[0] for w in s] for s in testing_sentences]

# reconstruct full sentences from lists of (token,label) tuples 
# test_reconstructed = [" ".join([w[0] for w in s] ) for s in testing_sentences]

In [51]:
# make sure all test and pred sentences have the same length

test_labels_new = []
preds_list_new = []

for i, x in enumerate(test_labels):
    if len(x) == len(preds_list[i]):
        test_labels_new.append(x)
        preds_list_new.append(preds_list[i])
    else:
        print("ABORT")

## Get entity level scores

In [52]:
from seqeval.metrics import f1_score, classification_report
print("F1-score: {:.1%}".format(f1_score(test_labels_new, preds_list_new)))
print(classification_report(test_labels_new, preds_list_new))

F1-score: 95.5%
              precision    recall  f1-score   support

           _       0.95      0.96      0.95        77

   micro avg       0.95      0.96      0.95        77
   macro avg       0.95      0.96      0.95        77
weighted avg       0.95      0.96      0.95        77



In [53]:
# Gene, bioinfer (no relu)
# seed = 42 -> 0.84      0.87      0.85
# seed = 0 -> 0.86      0.89      0.88
# seed = 13 -> 0.83      0.88      0.85

# Gene, bioinfer (all relu)
# seed = 42 -> 0.84      0.90      0.87
# seed = 0 -> 0.83      0.87      0.85
# seed = 13 -> 0.84      0.88      0.86

In [54]:
# 0.78      0.66      0.71 -> gellus, 3-layer +softmax with relu after each fc layer 
# 0.97 0.94 0.95 -> cll, 3-layer +softmax with relu after each fc layer except the 1st, seed=42
# 0.96 0.86, 0.90, 0.94, 0.96, 0.95 -> '' seed=0 
# 0.97, 0.91, 0.94 -> '' seed=13

**The following results are for the cll dataset with different # relu layers**

In [55]:
# relu after 1st, 2nd and 3rd layer -> 0, 0, 0
# relu after 2nd and 3rd layer -> 0.86, 0.96, 0.91
# relu only after 3rd layer -> 0.92, 0.99, 0.95
# No relu -> 0.97, 0.94, 0.95

**The following results are for 3 hidden layers without any relu**

In [56]:
# cll
# seed = 42 -> 0.92      0.99      0.95
# seed = 0  -> 0.95      0.97      0.96
# seed = 13 -> 0.97      0.96      0.97
# seed = 20 -> 0.90      0.95      0.92
# seed = 50 -> 0.96      0.96      0.96
# seed = 75 -> 0.92      0.95      0.94
# seed = 100 -> 0.93      0.92      0.93
# -----------------------
# average f1 -> 95

**The following results are for 1 hidden layers without any relu**

In [57]:
# cll
# seed = 42 -> Not obtained yet
# seed = 0  -> 0.83      0.99      0.90
# seed = 13 -> 0.90      0.99      0.94
# seed = 20 -> 0.90      0.97      0.94
# seed = 50 -> Not obtained yet
# seed = 75 -> Not obtained yet
# seed = 100 -> Not obtained yet
# -----------------------
# average f1 -> Not obtained yet


In [58]:
# cellfinder
# seed = 42 -> 0.86      0.63      0.73
# seed = 0  -> 0.84      0.77      0.80
# seed = 13 -> 0.83      0.70      0.76
