In [20]:
config = {
    "pretrained_model": "bert-base-cased",
    "tokenizer": "bert-base-cased",
    "max_seq_length": 256,
    "batch_size": 16,
    "lr": 2e-5,
    "epochs": 10,
    "device": "cuda",
    "gpu_ids": "0,1,2,3",
    "seed": 2020,
    "fp16": False,
    "loss_scale": 0,
    "gradient_accumulation_steps":1,
    "warmup_proportion": 0.1,
    "gradient_accumulation_steps": 1,
    "num_labels": 4,
    "is_multilabel": False,
    "valid_metric": "macro_f1",
    "model_save_dir": "../checkpoints/bert_cased_512_biocaster_4cate_20200903/",
    "patience": 4,
}

In [21]:
import sys
import pandas as pd
sys.path.append("../")

In [22]:
train_set = pd.read_csv("/home/zm324/workspace/doc_cls/datasets/biocaster/train.csv")
dev_set = pd.read_csv("/home/zm324/workspace/doc_cls/datasets/biocaster/dev.csv")
test_set = pd.read_csv("/home/zm324/workspace/doc_cls/datasets/biocaster/test.csv")
train_bt_set = pd.read_csv("/home/zm324/workspace/doc_cls/datasets/biocaster/train_bt.csv")

In [14]:
label_mapping = {
    0:"0001",
    1:"0010",
    2:"0100",
    3:"1000",
}
train_set.labels=train_set.labels.apply(lambda x:label_mapping[x])
train_set.docs = train_set.docs.apply(lambda x: x.replace("\n",". ").replace("\r",""))
dev_set.docs = dev_set.docs.apply(lambda x: x.replace("\n",". ").replace("\r",""))
test_set.docs = test_set.docs.apply(lambda x: x.replace("\n",". ").replace("\r",""))

train_set.head()

Unnamed: 0.1,Unnamed: 0,index,docs,labels,source,flag
0,0,993,". ONCHOCERCIASIS, DRUG RESISTANCE - GHANA. Dat...",1000,Biocaster,train
1,1,467,. Avian influenza situation in Thailand ? up...,1000,Biocaster,train
2,2,135,. TB spreading fast. . Published on Jul 5...,1,Biocaster,train
3,3,204,. MEASLES - UKRAINE (KHMELNIZKIY) (02) *****...,1000,Biocaster,train
4,4,760,. Romania confirms deadly bird flu in six vi...,100,Biocaster,train


In [15]:
dev_set.labels=dev_set.labels.apply(lambda x:label_mapping[x])
test_set.labels=test_set.labels.apply(lambda x:label_mapping[x])


In [19]:
train_set.to_csv("../covid19-classification/hedwig-data/datasets/Biocaster4C/train.tsv",
                columns=["labels","docs"],
                header=False,
                index=False,
                sep="\t")
dev_set.to_csv("../covid19-classification/hedwig-data/datasets/Biocaster4C/dev.tsv",
                columns=["labels","docs"],
                header=False,
                index=False,
                sep="\t")
test_set.to_csv("../covid19-classification/hedwig-data/datasets/Biocaster4C/test.tsv",
                columns=["labels","docs"],
                header=False,
                index=False,
                sep="\t")

In [None]:
len(train_bt_set.index)

In [None]:
def run_bert(train_set,dev_set,test_set, lans, sample_rate):
    print("train/dev/test numbers: ", len(train_set.index),len(dev_set.index),len(test_set.index))
    
    from beta_nlp.models.bert_cls import BertModel
    from beta_nlp.utils.common import save_to_csv
    
    cls = BertModel(config)
    cls.train(train_set,dev_set)
    
    result = cls.test(test_set)
    result["lans"] = lans
    result["sample_rate"] = sample_rate
    save_to_csv(result,result_file="../results/20201001_backtrans.csv")
    
def train_sample(train_bt_set,lan_set,sample_rate):
    """ sample back translation document samples
    """
    train_set = train_bt_set[train_bt_set["lan"]=="en"]
    for lan in lan_set:
        if lan =="en":
            continue
        train_set_sample = train_bt_set[train_bt_set["lan"]==lan]
        from sklearn.utils import shuffle
        train_set_sample = shuffle(train_set_sample).reset_index()
        sample_n = int(len(train_set_sample)*sample_rate)
        train_set_sample = train_set_sample.iloc[:sample_n]
        train_set = train_set.append(train_set_sample)
    return train_set

### original bert

In [None]:
# repeat = 5
# for i in range(repeat):
#     run_bert(train_set,dev_set,test_set, lans=["en"], sample_rate=1)

### single language backtranslation

In [None]:
repeat = 2
lang_list = ["ar","zh","fr","es","pt","ru"]
sample_rate_list = [0.1,0.25,0.5,0.75,1]
for lang in lang_list:
    lan_set = ["en"]
    lan_set.append(lang)
    for sample_rate in sample_rate_list:
        for i in range(repeat):
            train_set = train_sample(train_bt_set,lan_set,sample_rate)
            run_bert(train_set, dev_set, test_set, lans=lan_set, sample_rate=sample_rate)

### six languages backtranslation

In [None]:
repeat = 2
lang_list = ["ar","zh","fr","es","pt","ru"]
sample_rate_list = [0.1,0.25,0.5,0.75,1]
lan_set = lang_list
lan_set.append(lang)
for sample_rate in sample_rate_list:
    for i in range(repeat):
        train_set = train_sample(train_bt_set,lan_set,sample_rate)
        run_bert(train_set, dev_set, test_set, lans=lan_set, sample_rate=sample_rate)

### two languages backtranslation

In [None]:
repeat = 2
lang_list = ["ar","zh","fr","es","pt","ru"]
sample_rate_list = [0.1,0.25,0.5,0.75,1]
for i in range(len(lang_list)-1):
    for j in range(i+1,len(lang_list)):
        lan_set = ["en"]+[lang_list[i]]+[lang_list[j]]
        for sample_rate in sample_rate_list:
            for i in range(repeat):
                train_set = train_sample(train_bt_set,lan_set,sample_rate)
                run_bert(train_set, dev_set, test_set, lans=lan_set, sample_rate=sample_rate)

In [10]:
train_set = pd.read_csv("/home/zm324/workspace/doc_cls/datasets/biocaster/train.csv")
dev_set = pd.read_csv("/home/zm324/workspace/doc_cls/datasets/biocaster/dev.csv")
test_set = pd.read_csv("/home/zm324/workspace/doc_cls/datasets/biocaster/test.csv")
train_bt_set = pd.read_csv("/home/zm324/workspace/doc_cls/datasets/biocaster/train_bt_full.csv")

In [11]:
len(train_bt_set.index)

5614

In [12]:
def run_bert(train_set,dev_set,test_set, lans, sample_rate):
    print("train/dev/test numbers: ", len(train_set.index),len(dev_set.index),len(test_set.index))
    
    from beta_nlp.models.bert_cls import BertModel
    from beta_nlp.utils.common import save_to_csv
    
    cls = BertModel(config)
    cls.train(train_set,dev_set)
    
    result = cls.test(test_set)
    result["lans"] = lans
    result["sample_rate"] = sample_rate
    save_to_csv(result,result_file="../results/20201001_backtrans_full.csv")
    
def train_sample(train_bt_set,lan_set,sample_rate):
    """ sample back translation document samples
    """
    train_set = train_bt_set[train_bt_set["lan"]=="en"]
    for lan in lan_set:
        if lan =="en":
            continue
        train_set_sample = train_bt_set[train_bt_set["lan"]==lan]
        from sklearn.utils import shuffle
        train_set_sample = shuffle(train_set_sample).reset_index()
        sample_n = int(len(train_set_sample)*sample_rate)
        train_set_sample = train_set_sample.iloc[:sample_n]
        train_set = train_set.append(train_set_sample)
    return train_set

### single language backtranslation

In [13]:
repeat = 2
lang_list = ["ar","zh","fr","es","pt","ru"]
sample_rate_list = [0.1,0.25,0.5,0.75,1]
for lang in lang_list:
    lan_set = ["en"]
    lan_set.append(lang)
    for sample_rate in sample_rate_list:
        for i in range(repeat):
            train_set = train_sample(train_bt_set,lan_set,sample_rate)
            run_bert(train_set, dev_set, test_set, lans=lan_set, sample_rate=sample_rate)

train/dev/test numbers:  882 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11492.74 ms
[Epoch 0] loss: 65.05565077066422


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.83 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.73     |
|  1 | macro_precision | 0.428191 |
|  2 | macro_recall    | 0.298387 |
|  3 | macro_f1        | 0.289156 |
|  4 | micro_precision | 0.73     |
|  5 | micro_recall    | 0.73     |
|  6 | micro_f1        | 0.73     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11191.13 ms
[Epoch 1] loss: 41.0179183781147


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.21 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.88     |
|  1 | macro_precision | 0.42869  |
|  2 | macro_recall    | 0.458353 |
|  3 | macro_f1        | 0.438492 |
|  4 | micro_precision | 0.88     |
|  5 | micro_recall    | 0.88     |
|  6 | micro_f1        | 0.88     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11564.67 ms
[Epoch 2] loss: 27.73814955353737


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.07 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.459389 |
|  2 | macro_recall    | 0.472677 |
|  3 | macro_f1        | 0.465501 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11518.91 ms
[Epoch 3] loss: 21.80490631237626


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.34 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11561.38 ms
[Epoch 4] loss: 15.817991226911545


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.04 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11558.68 ms
[Epoch 5] loss: 16.274373177438974


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 513.00 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11225.59 ms
[Epoch 6] loss: 12.24749912880361


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.45 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.87     |
|  1 | macro_precision | 0.553571 |
|  2 | macro_recall    | 0.700891 |
|  3 | macro_f1        | 0.599788 |
|  4 | micro_precision | 0.87     |
|  5 | micro_recall    | 0.87     |
|  6 | micro_f1        | 0.87     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11528.68 ms
[Epoch 7] loss: 8.539181604515761


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.50 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11562.07 ms
[Epoch 8] loss: 6.761390179861337


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.13 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11288.29 ms
[Epoch 9] loss: 6.944735491648316


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 805.88 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.94 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8217821782178217 |
|  1 | macro_precision | 0.5285414725069898 |
|  2 | macro_recall    | 0.5267857142857143 |
|  3 | macro_f1        | 0.5212664384795532 |
|  4 | micro_precision | 0.8217821782178217 |
|  5 | micro_recall    | 0.8217821782178217 |
|  6 | micro_f1        | 0.8217821782178217 |
|  7 | lans            | ['en', 'ar']       |
|  8 | sample_rate     | 0.1                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
Create new result_file: ../results/20201001_backtrans_full.csv
train/dev/test numbers:  882 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11222.66 ms
[Epoch 0] loss: 65.05565077066422


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.40 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.73     |
|  1 | macro_precision | 0.428191 |
|  2 | macro_recall    | 0.298387 |
|  3 | macro_f1        | 0.289156 |
|  4 | micro_precision | 0.73     |
|  5 | micro_recall    | 0.73     |
|  6 | micro_f1        | 0.73     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11527.29 ms
[Epoch 1] loss: 41.0179183781147


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.87 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.88     |
|  1 | macro_precision | 0.42869  |
|  2 | macro_recall    | 0.458353 |
|  3 | macro_f1        | 0.438492 |
|  4 | micro_precision | 0.88     |
|  5 | micro_recall    | 0.88     |
|  6 | micro_f1        | 0.88     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11548.63 ms
[Epoch 2] loss: 27.73814955353737


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.79 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.459389 |
|  2 | macro_recall    | 0.472677 |
|  3 | macro_f1        | 0.465501 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11572.03 ms
[Epoch 3] loss: 21.80490631237626


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.01 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11294.83 ms
[Epoch 4] loss: 15.817991226911545


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 815.30 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11358.95 ms
[Epoch 5] loss: 16.274373177438974


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 533.47 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11578.59 ms
[Epoch 6] loss: 12.24749912880361


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.64 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.87     |
|  1 | macro_precision | 0.553571 |
|  2 | macro_recall    | 0.700891 |
|  3 | macro_f1        | 0.599788 |
|  4 | micro_precision | 0.87     |
|  5 | micro_recall    | 0.87     |
|  6 | micro_f1        | 0.87     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11485.44 ms
[Epoch 7] loss: 8.539181604515761


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.80 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11207.36 ms
[Epoch 8] loss: 6.761390179861337


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.01 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11525.03 ms
[Epoch 9] loss: 6.944735491648316


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.34 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.63 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8217821782178217 |
|  1 | macro_precision | 0.5285414725069898 |
|  2 | macro_recall    | 0.5267857142857143 |
|  3 | macro_f1        | 0.5212664384795532 |
|  4 | micro_precision | 0.8217821782178217 |
|  5 | micro_recall    | 0.8217821782178217 |
|  6 | micro_f1        | 0.8217821782178217 |
|  7 | lans            | ['en', 'ar']       |
|  8 | sample_rate     | 0.1                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1002 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13068.69 ms
[Epoch 0] loss: 70.67203933000565


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.59 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.74     |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.306452 |
|  3 | macro_f1        | 0.30148  |
|  4 | micro_precision | 0.74     |
|  5 | micro_recall    | 0.74     |
|  6 | micro_f1        | 0.74     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13082.41 ms
[Epoch 1] loss: 45.00864818692207


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.17 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.448734 |
|  2 | macro_recall    | 0.473279 |
|  3 | macro_f1        | 0.45905  |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12829.75 ms
[Epoch 2] loss: 30.06283077597618


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 810.23 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12806.14 ms
[Epoch 3] loss: 22.778553679585457


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.06 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13111.04 ms
[Epoch 4] loss: 17.517601862549782


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.70 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13139.62 ms
[Epoch 5] loss: 15.088378065265715


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.40 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13097.97 ms
[Epoch 6] loss: 11.230264437384903


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.45 ms
Early Stopping. Epoch: 6, best_valid_metric (macro_f1): 0.47513013302486984



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.77 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8316831683168316 |
|  1 | macro_precision | 0.5705459770114942 |
|  2 | macro_recall    | 0.5133928571428572 |
|  3 | macro_f1        | 0.5115937582186376 |
|  4 | micro_precision | 0.8316831683168316 |
|  5 | micro_recall    | 0.8316831683168316 |
|  6 | micro_f1        | 0.8316831683168316 |
|  7 | lans            | ['en', 'ar']       |
|  8 | sample_rate     | 0.25               |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1002 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13026.79 ms
[Epoch 0] loss: 70.67203933000565


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.99 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.74     |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.306452 |
|  3 | macro_f1        | 0.30148  |
|  4 | micro_precision | 0.74     |
|  5 | micro_recall    | 0.74     |
|  6 | micro_f1        | 0.74     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13096.76 ms
[Epoch 1] loss: 45.00864818692207


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.24 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.448734 |
|  2 | macro_recall    | 0.473279 |
|  3 | macro_f1        | 0.45905  |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12806.48 ms
[Epoch 2] loss: 30.06283077597618


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.19 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13138.06 ms
[Epoch 3] loss: 22.778553679585457


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.58 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13094.21 ms
[Epoch 4] loss: 17.517601862549782


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.27 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13103.38 ms
[Epoch 5] loss: 15.088378065265715


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.19 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13156.34 ms
[Epoch 6] loss: 11.230264437384903


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.06 ms
Early Stopping. Epoch: 6, best_valid_metric (macro_f1): 0.47513013302486984



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.21 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8316831683168316 |
|  1 | macro_precision | 0.5705459770114942 |
|  2 | macro_recall    | 0.5133928571428572 |
|  3 | macro_f1        | 0.5115937582186376 |
|  4 | micro_precision | 0.8316831683168316 |
|  5 | micro_recall    | 0.8316831683168316 |
|  6 | micro_f1        | 0.8316831683168316 |
|  7 | lans            | ['en', 'ar']       |
|  8 | sample_rate     | 0.25               |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1203 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15631.91 ms
[Epoch 0] loss: 82.79762929677963


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 809.12 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.77     |
|  1 | macro_precision | 0.436111 |
|  2 | macro_recall    | 0.330645 |
|  3 | macro_f1        | 0.335327 |
|  4 | micro_precision | 0.77     |
|  5 | micro_recall    | 0.77     |
|  6 | micro_f1        | 0.77     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15686.69 ms
[Epoch 1] loss: 48.37781631946564


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.63 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15714.49 ms
[Epoch 2] loss: 33.55780956149101


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.88 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15795.55 ms
[Epoch 3] loss: 26.192939560860395


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.87 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15814.58 ms
[Epoch 4] loss: 21.042398788034916


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 534.06 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.533712 |
|  2 | macro_recall    | 0.710279 |
|  3 | macro_f1        | 0.565672 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15433.05 ms
[Epoch 5] loss: 15.056783717125654


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.63 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15684.06 ms
[Epoch 6] loss: 10.90959600545466


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 532.88 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15742.64 ms
[Epoch 7] loss: 8.796218007337302


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 801.86 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15415.83 ms
[Epoch 8] loss: 7.305041889892891


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.09 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.546442 |
|  2 | macro_recall    | 0.702215 |
|  3 | macro_f1        | 0.582407 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15652.39 ms
[Epoch 9] loss: 5.9446320462739095


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.76 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.70 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7722772277227723  |
|  1 | macro_precision | 0.46513914095583786 |
|  2 | macro_recall    | 0.4425223214285714  |
|  3 | macro_f1        | 0.4375310481867859  |
|  4 | micro_precision | 0.7722772277227723  |
|  5 | micro_recall    | 0.7722772277227723  |
|  6 | micro_f1        | 0.7722772277227723  |
|  7 | lans            | ['en', 'ar']        |
|  8 | sample_rate     | 0.5                 |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1203 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15664.58 ms
[Epoch 0] loss: 82.79762929677963


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.67 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.77     |
|  1 | macro_precision | 0.436111 |
|  2 | macro_recall    | 0.330645 |
|  3 | macro_f1        | 0.335327 |
|  4 | micro_precision | 0.77     |
|  5 | micro_recall    | 0.77     |
|  6 | micro_f1        | 0.77     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15657.05 ms
[Epoch 1] loss: 48.37781631946564


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.77 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15620.85 ms
[Epoch 2] loss: 33.55780956149101


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.15 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15647.50 ms
[Epoch 3] loss: 26.192939560860395


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.47 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15674.26 ms
[Epoch 4] loss: 21.042398788034916


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.74 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.533712 |
|  2 | macro_recall    | 0.710279 |
|  3 | macro_f1        | 0.565672 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15700.57 ms
[Epoch 5] loss: 15.056783717125654


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.42 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15666.44 ms
[Epoch 6] loss: 10.90959600545466


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.20 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15680.36 ms
[Epoch 7] loss: 8.796218007337302


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.21 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15709.15 ms
[Epoch 8] loss: 7.305041889892891


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.20 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.546442 |
|  2 | macro_recall    | 0.702215 |
|  3 | macro_f1        | 0.582407 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15669.96 ms
[Epoch 9] loss: 5.9446320462739095


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.11 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.80 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7722772277227723  |
|  1 | macro_precision | 0.46513914095583786 |
|  2 | macro_recall    | 0.4425223214285714  |
|  3 | macro_f1        | 0.4375310481867859  |
|  4 | micro_precision | 0.7722772277227723  |
|  5 | micro_recall    | 0.7722772277227723  |
|  6 | micro_f1        | 0.7722772277227723  |
|  7 | lans            | ['en', 'ar']        |
|  8 | sample_rate     | 0.5                 |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1403 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18099.84 ms
[Epoch 0] loss: 96.05222815275192


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.06 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.75     |
|  1 | macro_precision | 0.432065 |
|  2 | macro_recall    | 0.314516 |
|  3 | macro_f1        | 0.313256 |
|  4 | micro_precision | 0.75     |
|  5 | micro_recall    | 0.75     |
|  6 | micro_f1        | 0.75     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18171.72 ms
[Epoch 1] loss: 57.662678718566895


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.17 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.443336 |
|  2 | macro_recall    | 0.469547 |
|  3 | macro_f1        | 0.453825 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18226.96 ms
[Epoch 2] loss: 42.217663422226906


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.74 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.462234 |
|  2 | macro_recall    | 0.476408 |
|  3 | macro_f1        | 0.4691   |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18163.92 ms
[Epoch 3] loss: 29.230419032275677


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.14 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18231.79 ms
[Epoch 4] loss: 21.37549541518092


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.15 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18168.49 ms
[Epoch 5] loss: 16.378583636134863


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.31 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.506661 |
|  2 | macro_recall    | 0.678021 |
|  3 | macro_f1        | 0.524058 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18201.87 ms
[Epoch 6] loss: 10.556101898662746


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.35 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.522667 |
|  2 | macro_recall    | 0.702215 |
|  3 | macro_f1        | 0.530999 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18032.47 ms
[Epoch 7] loss: 7.3327301408862695


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 805.43 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.595588 |
|  2 | macro_recall    | 0.730741 |
|  3 | macro_f1        | 0.641711 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17800.23 ms
[Epoch 8] loss: 8.587275495636277


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 808.35 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18141.70 ms
[Epoch 9] loss: 5.716691457666457


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.34 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 517.28 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.7821782178217822 |
|  1 | macro_precision | 0.4742555831265508 |
|  2 | macro_recall    | 0.5033482142857143 |
|  3 | macro_f1        | 0.4828042328042328 |
|  4 | micro_precision | 0.7821782178217822 |
|  5 | micro_recall    | 0.7821782178217822 |
|  6 | micro_f1        | 0.7821782178217822 |
|  7 | lans            | ['en', 'ar']       |
|  8 | sample_rate     | 0.75               |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1403 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18057.33 ms
[Epoch 0] loss: 96.05222815275192


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.24 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.75     |
|  1 | macro_precision | 0.432065 |
|  2 | macro_recall    | 0.314516 |
|  3 | macro_f1        | 0.313256 |
|  4 | micro_precision | 0.75     |
|  5 | micro_recall    | 0.75     |
|  6 | micro_f1        | 0.75     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18143.96 ms
[Epoch 1] loss: 57.662678718566895


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.31 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.443336 |
|  2 | macro_recall    | 0.469547 |
|  3 | macro_f1        | 0.453825 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18202.86 ms
[Epoch 2] loss: 42.217663422226906


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.95 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.462234 |
|  2 | macro_recall    | 0.476408 |
|  3 | macro_f1        | 0.4691   |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18174.47 ms
[Epoch 3] loss: 29.230419032275677


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.22 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18192.27 ms
[Epoch 4] loss: 21.37549541518092


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.15 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18326.75 ms
[Epoch 5] loss: 16.378583636134863


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.13 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.506661 |
|  2 | macro_recall    | 0.678021 |
|  3 | macro_f1        | 0.524058 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18131.34 ms
[Epoch 6] loss: 10.556101898662746


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.37 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.522667 |
|  2 | macro_recall    | 0.702215 |
|  3 | macro_f1        | 0.530999 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18066.57 ms
[Epoch 7] loss: 7.3327301408862695


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.81 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.595588 |
|  2 | macro_recall    | 0.730741 |
|  3 | macro_f1        | 0.641711 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18191.83 ms
[Epoch 8] loss: 8.587275495636277


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.34 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18099.14 ms
[Epoch 9] loss: 5.716691457666457


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.20 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 816.44 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.7821782178217822 |
|  1 | macro_precision | 0.4742555831265508 |
|  2 | macro_recall    | 0.5033482142857143 |
|  3 | macro_f1        | 0.4828042328042328 |
|  4 | micro_precision | 0.7821782178217822 |
|  5 | micro_recall    | 0.7821782178217822 |
|  6 | micro_f1        | 0.7821782178217822 |
|  7 | lans            | ['en', 'ar']       |
|  8 | sample_rate     | 0.75               |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1604 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20712.54 ms
[Epoch 0] loss: 109.09333860874176


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.42 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.84     |
|  1 | macro_precision | 0.425184 |
|  2 | macro_recall    | 0.400096 |
|  3 | macro_f1        | 0.407407 |
|  4 | micro_precision | 0.84     |
|  5 | micro_recall    | 0.84     |
|  6 | micro_f1        | 0.84     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20792.69 ms
[Epoch 1] loss: 62.19363710284233


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.79 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.457721 |
|  2 | macro_recall    | 0.468344 |
|  3 | macro_f1        | 0.462963 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20903.79 ms
[Epoch 2] loss: 44.42047190666199


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.47 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.462234 |
|  2 | macro_recall    | 0.476408 |
|  3 | macro_f1        | 0.4691   |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20898.08 ms
[Epoch 3] loss: 30.12651139497757


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 531.16 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20783.63 ms
[Epoch 4] loss: 19.855312389321625


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.23 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21151.50 ms
[Epoch 5] loss: 13.513890095055103


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.25 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20779.53 ms
[Epoch 6] loss: 10.472981621627696


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.53 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.504545 |
|  2 | macro_recall    | 0.699085 |
|  3 | macro_f1        | 0.534581 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20760.64 ms
[Epoch 7] loss: 6.9754370470764115


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.86 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.86     |
|  1 | macro_precision | 0.519048 |
|  2 | macro_recall    | 0.679827 |
|  3 | macro_f1        | 0.556097 |
|  4 | micro_precision | 0.86     |
|  5 | micro_recall    | 0.86     |
|  6 | micro_f1        | 0.86     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20801.76 ms
[Epoch 8] loss: 4.210731249710079


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.50 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20637.91 ms
[Epoch 9] loss: 2.663380850397516


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.00 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.81 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7425742574257426  |
|  1 | macro_precision | 0.43920454545454546 |
|  2 | macro_recall    | 0.46316964285714285 |
|  3 | macro_f1        | 0.44104747162022706 |
|  4 | micro_precision | 0.7425742574257426  |
|  5 | micro_recall    | 0.7425742574257426  |
|  6 | micro_f1        | 0.7425742574257425  |
|  7 | lans            | ['en', 'ar']        |
|  8 | sample_rate     | 1                   |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1604 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21033.46 ms
[Epoch 0] loss: 109.09333860874176


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.48 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.84     |
|  1 | macro_precision | 0.425184 |
|  2 | macro_recall    | 0.400096 |
|  3 | macro_f1        | 0.407407 |
|  4 | micro_precision | 0.84     |
|  5 | micro_recall    | 0.84     |
|  6 | micro_f1        | 0.84     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20754.75 ms
[Epoch 1] loss: 62.19363710284233


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.48 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.457721 |
|  2 | macro_recall    | 0.468344 |
|  3 | macro_f1        | 0.462963 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20712.76 ms
[Epoch 2] loss: 44.42047190666199


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.64 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.462234 |
|  2 | macro_recall    | 0.476408 |
|  3 | macro_f1        | 0.4691   |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20782.15 ms
[Epoch 3] loss: 30.12651139497757


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.12 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21067.49 ms
[Epoch 4] loss: 19.855312389321625


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.79 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20756.39 ms
[Epoch 5] loss: 13.513890095055103


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.37 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20840.31 ms
[Epoch 6] loss: 10.472981621627696


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.30 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.504545 |
|  2 | macro_recall    | 0.699085 |
|  3 | macro_f1        | 0.534581 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20698.85 ms
[Epoch 7] loss: 6.9754370470764115


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.05 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.86     |
|  1 | macro_precision | 0.519048 |
|  2 | macro_recall    | 0.679827 |
|  3 | macro_f1        | 0.556097 |
|  4 | micro_precision | 0.86     |
|  5 | micro_recall    | 0.86     |
|  6 | micro_f1        | 0.86     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21011.44 ms
[Epoch 8] loss: 4.210731249710079


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.14 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20703.98 ms
[Epoch 9] loss: 2.663380850397516


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.31 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.80 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7425742574257426  |
|  1 | macro_precision | 0.43920454545454546 |
|  2 | macro_recall    | 0.46316964285714285 |
|  3 | macro_f1        | 0.44104747162022706 |
|  4 | micro_precision | 0.7425742574257426  |
|  5 | micro_recall    | 0.7425742574257426  |
|  6 | micro_f1        | 0.7425742574257425  |
|  7 | lans            | ['en', 'ar']        |
|  8 | sample_rate     | 1                   |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  882 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11517.35 ms
[Epoch 0] loss: 65.08724123239517


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.08 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.67     |
|  1 | macro_precision | 0.1675   |
|  2 | macro_recall    | 0.25     |
|  3 | macro_f1        | 0.200599 |
|  4 | micro_precision | 0.67     |
|  5 | micro_recall    | 0.67     |
|  6 | micro_f1        | 0.67     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11554.15 ms
[Epoch 1] loss: 41.53201347589493


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.97 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.454427 |
|  2 | macro_recall    | 0.47701  |
|  3 | macro_f1        | 0.464339 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11309.49 ms
[Epoch 2] loss: 27.681291654706


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.95 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11511.61 ms
[Epoch 3] loss: 21.294254954904318


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.46 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.465278 |
|  2 | macro_recall    | 0.488806 |
|  3 | macro_f1        | 0.475618 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11561.32 ms
[Epoch 4] loss: 17.919469609856606


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.37 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11574.05 ms
[Epoch 5] loss: 16.912349600344896


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.13 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.5292   |
|  2 | macro_recall    | 0.714612 |
|  3 | macro_f1        | 0.547186 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11212.65 ms
[Epoch 6] loss: 10.80975656863302


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.38 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.595588 |
|  2 | macro_recall    | 0.730741 |
|  3 | macro_f1        | 0.641711 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11529.54 ms
[Epoch 7] loss: 8.38847784884274


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.02 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11589.85 ms
[Epoch 8] loss: 4.943814044352621


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 533.05 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11218.03 ms
[Epoch 9] loss: 7.096413384890184


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.40 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.63 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8217821782178217 |
|  1 | macro_precision | 0.5188852813852813 |
|  2 | macro_recall    | 0.5217633928571428 |
|  3 | macro_f1        | 0.5183632998413538 |
|  4 | micro_precision | 0.8217821782178217 |
|  5 | micro_recall    | 0.8217821782178217 |
|  6 | micro_f1        | 0.8217821782178217 |
|  7 | lans            | ['en', 'zh']       |
|  8 | sample_rate     | 0.1                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  882 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11237.54 ms
[Epoch 0] loss: 65.08724123239517


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.68 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.67     |
|  1 | macro_precision | 0.1675   |
|  2 | macro_recall    | 0.25     |
|  3 | macro_f1        | 0.200599 |
|  4 | micro_precision | 0.67     |
|  5 | micro_recall    | 0.67     |
|  6 | micro_f1        | 0.67     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11513.25 ms
[Epoch 1] loss: 41.53201347589493


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.79 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.454427 |
|  2 | macro_recall    | 0.47701  |
|  3 | macro_f1        | 0.464339 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11271.32 ms
[Epoch 2] loss: 27.681291654706


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.89 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11587.91 ms
[Epoch 3] loss: 21.294254954904318


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.82 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.465278 |
|  2 | macro_recall    | 0.488806 |
|  3 | macro_f1        | 0.475618 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11303.09 ms
[Epoch 4] loss: 17.919469609856606


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 820.33 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11330.68 ms
[Epoch 5] loss: 16.912349600344896


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.24 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.5292   |
|  2 | macro_recall    | 0.714612 |
|  3 | macro_f1        | 0.547186 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11595.46 ms
[Epoch 6] loss: 10.80975656863302


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.67 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.595588 |
|  2 | macro_recall    | 0.730741 |
|  3 | macro_f1        | 0.641711 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11604.35 ms
[Epoch 7] loss: 8.38847784884274


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.54 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11229.64 ms
[Epoch 8] loss: 4.943814044352621


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 818.62 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11222.57 ms
[Epoch 9] loss: 7.096413384890184


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.56 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.14 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8217821782178217 |
|  1 | macro_precision | 0.5188852813852813 |
|  2 | macro_recall    | 0.5217633928571428 |
|  3 | macro_f1        | 0.5183632998413538 |
|  4 | micro_precision | 0.8217821782178217 |
|  5 | micro_recall    | 0.8217821782178217 |
|  6 | micro_f1        | 0.8217821782178217 |
|  7 | lans            | ['en', 'zh']       |
|  8 | sample_rate     | 0.1                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1002 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13047.79 ms
[Epoch 0] loss: 70.77769035100937


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.89 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.7      |
|  1 | macro_precision | 0.42268  |
|  2 | macro_recall    | 0.274194 |
|  3 | macro_f1        | 0.248386 |
|  4 | micro_precision | 0.7      |
|  5 | micro_recall    | 0.7      |
|  6 | micro_f1        | 0.7      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13073.11 ms
[Epoch 1] loss: 45.16646698117256


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.73 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.449451 |
|  2 | macro_recall    | 0.468946 |
|  3 | macro_f1        | 0.458333 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13083.00 ms
[Epoch 2] loss: 32.147621884942055


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 515.88 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13083.40 ms
[Epoch 3] loss: 23.739340260624886


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.03 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.454427 |
|  2 | macro_recall    | 0.47701  |
|  3 | macro_f1        | 0.464339 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12781.66 ms
[Epoch 4] loss: 22.648455139249563


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 517.00 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13138.21 ms
[Epoch 5] loss: 16.49339863471687


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.92 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.464286 |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.471548 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13158.57 ms
[Epoch 6] loss: 12.302698664367199


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.52 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13133.86 ms
[Epoch 7] loss: 9.383684381842613


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.85 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.509852 |
|  2 | macro_recall    | 0.698483 |
|  3 | macro_f1        | 0.523441 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13016.55 ms
[Epoch 8] loss: 6.778325473191217


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.24 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12991.08 ms
[Epoch 9] loss: 7.3795548636699095


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 514.24 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.21 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7524752475247525  |
|  1 | macro_precision | 0.47181603773584907 |
|  2 | macro_recall    | 0.4893973214285714  |
|  3 | macro_f1        | 0.4734634238310709  |
|  4 | micro_precision | 0.7524752475247525  |
|  5 | micro_recall    | 0.7524752475247525  |
|  6 | micro_f1        | 0.7524752475247525  |
|  7 | lans            | ['en', 'zh']        |
|  8 | sample_rate     | 0.25                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1002 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13030.83 ms
[Epoch 0] loss: 70.77769035100937


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.18 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.7      |
|  1 | macro_precision | 0.42268  |
|  2 | macro_recall    | 0.274194 |
|  3 | macro_f1        | 0.248386 |
|  4 | micro_precision | 0.7      |
|  5 | micro_recall    | 0.7      |
|  6 | micro_f1        | 0.7      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13042.82 ms
[Epoch 1] loss: 45.16646698117256


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.45 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.449451 |
|  2 | macro_recall    | 0.468946 |
|  3 | macro_f1        | 0.458333 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12805.17 ms
[Epoch 2] loss: 32.147621884942055


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.01 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13134.67 ms
[Epoch 3] loss: 23.739340260624886


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.06 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.454427 |
|  2 | macro_recall    | 0.47701  |
|  3 | macro_f1        | 0.464339 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13061.80 ms
[Epoch 4] loss: 22.648455139249563


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.77 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12836.62 ms
[Epoch 5] loss: 16.49339863471687


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.28 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.464286 |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.471548 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13169.29 ms
[Epoch 6] loss: 12.302698664367199


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.74 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13187.59 ms
[Epoch 7] loss: 9.383684381842613


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.57 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.509852 |
|  2 | macro_recall    | 0.698483 |
|  3 | macro_f1        | 0.523441 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13157.03 ms
[Epoch 8] loss: 6.778325473191217


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.66 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13146.08 ms
[Epoch 9] loss: 7.3795548636699095


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.24 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.88 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7524752475247525  |
|  1 | macro_precision | 0.47181603773584907 |
|  2 | macro_recall    | 0.4893973214285714  |
|  3 | macro_f1        | 0.4734634238310709  |
|  4 | micro_precision | 0.7524752475247525  |
|  5 | micro_recall    | 0.7524752475247525  |
|  6 | micro_f1        | 0.7524752475247525  |
|  7 | lans            | ['en', 'zh']        |
|  8 | sample_rate     | 0.25                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1203 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15719.21 ms
[Epoch 0] loss: 83.68987661600113


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.44 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.75     |
|  1 | macro_precision | 0.432065 |
|  2 | macro_recall    | 0.314516 |
|  3 | macro_f1        | 0.313256 |
|  4 | micro_precision | 0.75     |
|  5 | micro_recall    | 0.75     |
|  6 | micro_f1        | 0.75     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15724.72 ms
[Epoch 1] loss: 49.90377290546894


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.65 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15749.17 ms
[Epoch 2] loss: 34.90270611643791


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.68 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15815.00 ms
[Epoch 3] loss: 28.455918170511723


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.27 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15774.15 ms
[Epoch 4] loss: 20.537013582885265


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 532.93 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15768.78 ms
[Epoch 5] loss: 17.437870202586055


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.63 ms
Early Stopping. Epoch: 5, best_valid_metric (macro_f1): 0.47513013302486984



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.66 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8415841584158416 |
|  1 | macro_precision | 0.5610702614379085 |
|  2 | macro_recall    | 0.5641741071428572 |
|  3 | macro_f1        | 0.5608117372823256 |
|  4 | micro_precision | 0.8415841584158416 |
|  5 | micro_recall    | 0.8415841584158416 |
|  6 | micro_f1        | 0.8415841584158416 |
|  7 | lans            | ['en', 'zh']       |
|  8 | sample_rate     | 0.5                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1203 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15642.54 ms
[Epoch 0] loss: 83.68987661600113


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 517.61 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.75     |
|  1 | macro_precision | 0.432065 |
|  2 | macro_recall    | 0.314516 |
|  3 | macro_f1        | 0.313256 |
|  4 | micro_precision | 0.75     |
|  5 | micro_recall    | 0.75     |
|  6 | micro_f1        | 0.75     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15686.43 ms
[Epoch 1] loss: 49.90377290546894


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.51 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15702.05 ms
[Epoch 2] loss: 34.90270611643791


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.89 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15741.75 ms
[Epoch 3] loss: 28.455918170511723


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.68 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15674.62 ms
[Epoch 4] loss: 20.537013582885265


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.25 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15689.58 ms
[Epoch 5] loss: 17.437870202586055


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 531.32 ms
Early Stopping. Epoch: 5, best_valid_metric (macro_f1): 0.47513013302486984



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.91 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8415841584158416 |
|  1 | macro_precision | 0.5610702614379085 |
|  2 | macro_recall    | 0.5641741071428572 |
|  3 | macro_f1        | 0.5608117372823256 |
|  4 | micro_precision | 0.8415841584158416 |
|  5 | micro_recall    | 0.8415841584158416 |
|  6 | micro_f1        | 0.8415841584158416 |
|  7 | lans            | ['en', 'zh']       |
|  8 | sample_rate     | 0.5                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1403 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18134.65 ms
[Epoch 0] loss: 96.18535548448563


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.09 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.72     |
|  1 | macro_precision | 0.391705 |
|  2 | macro_recall    | 0.294656 |
|  3 | macro_f1        | 0.285197 |
|  4 | micro_precision | 0.72     |
|  5 | micro_recall    | 0.72     |
|  6 | micro_f1        | 0.72     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18174.68 ms
[Epoch 1] loss: 59.22007930278778


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.84 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.433333 |
|  2 | macro_recall    | 0.462085 |
|  3 | macro_f1        | 0.443551 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18208.03 ms
[Epoch 2] loss: 43.34486363828182


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.71 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18223.11 ms
[Epoch 3] loss: 31.738685950636864


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.69 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18301.75 ms
[Epoch 4] loss: 21.780531896278262


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.92 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18332.18 ms
[Epoch 5] loss: 15.797483598813415


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 532.14 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.86     |
|  1 | macro_precision | 0.487013 |
|  2 | macro_recall    | 0.67116  |
|  3 | macro_f1        | 0.483338 |
|  4 | micro_precision | 0.86     |
|  5 | micro_recall    | 0.86     |
|  6 | micro_f1        | 0.86     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18149.90 ms
[Epoch 6] loss: 11.003216790035367


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.06 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.542045 |
|  2 | macro_recall    | 0.718344 |
|  3 | macro_f1        | 0.573869 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18175.33 ms
[Epoch 7] loss: 8.644245830830187


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.46 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18216.03 ms
[Epoch 8] loss: 7.562955369823612


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.74 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.96     |
|  1 | macro_precision | 0.723485 |
|  2 | macro_recall    | 0.734473 |
|  3 | macro_f1        | 0.728736 |
|  4 | micro_precision | 0.96     |
|  5 | micro_recall    | 0.96     |
|  6 | micro_f1        | 0.96     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18487.74 ms
[Epoch 9] loss: 7.5837882129708305


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.88 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.80 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7524752475247525  |
|  1 | macro_precision | 0.45046791443850265 |
|  2 | macro_recall    | 0.4793526785714286  |
|  3 | macro_f1        | 0.4589793982108973  |
|  4 | micro_precision | 0.7524752475247525  |
|  5 | micro_recall    | 0.7524752475247525  |
|  6 | micro_f1        | 0.7524752475247525  |
|  7 | lans            | ['en', 'zh']        |
|  8 | sample_rate     | 0.75                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1403 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18089.47 ms
[Epoch 0] loss: 96.18535548448563


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.84 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.72     |
|  1 | macro_precision | 0.391705 |
|  2 | macro_recall    | 0.294656 |
|  3 | macro_f1        | 0.285197 |
|  4 | micro_precision | 0.72     |
|  5 | micro_recall    | 0.72     |
|  6 | micro_f1        | 0.72     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18124.80 ms
[Epoch 1] loss: 59.22007930278778


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.63 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.433333 |
|  2 | macro_recall    | 0.462085 |
|  3 | macro_f1        | 0.443551 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18151.23 ms
[Epoch 2] loss: 43.34486363828182


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.84 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18182.08 ms
[Epoch 3] loss: 31.738685950636864


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.54 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17878.74 ms
[Epoch 4] loss: 21.780531896278262


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 828.17 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17859.73 ms
[Epoch 5] loss: 15.797483598813415


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 822.37 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.86     |
|  1 | macro_precision | 0.487013 |
|  2 | macro_recall    | 0.67116  |
|  3 | macro_f1        | 0.483338 |
|  4 | micro_precision | 0.86     |
|  5 | micro_recall    | 0.86     |
|  6 | micro_f1        | 0.86     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18160.70 ms
[Epoch 6] loss: 11.003216790035367


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.34 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.542045 |
|  2 | macro_recall    | 0.718344 |
|  3 | macro_f1        | 0.573869 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18092.42 ms
[Epoch 7] loss: 8.644245830830187


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.06 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18225.88 ms
[Epoch 8] loss: 7.562955369823612


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.82 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.96     |
|  1 | macro_precision | 0.723485 |
|  2 | macro_recall    | 0.734473 |
|  3 | macro_f1        | 0.728736 |
|  4 | micro_precision | 0.96     |
|  5 | micro_recall    | 0.96     |
|  6 | micro_f1        | 0.96     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18258.89 ms
[Epoch 9] loss: 7.5837882129708305


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.95 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.13 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7524752475247525  |
|  1 | macro_precision | 0.45046791443850265 |
|  2 | macro_recall    | 0.4793526785714286  |
|  3 | macro_f1        | 0.4589793982108973  |
|  4 | micro_precision | 0.7524752475247525  |
|  5 | micro_recall    | 0.7524752475247525  |
|  6 | micro_f1        | 0.7524752475247525  |
|  7 | lans            | ['en', 'zh']        |
|  8 | sample_rate     | 0.75                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1604 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20738.83 ms
[Epoch 0] loss: 108.49695283174515


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.12 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.8      |
|  1 | macro_precision | 0.442529 |
|  2 | macro_recall    | 0.354839 |
|  3 | macro_f1        | 0.36526  |
|  4 | micro_precision | 0.8      |
|  5 | micro_recall    | 0.8      |
|  6 | micro_f1        | 0.8      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20723.58 ms
[Epoch 1] loss: 62.69416305422783


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.43 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20765.86 ms
[Epoch 2] loss: 44.29142067581415


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 836.45 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20448.04 ms
[Epoch 3] loss: 32.217417635023594


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.79 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20783.52 ms
[Epoch 4] loss: 22.909588743001223


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.05 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21072.18 ms
[Epoch 5] loss: 18.215658286353573


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.41 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.601408 |
|  2 | macro_recall    | 0.726408 |
|  3 | macro_f1        | 0.643075 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20783.41 ms
[Epoch 6] loss: 11.794749843189493


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.53 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21108.92 ms
[Epoch 7] loss: 9.2007504273206


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.51 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20702.98 ms
[Epoch 8] loss: 9.376534074137453


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.78 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20803.61 ms
[Epoch 9] loss: 6.4666829912457615


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.70 ms
Early Stopping. Epoch: 9, best_valid_metric (macro_f1): 0.6430749478414379



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.33 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7623762376237624  |
|  1 | macro_precision | 0.46400862068965515 |
|  2 | macro_recall    | 0.4832589285714286  |
|  3 | macro_f1        | 0.4713114754098361  |
|  4 | micro_precision | 0.7623762376237624  |
|  5 | micro_recall    | 0.7623762376237624  |
|  6 | micro_f1        | 0.7623762376237624  |
|  7 | lans            | ['en', 'zh']        |
|  8 | sample_rate     | 1                   |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1604 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20667.80 ms
[Epoch 0] loss: 108.49695283174515


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.03 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.8      |
|  1 | macro_precision | 0.442529 |
|  2 | macro_recall    | 0.354839 |
|  3 | macro_f1        | 0.36526  |
|  4 | micro_precision | 0.8      |
|  5 | micro_recall    | 0.8      |
|  6 | micro_f1        | 0.8      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21083.63 ms
[Epoch 1] loss: 62.69416305422783


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.87 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20836.99 ms
[Epoch 2] loss: 44.29142067581415


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.74 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20763.26 ms
[Epoch 3] loss: 32.217417635023594


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.68 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20818.24 ms
[Epoch 4] loss: 22.909588743001223


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.96 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20789.90 ms
[Epoch 5] loss: 18.215658286353573


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.56 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.601408 |
|  2 | macro_recall    | 0.726408 |
|  3 | macro_f1        | 0.643075 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21182.30 ms
[Epoch 6] loss: 11.794749843189493


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.01 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20429.52 ms
[Epoch 7] loss: 9.2007504273206


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 831.24 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20398.79 ms
[Epoch 8] loss: 9.376534074137453


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 832.65 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20694.56 ms
[Epoch 9] loss: 6.4666829912457615


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.46 ms
Early Stopping. Epoch: 9, best_valid_metric (macro_f1): 0.6430749478414379



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.84 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7623762376237624  |
|  1 | macro_precision | 0.46400862068965515 |
|  2 | macro_recall    | 0.4832589285714286  |
|  3 | macro_f1        | 0.4713114754098361  |
|  4 | micro_precision | 0.7623762376237624  |
|  5 | micro_recall    | 0.7623762376237624  |
|  6 | micro_f1        | 0.7623762376237624  |
|  7 | lans            | ['en', 'zh']        |
|  8 | sample_rate     | 1                   |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  882 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11225.49 ms
[Epoch 0] loss: 64.94043320417404


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 841.32 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.7      |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.274194 |
|  3 | macro_f1        | 0.253493 |
|  4 | micro_precision | 0.7      |
|  5 | micro_recall    | 0.7      |
|  6 | micro_f1        | 0.7      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11239.54 ms
[Epoch 1] loss: 41.574236422777176


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.64 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.88     |
|  1 | macro_precision | 0.42869  |
|  2 | macro_recall    | 0.458353 |
|  3 | macro_f1        | 0.438492 |
|  4 | micro_precision | 0.88     |
|  5 | micro_recall    | 0.88     |
|  6 | micro_f1        | 0.88     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11552.37 ms
[Epoch 2] loss: 27.912456408143044


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.69 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.443336 |
|  2 | macro_recall    | 0.469547 |
|  3 | macro_f1        | 0.453825 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11572.15 ms
[Epoch 3] loss: 21.921162515878677


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.71 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.96     |
|  1 | macro_precision | 0.473541 |
|  2 | macro_recall    | 0.488204 |
|  3 | macro_f1        | 0.480644 |
|  4 | micro_precision | 0.96     |
|  5 | micro_recall    | 0.96     |
|  6 | micro_f1        | 0.96     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11570.36 ms
[Epoch 4] loss: 18.37886805832386


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.05 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11247.31 ms
[Epoch 5] loss: 15.036971075460315


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.61 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11554.69 ms
[Epoch 6] loss: 9.118949794210494


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.03 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.590851 |
|  2 | macro_recall    | 0.722677 |
|  3 | macro_f1        | 0.635653 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11609.18 ms
[Epoch 7] loss: 6.727861295919865


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.66 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11263.64 ms
[Epoch 8] loss: 7.2891454345081


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 825.27 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11279.56 ms
[Epoch 9] loss: 5.2082503363490105


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.56 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.14 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.7920792079207921 |
|  1 | macro_precision | 0.5257757757757757 |
|  2 | macro_recall    | 0.5373883928571428 |
|  3 | macro_f1        | 0.5264993481095176 |
|  4 | micro_precision | 0.7920792079207921 |
|  5 | micro_recall    | 0.7920792079207921 |
|  6 | micro_f1        | 0.7920792079207921 |
|  7 | lans            | ['en', 'fr']       |
|  8 | sample_rate     | 0.1                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  882 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11541.89 ms
[Epoch 0] loss: 64.94043320417404


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.81 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.7      |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.274194 |
|  3 | macro_f1        | 0.253493 |
|  4 | micro_precision | 0.7      |
|  5 | micro_recall    | 0.7      |
|  6 | micro_f1        | 0.7      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11558.04 ms
[Epoch 1] loss: 41.574236422777176


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.30 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.88     |
|  1 | macro_precision | 0.42869  |
|  2 | macro_recall    | 0.458353 |
|  3 | macro_f1        | 0.438492 |
|  4 | micro_precision | 0.88     |
|  5 | micro_recall    | 0.88     |
|  6 | micro_f1        | 0.88     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11237.22 ms
[Epoch 2] loss: 27.912456408143044


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 835.48 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.443336 |
|  2 | macro_recall    | 0.469547 |
|  3 | macro_f1        | 0.453825 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11233.43 ms
[Epoch 3] loss: 21.921162515878677


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.27 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.96     |
|  1 | macro_precision | 0.473541 |
|  2 | macro_recall    | 0.488204 |
|  3 | macro_f1        | 0.480644 |
|  4 | micro_precision | 0.96     |
|  5 | micro_recall    | 0.96     |
|  6 | micro_f1        | 0.96     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11573.93 ms
[Epoch 4] loss: 18.37886805832386


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.72 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11244.92 ms
[Epoch 5] loss: 15.036971075460315


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 830.13 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11238.95 ms
[Epoch 6] loss: 9.118949794210494


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.30 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.590851 |
|  2 | macro_recall    | 0.722677 |
|  3 | macro_f1        | 0.635653 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11538.93 ms
[Epoch 7] loss: 6.727861295919865


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.96 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11537.74 ms
[Epoch 8] loss: 7.2891454345081


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.28 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11220.20 ms
[Epoch 9] loss: 5.2082503363490105


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.46 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.75 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.7920792079207921 |
|  1 | macro_precision | 0.5257757757757757 |
|  2 | macro_recall    | 0.5373883928571428 |
|  3 | macro_f1        | 0.5264993481095176 |
|  4 | micro_precision | 0.7920792079207921 |
|  5 | micro_recall    | 0.7920792079207921 |
|  6 | micro_f1        | 0.7920792079207921 |
|  7 | lans            | ['en', 'fr']       |
|  8 | sample_rate     | 0.1                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1002 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13061.11 ms
[Epoch 0] loss: 70.653027176857


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.73 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.8      |
|  1 | macro_precision | 0.442529 |
|  2 | macro_recall    | 0.354839 |
|  3 | macro_f1        | 0.36526  |
|  4 | micro_precision | 0.8      |
|  5 | micro_recall    | 0.8      |
|  6 | micro_f1        | 0.8      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13090.56 ms
[Epoch 1] loss: 44.42929059267044


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.78 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.438209 |
|  2 | macro_recall    | 0.465816 |
|  3 | macro_f1        | 0.448661 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13104.06 ms
[Epoch 2] loss: 30.14775763452053


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.33 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13116.61 ms
[Epoch 3] loss: 22.025847554206848


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.51 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12867.18 ms
[Epoch 4] loss: 19.84652103856206


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.00 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13100.90 ms
[Epoch 5] loss: 16.14354327507317


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.60 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13097.18 ms
[Epoch 6] loss: 12.012630022130907


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.44 ms
Early Stopping. Epoch: 6, best_valid_metric (macro_f1): 0.4696969696969696



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.75 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.801980198019802   |
|  1 | macro_precision | 0.5181818181818182  |
|  2 | macro_recall    | 0.47433035714285715 |
|  3 | macro_f1        | 0.45744631185807655 |
|  4 | micro_precision | 0.801980198019802   |
|  5 | micro_recall    | 0.801980198019802   |
|  6 | micro_f1        | 0.801980198019802   |
|  7 | lans            | ['en', 'fr']        |
|  8 | sample_rate     | 0.25                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1002 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12734.39 ms
[Epoch 0] loss: 70.653027176857


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.04 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.8      |
|  1 | macro_precision | 0.442529 |
|  2 | macro_recall    | 0.354839 |
|  3 | macro_f1        | 0.36526  |
|  4 | micro_precision | 0.8      |
|  5 | micro_recall    | 0.8      |
|  6 | micro_f1        | 0.8      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13076.17 ms
[Epoch 1] loss: 44.42929059267044


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.32 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.438209 |
|  2 | macro_recall    | 0.465816 |
|  3 | macro_f1        | 0.448661 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13056.89 ms
[Epoch 2] loss: 30.14775763452053


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.46 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13071.48 ms
[Epoch 3] loss: 22.025847554206848


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.96 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12743.46 ms
[Epoch 4] loss: 19.84652103856206


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 840.80 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12781.19 ms
[Epoch 5] loss: 16.14354327507317


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.45 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13082.55 ms
[Epoch 6] loss: 12.012630022130907


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.78 ms
Early Stopping. Epoch: 6, best_valid_metric (macro_f1): 0.4696969696969696



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.86 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.801980198019802   |
|  1 | macro_precision | 0.5181818181818182  |
|  2 | macro_recall    | 0.47433035714285715 |
|  3 | macro_f1        | 0.45744631185807655 |
|  4 | micro_precision | 0.801980198019802   |
|  5 | micro_recall    | 0.801980198019802   |
|  6 | micro_f1        | 0.801980198019802   |
|  7 | lans            | ['en', 'fr']        |
|  8 | sample_rate     | 0.25                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1203 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15651.93 ms
[Epoch 0] loss: 82.88883864879608


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.00 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.77     |
|  1 | macro_precision | 0.436111 |
|  2 | macro_recall    | 0.330645 |
|  3 | macro_f1        | 0.335327 |
|  4 | micro_precision | 0.77     |
|  5 | micro_recall    | 0.77     |
|  6 | micro_f1        | 0.77     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15740.09 ms
[Epoch 1] loss: 48.37953966856003


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.58 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15841.83 ms
[Epoch 2] loss: 34.15189881622791


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.29 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15520.61 ms
[Epoch 3] loss: 26.550193034112453


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 853.60 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15531.04 ms
[Epoch 4] loss: 18.60325386375189


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.33 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.533712 |
|  2 | macro_recall    | 0.710279 |
|  3 | macro_f1        | 0.565672 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15800.55 ms
[Epoch 5] loss: 14.815148167312145


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.76 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15853.49 ms
[Epoch 6] loss: 9.38126500765793


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.17 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15800.71 ms
[Epoch 7] loss: 8.701516422443092


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.97 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.555352 |
|  2 | macro_recall    | 0.718344 |
|  3 | macro_f1        | 0.595167 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15656.66 ms
[Epoch 8] loss: 6.493679665378295


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.75 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15666.86 ms
[Epoch 9] loss: 6.8782399874180555


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 531.34 ms
--------------------------------------------------------------------------------
[Epoch 9]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.608825 |
|  2 | macro_recall    | 0.717742 |
|  3 | macro_f1        | 0.641804 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.06 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8514851485148515 |
|  1 | macro_precision | 0.56796875         |
|  2 | macro_recall    | 0.5385044642857143 |
|  3 | macro_f1        | 0.5458533653846154 |
|  4 | micro_precision | 0.8514851485148515 |
|  5 | micro_recall    | 0.8514851485148515 |
|  6 | micro_f1        | 0.8514851485148515 |
|  7 | lans            | ['en', 'fr']       |
|  8 | sample_rate     | 0.5                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1203 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15700.32 ms
[Epoch 0] loss: 82.88883864879608


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.24 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.77     |
|  1 | macro_precision | 0.436111 |
|  2 | macro_recall    | 0.330645 |
|  3 | macro_f1        | 0.335327 |
|  4 | micro_precision | 0.77     |
|  5 | micro_recall    | 0.77     |
|  6 | micro_f1        | 0.77     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15725.62 ms
[Epoch 1] loss: 48.37953966856003


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 514.81 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15645.72 ms
[Epoch 2] loss: 34.15189881622791


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.51 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15711.24 ms
[Epoch 3] loss: 26.550193034112453


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.07 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15706.54 ms
[Epoch 4] loss: 18.60325386375189


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.42 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.533712 |
|  2 | macro_recall    | 0.710279 |
|  3 | macro_f1        | 0.565672 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15387.69 ms
[Epoch 5] loss: 14.815148167312145


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 832.27 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15459.77 ms
[Epoch 6] loss: 9.38126500765793


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.61 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15662.55 ms
[Epoch 7] loss: 8.701516422443092


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.50 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.555352 |
|  2 | macro_recall    | 0.718344 |
|  3 | macro_f1        | 0.595167 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15662.14 ms
[Epoch 8] loss: 6.493679665378295


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.39 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15677.93 ms
[Epoch 9] loss: 6.8782399874180555


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.96 ms
--------------------------------------------------------------------------------
[Epoch 9]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.608825 |
|  2 | macro_recall    | 0.717742 |
|  3 | macro_f1        | 0.641804 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.99 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8514851485148515 |
|  1 | macro_precision | 0.56796875         |
|  2 | macro_recall    | 0.5385044642857143 |
|  3 | macro_f1        | 0.5458533653846154 |
|  4 | micro_precision | 0.8514851485148515 |
|  5 | micro_recall    | 0.8514851485148515 |
|  6 | micro_f1        | 0.8514851485148515 |
|  7 | lans            | ['en', 'fr']       |
|  8 | sample_rate     | 0.5                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1403 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18100.25 ms
[Epoch 0] loss: 95.51562803983688


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.02 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.82     |
|  1 | macro_precision | 0.394811 |
|  2 | macro_recall    | 0.4013   |
|  3 | macro_f1        | 0.398008 |
|  4 | micro_precision | 0.82     |
|  5 | micro_recall    | 0.82     |
|  6 | micro_f1        | 0.82     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18179.06 ms
[Epoch 1] loss: 55.757557794451714


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.64 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18563.65 ms
[Epoch 2] loss: 40.65350026637316


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 512.00 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18265.72 ms
[Epoch 3] loss: 28.58822274208069


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.53 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18255.50 ms
[Epoch 4] loss: 19.354827877134085


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.55 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.511093 |
|  2 | macro_recall    | 0.69415  |
|  3 | macro_f1        | 0.527654 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18133.66 ms
[Epoch 5] loss: 14.353376290760934


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.41 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.53125  |
|  2 | macro_recall    | 0.714612 |
|  3 | macro_f1        | 0.566497 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18082.57 ms
[Epoch 6] loss: 8.873276811558753


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.20 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18157.58 ms
[Epoch 7] loss: 8.80637641530484


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 840.61 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.595588 |
|  2 | macro_recall    | 0.730741 |
|  3 | macro_f1        | 0.641711 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17770.41 ms
[Epoch 8] loss: 7.296219160896726


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.05 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18134.31 ms
[Epoch 9] loss: 8.750988222192973


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.90 ms
--------------------------------------------------------------------------------
[Epoch 9]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.714904 |
|  2 | macro_recall    | 0.714612 |
|  3 | macro_f1        | 0.714646 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.51 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.801980198019802  |
|  1 | macro_precision | 0.5200742240215924 |
|  2 | macro_recall    | 0.4916294642857143 |
|  3 | macro_f1        | 0.4897105382180009 |
|  4 | micro_precision | 0.801980198019802  |
|  5 | micro_recall    | 0.801980198019802  |
|  6 | micro_f1        | 0.801980198019802  |
|  7 | lans            | ['en', 'fr']       |
|  8 | sample_rate     | 0.75               |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1403 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18476.45 ms
[Epoch 0] loss: 95.51562803983688


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.79 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.82     |
|  1 | macro_precision | 0.394811 |
|  2 | macro_recall    | 0.4013   |
|  3 | macro_f1        | 0.398008 |
|  4 | micro_precision | 0.82     |
|  5 | micro_recall    | 0.82     |
|  6 | micro_f1        | 0.82     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18194.56 ms
[Epoch 1] loss: 55.757557794451714


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.47 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18246.95 ms
[Epoch 2] loss: 40.65350026637316


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.17 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18221.22 ms
[Epoch 3] loss: 28.58822274208069


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.20 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18325.84 ms
[Epoch 4] loss: 19.354827877134085


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.24 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.511093 |
|  2 | macro_recall    | 0.69415  |
|  3 | macro_f1        | 0.527654 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18151.62 ms
[Epoch 5] loss: 14.353376290760934


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.99 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.53125  |
|  2 | macro_recall    | 0.714612 |
|  3 | macro_f1        | 0.566497 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18146.28 ms
[Epoch 6] loss: 8.873276811558753


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.06 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18185.40 ms
[Epoch 7] loss: 8.80637641530484


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.28 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.595588 |
|  2 | macro_recall    | 0.730741 |
|  3 | macro_f1        | 0.641711 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17761.60 ms
[Epoch 8] loss: 7.296219160896726


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.79 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18433.00 ms
[Epoch 9] loss: 8.750988222192973


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.09 ms
--------------------------------------------------------------------------------
[Epoch 9]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.714904 |
|  2 | macro_recall    | 0.714612 |
|  3 | macro_f1        | 0.714646 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.85 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.801980198019802  |
|  1 | macro_precision | 0.5200742240215924 |
|  2 | macro_recall    | 0.4916294642857143 |
|  3 | macro_f1        | 0.4897105382180009 |
|  4 | micro_precision | 0.801980198019802  |
|  5 | micro_recall    | 0.801980198019802  |
|  6 | micro_f1        | 0.801980198019802  |
|  7 | lans            | ['en', 'fr']       |
|  8 | sample_rate     | 0.75               |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1604 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20695.94 ms
[Epoch 0] loss: 108.47900295257568


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.12 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.83     |
|  1 | macro_precision | 0.428125 |
|  2 | macro_recall    | 0.387699 |
|  3 | macro_f1        | 0.397559 |
|  4 | micro_precision | 0.83     |
|  5 | micro_recall    | 0.83     |
|  6 | micro_f1        | 0.83     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20803.26 ms
[Epoch 1] loss: 62.21194779872894


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.36 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20767.18 ms
[Epoch 2] loss: 42.3921123445034


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 843.49 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20628.53 ms
[Epoch 3] loss: 30.362808119505644


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.78 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21313.35 ms
[Epoch 4] loss: 24.17299611121416


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.83 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.503338 |
|  2 | macro_recall    | 0.690419 |
|  3 | macro_f1        | 0.522186 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20748.37 ms
[Epoch 5] loss: 13.262651721946895


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.39 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.506997 |
|  2 | macro_recall    | 0.695354 |
|  3 | macro_f1        | 0.544126 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20762.29 ms
[Epoch 6] loss: 8.69925500662066


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.99 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20909.87 ms
[Epoch 7] loss: 7.235026868642308


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.79 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20680.81 ms
[Epoch 8] loss: 3.7506286329589784


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.01 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.547229 |
|  2 | macro_recall    | 0.706548 |
|  3 | macro_f1        | 0.585166 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20849.53 ms
[Epoch 9] loss: 1.9836950540775433


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.24 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.80 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8118811881188119 |
|  1 | macro_precision | 0.5163461538461538 |
|  2 | macro_recall    | 0.4681919642857143 |
|  3 | macro_f1        | 0.4589431872893597 |
|  4 | micro_precision | 0.8118811881188119 |
|  5 | micro_recall    | 0.8118811881188119 |
|  6 | micro_f1        | 0.8118811881188119 |
|  7 | lans            | ['en', 'fr']       |
|  8 | sample_rate     | 1                  |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1604 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20790.04 ms
[Epoch 0] loss: 108.47900295257568


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.98 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.83     |
|  1 | macro_precision | 0.428125 |
|  2 | macro_recall    | 0.387699 |
|  3 | macro_f1        | 0.397559 |
|  4 | micro_precision | 0.83     |
|  5 | micro_recall    | 0.83     |
|  6 | micro_f1        | 0.83     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20733.71 ms
[Epoch 1] loss: 62.21194779872894


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.33 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20785.62 ms
[Epoch 2] loss: 42.3921123445034


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.39 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20840.89 ms
[Epoch 3] loss: 30.362808119505644


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 531.55 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21196.76 ms
[Epoch 4] loss: 24.17299611121416


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.51 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.503338 |
|  2 | macro_recall    | 0.690419 |
|  3 | macro_f1        | 0.522186 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20729.74 ms
[Epoch 5] loss: 13.262651721946895


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.14 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.506997 |
|  2 | macro_recall    | 0.695354 |
|  3 | macro_f1        | 0.544126 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20738.94 ms
[Epoch 6] loss: 8.69925500662066


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.59 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20692.05 ms
[Epoch 7] loss: 7.235026868642308


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.62 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20716.52 ms
[Epoch 8] loss: 3.7506286329589784


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.54 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.547229 |
|  2 | macro_recall    | 0.706548 |
|  3 | macro_f1        | 0.585166 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20736.14 ms
[Epoch 9] loss: 1.9836950540775433


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.44 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.33 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8118811881188119 |
|  1 | macro_precision | 0.5163461538461538 |
|  2 | macro_recall    | 0.4681919642857143 |
|  3 | macro_f1        | 0.4589431872893597 |
|  4 | micro_precision | 0.8118811881188119 |
|  5 | micro_recall    | 0.8118811881188119 |
|  6 | micro_f1        | 0.8118811881188119 |
|  7 | lans            | ['en', 'fr']       |
|  8 | sample_rate     | 1                  |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  882 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11515.32 ms
[Epoch 0] loss: 64.94179064035416


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.02 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.71     |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.282258 |
|  3 | macro_f1        | 0.266518 |
|  4 | micro_precision | 0.71     |
|  5 | micro_recall    | 0.71     |
|  6 | micro_f1        | 0.71     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11531.14 ms
[Epoch 1] loss: 41.02215424180031


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.50 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11242.09 ms
[Epoch 2] loss: 27.282282203435898


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.29 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11539.58 ms
[Epoch 3] loss: 23.01539606601


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.38 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11587.52 ms
[Epoch 4] loss: 16.547792054712772


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.15 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.96     |
|  1 | macro_precision | 0.473541 |
|  2 | macro_recall    | 0.488204 |
|  3 | macro_f1        | 0.480644 |
|  4 | micro_precision | 0.96     |
|  5 | micro_recall    | 0.96     |
|  6 | micro_f1        | 0.96     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11558.99 ms
[Epoch 5] loss: 16.214888242073357


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.04 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11231.78 ms
[Epoch 6] loss: 10.543208619579673


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.93 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.577703 |
|  2 | macro_recall    | 0.719547 |
|  3 | macro_f1        | 0.625536 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11577.64 ms
[Epoch 7] loss: 7.903421462513506


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.72 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.583333 |
|  2 | macro_recall    | 0.723279 |
|  3 | macro_f1        | 0.630857 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11578.60 ms
[Epoch 8] loss: 6.868284740485251


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.20 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11209.59 ms
[Epoch 9] loss: 5.259846377419308


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.13 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 517.66 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.8316831683168316  |
|  1 | macro_precision | 0.5290254237288136  |
|  2 | macro_recall    | 0.48604910714285715 |
|  3 | macro_f1        | 0.47288378766140604 |
|  4 | micro_precision | 0.8316831683168316  |
|  5 | micro_recall    | 0.8316831683168316  |
|  6 | micro_f1        | 0.8316831683168316  |
|  7 | lans            | ['en', 'es']        |
|  8 | sample_rate     | 0.1                 |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  882 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11495.04 ms
[Epoch 0] loss: 64.94179064035416


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.25 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.71     |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.282258 |
|  3 | macro_f1        | 0.266518 |
|  4 | micro_precision | 0.71     |
|  5 | micro_recall    | 0.71     |
|  6 | micro_f1        | 0.71     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11523.71 ms
[Epoch 1] loss: 41.02215424180031


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.62 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11560.34 ms
[Epoch 2] loss: 27.282282203435898


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.00 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11303.43 ms
[Epoch 3] loss: 23.01539606601


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 532.04 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11660.52 ms
[Epoch 4] loss: 16.547792054712772


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.97 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.96     |
|  1 | macro_precision | 0.473541 |
|  2 | macro_recall    | 0.488204 |
|  3 | macro_f1        | 0.480644 |
|  4 | micro_precision | 0.96     |
|  5 | micro_recall    | 0.96     |
|  6 | micro_f1        | 0.96     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11740.26 ms
[Epoch 5] loss: 16.214888242073357


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.02 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11314.45 ms
[Epoch 6] loss: 10.543208619579673


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.85 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.577703 |
|  2 | macro_recall    | 0.719547 |
|  3 | macro_f1        | 0.625536 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11573.66 ms
[Epoch 7] loss: 7.903421462513506


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.87 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.583333 |
|  2 | macro_recall    | 0.723279 |
|  3 | macro_f1        | 0.630857 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11315.71 ms
[Epoch 8] loss: 6.868284740485251


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.22 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11603.37 ms
[Epoch 9] loss: 5.259846377419308


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.15 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.45 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.8316831683168316  |
|  1 | macro_precision | 0.5290254237288136  |
|  2 | macro_recall    | 0.48604910714285715 |
|  3 | macro_f1        | 0.47288378766140604 |
|  4 | micro_precision | 0.8316831683168316  |
|  5 | micro_recall    | 0.8316831683168316  |
|  6 | micro_f1        | 0.8316831683168316  |
|  7 | lans            | ['en', 'es']        |
|  8 | sample_rate     | 0.1                 |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1002 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13069.81 ms
[Epoch 0] loss: 70.76067984104156


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.88 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.74     |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.306452 |
|  3 | macro_f1        | 0.30148  |
|  4 | micro_precision | 0.74     |
|  5 | micro_recall    | 0.74     |
|  6 | micro_f1        | 0.74     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13113.27 ms
[Epoch 1] loss: 44.19239813089371


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.98 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.438209 |
|  2 | macro_recall    | 0.465816 |
|  3 | macro_f1        | 0.448661 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13094.28 ms
[Epoch 2] loss: 30.880545631051064


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.55 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13049.04 ms
[Epoch 3] loss: 23.426958806812763


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.97 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12795.09 ms
[Epoch 4] loss: 19.84749185293913


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 533.26 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13196.35 ms
[Epoch 5] loss: 14.370261981151998


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 531.74 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13192.85 ms
[Epoch 6] loss: 10.660271880216897


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 532.36 ms
Early Stopping. Epoch: 6, best_valid_metric (macro_f1): 0.47513013302486984



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.95 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7920792079207921  |
|  1 | macro_precision | 0.6389000401445203  |
|  2 | macro_recall    | 0.4754464285714286  |
|  3 | macro_f1        | 0.45777777777777784 |
|  4 | micro_precision | 0.7920792079207921  |
|  5 | micro_recall    | 0.7920792079207921  |
|  6 | micro_f1        | 0.7920792079207921  |
|  7 | lans            | ['en', 'es']        |
|  8 | sample_rate     | 0.25                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1002 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13096.72 ms
[Epoch 0] loss: 70.76067984104156


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 517.86 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.74     |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.306452 |
|  3 | macro_f1        | 0.30148  |
|  4 | micro_precision | 0.74     |
|  5 | micro_recall    | 0.74     |
|  6 | micro_f1        | 0.74     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13063.28 ms
[Epoch 1] loss: 44.19239813089371


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.74 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.438209 |
|  2 | macro_recall    | 0.465816 |
|  3 | macro_f1        | 0.448661 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12777.65 ms
[Epoch 2] loss: 30.880545631051064


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.85 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13103.48 ms
[Epoch 3] loss: 23.426958806812763


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.26 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13119.96 ms
[Epoch 4] loss: 19.84749185293913


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.28 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13208.52 ms
[Epoch 5] loss: 14.370261981151998


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.33 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12873.94 ms
[Epoch 6] loss: 10.660271880216897


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 533.48 ms
Early Stopping. Epoch: 6, best_valid_metric (macro_f1): 0.47513013302486984



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.07 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7920792079207921  |
|  1 | macro_precision | 0.6389000401445203  |
|  2 | macro_recall    | 0.4754464285714286  |
|  3 | macro_f1        | 0.45777777777777784 |
|  4 | micro_precision | 0.7920792079207921  |
|  5 | micro_recall    | 0.7920792079207921  |
|  6 | micro_f1        | 0.7920792079207921  |
|  7 | lans            | ['en', 'es']        |
|  8 | sample_rate     | 0.25                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1203 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15741.45 ms
[Epoch 0] loss: 82.64838606119156


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.54 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.83     |
|  1 | macro_precision | 0.437331 |
|  2 | macro_recall    | 0.383365 |
|  3 | macro_f1        | 0.394946 |
|  4 | micro_precision | 0.83     |
|  5 | micro_recall    | 0.83     |
|  6 | micro_f1        | 0.83     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15698.76 ms
[Epoch 1] loss: 47.66208986938


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.84 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15727.87 ms
[Epoch 2] loss: 34.267591550946236


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.36 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.459459 |
|  2 | macro_recall    | 0.485075 |
|  3 | macro_f1        | 0.470249 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15731.03 ms
[Epoch 3] loss: 26.285477370023727


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 534.98 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15801.54 ms
[Epoch 4] loss: 20.20995405688882


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.81 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.546371 |
|  2 | macro_recall    | 0.726408 |
|  3 | macro_f1        | 0.580083 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15736.04 ms
[Epoch 5] loss: 16.07701744697988


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.55 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15698.41 ms
[Epoch 6] loss: 10.666933275293559


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.00 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15604.75 ms
[Epoch 7] loss: 9.73719688388519


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.51 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15345.46 ms
[Epoch 8] loss: 7.054802236962132


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 844.79 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.539216 |
|  2 | macro_recall    | 0.70715  |
|  3 | macro_f1        | 0.580695 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15414.27 ms
[Epoch 9] loss: 5.975075391354039


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.09 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.49 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7128712871287128  |
|  1 | macro_precision | 0.44519884566729917 |
|  2 | macro_recall    | 0.45870535714285715 |
|  3 | macro_f1        | 0.44090825350036844 |
|  4 | micro_precision | 0.7128712871287128  |
|  5 | micro_recall    | 0.7128712871287128  |
|  6 | micro_f1        | 0.7128712871287128  |
|  7 | lans            | ['en', 'es']        |
|  8 | sample_rate     | 0.5                 |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1203 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15685.85 ms
[Epoch 0] loss: 82.64838606119156


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.68 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.83     |
|  1 | macro_precision | 0.437331 |
|  2 | macro_recall    | 0.383365 |
|  3 | macro_f1        | 0.394946 |
|  4 | micro_precision | 0.83     |
|  5 | micro_recall    | 0.83     |
|  6 | micro_f1        | 0.83     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15670.73 ms
[Epoch 1] loss: 47.66208986938


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.47 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15634.34 ms
[Epoch 2] loss: 34.267591550946236


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.66 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.459459 |
|  2 | macro_recall    | 0.485075 |
|  3 | macro_f1        | 0.470249 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15699.19 ms
[Epoch 3] loss: 26.285477370023727


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.80 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15738.24 ms
[Epoch 4] loss: 20.20995405688882


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.82 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.546371 |
|  2 | macro_recall    | 0.726408 |
|  3 | macro_f1        | 0.580083 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15712.81 ms
[Epoch 5] loss: 16.07701744697988


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.42 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15346.88 ms
[Epoch 6] loss: 10.666933275293559


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.04 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15652.57 ms
[Epoch 7] loss: 9.73719688388519


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.04 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15698.04 ms
[Epoch 8] loss: 7.054802236962132


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.46 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.539216 |
|  2 | macro_recall    | 0.70715  |
|  3 | macro_f1        | 0.580695 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15645.45 ms
[Epoch 9] loss: 5.975075391354039


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.20 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.60 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7128712871287128  |
|  1 | macro_precision | 0.44519884566729917 |
|  2 | macro_recall    | 0.45870535714285715 |
|  3 | macro_f1        | 0.44090825350036844 |
|  4 | micro_precision | 0.7128712871287128  |
|  5 | micro_recall    | 0.7128712871287128  |
|  6 | micro_f1        | 0.7128712871287128  |
|  7 | lans            | ['en', 'es']        |
|  8 | sample_rate     | 0.5                 |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1403 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18121.67 ms
[Epoch 0] loss: 95.693499147892


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.23 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.83     |
|  1 | macro_precision | 0.437331 |
|  2 | macro_recall    | 0.383365 |
|  3 | macro_f1        | 0.394946 |
|  4 | micro_precision | 0.83     |
|  5 | micro_recall    | 0.83     |
|  6 | micro_f1        | 0.83     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18158.99 ms
[Epoch 1] loss: 55.99548923969269


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.01 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18196.27 ms
[Epoch 2] loss: 40.65027213096619


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.28 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18331.51 ms
[Epoch 3] loss: 29.468036636710167


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 533.92 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18357.06 ms
[Epoch 4] loss: 20.643336322158575


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.15 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18212.11 ms
[Epoch 5] loss: 13.015306610614061


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.41 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.87     |
|  1 | macro_precision | 0.48125  |
|  2 | macro_recall    | 0.674892 |
|  3 | macro_f1        | 0.492587 |
|  4 | micro_precision | 0.87     |
|  5 | micro_recall    | 0.87     |
|  6 | micro_f1        | 0.87     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18205.90 ms
[Epoch 6] loss: 10.198246805695817


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.12 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18104.42 ms
[Epoch 7] loss: 6.75186690420378


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.75 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.499626 |
|  2 | macro_recall    | 0.682354 |
|  3 | macro_f1        | 0.504876 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18122.98 ms
[Epoch 8] loss: 6.256626928690821


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.86 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.517778 |
|  2 | macro_recall    | 0.689817 |
|  3 | macro_f1        | 0.512406 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18142.43 ms
[Epoch 9] loss: 7.16585067496635


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.55 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.53 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.7821782178217822 |
|  1 | macro_precision | 0.5030701754385964 |
|  2 | macro_recall    | 0.4464285714285714 |
|  3 | macro_f1        | 0.4424731182795699 |
|  4 | micro_precision | 0.7821782178217822 |
|  5 | micro_recall    | 0.7821782178217822 |
|  6 | micro_f1        | 0.7821782178217822 |
|  7 | lans            | ['en', 'es']       |
|  8 | sample_rate     | 0.75               |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1403 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18114.64 ms
[Epoch 0] loss: 95.693499147892


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.63 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.83     |
|  1 | macro_precision | 0.437331 |
|  2 | macro_recall    | 0.383365 |
|  3 | macro_f1        | 0.394946 |
|  4 | micro_precision | 0.83     |
|  5 | micro_recall    | 0.83     |
|  6 | micro_f1        | 0.83     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18329.11 ms
[Epoch 1] loss: 55.99548923969269


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.78 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18324.99 ms
[Epoch 2] loss: 40.65027213096619


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.32 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18357.70 ms
[Epoch 3] loss: 29.468036636710167


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 533.38 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18229.05 ms
[Epoch 4] loss: 20.643336322158575


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.78 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18208.74 ms
[Epoch 5] loss: 13.015306610614061


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.11 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.87     |
|  1 | macro_precision | 0.48125  |
|  2 | macro_recall    | 0.674892 |
|  3 | macro_f1        | 0.492587 |
|  4 | micro_precision | 0.87     |
|  5 | micro_recall    | 0.87     |
|  6 | micro_f1        | 0.87     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18196.63 ms
[Epoch 6] loss: 10.198246805695817


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.28 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18204.16 ms
[Epoch 7] loss: 6.75186690420378


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.61 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.499626 |
|  2 | macro_recall    | 0.682354 |
|  3 | macro_f1        | 0.504876 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18101.20 ms
[Epoch 8] loss: 6.256626928690821


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.68 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.517778 |
|  2 | macro_recall    | 0.689817 |
|  3 | macro_f1        | 0.512406 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18152.31 ms
[Epoch 9] loss: 7.16585067496635


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.89 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.04 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.7821782178217822 |
|  1 | macro_precision | 0.5030701754385964 |
|  2 | macro_recall    | 0.4464285714285714 |
|  3 | macro_f1        | 0.4424731182795699 |
|  4 | micro_precision | 0.7821782178217822 |
|  5 | micro_recall    | 0.7821782178217822 |
|  6 | micro_f1        | 0.7821782178217822 |
|  7 | lans            | ['en', 'es']       |
|  8 | sample_rate     | 0.75               |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1604 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20713.24 ms
[Epoch 0] loss: 108.51383900642395


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.68 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.81     |
|  1 | macro_precision | 0.420393 |
|  2 | macro_recall    | 0.37157  |
|  3 | macro_f1        | 0.381386 |
|  4 | micro_precision | 0.81     |
|  5 | micro_recall    | 0.81     |
|  6 | micro_f1        | 0.81     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20858.66 ms
[Epoch 1] loss: 62.463216066360474


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.60 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20793.81 ms
[Epoch 2] loss: 43.984900772571564


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.88 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20746.85 ms
[Epoch 3] loss: 30.448208015412092


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.98 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21175.35 ms
[Epoch 4] loss: 22.01429254002869


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.24 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.525    |
|  2 | macro_recall    | 0.710279 |
|  3 | macro_f1        | 0.550857 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20453.99 ms
[Epoch 5] loss: 14.017967623891309


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 854.08 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20592.93 ms
[Epoch 6] loss: 12.35726838489063


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.14 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21349.47 ms
[Epoch 7] loss: 9.77025969978422


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.72 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20709.67 ms
[Epoch 8] loss: 4.91853123891633


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.75 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.87     |
|  1 | macro_precision | 0.528646 |
|  2 | macro_recall    | 0.679225 |
|  3 | macro_f1        | 0.561694 |
|  4 | micro_precision | 0.87     |
|  5 | micro_recall    | 0.87     |
|  6 | micro_f1        | 0.87     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20668.52 ms
[Epoch 9] loss: 1.4303196766995825


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.07 ms
--------------------------------------------------------------------------------
[Epoch 9]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.582143 |
|  2 | macro_recall    | 0.715214 |
|  3 | macro_f1        | 0.626674 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.22 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7722772277227723  |
|  1 | macro_precision | 0.4676894013419321  |
|  2 | macro_recall    | 0.48995535714285715 |
|  3 | macro_f1        | 0.46715656256378846 |
|  4 | micro_precision | 0.7722772277227723  |
|  5 | micro_recall    | 0.7722772277227723  |
|  6 | micro_f1        | 0.7722772277227723  |
|  7 | lans            | ['en', 'es']        |
|  8 | sample_rate     | 1                   |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1604 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20681.69 ms
[Epoch 0] loss: 108.51383900642395


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.38 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.81     |
|  1 | macro_precision | 0.420393 |
|  2 | macro_recall    | 0.37157  |
|  3 | macro_f1        | 0.381386 |
|  4 | micro_precision | 0.81     |
|  5 | micro_recall    | 0.81     |
|  6 | micro_f1        | 0.81     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20831.16 ms
[Epoch 1] loss: 62.463216066360474


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 532.51 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20918.49 ms
[Epoch 2] loss: 43.984900772571564


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.14 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21014.28 ms
[Epoch 3] loss: 30.448208015412092


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.43 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20989.18 ms
[Epoch 4] loss: 22.01429254002869


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.01 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.525    |
|  2 | macro_recall    | 0.710279 |
|  3 | macro_f1        | 0.550857 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21005.45 ms
[Epoch 5] loss: 14.017967623891309


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 532.00 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20936.91 ms
[Epoch 6] loss: 12.35726838489063


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 872.36 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20943.72 ms
[Epoch 7] loss: 9.77025969978422


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.11 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20349.84 ms
[Epoch 8] loss: 4.91853123891633


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 856.70 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.87     |
|  1 | macro_precision | 0.528646 |
|  2 | macro_recall    | 0.679225 |
|  3 | macro_f1        | 0.561694 |
|  4 | micro_precision | 0.87     |
|  5 | micro_recall    | 0.87     |
|  6 | micro_f1        | 0.87     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20687.07 ms
[Epoch 9] loss: 1.4303196766995825


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.45 ms
--------------------------------------------------------------------------------
[Epoch 9]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.582143 |
|  2 | macro_recall    | 0.715214 |
|  3 | macro_f1        | 0.626674 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.28 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7722772277227723  |
|  1 | macro_precision | 0.4676894013419321  |
|  2 | macro_recall    | 0.48995535714285715 |
|  3 | macro_f1        | 0.46715656256378846 |
|  4 | micro_precision | 0.7722772277227723  |
|  5 | micro_recall    | 0.7722772277227723  |
|  6 | micro_f1        | 0.7722772277227723  |
|  7 | lans            | ['en', 'es']        |
|  8 | sample_rate     | 1                   |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  882 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11159.33 ms
[Epoch 0] loss: 64.96710962057114


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 863.23 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.72     |
|  1 | macro_precision | 0.426316 |
|  2 | macro_recall    | 0.290323 |
|  3 | macro_f1        | 0.276235 |
|  4 | micro_precision | 0.72     |
|  5 | micro_recall    | 0.72     |
|  6 | micro_f1        | 0.72     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11226.34 ms
[Epoch 1] loss: 41.06074899435043


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.49 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.433333 |
|  2 | macro_recall    | 0.462085 |
|  3 | macro_f1        | 0.443551 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11586.69 ms
[Epoch 2] loss: 27.67262376844883


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.27 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11621.68 ms
[Epoch 3] loss: 22.681668411940336


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.91 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11242.73 ms
[Epoch 4] loss: 16.093305945396423


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.81 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11622.46 ms
[Epoch 5] loss: 17.746108831837773


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.74 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11671.83 ms
[Epoch 6] loss: 11.502907305955887


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.31 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11332.61 ms
[Epoch 7] loss: 7.307616362348199


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 531.55 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.503456 |
|  2 | macro_recall    | 0.699085 |
|  3 | macro_f1        | 0.520552 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11661.79 ms
[Epoch 8] loss: 6.787203759886324


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.13 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11346.16 ms
[Epoch 9] loss: 6.699355230783112


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.23 ms
--------------------------------------------------------------------------------
[Epoch 9]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.583215 |
|  2 | macro_recall    | 0.710881 |
|  3 | macro_f1        | 0.625875 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.74 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8118811881188119 |
|  1 | macro_precision | 0.5264844804318488 |
|  2 | macro_recall    | 0.5005580357142857 |
|  3 | macro_f1        | 0.4971732247851651 |
|  4 | micro_precision | 0.8118811881188119 |
|  5 | micro_recall    | 0.8118811881188119 |
|  6 | micro_f1        | 0.8118811881188119 |
|  7 | lans            | ['en', 'pt']       |
|  8 | sample_rate     | 0.1                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  882 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11201.72 ms
[Epoch 0] loss: 64.96710962057114


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.53 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.72     |
|  1 | macro_precision | 0.426316 |
|  2 | macro_recall    | 0.290323 |
|  3 | macro_f1        | 0.276235 |
|  4 | micro_precision | 0.72     |
|  5 | micro_recall    | 0.72     |
|  6 | micro_f1        | 0.72     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11616.43 ms
[Epoch 1] loss: 41.06074899435043


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.04 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.433333 |
|  2 | macro_recall    | 0.462085 |
|  3 | macro_f1        | 0.443551 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11565.30 ms
[Epoch 2] loss: 27.67262376844883


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.89 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11214.55 ms
[Epoch 3] loss: 22.681668411940336


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 858.03 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11245.16 ms
[Epoch 4] loss: 16.093305945396423


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.39 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11612.28 ms
[Epoch 5] loss: 17.746108831837773


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.45 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11348.09 ms
[Epoch 6] loss: 11.502907305955887


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 864.26 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11320.36 ms
[Epoch 7] loss: 7.307616362348199


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.39 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.503456 |
|  2 | macro_recall    | 0.699085 |
|  3 | macro_f1        | 0.520552 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11632.64 ms
[Epoch 8] loss: 6.787203759886324


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.36 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11220.85 ms
[Epoch 9] loss: 6.699355230783112


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.63 ms
--------------------------------------------------------------------------------
[Epoch 9]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.583215 |
|  2 | macro_recall    | 0.710881 |
|  3 | macro_f1        | 0.625875 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 863.06 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8118811881188119 |
|  1 | macro_precision | 0.5264844804318488 |
|  2 | macro_recall    | 0.5005580357142857 |
|  3 | macro_f1        | 0.4971732247851651 |
|  4 | micro_precision | 0.8118811881188119 |
|  5 | micro_recall    | 0.8118811881188119 |
|  6 | micro_f1        | 0.8118811881188119 |
|  7 | lans            | ['en', 'pt']       |
|  8 | sample_rate     | 0.1                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1002 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12718.72 ms
[Epoch 0] loss: 70.72846668958664


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.74 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.74     |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.306452 |
|  3 | macro_f1        | 0.30148  |
|  4 | micro_precision | 0.74     |
|  5 | micro_recall    | 0.74     |
|  6 | micro_f1        | 0.74     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13101.67 ms
[Epoch 1] loss: 45.36156648397446


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 517.96 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.450927 |
|  2 | macro_recall    | 0.464612 |
|  3 | macro_f1        | 0.457556 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13104.06 ms
[Epoch 2] loss: 32.422000125050545


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.65 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12764.46 ms
[Epoch 3] loss: 24.238072052598


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.89 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13160.12 ms
[Epoch 4] loss: 20.86368218064308


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.59 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12889.51 ms
[Epoch 5] loss: 16.179992796853185


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 883.22 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12850.44 ms
[Epoch 6] loss: 11.249779834412038


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.01 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13158.74 ms
[Epoch 7] loss: 8.148556703701615


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.98 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13040.22 ms
[Epoch 8] loss: 6.860253869090229


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.00 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13067.54 ms
[Epoch 9] loss: 5.48129724455066


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.00 ms
Early Stopping. Epoch: 9, best_valid_metric (macro_f1): 0.47513013302486984



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.96 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.7722772277227723 |
|  1 | macro_precision | 0.5044612794612795 |
|  2 | macro_recall    | 0.51953125         |
|  3 | macro_f1        | 0.5078185298385354 |
|  4 | micro_precision | 0.7722772277227723 |
|  5 | micro_recall    | 0.7722772277227723 |
|  6 | micro_f1        | 0.7722772277227723 |
|  7 | lans            | ['en', 'pt']       |
|  8 | sample_rate     | 0.25               |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1002 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12729.60 ms
[Epoch 0] loss: 70.72846668958664


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.78 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.74     |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.306452 |
|  3 | macro_f1        | 0.30148  |
|  4 | micro_precision | 0.74     |
|  5 | micro_recall    | 0.74     |
|  6 | micro_f1        | 0.74     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13116.30 ms
[Epoch 1] loss: 45.36156648397446


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.25 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.450927 |
|  2 | macro_recall    | 0.464612 |
|  3 | macro_f1        | 0.457556 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13114.40 ms
[Epoch 2] loss: 32.422000125050545


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.67 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12782.10 ms
[Epoch 3] loss: 24.238072052598


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.31 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13188.26 ms
[Epoch 4] loss: 20.86368218064308


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.44 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13213.68 ms
[Epoch 5] loss: 16.179992796853185


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.40 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13245.73 ms
[Epoch 6] loss: 11.249779834412038


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.84 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13274.64 ms
[Epoch 7] loss: 8.148556703701615


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.04 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12904.40 ms
[Epoch 8] loss: 6.860253869090229


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 864.58 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12871.43 ms
[Epoch 9] loss: 5.48129724455066


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.80 ms
Early Stopping. Epoch: 9, best_valid_metric (macro_f1): 0.47513013302486984



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 878.01 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.7722772277227723 |
|  1 | macro_precision | 0.5044612794612795 |
|  2 | macro_recall    | 0.51953125         |
|  3 | macro_f1        | 0.5078185298385354 |
|  4 | micro_precision | 0.7722772277227723 |
|  5 | micro_recall    | 0.7722772277227723 |
|  6 | micro_f1        | 0.7722772277227723 |
|  7 | lans            | ['en', 'pt']       |
|  8 | sample_rate     | 0.25               |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1203 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15343.07 ms
[Epoch 0] loss: 82.55159509181976


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.04 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.81     |
|  1 | macro_precision | 0.444767 |
|  2 | macro_recall    | 0.362903 |
|  3 | macro_f1        | 0.37451  |
|  4 | micro_precision | 0.81     |
|  5 | micro_recall    | 0.81     |
|  6 | micro_f1        | 0.81     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15707.80 ms
[Epoch 1] loss: 47.89056345820427


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.93 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.462234 |
|  2 | macro_recall    | 0.476408 |
|  3 | macro_f1        | 0.4691   |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15753.69 ms
[Epoch 2] loss: 34.40077564120293


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.01 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15821.11 ms
[Epoch 3] loss: 25.28530688583851


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.42 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15856.25 ms
[Epoch 4] loss: 19.134629886597395


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.82 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.588971 |
|  2 | macro_recall    | 0.710279 |
|  3 | macro_f1        | 0.628719 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15835.53 ms
[Epoch 5] loss: 14.2161974767223


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 531.76 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15867.69 ms
[Epoch 6] loss: 12.030517202336341


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.85 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15882.86 ms
[Epoch 7] loss: 9.873719355091453


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.99 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15498.22 ms
[Epoch 8] loss: 8.917749978485517


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.48 ms
Early Stopping. Epoch: 8, best_valid_metric (macro_f1): 0.6287188828172435



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.20 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7227722772277227  |
|  1 | macro_precision | 0.4713068181818182  |
|  2 | macro_recall    | 0.5496651785714286  |
|  3 | macro_f1        | 0.48089871194379386 |
|  4 | micro_precision | 0.7227722772277227  |
|  5 | micro_recall    | 0.7227722772277227  |
|  6 | micro_f1        | 0.7227722772277227  |
|  7 | lans            | ['en', 'pt']        |
|  8 | sample_rate     | 0.5                 |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1203 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15383.12 ms
[Epoch 0] loss: 82.55159509181976


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.42 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.81     |
|  1 | macro_precision | 0.444767 |
|  2 | macro_recall    | 0.362903 |
|  3 | macro_f1        | 0.37451  |
|  4 | micro_precision | 0.81     |
|  5 | micro_recall    | 0.81     |
|  6 | micro_f1        | 0.81     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15759.79 ms
[Epoch 1] loss: 47.89056345820427


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.02 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.462234 |
|  2 | macro_recall    | 0.476408 |
|  3 | macro_f1        | 0.4691   |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15833.51 ms
[Epoch 2] loss: 34.40077564120293


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.17 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15809.88 ms
[Epoch 3] loss: 25.28530688583851


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.18 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15856.61 ms
[Epoch 4] loss: 19.134629886597395


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 532.33 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.588971 |
|  2 | macro_recall    | 0.710279 |
|  3 | macro_f1        | 0.628719 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15411.59 ms
[Epoch 5] loss: 14.2161974767223


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.19 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15697.75 ms
[Epoch 6] loss: 12.030517202336341


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.52 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15715.15 ms
[Epoch 7] loss: 9.873719355091453


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.66 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15819.12 ms
[Epoch 8] loss: 8.917749978485517


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.88 ms
Early Stopping. Epoch: 8, best_valid_metric (macro_f1): 0.6287188828172435



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.18 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7227722772277227  |
|  1 | macro_precision | 0.4713068181818182  |
|  2 | macro_recall    | 0.5496651785714286  |
|  3 | macro_f1        | 0.48089871194379386 |
|  4 | micro_precision | 0.7227722772277227  |
|  5 | micro_recall    | 0.7227722772277227  |
|  6 | micro_f1        | 0.7227722772277227  |
|  7 | lans            | ['en', 'pt']        |
|  8 | sample_rate     | 0.5                 |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1403 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18148.05 ms
[Epoch 0] loss: 95.23454970121384


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.27 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.83     |
|  1 | macro_precision | 0.428125 |
|  2 | macro_recall    | 0.387699 |
|  3 | macro_f1        | 0.397559 |
|  4 | micro_precision | 0.83     |
|  5 | micro_recall    | 0.83     |
|  6 | micro_f1        | 0.83     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18211.32 ms
[Epoch 1] loss: 53.54384212195873


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.24 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18208.17 ms
[Epoch 2] loss: 39.816862769424915


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.57 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17816.99 ms
[Epoch 3] loss: 28.2426930218935


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.81 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18537.44 ms
[Epoch 4] loss: 19.838430916890502


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.36 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.86     |
|  1 | macro_precision | 0.477778 |
|  2 | macro_recall    | 0.67116  |
|  3 | macro_f1        | 0.485003 |
|  4 | micro_precision | 0.86     |
|  5 | micro_recall    | 0.86     |
|  6 | micro_f1        | 0.86     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17814.38 ms
[Epoch 5] loss: 14.207384491339326


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.23 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.597775 |
|  2 | macro_recall    | 0.726408 |
|  3 | macro_f1        | 0.641186 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18205.65 ms
[Epoch 6] loss: 9.246242973487824


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 869.07 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18127.17 ms
[Epoch 7] loss: 6.940750257228501


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.99 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18197.15 ms
[Epoch 8] loss: 8.115857810014859


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 531.25 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17957.12 ms
[Epoch 9] loss: 7.506517404224724


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.76 ms
Early Stopping. Epoch: 9, best_valid_metric (macro_f1): 0.6411862990810359



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.73 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.7425742574257426 |
|  1 | macro_precision | 0.4594907407407407 |
|  2 | macro_recall    | 0.4754464285714286 |
|  3 | macro_f1        | 0.4620056497175141 |
|  4 | micro_precision | 0.7425742574257426 |
|  5 | micro_recall    | 0.7425742574257426 |
|  6 | micro_f1        | 0.7425742574257425 |
|  7 | lans            | ['en', 'pt']       |
|  8 | sample_rate     | 0.75               |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1403 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18133.59 ms
[Epoch 0] loss: 95.23454970121384


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 516.65 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.83     |
|  1 | macro_precision | 0.428125 |
|  2 | macro_recall    | 0.387699 |
|  3 | macro_f1        | 0.397559 |
|  4 | micro_precision | 0.83     |
|  5 | micro_recall    | 0.83     |
|  6 | micro_f1        | 0.83     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18181.99 ms
[Epoch 1] loss: 53.54384212195873


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.63 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18188.13 ms
[Epoch 2] loss: 39.816862769424915


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.76 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18227.63 ms
[Epoch 3] loss: 28.2426930218935


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.73 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18249.74 ms
[Epoch 4] loss: 19.838430916890502


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.18 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.86     |
|  1 | macro_precision | 0.477778 |
|  2 | macro_recall    | 0.67116  |
|  3 | macro_f1        | 0.485003 |
|  4 | micro_precision | 0.86     |
|  5 | micro_recall    | 0.86     |
|  6 | micro_f1        | 0.86     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18217.65 ms
[Epoch 5] loss: 14.207384491339326


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.19 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.597775 |
|  2 | macro_recall    | 0.726408 |
|  3 | macro_f1        | 0.641186 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17812.36 ms
[Epoch 6] loss: 9.246242973487824


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.74 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18539.39 ms
[Epoch 7] loss: 6.940750257228501


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.60 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17992.01 ms
[Epoch 8] loss: 8.115857810014859


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.97 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18207.37 ms
[Epoch 9] loss: 7.506517404224724


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.16 ms
Early Stopping. Epoch: 9, best_valid_metric (macro_f1): 0.6411862990810359



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 869.60 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.7425742574257426 |
|  1 | macro_precision | 0.4594907407407407 |
|  2 | macro_recall    | 0.4754464285714286 |
|  3 | macro_f1        | 0.4620056497175141 |
|  4 | micro_precision | 0.7425742574257426 |
|  5 | micro_recall    | 0.7425742574257426 |
|  6 | micro_f1        | 0.7425742574257425 |
|  7 | lans            | ['en', 'pt']       |
|  8 | sample_rate     | 0.75               |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1604 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20704.94 ms
[Epoch 0] loss: 108.76208728551865


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.02 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.75     |
|  1 | macro_precision | 0.432065 |
|  2 | macro_recall    | 0.314516 |
|  3 | macro_f1        | 0.313256 |
|  4 | micro_precision | 0.75     |
|  5 | micro_recall    | 0.75     |
|  6 | micro_f1        | 0.75     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20817.91 ms
[Epoch 1] loss: 63.06195703148842


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.58 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20833.45 ms
[Epoch 2] loss: 42.829197488725185


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.52 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20908.07 ms
[Epoch 3] loss: 31.083991039544344


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.69 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20981.00 ms
[Epoch 4] loss: 20.556109270080924


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.32 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.510795 |
|  2 | macro_recall    | 0.678021 |
|  3 | macro_f1        | 0.498906 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20858.71 ms
[Epoch 5] loss: 14.119527137372643


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.09 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20838.57 ms
[Epoch 6] loss: 9.29774437576998


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.81 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.721273 |
|  2 | macro_recall    | 0.705946 |
|  3 | macro_f1        | 0.712986 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20909.17 ms
[Epoch 7] loss: 6.286281776148826


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.82 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.725406 |
|  2 | macro_recall    | 0.714011 |
|  3 | macro_f1        | 0.719444 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21111.51 ms
[Epoch 8] loss: 3.6313945284928195


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.56 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20663.64 ms
[Epoch 9] loss: 2.816536024911329


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.31 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.03 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7821782178217822  |
|  1 | macro_precision | 0.47866127583108714 |
|  2 | macro_recall    | 0.49386160714285715 |
|  3 | macro_f1        | 0.47509157509157507 |
|  4 | micro_precision | 0.7821782178217822  |
|  5 | micro_recall    | 0.7821782178217822  |
|  6 | micro_f1        | 0.7821782178217822  |
|  7 | lans            | ['en', 'pt']        |
|  8 | sample_rate     | 1                   |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1604 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20727.07 ms
[Epoch 0] loss: 108.76208728551865


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.28 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.75     |
|  1 | macro_precision | 0.432065 |
|  2 | macro_recall    | 0.314516 |
|  3 | macro_f1        | 0.313256 |
|  4 | micro_precision | 0.75     |
|  5 | micro_recall    | 0.75     |
|  6 | micro_f1        | 0.75     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20802.07 ms
[Epoch 1] loss: 63.06195703148842


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 515.92 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20453.63 ms
[Epoch 2] loss: 42.829197488725185


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.68 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21301.50 ms
[Epoch 3] loss: 31.083991039544344


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.35 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20724.97 ms
[Epoch 4] loss: 20.556109270080924


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.20 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.510795 |
|  2 | macro_recall    | 0.678021 |
|  3 | macro_f1        | 0.498906 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20831.80 ms
[Epoch 5] loss: 14.119527137372643


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.81 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20778.39 ms
[Epoch 6] loss: 9.29774437576998


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.91 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.721273 |
|  2 | macro_recall    | 0.705946 |
|  3 | macro_f1        | 0.712986 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21108.59 ms
[Epoch 7] loss: 6.286281776148826


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.99 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.725406 |
|  2 | macro_recall    | 0.714011 |
|  3 | macro_f1        | 0.719444 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20688.44 ms
[Epoch 8] loss: 3.6313945284928195


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.33 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20718.19 ms
[Epoch 9] loss: 2.816536024911329


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.65 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.45 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7821782178217822  |
|  1 | macro_precision | 0.47866127583108714 |
|  2 | macro_recall    | 0.49386160714285715 |
|  3 | macro_f1        | 0.47509157509157507 |
|  4 | micro_precision | 0.7821782178217822  |
|  5 | micro_recall    | 0.7821782178217822  |
|  6 | micro_f1        | 0.7821782178217822  |
|  7 | lans            | ['en', 'pt']        |
|  8 | sample_rate     | 1                   |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  882 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11534.72 ms
[Epoch 0] loss: 64.9944115281105


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.31 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.7      |
|  1 | macro_precision | 0.42268  |
|  2 | macro_recall    | 0.274194 |
|  3 | macro_f1        | 0.248386 |
|  4 | micro_precision | 0.7      |
|  5 | micro_recall    | 0.7      |
|  6 | micro_f1        | 0.7      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11502.77 ms
[Epoch 1] loss: 41.057246282696724


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.84 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.87     |
|  1 | macro_precision | 0.424261 |
|  2 | macro_recall    | 0.454622 |
|  3 | macro_f1        | 0.433479 |
|  4 | micro_precision | 0.87     |
|  5 | micro_recall    | 0.87     |
|  6 | micro_f1        | 0.87     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11179.56 ms
[Epoch 2] loss: 28.18021285533905


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.51 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.454427 |
|  2 | macro_recall    | 0.47701  |
|  3 | macro_f1        | 0.464339 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11587.23 ms
[Epoch 3] loss: 22.007512286305428


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.81 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11654.66 ms
[Epoch 4] loss: 19.175831384956837


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.62 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11341.52 ms
[Epoch 5] loss: 14.486416194587946


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 531.52 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.96     |
|  1 | macro_precision | 0.480194 |
|  2 | macro_recall    | 0.483871 |
|  3 | macro_f1        | 0.482019 |
|  4 | micro_precision | 0.96     |
|  5 | micro_recall    | 0.96     |
|  6 | micro_f1        | 0.96     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11714.39 ms
[Epoch 6] loss: 12.15014000236988


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.99 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.529279 |
|  2 | macro_recall    | 0.707752 |
|  3 | macro_f1        | 0.574456 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11252.19 ms
[Epoch 7] loss: 8.384479139000177


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 890.18 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11226.74 ms
[Epoch 8] loss: 6.666847687214613


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.63 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11583.96 ms
[Epoch 9] loss: 5.280981538584456


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.32 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.17 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.7821782178217822 |
|  1 | macro_precision | 0.4917929292929293 |
|  2 | macro_recall    | 0.49609375         |
|  3 | macro_f1        | 0.4916306528785307 |
|  4 | micro_precision | 0.7821782178217822 |
|  5 | micro_recall    | 0.7821782178217822 |
|  6 | micro_f1        | 0.7821782178217822 |
|  7 | lans            | ['en', 'ru']       |
|  8 | sample_rate     | 0.1                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  882 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11236.24 ms
[Epoch 0] loss: 64.9944115281105


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.82 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.7      |
|  1 | macro_precision | 0.42268  |
|  2 | macro_recall    | 0.274194 |
|  3 | macro_f1        | 0.248386 |
|  4 | micro_precision | 0.7      |
|  5 | micro_recall    | 0.7      |
|  6 | micro_f1        | 0.7      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11553.15 ms
[Epoch 1] loss: 41.057246282696724


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.18 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.87     |
|  1 | macro_precision | 0.424261 |
|  2 | macro_recall    | 0.454622 |
|  3 | macro_f1        | 0.433479 |
|  4 | micro_precision | 0.87     |
|  5 | micro_recall    | 0.87     |
|  6 | micro_f1        | 0.87     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11573.88 ms
[Epoch 2] loss: 28.18021285533905


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.27 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.454427 |
|  2 | macro_recall    | 0.47701  |
|  3 | macro_f1        | 0.464339 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11594.30 ms
[Epoch 3] loss: 22.007512286305428


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.68 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11231.13 ms
[Epoch 4] loss: 19.175831384956837


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.44 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11666.03 ms
[Epoch 5] loss: 14.486416194587946


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.81 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.96     |
|  1 | macro_precision | 0.480194 |
|  2 | macro_recall    | 0.483871 |
|  3 | macro_f1        | 0.482019 |
|  4 | micro_precision | 0.96     |
|  5 | micro_recall    | 0.96     |
|  6 | micro_f1        | 0.96     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11235.72 ms
[Epoch 6] loss: 12.15014000236988


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.59 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.529279 |
|  2 | macro_recall    | 0.707752 |
|  3 | macro_f1        | 0.574456 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11581.67 ms
[Epoch 7] loss: 8.384479139000177


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.37 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11596.45 ms
[Epoch 8] loss: 6.666847687214613


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.98 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=56.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 11186.58 ms
[Epoch 9] loss: 5.280981538584456


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.68 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.45 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.7821782178217822 |
|  1 | macro_precision | 0.4917929292929293 |
|  2 | macro_recall    | 0.49609375         |
|  3 | macro_f1        | 0.4916306528785307 |
|  4 | micro_precision | 0.7821782178217822 |
|  5 | micro_recall    | 0.7821782178217822 |
|  6 | micro_f1        | 0.7821782178217822 |
|  7 | lans            | ['en', 'ru']       |
|  8 | sample_rate     | 0.1                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1002 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13084.16 ms
[Epoch 0] loss: 70.74298447370529


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.82 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.74     |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.306452 |
|  3 | macro_f1        | 0.30148  |
|  4 | micro_precision | 0.74     |
|  5 | micro_recall    | 0.74     |
|  6 | micro_f1        | 0.74     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13092.75 ms
[Epoch 1] loss: 45.47271829843521


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.04 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.444519 |
|  2 | macro_recall    | 0.460881 |
|  3 | macro_f1        | 0.452227 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13131.01 ms
[Epoch 2] loss: 32.36546069383621


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.94 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12782.89 ms
[Epoch 3] loss: 24.581247851252556


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.62 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13135.64 ms
[Epoch 4] loss: 21.704696264117956


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.72 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13144.67 ms
[Epoch 5] loss: 15.79058241005987


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.65 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12881.92 ms
[Epoch 6] loss: 11.708962485194206


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.38 ms
Early Stopping. Epoch: 6, best_valid_metric (macro_f1): 0.4696969696969696



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.96 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.8316831683168316  |
|  1 | macro_precision | 0.5292262405382675  |
|  2 | macro_recall    | 0.48604910714285715 |
|  3 | macro_f1        | 0.47201235447849843 |
|  4 | micro_precision | 0.8316831683168316  |
|  5 | micro_recall    | 0.8316831683168316  |
|  6 | micro_f1        | 0.8316831683168316  |
|  7 | lans            | ['en', 'ru']        |
|  8 | sample_rate     | 0.25                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1002 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13157.36 ms
[Epoch 0] loss: 70.74298447370529


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.62 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.74     |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.306452 |
|  3 | macro_f1        | 0.30148  |
|  4 | micro_precision | 0.74     |
|  5 | micro_recall    | 0.74     |
|  6 | micro_f1        | 0.74     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13175.67 ms
[Epoch 1] loss: 45.47271829843521


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.27 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.444519 |
|  2 | macro_recall    | 0.460881 |
|  3 | macro_f1        | 0.452227 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12813.37 ms
[Epoch 2] loss: 32.36546069383621


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 883.38 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12797.98 ms
[Epoch 3] loss: 24.581247851252556


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.22 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13199.03 ms
[Epoch 4] loss: 21.704696264117956


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 531.51 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12892.91 ms
[Epoch 5] loss: 15.79058241005987


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.52 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=63.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 13173.75 ms
[Epoch 6] loss: 11.708962485194206


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.20 ms
Early Stopping. Epoch: 6, best_valid_metric (macro_f1): 0.4696969696969696



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.69 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.8316831683168316  |
|  1 | macro_precision | 0.5292262405382675  |
|  2 | macro_recall    | 0.48604910714285715 |
|  3 | macro_f1        | 0.47201235447849843 |
|  4 | micro_precision | 0.8316831683168316  |
|  5 | micro_recall    | 0.8316831683168316  |
|  6 | micro_f1        | 0.8316831683168316  |
|  7 | lans            | ['en', 'ru']        |
|  8 | sample_rate     | 0.25                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1203 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15685.23 ms
[Epoch 0] loss: 82.92712354660034


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.81 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.81     |
|  1 | macro_precision | 0.444767 |
|  2 | macro_recall    | 0.362903 |
|  3 | macro_f1        | 0.37451  |
|  4 | micro_precision | 0.81     |
|  5 | micro_recall    | 0.81     |
|  6 | micro_f1        | 0.81     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15697.38 ms
[Epoch 1] loss: 48.21143826842308


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.07 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15703.28 ms
[Epoch 2] loss: 34.80480341613293


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.59 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15379.30 ms
[Epoch 3] loss: 28.15995293855667


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.71 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15725.17 ms
[Epoch 4] loss: 21.83918757736683


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.98 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15725.16 ms
[Epoch 5] loss: 15.988294001668692


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.82 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.87     |
|  1 | macro_precision | 0.503901 |
|  2 | macro_recall    | 0.661892 |
|  3 | macro_f1        | 0.482317 |
|  4 | micro_precision | 0.87     |
|  5 | micro_recall    | 0.87     |
|  6 | micro_f1        | 0.87     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15814.30 ms
[Epoch 6] loss: 11.613537294790149


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.12 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.524425 |
|  2 | macro_recall    | 0.710279 |
|  3 | macro_f1        | 0.542641 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15681.33 ms
[Epoch 7] loss: 9.381091114366427


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.07 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.554607 |
|  2 | macro_recall    | 0.705946 |
|  3 | macro_f1        | 0.587986 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15366.37 ms
[Epoch 8] loss: 6.974427664652467


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.25 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15728.68 ms
[Epoch 9] loss: 5.772304852027446


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.90 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.82 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8118811881188119 |
|  1 | macro_precision | 0.5014478764478765 |
|  2 | macro_recall    | 0.5228794642857143 |
|  3 | macro_f1        | 0.5075320512820513 |
|  4 | micro_precision | 0.8118811881188119 |
|  5 | micro_recall    | 0.8118811881188119 |
|  6 | micro_f1        | 0.8118811881188119 |
|  7 | lans            | ['en', 'ru']       |
|  8 | sample_rate     | 0.5                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1203 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15704.76 ms
[Epoch 0] loss: 82.92712354660034


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.53 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.81     |
|  1 | macro_precision | 0.444767 |
|  2 | macro_recall    | 0.362903 |
|  3 | macro_f1        | 0.37451  |
|  4 | micro_precision | 0.81     |
|  5 | micro_recall    | 0.81     |
|  6 | micro_f1        | 0.81     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15740.31 ms
[Epoch 1] loss: 48.21143826842308


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.16 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15447.22 ms
[Epoch 2] loss: 34.80480341613293


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 532.05 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15840.88 ms
[Epoch 3] loss: 28.15995293855667


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.12 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15922.33 ms
[Epoch 4] loss: 21.83918757736683


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.57 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15785.01 ms
[Epoch 5] loss: 15.988294001668692


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.40 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.87     |
|  1 | macro_precision | 0.503901 |
|  2 | macro_recall    | 0.661892 |
|  3 | macro_f1        | 0.482317 |
|  4 | micro_precision | 0.87     |
|  5 | micro_recall    | 0.87     |
|  6 | micro_f1        | 0.87     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15371.72 ms
[Epoch 6] loss: 11.613537294790149


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.67 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.524425 |
|  2 | macro_recall    | 0.710279 |
|  3 | macro_f1        | 0.542641 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15697.40 ms
[Epoch 7] loss: 9.381091114366427


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.86 ms
--------------------------------------------------------------------------------
[Epoch 7]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.554607 |
|  2 | macro_recall    | 0.705946 |
|  3 | macro_f1        | 0.587986 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15664.88 ms
[Epoch 8] loss: 6.974427664652467


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 516.29 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15696.96 ms
[Epoch 9] loss: 5.772304852027446


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.35 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.50 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------+
|    | keys            | values             |
|----+-----------------+--------------------|
|  0 | accuracy        | 0.8118811881188119 |
|  1 | macro_precision | 0.5014478764478765 |
|  2 | macro_recall    | 0.5228794642857143 |
|  3 | macro_f1        | 0.5075320512820513 |
|  4 | micro_precision | 0.8118811881188119 |
|  5 | micro_recall    | 0.8118811881188119 |
|  6 | micro_f1        | 0.8118811881188119 |
|  7 | lans            | ['en', 'ru']       |
|  8 | sample_rate     | 0.5                |
+----+-----------------+--------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1403 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18190.31 ms
[Epoch 0] loss: 95.88039183616638


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.06 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.74     |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.306452 |
|  3 | macro_f1        | 0.30148  |
|  4 | micro_precision | 0.74     |
|  5 | micro_recall    | 0.74     |
|  6 | micro_f1        | 0.74     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18005.84 ms
[Epoch 1] loss: 55.86948922276497


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 893.54 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.443336 |
|  2 | macro_recall    | 0.469547 |
|  3 | macro_f1        | 0.453825 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18331.06 ms
[Epoch 2] loss: 42.53062190115452


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.55 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.457721 |
|  2 | macro_recall    | 0.468344 |
|  3 | macro_f1        | 0.462963 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18391.30 ms
[Epoch 3] loss: 30.44582974165678


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 538.32 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.464962 |
|  2 | macro_recall    | 0.468344 |
|  3 | macro_f1        | 0.466583 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18014.80 ms
[Epoch 4] loss: 20.60890458896756


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.30 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18266.39 ms
[Epoch 5] loss: 13.687751406803727


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.53 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18630.02 ms
[Epoch 6] loss: 9.996388334780931


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.16 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.513408 |
|  2 | macro_recall    | 0.69415  |
|  3 | macro_f1        | 0.522378 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18155.58 ms
[Epoch 7] loss: 6.890202713315375


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.22 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18151.14 ms
[Epoch 8] loss: 8.260843152820598


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.35 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.508333 |
|  2 | macro_recall    | 0.698483 |
|  3 | macro_f1        | 0.528818 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17781.37 ms
[Epoch 9] loss: 7.895725951995701


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 883.34 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.92 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7920792079207921  |
|  1 | macro_precision | 0.49675925925925923 |
|  2 | macro_recall    | 0.5172991071428572  |
|  3 | macro_f1        | 0.5025659824046921  |
|  4 | micro_precision | 0.7920792079207921  |
|  5 | micro_recall    | 0.7920792079207921  |
|  6 | micro_f1        | 0.7920792079207921  |
|  7 | lans            | ['en', 'ru']        |
|  8 | sample_rate     | 0.75                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1403 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17740.18 ms
[Epoch 0] loss: 95.88039183616638


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 890.33 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.74     |
|  1 | macro_precision | 0.430108 |
|  2 | macro_recall    | 0.306452 |
|  3 | macro_f1        | 0.30148  |
|  4 | micro_precision | 0.74     |
|  5 | micro_recall    | 0.74     |
|  6 | micro_f1        | 0.74     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17823.25 ms
[Epoch 1] loss: 55.86948922276497


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.56 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.443336 |
|  2 | macro_recall    | 0.469547 |
|  3 | macro_f1        | 0.453825 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18304.05 ms
[Epoch 2] loss: 42.53062190115452


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.53 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.457721 |
|  2 | macro_recall    | 0.468344 |
|  3 | macro_f1        | 0.462963 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18225.84 ms
[Epoch 3] loss: 30.44582974165678


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.56 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.464962 |
|  2 | macro_recall    | 0.468344 |
|  3 | macro_f1        | 0.466583 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18186.96 ms
[Epoch 4] loss: 20.60890458896756


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.36 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18214.46 ms
[Epoch 5] loss: 13.687751406803727


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.05 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18188.44 ms
[Epoch 6] loss: 9.996388334780931


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.81 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.513408 |
|  2 | macro_recall    | 0.69415  |
|  3 | macro_f1        | 0.522378 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18148.97 ms
[Epoch 7] loss: 6.890202713315375


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.50 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18141.34 ms
[Epoch 8] loss: 8.260843152820598


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 518.24 ms
--------------------------------------------------------------------------------
[Epoch 8]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.508333 |
|  2 | macro_recall    | 0.698483 |
|  3 | macro_f1        | 0.528818 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=88.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 18152.04 ms
[Epoch 9] loss: 7.895725951995701


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.92 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.36 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7920792079207921  |
|  1 | macro_precision | 0.49675925925925923 |
|  2 | macro_recall    | 0.5172991071428572  |
|  3 | macro_f1        | 0.5025659824046921  |
|  4 | micro_precision | 0.7920792079207921  |
|  5 | micro_recall    | 0.7920792079207921  |
|  6 | micro_f1        | 0.7920792079207921  |
|  7 | lans            | ['en', 'ru']        |
|  8 | sample_rate     | 0.75                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1604 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20835.88 ms
[Epoch 0] loss: 108.16506117582321


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.96 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.79     |
|  1 | macro_precision | 0.424003 |
|  2 | macro_recall    | 0.351107 |
|  3 | macro_f1        | 0.360131 |
|  4 | micro_precision | 0.79     |
|  5 | micro_recall    | 0.79     |
|  6 | micro_f1        | 0.79     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20849.99 ms
[Epoch 1] loss: 62.27554251253605


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.87 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.462234 |
|  2 | macro_recall    | 0.476408 |
|  3 | macro_f1        | 0.4691   |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21220.54 ms
[Epoch 2] loss: 43.36599741876125


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.42 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20840.90 ms
[Epoch 3] loss: 32.329415243119


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.93 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.459459 |
|  2 | macro_recall    | 0.485075 |
|  3 | macro_f1        | 0.470249 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20809.13 ms
[Epoch 4] loss: 21.463055866770446


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.46 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.504149 |
|  2 | macro_recall    | 0.686086 |
|  3 | macro_f1        | 0.513757 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20856.30 ms
[Epoch 5] loss: 15.610211191698909


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.19 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.513836 |
|  2 | macro_recall    | 0.702817 |
|  3 | macro_f1        | 0.541533 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20778.61 ms
[Epoch 6] loss: 10.125697356648743


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.07 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.549124 |
|  2 | macro_recall    | 0.718946 |
|  3 | macro_f1        | 0.592021 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20826.44 ms
[Epoch 7] loss: 8.912571360473521


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.26 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20402.97 ms
[Epoch 8] loss: 4.455283866846003


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.34 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21150.14 ms
[Epoch 9] loss: 2.060664892254863


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.28 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.88 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7722772277227723  |
|  1 | macro_precision | 0.46759259259259256 |
|  2 | macro_recall    | 0.47488839285714285 |
|  3 | macro_f1        | 0.46676377118644063 |
|  4 | micro_precision | 0.7722772277227723  |
|  5 | micro_recall    | 0.7722772277227723  |
|  6 | micro_f1        | 0.7722772277227723  |
|  7 | lans            | ['en', 'ru']        |
|  8 | sample_rate     | 1                   |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1604 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20710.98 ms
[Epoch 0] loss: 108.16506117582321


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.75 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.79     |
|  1 | macro_precision | 0.424003 |
|  2 | macro_recall    | 0.351107 |
|  3 | macro_f1        | 0.360131 |
|  4 | micro_precision | 0.79     |
|  5 | micro_recall    | 0.79     |
|  6 | micro_f1        | 0.79     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20860.72 ms
[Epoch 1] loss: 62.27554251253605


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.87 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.462234 |
|  2 | macro_recall    | 0.476408 |
|  3 | macro_f1        | 0.4691   |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21049.81 ms
[Epoch 2] loss: 43.36599741876125


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.91 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20987.93 ms
[Epoch 3] loss: 32.329415243119


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.52 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.459459 |
|  2 | macro_recall    | 0.485075 |
|  3 | macro_f1        | 0.470249 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 21139.20 ms
[Epoch 4] loss: 21.463055866770446


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.31 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.504149 |
|  2 | macro_recall    | 0.686086 |
|  3 | macro_f1        | 0.513757 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20795.29 ms
[Epoch 5] loss: 15.610211191698909


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.14 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.513836 |
|  2 | macro_recall    | 0.702817 |
|  3 | macro_f1        | 0.541533 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20788.27 ms
[Epoch 6] loss: 10.125697356648743


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.26 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.549124 |
|  2 | macro_recall    | 0.718946 |
|  3 | macro_f1        | 0.592021 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20841.84 ms
[Epoch 7] loss: 8.912571360473521


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.49 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20686.71 ms
[Epoch 8] loss: 4.455283866846003


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.19 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=101.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 20708.38 ms
[Epoch 9] loss: 2.060664892254863


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 531.18 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 891.19 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7722772277227723  |
|  1 | macro_precision | 0.46759259259259256 |
|  2 | macro_recall    | 0.47488839285714285 |
|  3 | macro_f1        | 0.46676377118644063 |
|  4 | micro_precision | 0.7722772277227723  |
|  5 | micro_recall    | 0.7722772277227723  |
|  6 | micro_f1        | 0.7722772277227723  |
|  7 | lans            | ['en', 'ru']        |
|  8 | sample_rate     | 1                   |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it


### six languages backtranslation

In [14]:
repeat = 2
lang_list = ["ar","zh","fr","es","pt","ru"]
sample_rate_list = [0.1,0.25,0.5,0.75,1]
lan_set = lang_list
lan_set.append(lang)
for sample_rate in sample_rate_list:
    for i in range(repeat):
        train_set = train_sample(train_bt_set,lan_set,sample_rate)
        run_bert(train_set, dev_set, test_set, lans=lan_set, sample_rate=sample_rate)

train/dev/test numbers:  1362 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17663.99 ms
[Epoch 0] loss: 94.96821820735931


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.82 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.77     |
|  1 | macro_precision | 0.380864 |
|  2 | macro_recall    | 0.365311 |
|  3 | macro_f1        | 0.372692 |
|  4 | micro_precision | 0.77     |
|  5 | micro_recall    | 0.77     |
|  6 | micro_f1        | 0.77     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17708.56 ms
[Epoch 1] loss: 54.14074981212616


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.35 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.438209 |
|  2 | macro_recall    | 0.465816 |
|  3 | macro_f1        | 0.448661 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17303.97 ms
[Epoch 2] loss: 37.66360878944397


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 881.41 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17693.49 ms
[Epoch 3] loss: 28.021400967612863


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.54 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.85     |
|  1 | macro_precision | 0.4875   |
|  2 | macro_recall    | 0.684762 |
|  3 | macro_f1        | 0.524825 |
|  4 | micro_precision | 0.85     |
|  5 | micro_recall    | 0.85     |
|  6 | micro_f1        | 0.85     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17739.16 ms
[Epoch 4] loss: 20.303795874118805


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.26 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17349.01 ms
[Epoch 5] loss: 17.235357128549367


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.08 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.535247 |
|  2 | macro_recall    | 0.70715  |
|  3 | macro_f1        | 0.578846 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17688.17 ms
[Epoch 6] loss: 10.506114803254604


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.98 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17726.37 ms
[Epoch 7] loss: 7.208651600172743


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.95 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17569.76 ms
[Epoch 8] loss: 5.390772149316035


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 513.00 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17599.65 ms
[Epoch 9] loss: 3.8050866035628133


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.35 ms
Early Stopping. Epoch: 9, best_valid_metric (macro_f1): 0.5788461538461538



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.02 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------------------------------+
|    | keys            | values                                     |
|----+-----------------+--------------------------------------------|
|  0 | accuracy        | 0.7920792079207921                         |
|  1 | macro_precision | 0.4725940725940726                         |
|  2 | macro_recall    | 0.4927455357142857                         |
|  3 | macro_f1        | 0.4759229064760232                         |
|  4 | micro_precision | 0.7920792079207921                         |
|  5 | micro_recall    | 0.7920792079207921                         |
|  6 | micro_f1        | 0.7920792079207921                         |
|  7 | lans            | ['ar', 'zh', 'fr', 'es', 'pt', 'ru', 'ru'] |
|  8 | sample_rate     | 0.1                                        |
+----+-----------------+--------------

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17671.68 ms
[Epoch 0] loss: 94.96821820735931


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.61 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.77     |
|  1 | macro_precision | 0.380864 |
|  2 | macro_recall    | 0.365311 |
|  3 | macro_f1        | 0.372692 |
|  4 | micro_precision | 0.77     |
|  5 | micro_recall    | 0.77     |
|  6 | micro_f1        | 0.77     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17461.14 ms
[Epoch 1] loss: 54.14074981212616


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 890.29 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.438209 |
|  2 | macro_recall    | 0.465816 |
|  3 | macro_f1        | 0.448661 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17467.79 ms
[Epoch 2] loss: 37.66360878944397


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.61 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17667.32 ms
[Epoch 3] loss: 28.021400967612863


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.53 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.85     |
|  1 | macro_precision | 0.4875   |
|  2 | macro_recall    | 0.684762 |
|  3 | macro_f1        | 0.524825 |
|  4 | micro_precision | 0.85     |
|  5 | micro_recall    | 0.85     |
|  6 | micro_f1        | 0.85     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17728.31 ms
[Epoch 4] loss: 20.303795874118805


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.55 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17700.86 ms
[Epoch 5] loss: 17.235357128549367


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.99 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.91     |
|  1 | macro_precision | 0.535247 |
|  2 | macro_recall    | 0.70715  |
|  3 | macro_f1        | 0.578846 |
|  4 | micro_precision | 0.91     |
|  5 | micro_recall    | 0.91     |
|  6 | micro_f1        | 0.91     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17663.55 ms
[Epoch 6] loss: 10.506114803254604


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.61 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17744.83 ms
[Epoch 7] loss: 7.208651600172743


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.80 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17628.22 ms
[Epoch 8] loss: 5.390772149316035


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.98 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=86.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 17323.78 ms
[Epoch 9] loss: 3.8050866035628133


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 893.32 ms
Early Stopping. Epoch: 9, best_valid_metric (macro_f1): 0.5788461538461538



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.51 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------------------------------+
|    | keys            | values                                     |
|----+-----------------+--------------------------------------------|
|  0 | accuracy        | 0.7920792079207921                         |
|  1 | macro_precision | 0.4725940725940726                         |
|  2 | macro_recall    | 0.4927455357142857                         |
|  3 | macro_f1        | 0.4759229064760232                         |
|  4 | micro_precision | 0.7920792079207921                         |
|  5 | micro_recall    | 0.7920792079207921                         |
|  6 | micro_f1        | 0.7920792079207921                         |
|  7 | lans            | ['ar', 'zh', 'fr', 'es', 'pt', 'ru', 'ru'] |
|  8 | sample_rate     | 0.1                                        |
+----+-----------------+--------------

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28321.86 ms
[Epoch 0] loss: 145.0585879087448


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.16 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.82     |
|  1 | macro_precision | 0.40202  |
|  2 | macro_recall    | 0.431632 |
|  3 | macro_f1        | 0.408003 |
|  4 | micro_precision | 0.82     |
|  5 | micro_recall    | 0.82     |
|  6 | micro_f1        | 0.82     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28424.96 ms
[Epoch 1] loss: 75.2997952401638


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.16 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28422.86 ms
[Epoch 2] loss: 49.827536173164845


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.06 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 29072.51 ms
[Epoch 3] loss: 32.65571328252554


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.25 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.503092 |
|  2 | macro_recall    | 0.682354 |
|  3 | macro_f1        | 0.525529 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28403.25 ms
[Epoch 4] loss: 22.370210510212928


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.99 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28757.22 ms
[Epoch 5] loss: 15.683471250813454


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.06 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.508089 |
|  2 | macro_recall    | 0.703418 |
|  3 | macro_f1        | 0.538517 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28371.59 ms
[Epoch 6] loss: 12.272473870427348


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.53 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.746032 |
|  2 | macro_recall    | 0.932956 |
|  3 | macro_f1        | 0.764808 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28731.65 ms
[Epoch 7] loss: 9.508517830399796


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.49 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28320.10 ms
[Epoch 8] loss: 6.411110074142925


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 533.84 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28310.01 ms
[Epoch 9] loss: 2.2415927128167823


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.83 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.01 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------------------------------+
|    | keys            | values                                     |
|----+-----------------+--------------------------------------------|
|  0 | accuracy        | 0.7524752475247525                         |
|  1 | macro_precision | 0.44896331738437                           |
|  2 | macro_recall    | 0.4520089285714286                         |
|  3 | macro_f1        | 0.4481858679185853                         |
|  4 | micro_precision | 0.7524752475247525                         |
|  5 | micro_recall    | 0.7524752475247525                         |
|  6 | micro_f1        | 0.7524752475247525                         |
|  7 | lans            | ['ar', 'zh', 'fr', 'es', 'pt', 'ru', 'ru'] |
|  8 | sample_rate     | 0.25                                       |
+----+-----------------+--------------

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28673.05 ms
[Epoch 0] loss: 145.0585879087448


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.32 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.82     |
|  1 | macro_precision | 0.40202  |
|  2 | macro_recall    | 0.431632 |
|  3 | macro_f1        | 0.408003 |
|  4 | micro_precision | 0.82     |
|  5 | micro_recall    | 0.82     |
|  6 | micro_f1        | 0.82     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28602.06 ms
[Epoch 1] loss: 75.2997952401638


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 534.51 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28999.04 ms
[Epoch 2] loss: 49.827536173164845


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.10 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28489.98 ms
[Epoch 3] loss: 32.65571328252554


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.56 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.503092 |
|  2 | macro_recall    | 0.682354 |
|  3 | macro_f1        | 0.525529 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28346.64 ms
[Epoch 4] loss: 22.370210510212928


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.82 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28752.59 ms
[Epoch 5] loss: 15.683471250813454


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.02 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.508089 |
|  2 | macro_recall    | 0.703418 |
|  3 | macro_f1        | 0.538517 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28268.72 ms
[Epoch 6] loss: 12.272473870427348


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.24 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.89     |
|  1 | macro_precision | 0.746032 |
|  2 | macro_recall    | 0.932956 |
|  3 | macro_f1        | 0.764808 |
|  4 | micro_precision | 0.89     |
|  5 | micro_recall    | 0.89     |
|  6 | micro_f1        | 0.89     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28264.68 ms
[Epoch 7] loss: 9.508517830399796


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 893.83 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28402.24 ms
[Epoch 8] loss: 6.411110074142925


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.17 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=138.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 28316.12 ms
[Epoch 9] loss: 2.2415927128167823


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.74 ms



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.06 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------------------------------+
|    | keys            | values                                     |
|----+-----------------+--------------------------------------------|
|  0 | accuracy        | 0.7524752475247525                         |
|  1 | macro_precision | 0.44896331738437                           |
|  2 | macro_recall    | 0.4520089285714286                         |
|  3 | macro_f1        | 0.4481858679185853                         |
|  4 | micro_precision | 0.7524752475247525                         |
|  5 | micro_recall    | 0.7524752475247525                         |
|  6 | micro_f1        | 0.7524752475247525                         |
|  7 | lans            | ['ar', 'zh', 'fr', 'es', 'pt', 'ru', 'ru'] |
|  8 | sample_rate     | 0.25                                       |
+----+-----------------+--------------

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46577.45 ms
[Epoch 0] loss: 217.22024792432785


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.63 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.462234 |
|  2 | macro_recall    | 0.476408 |
|  3 | macro_f1        | 0.4691   |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 47233.12 ms
[Epoch 1] loss: 105.39049205183983


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.45 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46909.44 ms
[Epoch 2] loss: 64.26252761110663


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.44 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.597775 |
|  2 | macro_recall    | 0.726408 |
|  3 | macro_f1        | 0.641186 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46967.69 ms
[Epoch 3] loss: 39.815317681990564


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.49 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46612.78 ms
[Epoch 4] loss: 25.708485706942156


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.36 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46660.12 ms
[Epoch 5] loss: 17.794784781639464


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 533.08 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.96     |
|  1 | macro_precision | 0.723485 |
|  2 | macro_recall    | 0.734473 |
|  3 | macro_f1        | 0.728736 |
|  4 | micro_precision | 0.96     |
|  5 | micro_recall    | 0.96     |
|  6 | micro_f1        | 0.96     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46416.42 ms
[Epoch 6] loss: 7.5719673608546145


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.30 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46215.35 ms
[Epoch 7] loss: 4.007486776943551


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.67 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46468.00 ms
[Epoch 8] loss: 7.180982827223488


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 893.76 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46309.42 ms
[Epoch 9] loss: 3.2487043642031495


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.17 ms
Early Stopping. Epoch: 9, best_valid_metric (macro_f1): 0.7287359022556391



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.55 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------------------------------+
|    | keys            | values                                     |
|----+-----------------+--------------------------------------------|
|  0 | accuracy        | 0.8118811881188119                         |
|  1 | macro_precision | 0.4562070545121393                         |
|  2 | macro_recall    | 0.4732142857142857                         |
|  3 | macro_f1        | 0.45841150719199497                        |
|  4 | micro_precision | 0.8118811881188119                         |
|  5 | micro_recall    | 0.8118811881188119                         |
|  6 | micro_f1        | 0.8118811881188119                         |
|  7 | lans            | ['ar', 'zh', 'fr', 'es', 'pt', 'ru', 'ru'] |
|  8 | sample_rate     | 0.5                                        |
+----+-----------------+--------------

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46480.68 ms
[Epoch 0] loss: 217.22024792432785


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.27 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.462234 |
|  2 | macro_recall    | 0.476408 |
|  3 | macro_f1        | 0.4691   |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46910.22 ms
[Epoch 1] loss: 105.39049205183983


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.80 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46583.19 ms
[Epoch 2] loss: 64.26252761110663


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.32 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.597775 |
|  2 | macro_recall    | 0.726408 |
|  3 | macro_f1        | 0.641186 |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 47119.36 ms
[Epoch 3] loss: 39.815317681990564


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 908.32 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46492.86 ms
[Epoch 4] loss: 25.708485706942156


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.32 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46410.09 ms
[Epoch 5] loss: 17.794784781639464


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.68 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.96     |
|  1 | macro_precision | 0.723485 |
|  2 | macro_recall    | 0.734473 |
|  3 | macro_f1        | 0.728736 |
|  4 | micro_precision | 0.96     |
|  5 | micro_recall    | 0.96     |
|  6 | micro_f1        | 0.96     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46800.84 ms
[Epoch 6] loss: 7.5719673608546145


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.16 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46281.23 ms
[Epoch 7] loss: 4.007486776943551


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.97 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46359.96 ms
[Epoch 8] loss: 7.180982827223488


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.31 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=226.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 46307.22 ms
[Epoch 9] loss: 3.2487043642031495


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.37 ms
Early Stopping. Epoch: 9, best_valid_metric (macro_f1): 0.7287359022556391



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.52 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------------------------------+
|    | keys            | values                                     |
|----+-----------------+--------------------------------------------|
|  0 | accuracy        | 0.8118811881188119                         |
|  1 | macro_precision | 0.4562070545121393                         |
|  2 | macro_recall    | 0.4732142857142857                         |
|  3 | macro_f1        | 0.45841150719199497                        |
|  4 | micro_precision | 0.8118811881188119                         |
|  5 | micro_recall    | 0.8118811881188119                         |
|  6 | micro_f1        | 0.8118811881188119                         |
|  7 | lans            | ['ar', 'zh', 'fr', 'es', 'pt', 'ru', 'ru'] |
|  8 | sample_rate     | 0.5                                        |
+----+-----------------+--------------

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64388.17 ms
[Epoch 0] loss: 278.28243239223957


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.10 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.454427 |
|  2 | macro_recall    | 0.47701  |
|  3 | macro_f1        | 0.464339 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 65024.89 ms
[Epoch 1] loss: 132.83999514579773


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.17 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64648.63 ms
[Epoch 2] loss: 80.19705993356183


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.94 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.88     |
|  1 | macro_precision | 0.496094 |
|  2 | macro_recall    | 0.678623 |
|  3 | macro_f1        | 0.497355 |
|  4 | micro_precision | 0.88     |
|  5 | micro_recall    | 0.88     |
|  6 | micro_f1        | 0.88     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64880.04 ms
[Epoch 3] loss: 45.98516789195128


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.24 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 65127.13 ms
[Epoch 4] loss: 27.447673344053328


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.46 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.666412 |
|  2 | macro_recall    | 0.956548 |
|  3 | macro_f1        | 0.734091 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64509.07 ms
[Epoch 5] loss: 8.085688393039163


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.03 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64535.05 ms
[Epoch 6] loss: 7.647741781314835


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.76 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64439.54 ms
[Epoch 7] loss: 5.070454684799188


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.49 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64428.18 ms
[Epoch 8] loss: 3.3228124033048516


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.92 ms
Early Stopping. Epoch: 8, best_valid_metric (macro_f1): 0.734090909090909



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.18 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------------------------------+
|    | keys            | values                                     |
|----+-----------------+--------------------------------------------|
|  0 | accuracy        | 0.801980198019802                          |
|  1 | macro_precision | 0.47888293248333813                        |
|  2 | macro_recall    | 0.4815848214285714                         |
|  3 | macro_f1        | 0.47769257888242544                        |
|  4 | micro_precision | 0.801980198019802                          |
|  5 | micro_recall    | 0.801980198019802                          |
|  6 | micro_f1        | 0.801980198019802                          |
|  7 | lans            | ['ar', 'zh', 'fr', 'es', 'pt', 'ru', 'ru'] |
|  8 | sample_rate     | 0.75                                       |
+----+-----------------+--------------

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64776.98 ms
[Epoch 0] loss: 278.28243239223957


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.23 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.454427 |
|  2 | macro_recall    | 0.47701  |
|  3 | macro_f1        | 0.464339 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64946.33 ms
[Epoch 1] loss: 132.83999514579773


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.87 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64569.26 ms
[Epoch 2] loss: 80.19705993356183


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 911.88 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.88     |
|  1 | macro_precision | 0.496094 |
|  2 | macro_recall    | 0.678623 |
|  3 | macro_f1        | 0.497355 |
|  4 | micro_precision | 0.88     |
|  5 | micro_recall    | 0.88     |
|  6 | micro_f1        | 0.88     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64905.88 ms
[Epoch 3] loss: 45.98516789195128


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 517.89 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64230.65 ms
[Epoch 4] loss: 27.447673344053328


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 905.77 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.666412 |
|  2 | macro_recall    | 0.956548 |
|  3 | macro_f1        | 0.734091 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64445.70 ms
[Epoch 5] loss: 8.085688393039163


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.20 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64306.50 ms
[Epoch 6] loss: 7.647741781314835


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.80 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64390.62 ms
[Epoch 7] loss: 5.070454684799188


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.41 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=314.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 64373.61 ms
[Epoch 8] loss: 3.3228124033048516


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.51 ms
Early Stopping. Epoch: 8, best_valid_metric (macro_f1): 0.734090909090909



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.93 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------------------------------+
|    | keys            | values                                     |
|----+-----------------+--------------------------------------------|
|  0 | accuracy        | 0.801980198019802                          |
|  1 | macro_precision | 0.47888293248333813                        |
|  2 | macro_recall    | 0.4815848214285714                         |
|  3 | macro_f1        | 0.47769257888242544                        |
|  4 | micro_precision | 0.801980198019802                          |
|  5 | micro_recall    | 0.801980198019802                          |
|  6 | micro_f1        | 0.801980198019802                          |
|  7 | lans            | ['ar', 'zh', 'fr', 'es', 'pt', 'ru', 'ru'] |
|  8 | sample_rate     | 0.75                                       |
+----+-----------------+--------------

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82586.14 ms
[Epoch 0] loss: 346.2952408194542


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.22 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.454427 |
|  2 | macro_recall    | 0.47701  |
|  3 | macro_f1        | 0.464339 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 83175.68 ms
[Epoch 1] loss: 156.03936509788036


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 897.01 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82932.79 ms
[Epoch 2] loss: 86.81642334349453


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.62 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.87     |
|  1 | macro_precision | 0.486834 |
|  2 | macro_recall    | 0.670558 |
|  3 | macro_f1        | 0.488734 |
|  4 | micro_precision | 0.87     |
|  5 | micro_recall    | 0.87     |
|  6 | micro_f1        | 0.87     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 83014.13 ms
[Epoch 3] loss: 47.4220641835127


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.58 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82727.33 ms
[Epoch 4] loss: 23.230637120082974


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 520.88 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.533654 |
|  2 | macro_recall    | 0.706548 |
|  3 | macro_f1        | 0.563736 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82458.61 ms
[Epoch 5] loss: 7.14364740147721


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.09 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82424.17 ms
[Epoch 6] loss: 3.566593426599866


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.19 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82613.20 ms
[Epoch 7] loss: 4.351748419307114


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.47 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82115.00 ms
[Epoch 8] loss: 5.4412772489959025


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.47 ms
Early Stopping. Epoch: 8, best_valid_metric (macro_f1): 0.5637357178340785



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.83 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------------------------------+
|    | keys            | values                                     |
|----+-----------------+--------------------------------------------|
|  0 | accuracy        | 0.7920792079207921                         |
|  1 | macro_precision | 0.5066468253968254                         |
|  2 | macro_recall    | 0.52734375                                 |
|  3 | macro_f1        | 0.507032279314888                          |
|  4 | micro_precision | 0.7920792079207921                         |
|  5 | micro_recall    | 0.7920792079207921                         |
|  6 | micro_f1        | 0.7920792079207921                         |
|  7 | lans            | ['ar', 'zh', 'fr', 'es', 'pt', 'ru', 'ru'] |
|  8 | sample_rate     | 1                                          |
+----+-----------------+--------------

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82987.33 ms
[Epoch 0] loss: 346.2952408194542


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 534.08 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.454427 |
|  2 | macro_recall    | 0.47701  |
|  3 | macro_f1        | 0.464339 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 83388.44 ms
[Epoch 1] loss: 156.03936509788036


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 519.37 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 83294.81 ms
[Epoch 2] loss: 86.81642334349453


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.67 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.87     |
|  1 | macro_precision | 0.486834 |
|  2 | macro_recall    | 0.670558 |
|  3 | macro_f1        | 0.488734 |
|  4 | micro_precision | 0.87     |
|  5 | micro_recall    | 0.87     |
|  6 | micro_f1        | 0.87     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82910.31 ms
[Epoch 3] loss: 47.4220641835127


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.31 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 83022.78 ms
[Epoch 4] loss: 23.230637120082974


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.20 ms
--------------------------------------------------------------------------------
[Epoch 4]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.533654 |
|  2 | macro_recall    | 0.706548 |
|  3 | macro_f1        | 0.563736 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82551.65 ms
[Epoch 5] loss: 7.14364740147721


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.26 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82590.22 ms
[Epoch 6] loss: 3.566593426599866


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.04 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82144.61 ms
[Epoch 7] loss: 4.351748419307114


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.09 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=401.0, style=ProgressStyle(description_wid…


Execute [train_an_epoch] method costing 82572.01 ms
[Epoch 8] loss: 5.4412772489959025


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.50 ms
Early Stopping. Epoch: 8, best_valid_metric (macro_f1): 0.5637357178340785



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.64 ms
--------------------------------------------------------------------------------
+----+-----------------+--------------------------------------------+
|    | keys            | values                                     |
|----+-----------------+--------------------------------------------|
|  0 | accuracy        | 0.7920792079207921                         |
|  1 | macro_precision | 0.5066468253968254                         |
|  2 | macro_recall    | 0.52734375                                 |
|  3 | macro_f1        | 0.507032279314888                          |
|  4 | micro_precision | 0.7920792079207921                         |
|  5 | micro_recall    | 0.7920792079207921                         |
|  6 | micro_f1        | 0.7920792079207921                         |
|  7 | lans            | ['ar', 'zh', 'fr', 'es', 'pt', 'ru', 'ru'] |
|  8 | sample_rate     | 1                                          |
+----+-----------------+--------------

### two languages backtranslation

In [None]:
repeat = 2
lang_list = ["ar","zh","fr","es","pt","ru"]
sample_rate_list = [0.1,0.25,0.5,0.75,1]
for i in range(len(lang_list)-1):
    for j in range(i+1,len(lang_list)):
        lan_set = ["en"]+[lang_list[i]]+[lang_list[j]]
        for sample_rate in sample_rate_list:
            for i in range(repeat):
                train_set = train_sample(train_bt_set,lan_set,sample_rate)
                run_bert(train_set, dev_set, test_set, lans=lan_set, sample_rate=sample_rate)

train/dev/test numbers:  962 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12580.45 ms
[Epoch 0] loss: 69.58321756124496


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.25 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.69     |
|  1 | macro_precision | 0.420918 |
|  2 | macro_recall    | 0.266129 |
|  3 | macro_f1        | 0.233333 |
|  4 | micro_precision | 0.69     |
|  5 | micro_recall    | 0.69     |
|  6 | micro_f1        | 0.69     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12232.00 ms
[Epoch 1] loss: 43.22737593948841


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.10 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.438209 |
|  2 | macro_recall    | 0.465816 |
|  3 | macro_f1        | 0.448661 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12640.22 ms
[Epoch 2] loss: 31.705886013805866


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.37 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.457721 |
|  2 | macro_recall    | 0.468344 |
|  3 | macro_f1        | 0.462963 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12672.70 ms
[Epoch 3] loss: 25.639764703810215


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.08 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12764.34 ms
[Epoch 4] loss: 19.465955128893256


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.80 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12267.02 ms
[Epoch 5] loss: 15.47154201567173


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.16 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12651.62 ms
[Epoch 6] loss: 10.725856528617442


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.11 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.501344 |
|  2 | macro_recall    | 0.694752 |
|  3 | macro_f1        | 0.523414 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12679.31 ms
[Epoch 7] loss: 8.425443976651877


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.81 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12221.21 ms
[Epoch 8] loss: 7.578510835301131


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.45 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12654.87 ms
[Epoch 9] loss: 5.0317172550130635


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.36 ms
--------------------------------------------------------------------------------
[Epoch 9]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.577703 |
|  2 | macro_recall    | 0.719547 |
|  3 | macro_f1        | 0.625536 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.67 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7821782178217822  |
|  1 | macro_precision | 0.5048701298701299  |
|  2 | macro_recall    | 0.5212053571428572  |
|  3 | macro_f1        | 0.49985380116959066 |
|  4 | micro_precision | 0.7821782178217822  |
|  5 | micro_recall    | 0.7821782178217822  |
|  6 | micro_f1        | 0.7821782178217822  |
|  7 | lans            | ['en', 'ar', 'zh']  |
|  8 | sample_rate     | 0.1                 |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  962 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12608.00 ms
[Epoch 0] loss: 69.58321756124496


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.14 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.69     |
|  1 | macro_precision | 0.420918 |
|  2 | macro_recall    | 0.266129 |
|  3 | macro_f1        | 0.233333 |
|  4 | micro_precision | 0.69     |
|  5 | micro_recall    | 0.69     |
|  6 | micro_f1        | 0.69     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12336.94 ms
[Epoch 1] loss: 43.22737593948841


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.25 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.438209 |
|  2 | macro_recall    | 0.465816 |
|  3 | macro_f1        | 0.448661 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12747.53 ms
[Epoch 2] loss: 31.705886013805866


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.21 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.457721 |
|  2 | macro_recall    | 0.468344 |
|  3 | macro_f1        | 0.462963 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12644.12 ms
[Epoch 3] loss: 25.639764703810215


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.02 ms
--------------------------------------------------------------------------------
[Epoch 3]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12189.20 ms
[Epoch 4] loss: 19.465955128893256


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.44 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12697.27 ms
[Epoch 5] loss: 15.47154201567173


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.41 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12773.54 ms
[Epoch 6] loss: 10.725856528617442


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.55 ms
--------------------------------------------------------------------------------
[Epoch 6]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.9      |
|  1 | macro_precision | 0.501344 |
|  2 | macro_recall    | 0.694752 |
|  3 | macro_f1        | 0.523414 |
|  4 | micro_precision | 0.9      |
|  5 | micro_recall    | 0.9      |
|  6 | micro_f1        | 0.9      |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12226.25 ms
[Epoch 7] loss: 8.425443976651877


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 524.79 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12612.06 ms
[Epoch 8] loss: 7.578510835301131


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 531.54 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=61.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 12690.57 ms
[Epoch 9] loss: 5.0317172550130635


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.16 ms
--------------------------------------------------------------------------------
[Epoch 9]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.92     |
|  1 | macro_precision | 0.577703 |
|  2 | macro_recall    | 0.719547 |
|  3 | macro_f1        | 0.625536 |
|  4 | micro_precision | 0.92     |
|  5 | micro_recall    | 0.92     |
|  6 | micro_f1        | 0.92     |
+----+-----------------+----------+
--------------------------------------------------------------------------------



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.97 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7821782178217822  |
|  1 | macro_precision | 0.5048701298701299  |
|  2 | macro_recall    | 0.5212053571428572  |
|  3 | macro_f1        | 0.49985380116959066 |
|  4 | micro_precision | 0.7821782178217822  |
|  5 | micro_recall    | 0.7821782178217822  |
|  6 | micro_f1        | 0.7821782178217822  |
|  7 | lans            | ['en', 'ar', 'zh']  |
|  8 | sample_rate     | 0.1                 |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1202 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15246.60 ms
[Epoch 0] loss: 85.25570905208588


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.58 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.72     |
|  1 | macro_precision | 0.426316 |
|  2 | macro_recall    | 0.290323 |
|  3 | macro_f1        | 0.276235 |
|  4 | micro_precision | 0.72     |
|  5 | micro_recall    | 0.72     |
|  6 | micro_f1        | 0.72     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15668.49 ms
[Epoch 1] loss: 48.60005697607994


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 522.28 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15685.16 ms
[Epoch 2] loss: 33.45598977804184


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.04 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15285.98 ms
[Epoch 3] loss: 24.502664778381586


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 914.50 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15357.22 ms
[Epoch 4] loss: 19.76404847484082


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.13 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15708.99 ms
[Epoch 5] loss: 12.764261526986957


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 521.26 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.549124 |
|  2 | macro_recall    | 0.718946 |
|  3 | macro_f1        | 0.592021 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15701.88 ms
[Epoch 6] loss: 10.811598268803209


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.40 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15321.34 ms
[Epoch 7] loss: 7.399456010549329


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.95 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15656.44 ms
[Epoch 8] loss: 7.000572843942791


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.32 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15719.11 ms
[Epoch 9] loss: 5.631397775141522


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.40 ms
Early Stopping. Epoch: 9, best_valid_metric (macro_f1): 0.5920205152671756



HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 526.08 ms
--------------------------------------------------------------------------------
+----+-----------------+---------------------+
|    | keys            | values              |
|----+-----------------+---------------------|
|  0 | accuracy        | 0.7425742574257426  |
|  1 | macro_precision | 0.40016233766233766 |
|  2 | macro_recall    | 0.3984375           |
|  3 | macro_f1        | 0.3971311475409836  |
|  4 | micro_precision | 0.7425742574257426  |
|  5 | micro_recall    | 0.7425742574257426  |
|  6 | micro_f1        | 0.7425742574257425  |
|  7 | lans            | ['en', 'ar', 'zh']  |
|  8 | sample_rate     | 0.25                |
+----+-----------------+---------------------+
--------------------------------------------------------------------------------
../results/20201001_backtrans_full.csv  already exists, appending result to it
train/dev/test numbers:  1202 100 101


Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

The BERT model has 201 different named parameters.

==== Embedding Layer ====

bert.embeddings.word_embeddings.weight                  (28996, 768)
bert.embeddings.position_embeddings.weight                (512, 768)
bert.embeddings.token_type_embeddings.weight                (2, 768)
bert.embeddings.LayerNorm.weight                              (768,)
bert.embeddings.LayerNorm.bias                                (768,)

==== First Transformer ====

bert.encoder.layer.0.attention.self.query.weight          (768, 768)
bert.encoder.layer.0.attention.self.query.bias                (768,)
bert.encoder.layer.0.attention.self.key.weight            (768, 768)
bert.encoder.layer.0.attention.self.key.bias                  (768,)
bert.encoder.layer.0.attention.self.value.weight          (768, 768)
bert.encoder.layer.0.attention.self.value.bias                (768,)
bert.encoder.layer.0.attention.output.dense.weight        (768, 768)
bert.encoder.layer.0.attention.output.dense.bias              (

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15792.72 ms
[Epoch 0] loss: 85.25570905208588


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 523.70 ms
--------------------------------------------------------------------------------
[Epoch 0]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.72     |
|  1 | macro_precision | 0.426316 |
|  2 | macro_recall    | 0.290323 |
|  3 | macro_f1        | 0.276235 |
|  4 | micro_precision | 0.72     |
|  5 | micro_recall    | 0.72     |
|  6 | micro_f1        | 0.72     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15313.96 ms
[Epoch 1] loss: 48.60005697607994


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 910.50 ms
--------------------------------------------------------------------------------
[Epoch 1]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.94     |
|  1 | macro_precision | 0.46044  |
|  2 | macro_recall    | 0.480741 |
|  3 | macro_f1        | 0.469697 |
|  4 | micro_precision | 0.94     |
|  5 | micro_recall    | 0.94     |
|  6 | micro_f1        | 0.94     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15364.70 ms
[Epoch 2] loss: 33.45598977804184


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.77 ms
--------------------------------------------------------------------------------
[Epoch 2]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.95     |
|  1 | macro_precision | 0.4668   |
|  2 | macro_recall    | 0.484473 |
|  3 | macro_f1        | 0.47513  |
|  4 | micro_precision | 0.95     |
|  5 | micro_recall    | 0.95     |
|  6 | micro_f1        | 0.95     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15707.73 ms
[Epoch 3] loss: 24.502664778381586


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.72 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15691.54 ms
[Epoch 4] loss: 19.76404847484082


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 529.77 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15767.58 ms
[Epoch 5] loss: 12.764261526986957


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 525.81 ms
--------------------------------------------------------------------------------
[Epoch 5]performance on validation set
+----+-----------------+----------+
|    | metrics         |   values |
|----+-----------------+----------|
|  0 | accuracy        | 0.93     |
|  1 | macro_precision | 0.549124 |
|  2 | macro_recall    | 0.718946 |
|  3 | macro_f1        | 0.592021 |
|  4 | micro_precision | 0.93     |
|  5 | micro_recall    | 0.93     |
|  6 | micro_f1        | 0.93     |
+----+-----------------+----------+
--------------------------------------------------------------------------------


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15745.82 ms
[Epoch 6] loss: 10.811598268803209


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 528.97 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15440.10 ms
[Epoch 7] loss: 7.399456010549329


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 530.44 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…


Execute [train_an_epoch] method costing 15804.22 ms
[Epoch 8] loss: 7.000572843942791


HBox(children=(FloatProgress(value=0.0, description='Evaluating', max=7.0, style=ProgressStyle(description_wid…


Execute [eval] method costing 527.45 ms


HBox(children=(FloatProgress(value=0.0, description='Training', max=76.0, style=ProgressStyle(description_widt…