In [9]:
%%capture
# Download the BETO (cased_2M) PyTorch weights archive and its vocabulary file.
!wget https://users.dcc.uchile.cl/~jperez/beto/cased_2M/pytorch_weights.tar.gz 
!wget https://users.dcc.uchile.cl/~jperez/beto/cased_2M/vocab.txt 
# Unpack the archive, then place the vocabulary inside the pytorch/ directory.
!tar -xzvf pytorch_weights.tar.gz
!mv vocab.txt pytorch/.

In [24]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Wed Mar 23 17:39:43 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.103.01   Driver Version: 470.103.01   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A30          On   | 00000000:02:00.0 Off |                    0 |
| N/A   28C    P0    51W / 165W |   5070MiB / 24258MiB |    100%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA A10          On   | 00000000:84:00.0 Off |                    0 |
|  0%   90C    P0   126W / 150W |   6240MiB / 22731MiB |    100%      Default |
|       

In [1]:
import math
from sentence_transformers import models, losses, datasets
from sentence_transformers import LoggingHandler, SentenceTransformer, util, InputExample
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
import logging
from datetime import datetime
import sys
import os
import gzip
import csv
import random
import json
import requests
import pandas as pd

In [2]:
# BERT-style model configuration that is written next to the downloaded
# weights as pytorch/config.json.
config_beto = dict(
    architectures=["BertModel"],
    attention_probs_dropout_prob=0.1,
    bos_token_id=2,
    cls_token_id=2,
    eos_token_id=3,
    gradient_checkpointing=False,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    hidden_size=768,
    initializer_range=0.02,
    intermediate_size=3072,
    layer_norm_eps=1e-12,
    mask_token_id=4,
    max_position_embeddings=512,
    model_type="bert",
    num_attention_heads=12,
    num_hidden_layers=12,
    pad_token_id=0,
    position_embedding_type="absolute",
    sep_token_id=3,
    torch_dtype="float32",
    transformers_version="4.9.1",
    type_vocab_size=2,
    unk_token_id=1,
    use_cache=True,
    vocab_size=31002,
)

# Serialise the configuration as JSON into the model directory.
with open('pytorch/config.json', 'w') as config_file:
    config_file.write(json.dumps(config_beto))

In [3]:
# Configure the root logger: INFO level, "<timestamp> - <message>" records,
# emitted through the LoggingHandler imported from sentence_transformers.
log_settings = {
    'format': '%(asctime)s - %(message)s',
    'datefmt': '%Y-%m-%d %H:%M:%S',
    'level': logging.INFO,
    'handlers': [LoggingHandler()],
}
logging.basicConfig(**log_settings)

In [4]:
# Training configuration: where the base model lives and how long/wide to train.
model_path = 'pytorch/'
max_seq_length = 75
num_epochs = 20
# Larger batches usually give better results, at the cost of more GPU memory.
train_batch_size = 100

# Each run saves into its own directory, suffixed with the launch timestamp.
run_timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
model_save_path = '-'.join(['output/training_nli_beto', run_timestamp])

In [5]:
# Echo the run-specific save directory so it is recorded in the cell output.
model_save_path

'output/training_nli_beto-2022-03-23_17-57-01'

In [11]:
# Move the downloaded weights archive into the pytorch/ directory.
!mv pytorch_weights.tar.gz pytorch

In [12]:
# List the files currently present in the model directory.
!ls pytorch/

config.json  pytorch_model.bin	pytorch_weights.tar.gz	vocab.txt


In [None]:
# from transformers import AutoConfig, AutoModel, AutoTokenizer

# config = AutoConfig.from_pretrained(model_path)
# auto_model = AutoModel.from_pretrained(model_path)
# tokenizer = AutoTokenizer.from_pretrained(model_path)

In [19]:
# Assemble the SentenceTransformer: the transformer loaded from `model_path`
# (inputs capped at `max_seq_length`) followed by a 'mean' pooling layer sized
# to the transformer's word-embedding dimension.
word_embedding_model = models.Transformer(model_path, max_seq_length=max_seq_length)
embedding_dimension = word_embedding_model.get_word_embedding_dimension()
pooling_model = models.Pooling(embedding_dimension, pooling_mode='mean')
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

Some weights of the model checkpoint at pytorch/ were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


2022-03-23 18:03:06 - Use pytorch device: cuda


In [20]:
# from torch import nn
# from transformers import AutoModel, AutoTokenizer, AutoConfig
# import json
# from typing import List, Dict, Optional, Union, Tuple
# import os

In [21]:
## Getting data
# Raw GitHub URL of the esxnli.tsv file in the artetxem/esxnli repository.
esxnli = 'https://raw.githubusercontent.com/artetxem/esxnli/master/esxnli.tsv'

In [22]:
# Local file name for the ESXNLI corpus. The download only happens when no
# file or directory with that name exists yet, so re-running the cell is cheap.
esxnli_dataset_path = 'esxnli.tsv'

esxnli_already_present = os.path.exists(esxnli_dataset_path)
if not esxnli_already_present:
    util.http_get(
        'https://raw.githubusercontent.com/artetxem/esxnli/master/esxnli.tsv',
        esxnli_dataset_path,
    )

In [10]:
# pd.read_csv('data/esxnli.tsv', sep='\t').head()

In [11]:
# Log that the ESxNLI training data is about to be read
logging.info("Read ESxNLI train dataset")

2022-03-23 17:57:06 - Read ESxNLI train dataset


In [12]:
# Maps a sentence to the sentences paired with it, grouped by NLI label.
train_data = {}

def add_to_samples(sent1, sent2, label):
    """Register `sent2` under `label` in the `train_data` entry for `sent1`.

    The entry for `sent1` is created on first use with empty 'contradiction',
    'entailment' and 'neutral' sets. Any other `label` raises KeyError (after
    the entry has been created).
    """
    by_label = train_data.get(sent1)
    if by_label is None:
        by_label = {'contradiction': set(), 'entailment': set(), 'neutral': set()}
        train_data[sent1] = by_label
    by_label[label].add(sent2)

In [13]:
# Fill `train_data` from the rows of esxnli.tsv whose 'language' column is 'es'.
# The encoding is pinned to UTF-8 so that accented Spanish text is decoded the
# same way on every platform instead of depending on the locale default, and
# newline='' is passed as the csv module recommends for files given to a reader.
with open('esxnli.tsv', encoding='utf-8', newline='') as dataset:
  reader = csv.DictReader(dataset, delimiter='\t', quoting=csv.QUOTE_NONE)
  for row in reader:
    # Rows in any other language are skipped.
    if row['language'] != 'es':
      continue

    sent1 = row['sentence1'].strip()
    sent2 = row['sentence2'].strip()

    # Register the pair in both directions under the same gold label.
    add_to_samples(sent1, sent2, row['gold_label'])
    add_to_samples(sent2, sent1, row['gold_label'])  #Also add the opposite

In [23]:
# Build the training samples: for every sentence that has at least one
# entailment and one contradiction, emit two three-text examples
#   [sentence, entailment, contradiction] and [entailment, sentence, contradiction].
#
# The random picks use a dedicated, seeded generator so that re-running the
# notebook yields the same samples without touching the global `random` state.
SAMPLING_SEED = 42
sampling_rng = random.Random(SAMPLING_SEED)

train_samples = []
for sent1, others in train_data.items():
    # Sets of strings iterate in an order that can change between interpreter
    # runs; sorting fixes the candidate order so the seeded draws are stable.
    entailments = sorted(others['entailment'])
    contradictions = sorted(others['contradiction'])
    if not entailments or not contradictions:
        continue
    train_samples.append(InputExample(texts=[sent1, sampling_rng.choice(entailments), sampling_rng.choice(contradictions)]))
    train_samples.append(InputExample(texts=[sampling_rng.choice(entailments), sent1, sampling_rng.choice(contradictions)]))

logging.info("Train samples: {}".format(len(train_samples)))

2022-03-23 18:03:26 - Train samples: 1660


In [24]:
# Loss optimised during training, bound to the model being fine-tuned
train_loss = losses.MultipleNegativesRankingLoss(model)

# Batches of `train_batch_size` samples, served by a loader that avoids
# duplicates within a batch
train_dataloader = datasets.NoDuplicatesDataLoader(train_samples, batch_size=train_batch_size)

In [25]:
# Build the STS development set used to score the model during training.
# Each (gold-score file, sentence-pair file) couple is read line by line in
# parallel: line i of the gold file is the score of the pair on line i of the
# input file, whose two sentences are separated by a tab.
sts_root = 'sts2015-es'
sts_files = [('STS.gs.newswire.txt', 'STS.input.newswire.txt'),
             ('STS.gs.wikipedia.txt', 'STS.input.wikipedia.txt')]

dev_examples = []
for gs_fname, sents_fname in sts_files:
    gs_path = os.path.join(sts_root, gs_fname)
    sents_path = os.path.join(sts_root, sents_fname)
    # Context managers close both files of every pair (previously only the
    # last pair was closed, after the loop); UTF-8 is pinned so decoding does
    # not depend on the platform's locale.
    with open(gs_path, encoding='utf-8') as gs, open(sents_path, encoding='utf-8') as sents:
        for g, s in zip(gs, sents):
            # Drop the line terminator so it does not end up inside sent2.
            sent1, sent2 = s.rstrip('\n').split('\t')
            dev_examples.append(InputExample(texts=[sent1, sent2], label=float(g)))

evaluator = EmbeddingSimilarityEvaluator.from_input_examples(dev_examples, batch_size=32,
                                                             name='sts-dev')

In [26]:
# Warm-up length: 10% of all training steps (batches per epoch x epochs), rounded up
total_training_steps = len(train_dataloader) * num_epochs
warmup_steps = math.ceil(total_training_steps * 0.1)
logging.info("Warmup-steps: {}".format(warmup_steps))

2022-03-23 18:03:32 - Warmup-steps: 32


In [29]:
# Fine-tune the model on the NLI samples, scoring it with the STS evaluator
# and writing the result to `model_save_path`.
# NOTE(review): evaluation_steps=1 triggers an evaluation after every training
# step, which is why the log that follows is so long - confirm that this
# frequency is intended.
fit_options = dict(
    train_objectives=[(train_dataloader, train_loss)],
    evaluator=evaluator,
    evaluation_steps=1,
    epochs=num_epochs,
    warmup_steps=warmup_steps,
    output_path=model_save_path,
    use_amp=True,  # Set to True, if your GPU supports FP16 operations
)
model.fit(**fit_options)



Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:04:27 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 0 after 1 steps:
2022-03-23 18:04:30 - Cosine-Similarity :	Pearson: 0.5603	Spearman: 0.4844
2022-03-23 18:04:30 - Manhattan-Distance:	Pearson: 0.5526	Spearman: 0.4798
2022-03-23 18:04:30 - Euclidean-Distance:	Pearson: 0.5530	Spearman: 0.4796
2022-03-23 18:04:30 - Dot-Product-Similarity:	Pearson: 0.2996	Spearman: 0.2306
2022-03-23 18:04:30 - Save model to output/training_nli_beto-2022-03-23_17-57-01
2022-03-23 18:04:31 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 0:
2022-03-23 18:04:34 - Cosine-Similarity :	Pearson: 0.5603	Spearman: 0.4844
2022-03-23 18:04:34 - Manhattan-Distance:	Pearson: 0.5526	Spearman: 0.4798
2022-03-23 18:04:34 - Euclidean-Distance:	Pearson: 0.5530	Spearman: 0.4796
2022-03-23 18:04:34 - Dot-Product-Similarity:	Pearson: 0.2996	Spearman: 0.2306
2022-03-23 18:04:35 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev da

2022-03-23 18:05:41 - Cosine-Similarity :	Pearson: 0.5717	Spearman: 0.4941
2022-03-23 18:05:41 - Manhattan-Distance:	Pearson: 0.5630	Spearman: 0.4874
2022-03-23 18:05:41 - Euclidean-Distance:	Pearson: 0.5628	Spearman: 0.4868
2022-03-23 18:05:41 - Dot-Product-Similarity:	Pearson: 0.3354	Spearman: 0.2563
2022-03-23 18:05:41 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 0:
2022-03-23 18:05:44 - Cosine-Similarity :	Pearson: 0.5717	Spearman: 0.4941
2022-03-23 18:05:44 - Manhattan-Distance:	Pearson: 0.5630	Spearman: 0.4874
2022-03-23 18:05:44 - Euclidean-Distance:	Pearson: 0.5628	Spearman: 0.4868
2022-03-23 18:05:44 - Dot-Product-Similarity:	Pearson: 0.3354	Spearman: 0.2563
2022-03-23 18:05:46 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 0 after 11 steps:
2022-03-23 18:05:49 - Cosine-Similarity :	Pearson: 0.5743	Spearman: 0.4964
2022-03-23 18:05:49 - Manhattan-Distance:	Pearson: 0.5654	Spearman: 0.4893
2022-03-23 18:05

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:06:32 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 1 after 1 steps:
2022-03-23 18:06:35 - Cosine-Similarity :	Pearson: 0.5878	Spearman: 0.5082
2022-03-23 18:06:35 - Manhattan-Distance:	Pearson: 0.5748	Spearman: 0.4952
2022-03-23 18:06:35 - Euclidean-Distance:	Pearson: 0.5731	Spearman: 0.4935
2022-03-23 18:06:35 - Dot-Product-Similarity:	Pearson: 0.4306	Spearman: 0.3398
2022-03-23 18:06:35 - Save model to output/training_nli_beto-2022-03-23_17-57-01
2022-03-23 18:06:36 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 1:
2022-03-23 18:06:39 - Cosine-Similarity :	Pearson: 0.5878	Spearman: 0.5082
2022-03-23 18:06:39 - Manhattan-Distance:	Pearson: 0.5748	Spearman: 0.4952
2022-03-23 18:06:39 - Euclidean-Distance:	Pearson: 0.5731	Spearman: 0.4935
2022-03-23 18:06:39 - Dot-Product-Similarity:	Pearson: 0.4306	Spearman: 0.3398
2022-03-23 18:06:40 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev da

2022-03-23 18:07:47 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 1:
2022-03-23 18:07:49 - Cosine-Similarity :	Pearson: 0.6109	Spearman: 0.5287
2022-03-23 18:07:49 - Manhattan-Distance:	Pearson: 0.5870	Spearman: 0.5133
2022-03-23 18:07:49 - Euclidean-Distance:	Pearson: 0.5831	Spearman: 0.5087
2022-03-23 18:07:49 - Dot-Product-Similarity:	Pearson: 0.4720	Spearman: 0.4053
2022-03-23 18:07:50 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 1 after 11 steps:
2022-03-23 18:07:53 - Cosine-Similarity :	Pearson: 0.6124	Spearman: 0.5291
2022-03-23 18:07:53 - Manhattan-Distance:	Pearson: 0.5891	Spearman: 0.5163
2022-03-23 18:07:53 - Euclidean-Distance:	Pearson: 0.5864	Spearman: 0.5121
2022-03-23 18:07:53 - Dot-Product-Similarity:	Pearson: 0.4742	Spearman: 0.4037
2022-03-23 18:07:53 - Save model to output/training_nli_beto-2022-03-23_17-57-01
2022-03-23 18:07:54 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev d

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:08:32 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 2 after 1 steps:
2022-03-23 18:08:36 - Cosine-Similarity :	Pearson: 0.6109	Spearman: 0.5237
2022-03-23 18:08:36 - Manhattan-Distance:	Pearson: 0.5900	Spearman: 0.5163
2022-03-23 18:08:36 - Euclidean-Distance:	Pearson: 0.5885	Spearman: 0.5159
2022-03-23 18:08:36 - Dot-Product-Similarity:	Pearson: 0.4882	Spearman: 0.4131
2022-03-23 18:08:36 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 2:
2022-03-23 18:08:39 - Cosine-Similarity :	Pearson: 0.6109	Spearman: 0.5237
2022-03-23 18:08:39 - Manhattan-Distance:	Pearson: 0.5900	Spearman: 0.5163
2022-03-23 18:08:39 - Euclidean-Distance:	Pearson: 0.5885	Spearman: 0.5159
2022-03-23 18:08:39 - Dot-Product-Similarity:	Pearson: 0.4882	Spearman: 0.4131
2022-03-23 18:08:40 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 2 after 2 steps:
2022-03-23 18:08:43 - Cosine-Similarity :	Pearson

2022-03-23 18:09:46 - Cosine-Similarity :	Pearson: 0.6194	Spearman: 0.5323
2022-03-23 18:09:46 - Manhattan-Distance:	Pearson: 0.5979	Spearman: 0.5249
2022-03-23 18:09:46 - Euclidean-Distance:	Pearson: 0.5975	Spearman: 0.5240
2022-03-23 18:09:46 - Dot-Product-Similarity:	Pearson: 0.5078	Spearman: 0.4334
2022-03-23 18:09:47 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 2 after 11 steps:
2022-03-23 18:09:51 - Cosine-Similarity :	Pearson: 0.6199	Spearman: 0.5323
2022-03-23 18:09:51 - Manhattan-Distance:	Pearson: 0.5979	Spearman: 0.5249
2022-03-23 18:09:51 - Euclidean-Distance:	Pearson: 0.5977	Spearman: 0.5239
2022-03-23 18:09:51 - Dot-Product-Similarity:	Pearson: 0.5086	Spearman: 0.4342
2022-03-23 18:09:51 - Save model to output/training_nli_beto-2022-03-23_17-57-01
2022-03-23 18:09:51 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 2:
2022-03-23 18:09:53 - Cosine-Similarity :	Pearson: 0.6199	Spearman: 0.5323
2022-03-23

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:10:29 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 3 after 1 steps:
2022-03-23 18:10:33 - Cosine-Similarity :	Pearson: 0.6137	Spearman: 0.5296
2022-03-23 18:10:33 - Manhattan-Distance:	Pearson: 0.5912	Spearman: 0.5200
2022-03-23 18:10:33 - Euclidean-Distance:	Pearson: 0.5913	Spearman: 0.5195
2022-03-23 18:10:33 - Dot-Product-Similarity:	Pearson: 0.5115	Spearman: 0.4310
2022-03-23 18:10:33 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 3:
2022-03-23 18:10:36 - Cosine-Similarity :	Pearson: 0.6137	Spearman: 0.5296
2022-03-23 18:10:36 - Manhattan-Distance:	Pearson: 0.5912	Spearman: 0.5200
2022-03-23 18:10:36 - Euclidean-Distance:	Pearson: 0.5913	Spearman: 0.5195
2022-03-23 18:10:36 - Dot-Product-Similarity:	Pearson: 0.5115	Spearman: 0.4310
2022-03-23 18:10:37 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 3 after 2 steps:
2022-03-23 18:10:40 - Cosine-Similarity :	Pearson

2022-03-23 18:11:40 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 3 after 11 steps:
2022-03-23 18:11:43 - Cosine-Similarity :	Pearson: 0.6073	Spearman: 0.5242
2022-03-23 18:11:43 - Manhattan-Distance:	Pearson: 0.5870	Spearman: 0.5191
2022-03-23 18:11:43 - Euclidean-Distance:	Pearson: 0.5877	Spearman: 0.5188
2022-03-23 18:11:43 - Dot-Product-Similarity:	Pearson: 0.5145	Spearman: 0.4319
2022-03-23 18:11:43 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 3:
2022-03-23 18:11:46 - Cosine-Similarity :	Pearson: 0.6073	Spearman: 0.5242
2022-03-23 18:11:46 - Manhattan-Distance:	Pearson: 0.5870	Spearman: 0.5191
2022-03-23 18:11:46 - Euclidean-Distance:	Pearson: 0.5877	Spearman: 0.5188
2022-03-23 18:11:46 - Dot-Product-Similarity:	Pearson: 0.5145	Spearman: 0.4319
2022-03-23 18:11:47 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 3 after 12 steps:
2022-03-23 18:11:50 - Cosine-Similarity :	Pears

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:12:22 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 4 after 1 steps:
2022-03-23 18:12:25 - Cosine-Similarity :	Pearson: 0.6050	Spearman: 0.5241
2022-03-23 18:12:25 - Manhattan-Distance:	Pearson: 0.5837	Spearman: 0.5185
2022-03-23 18:12:25 - Euclidean-Distance:	Pearson: 0.5843	Spearman: 0.5184
2022-03-23 18:12:25 - Dot-Product-Similarity:	Pearson: 0.5181	Spearman: 0.4362
2022-03-23 18:12:25 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 4:
2022-03-23 18:12:29 - Cosine-Similarity :	Pearson: 0.6050	Spearman: 0.5241
2022-03-23 18:12:29 - Manhattan-Distance:	Pearson: 0.5837	Spearman: 0.5185
2022-03-23 18:12:29 - Euclidean-Distance:	Pearson: 0.5843	Spearman: 0.5184
2022-03-23 18:12:29 - Dot-Product-Similarity:	Pearson: 0.5181	Spearman: 0.4362
2022-03-23 18:12:29 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 4 after 2 steps:
2022-03-23 18:12:31 - Cosine-Similarity :	Pearson

2022-03-23 18:13:32 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 4 after 11 steps:
2022-03-23 18:13:34 - Cosine-Similarity :	Pearson: 0.6031	Spearman: 0.5264
2022-03-23 18:13:34 - Manhattan-Distance:	Pearson: 0.5788	Spearman: 0.5169
2022-03-23 18:13:34 - Euclidean-Distance:	Pearson: 0.5793	Spearman: 0.5165
2022-03-23 18:13:34 - Dot-Product-Similarity:	Pearson: 0.5282	Spearman: 0.4482
2022-03-23 18:13:34 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 4:
2022-03-23 18:13:37 - Cosine-Similarity :	Pearson: 0.6031	Spearman: 0.5264
2022-03-23 18:13:37 - Manhattan-Distance:	Pearson: 0.5788	Spearman: 0.5169
2022-03-23 18:13:37 - Euclidean-Distance:	Pearson: 0.5793	Spearman: 0.5165
2022-03-23 18:13:37 - Dot-Product-Similarity:	Pearson: 0.5282	Spearman: 0.4482
2022-03-23 18:13:38 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 4 after 12 steps:
2022-03-23 18:13:41 - Cosine-Similarity :	Pears

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:14:14 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 5 after 1 steps:
2022-03-23 18:14:17 - Cosine-Similarity :	Pearson: 0.6063	Spearman: 0.5262
2022-03-23 18:14:17 - Manhattan-Distance:	Pearson: 0.5813	Spearman: 0.5189
2022-03-23 18:14:17 - Euclidean-Distance:	Pearson: 0.5822	Spearman: 0.5187
2022-03-23 18:14:17 - Dot-Product-Similarity:	Pearson: 0.5296	Spearman: 0.4477
2022-03-23 18:14:17 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 5:
2022-03-23 18:14:20 - Cosine-Similarity :	Pearson: 0.6063	Spearman: 0.5262
2022-03-23 18:14:20 - Manhattan-Distance:	Pearson: 0.5813	Spearman: 0.5189
2022-03-23 18:14:20 - Euclidean-Distance:	Pearson: 0.5822	Spearman: 0.5187
2022-03-23 18:14:20 - Dot-Product-Similarity:	Pearson: 0.5296	Spearman: 0.4477
2022-03-23 18:14:21 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 5 after 2 steps:
2022-03-23 18:14:24 - Cosine-Similarity :	Pearson

2022-03-23 18:15:25 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 5 after 11 steps:
2022-03-23 18:15:28 - Cosine-Similarity :	Pearson: 0.6130	Spearman: 0.5305
2022-03-23 18:15:28 - Manhattan-Distance:	Pearson: 0.5885	Spearman: 0.5250
2022-03-23 18:15:28 - Euclidean-Distance:	Pearson: 0.5895	Spearman: 0.5245
2022-03-23 18:15:28 - Dot-Product-Similarity:	Pearson: 0.5338	Spearman: 0.4505
2022-03-23 18:15:28 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 5:
2022-03-23 18:15:31 - Cosine-Similarity :	Pearson: 0.6130	Spearman: 0.5305
2022-03-23 18:15:31 - Manhattan-Distance:	Pearson: 0.5885	Spearman: 0.5250
2022-03-23 18:15:31 - Euclidean-Distance:	Pearson: 0.5895	Spearman: 0.5245
2022-03-23 18:15:31 - Dot-Product-Similarity:	Pearson: 0.5338	Spearman: 0.4505
2022-03-23 18:15:32 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 5 after 12 steps:
2022-03-23 18:15:36 - Cosine-Similarity :	Pears

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:16:08 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 6 after 1 steps:
2022-03-23 18:16:11 - Cosine-Similarity :	Pearson: 0.6128	Spearman: 0.5315
2022-03-23 18:16:11 - Manhattan-Distance:	Pearson: 0.5882	Spearman: 0.5253
2022-03-23 18:16:11 - Euclidean-Distance:	Pearson: 0.5895	Spearman: 0.5254
2022-03-23 18:16:11 - Dot-Product-Similarity:	Pearson: 0.5369	Spearman: 0.4555
2022-03-23 18:16:11 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 6:
2022-03-23 18:16:14 - Cosine-Similarity :	Pearson: 0.6128	Spearman: 0.5315
2022-03-23 18:16:14 - Manhattan-Distance:	Pearson: 0.5882	Spearman: 0.5253
2022-03-23 18:16:14 - Euclidean-Distance:	Pearson: 0.5895	Spearman: 0.5254
2022-03-23 18:16:14 - Dot-Product-Similarity:	Pearson: 0.5369	Spearman: 0.4555
2022-03-23 18:16:15 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 6 after 2 steps:
2022-03-23 18:16:19 - Cosine-Similarity :	Pearson

2022-03-23 18:17:17 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 6 after 11 steps:
2022-03-23 18:17:20 - Cosine-Similarity :	Pearson: 0.6115	Spearman: 0.5297
2022-03-23 18:17:20 - Manhattan-Distance:	Pearson: 0.5881	Spearman: 0.5242
2022-03-23 18:17:20 - Euclidean-Distance:	Pearson: 0.5896	Spearman: 0.5242
2022-03-23 18:17:20 - Dot-Product-Similarity:	Pearson: 0.5404	Spearman: 0.4577
2022-03-23 18:17:20 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 6:
2022-03-23 18:17:23 - Cosine-Similarity :	Pearson: 0.6115	Spearman: 0.5297
2022-03-23 18:17:23 - Manhattan-Distance:	Pearson: 0.5881	Spearman: 0.5242
2022-03-23 18:17:23 - Euclidean-Distance:	Pearson: 0.5896	Spearman: 0.5242
2022-03-23 18:17:23 - Dot-Product-Similarity:	Pearson: 0.5404	Spearman: 0.4577
2022-03-23 18:17:24 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 6 after 12 steps:
2022-03-23 18:17:26 - Cosine-Similarity :	Pears

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:17:58 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 7 after 1 steps:
2022-03-23 18:18:00 - Cosine-Similarity :	Pearson: 0.6090	Spearman: 0.5266
2022-03-23 18:18:00 - Manhattan-Distance:	Pearson: 0.5862	Spearman: 0.5211
2022-03-23 18:18:00 - Euclidean-Distance:	Pearson: 0.5879	Spearman: 0.5228
2022-03-23 18:18:00 - Dot-Product-Similarity:	Pearson: 0.5395	Spearman: 0.4563
2022-03-23 18:18:00 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 7:
2022-03-23 18:18:03 - Cosine-Similarity :	Pearson: 0.6090	Spearman: 0.5266
2022-03-23 18:18:03 - Manhattan-Distance:	Pearson: 0.5862	Spearman: 0.5211
2022-03-23 18:18:03 - Euclidean-Distance:	Pearson: 0.5879	Spearman: 0.5228
2022-03-23 18:18:03 - Dot-Product-Similarity:	Pearson: 0.5395	Spearman: 0.4563
2022-03-23 18:18:04 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 7 after 2 steps:
2022-03-23 18:18:07 - Cosine-Similarity :	Pearson

2022-03-23 18:19:08 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 7 after 11 steps:
2022-03-23 18:19:11 - Cosine-Similarity :	Pearson: 0.6076	Spearman: 0.5247
2022-03-23 18:19:11 - Manhattan-Distance:	Pearson: 0.5850	Spearman: 0.5204
2022-03-23 18:19:11 - Euclidean-Distance:	Pearson: 0.5867	Spearman: 0.5214
2022-03-23 18:19:11 - Dot-Product-Similarity:	Pearson: 0.5382	Spearman: 0.4545
2022-03-23 18:19:11 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 7:
2022-03-23 18:19:14 - Cosine-Similarity :	Pearson: 0.6076	Spearman: 0.5247
2022-03-23 18:19:14 - Manhattan-Distance:	Pearson: 0.5850	Spearman: 0.5204
2022-03-23 18:19:14 - Euclidean-Distance:	Pearson: 0.5867	Spearman: 0.5214
2022-03-23 18:19:14 - Dot-Product-Similarity:	Pearson: 0.5382	Spearman: 0.4545
2022-03-23 18:19:15 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 7 after 12 steps:
2022-03-23 18:19:19 - Cosine-Similarity :	Pears

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:19:49 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 8 after 1 steps:
2022-03-23 18:19:52 - Cosine-Similarity :	Pearson: 0.6077	Spearman: 0.5244
2022-03-23 18:19:52 - Manhattan-Distance:	Pearson: 0.5851	Spearman: 0.5206
2022-03-23 18:19:52 - Euclidean-Distance:	Pearson: 0.5867	Spearman: 0.5211
2022-03-23 18:19:52 - Dot-Product-Similarity:	Pearson: 0.5384	Spearman: 0.4539
2022-03-23 18:19:52 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 8:
2022-03-23 18:19:55 - Cosine-Similarity :	Pearson: 0.6077	Spearman: 0.5244
2022-03-23 18:19:55 - Manhattan-Distance:	Pearson: 0.5851	Spearman: 0.5206
2022-03-23 18:19:55 - Euclidean-Distance:	Pearson: 0.5867	Spearman: 0.5211
2022-03-23 18:19:55 - Dot-Product-Similarity:	Pearson: 0.5384	Spearman: 0.4539
2022-03-23 18:19:56 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 8 after 2 steps:
2022-03-23 18:19:59 - Cosine-Similarity :	Pearson

2022-03-23 18:21:00 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 8 after 11 steps:
2022-03-23 18:21:03 - Cosine-Similarity :	Pearson: 0.6079	Spearman: 0.5241
2022-03-23 18:21:03 - Manhattan-Distance:	Pearson: 0.5859	Spearman: 0.5214
2022-03-23 18:21:03 - Euclidean-Distance:	Pearson: 0.5875	Spearman: 0.5223
2022-03-23 18:21:03 - Dot-Product-Similarity:	Pearson: 0.5390	Spearman: 0.4535
2022-03-23 18:21:03 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 8:
2022-03-23 18:21:07 - Cosine-Similarity :	Pearson: 0.6079	Spearman: 0.5241
2022-03-23 18:21:07 - Manhattan-Distance:	Pearson: 0.5859	Spearman: 0.5214
2022-03-23 18:21:07 - Euclidean-Distance:	Pearson: 0.5875	Spearman: 0.5223
2022-03-23 18:21:07 - Dot-Product-Similarity:	Pearson: 0.5390	Spearman: 0.4535
2022-03-23 18:21:08 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 8 after 12 steps:
2022-03-23 18:21:10 - Cosine-Similarity :	Pears

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:21:42 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 9 after 1 steps:
2022-03-23 18:21:45 - Cosine-Similarity :	Pearson: 0.6082	Spearman: 0.5244
2022-03-23 18:21:45 - Manhattan-Distance:	Pearson: 0.5863	Spearman: 0.5221
2022-03-23 18:21:45 - Euclidean-Distance:	Pearson: 0.5879	Spearman: 0.5226
2022-03-23 18:21:45 - Dot-Product-Similarity:	Pearson: 0.5396	Spearman: 0.4535
2022-03-23 18:21:45 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 9:
2022-03-23 18:21:48 - Cosine-Similarity :	Pearson: 0.6082	Spearman: 0.5244
2022-03-23 18:21:48 - Manhattan-Distance:	Pearson: 0.5863	Spearman: 0.5221
2022-03-23 18:21:48 - Euclidean-Distance:	Pearson: 0.5879	Spearman: 0.5226
2022-03-23 18:21:48 - Dot-Product-Similarity:	Pearson: 0.5396	Spearman: 0.4535
2022-03-23 18:21:49 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 9 after 2 steps:
2022-03-23 18:21:52 - Cosine-Similarity :	Pearson

2022-03-23 18:22:52 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 9 after 11 steps:
2022-03-23 18:22:55 - Cosine-Similarity :	Pearson: 0.6090	Spearman: 0.5251
2022-03-23 18:22:55 - Manhattan-Distance:	Pearson: 0.5876	Spearman: 0.5230
2022-03-23 18:22:55 - Euclidean-Distance:	Pearson: 0.5892	Spearman: 0.5235
2022-03-23 18:22:55 - Dot-Product-Similarity:	Pearson: 0.5400	Spearman: 0.4531
2022-03-23 18:22:55 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 9:
2022-03-23 18:22:58 - Cosine-Similarity :	Pearson: 0.6090	Spearman: 0.5251
2022-03-23 18:22:58 - Manhattan-Distance:	Pearson: 0.5876	Spearman: 0.5230
2022-03-23 18:22:58 - Euclidean-Distance:	Pearson: 0.5892	Spearman: 0.5235
2022-03-23 18:22:58 - Dot-Product-Similarity:	Pearson: 0.5400	Spearman: 0.4531
2022-03-23 18:22:59 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 9 after 12 steps:
2022-03-23 18:23:02 - Cosine-Similarity :	Pears

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:23:35 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 10 after 1 steps:
2022-03-23 18:23:37 - Cosine-Similarity :	Pearson: 0.6087	Spearman: 0.5252
2022-03-23 18:23:37 - Manhattan-Distance:	Pearson: 0.5874	Spearman: 0.5225
2022-03-23 18:23:37 - Euclidean-Distance:	Pearson: 0.5890	Spearman: 0.5235
2022-03-23 18:23:37 - Dot-Product-Similarity:	Pearson: 0.5395	Spearman: 0.4522
2022-03-23 18:23:37 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 10:
2022-03-23 18:23:40 - Cosine-Similarity :	Pearson: 0.6087	Spearman: 0.5252
2022-03-23 18:23:40 - Manhattan-Distance:	Pearson: 0.5874	Spearman: 0.5225
2022-03-23 18:23:40 - Euclidean-Distance:	Pearson: 0.5890	Spearman: 0.5235
2022-03-23 18:23:40 - Dot-Product-Similarity:	Pearson: 0.5395	Spearman: 0.4522
2022-03-23 18:23:41 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 10 after 2 steps:
2022-03-23 18:23:44 - Cosine-Similarity :	Pear

2022-03-23 18:24:45 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 10 after 11 steps:
2022-03-23 18:24:48 - Cosine-Similarity :	Pearson: 0.6076	Spearman: 0.5241
2022-03-23 18:24:48 - Manhattan-Distance:	Pearson: 0.5867	Spearman: 0.5223
2022-03-23 18:24:48 - Euclidean-Distance:	Pearson: 0.5882	Spearman: 0.5228
2022-03-23 18:24:48 - Dot-Product-Similarity:	Pearson: 0.5378	Spearman: 0.4520
2022-03-23 18:24:48 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 10:
2022-03-23 18:24:51 - Cosine-Similarity :	Pearson: 0.6076	Spearman: 0.5241
2022-03-23 18:24:51 - Manhattan-Distance:	Pearson: 0.5867	Spearman: 0.5223
2022-03-23 18:24:51 - Euclidean-Distance:	Pearson: 0.5882	Spearman: 0.5228
2022-03-23 18:24:51 - Dot-Product-Similarity:	Pearson: 0.5378	Spearman: 0.4520
2022-03-23 18:24:53 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 10 after 12 steps:
2022-03-23 18:24:56 - Cosine-Similarity :	Pe

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:25:29 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 11 after 1 steps:
2022-03-23 18:25:32 - Cosine-Similarity :	Pearson: 0.6074	Spearman: 0.5237
2022-03-23 18:25:32 - Manhattan-Distance:	Pearson: 0.5865	Spearman: 0.5224
2022-03-23 18:25:32 - Euclidean-Distance:	Pearson: 0.5879	Spearman: 0.5229
2022-03-23 18:25:32 - Dot-Product-Similarity:	Pearson: 0.5378	Spearman: 0.4525
2022-03-23 18:25:32 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 11:
2022-03-23 18:25:36 - Cosine-Similarity :	Pearson: 0.6074	Spearman: 0.5237
2022-03-23 18:25:36 - Manhattan-Distance:	Pearson: 0.5865	Spearman: 0.5224
2022-03-23 18:25:36 - Euclidean-Distance:	Pearson: 0.5879	Spearman: 0.5229
2022-03-23 18:25:36 - Dot-Product-Similarity:	Pearson: 0.5378	Spearman: 0.4525
2022-03-23 18:25:37 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 11 after 2 steps:
2022-03-23 18:25:38 - Cosine-Similarity :	Pear

2022-03-23 18:26:37 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 11 after 11 steps:
2022-03-23 18:26:39 - Cosine-Similarity :	Pearson: 0.6086	Spearman: 0.5257
2022-03-23 18:26:39 - Manhattan-Distance:	Pearson: 0.5874	Spearman: 0.5233
2022-03-23 18:26:39 - Euclidean-Distance:	Pearson: 0.5889	Spearman: 0.5240
2022-03-23 18:26:39 - Dot-Product-Similarity:	Pearson: 0.5390	Spearman: 0.4545
2022-03-23 18:26:39 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 11:
2022-03-23 18:26:41 - Cosine-Similarity :	Pearson: 0.6086	Spearman: 0.5257
2022-03-23 18:26:41 - Manhattan-Distance:	Pearson: 0.5874	Spearman: 0.5233
2022-03-23 18:26:41 - Euclidean-Distance:	Pearson: 0.5889	Spearman: 0.5240
2022-03-23 18:26:41 - Dot-Product-Similarity:	Pearson: 0.5390	Spearman: 0.4545
2022-03-23 18:26:42 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 11 after 12 steps:
2022-03-23 18:26:45 - Cosine-Similarity :	Pe

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:27:16 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 12 after 1 steps:
2022-03-23 18:27:20 - Cosine-Similarity :	Pearson: 0.6083	Spearman: 0.5257
2022-03-23 18:27:20 - Manhattan-Distance:	Pearson: 0.5871	Spearman: 0.5232
2022-03-23 18:27:20 - Euclidean-Distance:	Pearson: 0.5885	Spearman: 0.5242
2022-03-23 18:27:20 - Dot-Product-Similarity:	Pearson: 0.5393	Spearman: 0.4550
2022-03-23 18:27:20 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 12:
2022-03-23 18:27:23 - Cosine-Similarity :	Pearson: 0.6083	Spearman: 0.5257
2022-03-23 18:27:23 - Manhattan-Distance:	Pearson: 0.5871	Spearman: 0.5232
2022-03-23 18:27:23 - Euclidean-Distance:	Pearson: 0.5885	Spearman: 0.5242
2022-03-23 18:27:23 - Dot-Product-Similarity:	Pearson: 0.5393	Spearman: 0.4550
2022-03-23 18:27:24 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 12 after 2 steps:
2022-03-23 18:27:27 - Cosine-Similarity :	Pear

2022-03-23 18:28:26 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 12 after 11 steps:
2022-03-23 18:28:29 - Cosine-Similarity :	Pearson: 0.6084	Spearman: 0.5263
2022-03-23 18:28:29 - Manhattan-Distance:	Pearson: 0.5870	Spearman: 0.5238
2022-03-23 18:28:29 - Euclidean-Distance:	Pearson: 0.5884	Spearman: 0.5241
2022-03-23 18:28:29 - Dot-Product-Similarity:	Pearson: 0.5399	Spearman: 0.4562
2022-03-23 18:28:29 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 12:
2022-03-23 18:28:33 - Cosine-Similarity :	Pearson: 0.6084	Spearman: 0.5263
2022-03-23 18:28:33 - Manhattan-Distance:	Pearson: 0.5870	Spearman: 0.5238
2022-03-23 18:28:33 - Euclidean-Distance:	Pearson: 0.5884	Spearman: 0.5241
2022-03-23 18:28:33 - Dot-Product-Similarity:	Pearson: 0.5399	Spearman: 0.4562
2022-03-23 18:28:34 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 12 after 12 steps:
2022-03-23 18:28:37 - Cosine-Similarity :	Pe

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:29:10 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 13 after 1 steps:
2022-03-23 18:29:13 - Cosine-Similarity :	Pearson: 0.6084	Spearman: 0.5262
2022-03-23 18:29:13 - Manhattan-Distance:	Pearson: 0.5870	Spearman: 0.5239
2022-03-23 18:29:13 - Euclidean-Distance:	Pearson: 0.5884	Spearman: 0.5243
2022-03-23 18:29:13 - Dot-Product-Similarity:	Pearson: 0.5400	Spearman: 0.4568
2022-03-23 18:29:13 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 13:
2022-03-23 18:29:16 - Cosine-Similarity :	Pearson: 0.6084	Spearman: 0.5262
2022-03-23 18:29:16 - Manhattan-Distance:	Pearson: 0.5870	Spearman: 0.5239
2022-03-23 18:29:16 - Euclidean-Distance:	Pearson: 0.5884	Spearman: 0.5243
2022-03-23 18:29:16 - Dot-Product-Similarity:	Pearson: 0.5400	Spearman: 0.4568
2022-03-23 18:29:17 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 13 after 2 steps:
2022-03-23 18:29:20 - Cosine-Similarity :	Pear

2022-03-23 18:30:19 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 13 after 11 steps:
2022-03-23 18:30:23 - Cosine-Similarity :	Pearson: 0.6093	Spearman: 0.5274
2022-03-23 18:30:23 - Manhattan-Distance:	Pearson: 0.5882	Spearman: 0.5245
2022-03-23 18:30:23 - Euclidean-Distance:	Pearson: 0.5896	Spearman: 0.5250
2022-03-23 18:30:23 - Dot-Product-Similarity:	Pearson: 0.5403	Spearman: 0.4570
2022-03-23 18:30:23 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 13:
2022-03-23 18:30:26 - Cosine-Similarity :	Pearson: 0.6093	Spearman: 0.5274
2022-03-23 18:30:26 - Manhattan-Distance:	Pearson: 0.5882	Spearman: 0.5245
2022-03-23 18:30:26 - Euclidean-Distance:	Pearson: 0.5896	Spearman: 0.5250
2022-03-23 18:30:26 - Dot-Product-Similarity:	Pearson: 0.5403	Spearman: 0.4570
2022-03-23 18:30:27 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 13 after 12 steps:
2022-03-23 18:30:31 - Cosine-Similarity :	Pe

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:31:02 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 14 after 1 steps:
2022-03-23 18:31:04 - Cosine-Similarity :	Pearson: 0.6091	Spearman: 0.5267
2022-03-23 18:31:04 - Manhattan-Distance:	Pearson: 0.5882	Spearman: 0.5244
2022-03-23 18:31:04 - Euclidean-Distance:	Pearson: 0.5896	Spearman: 0.5250
2022-03-23 18:31:04 - Dot-Product-Similarity:	Pearson: 0.5405	Spearman: 0.4570
2022-03-23 18:31:04 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 14:
2022-03-23 18:31:06 - Cosine-Similarity :	Pearson: 0.6091	Spearman: 0.5267
2022-03-23 18:31:06 - Manhattan-Distance:	Pearson: 0.5882	Spearman: 0.5244
2022-03-23 18:31:06 - Euclidean-Distance:	Pearson: 0.5896	Spearman: 0.5250
2022-03-23 18:31:06 - Dot-Product-Similarity:	Pearson: 0.5405	Spearman: 0.4570
2022-03-23 18:31:07 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 14 after 2 steps:
2022-03-23 18:31:10 - Cosine-Similarity :	Pear

2022-03-23 18:32:12 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 14 after 11 steps:
2022-03-23 18:32:14 - Cosine-Similarity :	Pearson: 0.6080	Spearman: 0.5254
2022-03-23 18:32:14 - Manhattan-Distance:	Pearson: 0.5875	Spearman: 0.5236
2022-03-23 18:32:14 - Euclidean-Distance:	Pearson: 0.5890	Spearman: 0.5242
2022-03-23 18:32:14 - Dot-Product-Similarity:	Pearson: 0.5407	Spearman: 0.4565
2022-03-23 18:32:14 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 14:
2022-03-23 18:32:17 - Cosine-Similarity :	Pearson: 0.6080	Spearman: 0.5254
2022-03-23 18:32:17 - Manhattan-Distance:	Pearson: 0.5875	Spearman: 0.5236
2022-03-23 18:32:17 - Euclidean-Distance:	Pearson: 0.5890	Spearman: 0.5242
2022-03-23 18:32:17 - Dot-Product-Similarity:	Pearson: 0.5407	Spearman: 0.4565
2022-03-23 18:32:18 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 14 after 12 steps:
2022-03-23 18:32:21 - Cosine-Similarity :	Pe

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:32:52 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 15 after 1 steps:
2022-03-23 18:32:55 - Cosine-Similarity :	Pearson: 0.6078	Spearman: 0.5255
2022-03-23 18:32:55 - Manhattan-Distance:	Pearson: 0.5873	Spearman: 0.5237
2022-03-23 18:32:55 - Euclidean-Distance:	Pearson: 0.5889	Spearman: 0.5243
2022-03-23 18:32:55 - Dot-Product-Similarity:	Pearson: 0.5412	Spearman: 0.4568
2022-03-23 18:32:55 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 15:
2022-03-23 18:32:59 - Cosine-Similarity :	Pearson: 0.6078	Spearman: 0.5255
2022-03-23 18:32:59 - Manhattan-Distance:	Pearson: 0.5873	Spearman: 0.5237
2022-03-23 18:32:59 - Euclidean-Distance:	Pearson: 0.5889	Spearman: 0.5243
2022-03-23 18:32:59 - Dot-Product-Similarity:	Pearson: 0.5412	Spearman: 0.4568
2022-03-23 18:33:00 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 15 after 2 steps:
2022-03-23 18:33:03 - Cosine-Similarity :	Pear

2022-03-23 18:34:03 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 15 after 11 steps:
2022-03-23 18:34:06 - Cosine-Similarity :	Pearson: 0.6091	Spearman: 0.5274
2022-03-23 18:34:06 - Manhattan-Distance:	Pearson: 0.5881	Spearman: 0.5252
2022-03-23 18:34:06 - Euclidean-Distance:	Pearson: 0.5896	Spearman: 0.5257
2022-03-23 18:34:06 - Dot-Product-Similarity:	Pearson: 0.5435	Spearman: 0.4597
2022-03-23 18:34:06 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 15:
2022-03-23 18:34:09 - Cosine-Similarity :	Pearson: 0.6091	Spearman: 0.5274
2022-03-23 18:34:09 - Manhattan-Distance:	Pearson: 0.5881	Spearman: 0.5252
2022-03-23 18:34:09 - Euclidean-Distance:	Pearson: 0.5896	Spearman: 0.5257
2022-03-23 18:34:09 - Dot-Product-Similarity:	Pearson: 0.5435	Spearman: 0.4597
2022-03-23 18:34:10 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 15 after 12 steps:
2022-03-23 18:34:14 - Cosine-Similarity :	Pe

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:34:45 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 16 after 1 steps:
2022-03-23 18:34:49 - Cosine-Similarity :	Pearson: 0.6092	Spearman: 0.5280
2022-03-23 18:34:49 - Manhattan-Distance:	Pearson: 0.5881	Spearman: 0.5254
2022-03-23 18:34:49 - Euclidean-Distance:	Pearson: 0.5896	Spearman: 0.5259
2022-03-23 18:34:49 - Dot-Product-Similarity:	Pearson: 0.5439	Spearman: 0.4603
2022-03-23 18:34:49 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 16:
2022-03-23 18:34:52 - Cosine-Similarity :	Pearson: 0.6092	Spearman: 0.5280
2022-03-23 18:34:52 - Manhattan-Distance:	Pearson: 0.5881	Spearman: 0.5254
2022-03-23 18:34:52 - Euclidean-Distance:	Pearson: 0.5896	Spearman: 0.5259
2022-03-23 18:34:52 - Dot-Product-Similarity:	Pearson: 0.5439	Spearman: 0.4603
2022-03-23 18:34:53 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 16 after 2 steps:
2022-03-23 18:34:55 - Cosine-Similarity :	Pear

2022-03-23 18:35:56 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 16 after 11 steps:
2022-03-23 18:35:58 - Cosine-Similarity :	Pearson: 0.6092	Spearman: 0.5283
2022-03-23 18:35:58 - Manhattan-Distance:	Pearson: 0.5880	Spearman: 0.5255
2022-03-23 18:35:58 - Euclidean-Distance:	Pearson: 0.5895	Spearman: 0.5257
2022-03-23 18:35:58 - Dot-Product-Similarity:	Pearson: 0.5443	Spearman: 0.4606
2022-03-23 18:35:58 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 16:
2022-03-23 18:36:00 - Cosine-Similarity :	Pearson: 0.6092	Spearman: 0.5283
2022-03-23 18:36:00 - Manhattan-Distance:	Pearson: 0.5880	Spearman: 0.5255
2022-03-23 18:36:00 - Euclidean-Distance:	Pearson: 0.5895	Spearman: 0.5257
2022-03-23 18:36:00 - Dot-Product-Similarity:	Pearson: 0.5443	Spearman: 0.4606
2022-03-23 18:36:00 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 16 after 12 steps:
2022-03-23 18:36:04 - Cosine-Similarity :	Pe

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:36:35 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 17 after 1 steps:
2022-03-23 18:36:38 - Cosine-Similarity :	Pearson: 0.6091	Spearman: 0.5280
2022-03-23 18:36:38 - Manhattan-Distance:	Pearson: 0.5879	Spearman: 0.5253
2022-03-23 18:36:38 - Euclidean-Distance:	Pearson: 0.5894	Spearman: 0.5259
2022-03-23 18:36:38 - Dot-Product-Similarity:	Pearson: 0.5443	Spearman: 0.4606
2022-03-23 18:36:38 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 17:
2022-03-23 18:36:41 - Cosine-Similarity :	Pearson: 0.6091	Spearman: 0.5280
2022-03-23 18:36:41 - Manhattan-Distance:	Pearson: 0.5879	Spearman: 0.5253
2022-03-23 18:36:41 - Euclidean-Distance:	Pearson: 0.5894	Spearman: 0.5259
2022-03-23 18:36:41 - Dot-Product-Similarity:	Pearson: 0.5443	Spearman: 0.4606
2022-03-23 18:36:42 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 17 after 2 steps:
2022-03-23 18:36:45 - Cosine-Similarity :	Pear

2022-03-23 18:37:43 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 17 after 11 steps:
2022-03-23 18:37:46 - Cosine-Similarity :	Pearson: 0.6091	Spearman: 0.5281
2022-03-23 18:37:46 - Manhattan-Distance:	Pearson: 0.5882	Spearman: 0.5255
2022-03-23 18:37:46 - Euclidean-Distance:	Pearson: 0.5897	Spearman: 0.5259
2022-03-23 18:37:46 - Dot-Product-Similarity:	Pearson: 0.5442	Spearman: 0.4605
2022-03-23 18:37:46 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 17:
2022-03-23 18:37:49 - Cosine-Similarity :	Pearson: 0.6091	Spearman: 0.5281
2022-03-23 18:37:49 - Manhattan-Distance:	Pearson: 0.5882	Spearman: 0.5255
2022-03-23 18:37:49 - Euclidean-Distance:	Pearson: 0.5897	Spearman: 0.5259
2022-03-23 18:37:49 - Dot-Product-Similarity:	Pearson: 0.5442	Spearman: 0.4605
2022-03-23 18:37:50 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 17 after 12 steps:
2022-03-23 18:37:53 - Cosine-Similarity :	Pe

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:38:24 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 18 after 1 steps:
2022-03-23 18:38:28 - Cosine-Similarity :	Pearson: 0.6092	Spearman: 0.5282
2022-03-23 18:38:28 - Manhattan-Distance:	Pearson: 0.5883	Spearman: 0.5255
2022-03-23 18:38:28 - Euclidean-Distance:	Pearson: 0.5899	Spearman: 0.5260
2022-03-23 18:38:28 - Dot-Product-Similarity:	Pearson: 0.5441	Spearman: 0.4603
2022-03-23 18:38:28 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 18:
2022-03-23 18:38:31 - Cosine-Similarity :	Pearson: 0.6092	Spearman: 0.5282
2022-03-23 18:38:31 - Manhattan-Distance:	Pearson: 0.5883	Spearman: 0.5255
2022-03-23 18:38:31 - Euclidean-Distance:	Pearson: 0.5899	Spearman: 0.5260
2022-03-23 18:38:31 - Dot-Product-Similarity:	Pearson: 0.5441	Spearman: 0.4603
2022-03-23 18:38:32 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 18 after 2 steps:
2022-03-23 18:38:35 - Cosine-Similarity :	Pear

2022-03-23 18:39:35 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 18 after 11 steps:
2022-03-23 18:39:38 - Cosine-Similarity :	Pearson: 0.6099	Spearman: 0.5290
2022-03-23 18:39:38 - Manhattan-Distance:	Pearson: 0.5891	Spearman: 0.5260
2022-03-23 18:39:38 - Euclidean-Distance:	Pearson: 0.5907	Spearman: 0.5264
2022-03-23 18:39:38 - Dot-Product-Similarity:	Pearson: 0.5441	Spearman: 0.4604
2022-03-23 18:39:38 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 18:
2022-03-23 18:39:42 - Cosine-Similarity :	Pearson: 0.6099	Spearman: 0.5290
2022-03-23 18:39:42 - Manhattan-Distance:	Pearson: 0.5891	Spearman: 0.5260
2022-03-23 18:39:42 - Euclidean-Distance:	Pearson: 0.5907	Spearman: 0.5264
2022-03-23 18:39:42 - Dot-Product-Similarity:	Pearson: 0.5441	Spearman: 0.4604
2022-03-23 18:39:43 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 18 after 12 steps:
2022-03-23 18:39:46 - Cosine-Similarity :	Pe

Iteration:   0%|          | 0/16 [00:00<?, ?it/s]

2022-03-23 18:40:19 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 19 after 1 steps:
2022-03-23 18:40:22 - Cosine-Similarity :	Pearson: 0.6100	Spearman: 0.5290
2022-03-23 18:40:22 - Manhattan-Distance:	Pearson: 0.5892	Spearman: 0.5261
2022-03-23 18:40:22 - Euclidean-Distance:	Pearson: 0.5907	Spearman: 0.5266
2022-03-23 18:40:22 - Dot-Product-Similarity:	Pearson: 0.5441	Spearman: 0.4604
2022-03-23 18:40:22 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 19:
2022-03-23 18:40:25 - Cosine-Similarity :	Pearson: 0.6100	Spearman: 0.5290
2022-03-23 18:40:25 - Manhattan-Distance:	Pearson: 0.5892	Spearman: 0.5261
2022-03-23 18:40:25 - Euclidean-Distance:	Pearson: 0.5907	Spearman: 0.5266
2022-03-23 18:40:25 - Dot-Product-Similarity:	Pearson: 0.5441	Spearman: 0.4604
2022-03-23 18:40:26 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 19 after 2 steps:
2022-03-23 18:40:28 - Cosine-Similarity :	Pear

2022-03-23 18:41:27 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 19 after 11 steps:
2022-03-23 18:41:30 - Cosine-Similarity :	Pearson: 0.6097	Spearman: 0.5287
2022-03-23 18:41:30 - Manhattan-Distance:	Pearson: 0.5891	Spearman: 0.5259
2022-03-23 18:41:30 - Euclidean-Distance:	Pearson: 0.5906	Spearman: 0.5265
2022-03-23 18:41:30 - Dot-Product-Similarity:	Pearson: 0.5441	Spearman: 0.4604
2022-03-23 18:41:30 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset after epoch 19:
2022-03-23 18:41:33 - Cosine-Similarity :	Pearson: 0.6097	Spearman: 0.5287
2022-03-23 18:41:33 - Manhattan-Distance:	Pearson: 0.5891	Spearman: 0.5259
2022-03-23 18:41:33 - Euclidean-Distance:	Pearson: 0.5906	Spearman: 0.5265
2022-03-23 18:41:33 - Dot-Product-Similarity:	Pearson: 0.5441	Spearman: 0.4604
2022-03-23 18:41:34 - EmbeddingSimilarityEvaluator: Evaluating the model on sts-dev dataset in epoch 19 after 12 steps:
2022-03-23 18:41:37 - Cosine-Similarity :	Pe

In [30]:
import pandas as pd

# CSV of STS-dev similarity scores (one row per evaluation call) from the
# 2022-03-23 training run; loaded here to inspect how the scores evolved.
results_csv = '/users/mmazuecos/somos_nlp/output/training_nli_beto-2022-03-23_17-57-01/eval/similarity_evaluation_sts-dev_results.csv'
df = pd.read_csv(results_csv)
df

Unnamed: 0,epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
0,0,-1,0.560276,0.484379,0.552980,0.479646,0.552597,0.479779,0.299552,0.230628
1,0,-1,0.560276,0.484379,0.552980,0.479646,0.552597,0.479779,0.299552,0.230628
2,0,-1,0.560617,0.484452,0.553261,0.479957,0.552885,0.480021,0.300716,0.231258
3,0,-1,0.561365,0.485106,0.553903,0.480507,0.553543,0.480300,0.303176,0.232707
4,0,-1,0.562467,0.486193,0.554843,0.481195,0.554522,0.480866,0.306947,0.235153
...,...,...,...,...,...,...,...,...,...,...
690,19,-1,0.609721,0.528749,0.590559,0.526487,0.589056,0.525800,0.544053,0.460484
691,19,15,0.609733,0.528677,0.590569,0.526479,0.589068,0.525738,0.544072,0.460402
692,19,-1,0.609733,0.528677,0.590569,0.526479,0.589068,0.525738,0.544072,0.460402
693,19,16,0.609713,0.528726,0.590555,0.526515,0.589054,0.525850,0.544049,0.460358


In [None]:
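# Optional: uncomment to reload the trained model from `model_save_path`
# (e.g. after a kernel restart) instead of reusing the in-memory `model`.
# model = SentenceTransformer(model_save_path)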

In [None]:
# Log in to the Hugging Face Hub from the shell; needed before the model push further down.
# NOTE(review): newer transformers releases deprecate `transformers-cli login` in favour of
# the huggingface_hub CLI — confirm against the installed version before re-running.
!transformers-cli login

In [None]:
# Register the packagecloud apt repository for git-lfs so it can be installed with apt-get.
# NOTE(review): this pipes a remote script straight into a root shell; inspect the script
# (or use a trusted package source) before re-running on a machine you care about.
!curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash

Detected operating system as Ubuntu/bionic.
Checking for curl...
Detected curl...
Checking for gpg...
Detected gpg...
Running apt-get update... done.
Installing apt-transport-https... done.
Installing /etc/apt/sources.list.d/github_git-lfs.list...done.
Importing packagecloud gpg key... done.
Running apt-get update... done.

The repository is setup! You can now install packages.


In [None]:
!sudo apt-get install git-lfs

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following NEW packages will be installed:
  git-lfs
0 upgraded, 1 newly installed, 0 to remove and 98 not upgraded.
Need to get 6,229 kB of archives.
After this operation, 14.5 MB of additional disk space will be used.
Get:1 https://packagecloud.io/github/git-lfs/ubuntu bionic/main amd64 git-lfs amd64 2.13.3 [6,229 kB]
Fetched 6,229 kB in 1s (6,947 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 76, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package git-lfs.
(Reading database ... 160841 files and direct

In [None]:
# Initialise Git LFS (prints "Git LFS initialized." on success); the Hub push tracks
# the model weights file with git lfs.
!git lfs install

Git LFS initialized.


In [None]:
# Push the in-memory `model` to the Hugging Face Hub under the given repository name.
# NOTE(review): the repository name is spelled 'sentece' (not 'sentence'); it is left
# unchanged because a different spelling would target a different Hub repo — confirm intent.
# NOTE(review): the recorded output of this cell ends in an OSError, and the git identity
# is only configured in a later cell — possibly related; verify before re-running.
model.save_to_hub('sentece-embeddings-BETO')

2021-08-08 01:26:51 - Create repository and clone it if it exists
2021-08-08 01:26:51 - git version 2.17.1
git-lfs/2.13.3 (GitHub; linux amd64; go 1.16.2)
2021-08-08 01:26:51 - Save model to /tmp/tmpe8otb7zy
2021-08-08 01:26:53 - Track files with git lfs: pytorch_model.bin
2021-08-08 01:26:53 - Push model to the hub. This might take a while


OSError: ignored

In [None]:
# Set the global git identity (e-mail and user name).
# NOTE(review): a personal e-mail address and user name are hardcoded here; consider
# supplying them from the environment rather than committing them in the notebook.
!git config --global user.email "espejelomar@gmail.com"
!git config --global user.name  "espejelomar"