# Contrastive Learning From Scratch

An attempt to build contrastive learning model from scratch. Parts include:

- Loading and preparing Wiki-1M data for model input
- Contrastive learning model
  - Forward passing using pre-trained model
  - Constrastive layer
  - Calculate loss
- Training procedure
  - Default trainer optimizer
  - Default trainer hyper-parameters

In [1]:
import os

# Set Project home
#PROJECT_HOME = os.path.join('/',
#                            'workspace',
#                            'gatech',
#                            'cs7643-deep-learning',
#                            'contrastive-learning-in-distilled-models')
PROJECT_HOME = os.path.join('/',
                            'Users',
                            'ng-ka',
                            'OMSCS',
                            'DL',
                            'DLProject',
                            'contrastive-learning-in-distilled-models')
%cd {PROJECT_HOME}

# Load project code
%reload_ext autoreload
%autoreload 2

import sys
sys.path.insert(0, '../src')

#import distilface
import src.distilface as distilface

C:\Users\ng-ka\OMSCS\DL\DLProject\contrastive-learning-in-distilled-models


## 1. Loading and Preparing Wiki-1M data

Use huggingface `datasets` library to load local file data.

In [18]:
#size = 1000000
size = 100
train_size = int(0.75*size)
train_name = "data/training/wiki"+str(train_size)+"_train_for_simcse.txt"
valid_name = "data/training/wiki"+str(size-train_size)+"_valid_for_simcse.txt"

from itertools import islice
with open("data/training/wiki1m_for_simcse.txt", "r", encoding="utf8") as myfile:
    train_data = list(islice(myfile, train_size))

# always remember, use files in a with statement
with open(train_name, "w",encoding="utf8") as f2:
    for item in train_data:
        f2.write(item)
        
with open("data/training/wiki1m_for_simcse.txt", "r", encoding="utf8") as myfile:
    train_data = list(islice(myfile, train_size,size))
    
# always remember, use files in a with statement
with open(valid_name, "w",encoding="utf8") as f2:
    for item in train_data:
        f2.write(item)

In [2]:
#size = 1000000
size = 100
train_size = int(0.75*size)
train_name = "data/training/wiki"+str(train_size)+"_train_for_simcse.txt"
valid_name = "data/training/wiki"+str(size-train_size)+"_valid_for_simcse.txt"

import numpy as np

from datasets import load_dataset

data_files = {'train': train_name, 'validation': valid_name}
# data_files = {'train': 'data/training/wiki5k.txt'}
#datasets = load_dataset('text', data_files=data_files, cache_dir='./data/')
datasets = load_dataset('text', data_files=data_files)

  from .autonotebook import tqdm as notebook_tqdm
Using custom data configuration default-933581723230cc23
Reusing dataset text (C:\Users\ng-ka\.cache\huggingface\datasets\text\default-933581723230cc23\0.0.0\4b86d314f7236db91f0a0f5cda32d4375445e64c5eda2692655dd99c2dac68e8)
100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 181.79it/s]


In [3]:
# Unsupervised / Self-supervised dataset

column_names = datasets["train"].column_names
sent0_cname = column_names[0]
sent1_cname = column_names[0]

print('column_names:', column_names)
print('sent0_cname:', sent0_cname, '| sent1_cname:', sent1_cname)

column_names: ['text']
sent0_cname: text | sent1_cname: text


In [4]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

In [5]:
def prepare_features(examples):
    total = len(examples[sent0_cname])

    # Avoid "None" fields 
    for idx in range(total):
        if examples[sent0_cname][idx] is None:
            examples[sent0_cname][idx] = " "
        if examples[sent1_cname][idx] is None:
            examples[sent1_cname][idx] = " "
    
    sentences = examples[sent0_cname] + examples[sent1_cname]

    sent_features = tokenizer(
        sentences,
        max_length=32,
        truncation=True,
        padding=True,
    )

    features = {}
    for key in sent_features:
        features[key] = [[sent_features[key][i], sent_features[key][i+total]] for i in range(total)]

    return features

In [6]:
train_dataset = datasets["train"].map(prepare_features,
                                      batched=True,
                                    #   num_proc=24,
                                      remove_columns=column_names)
validation_dataset = datasets["validation"].map(prepare_features,
                                      batched=True,
                                    #   num_proc=24,
                                      remove_columns=column_names)

Loading cached processed dataset at C:\Users\ng-ka\.cache\huggingface\datasets\text\default-933581723230cc23\0.0.0\4b86d314f7236db91f0a0f5cda32d4375445e64c5eda2692655dd99c2dac68e8\cache-3727a2900f89dabb.arrow
Loading cached processed dataset at C:\Users\ng-ka\.cache\huggingface\datasets\text\default-933581723230cc23\0.0.0\4b86d314f7236db91f0a0f5cda32d4375445e64c5eda2692655dd99c2dac68e8\cache-12e35541180b4e8c.arrow


In [7]:
train_dataset.num_rows

75

In [8]:
str(train_dataset['input_ids'][0][0])

'[101, 26866, 1999, 2148, 2660, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]'

In [None]:
str(train_dataset['input_ids'][0][1])

In [None]:
train_dataset.features.keys()

Sentence 1 and Sentence 2 are the same sentence

## 2. Contrastive Learning Model

In [9]:
import torch
import torch.nn as nn

from transformers import AutoTokenizer, BertModel, BertPreTrainedModel, AutoConfig
from transformers.modeling_outputs import SequenceClassifierOutput, BaseModelOutputWithPooling

from src.distilface.modules.pooler import Pooler
from src.distilface.modules.similarity import Similarity


class BertCLModel(BertPreTrainedModel):
    def __init__(self, config, pooler_type='avg_first_last', temp=0.05):
        super().__init__(config)

        self.config = config
        self.pooler_type = pooler_type
        self.temp = 0.05

        self.bert = BertModel(config, add_pooling_layer=False)
        self.pooler = Pooler(pooler_type)
        self.sim = Similarity(temp=temp)

        self.init_weights()

    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None):
        if self.training:
            return self.cl_forward(self.bert, input_ids, attention_mask, token_type_ids)
        else:
            return self.sent_emb(self.bert, input_ids, attention_mask, token_type_ids)

    def cl_forward(self, encoder, input_ids=None, attention_mask=None, token_type_ids=None):
        batch_size = input_ids.size(0)
        num_sent = input_ids.size(1)  # Number of sentences in one instance: 2 sentences
        
        input_ids = input_ids.view((-1, input_ids.size(-1))) # (bs * num_sent, len)
        attention_mask = attention_mask.view((-1, attention_mask.size(-1))) # (bs * num_sent len)
        token_type_ids = token_type_ids.view((-1, token_type_ids.size(-1))) # (bs * num_sent, len)

        outputs = encoder(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            output_attentions=True,
            output_hidden_states=True,
            return_dict=True,
        )
        # Pooling
        pooler_output = self.pooler(attention_mask, outputs)
        pooler_output = pooler_output.view((batch_size, num_sent, pooler_output.size(-1)))  # (bs, num_sent, hidden)

        # Separate representation
        z1, z2 = pooler_output[:, 0], pooler_output[:, 1]

        # Cosine similarity
        cos_sim = self.sim(z1.unsqueeze(1), z2.unsqueeze(0))

        # Calculate contrastive loss
        criterion = nn.CrossEntropyLoss()
        labels = torch.arange(cos_sim.size(0)).long().to(self.device)
        loss = criterion(cos_sim, labels)

        return SequenceClassifierOutput(
            loss=loss,
            logits=cos_sim,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def sent_emb(self, encoder, input_ids=None, attention_mask=None, token_type_ids=None):
        
        input_ids = input_ids.view((-1, input_ids.size(-1))) # (bs * num_sent, len)
        attention_mask = attention_mask.view((-1, attention_mask.size(-1))) # (bs * num_sent len)
        token_type_ids = token_type_ids.view((-1, token_type_ids.size(-1))) # (bs * num_sent, len)
        
        outputs = encoder(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            output_attentions=True,
            output_hidden_states=True,
            return_dict=True,
        )

        pooler_output = self.pooler(attention_mask, outputs)

        return BaseModelOutputWithPooling(
            pooler_output=pooler_output,
            last_hidden_state=outputs.last_hidden_state,
            hidden_states=outputs.hidden_states,
        )


pretrained_model_name = 'bert-base-uncased'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
config = AutoConfig.from_pretrained(pretrained_model_name)

model = BertCLModel.from_pretrained(pretrained_model_name, config=config).to(device)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name)

model.eval();


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertCLModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'bert.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'bert.pooler.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertCLModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertCLModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


### 2.1 Initial BERT embeddings performance

In [10]:
import senteval


def prepare(params, samples):
    return

def batcher(params, batch):
    sentences = [" ".join(s) for s in batch]
    batch = tokenizer.batch_encode_plus(
        sentences,
        return_tensors="pt",
        padding=True,
    )

    for k in batch:
        batch[k] = batch[k].to(device)

    with torch.no_grad():
        outputs = model(**batch)

    pooled_result = outputs.pooler_output.cpu()

    return pooled_result


def evaluate_model():
    PATH_TO_DATA = "./data"

    params = {"task_path": PATH_TO_DATA, "usepytorch": True, "kfold": 10}
    tasks = ["STSBenchmark", 'STS12', 'STS13', 'STS14', 'STS15']

    se = senteval.engine.SE(params, batcher, prepare)
    results = se.eval(tasks)

    return results

In [18]:
results = evaluate_model()
#results

  sent1 = np.array([s.split() for s in sent1])[not_empty_idx]
  sent2 = np.array([s.split() for s in sent2])[not_empty_idx]


In [None]:
results

In [20]:
    print(results["STS12"]["all"]["spearman"].keys())
    print('STS12 mean: ', results["STS12"]["all"]["spearman"])
    print('STS12 wmean: ', results["STS12"]["all"]["spearman"])
    print('STS13 mean: ', results["STS13"]["all"]["spearman"]["mean"])
    print('STS13 wmean: ', results["STS13"]["all"]["spearman"]["wmean"])
    print('STS14 mean: ', results["STS14"]["all"]["spearman"]["mean"])
    print('STS14 wmean: ', results["STS14"]["all"]["spearman"]["wmean"])
    print('STS15 mean: ', results["STS15"]["all"]["spearman"]["mean"])
    print('STS15 wmean: ', results["STS15"]["all"]["spearman"]["wmean"])
    print('STSB: ', results["STSBenchmark"]["spearman"])

dict_keys(['mean', 'wmean'])
STS12 mean:  {'mean': 0.5468686878940625, 'wmean': 0.5443486582913551}
STS12 wmean:  {'mean': 0.5468686878940625, 'wmean': 0.5443486582913551}
STS13 mean:  0.5450458580607989
STS13 wmean:  0.5802891692744219
STS14 mean:  0.5882437430679742
STS14 wmean:  0.588747600067705
STS15 mean:  0.6736581656213602
STS15 wmean:  0.6794191674814121
STSB:  0.7117942971683138


## 3. Trainer

In [11]:
import mlflow

from transformers import Trainer, TrainingArguments
from transformers import default_data_collator

training_args = TrainingArguments(
    output_dir='output',
    overwrite_output_dir=True,
    learning_rate=5e-05,
    weight_decay=0.0,
    num_train_epochs=1,
    eval_steps=100,
)

In [12]:
#model.train()

def model_init():
    pretrained_model_name = 'bert-base-uncased'
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    config = AutoConfig.from_pretrained(pretrained_model_name)
    model = BertCLModel.from_pretrained(pretrained_model_name, config=config).to(device)
    return model

def my_objective(metrics):
    return metrics["eval_loss"]

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions.argmax(axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# from transformers import EvalPrediction
# def compute_metrics(p: EvalPrediction):
#     preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
#     preds = np.argmax(preds, axis=1)
#     return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()}

trainer = Trainer(
    model_init=model_init,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=validation_dataset,
    tokenizer=tokenizer,
    data_collator=default_data_collator,
    compute_metrics= compute_metrics
)

loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading weights file https://huggingface.co/bert-base-u

In [19]:
#pip install "ray[tune]"
#pip install hyperopt
#pip install optuna

In [22]:
#os.environ["TUNE_MAX_PENDING_TRIALS_PG"] = "1"

In [12]:
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.schedulers import ASHAScheduler

trainer.hyperparameter_search(
    direction="minimize", 
    backend="optuna",
    #compute_objective=my_objective
#     resources_per_trial={
#         "cpu": 1,
#         "gpu": 1
#     },
    # Choose among many libraries:
    # https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
    #search_alg=HyperOptSearch(metric="objective", mode="max"),
    # Choose among schedulers:
    # https://docs.ray.io/en/latest/tune/api_docs/schedulers.html
    #scheduler=ASHAScheduler(metric="objective", mode="max"),
    #n_trials=8 # number of trials
)

[32m[I 2022-03-26 01:59:33,349][0m A new study created in memory with name: no-name-5f23b8e3-5b6d-4181-b349-f205acf147cd[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-26 01:59:37,078][0m Trial 0 failed because of the following error: KeyError('eval_loss')[0m
Traceback (most recent call last):
  File "C:\Users\ng-ka\anaconda3\envs\cl-distilled\lib\site-packages\optuna\study\_optimize.py", line 213, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\ng-ka\anaconda3\envs\cl-distilled\lib\site-packages\transformers\integrations.py", line 158, in _objective
    trainer.objective = trainer.compute_objective(metrics)
  File "C:\Users\ng-ka\AppData\Local\Temp\ipykernel_10524\4209526589.py", line 13, in my_objective
    return metrics["eval_loss"]
KeyError: 'eval_loss'


KeyError: 'eval_loss'

In [14]:
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.schedulers import ASHAScheduler

trainer.hyperparameter_search(
    direction="maximize", 
    backend="optuna"#,
    #compute_objective=my_objective
#     resources_per_trial={
#         "cpu": 1,
#         "gpu": 1
#     },
    # Choose among many libraries:
    # https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
    #search_alg=HyperOptSearch(metric="objective", mode="max"),
    # Choose among schedulers:
    # https://docs.ray.io/en/latest/tune/api_docs/schedulers.html
    #scheduler=ASHAScheduler(metric="objective", mode="max"),
    #n_trials=8 # number of trials
)

[32m[I 2022-03-26 01:20:20,051][0m A new study created in memory with name: no-name-a99ab3dc-10de-4ee0-8e6c-03e2e03bf9fd[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1106, -0.2462, -0.5635,  ..., -0.3272,  0.4589,  0.5335],
         [-0.0938, -0.4328, -0.1038,  ..., -0.4340,  0.3108, -0.0326],
         [-0.3895,  0.1125, -0.9434,  ..., -0.2336, -0.2477, -0.2000],
         ...,
         [ 0.1991,  0.0874, -0.0233,  ..., -0.1527,  0.0203, -0.3245],
         [ 0.3433,  0.1391,  0.0133,  ..., -0.0945, -0.1172, -0.2706],
         [ 0.3886, -0.2084,  0.1146,  ..., -0.1066,  0.0879, -0.0374]],

        [[-0.1106, -0.2462, -0.5635,  ..., -0.3272,  0.4589,  0.5335],
         [-0.0938, -0.4328, -0.1038,  ..., -0.4340,  0.3108, -0.0326],
         [-0.3895,  0.1125, -0.9434,  ..., -0.2336, -0.2477, -0.2000],
         ...,
         [ 0.1991,  0.0874, -0.0233,  ..., -0.1527,  0.0203, -0.3245],
         [ 0.3433,  0.1391,  0.0133,  ..., -0.0945, -0.1172, -0.2706],
         [ 0.3886, -0.2084,  0.1146,  ..., -0.1066,  0.0879, -0.0374]],

        [[-0.0464, -0.1074,  0.0121,  ..., -0.3355,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.2531,  0.0180, -0.2632,  ...,  0.0076,  0.2578,  0.5016],
         [ 0.0616,  0.1561, -0.5806,  ...,  0.0873,  0.6580,  0.0937],
         [-0.3186, -0.2305, -0.3577,  ...,  0.3526, -0.1710, -0.0380],
         ...,
         [-0.4423, -0.6505, -0.2774,  ...,  0.4476,  0.0275, -0.2425],
         [ 0.1962,  0.0136, -0.0334,  ...,  0.3350, -0.1302, -0.1040],
         [ 0.1378, -0.0316, -0.0877,  ...,  0.2801, -0.1191, -0.0711]],

        [[-0.2531,  0.0180, -0.2632,  ...,  0.0076,  0.2578,  0.5016],
         [ 0.0616,  0.1561, -0.5806,  ...,  0.0873,  0.6580,  0.0937],
         [-0.3186, -0.2305, -0.3577,  ...,  0.3526, -0.1710, -0.0380],
         ...,
         [-0.4423, -0.6505, -0.2774,  ...,  0.4476,  0.0275, -0.2425],
         [ 0.1962,  0.0136, -0.0334,  ...,  0.3350, -0.1302, -0.1040],
         [ 0.1378, -0.0316, -0.0877,  ...,  0.2801, -0.1191, -0.0711]],

        [[-0.0951,  0.1491, -0.1526,  ..., -0.4010,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1172,  0.2731, -0.4217,  ..., -0.0478,  0.1394,  0.4155],
         [ 0.0948, -0.3210,  0.2560,  ..., -0.2052,  0.0144,  1.0014],
         [ 0.2076,  0.6788, -0.3615,  ...,  0.6194, -0.4277, -0.1024],
         ...,
         [-0.4949, -0.3074,  0.2675,  ...,  0.2541,  0.2093,  0.4784],
         [-0.3626, -0.0726,  0.3191,  ...,  0.2530,  0.3027,  0.3209],
         [-0.4600, -0.1919,  0.3342,  ...,  0.2763,  0.2606,  0.3247]],

        [[-0.1172,  0.2731, -0.4217,  ..., -0.0478,  0.1394,  0.4155],
         [ 0.0948, -0.3210,  0.2560,  ..., -0.2052,  0.0144,  1.0014],
         [ 0.2076,  0.6788, -0.3615,  ...,  0.6194, -0.4277, -0.1024],
         ...,
         [-0.4949, -0.3074,  0.2675,  ...,  0.2541,  0.2093,  0.4784],
         [-0.3626, -0.0726,  0.3191,  ...,  0.2530,  0.3027,  0.3209],
         [-0.4600, -0.1919,  0.3342,  ...,  0.2763,  0.2606,  0.3247]],

        [[-0.3445, -0.0931, -0.6889,  ..., -0.2401,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.0851,  0.1346,  0.3771,  ..., -0.6946,  0.4594,  0.0742],
         [ 0.3850,  0.3982,  0.7696,  ..., -0.4718,  0.5582, -0.2852],
         [ 0.1041,  0.7406,  1.2298,  ..., -0.9674,  0.7835, -1.3903],
         ...,
         [ 0.0823,  0.1167,  0.5200,  ..., -0.2628,  0.3261, -0.1512],
         [ 0.1239,  0.0560,  0.6213,  ..., -0.3463,  0.3463, -0.1593],
         [ 0.5366, -0.1472,  0.5377,  ..., -0.2379,  0.0901, -0.5355]],

        [[-0.0851,  0.1346,  0.3771,  ..., -0.6946,  0.4594,  0.0742],
         [ 0.3850,  0.3982,  0.7696,  ..., -0.4718,  0.5582, -0.2852],
         [ 0.1041,  0.7406,  1.2298,  ..., -0.9674,  0.7835, -1.3903],
         ...,
         [ 0.0823,  0.1167,  0.5200,  ..., -0.2628,  0.3261, -0.1512],
         [ 0.1239,  0.0560,  0.6213,  ..., -0.3463,  0.3463, -0.1593],
         [ 0.5366, -0.1472,  0.5377,  ..., -0.2379,  0.0901, -0.5355]]],
       device='cuda:0'), pooler_output=tensor([[-0.0192

[33m[W 2022-03-26 01:20:25,666][0m Trial 0 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vo

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1355, -0.2088, -0.5548,  ..., -0.3842,  0.4066,  0.5113],
         [-0.0202, -0.3950, -0.1454,  ..., -0.3707,  0.1583, -0.0636],
         [-0.3517,  0.0283, -0.8427,  ..., -0.1805, -0.2602, -0.1043],
         ...,
         [ 0.1852,  0.1000,  0.0365,  ..., -0.1421, -0.0258, -0.2662],
         [ 0.3088,  0.1389,  0.0627,  ..., -0.1047, -0.1555, -0.2197],
         [ 0.4164, -0.2124,  0.0866,  ..., -0.1486,  0.0663, -0.0299]],

        [[-0.1355, -0.2088, -0.5548,  ..., -0.3842,  0.4066,  0.5113],
         [-0.0202, -0.3950, -0.1454,  ..., -0.3707,  0.1583, -0.0636],
         [-0.3517,  0.0283, -0.8427,  ..., -0.1805, -0.2602, -0.1043],
         ...,
         [ 0.1852,  0.1000,  0.0365,  ..., -0.1421, -0.0258, -0.2662],
         [ 0.3088,  0.1389,  0.0627,  ..., -0.1047, -0.1555, -0.2197],
         [ 0.4164, -0.2124,  0.0866,  ..., -0.1486,  0.0663, -0.0299]],

        [[-0.0737, -0.0914,  0.0030,  ..., -0.3271,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-2.4888e-01, -3.4404e-02, -2.9022e-01,  ..., -8.8202e-02,
           2.3071e-01,  4.8807e-01],
         [ 7.9080e-02,  1.4177e-01, -5.6956e-01,  ...,  5.0527e-02,
           4.9375e-01,  3.2046e-02],
         [-3.7063e-01, -2.9111e-01, -3.3957e-01,  ...,  1.6531e-01,
          -1.7569e-01, -7.9710e-03],
         ...,
         [-4.5353e-01, -6.1363e-01, -1.7486e-01,  ...,  3.5396e-01,
           4.1412e-02, -2.6297e-01],
         [ 1.3086e-01, -6.1674e-02,  3.9566e-02,  ...,  2.3322e-01,
          -1.1612e-01, -1.0860e-01],
         [ 6.8198e-02, -1.0198e-01,  1.0794e-04,  ...,  1.8393e-01,
          -1.0640e-01, -8.1798e-02]],

        [[-2.4888e-01, -3.4404e-02, -2.9022e-01,  ..., -8.8202e-02,
           2.3071e-01,  4.8807e-01],
         [ 7.9080e-02,  1.4177e-01, -5.6956e-01,  ...,  5.0527e-02,
           4.9375e-01,  3.2046e-02],
         [-3.7063e-01, -2.9111e-01, -3.3957e-01,  ...,  1.6531e-01,
          -1.75

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.2082,  0.2727, -0.3979,  ..., -0.1335,  0.1070,  0.4348],
         [ 0.1047, -0.3493,  0.3192,  ..., -0.2242, -0.0443,  0.8836],
         [ 0.0953,  0.5664, -0.3148,  ...,  0.3711, -0.4493, -0.1702],
         ...,
         [-0.5112, -0.3365,  0.2719,  ...,  0.2191,  0.1861,  0.3849],
         [-0.3823, -0.1250,  0.3249,  ...,  0.2116,  0.2468,  0.2349],
         [-0.4715, -0.2309,  0.3388,  ...,  0.2354,  0.2231,  0.2316]],

        [[-0.2082,  0.2727, -0.3979,  ..., -0.1335,  0.1070,  0.4348],
         [ 0.1047, -0.3493,  0.3192,  ..., -0.2242, -0.0443,  0.8836],
         [ 0.0953,  0.5664, -0.3148,  ...,  0.3711, -0.4493, -0.1702],
         ...,
         [-0.5112, -0.3365,  0.2719,  ...,  0.2191,  0.1861,  0.3849],
         [-0.3823, -0.1250,  0.3249,  ...,  0.2116,  0.2468,  0.2349],
         [-0.4715, -0.2309,  0.3388,  ...,  0.2354,  0.2231,  0.2316]],

        [[-0.3169,  0.0376, -0.5005,  ..., -0.2729,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1114,  0.1696,  0.3558,  ..., -0.7010,  0.4520,  0.1200],
         [ 0.3137,  0.3655,  0.7761,  ..., -0.4602,  0.5354, -0.2100],
         [ 0.0157,  0.5691,  1.0252,  ..., -0.8922,  0.6335, -1.2636],
         ...,
         [ 0.0746,  0.1011,  0.5057,  ..., -0.2660,  0.2500, -0.1018],
         [ 0.1272,  0.0591,  0.6208,  ..., -0.3739,  0.2824, -0.1228],
         [ 0.4877, -0.1475,  0.5489,  ..., -0.2263,  0.0616, -0.4760]],

        [[-0.1114,  0.1696,  0.3558,  ..., -0.7010,  0.4520,  0.1200],
         [ 0.3137,  0.3655,  0.7761,  ..., -0.4602,  0.5354, -0.2100],
         [ 0.0157,  0.5691,  1.0252,  ..., -0.8922,  0.6335, -1.2636],
         ...,
         [ 0.0746,  0.1011,  0.5057,  ..., -0.2660,  0.2500, -0.1018],
         [ 0.1272,  0.0591,  0.6208,  ..., -0.3739,  0.2824, -0.1228],
         [ 0.4877, -0.1475,  0.5489,  ..., -0.2263,  0.0616, -0.4760]]],
       device='cuda:0'), pooler_output=tensor([[-0.0204

[33m[W 2022-03-26 01:20:31,331][0m Trial 1 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vo

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1006, -0.3163, -0.5420,  ..., -0.2996,  0.4915,  0.4943],
         [-0.1581, -0.4692, -0.1547,  ..., -0.4096,  0.5270, -0.0441],
         [-0.4211,  0.1318, -1.1190,  ..., -0.3465, -0.1529, -0.1921],
         ...,
         [ 0.1822, -0.0020, -0.0786,  ..., -0.1851,  0.0841, -0.3412],
         [ 0.3021,  0.0874, -0.0631,  ..., -0.1178, -0.0265, -0.2880],
         [ 0.3052, -0.2335,  0.1067,  ..., -0.0882,  0.1087, -0.0621]],

        [[-0.1006, -0.3163, -0.5420,  ..., -0.2996,  0.4915,  0.4943],
         [-0.1581, -0.4692, -0.1547,  ..., -0.4096,  0.5270, -0.0441],
         [-0.4211,  0.1318, -1.1190,  ..., -0.3465, -0.1529, -0.1921],
         ...,
         [ 0.1822, -0.0020, -0.0786,  ..., -0.1851,  0.0841, -0.3412],
         [ 0.3021,  0.0874, -0.0631,  ..., -0.1178, -0.0265, -0.2880],
         [ 0.3052, -0.2335,  0.1067,  ..., -0.0882,  0.1087, -0.0621]],

        [[-0.0569, -0.1130, -0.0126,  ..., -0.3494,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.2387,  0.0613, -0.1742,  ..., -0.0042,  0.3087,  0.5062],
         [ 0.0950,  0.1996, -0.5669,  ...,  0.0286,  0.7019,  0.1076],
         [-0.2710, -0.1512, -0.3909,  ...,  0.3588, -0.2112, -0.0689],
         ...,
         [-0.3856, -0.6033, -0.3482,  ...,  0.4748,  0.0156, -0.2201],
         [ 0.2038,  0.0478, -0.0303,  ...,  0.3489, -0.1348, -0.1249],
         [ 0.1421, -0.0015, -0.0971,  ...,  0.2852, -0.1099, -0.0643]],

        [[-0.2387,  0.0613, -0.1742,  ..., -0.0042,  0.3087,  0.5062],
         [ 0.0950,  0.1996, -0.5669,  ...,  0.0286,  0.7019,  0.1076],
         [-0.2710, -0.1512, -0.3909,  ...,  0.3588, -0.2112, -0.0689],
         ...,
         [-0.3856, -0.6033, -0.3482,  ...,  0.4748,  0.0156, -0.2201],
         [ 0.2038,  0.0478, -0.0303,  ...,  0.3489, -0.1348, -0.1249],
         [ 0.1421, -0.0015, -0.0971,  ...,  0.2852, -0.1099, -0.0643]],

        [[-0.0757,  0.1899, -0.2157,  ..., -0.3722,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-6.8138e-02,  2.9449e-01, -4.7231e-01,  ...,  1.4471e-04,
           1.3109e-01,  4.2370e-01],
         [ 1.1972e-01, -2.7321e-01,  3.7841e-02,  ..., -1.4912e-01,
          -7.3799e-03,  1.1512e+00],
         [ 2.5825e-01,  6.4546e-01, -4.7804e-01,  ...,  7.4622e-01,
          -3.9121e-01, -1.6186e-02],
         ...,
         [-4.8003e-01, -2.8047e-01,  2.7699e-01,  ...,  3.1431e-01,
           1.5953e-01,  5.5628e-01],
         [-3.6050e-01, -4.5558e-02,  3.0705e-01,  ...,  3.2743e-01,
           3.1145e-01,  4.1165e-01],
         [-4.5392e-01, -1.7151e-01,  2.9835e-01,  ...,  3.5348e-01,
           2.4245e-01,  4.5142e-01]],

        [[-6.8138e-02,  2.9449e-01, -4.7231e-01,  ...,  1.4471e-04,
           1.3109e-01,  4.2370e-01],
         [ 1.1972e-01, -2.7321e-01,  3.7841e-02,  ..., -1.4912e-01,
          -7.3799e-03,  1.1512e+00],
         [ 2.5825e-01,  6.4546e-01, -4.7804e-01,  ...,  7.4622e-01,
          -3.91

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1326,  0.1539,  0.3872,  ..., -0.8627,  0.4974, -0.0045],
         [ 0.4625,  0.4862,  0.7271,  ..., -0.5743,  0.6116, -0.3250],
         [ 0.0330,  0.8448,  1.1653,  ..., -1.1577,  0.7565, -1.4694],
         ...,
         [ 0.0695,  0.1596,  0.5310,  ..., -0.3780,  0.3943, -0.1793],
         [ 0.1218,  0.0777,  0.6051,  ..., -0.4509,  0.3930, -0.1956],
         [ 0.5661, -0.1010,  0.4920,  ..., -0.2790,  0.0977, -0.6352]],

        [[-0.1326,  0.1539,  0.3872,  ..., -0.8627,  0.4974, -0.0045],
         [ 0.4625,  0.4862,  0.7271,  ..., -0.5743,  0.6116, -0.3250],
         [ 0.0330,  0.8448,  1.1653,  ..., -1.1577,  0.7565, -1.4694],
         ...,
         [ 0.0695,  0.1596,  0.5310,  ..., -0.3780,  0.3943, -0.1793],
         [ 0.1218,  0.0777,  0.6051,  ..., -0.4509,  0.3930, -0.1956],
         [ 0.5661, -0.1010,  0.4920,  ..., -0.2790,  0.0977, -0.6352]]],
       device='cuda:0'), pooler_output=tensor([[-0.0110

[33m[W 2022-03-26 01:20:39,040][0m Trial 2 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vo

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1459, -0.2467, -0.5769,  ..., -0.2542,  0.4722,  0.3936],
         [-0.2062, -0.4336, -0.0903,  ..., -0.4194,  0.4931, -0.1205],
         [-0.4755,  0.1694, -1.0541,  ..., -0.3461, -0.2255, -0.2730],
         ...,
         [ 0.1777,  0.0776, -0.0711,  ..., -0.1874,  0.0546, -0.3782],
         [ 0.3369,  0.1380, -0.0492,  ..., -0.1145, -0.0597, -0.3151],
         [ 0.3012, -0.2068,  0.1828,  ..., -0.1018,  0.1022, -0.0869]],

        [[-0.1459, -0.2467, -0.5769,  ..., -0.2542,  0.4722,  0.3936],
         [-0.2062, -0.4336, -0.0903,  ..., -0.4194,  0.4931, -0.1205],
         [-0.4755,  0.1694, -1.0541,  ..., -0.3461, -0.2255, -0.2730],
         ...,
         [ 0.1777,  0.0776, -0.0711,  ..., -0.1874,  0.0546, -0.3782],
         [ 0.3369,  0.1380, -0.0492,  ..., -0.1145, -0.0597, -0.3151],
         [ 0.3012, -0.2068,  0.1828,  ..., -0.1018,  0.1022, -0.0869]],

        [[-0.0553, -0.1536,  0.0445,  ..., -0.3597, -0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.2366,  0.1343, -0.2360,  ...,  0.0956,  0.2621,  0.4376],
         [-0.0313,  0.1867, -0.5923,  ...,  0.0999,  0.7910,  0.0987],
         [-0.3388, -0.0562, -0.3813,  ...,  0.5824, -0.1336, -0.0598],
         ...,
         [-0.5124, -0.6201, -0.4444,  ...,  0.5382, -0.0290, -0.2417],
         [ 0.2224,  0.0959, -0.1224,  ...,  0.4526, -0.1915, -0.1198],
         [ 0.1628,  0.0420, -0.1906,  ...,  0.3862, -0.1612, -0.0687]],

        [[-0.2366,  0.1343, -0.2360,  ...,  0.0956,  0.2621,  0.4376],
         [-0.0313,  0.1867, -0.5923,  ...,  0.0999,  0.7910,  0.0987],
         [-0.3388, -0.0562, -0.3813,  ...,  0.5824, -0.1336, -0.0598],
         ...,
         [-0.5124, -0.6201, -0.4444,  ...,  0.5382, -0.0290, -0.2417],
         [ 0.2224,  0.0959, -0.1224,  ...,  0.4526, -0.1915, -0.1198],
         [ 0.1628,  0.0420, -0.1906,  ...,  0.3862, -0.1612, -0.0687]],

        [[-0.0519,  0.2230, -0.1750,  ..., -0.4863,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[ 0.0016,  0.2809, -0.4530,  ...,  0.0542,  0.1618,  0.4192],
         [ 0.0976, -0.2196,  0.0928,  ..., -0.0718,  0.0757,  1.0781],
         [ 0.3186,  0.6953, -0.4225,  ...,  0.8415, -0.3724,  0.0209],
         ...,
         [-0.4951, -0.2597,  0.3187,  ...,  0.3149,  0.1684,  0.5955],
         [-0.3711, -0.0348,  0.3474,  ...,  0.3282,  0.3262,  0.4353],
         [-0.4741, -0.1366,  0.3591,  ...,  0.3369,  0.2767,  0.4654]],

        [[ 0.0016,  0.2809, -0.4530,  ...,  0.0542,  0.1618,  0.4192],
         [ 0.0976, -0.2196,  0.0928,  ..., -0.0718,  0.0757,  1.0781],
         [ 0.3186,  0.6953, -0.4225,  ...,  0.8415, -0.3724,  0.0209],
         ...,
         [-0.4951, -0.2597,  0.3187,  ...,  0.3149,  0.1684,  0.5955],
         [-0.3711, -0.0348,  0.3474,  ...,  0.3282,  0.3262,  0.4353],
         [-0.4741, -0.1366,  0.3591,  ...,  0.3369,  0.2767,  0.4654]],

        [[-0.3529, -0.2191, -0.8714,  ..., -0.2281,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-1.1938e-01,  1.0372e-01,  3.5357e-01,  ..., -7.4914e-01,
           4.0517e-01,  1.1056e-03],
         [ 3.6705e-01,  3.5918e-01,  5.8408e-01,  ..., -4.3281e-01,
           4.7837e-01, -3.1855e-01],
         [ 6.6282e-02,  7.4830e-01,  1.1406e+00,  ..., -9.3440e-01,
           8.1280e-01, -1.2865e+00],
         ...,
         [ 5.7972e-02,  1.3138e-01,  5.1109e-01,  ..., -2.8173e-01,
           3.9149e-01, -2.1841e-01],
         [ 8.1986e-02,  5.4694e-02,  6.0239e-01,  ..., -3.3112e-01,
           4.0160e-01, -2.0083e-01],
         [ 5.4315e-01, -1.5718e-01,  4.9451e-01,  ..., -2.4673e-01,
           8.6965e-02, -6.3231e-01]],

        [[-1.1938e-01,  1.0372e-01,  3.5357e-01,  ..., -7.4914e-01,
           4.0517e-01,  1.1056e-03],
         [ 3.6705e-01,  3.5918e-01,  5.8408e-01,  ..., -4.3281e-01,
           4.7837e-01, -3.1855e-01],
         [ 6.6282e-02,  7.4830e-01,  1.1406e+00,  ..., -9.3440e-01,
           8.12

[33m[W 2022-03-26 01:20:44,676][0m Trial 3 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vo

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1228, -0.2170, -0.5642,  ..., -0.3679,  0.4187,  0.5238],
         [-0.0453, -0.4090, -0.1384,  ..., -0.3855,  0.2040, -0.0455],
         [-0.3574,  0.0506, -0.8726,  ..., -0.1913, -0.2682, -0.1309],
         ...,
         [ 0.1980,  0.0970,  0.0188,  ..., -0.1491, -0.0210, -0.2822],
         [ 0.3224,  0.1393,  0.0459,  ..., -0.1058, -0.1499, -0.2335],
         [ 0.4107, -0.2171,  0.0947,  ..., -0.1394,  0.0691, -0.0278]],

        [[-0.1228, -0.2170, -0.5642,  ..., -0.3679,  0.4187,  0.5238],
         [-0.0453, -0.4090, -0.1384,  ..., -0.3855,  0.2040, -0.0455],
         [-0.3574,  0.0506, -0.8726,  ..., -0.1913, -0.2682, -0.1309],
         ...,
         [ 0.1980,  0.0970,  0.0188,  ..., -0.1491, -0.0210, -0.2822],
         [ 0.3224,  0.1393,  0.0459,  ..., -0.1058, -0.1499, -0.2335],
         [ 0.4107, -0.2171,  0.0947,  ..., -0.1394,  0.0691, -0.0278]],

        [[-0.0696, -0.0995,  0.0014,  ..., -0.3334,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.2504, -0.0194, -0.2858,  ..., -0.0675,  0.2366,  0.5026],
         [ 0.0770,  0.1562, -0.5756,  ...,  0.0594,  0.5426,  0.0590],
         [-0.3636, -0.2710, -0.3403,  ...,  0.2165, -0.1784, -0.0111],
         ...,
         [-0.4505, -0.6197, -0.2053,  ...,  0.3747,  0.0342, -0.2545],
         [ 0.1539, -0.0364,  0.0155,  ...,  0.2560, -0.1259, -0.1100],
         [ 0.0942, -0.0770, -0.0259,  ...,  0.2057, -0.1138, -0.0818]],

        [[-0.2504, -0.0194, -0.2858,  ..., -0.0675,  0.2366,  0.5026],
         [ 0.0770,  0.1562, -0.5756,  ...,  0.0594,  0.5426,  0.0590],
         [-0.3636, -0.2710, -0.3403,  ...,  0.2165, -0.1784, -0.0111],
         ...,
         [-0.4505, -0.6197, -0.2053,  ...,  0.3747,  0.0342, -0.2545],
         [ 0.1539, -0.0364,  0.0155,  ...,  0.2560, -0.1259, -0.1100],
         [ 0.0942, -0.0770, -0.0259,  ...,  0.2057, -0.1138, -0.0818]],

        [[-0.1607,  0.1202, -0.1742,  ..., -0.3932,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1799,  0.2781, -0.4080,  ..., -0.1092,  0.1136,  0.4345],
         [ 0.0992, -0.3400,  0.3114,  ..., -0.2184, -0.0202,  0.9238],
         [ 0.1243,  0.6038, -0.3401,  ...,  0.4463, -0.4545, -0.1440],
         ...,
         [-0.5097, -0.3292,  0.2706,  ...,  0.2255,  0.1872,  0.4181],
         [-0.3796, -0.1121,  0.3204,  ...,  0.2218,  0.2584,  0.2682],
         [-0.4682, -0.2136,  0.3348,  ...,  0.2420,  0.2335,  0.2677]],

        [[-0.1799,  0.2781, -0.4080,  ..., -0.1092,  0.1136,  0.4345],
         [ 0.0992, -0.3400,  0.3114,  ..., -0.2184, -0.0202,  0.9238],
         [ 0.1243,  0.6038, -0.3401,  ...,  0.4463, -0.4545, -0.1440],
         ...,
         [-0.5097, -0.3292,  0.2706,  ...,  0.2255,  0.1872,  0.4181],
         [-0.3796, -0.1121,  0.3204,  ...,  0.2218,  0.2584,  0.2682],
         [-0.4682, -0.2136,  0.3348,  ...,  0.2420,  0.2335,  0.2677]],

        [[-0.3267, -0.0038, -0.5619,  ..., -0.2560,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1005,  0.1647,  0.3598,  ..., -0.6986,  0.4482,  0.1105],
         [ 0.3271,  0.3733,  0.7571,  ..., -0.4566,  0.5335, -0.2209],
         [ 0.0419,  0.6117,  1.0861,  ..., -0.9128,  0.6848, -1.2828],
         ...,
         [ 0.0754,  0.1058,  0.5091,  ..., -0.2659,  0.2683, -0.1146],
         [ 0.1266,  0.0573,  0.6217,  ..., -0.3655,  0.2983, -0.1309],
         [ 0.5034, -0.1478,  0.5457,  ..., -0.2293,  0.0655, -0.4946]],

        [[-0.1005,  0.1647,  0.3598,  ..., -0.6986,  0.4482,  0.1105],
         [ 0.3271,  0.3733,  0.7571,  ..., -0.4566,  0.5335, -0.2209],
         [ 0.0419,  0.6117,  1.0861,  ..., -0.9128,  0.6848, -1.2828],
         ...,
         [ 0.0754,  0.1058,  0.5091,  ..., -0.2659,  0.2683, -0.1146],
         [ 0.1266,  0.0573,  0.6217,  ..., -0.3655,  0.2983, -0.1309],
         [ 0.5034, -0.1478,  0.5457,  ..., -0.2293,  0.0655, -0.4946]]],
       device='cuda:0'), pooler_output=tensor([[-0.0219

[33m[W 2022-03-26 01:20:51,682][0m Trial 4 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vo

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-1.1686e-01, -2.1309e-01, -5.5800e-01,  ..., -3.6939e-01,
           4.0587e-01,  5.2201e-01],
         [-4.6374e-02, -4.0215e-01, -1.3997e-01,  ..., -3.8573e-01,
           1.7829e-01, -4.8173e-02],
         [-3.5698e-01,  3.8694e-02, -8.7046e-01,  ..., -1.8238e-01,
          -2.7782e-01, -1.2418e-01],
         ...,
         [ 1.9129e-01,  1.0059e-01,  2.1578e-02,  ..., -1.4802e-01,
          -2.2153e-02, -2.8157e-01],
         [ 3.1651e-01,  1.4108e-01,  4.8449e-02,  ..., -1.0601e-01,
          -1.4959e-01, -2.3326e-01],
         [ 4.1241e-01, -2.1388e-01,  9.7802e-02,  ..., -1.4248e-01,
           6.0239e-02, -2.6400e-02]],

        [[-1.1686e-01, -2.1309e-01, -5.5800e-01,  ..., -3.6939e-01,
           4.0587e-01,  5.2201e-01],
         [-4.6374e-02, -4.0215e-01, -1.3997e-01,  ..., -3.8573e-01,
           1.7829e-01, -4.8173e-02],
         [-3.5698e-01,  3.8694e-02, -8.7046e-01,  ..., -1.8238e-01,
          -2.77

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.2455, -0.0228, -0.2847,  ..., -0.0760,  0.2310,  0.4997],
         [ 0.0775,  0.1591, -0.5778,  ...,  0.0596,  0.5329,  0.0521],
         [-0.3619, -0.2669, -0.3427,  ...,  0.2085, -0.1834, -0.0095],
         ...,
         [-0.4500, -0.6102, -0.2022,  ...,  0.3708,  0.0307, -0.2573],
         [ 0.1521, -0.0362,  0.0189,  ...,  0.2536, -0.1248, -0.1101],
         [ 0.0897, -0.0780, -0.0223,  ...,  0.2034, -0.1124, -0.0820]],

        [[-0.2455, -0.0228, -0.2847,  ..., -0.0760,  0.2310,  0.4997],
         [ 0.0775,  0.1591, -0.5778,  ...,  0.0596,  0.5329,  0.0521],
         [-0.3619, -0.2669, -0.3427,  ...,  0.2085, -0.1834, -0.0095],
         ...,
         [-0.4500, -0.6102, -0.2022,  ...,  0.3708,  0.0307, -0.2573],
         [ 0.1521, -0.0362,  0.0189,  ...,  0.2536, -0.1248, -0.1101],
         [ 0.0897, -0.0780, -0.0223,  ...,  0.2034, -0.1124, -0.0820]],

        [[-0.1596,  0.1160, -0.1720,  ..., -0.3862,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1853,  0.2769, -0.4090,  ..., -0.1115,  0.1142,  0.4328],
         [ 0.0977, -0.3373,  0.3107,  ..., -0.2176, -0.0303,  0.9152],
         [ 0.1178,  0.5979, -0.3330,  ...,  0.4337, -0.4538, -0.1506],
         ...,
         [-0.5099, -0.3307,  0.2752,  ...,  0.2232,  0.1861,  0.4066],
         [-0.3825, -0.1178,  0.3250,  ...,  0.2200,  0.2558,  0.2558],
         [-0.4707, -0.2207,  0.3383,  ...,  0.2423,  0.2302,  0.2558]],

        [[-0.1853,  0.2769, -0.4090,  ..., -0.1115,  0.1142,  0.4328],
         [ 0.0977, -0.3373,  0.3107,  ..., -0.2176, -0.0303,  0.9152],
         [ 0.1178,  0.5979, -0.3330,  ...,  0.4337, -0.4538, -0.1506],
         ...,
         [-0.5099, -0.3307,  0.2752,  ...,  0.2232,  0.1861,  0.4066],
         [-0.3825, -0.1178,  0.3250,  ...,  0.2200,  0.2558,  0.2558],
         [-0.4707, -0.2207,  0.3383,  ...,  0.2423,  0.2302,  0.2558]],

        [[-0.3190,  0.0159, -0.5359,  ..., -0.2647,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1032,  0.1650,  0.3597,  ..., -0.6995,  0.4482,  0.1114],
         [ 0.3251,  0.3682,  0.7500,  ..., -0.4517,  0.5343, -0.2196],
         [ 0.0411,  0.5950,  1.0668,  ..., -0.9071,  0.6814, -1.2766],
         ...,
         [ 0.0771,  0.1037,  0.5076,  ..., -0.2659,  0.2634, -0.1162],
         [ 0.1277,  0.0540,  0.6196,  ..., -0.3682,  0.2904, -0.1328],
         [ 0.4968, -0.1490,  0.5482,  ..., -0.2281,  0.0633, -0.4915]],

        [[-0.1032,  0.1650,  0.3597,  ..., -0.6995,  0.4482,  0.1114],
         [ 0.3251,  0.3682,  0.7500,  ..., -0.4517,  0.5343, -0.2196],
         [ 0.0411,  0.5950,  1.0668,  ..., -0.9071,  0.6814, -1.2766],
         ...,
         [ 0.0771,  0.1037,  0.5076,  ..., -0.2659,  0.2634, -0.1162],
         [ 0.1277,  0.0540,  0.6196,  ..., -0.3682,  0.2904, -0.1328],
         [ 0.4968, -0.1490,  0.5482,  ..., -0.2281,  0.0633, -0.4915]]],
       device='cuda:0'), pooler_output=tensor([[-0.0206

[33m[W 2022-03-26 01:20:57,767][0m Trial 5 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vo

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.0436, -0.2364, -0.4791,  ..., -0.2166,  0.3495,  0.4866],
         [-0.1513, -0.3243, -0.0072,  ..., -0.4033,  0.3383, -0.0586],
         [-0.4475,  0.1757, -1.0249,  ..., -0.3510, -0.3289, -0.1957],
         ...,
         [ 0.2167,  0.0441, -0.0298,  ..., -0.1631,  0.0388, -0.3930],
         [ 0.3414,  0.1029, -0.0094,  ..., -0.0980, -0.0579, -0.3406],
         [ 0.3468, -0.2235,  0.1887,  ..., -0.0893,  0.0684, -0.0653]],

        [[-0.0436, -0.2364, -0.4791,  ..., -0.2166,  0.3495,  0.4866],
         [-0.1513, -0.3243, -0.0072,  ..., -0.4033,  0.3383, -0.0586],
         [-0.4475,  0.1757, -1.0249,  ..., -0.3510, -0.3289, -0.1957],
         ...,
         [ 0.2167,  0.0441, -0.0298,  ..., -0.1631,  0.0388, -0.3930],
         [ 0.3414,  0.1029, -0.0094,  ..., -0.0980, -0.0579, -0.3406],
         [ 0.3468, -0.2235,  0.1887,  ..., -0.0893,  0.0684, -0.0653]],

        [[-0.0939, -0.1412,  0.0444,  ..., -0.3847,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-1.9107e-01,  1.1530e-01, -2.1184e-01,  ..., -3.1254e-04,
           2.2292e-01,  4.8816e-01],
         [ 4.4147e-02,  2.7523e-01, -6.1209e-01,  ...,  4.5362e-02,
           7.1721e-01,  1.2291e-01],
         [-2.9269e-01, -5.7669e-02, -3.6635e-01,  ...,  5.0938e-01,
          -1.1461e-01, -7.5105e-04],
         ...,
         [-4.3737e-01, -4.9754e-01, -3.6772e-01,  ...,  4.5817e-01,
          -6.1007e-02, -1.8881e-01],
         [ 2.2213e-01,  1.0500e-01, -8.9713e-02,  ...,  3.5723e-01,
          -1.6393e-01, -1.2223e-01],
         [ 1.4684e-01,  5.9747e-02, -1.4465e-01,  ...,  2.8969e-01,
          -1.5408e-01, -6.7953e-02]],

        [[-1.9107e-01,  1.1530e-01, -2.1184e-01,  ..., -3.1254e-04,
           2.2292e-01,  4.8816e-01],
         [ 4.4147e-02,  2.7523e-01, -6.1209e-01,  ...,  4.5362e-02,
           7.1721e-01,  1.2291e-01],
         [-2.9269e-01, -5.7669e-02, -3.6635e-01,  ...,  5.0938e-01,
          -1.14

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[ 0.0075,  0.2700, -0.3978,  ..., -0.0018,  0.1449,  0.4180],
         [ 0.1264, -0.2329,  0.1375,  ..., -0.0926,  0.1334,  1.0341],
         [ 0.3313,  0.6829, -0.4132,  ...,  0.8179, -0.3590, -0.0171],
         ...,
         [-0.4774, -0.2878,  0.2942,  ...,  0.3077,  0.2144,  0.5716],
         [-0.3346, -0.0540,  0.3269,  ...,  0.3074,  0.3542,  0.4131],
         [-0.4246, -0.1532,  0.3525,  ...,  0.3254,  0.2977,  0.4262]],

        [[ 0.0075,  0.2700, -0.3978,  ..., -0.0018,  0.1449,  0.4180],
         [ 0.1264, -0.2329,  0.1375,  ..., -0.0926,  0.1334,  1.0341],
         [ 0.3313,  0.6829, -0.4132,  ...,  0.8179, -0.3590, -0.0171],
         ...,
         [-0.4774, -0.2878,  0.2942,  ...,  0.3077,  0.2144,  0.5716],
         [-0.3346, -0.0540,  0.3269,  ...,  0.3074,  0.3542,  0.4131],
         [-0.4246, -0.1532,  0.3525,  ...,  0.3254,  0.2977,  0.4262]],

        [[-0.2685, -0.1216, -0.7348,  ..., -0.2525,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.0750,  0.1017,  0.3502,  ..., -0.6851,  0.3846,  0.0684],
         [ 0.3984,  0.3618,  0.6028,  ..., -0.3796,  0.4478, -0.3318],
         [ 0.1617,  0.7672,  1.1983,  ..., -0.8282,  0.7651, -1.3166],
         ...,
         [ 0.1064,  0.1153,  0.5250,  ..., -0.2494,  0.3628, -0.1903],
         [ 0.1545,  0.0387,  0.6282,  ..., -0.3190,  0.3725, -0.1803],
         [ 0.5934, -0.1587,  0.5145,  ..., -0.2450,  0.0817, -0.5976]],

        [[-0.0750,  0.1017,  0.3502,  ..., -0.6851,  0.3846,  0.0684],
         [ 0.3984,  0.3618,  0.6028,  ..., -0.3796,  0.4478, -0.3318],
         [ 0.1617,  0.7672,  1.1983,  ..., -0.8282,  0.7651, -1.3166],
         ...,
         [ 0.1064,  0.1153,  0.5250,  ..., -0.2494,  0.3628, -0.1903],
         [ 0.1545,  0.0387,  0.6282,  ..., -0.3190,  0.3725, -0.1803],
         [ 0.5934, -0.1587,  0.5145,  ..., -0.2450,  0.0817, -0.5976]]],
       device='cuda:0'), pooler_output=tensor([[-0.0090

[33m[W 2022-03-26 01:21:07,523][0m Trial 6 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vo

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-1.0614e-01, -1.7665e-01, -5.9472e-01,  ..., -2.7869e-01,
           3.3458e-01,  5.2644e-01],
         [-1.7275e-01, -4.1996e-01, -1.3209e-01,  ..., -4.2564e-01,
           2.6996e-01, -2.2839e-02],
         [-4.5488e-01,  1.3732e-01, -9.4231e-01,  ..., -2.5096e-01,
          -3.3443e-01, -1.5382e-01],
         ...,
         [ 1.9917e-01,  1.1296e-01, -3.1694e-02,  ..., -1.4777e-01,
          -1.4866e-04, -3.5067e-01],
         [ 3.4563e-01,  1.5449e-01, -4.1412e-03,  ..., -9.0741e-02,
          -1.0990e-01, -2.9227e-01],
         [ 3.8862e-01, -2.2620e-01,  1.2830e-01,  ..., -1.4249e-01,
           5.2790e-02, -5.5659e-02]],

        [[-1.0614e-01, -1.7665e-01, -5.9472e-01,  ..., -2.7869e-01,
           3.3458e-01,  5.2644e-01],
         [-1.7275e-01, -4.1996e-01, -1.3209e-01,  ..., -4.2564e-01,
           2.6996e-01, -2.2839e-02],
         [-4.5488e-01,  1.3732e-01, -9.4231e-01,  ..., -2.5096e-01,
          -3.34

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.2036,  0.0606, -0.2626,  ..., -0.0225,  0.1669,  0.5566],
         [ 0.0232,  0.2480, -0.5944,  ...,  0.0358,  0.6542,  0.1318],
         [-0.3565, -0.1219, -0.3243,  ...,  0.3408, -0.1672,  0.0174],
         ...,
         [-0.4598, -0.5322, -0.2782,  ...,  0.3986, -0.0402, -0.2304],
         [ 0.2073,  0.0719, -0.0457,  ...,  0.2842, -0.1832, -0.1203],
         [ 0.1265,  0.0164, -0.0894,  ...,  0.2226, -0.1643, -0.0830]],

        [[-0.2036,  0.0606, -0.2626,  ..., -0.0225,  0.1669,  0.5566],
         [ 0.0232,  0.2480, -0.5944,  ...,  0.0358,  0.6542,  0.1318],
         [-0.3565, -0.1219, -0.3243,  ...,  0.3408, -0.1672,  0.0174],
         ...,
         [-0.4598, -0.5322, -0.2782,  ...,  0.3986, -0.0402, -0.2304],
         [ 0.2073,  0.0719, -0.0457,  ...,  0.2842, -0.1832, -0.1203],
         [ 0.1265,  0.0164, -0.0894,  ...,  0.2226, -0.1643, -0.0830]],

        [[-0.0851,  0.1628, -0.1806,  ..., -0.3883,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.0955,  0.2876, -0.4265,  ..., -0.0467,  0.0909,  0.4419],
         [ 0.1265, -0.3021,  0.2376,  ..., -0.1426,  0.0702,  0.9587],
         [ 0.2032,  0.6788, -0.3827,  ...,  0.5925, -0.4987, -0.0739],
         ...,
         [-0.4973, -0.2943,  0.3260,  ...,  0.2392,  0.1633,  0.4686],
         [-0.3838, -0.0819,  0.3622,  ...,  0.2459,  0.2599,  0.3108],
         [-0.4806, -0.1702,  0.3811,  ...,  0.2744,  0.2229,  0.3196]],

        [[-0.0955,  0.2876, -0.4265,  ..., -0.0467,  0.0909,  0.4419],
         [ 0.1265, -0.3021,  0.2376,  ..., -0.1426,  0.0702,  0.9587],
         [ 0.2032,  0.6788, -0.3827,  ...,  0.5925, -0.4987, -0.0739],
         ...,
         [-0.4973, -0.2943,  0.3260,  ...,  0.2392,  0.1633,  0.4686],
         [-0.3838, -0.0819,  0.3622,  ...,  0.2459,  0.2599,  0.3108],
         [-0.4806, -0.1702,  0.3811,  ...,  0.2744,  0.2229,  0.3196]],

        [[-0.3349, -0.0467, -0.6767,  ..., -0.2403,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.0736,  0.1361,  0.3369,  ..., -0.6734,  0.4030,  0.0915],
         [ 0.3499,  0.3600,  0.6265,  ..., -0.3796,  0.4796, -0.2587],
         [ 0.1497,  0.6746,  1.1404,  ..., -0.8718,  0.7567, -1.2335],
         ...,
         [ 0.0986,  0.1136,  0.5163,  ..., -0.2472,  0.2925, -0.1735],
         [ 0.1346,  0.0348,  0.6230,  ..., -0.3280,  0.3004, -0.1812],
         [ 0.5436, -0.1513,  0.5295,  ..., -0.2301,  0.0504, -0.5742]],

        [[-0.0736,  0.1361,  0.3369,  ..., -0.6734,  0.4030,  0.0915],
         [ 0.3499,  0.3600,  0.6265,  ..., -0.3796,  0.4796, -0.2587],
         [ 0.1497,  0.6746,  1.1404,  ..., -0.8718,  0.7567, -1.2335],
         ...,
         [ 0.0986,  0.1136,  0.5163,  ..., -0.2472,  0.2925, -0.1735],
         [ 0.1346,  0.0348,  0.6230,  ..., -0.3280,  0.3004, -0.1812],
         [ 0.5436, -0.1513,  0.5295,  ..., -0.2301,  0.0504, -0.5742]]],
       device='cuda:0'), pooler_output=tensor([[-0.0191

[33m[W 2022-03-26 01:21:12,673][0m Trial 7 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vo

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.2817, -0.0702, -0.3013,  ..., -0.1346,  0.4221,  0.2398],
         [-0.2378, -0.1797,  0.3077,  ..., -0.4978,  0.2894, -0.5332],
         [-0.5639,  0.4129, -0.8937,  ..., -0.4003, -0.3626, -0.2674],
         ...,
         [ 0.1261, -0.0912, -0.0472,  ..., -0.1407,  0.0249, -0.4189],
         [ 0.2301,  0.0394, -0.0464,  ..., -0.1299, -0.0434, -0.3724],
         [ 0.2632, -0.3567,  0.2385,  ..., -0.0586,  0.1295, -0.2350]],

        [[-0.2817, -0.0702, -0.3013,  ..., -0.1346,  0.4221,  0.2398],
         [-0.2378, -0.1797,  0.3077,  ..., -0.4978,  0.2894, -0.5332],
         [-0.5639,  0.4129, -0.8937,  ..., -0.4003, -0.3626, -0.2674],
         ...,
         [ 0.1261, -0.0912, -0.0472,  ..., -0.1407,  0.0249, -0.4189],
         [ 0.2301,  0.0394, -0.0464,  ..., -0.1299, -0.0434, -0.3724],
         [ 0.2632, -0.3567,  0.2385,  ..., -0.0586,  0.1295, -0.2350]],

        [[-0.2955, -0.2056, -0.0375,  ..., -0.5119,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-4.3168e-01,  6.8426e-02, -5.0231e-01,  ..., -2.5591e-02,
           2.8288e-01,  2.0696e-01],
         [ 1.7153e-01,  3.6840e-01, -8.0499e-01,  ..., -6.7001e-02,
           8.6091e-01,  8.7318e-03],
         [-2.5200e-01,  4.9907e-02, -2.9985e-01,  ...,  5.8231e-01,
          -1.0790e-01,  2.1260e-02],
         ...,
         [-3.7105e-01, -3.3580e-01, -3.9042e-01,  ...,  3.8736e-01,
          -1.1817e-01, -8.8492e-02],
         [ 1.4515e-01,  1.7023e-02, -7.4402e-04,  ...,  2.8272e-01,
          -1.0609e-01, -2.4352e-01],
         [ 9.8839e-02, -1.4982e-02, -8.0791e-02,  ...,  1.4489e-01,
          -1.0825e-01, -3.9538e-02]],

        [[-4.3168e-01,  6.8426e-02, -5.0231e-01,  ..., -2.5591e-02,
           2.8288e-01,  2.0696e-01],
         [ 1.7153e-01,  3.6840e-01, -8.0499e-01,  ..., -6.7001e-02,
           8.6091e-01,  8.7318e-03],
         [-2.5200e-01,  4.9907e-02, -2.9985e-01,  ...,  5.8231e-01,
          -1.07

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-1.1330e-01,  3.7081e-01, -3.3574e-01,  ...,  8.6159e-02,
           1.0317e-01,  5.5134e-01],
         [ 9.7380e-02, -2.2066e-01,  7.2450e-02,  ..., -1.1014e-01,
          -1.1022e-01,  9.6691e-01],
         [ 2.1606e-02,  4.5142e-01, -1.1927e-03,  ...,  7.1988e-01,
          -6.2072e-01,  4.4521e-01],
         ...,
         [-5.2047e-01, -2.3572e-01,  4.4086e-01,  ...,  2.9818e-01,
           1.7662e-01,  6.5244e-01],
         [-4.4509e-01,  7.1703e-02,  3.8959e-01,  ...,  2.4709e-01,
           4.1116e-01,  4.2739e-01],
         [-5.0000e-01, -2.5570e-02,  4.0672e-01,  ...,  2.6990e-01,
           3.0425e-01,  5.0876e-01]],

        [[-1.1330e-01,  3.7081e-01, -3.3574e-01,  ...,  8.6159e-02,
           1.0317e-01,  5.5134e-01],
         [ 9.7380e-02, -2.2066e-01,  7.2450e-02,  ..., -1.1014e-01,
          -1.1022e-01,  9.6691e-01],
         [ 2.1606e-02,  4.5142e-01, -1.1927e-03,  ...,  7.1988e-01,
          -6.20

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.3779,  0.2824,  0.4696,  ..., -0.7693,  0.5022, -0.1011],
         [ 0.3209,  0.5958,  0.6379,  ..., -0.6993,  0.4354, -0.3916],
         [-0.1091,  0.9090,  1.6797,  ..., -1.2381,  0.5171, -1.6332],
         ...,
         [-0.0738,  0.2830,  0.5140,  ..., -0.3171,  0.4817, -0.3012],
         [-0.0428,  0.1652,  0.6403,  ..., -0.3402,  0.4499, -0.2789],
         [ 0.4694, -0.0448,  0.5070,  ..., -0.3235,  0.0843, -0.6936]],

        [[-0.3779,  0.2824,  0.4696,  ..., -0.7693,  0.5022, -0.1011],
         [ 0.3209,  0.5958,  0.6379,  ..., -0.6993,  0.4354, -0.3916],
         [-0.1091,  0.9090,  1.6797,  ..., -1.2381,  0.5171, -1.6332],
         ...,
         [-0.0738,  0.2830,  0.5140,  ..., -0.3171,  0.4817, -0.3012],
         [-0.0428,  0.1652,  0.6403,  ..., -0.3402,  0.4499, -0.2789],
         [ 0.4694, -0.0448,  0.5070,  ..., -0.3235,  0.0843, -0.6936]]],
       device='cuda:0'), pooler_output=tensor([[-0.1121

[33m[W 2022-03-26 01:21:19,028][0m Trial 8 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vo

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-8.8572e-02, -2.6079e-01, -5.9919e-01,  ..., -3.1419e-01,
           4.5597e-01,  5.1222e-01],
         [-1.0169e-01, -4.2126e-01, -1.1466e-01,  ..., -4.1886e-01,
           3.4792e-01, -4.3696e-02],
         [-3.7280e-01,  1.2168e-01, -9.8038e-01,  ..., -2.6773e-01,
          -2.7129e-01, -2.2560e-01],
         ...,
         [ 2.2391e-01,  8.5789e-02, -4.1589e-02,  ..., -1.6862e-01,
           3.8245e-04, -3.2946e-01],
         [ 3.5468e-01,  1.4194e-01, -1.1064e-02,  ..., -1.0901e-01,
          -1.2609e-01, -2.7745e-01],
         [ 3.8899e-01, -2.0866e-01,  1.1306e-01,  ..., -1.0670e-01,
           8.3567e-02, -4.8114e-02]],

        [[-8.8572e-02, -2.6079e-01, -5.9919e-01,  ..., -3.1419e-01,
           4.5597e-01,  5.1222e-01],
         [-1.0169e-01, -4.2126e-01, -1.1466e-01,  ..., -4.1886e-01,
           3.4792e-01, -4.3696e-02],
         [-3.7280e-01,  1.2168e-01, -9.8038e-01,  ..., -2.6773e-01,
          -2.71

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-2.4439e-01,  2.8140e-02, -2.5559e-01,  ..., -2.7993e-02,
           2.3370e-01,  4.8980e-01],
         [ 7.7581e-02,  1.8371e-01, -6.0333e-01,  ...,  6.3095e-02,
           6.3895e-01,  1.0896e-01],
         [-2.9460e-01, -1.8779e-01, -3.8193e-01,  ...,  3.6827e-01,
          -2.1987e-01, -3.7949e-02],
         ...,
         [-4.5582e-01, -6.4631e-01, -3.2408e-01,  ...,  4.6285e-01,
           6.0086e-03, -2.4268e-01],
         [ 2.0729e-01,  3.5272e-02, -7.1844e-02,  ...,  3.3741e-01,
          -1.7425e-01, -1.2311e-01],
         [ 1.4274e-01, -1.3957e-02, -1.1678e-01,  ...,  2.8427e-01,
          -1.4939e-01, -9.0316e-02]],

        [[-2.4439e-01,  2.8140e-02, -2.5559e-01,  ..., -2.7993e-02,
           2.3370e-01,  4.8980e-01],
         [ 7.7581e-02,  1.8371e-01, -6.0333e-01,  ...,  6.3095e-02,
           6.3895e-01,  1.0896e-01],
         [-2.9460e-01, -1.8779e-01, -3.8193e-01,  ...,  3.6827e-01,
          -2.19

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-8.4288e-02,  2.7816e-01, -4.6082e-01,  ..., -2.7602e-02,
           1.2577e-01,  4.0787e-01],
         [ 8.4666e-02, -2.8751e-01,  1.9922e-01,  ..., -1.8205e-01,
           2.9177e-02,  1.0177e+00],
         [ 2.3841e-01,  7.0210e-01, -4.3730e-01,  ...,  6.7342e-01,
          -4.2315e-01, -9.0788e-02],
         ...,
         [-4.9876e-01, -2.9365e-01,  2.5850e-01,  ...,  2.5365e-01,
           1.8860e-01,  5.0011e-01],
         [-3.6759e-01, -6.4677e-02,  3.0236e-01,  ...,  2.6321e-01,
           2.9853e-01,  3.6098e-01],
         [-4.6434e-01, -1.7081e-01,  3.1845e-01,  ...,  2.7729e-01,
           2.5640e-01,  3.7178e-01]],

        [[-8.4288e-02,  2.7816e-01, -4.6082e-01,  ..., -2.7602e-02,
           1.2577e-01,  4.0787e-01],
         [ 8.4666e-02, -2.8751e-01,  1.9922e-01,  ..., -1.8205e-01,
           2.9177e-02,  1.0177e+00],
         [ 2.3841e-01,  7.0210e-01, -4.3730e-01,  ...,  6.7342e-01,
          -4.23

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.0854,  0.1325,  0.3584,  ..., -0.7015,  0.4394,  0.0658],
         [ 0.3685,  0.3944,  0.6879,  ..., -0.4486,  0.5166, -0.2875],
         [ 0.1140,  0.7185,  1.1626,  ..., -0.9481,  0.7923, -1.3303],
         ...,
         [ 0.0742,  0.1218,  0.5088,  ..., -0.2691,  0.3206, -0.1621],
         [ 0.1147,  0.0602,  0.6132,  ..., -0.3508,  0.3417, -0.1637],
         [ 0.5411, -0.1445,  0.5189,  ..., -0.2444,  0.0760, -0.5535]],

        [[-0.0854,  0.1325,  0.3584,  ..., -0.7015,  0.4394,  0.0658],
         [ 0.3685,  0.3944,  0.6879,  ..., -0.4486,  0.5166, -0.2875],
         [ 0.1140,  0.7185,  1.1626,  ..., -0.9481,  0.7923, -1.3303],
         ...,
         [ 0.0742,  0.1218,  0.5088,  ..., -0.2691,  0.3206, -0.1621],
         [ 0.1147,  0.0602,  0.6132,  ..., -0.3508,  0.3417, -0.1637],
         [ 0.5411, -0.1445,  0.5189,  ..., -0.2444,  0.0760, -0.5535]]],
       device='cuda:0'), pooler_output=tensor([[-0.0219

[33m[W 2022-03-26 01:21:24,294][0m Trial 9 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vo

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.0795, -0.2022, -0.5618,  ..., -0.3108,  0.3791,  0.5245],
         [-0.1828, -0.4657, -0.1302,  ..., -0.4703,  0.2716, -0.0402],
         [-0.3980,  0.1138, -0.9657,  ..., -0.2414, -0.3242, -0.1720],
         ...,
         [ 0.1930,  0.0858, -0.0164,  ..., -0.1629,  0.0088, -0.3370],
         [ 0.3374,  0.1390,  0.0028,  ..., -0.1075, -0.1190, -0.2769],
         [ 0.3656, -0.2279,  0.1314,  ..., -0.1329,  0.0556, -0.0552]],

        [[-0.0795, -0.2022, -0.5618,  ..., -0.3108,  0.3791,  0.5245],
         [-0.1828, -0.4657, -0.1302,  ..., -0.4703,  0.2716, -0.0402],
         [-0.3980,  0.1138, -0.9657,  ..., -0.2414, -0.3242, -0.1720],
         ...,
         [ 0.1930,  0.0858, -0.0164,  ..., -0.1629,  0.0088, -0.3370],
         [ 0.3374,  0.1390,  0.0028,  ..., -0.1075, -0.1190, -0.2769],
         [ 0.3656, -0.2279,  0.1314,  ..., -0.1329,  0.0556, -0.0552]],

        [[-0.0632, -0.1202,  0.0142,  ..., -0.3424,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-1.8891e-01,  7.0732e-02, -1.8843e-01,  ..., -4.6802e-02,
           2.0766e-01,  5.4658e-01],
         [ 3.9006e-02,  2.2949e-01, -5.6345e-01,  ...,  3.8312e-02,
           6.3621e-01,  9.5303e-02],
         [-3.4236e-01, -1.4494e-01, -2.9911e-01,  ...,  3.3087e-01,
          -1.9471e-01, -6.1993e-06],
         ...,
         [-4.7567e-01, -5.6624e-01, -2.5492e-01,  ...,  4.2145e-01,
          -2.5395e-02, -2.2904e-01],
         [ 2.0480e-01,  7.3017e-02, -1.9300e-02,  ...,  3.1358e-01,
          -1.6487e-01, -1.0591e-01],
         [ 1.3852e-01,  2.6166e-02, -7.0356e-02,  ...,  2.5034e-01,
          -1.4624e-01, -6.2989e-02]],

        [[-1.8891e-01,  7.0732e-02, -1.8843e-01,  ..., -4.6802e-02,
           2.0766e-01,  5.4658e-01],
         [ 3.9006e-02,  2.2949e-01, -5.6345e-01,  ...,  3.8312e-02,
           6.3621e-01,  9.5303e-02],
         [-3.4236e-01, -1.4494e-01, -2.9911e-01,  ...,  3.3087e-01,
          -1.94

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.0941,  0.2952, -0.3991,  ..., -0.0537,  0.1062,  0.4351],
         [ 0.0883, -0.2846,  0.2851,  ..., -0.1977, -0.0315,  0.9880],
         [ 0.2197,  0.6994, -0.3391,  ...,  0.6003, -0.4632, -0.0969],
         ...,
         [-0.4995, -0.2958,  0.3025,  ...,  0.2443,  0.1719,  0.4898],
         [-0.3734, -0.0683,  0.3422,  ...,  0.2473,  0.2692,  0.3335],
         [-0.4660, -0.1733,  0.3573,  ...,  0.2698,  0.2282,  0.3412]],

        [[-0.0941,  0.2952, -0.3991,  ..., -0.0537,  0.1062,  0.4351],
         [ 0.0883, -0.2846,  0.2851,  ..., -0.1977, -0.0315,  0.9880],
         [ 0.2197,  0.6994, -0.3391,  ...,  0.6003, -0.4632, -0.0969],
         ...,
         [-0.4995, -0.2958,  0.3025,  ...,  0.2443,  0.1719,  0.4898],
         [-0.3734, -0.0683,  0.3422,  ...,  0.2473,  0.2692,  0.3335],
         [-0.4660, -0.1733,  0.3573,  ...,  0.2698,  0.2282,  0.3412]],

        [[-0.3489, -0.0792, -0.6850,  ..., -0.2380,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.0866,  0.1445,  0.3885,  ..., -0.7410,  0.4187,  0.0670],
         [ 0.3750,  0.4148,  0.7579,  ..., -0.4929,  0.5226, -0.2850],
         [ 0.0666,  0.7037,  1.2127,  ..., -0.9614,  0.7405, -1.3208],
         ...,
         [ 0.0752,  0.1173,  0.5265,  ..., -0.2924,  0.3130, -0.1547],
         [ 0.1229,  0.0534,  0.6339,  ..., -0.3825,  0.3235, -0.1552],
         [ 0.5385, -0.1410,  0.5576,  ..., -0.2568,  0.0727, -0.5501]],

        [[-0.0866,  0.1445,  0.3885,  ..., -0.7410,  0.4187,  0.0670],
         [ 0.3750,  0.4148,  0.7579,  ..., -0.4929,  0.5226, -0.2850],
         [ 0.0666,  0.7037,  1.2127,  ..., -0.9614,  0.7405, -1.3208],
         ...,
         [ 0.0752,  0.1173,  0.5265,  ..., -0.2924,  0.3130, -0.1547],
         [ 0.1229,  0.0534,  0.6339,  ..., -0.3825,  0.3235, -0.1552],
         [ 0.5385, -0.1410,  0.5576,  ..., -0.2568,  0.0727, -0.5501]]],
       device='cuda:0'), pooler_output=tensor([[-0.0254

[33m[W 2022-03-26 01:21:34,180][0m Trial 10 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_v

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1514, -0.2086, -0.4468,  ..., -0.1247,  0.5345,  0.0910],
         [-0.1479, -0.4143,  0.0618,  ..., -0.3607,  0.5993, -0.4234],
         [-0.2916,  0.1723, -0.9650,  ..., -0.3139, -0.1067, -0.4862],
         ...,
         [ 0.1575,  0.0489, -0.0123,  ..., -0.1650,  0.1477, -0.4660],
         [ 0.3179,  0.1245,  0.0124,  ..., -0.0791,  0.0376, -0.3813],
         [ 0.2846, -0.1443,  0.2802,  ..., -0.0612,  0.1660, -0.2261]],

        [[-0.1514, -0.2086, -0.4468,  ..., -0.1247,  0.5345,  0.0910],
         [-0.1479, -0.4143,  0.0618,  ..., -0.3607,  0.5993, -0.4234],
         [-0.2916,  0.1723, -0.9650,  ..., -0.3139, -0.1067, -0.4862],
         ...,
         [ 0.1575,  0.0489, -0.0123,  ..., -0.1650,  0.1477, -0.4660],
         [ 0.3179,  0.1245,  0.0124,  ..., -0.0791,  0.0376, -0.3813],
         [ 0.2846, -0.1443,  0.2802,  ..., -0.0612,  0.1660, -0.2261]],

        [[-0.0977, -0.1787,  0.0416,  ..., -0.4263, -0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.2637,  0.2026, -0.1490,  ...,  0.0810,  0.2816,  0.2965],
         [ 0.0564,  0.1648, -0.5481,  ...,  0.0833,  0.7855,  0.0161],
         [-0.2047, -0.0234, -0.3958,  ...,  0.6570, -0.1542, -0.1992],
         ...,
         [-0.4270, -0.5067, -0.5175,  ...,  0.6122, -0.0667, -0.2712],
         [ 0.2315,  0.1264, -0.1835,  ...,  0.4913, -0.2025, -0.1445],
         [ 0.1677,  0.0675, -0.2472,  ...,  0.4145, -0.1555, -0.0852]],

        [[-0.2637,  0.2026, -0.1490,  ...,  0.0810,  0.2816,  0.2965],
         [ 0.0564,  0.1648, -0.5481,  ...,  0.0833,  0.7855,  0.0161],
         [-0.2047, -0.0234, -0.3958,  ...,  0.6570, -0.1542, -0.1992],
         ...,
         [-0.4270, -0.5067, -0.5175,  ...,  0.6122, -0.0667, -0.2712],
         [ 0.2315,  0.1264, -0.1835,  ...,  0.4913, -0.2025, -0.1445],
         [ 0.1677,  0.0675, -0.2472,  ...,  0.4145, -0.1555, -0.0852]],

        [[-0.0985,  0.2302, -0.1554,  ..., -0.5645,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[ 0.0892,  0.3182, -0.5097,  ...,  0.1776,  0.1845,  0.3383],
         [ 0.0672, -0.2003,  0.1173,  ..., -0.0180,  0.1781,  1.0440],
         [ 0.5417,  0.7580, -0.4836,  ...,  0.9810, -0.2443, -0.0045],
         ...,
         [-0.4140, -0.2031,  0.3143,  ...,  0.3529,  0.2242,  0.5927],
         [-0.2852,  0.0192,  0.3344,  ...,  0.3608,  0.3911,  0.4605],
         [-0.3786, -0.0525,  0.3372,  ...,  0.3602,  0.3427,  0.4902]],

        [[ 0.0892,  0.3182, -0.5097,  ...,  0.1776,  0.1845,  0.3383],
         [ 0.0672, -0.2003,  0.1173,  ..., -0.0180,  0.1781,  1.0440],
         [ 0.5417,  0.7580, -0.4836,  ...,  0.9810, -0.2443, -0.0045],
         ...,
         [-0.4140, -0.2031,  0.3143,  ...,  0.3529,  0.2242,  0.5927],
         [-0.2852,  0.0192,  0.3344,  ...,  0.3608,  0.3911,  0.4605],
         [-0.3786, -0.0525,  0.3372,  ...,  0.3602,  0.3427,  0.4902]],

        [[-0.3466, -0.2126, -0.9734,  ..., -0.2707, -0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1480,  0.0845,  0.3793,  ..., -0.7629,  0.4018, -0.0447],
         [ 0.4363,  0.3630,  0.5979,  ..., -0.4996,  0.4837, -0.3742],
         [ 0.1660,  0.7821,  1.2051,  ..., -0.9635,  0.8370, -1.3430],
         ...,
         [ 0.0466,  0.1604,  0.4853,  ..., -0.2871,  0.4230, -0.2575],
         [ 0.0672,  0.0667,  0.5940,  ..., -0.3417,  0.4162, -0.2288],
         [ 0.5422, -0.1337,  0.4810,  ..., -0.2768,  0.0918, -0.6442]],

        [[-0.1480,  0.0845,  0.3793,  ..., -0.7629,  0.4018, -0.0447],
         [ 0.4363,  0.3630,  0.5979,  ..., -0.4996,  0.4837, -0.3742],
         [ 0.1660,  0.7821,  1.2051,  ..., -0.9635,  0.8370, -1.3430],
         ...,
         [ 0.0466,  0.1604,  0.4853,  ..., -0.2871,  0.4230, -0.2575],
         [ 0.0672,  0.0667,  0.5940,  ..., -0.3417,  0.4162, -0.2288],
         [ 0.5422, -0.1337,  0.4810,  ..., -0.2768,  0.0918, -0.6442]]],
       device='cuda:0'), pooler_output=tensor([[-0.0392

[33m[W 2022-03-26 01:21:40,072][0m Trial 11 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_v

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-1.4682e-01, -2.0528e-01, -5.5413e-01,  ..., -3.8562e-01,
           4.0493e-01,  5.1016e-01],
         [-1.8047e-02, -3.9114e-01, -1.4780e-01,  ..., -3.6345e-01,
           1.5203e-01, -6.8398e-02],
         [-3.5983e-01,  2.4201e-02, -8.3390e-01,  ..., -1.7947e-01,
          -2.5585e-01, -9.3443e-02],
         ...,
         [ 1.8176e-01,  9.8979e-02,  3.9428e-02,  ..., -1.4163e-01,
          -2.4486e-02, -2.6201e-01],
         [ 3.0636e-01,  1.3734e-01,  6.6770e-02,  ..., -1.0562e-01,
          -1.5593e-01, -2.1651e-01],
         [ 4.1598e-01, -2.0866e-01,  8.2923e-02,  ..., -1.5343e-01,
           7.0099e-02, -3.1753e-02]],

        [[-1.4682e-01, -2.0528e-01, -5.5413e-01,  ..., -3.8562e-01,
           4.0493e-01,  5.1016e-01],
         [-1.8047e-02, -3.9114e-01, -1.4780e-01,  ..., -3.6345e-01,
           1.5203e-01, -6.8398e-02],
         [-3.5983e-01,  2.4201e-02, -8.3390e-01,  ..., -1.7947e-01,
          -2.55

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.2521, -0.0355, -0.2985,  ..., -0.0889,  0.2318,  0.4824],
         [ 0.0749,  0.1396, -0.5679,  ...,  0.0470,  0.4794,  0.0253],
         [-0.3769, -0.2951, -0.3401,  ...,  0.1515, -0.1683, -0.0053],
         ...,
         [-0.4567, -0.6161, -0.1698,  ...,  0.3460,  0.0489, -0.2625],
         [ 0.1256, -0.0696,  0.0430,  ...,  0.2244, -0.1114, -0.1089],
         [ 0.0635, -0.1085,  0.0048,  ...,  0.1767, -0.1020, -0.0833]],

        [[-0.2521, -0.0355, -0.2985,  ..., -0.0889,  0.2318,  0.4824],
         [ 0.0749,  0.1396, -0.5679,  ...,  0.0470,  0.4794,  0.0253],
         [-0.3769, -0.2951, -0.3401,  ...,  0.1515, -0.1683, -0.0053],
         ...,
         [-0.4567, -0.6161, -0.1698,  ...,  0.3460,  0.0489, -0.2625],
         [ 0.1256, -0.0696,  0.0430,  ...,  0.2244, -0.1114, -0.1089],
         [ 0.0635, -0.1085,  0.0048,  ...,  0.1767, -0.1020, -0.0833]],

        [[-0.1926,  0.0991, -0.1526,  ..., -0.3596,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.2138,  0.2720, -0.3887,  ..., -0.1380,  0.1069,  0.4353],
         [ 0.1032, -0.3483,  0.3228,  ..., -0.2260, -0.0521,  0.8697],
         [ 0.0878,  0.5567, -0.3118,  ...,  0.3525, -0.4489, -0.1763],
         ...,
         [-0.5111, -0.3376,  0.2673,  ...,  0.2170,  0.1881,  0.3760],
         [-0.3815, -0.1249,  0.3227,  ...,  0.2071,  0.2461,  0.2282],
         [-0.4716, -0.2327,  0.3366,  ...,  0.2312,  0.2228,  0.2235]],

        [[-0.2138,  0.2720, -0.3887,  ..., -0.1380,  0.1069,  0.4353],
         [ 0.1032, -0.3483,  0.3228,  ..., -0.2260, -0.0521,  0.8697],
         [ 0.0878,  0.5567, -0.3118,  ...,  0.3525, -0.4489, -0.1763],
         ...,
         [-0.5111, -0.3376,  0.2673,  ...,  0.2170,  0.1881,  0.3760],
         [-0.3815, -0.1249,  0.3227,  ...,  0.2071,  0.2461,  0.2282],
         [-0.4716, -0.2327,  0.3366,  ...,  0.2312,  0.2228,  0.2235]],

        [[-0.3165,  0.0461, -0.4889,  ..., -0.2767,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1141,  0.1743,  0.3471,  ..., -0.6983,  0.4526,  0.1214],
         [ 0.3063,  0.3675,  0.7800,  ..., -0.4619,  0.5309, -0.2078],
         [ 0.0071,  0.5589,  1.0070,  ..., -0.8812,  0.6197, -1.2551],
         ...,
         [ 0.0729,  0.1019,  0.5014,  ..., -0.2664,  0.2445, -0.0958],
         [ 0.1253,  0.0611,  0.6180,  ..., -0.3762,  0.2781, -0.1187],
         [ 0.4820, -0.1456,  0.5480,  ..., -0.2263,  0.0622, -0.4698]],

        [[-0.1141,  0.1743,  0.3471,  ..., -0.6983,  0.4526,  0.1214],
         [ 0.3063,  0.3675,  0.7800,  ..., -0.4619,  0.5309, -0.2078],
         [ 0.0071,  0.5589,  1.0070,  ..., -0.8812,  0.6197, -1.2551],
         ...,
         [ 0.0729,  0.1019,  0.5014,  ..., -0.2664,  0.2445, -0.0958],
         [ 0.1253,  0.0611,  0.6180,  ..., -0.3762,  0.2781, -0.1187],
         [ 0.4820, -0.1456,  0.5480,  ..., -0.2263,  0.0622, -0.4698]]],
       device='cuda:0'), pooler_output=tensor([[-0.0212

[33m[W 2022-03-26 01:21:46,361][0m Trial 12 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_v

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.0705, -0.2300, -0.5108,  ..., -0.1861,  0.4011,  0.3615],
         [-0.1294, -0.3325, -0.0032,  ..., -0.3852,  0.5261, -0.1708],
         [-0.4447,  0.1565, -1.0589,  ..., -0.3987, -0.2095, -0.2090],
         ...,
         [ 0.1523,  0.0478, -0.0799,  ..., -0.1968,  0.0784, -0.4286],
         [ 0.3192,  0.1094, -0.0402,  ..., -0.1395, -0.0422, -0.3451],
         [ 0.2831, -0.1760,  0.2305,  ..., -0.0992,  0.1328, -0.1564]],

        [[-0.0705, -0.2300, -0.5108,  ..., -0.1861,  0.4011,  0.3615],
         [-0.1294, -0.3325, -0.0032,  ..., -0.3852,  0.5261, -0.1708],
         [-0.4447,  0.1565, -1.0589,  ..., -0.3987, -0.2095, -0.2090],
         ...,
         [ 0.1523,  0.0478, -0.0799,  ..., -0.1968,  0.0784, -0.4286],
         [ 0.3192,  0.1094, -0.0402,  ..., -0.1395, -0.0422, -0.3451],
         [ 0.2831, -0.1760,  0.2305,  ..., -0.0992,  0.1328, -0.1564]],

        [[-0.0985, -0.1728,  0.0379,  ..., -0.3988, -0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.2049,  0.1761, -0.1157,  ..., -0.0068,  0.2558,  0.4350],
         [ 0.0060,  0.2044, -0.5654,  ...,  0.0019,  0.7925,  0.1064],
         [-0.3350, -0.0614, -0.4062,  ...,  0.4572, -0.0874,  0.0173],
         ...,
         [-0.5243, -0.5948, -0.4909,  ...,  0.4899, -0.0479, -0.3318],
         [ 0.2072,  0.1561, -0.1187,  ...,  0.3681, -0.2599, -0.1523],
         [ 0.1420,  0.0934, -0.1921,  ...,  0.2932, -0.2174, -0.1040]],

        [[-0.2049,  0.1761, -0.1157,  ..., -0.0068,  0.2558,  0.4350],
         [ 0.0060,  0.2044, -0.5654,  ...,  0.0019,  0.7925,  0.1064],
         [-0.3350, -0.0614, -0.4062,  ...,  0.4572, -0.0874,  0.0173],
         ...,
         [-0.5243, -0.5948, -0.4909,  ...,  0.4899, -0.0479, -0.3318],
         [ 0.2072,  0.1561, -0.1187,  ...,  0.3681, -0.2599, -0.1523],
         [ 0.1420,  0.0934, -0.1921,  ...,  0.2932, -0.2174, -0.1040]],

        [[-0.0405,  0.1960, -0.1808,  ..., -0.4772,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[ 0.0328,  0.3431, -0.3598,  ...,  0.0505,  0.1131,  0.3720],
         [ 0.0911, -0.1587,  0.1347,  ..., -0.0211,  0.1775,  0.9612],
         [ 0.3814,  0.7950, -0.4685,  ...,  0.8762, -0.3413,  0.0364],
         ...,
         [-0.5128, -0.2252,  0.3181,  ...,  0.2814,  0.1547,  0.5517],
         [-0.3848, -0.0367,  0.3443,  ...,  0.2983,  0.3317,  0.4208],
         [-0.5017, -0.1488,  0.3669,  ...,  0.3129,  0.2741,  0.4538]],

        [[ 0.0328,  0.3431, -0.3598,  ...,  0.0505,  0.1131,  0.3720],
         [ 0.0911, -0.1587,  0.1347,  ..., -0.0211,  0.1775,  0.9612],
         [ 0.3814,  0.7950, -0.4685,  ...,  0.8762, -0.3413,  0.0364],
         ...,
         [-0.5128, -0.2252,  0.3181,  ...,  0.2814,  0.1547,  0.5517],
         [-0.3848, -0.0367,  0.3443,  ...,  0.2983,  0.3317,  0.4208],
         [-0.5017, -0.1488,  0.3669,  ...,  0.3129,  0.2741,  0.4538]],

        [[-0.3464, -0.1675, -0.8719,  ..., -0.2536,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1393,  0.0610,  0.3929,  ..., -0.7251,  0.4059,  0.0384],
         [ 0.3842,  0.3358,  0.5477,  ..., -0.3858,  0.4742, -0.2679],
         [ 0.1176,  0.7730,  1.1207,  ..., -0.8922,  0.7839, -1.1328],
         ...,
         [ 0.0644,  0.1271,  0.5202,  ..., -0.2780,  0.4218, -0.2198],
         [ 0.0784,  0.0462,  0.6109,  ..., -0.3131,  0.4269, -0.2058],
         [ 0.5390, -0.1678,  0.4459,  ..., -0.2328,  0.0925, -0.6567]],

        [[-0.1393,  0.0610,  0.3929,  ..., -0.7251,  0.4059,  0.0384],
         [ 0.3842,  0.3358,  0.5477,  ..., -0.3858,  0.4742, -0.2679],
         [ 0.1176,  0.7730,  1.1207,  ..., -0.8922,  0.7839, -1.1328],
         ...,
         [ 0.0644,  0.1271,  0.5202,  ..., -0.2780,  0.4218, -0.2198],
         [ 0.0784,  0.0462,  0.6109,  ..., -0.3131,  0.4269, -0.2058],
         [ 0.5390, -0.1678,  0.4459,  ..., -0.2328,  0.0925, -0.6567]]],
       device='cuda:0'), pooler_output=tensor([[-0.0433

[33m[W 2022-03-26 01:21:52,771][0m Trial 13 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_v

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-6.4431e-02, -2.2013e-01, -5.3843e-01,  ..., -3.6218e-01,
           3.8725e-01,  5.2708e-01],
         [-7.6868e-02, -4.0266e-01, -1.3879e-01,  ..., -3.9501e-01,
           1.7461e-01, -5.7843e-02],
         [-3.3764e-01,  6.0385e-02, -9.0154e-01,  ..., -1.9320e-01,
          -3.1183e-01, -1.4660e-01],
         ...,
         [ 2.0175e-01,  1.0002e-01,  5.0412e-04,  ..., -1.4914e-01,
          -2.8203e-02, -2.9429e-01],
         [ 3.2479e-01,  1.4615e-01,  2.3697e-02,  ..., -1.0675e-01,
          -1.5636e-01, -2.3825e-01],
         [ 4.1073e-01, -2.0400e-01,  1.0742e-01,  ..., -1.2736e-01,
           4.2683e-02, -3.6294e-02]],

        [[-6.4431e-02, -2.2013e-01, -5.3843e-01,  ..., -3.6218e-01,
           3.8725e-01,  5.2708e-01],
         [-7.6868e-02, -4.0266e-01, -1.3879e-01,  ..., -3.9501e-01,
           1.7461e-01, -5.7843e-02],
         [-3.3764e-01,  6.0385e-02, -9.0154e-01,  ..., -1.9320e-01,
          -3.11

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-2.0255e-01, -1.4538e-02, -2.3300e-01,  ..., -9.5135e-02,
           2.1354e-01,  5.1477e-01],
         [ 8.4496e-02,  1.9199e-01, -5.7458e-01,  ...,  4.6918e-02,
           5.3658e-01,  6.0621e-02],
         [-3.3431e-01, -2.3411e-01, -3.3534e-01,  ...,  2.2134e-01,
          -2.2045e-01, -6.6644e-03],
         ...,
         [-4.5754e-01, -5.9555e-01, -2.1278e-01,  ...,  3.7893e-01,
           3.4800e-03, -2.5838e-01],
         [ 1.6968e-01,  9.5635e-04,  5.7555e-03,  ...,  2.5996e-01,
          -1.4924e-01, -1.0204e-01],
         [ 1.0259e-01, -4.3950e-02, -3.4139e-02,  ...,  2.0756e-01,
          -1.3383e-01, -7.0147e-02]],

        [[-2.0255e-01, -1.4538e-02, -2.3300e-01,  ..., -9.5135e-02,
           2.1354e-01,  5.1477e-01],
         [ 8.4496e-02,  1.9199e-01, -5.7458e-01,  ...,  4.6918e-02,
           5.3658e-01,  6.0621e-02],
         [-3.3431e-01, -2.3411e-01, -3.3534e-01,  ...,  2.2134e-01,
          -2.20

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1588,  0.2738, -0.4136,  ..., -0.1092,  0.1008,  0.4366],
         [ 0.1036, -0.3272,  0.3113,  ..., -0.2081, -0.0347,  0.9331],
         [ 0.1465,  0.6188, -0.3417,  ...,  0.4720, -0.4607, -0.1517],
         ...,
         [-0.5170, -0.3284,  0.2795,  ...,  0.2295,  0.1693,  0.4249],
         [-0.3873, -0.1162,  0.3253,  ...,  0.2292,  0.2458,  0.2752],
         [-0.4762, -0.2223,  0.3401,  ...,  0.2496,  0.2186,  0.2754]],

        [[-0.1588,  0.2738, -0.4136,  ..., -0.1092,  0.1008,  0.4366],
         [ 0.1036, -0.3272,  0.3113,  ..., -0.2081, -0.0347,  0.9331],
         [ 0.1465,  0.6188, -0.3417,  ...,  0.4720, -0.4607, -0.1517],
         ...,
         [-0.5170, -0.3284,  0.2795,  ...,  0.2295,  0.1693,  0.4249],
         [-0.3873, -0.1162,  0.3253,  ...,  0.2292,  0.2458,  0.2752],
         [-0.4762, -0.2223,  0.3401,  ...,  0.2496,  0.2186,  0.2754]],

        [[-0.3138, -0.0048, -0.5639,  ..., -0.2506,  0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.0935,  0.1483,  0.3707,  ..., -0.7030,  0.4240,  0.1048],
         [ 0.3364,  0.3627,  0.7308,  ..., -0.4382,  0.5274, -0.2332],
         [ 0.0669,  0.6130,  1.1010,  ..., -0.9100,  0.7008, -1.2720],
         ...,
         [ 0.0746,  0.1042,  0.5203,  ..., -0.2642,  0.2719, -0.1258],
         [ 0.1232,  0.0542,  0.6288,  ..., -0.3650,  0.2982, -0.1356],
         [ 0.5140, -0.1505,  0.5527,  ..., -0.2345,  0.0628, -0.5083]],

        [[-0.0935,  0.1483,  0.3707,  ..., -0.7030,  0.4240,  0.1048],
         [ 0.3364,  0.3627,  0.7308,  ..., -0.4382,  0.5274, -0.2332],
         [ 0.0669,  0.6130,  1.1010,  ..., -0.9100,  0.7008, -1.2720],
         ...,
         [ 0.0746,  0.1042,  0.5203,  ..., -0.2642,  0.2719, -0.1258],
         [ 0.1232,  0.0542,  0.6288,  ..., -0.3650,  0.2982, -0.1356],
         [ 0.5140, -0.1505,  0.5527,  ..., -0.2345,  0.0628, -0.5083]]],
       device='cuda:0'), pooler_output=tensor([[-0.0167

[33m[W 2022-03-26 01:21:57,958][0m Trial 14 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_v

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1490, -0.2459, -0.4779,  ..., -0.1154,  0.4700,  0.1368],
         [-0.0908, -0.2638,  0.0652,  ..., -0.2286,  0.6159, -0.4538],
         [-0.2592,  0.2032, -0.9556,  ..., -0.3883, -0.1381, -0.5081],
         ...,
         [ 0.1254,  0.0721, -0.0368,  ..., -0.1981,  0.1134, -0.5008],
         [ 0.3250,  0.1333, -0.0110,  ..., -0.1298,  0.0086, -0.4079],
         [ 0.2786, -0.1278,  0.3123,  ..., -0.0835,  0.1869, -0.3085]],

        [[-0.1490, -0.2459, -0.4779,  ..., -0.1154,  0.4700,  0.1368],
         [-0.0908, -0.2638,  0.0652,  ..., -0.2286,  0.6159, -0.4538],
         [-0.2592,  0.2032, -0.9556,  ..., -0.3883, -0.1381, -0.5081],
         ...,
         [ 0.1254,  0.0721, -0.0368,  ..., -0.1981,  0.1134, -0.5008],
         [ 0.3250,  0.1333, -0.0110,  ..., -0.1298,  0.0086, -0.4079],
         [ 0.2786, -0.1278,  0.3123,  ..., -0.0835,  0.1869, -0.3085]],

        [[-0.0976, -0.1440,  0.0180,  ..., -0.4225, -0.

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-2.6047e-01,  2.0915e-01, -2.0616e-01,  ...,  8.2375e-02,
           2.3628e-01,  2.0943e-01],
         [ 6.9993e-02,  1.2056e-01, -5.6777e-01,  ...,  1.2200e-01,
           8.6300e-01, -1.1315e-02],
         [-1.9274e-01,  3.1090e-03, -5.6625e-01,  ...,  6.4166e-01,
           3.6889e-02, -1.9726e-01],
         ...,
         [-5.5192e-01, -5.6987e-01, -6.8050e-01,  ...,  5.7830e-01,
          -1.7776e-02, -3.4124e-01],
         [ 1.7116e-01,  1.8767e-01, -2.7399e-01,  ...,  4.8416e-01,
          -2.0873e-01, -1.3187e-01],
         [ 1.2975e-01,  1.3226e-01, -3.5637e-01,  ...,  3.7880e-01,
          -1.7287e-01, -6.9498e-02]],

        [[-2.6047e-01,  2.0915e-01, -2.0616e-01,  ...,  8.2375e-02,
           2.3628e-01,  2.0943e-01],
         [ 6.9993e-02,  1.2056e-01, -5.6777e-01,  ...,  1.2200e-01,
           8.6300e-01, -1.1315e-02],
         [-1.9274e-01,  3.1090e-03, -5.6625e-01,  ...,  6.4166e-01,
           3.68

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[ 1.1830e-01,  2.6755e-01, -5.4004e-01,  ...,  1.4662e-01,
           9.0053e-02,  3.1347e-01],
         [ 2.2560e-01, -1.6542e-01,  2.5995e-02,  ...,  1.3874e-01,
           3.4007e-01,  8.5501e-01],
         [ 6.1818e-01,  6.6266e-01, -5.4409e-01,  ...,  9.5837e-01,
          -2.8103e-01,  2.0609e-02],
         ...,
         [-4.1178e-01, -2.2249e-01,  2.3812e-01,  ...,  2.8420e-01,
           1.9291e-01,  6.0341e-01],
         [-3.0063e-01,  1.5860e-04,  2.7516e-01,  ...,  2.8905e-01,
           3.9215e-01,  4.9266e-01],
         [-4.0837e-01, -9.3404e-02,  2.9836e-01,  ...,  2.7447e-01,
           3.3021e-01,  5.2722e-01]],

        [[ 1.1830e-01,  2.6755e-01, -5.4004e-01,  ...,  1.4662e-01,
           9.0053e-02,  3.1347e-01],
         [ 2.2560e-01, -1.6542e-01,  2.5995e-02,  ...,  1.3874e-01,
           3.4007e-01,  8.5501e-01],
         [ 6.1818e-01,  6.6266e-01, -5.4409e-01,  ...,  9.5837e-01,
          -2.81

evaluate trials
BaseModelOutputWithPooling(last_hidden_state=tensor([[[-0.1783,  0.0757,  0.3210,  ..., -0.6267,  0.2842,  0.0654],
         [ 0.3826,  0.3287,  0.4941,  ..., -0.4334,  0.3943, -0.2983],
         [ 0.1732,  0.9288,  1.2006,  ..., -0.7994,  0.6691, -1.1489],
         ...,
         [ 0.0438,  0.2019,  0.4674,  ..., -0.1616,  0.3687, -0.2798],
         [ 0.0242,  0.0713,  0.5750,  ..., -0.2297,  0.3579, -0.2498],
         [ 0.5444, -0.1255,  0.3838,  ..., -0.2626,  0.0704, -0.6563]],

        [[-0.1783,  0.0757,  0.3210,  ..., -0.6267,  0.2842,  0.0654],
         [ 0.3826,  0.3287,  0.4941,  ..., -0.4334,  0.3943, -0.2983],
         [ 0.1732,  0.9288,  1.2006,  ..., -0.7994,  0.6691, -1.1489],
         ...,
         [ 0.0438,  0.2019,  0.4674,  ..., -0.1616,  0.3687, -0.2798],
         [ 0.0242,  0.0713,  0.5750,  ..., -0.2297,  0.3579, -0.2498],
         [ 0.5444, -0.1255,  0.3838,  ..., -0.2626,  0.0704, -0.6563]]],
       device='cuda:0'), pooler_output=tensor([[-0.0557

[33m[W 2022-03-26 01:22:02,735][0m Trial 15 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_v

Step,Training Loss


KeyboardInterrupt: 

In [13]:
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.schedulers import ASHAScheduler

trainer.hyperparameter_search(
    direction="maximize"
    #compute_objective=my_objective
#     resources_per_trial={
#         "cpu": 1,
#         "gpu": 1
#     },
    # Choose among many libraries:
    # https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
    #search_alg=HyperOptSearch(metric="objective", mode="max"),
    # Choose among schedulers:
    # https://docs.ray.io/en/latest/tune/api_docs/schedulers.html
    #scheduler=ASHAScheduler(metric="objective", mode="max"),
    #n_trials=8 # number of trials
)

[32m[I 2022-03-26 09:55:28,648][0m A new study created in memory with name: no-name-252389af-e34b-4555-8972-8009b0177622[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8


[33m[W 2022-03-26 09:55:40,009][0m Trial 0 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.17.0",
  "type_vo

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-26 09:55:44,667][0m Trial 1 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_att

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-26 09:55:49,708][0m Trial 2 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_att

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-26 09:55:54,191][0m Trial 3 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_att

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-26 09:55:59,710][0m Trial 4 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_att

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-26 09:56:03,676][0m Trial 5 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_att

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-26 09:56:08,461][0m Trial 6 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_att

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-26 09:56:12,880][0m Trial 7 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_att

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-26 09:56:16,916][0m Trial 8 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_att

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-26 09:56:22,019][0m Trial 9 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_att

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-26 09:56:25,704][0m Trial 10 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_at

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-26 09:56:31,724][0m Trial 11 failed, because the value None could not be cast to float.[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_at

Step,Training Loss


KeyboardInterrupt: 

In [16]:
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.schedulers import ASHAScheduler

trainer.hyperparameter_search(
    direction="maximize",
    backend="ray"
    #compute_objective=my_objective
#     resources_per_trial={
#         "cpu": 1,
#         "gpu": 1
#     },
    # Choose among many libraries:
    # https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
    #search_alg=HyperOptSearch(metric="objective", mode="max"),
    # Choose among schedulers:
    # https://docs.ray.io/en/latest/tune/api_docs/schedulers.html
    #scheduler=ASHAScheduler(metric="objective", mode="max"),
    #n_trials=8 # number of trials
)

No `resources_per_trial` arg was passed into `hyperparameter_search`. Setting it to a default value of 1 CPU and 1 GPU for each trial.


TypeError: cannot pickle 'torch._C.Generator' object

In [14]:
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.schedulers import ASHAScheduler

trainer.hyperparameter_search(
    direction="maximize", 
    backend="optuna",
#     resources_per_trial={
#         "cpu": 1,
#         "gpu": 1
#     },
    # Choose among many libraries:
    # https://docs.ray.io/en/latest/tune/api_docs/suggestion.html
    #search_alg=HyperOptSearch(metric="objective", mode="max"),
    # Choose among schedulers:
    # https://docs.ray.io/en/latest/tune/api_docs/schedulers.html
    #scheduler=ASHAScheduler(metric="objective", mode="max"),
    #n_trials=8 # number of trials
)

[32m[I 2022-03-25 17:12:20,441][0m A new study created in memory with name: no-name-83983eac-8c57-4a93-a391-1b39af9da1e2[0m
Trial:
loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at C:\Users\ng-ka/.cache\huggingface\transformers\3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "

Step,Training Loss




Training completed. Do not forget to share your model on huggingface.co/models =)


***** Running Evaluation *****
  Num examples = 25
  Batch size = 8
[33m[W 2022-03-25 17:12:26,037][0m Trial 0 failed because of the following error: ValueError('too many values to unpack (expected 2)')[0m
Traceback (most recent call last):
  File "C:\Users\ng-ka\anaconda3\envs\cl-distilled\lib\site-packages\optuna\study\_optimize.py", line 213, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\ng-ka\anaconda3\envs\cl-distilled\lib\site-packages\transformers\integrations.py", line 157, in _objective
    metrics = trainer.evaluate()
  File "C:\Users\ng-ka\anaconda3\envs\cl-distilled\lib\site-packages\transformers\trainer.py", line 2257, in evaluate
    output = eval_loop(
  File "C:\Users\ng-ka\anaconda3\envs\cl-distilled\lib\site-packages\transformers\trainer.py", line 2431, in evaluation_loop
    loss, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore

input ids before
torch.Size([8, 2, 32])
tensor([[[  101, 16272,  2001,  2019, 11295,  3017,  1998,  2764,  1999,  1996,
           7364, 26866,  1037,  2177,  1997,  2714,  3241,  4177,  1012,   102,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0],
         [  101, 16272,  2001,  2019, 11295,  3017,  1998,  2764,  1999,  1996,
           7364, 26866,  1037,  2177,  1997,  2714,  3241,  4177,  1012,   102,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0]],

        [[  101,  4821,  2023,  2150,  7262,  1998,  4675, 21572, 26638,  1012,
            102,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
              0,     0],
         [  101,  4821,  2023,  2150,  7262,  1998,  4675, 21572, 26638,  1012,
            102,     0,     0,     0,     0,     0,     0,     0,     0,     0,
   

ValueError: too many values to unpack (expected 2)

In [None]:
model_path = 'trained_model/bert_cl'

#train_result = trainer.train(model_path=model_path)
train_result = trainer.train()

## 4. Evaluate Bert CL Model performance

In [None]:
model.eval()

results = evaluate_model()
results

In [None]:
    print(results["STS12"]["all"]["spearman"].keys())
    print('STS12 mean: ', results["STS12"]["all"]["spearman"]["mean"])
    print('STS12 wmean: ', results["STS12"]["all"]["spearman"]["wmean"])
    print('STS13 mean: ', results["STS13"]["all"]["spearman"]["mean"])
    print('STS13 wmean: ', results["STS13"]["all"]["spearman"]["wmean"])
    print('STS14 mean: ', results["STS14"]["all"]["spearman"]["mean"])
    print('STS14 wmean: ', results["STS14"]["all"]["spearman"]["wmean"])
    print('STS15 mean: ', results["STS15"]["all"]["spearman"]["mean"])
    print('STS15 wmean: ', results["STS15"]["all"]["spearman"]["wmean"])
    print('STSB: ', results["STSBenchmark"]["spearman"])