In [1]:
import dataclasses
import logging
import os
import sys
from dataclasses import dataclass, field
from typing import Dict, Optional

import numpy as np

In [2]:
from transformers import (
    AutoConfig,
    AutoTokenizer,
    T5Tokenizer,
    EvalPrediction,
    HfArgumentParser,
    Trainer,
    TrainingArguments,
    T5ForConditionalGeneration,
    set_seed )

In [3]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [4]:
from utils_glue_T5_classification import (
    T5GlueDataset, 
    T5_glue_compute_metrics, 
    T5_glue_output_modes, 
    T5_glue_tasks_num_labels,
    processors,
    Split
)

In [5]:
@dataclass
class ModelArguments:
    """
    Command-line options identifying the pretrained model, and optionally a separate
    config, tokenizer and download cache directory, used as the fine-tuning starting point.

    Only ``model_name_or_path`` is required; every other field defaults to ``None``.
    """

    # Required: has no default, so constructing the dataclass without it raises TypeError.
    model_name_or_path: str = field(
        metadata={
            "help": "Path to pretrained model or model identifier from huggingface.co/models"
        },
    )
    config_name: Optional[str] = field(
        default=None,
        metadata={"help": "Pretrained config name or path if not the same as model_name"},
    )
    tokenizer_name: Optional[str] = field(
        default=None,
        metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"},
    )
    cache_dir: Optional[str] = field(
        default=None,
        metadata={"help": "Where do you want to store the pretrained models downloaded from s3"},
    )
        
@dataclass
class DataTrainingArguments:
    """
    Command-line options describing the data fed to the model for training and evaluation:
    the task name, the directory holding its files, the tokenized sequence length and
    whether cached datasets are rebuilt.
    """

    # The help text enumerates the keys of `processors` at class-definition time.
    task_name: str = field(
        metadata={"help": "The name of the task to train on: " + ", ".join(processors.keys())},
    )
    data_dir: str = field(
        metadata={"help": "Should contain the data files for the task."},
    )
    max_seq_length: int = field(
        default=128,
        metadata={
            "help": "The maximum total input sequence length after tokenization. Sequences longer "
            "than this will be truncated, sequences shorter will be padded."
        },
    )
    overwrite_cache: bool = field(
        default=False,
        metadata={"help": "Overwrite the cached training and evaluation sets"},
    )

    def __post_init__(self):
        # Store the task name lower-cased, so "CoLA" on the command line becomes "cola".
        self.task_name = self.task_name.lower()

# One parser for the three argument groups: model, data and training options.
parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
logger = logging.getLogger(__name__)

# --- Experiment configuration -------------------------------------------------
# NOTE(review): SUPER_GLUE_DIR and EXPERIMENT_DIR are machine-specific absolute
# paths; adjust them before running on another host.
MODEL_NAME = "t5-small"
DATESTAMP = "20200814"
SUPER_GLUE_DIR = "/home/keyur/medhas/glue_data/"
TASK_NAME = "CoLA"
PER_DEVICE_BATCH_SIZE = 48
EXPERIMENT_DIR = "/mnt/data/medhas/glue_experiments/%s/%s" % (MODEL_NAME, DATESTAMP)

# Arguments handed to the parser in place of sys.argv, since this runs inside a notebook.
custom_sysargv = [
    "--model_name_or_path=%s" % MODEL_NAME,
    "--task_name=%s" % TASK_NAME,
    "--do_train",
    "--do_eval",
    "--data_dir=%s" % os.path.join(SUPER_GLUE_DIR, TASK_NAME),
    "--max_seq_length=256",
    "--per_device_train_batch_size=%s" % PER_DEVICE_BATCH_SIZE,
    "--per_device_eval_batch_size=%s" % PER_DEVICE_BATCH_SIZE,
    "--learning_rate=2e-5",
    "--num_train_epochs=100",
    "--output_dir=%s" % os.path.join(EXPERIMENT_DIR, TASK_NAME),
    "--logging_dir=%s/logs" % os.path.join(EXPERIMENT_DIR, TASK_NAME),
    "--logging_steps=90",
    "--evaluate_during_training",
    # Spelled out in full: the previous "--eval_step" was only accepted because
    # argparse resolves unambiguous option prefixes, which breaks as soon as
    # another option starting with "eval_step" is added.
    "--eval_steps=90",
    "--save_total_limit=2",
    "--save_steps=1000",
    "--gradient_accumulation_steps=1",
    "--overwrite_output_dir",
]

model_args, data_args, training_args = parser.parse_args_into_dataclasses(args=custom_sysargv)

# Setup logging
# The level is WARNING for every process rank (the previous conditional selected
# the same level in both branches). At this level the logger.info() call below is
# filtered out; switch to logging.INFO here if the parameter dump is wanted.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.WARNING,
)
logger.warning(
    "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
    training_args.local_rank,
    training_args.device,
    training_args.n_gpu,
    bool(training_args.local_rank != -1),
    training_args.fp16,
)
logger.info("Training/evaluation parameters %s", training_args)

# Seed the random number generators before the model and datasets are created.
set_seed(training_args.seed)

# Look up the label count and output mode registered for the (lower-cased) task name.
num_labels = T5_glue_tasks_num_labels[data_args.task_name]
output_mode = T5_glue_output_modes[data_args.task_name]
print("Task:", data_args.task_name, "Labels:", num_labels, ', Output', output_mode)




Task: cola Labels: 2 , Output classification


In [6]:
# Display the task-name -> number-of-labels mapping imported from utils_glue_T5_classification.
T5_glue_tasks_num_labels

{'sst-2': 2, 'cola': 2}

In [7]:
# Load the model configuration; an explicit --config_name wins, otherwise the
# model name/path itself is used to locate the config.
config = AutoConfig.from_pretrained(
    model_args.config_name or model_args.model_name_or_path,
    num_labels=num_labels,
    finetuning_task=data_args.task_name,
    cache_dir=model_args.cache_dir,
)

In [8]:
# Load the tokenizer; an explicit --tokenizer_name wins, otherwise fall back to
# the model name/path.
tokenizer = AutoTokenizer.from_pretrained(
    model_args.tokenizer_name or model_args.model_name_or_path,
    cache_dir=model_args.cache_dir,
)

In [9]:
# Load the pretrained T5 weights. `from_tf` is switched on when the model
# name/path contains ".ckpt".
model = T5ForConditionalGeneration.from_pretrained(
    model_args.model_name_or_path,
    from_tf=".ckpt" in model_args.model_name_or_path,
    config=config,
    cache_dir=model_args.cache_dir,
)

In [10]:
def _load_split(split, enabled):
    """Return the T5GlueDataset for `split`, or None when `enabled` is falsy.

    All three splits share the data directory, tokenizer, task name, sequence
    length and cache setting taken from `data_args`; only `mode` differs.
    """
    if not enabled:
        return None
    return T5GlueDataset(
        data_args.data_dir,
        tokenizer=tokenizer,
        task=data_args.task_name,
        max_seq_length=data_args.max_seq_length,
        overwrite_cache=data_args.overwrite_cache,
        mode=split,
    )


# Each split is only built when the matching --do_* flag was given.
train_dataset = _load_split(Split.train, training_args.do_train)

eval_dataset = _load_split(Split.dev, training_args.do_eval)

test_dataset = _load_split(Split.test, training_args.do_predict)

In [11]:
def compute_metrics(p: EvalPrediction) -> Dict:
    """Turn raw model predictions into task metrics for the Trainer.

    Args:
        p: Evaluation result carrying ``predictions`` and ``label_ids``.

    Returns:
        Whatever ``T5_glue_compute_metrics`` returns for the current task.

    Raises:
        ValueError: If the module-level ``output_mode`` is neither
            "classification" nor "regression".
    """
    if output_mode == "classification":
        # argmax over the third axis; presumably predictions are
        # (batch, sequence, vocabulary) logits - TODO confirm against the dataset.
        preds = np.argmax(p.predictions, axis=2)
    elif output_mode == "regression":
        preds = np.squeeze(p.predictions)
    else:
        # Previously this fell through and failed with UnboundLocalError on `preds`.
        raise ValueError("Unsupported output_mode: %r" % (output_mode,))
    return T5_glue_compute_metrics(data_args.task_name, preds, p.label_ids)

In [12]:
# Build the Trainer from the model, the parsed training arguments, the
# train/eval datasets and the metric callback defined above.
trainer = Trainer(
    args=training_args,
    model=model,
    compute_metrics=compute_metrics,
    eval_dataset=eval_dataset,
    train_dataset=train_dataset,
)

In [13]:
if training_args.do_train:
    # Hand the model path to train() only when it is a local directory;
    # for a hub identifier such as "t5-small" pass None instead.
    checkpoint_dir = model_args.model_name_or_path
    trainer.train(model_path=checkpoint_dir if os.path.isdir(checkpoint_dir) else None)
    trainer.save_model()
    # Also store the tokenizer in the output directory so the saved model is
    # self-contained and easy to share on huggingface.co/models.
    if trainer.is_world_master():
        tokenizer.save_pretrained(training_args.output_dir)

HBox(children=(FloatProgress(value=0.0, description='Epoch', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…



{'loss': 3.491254238287608, 'learning_rate': 1.98e-05, 'epoch': 1.0, 'step': 90}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.8428999348120256, 'eval_mcc': 0.0, 'epoch': 1.0, 'step': 90}



  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.8509377612007989, 'learning_rate': 1.9600000000000002e-05, 'epoch': 2.0, 'step': 180}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6784975149414756, 'eval_mcc': -0.0163824847096089, 'epoch': 2.0, 'step': 180}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.7215930703613493, 'learning_rate': 1.94e-05, 'epoch': 3.0, 'step': 270}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6413661512461576, 'eval_mcc': 0.0, 'epoch': 3.0, 'step': 270}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.6989964107672374, 'learning_rate': 1.9200000000000003e-05, 'epoch': 4.0, 'step': 360}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6286214264956388, 'eval_mcc': 0.0, 'epoch': 4.0, 'step': 360}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.6813677655325996, 'learning_rate': 1.9e-05, 'epoch': 5.0, 'step': 450}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6463551250371066, 'eval_mcc': 0.0, 'epoch': 5.0, 'step': 450}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.6734462552600436, 'learning_rate': 1.88e-05, 'epoch': 6.0, 'step': 540}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6282941753214056, 'eval_mcc': 0.0, 'epoch': 6.0, 'step': 540}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.6577951861752405, 'learning_rate': 1.86e-05, 'epoch': 7.0, 'step': 630}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6138104525479403, 'eval_mcc': 0.0, 'epoch': 7.0, 'step': 630}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.6502942621707917, 'learning_rate': 1.8400000000000003e-05, 'epoch': 8.0, 'step': 720}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6049125465479764, 'eval_mcc': 0.04634607366757289, 'epoch': 8.0, 'step': 720}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.648073179854287, 'learning_rate': 1.8200000000000002e-05, 'epoch': 9.0, 'step': 810}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5975882681933317, 'eval_mcc': 0.08035097777030693, 'epoch': 9.0, 'step': 810}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.6387623210748037, 'learning_rate': 1.8e-05, 'epoch': 10.0, 'step': 900}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.587933518669822, 'eval_mcc': 0.14652882380393115, 'epoch': 10.0, 'step': 900}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.6253244307306077, 'learning_rate': 1.7800000000000002e-05, 'epoch': 11.0, 'step': 990}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.596479207277298, 'eval_mcc': 0.15295373143670263, 'epoch': 11.0, 'step': 990}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…



{'loss': 0.6316544946697024, 'learning_rate': 1.76e-05, 'epoch': 12.0, 'step': 1080}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5839484632015228, 'eval_mcc': 0.1396035147846358, 'epoch': 12.0, 'step': 1080}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.6235953119066027, 'learning_rate': 1.7400000000000003e-05, 'epoch': 13.0, 'step': 1170}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.579039763320576, 'eval_mcc': 0.14239642779351247, 'epoch': 13.0, 'step': 1170}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.612454907099406, 'learning_rate': 1.72e-05, 'epoch': 14.0, 'step': 1260}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5822232853282582, 'eval_mcc': 0.15224690038076166, 'epoch': 14.0, 'step': 1260}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.6065926584932539, 'learning_rate': 1.7e-05, 'epoch': 15.0, 'step': 1350}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5674228451468728, 'eval_mcc': 0.22774158716327836, 'epoch': 15.0, 'step': 1350}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5988545758856667, 'learning_rate': 1.6800000000000002e-05, 'epoch': 16.0, 'step': 1440}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5687035132538188, 'eval_mcc': 0.21279356029456914, 'epoch': 16.0, 'step': 1440}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.590344660811954, 'learning_rate': 1.66e-05, 'epoch': 17.0, 'step': 1530}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.560179206458005, 'eval_mcc': 0.26566367283364495, 'epoch': 17.0, 'step': 1530}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5833084228965971, 'learning_rate': 1.64e-05, 'epoch': 18.0, 'step': 1620}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5673991414633665, 'eval_mcc': 0.24836857095177145, 'epoch': 18.0, 'step': 1620}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.580294602115949, 'learning_rate': 1.62e-05, 'epoch': 19.0, 'step': 1710}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5751560709693215, 'eval_mcc': 0.2481511080069938, 'epoch': 19.0, 'step': 1710}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5668773372968038, 'learning_rate': 1.6000000000000003e-05, 'epoch': 20.0, 'step': 1800}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.57860006256537, 'eval_mcc': 0.2558216346917546, 'epoch': 20.0, 'step': 1800}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5667253288957808, 'learning_rate': 1.58e-05, 'epoch': 21.0, 'step': 1890}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5693809173323892, 'eval_mcc': 0.2683485984200259, 'epoch': 21.0, 'step': 1890}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.55931403140227, 'learning_rate': 1.5600000000000003e-05, 'epoch': 22.0, 'step': 1980}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5823531421748075, 'eval_mcc': 0.26743333583793716, 'epoch': 22.0, 'step': 1980}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.557472793592347, 'learning_rate': 1.54e-05, 'epoch': 23.0, 'step': 2070}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5732827863909982, 'eval_mcc': 0.2793055818747123, 'epoch': 23.0, 'step': 2070}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.554112915860282, 'learning_rate': 1.5200000000000002e-05, 'epoch': 24.0, 'step': 2160}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5663419176231731, 'eval_mcc': 0.2803787380434245, 'epoch': 24.0, 'step': 2160}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5426279571321275, 'learning_rate': 1.5000000000000002e-05, 'epoch': 25.0, 'step': 2250}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5730306939645247, 'eval_mcc': 0.28977945741739114, 'epoch': 25.0, 'step': 2250}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5345272415214115, 'learning_rate': 1.48e-05, 'epoch': 26.0, 'step': 2340}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5835445441982963, 'eval_mcc': 0.28257858516575957, 'epoch': 26.0, 'step': 2340}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5427167461978064, 'learning_rate': 1.46e-05, 'epoch': 27.0, 'step': 2430}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5778217261487787, 'eval_mcc': 0.28602447171684914, 'epoch': 27.0, 'step': 2430}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5348879257837932, 'learning_rate': 1.4400000000000001e-05, 'epoch': 28.0, 'step': 2520}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5663955699313771, 'eval_mcc': 0.3011038306456881, 'epoch': 28.0, 'step': 2520}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5296464562416077, 'learning_rate': 1.4200000000000001e-05, 'epoch': 29.0, 'step': 2610}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5727948112921282, 'eval_mcc': 0.2918790757497544, 'epoch': 29.0, 'step': 2610}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5222729212707944, 'learning_rate': 1.4e-05, 'epoch': 30.0, 'step': 2700}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.581150163303722, 'eval_mcc': 0.29005034631254833, 'epoch': 30.0, 'step': 2700}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5165717916356193, 'learning_rate': 1.38e-05, 'epoch': 31.0, 'step': 2790}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5632028904828158, 'eval_mcc': 0.307264433758688, 'epoch': 31.0, 'step': 2790}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5144746998945872, 'learning_rate': 1.3600000000000002e-05, 'epoch': 32.0, 'step': 2880}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5671122155406259, 'eval_mcc': 0.30616424473986925, 'epoch': 32.0, 'step': 2880}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5132243815395567, 'learning_rate': 1.3400000000000002e-05, 'epoch': 33.0, 'step': 2970}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5669901289723136, 'eval_mcc': 0.31225129638070365, 'epoch': 33.0, 'step': 2970}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5026847882403268, 'learning_rate': 1.3200000000000002e-05, 'epoch': 34.0, 'step': 3060}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5822597958824851, 'eval_mcc': 0.30435451075961845, 'epoch': 34.0, 'step': 3060}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.5021057539516025, 'learning_rate': 1.3000000000000001e-05, 'epoch': 35.0, 'step': 3150}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5582999614152041, 'eval_mcc': 0.33156921160370506, 'epoch': 35.0, 'step': 3150}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4998887982633379, 'learning_rate': 1.2800000000000001e-05, 'epoch': 36.0, 'step': 3240}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5666658228093927, 'eval_mcc': 0.330463990595188, 'epoch': 36.0, 'step': 3240}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4914880769120322, 'learning_rate': 1.2600000000000001e-05, 'epoch': 37.0, 'step': 3330}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5823270651427183, 'eval_mcc': 0.3189042768080352, 'epoch': 37.0, 'step': 3330}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4873234387901094, 'learning_rate': 1.2400000000000002e-05, 'epoch': 38.0, 'step': 3420}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5711883496154438, 'eval_mcc': 0.330463990595188, 'epoch': 38.0, 'step': 3420}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4920119576983982, 'learning_rate': 1.22e-05, 'epoch': 39.0, 'step': 3510}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5860696489160712, 'eval_mcc': 0.3380143249102841, 'epoch': 39.0, 'step': 3510}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.47515310943126676, 'learning_rate': 1.2e-05, 'epoch': 40.0, 'step': 3600}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5632775642655112, 'eval_mcc': 0.34534242959193096, 'epoch': 40.0, 'step': 3600}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4768660777144962, 'learning_rate': 1.18e-05, 'epoch': 41.0, 'step': 3690}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5725998282432556, 'eval_mcc': 0.3438365035781525, 'epoch': 41.0, 'step': 3690}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4660870442787806, 'learning_rate': 1.16e-05, 'epoch': 42.0, 'step': 3780}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5886092348532244, 'eval_mcc': 0.3354718506602832, 'epoch': 42.0, 'step': 3780}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4712531914313634, 'learning_rate': 1.14e-05, 'epoch': 43.0, 'step': 3870}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5750699422576211, 'eval_mcc': 0.34918544881779956, 'epoch': 43.0, 'step': 3870}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.46430234644148083, 'learning_rate': 1.1200000000000001e-05, 'epoch': 44.0, 'step': 3960}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5685504566539418, 'eval_mcc': 0.36917268219372684, 'epoch': 44.0, 'step': 3960}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.47053693168693117, 'learning_rate': 1.1000000000000001e-05, 'epoch': 45.0, 'step': 4050}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5870094190944325, 'eval_mcc': 0.3518246428875314, 'epoch': 45.0, 'step': 4050}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.45945047173235154, 'learning_rate': 1.0800000000000002e-05, 'epoch': 46.0, 'step': 4140}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5774755830114539, 'eval_mcc': 0.36828762347459343, 'epoch': 46.0, 'step': 4140}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.45703392790423497, 'learning_rate': 1.0600000000000002e-05, 'epoch': 47.0, 'step': 4230}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5840776616876776, 'eval_mcc': 0.36828762347459343, 'epoch': 47.0, 'step': 4230}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4563980264796151, 'learning_rate': 1.04e-05, 'epoch': 48.0, 'step': 4320}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5813309306448157, 'eval_mcc': 0.3658152450806192, 'epoch': 48.0, 'step': 4320}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.45424656536844044, 'learning_rate': 1.02e-05, 'epoch': 49.0, 'step': 4410}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5844028890132904, 'eval_mcc': 0.35570196596359493, 'epoch': 49.0, 'step': 4410}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.44742532604270513, 'learning_rate': 1e-05, 'epoch': 50.0, 'step': 4500}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5853145881132646, 'eval_mcc': 0.3624512123591821, 'epoch': 50.0, 'step': 4500}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4452272481388516, 'learning_rate': 9.800000000000001e-06, 'epoch': 51.0, 'step': 4590}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5874618102203716, 'eval_mcc': 0.3716057823196969, 'epoch': 51.0, 'step': 4590}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4378038293785519, 'learning_rate': 9.600000000000001e-06, 'epoch': 52.0, 'step': 4680}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5988357040015134, 'eval_mcc': 0.37350015954175864, 'epoch': 52.0, 'step': 4680}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.44028736650943756, 'learning_rate': 9.4e-06, 'epoch': 53.0, 'step': 4770}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5796772783452814, 'eval_mcc': 0.3645466646822362, 'epoch': 53.0, 'step': 4770}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4423256885674265, 'learning_rate': 9.200000000000002e-06, 'epoch': 54.0, 'step': 4860}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.601659438826821, 'eval_mcc': 0.3671871922211316, 'epoch': 54.0, 'step': 4860}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.44292354418171775, 'learning_rate': 9e-06, 'epoch': 55.0, 'step': 4950}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5844760212031278, 'eval_mcc': 0.3749187750871912, 'epoch': 55.0, 'step': 4950}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.42492155929406483, 'learning_rate': 8.8e-06, 'epoch': 56.0, 'step': 5040}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5788937373594805, 'eval_mcc': 0.3866423842515296, 'epoch': 56.0, 'step': 5040}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4314571483267678, 'learning_rate': 8.6e-06, 'epoch': 57.0, 'step': 5130}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.590450950644233, 'eval_mcc': 0.37358812128302665, 'epoch': 57.0, 'step': 5130}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4275545782513089, 'learning_rate': 8.400000000000001e-06, 'epoch': 58.0, 'step': 5220}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.580060132525184, 'eval_mcc': 0.3662442029418099, 'epoch': 58.0, 'step': 5220}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4264437428779072, 'learning_rate': 8.2e-06, 'epoch': 59.0, 'step': 5310}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.592732001434673, 'eval_mcc': 0.38153037319510796, 'epoch': 59.0, 'step': 5310}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4172792010837131, 'learning_rate': 8.000000000000001e-06, 'epoch': 60.0, 'step': 5400}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.588096255605871, 'eval_mcc': 0.37920882679541384, 'epoch': 60.0, 'step': 5400}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4196272134780884, 'learning_rate': 7.800000000000002e-06, 'epoch': 61.0, 'step': 5490}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5975776802409779, 'eval_mcc': 0.37956756539307757, 'epoch': 61.0, 'step': 5490}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.42198962569236753, 'learning_rate': 7.600000000000001e-06, 'epoch': 62.0, 'step': 5580}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5898017097603191, 'eval_mcc': 0.37818554027258383, 'epoch': 62.0, 'step': 5580}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4171837674246894, 'learning_rate': 7.4e-06, 'epoch': 63.0, 'step': 5670}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6007112616842444, 'eval_mcc': 0.38185225030961506, 'epoch': 63.0, 'step': 5670}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.41908892227543726, 'learning_rate': 7.2000000000000005e-06, 'epoch': 64.0, 'step': 5760}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5889270305633545, 'eval_mcc': 0.37818554027258383, 'epoch': 64.0, 'step': 5760}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4157561371723811, 'learning_rate': 7e-06, 'epoch': 65.0, 'step': 5850}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5892515670169484, 'eval_mcc': 0.37956756539307757, 'epoch': 65.0, 'step': 5850}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.41695739295747547, 'learning_rate': 6.800000000000001e-06, 'epoch': 66.0, 'step': 5940}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5950368371876803, 'eval_mcc': 0.37696189770911276, 'epoch': 66.0, 'step': 5940}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.41785827345318266, 'learning_rate': 6.600000000000001e-06, 'epoch': 67.0, 'step': 6030}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.603991215879267, 'eval_mcc': 0.37920882679541384, 'epoch': 67.0, 'step': 6030}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.41288305521011354, 'learning_rate': 6.4000000000000006e-06, 'epoch': 68.0, 'step': 6120}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5927707065235485, 'eval_mcc': 0.3747864339444327, 'epoch': 68.0, 'step': 6120}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4104944669538074, 'learning_rate': 6.200000000000001e-06, 'epoch': 69.0, 'step': 6210}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5984820019115101, 'eval_mcc': 0.3765825650375095, 'epoch': 69.0, 'step': 6210}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.40524515840742326, 'learning_rate': 6e-06, 'epoch': 70.0, 'step': 6300}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5918062248013236, 'eval_mcc': 0.37776376581057897, 'epoch': 70.0, 'step': 6300}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4148719837268194, 'learning_rate': 5.8e-06, 'epoch': 71.0, 'step': 6390}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5885961516336962, 'eval_mcc': 0.37657464734689533, 'epoch': 71.0, 'step': 6390}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.41660198701752554, 'learning_rate': 5.600000000000001e-06, 'epoch': 72.0, 'step': 6480}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6025622161951932, 'eval_mcc': 0.37920882679541384, 'epoch': 72.0, 'step': 6480}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.41494983070426517, 'learning_rate': 5.400000000000001e-06, 'epoch': 73.0, 'step': 6570}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5958904461427168, 'eval_mcc': 0.37313880856315196, 'epoch': 73.0, 'step': 6570}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.40070818695757127, 'learning_rate': 5.2e-06, 'epoch': 74.0, 'step': 6660}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6051890850067139, 'eval_mcc': 0.373973214730547, 'epoch': 74.0, 'step': 6660}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4005940700570742, 'learning_rate': 5e-06, 'epoch': 75.0, 'step': 6750}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5987063375386324, 'eval_mcc': 0.3815782534141674, 'epoch': 75.0, 'step': 6750}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.4058404806587431, 'learning_rate': 4.800000000000001e-06, 'epoch': 76.0, 'step': 6840}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5961154374209318, 'eval_mcc': 0.3820211908832823, 'epoch': 76.0, 'step': 6840}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.398508189453019, 'learning_rate': 4.600000000000001e-06, 'epoch': 77.0, 'step': 6930}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5928250307386572, 'eval_mcc': 0.3829451727207349, 'epoch': 77.0, 'step': 6930}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.39960473477840425, 'learning_rate': 4.4e-06, 'epoch': 78.0, 'step': 7020}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6014950492165305, 'eval_mcc': 0.37657464734689533, 'epoch': 78.0, 'step': 7020}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.3979138480292426, 'learning_rate': 4.2000000000000004e-06, 'epoch': 79.0, 'step': 7110}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.603584801608866, 'eval_mcc': 0.3761082371796093, 'epoch': 79.0, 'step': 7110}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.3912086352705956, 'learning_rate': 4.000000000000001e-06, 'epoch': 80.0, 'step': 7200}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6011851592497393, 'eval_mcc': 0.3829451727207349, 'epoch': 80.0, 'step': 7200}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.3919779893424776, 'learning_rate': 3.8000000000000005e-06, 'epoch': 81.0, 'step': 7290}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6094341765750538, 'eval_mcc': 0.37818554027258383, 'epoch': 81.0, 'step': 7290}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.39574044446150464, 'learning_rate': 3.6000000000000003e-06, 'epoch': 82.0, 'step': 7380}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6023746132850647, 'eval_mcc': 0.37952999607347715, 'epoch': 82.0, 'step': 7380}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.3935896419816547, 'learning_rate': 3.4000000000000005e-06, 'epoch': 83.0, 'step': 7470}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6076522361148488, 'eval_mcc': 0.384102648016182, 'epoch': 83.0, 'step': 7470}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.39816176560189986, 'learning_rate': 3.2000000000000003e-06, 'epoch': 84.0, 'step': 7560}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6118025156584653, 'eval_mcc': 0.3862504302953925, 'epoch': 84.0, 'step': 7560}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.3909621391031477, 'learning_rate': 3e-06, 'epoch': 85.0, 'step': 7650}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6103296117349104, 'eval_mcc': 0.3811484475746342, 'epoch': 85.0, 'step': 7650}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.38630707793765595, 'learning_rate': 2.8000000000000003e-06, 'epoch': 86.0, 'step': 7740}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6116718812422319, 'eval_mcc': 0.3815782534141674, 'epoch': 86.0, 'step': 7740}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.3938834975163142, 'learning_rate': 2.6e-06, 'epoch': 87.0, 'step': 7830}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6066059388897636, 'eval_mcc': 0.3883460669842227, 'epoch': 87.0, 'step': 7830}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.38820330053567886, 'learning_rate': 2.4000000000000003e-06, 'epoch': 88.0, 'step': 7920}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6087935214692896, 'eval_mcc': 0.39332394796984627, 'epoch': 88.0, 'step': 7920}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.3837842229339812, 'learning_rate': 2.2e-06, 'epoch': 89.0, 'step': 8010}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6130517016757618, 'eval_mcc': 0.3903999387898909, 'epoch': 89.0, 'step': 8010}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.393925764825609, 'learning_rate': 2.0000000000000003e-06, 'epoch': 90.0, 'step': 8100}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6123490875417535, 'eval_mcc': 0.3874677344196693, 'epoch': 90.0, 'step': 8100}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.38924532731374106, 'learning_rate': 1.8000000000000001e-06, 'epoch': 91.0, 'step': 8190}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6161283904855902, 'eval_mcc': 0.3815782534141674, 'epoch': 91.0, 'step': 8190}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.3838531203567982, 'learning_rate': 1.6000000000000001e-06, 'epoch': 92.0, 'step': 8280}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.609343555840579, 'eval_mcc': 0.38541558488413336, 'epoch': 92.0, 'step': 8280}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.3885502603318956, 'learning_rate': 1.4000000000000001e-06, 'epoch': 93.0, 'step': 8370}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6111066016283903, 'eval_mcc': 0.38790044127674095, 'epoch': 93.0, 'step': 8370}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.387743772731887, 'learning_rate': 1.2000000000000002e-06, 'epoch': 94.0, 'step': 8460}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6116518893025138, 'eval_mcc': 0.38790044127674095, 'epoch': 94.0, 'step': 8460}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.3858020308944914, 'learning_rate': 1.0000000000000002e-06, 'epoch': 95.0, 'step': 8550}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6082124682989988, 'eval_mcc': 0.38541558488413336, 'epoch': 95.0, 'step': 8550}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.38321695244974563, 'learning_rate': 8.000000000000001e-07, 'epoch': 96.0, 'step': 8640}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6139356168833646, 'eval_mcc': 0.3820211908832823, 'epoch': 96.0, 'step': 8640}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.38303483956389955, 'learning_rate': 6.000000000000001e-07, 'epoch': 97.0, 'step': 8730}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6115339913151481, 'eval_mcc': 0.38790044127674095, 'epoch': 97.0, 'step': 8730}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.38397581345505183, 'learning_rate': 4.0000000000000003e-07, 'epoch': 98.0, 'step': 8820}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6144245564937592, 'eval_mcc': 0.384964967431684, 'epoch': 98.0, 'step': 8820}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.37660027113225725, 'learning_rate': 2.0000000000000002e-07, 'epoch': 99.0, 'step': 8910}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6141540733250704, 'eval_mcc': 0.38790044127674095, 'epoch': 99.0, 'step': 8910}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=90.0, style=ProgressStyle(description_wid…

{'loss': 0.38623579376273687, 'learning_rate': 0.0, 'epoch': 100.0, 'step': 9000}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6140546663240953, 'eval_mcc': 0.38790044127674095, 'epoch': 100.0, 'step': 9000}




In [27]:
# Build the evaluation DataLoader for eval_dataset through the trainer.
eval_dataloader = trainer.get_eval_dataloader(eval_dataset)

In [15]:
# Take the model held by the trainer for the manual evaluation steps in the following cells.
model = trainer.model

In [16]:
# Fetch the first batch from the evaluation DataLoader.
# Use the built-in iter()/next() protocol rather than calling the iterator's
# Python-2 style `.next()` method, which DataLoader iterators in newer PyTorch
# releases no longer provide.
inputs = next(iter(eval_dataloader))

In [17]:
# Run one prediction step on the single batch held in `inputs`.
# The result is unpacked as (loss, logits, labels). The third positional argument
# (False) is presumably prediction_loss_only: a later cell passes a variable of
# that name in the same position.
loss, logits, labels = trainer.prediction_step(model, inputs, False)

In [18]:
# Inspect the shape of the logits for this batch
# (the recorded output is torch.Size([96, 1, 32128])).
logits.shape

torch.Size([96, 1, 32128])

In [19]:
# Convert the single-batch outputs to NumPy, wrap them in an EvalPrediction,
# and reduce the logits to predicted token ids with an arg-max over axis 2.
label_ids = labels.cpu().numpy()
p = EvalPrediction(predictions=logits.cpu().numpy(), label_ids=label_ids)
preds = p.predictions.argmax(axis=2)

In [22]:
# matthews_corrcoef is otherwise only imported in a cell further down the
# notebook, so import it here to keep this cell runnable when the notebook is
# executed top to bottom on a fresh kernel.
from sklearn.metrics import matthews_corrcoef

# Matthews correlation between the label token ids and the arg-max predictions
# of the single batch evaluated above.
matthews_corrcoef(label_ids, preds)

0.024539928613451497

In [23]:
# Re-bind `model` to the trainer's model (same statement as an earlier cell).
model = trainer.model

In [32]:
import torch
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

# Accumulators for a manual pass over the evaluation DataLoader.
# Both tensor accumulators start as None (hence Optional) and are replaced by
# concatenated tensors inside the evaluation loop cell.
preds: Optional[torch.Tensor] = None
label_ids: Optional[torch.Tensor] = None
# One entry is appended per batch for which the prediction step returns a non-None loss.
eval_losses: List[float] = []

In [26]:
# Put the model in evaluation mode before the manual evaluation loop.
# The call returns the module itself, which is why the model's repr is
# displayed as this cell's output.
model.eval()

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseReluDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dro

In [33]:
from tqdm.auto import tqdm, trange
import torch
description = "Evaluation"
prediction_loss_only = False

# Start every run of this cell from empty accumulators. Without the reset, a
# re-run would append a second copy of every batch to the previous results, or
# fail in torch.cat if `preds` / `label_ids` had meanwhile been rebound to NumPy
# arrays by another cell.
preds = None
label_ids = None
eval_losses = []

# Manual evaluation pass: one prediction step per batch, concatenating the
# per-batch logits and labels along dim 0 and collecting the per-batch losses.
for inputs in tqdm(eval_dataloader, desc=description):
    loss, logits, labels = trainer.prediction_step(model, inputs, prediction_loss_only)
    if loss is not None:
        eval_losses.append(loss)
    if logits is not None:
        preds = logits if preds is None else torch.cat((preds, logits), dim=0)
    if labels is not None:
        label_ids = labels if label_ids is None else torch.cat((label_ids, labels), dim=0)


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=11.0, style=ProgressStyle(description_wi…




In [36]:
# Move the accumulated tensors to host memory as NumPy arrays.
# Guarding on torch.is_tensor (instead of `is not None`) keeps the cell safe to
# re-run: once a name already holds an ndarray it is left untouched rather than
# raising AttributeError on `.cpu()`. None is also skipped, as before.
if torch.is_tensor(preds):
    preds = preds.cpu().numpy()
if torch.is_tensor(label_ids):
    label_ids = label_ids.cpu().numpy()

In [37]:
# Score the accumulated predictions with the trainer's compute_metrics callback.
# NOTE(review): the recorded result (mcc about -0.023) is far from the eval_mcc of
# about 0.39 logged at the end of training. The cause is not visible in this
# cell -- confirm that preds and label_ids are aligned and hold what is expected.
trainer.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))

{'mcc': -0.023180772569263212}

In [41]:
# Repeat the metric computation by hand: wrap the arrays, take the arg-max of
# the predictions over axis 2, and pass the resulting ids with the labels to
# the task metric function.
p = EvalPrediction(predictions=preds, label_ids=label_ids)
preds1 = p.predictions.argmax(axis=2)
T5_glue_compute_metrics(data_args.task_name, preds1, p.label_ids)

{'mcc': -0.023180772569263212}

In [42]:
# Display the accumulated label ids; the (truncated) recorded output shows the values 209 and 3.
label_ids

array([[209],
       [209],
       [209],
       ...,
       [  3],
       [209],
       [209]])

In [48]:
# Recompute the metric after reducing both arrays to boolean masks ("is the id equal to 3?").
# 3 is one of the two label ids visible in label_ids; what each id decodes to is not shown here.
T5_glue_compute_metrics(data_args.task_name, preds1.squeeze()==3, p.label_ids.squeeze()==3)

{'mcc': -0.023303462047471274}

In [46]:
# Boolean mask of predicted ids equal to 3 (the first argument-side mask used in the metric call above).
preds1.squeeze()==3

array([ True,  True,  True, ..., False,  True, False])

In [47]:
# Boolean mask of label ids equal to 3, for comparison with the prediction mask.
p.label_ids.squeeze()==3

array([False, False, False, ...,  True, False, False])

In [21]:
# Fraction of positions where prediction and label agree.
# Vectorised reduction over axis 0 replaces the built-in sum(), which walks the
# array row by row in Python; for the (n, 1) arrays used here the result keeps
# the same one-element array form (e.g. array([0.625])).
(label_ids == preds).mean(axis=0)

array([0.625])

In [22]:
# Number of rows in preds (96 in the recorded output).
len(preds)

96

In [30]:
# Drop the singleton axes from label_ids for the element-wise comparisons below.
l = label_ids.squeeze()

In [33]:
# Drop the singleton axes from preds.
# NOTE(review): this re-binds `p`, which other cells use for an EvalPrediction;
# cells that read p.label_ids must not be run after this one without re-creating it.
p = preds.squeeze()

In [34]:
# Accuracy on the squeezed arrays: mean of the element-wise equality mask.
# The vectorised mean replaces the built-in sum() over the array followed by a
# manual division; the value is the same (0.625 for the recorded batch).
(l == p).mean()

0.625

In [35]:
# Display the squeezed label ids (values 209 and 3 in the recorded output).
l

array([209, 209, 209,   3,   3,   3, 209, 209, 209, 209, 209, 209,   3,
       209, 209,   3, 209, 209, 209, 209, 209, 209,   3, 209, 209, 209,
       209,   3,   3, 209, 209,   3,   3, 209, 209, 209, 209, 209,   3,
       209,   3, 209, 209,   3,   3, 209,   3, 209, 209, 209,   3, 209,
       209,   3, 209, 209,   3,   3, 209,   3, 209, 209, 209, 209,   3,
       209, 209,   3, 209, 209, 209, 209,   3, 209,   3,   3,   3,   3,
       209,   3, 209,   3,   3, 209, 209,   3, 209, 209,   3, 209,   3,
       209, 209,   3,   3, 209])

In [36]:
# Display the squeezed predicted ids for side-by-side comparison with the labels.
p

array([209, 209, 209,   3, 209,   3, 209, 209,   3, 209, 209, 209, 209,
       209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209,   3,
       209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209,   3,
       209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209,
       209, 209, 209, 209, 209, 209, 209, 209, 209, 209, 209,   3,   3,
       209, 209, 209, 209,   3, 209, 209, 209,   3,   3, 209, 209, 209,
       209, 209,   3, 209,   3, 209, 209, 209, 209, 209, 209, 209, 209,
       209, 209, 209, 209,   3])

In [21]:
from sklearn.metrics import matthews_corrcoef, f1_score


In [39]:
# Matthews correlation computed directly on the squeezed token-id arrays.
matthews_corrcoef(l, p)

0.07972154528942839

In [45]:
# Recode the labels as 0/1 flags: 1 where the token id equals 209, otherwise 0.
l1 = [int(token_id == 209) for token_id in l]

In [46]:
# Recode the predictions as 0/1 flags: 1 where the token id equals 209, otherwise 0.
p1 = [int(token_id == 209) for token_id in p]

In [50]:
# Matthews correlation on the 0/1 recoded lists; the recorded value equals the
# one obtained from the raw token ids in matthews_corrcoef(l, p).
matthews_corrcoef(l1, p1)

0.07972154528942839

In [51]:
# Same metric on the unsqueezed arrays; the recorded value matches the squeezed-array result.
matthews_corrcoef(label_ids, preds)

0.07972154528942839