In [1]:
import dataclasses
import logging
import os
import sys
from dataclasses import dataclass, field
from typing import Dict, Optional

import numpy as np

In [2]:
from transformers import (
    AutoConfig,
    AutoTokenizer,
    EvalPrediction,
    HfArgumentParser,
    Trainer,
    TrainingArguments,
    AutoModelForSequenceClassification,
    set_seed )

In [3]:
from utils_superglue_classification import (
    SuperGlueDataset, 
    superglue_compute_metrics, 
    superglue_output_modes, 
    superglue_tasks_num_labels,
    processors,
    Split
)

In [4]:
@dataclass
class ModelArguments:
    """Options that select the pretrained checkpoint to fine-tune.

    ``model_name_or_path`` is the only required field; ``config_name``,
    ``tokenizer_name`` and ``cache_dir`` all default to ``None``.
    """

    # Required: this field declares no default.
    model_name_or_path: str = field(
        metadata={
            "help": "Path to pretrained model or model identifier from huggingface.co/models",
        },
    )
    # Optional override for where the config is loaded from.
    config_name: Optional[str] = field(
        metadata={"help": "Pretrained config name or path if not the same as model_name"},
        default=None,
    )
    # Optional override for where the tokenizer is loaded from.
    tokenizer_name: Optional[str] = field(
        metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"},
        default=None,
    )
    # Optional directory for downloaded pretrained artifacts.
    cache_dir: Optional[str] = field(
        metadata={"help": "Where do you want to store the pretrained models downloaded from s3"},
        default=None,
    )
        
@dataclass
class DataTrainingArguments:
    """Options describing the task data fed to the model for training and eval.

    ``task_name`` and ``data_dir`` are required. After initialisation
    ``task_name`` is replaced by its lower-cased form.
    """

    # The help text lists the keys of the imported ``processors`` mapping.
    task_name: str = field(
        metadata={"help": "The name of the task to train on: " + ", ".join(processors.keys())},
    )
    data_dir: str = field(
        metadata={"help": "Should contain the data files for the task."},
    )
    max_seq_length: int = field(
        metadata={
            "help": (
                "The maximum total input sequence length after tokenization. Sequences longer "
                "than this will be truncated, sequences shorter will be padded."
            ),
        },
        default=128,
    )
    overwrite_cache: bool = field(
        metadata={"help": "Overwrite the cached training and evaluation sets"},
        default=False,
    )

    def __post_init__(self):
        """Replace ``task_name`` with its lower-cased form."""
        lowered = self.task_name.lower()
        self.task_name = lowered

# Parser that fills the three argument dataclasses from command-line style flags.
parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
logger = logging.getLogger(__name__)

# --- Experiment configuration -------------------------------------------------
# NOTE: SUPER_GLUE_DIR and EXPERIMENT_DIR are absolute, machine-specific paths;
# adjust them before running this notebook elsewhere.
MODEL_NAME = "bert-base-cased"
DATESTAMP = "20200814"
SUPER_GLUE_DIR = "/home/keyur/medhas/superglue_data/"
TASK_NAME = "BoolQ"
PER_DEVICE_BATCH_SIZE = 48
EXPERIMENT_DIR = "/mnt/data/medhas/glue_experiments/%s/%s" % (MODEL_NAME, DATESTAMP)

# Checkpoints and logs for this task both live under this directory.
TASK_OUTPUT_DIR = os.path.join(EXPERIMENT_DIR, TASK_NAME)

# Flags handed to the parser in place of sys.argv.
custom_sysargv = [
    f"--model_name_or_path={MODEL_NAME}",
    f"--task_name={TASK_NAME}",
    "--do_train",
    "--do_eval",
    f"--data_dir={os.path.join(SUPER_GLUE_DIR, TASK_NAME)}",
    "--max_seq_length=256",
    f"--per_device_train_batch_size={PER_DEVICE_BATCH_SIZE}",
    f"--per_device_eval_batch_size={PER_DEVICE_BATCH_SIZE}",
    "--learning_rate=2e-5",
    "--num_train_epochs=10",
    f"--output_dir={TASK_OUTPUT_DIR}",
    f"--logging_dir={TASK_OUTPUT_DIR}/logs",
    # The recorded run shows 99 iterations per epoch, so logging and evaluating
    # every 99 steps reports once per epoch.
    "--logging_steps=99",
    "--evaluate_during_training",
    # Spelled out in full: the previous "--eval_step" was only accepted because
    # argparse resolves unambiguous prefixes of long options.
    "--eval_steps=99",
    "--save_total_limit=2",
    "--save_steps=1000",
    "--gradient_accumulation_steps=1",
    "--overwrite_output_dir",
]

model_args, data_args, training_args = parser.parse_args_into_dataclasses(args=custom_sysargv)

# Setup logging.
# Every process logs at WARN. The earlier conditional on local_rank selected
# logging.WARN in both of its arms, so the level is now stated directly.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.WARN,
)
logger.warning(
    "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
    training_args.local_rank,
    training_args.device,
    training_args.n_gpu,
    bool(training_args.local_rank != -1),
    training_args.fp16,
)
# NOTE: this is an INFO record, so it is filtered out at the WARN level
# configured above; lower the level to see the full training arguments.
logger.info("Training/evaluation parameters %s", training_args)

# Seed the random number generators from the parsed training arguments.
set_seed(training_args.seed)

# Look up the label count and output mode registered for the selected task.
num_labels = superglue_tasks_num_labels[data_args.task_name]
output_mode = superglue_output_modes[data_args.task_name]
print("Task:", data_args.task_name, "Labels:", num_labels, ', Output', output_mode)




Task: boolq Labels: 2 , Output classification


In [5]:
# Load the model configuration. An explicit config_name takes precedence over
# the model identifier; the task's label count is attached to the config.
config_source = model_args.config_name or model_args.model_name_or_path
config = AutoConfig.from_pretrained(
    config_source,
    cache_dir=model_args.cache_dir,
    finetuning_task=data_args.task_name,
    num_labels=num_labels,
)

In [6]:
# Load the tokenizer. An explicit tokenizer_name takes precedence over the
# model identifier.
tokenizer_source = model_args.tokenizer_name or model_args.model_name_or_path
tokenizer = AutoTokenizer.from_pretrained(tokenizer_source, cache_dir=model_args.cache_dir)

In [7]:
# Instantiate the sequence-classification model from the pretrained weights.
# from_tf is switched on when the model path contains ".ckpt".
pretrained_id = model_args.model_name_or_path
model = AutoModelForSequenceClassification.from_pretrained(
    pretrained_id,
    config=config,
    cache_dir=model_args.cache_dir,
    from_tf=".ckpt" in pretrained_id,
)
# Alternative kept for reference (BertForNLI is not imported in the cells shown here):
#model = BertForNLI.from_pretrained(model_args.model_name_or_path, config=config, cache_dir=model_args.cache_dir)

- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
def _load_split(mode):
    """Build the SuperGlueDataset for one split from the parsed data arguments.

    Args:
        mode: The ``Split`` member to load (``Split.train``, ``Split.dev`` or
            ``Split.test``).

    Returns:
        The ``SuperGlueDataset`` constructed for that split.
    """
    return SuperGlueDataset(
        data_args.data_dir,
        tokenizer=tokenizer,
        task=data_args.task_name,
        max_seq_length=data_args.max_seq_length,
        overwrite_cache=data_args.overwrite_cache,
        mode=mode,
    )


# Each split is only built when its --do_* flag is set; otherwise it stays None.
train_dataset = _load_split(Split.train) if training_args.do_train else None
eval_dataset = _load_split(Split.dev) if training_args.do_eval else None
test_dataset = _load_split(Split.test) if training_args.do_predict else None

In [9]:
def compute_metrics(p: EvalPrediction) -> Dict:
    """Convert raw model outputs to predictions and score them for the task.

    Reads the module-level ``output_mode`` and ``data_args``.

    Args:
        p: Evaluation output; ``p.predictions`` and ``p.label_ids`` are used.

    Returns:
        The result of ``superglue_compute_metrics`` for the configured task.

    Raises:
        ValueError: If ``output_mode`` is neither ``"classification"`` nor
            ``"regression"``.
    """
    if output_mode == "classification":
        # Predicted class = index of the largest value along axis 1.
        preds = np.argmax(p.predictions, axis=1)
    elif output_mode == "regression":
        preds = np.squeeze(p.predictions)
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise ValueError("Unsupported output mode: %r" % (output_mode,))
    return superglue_compute_metrics(data_args.task_name, preds, p.label_ids)

In [10]:
# Initialize our Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
)

In [11]:
if training_args.do_train:
    # model_path is only supplied when the model argument is an existing local
    # directory; for a hub identifier it is None.
    local_model_dir = model_args.model_name_or_path
    if not os.path.isdir(local_model_dir):
        local_model_dir = None
    trainer.train(model_path=local_model_dir)
    trainer.save_model()
    # The tokenizer is saved next to the model so the output directory is
    # self-contained and easy to share on huggingface.co/models.
    if trainer.is_world_master():
        tokenizer.save_pretrained(training_args.output_dir)

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=10.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Iteration', max=99.0, style=ProgressStyle(description_wid…



{'loss': 0.6512166701181971, 'learning_rate': 1.8e-05, 'epoch': 1.0, 'step': 99}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=35.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6126354779515948, 'eval_acc': 0.6671765065769348, 'epoch': 1.0, 'step': 99}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=99.0, style=ProgressStyle(description_wid…

{'loss': 0.5389884180492825, 'learning_rate': 1.6000000000000003e-05, 'epoch': 2.0, 'step': 198}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=35.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.5440494920526232, 'eval_acc': 0.7188742734781278, 'epoch': 2.0, 'step': 198}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=99.0, style=ProgressStyle(description_wid…

{'loss': 0.39326986519977297, 'learning_rate': 1.4e-05, 'epoch': 3.0, 'step': 297}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=35.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6078403038637978, 'eval_acc': 0.7197919853166106, 'epoch': 3.0, 'step': 297}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=99.0, style=ProgressStyle(description_wid…

{'loss': 0.26285788988826253, 'learning_rate': 1.2e-05, 'epoch': 4.0, 'step': 396}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=35.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.6852830410003662, 'eval_acc': 0.7173447537473233, 'epoch': 4.0, 'step': 396}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=99.0, style=ProgressStyle(description_wid…

{'loss': 0.17448157184954846, 'learning_rate': 1e-05, 'epoch': 5.0, 'step': 495}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=35.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.7750344838414873, 'eval_acc': 0.724686448455185, 'epoch': 5.0, 'step': 495}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=99.0, style=ProgressStyle(description_wid…

{'loss': 0.13039104271717747, 'learning_rate': 8.000000000000001e-06, 'epoch': 6.0, 'step': 594}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=35.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.8637520679405757, 'eval_acc': 0.7265218721321505, 'epoch': 6.0, 'step': 594}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=99.0, style=ProgressStyle(description_wid…

{'loss': 0.10109250853308524, 'learning_rate': 6e-06, 'epoch': 7.0, 'step': 693}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=35.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.9357203824179513, 'eval_acc': 0.721627408993576, 'epoch': 7.0, 'step': 693}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=99.0, style=ProgressStyle(description_wid…

{'loss': 0.08722988271502534, 'learning_rate': 4.000000000000001e-06, 'epoch': 8.0, 'step': 792}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=35.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.934592695747103, 'eval_acc': 0.7308045273784032, 'epoch': 8.0, 'step': 792}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=99.0, style=ProgressStyle(description_wid…

{'loss': 0.07473556035094792, 'learning_rate': 2.0000000000000003e-06, 'epoch': 9.0, 'step': 891}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=35.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.9735963702201843, 'eval_acc': 0.7332517589476905, 'epoch': 9.0, 'step': 891}



HBox(children=(FloatProgress(value=0.0, description='Iteration', max=99.0, style=ProgressStyle(description_wid…

{'loss': 0.06457428308674182, 'learning_rate': 0.0, 'epoch': 10.0, 'step': 990}


HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=35.0, style=ProgressStyle(description_wi…


{'eval_loss': 0.9966532928603036, 'eval_acc': 0.7308045273784032, 'epoch': 10.0, 'step': 990}


