In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import logging
from functools import partial

import os
import torch.nn.functional as F
from torch import nn

import emmental
from emmental import Meta
from emmental.data import EmmentalDataLoader, EmmentalDataset
from emmental.learner import EmmentalLearner
from emmental.model import EmmentalModel
from emmental.scorer import Scorer
from emmental.task import EmmentalTask
from modules.bert_module import BertModule
from modules.classification_module import ClassificationModule
from preprocessor import preprocessor
from task_config import LABEL_MAPPING

In [3]:
# Logger for this notebook, named after the current module via `__name__`.
logger = logging.getLogger(__name__)

In [4]:
# Notebook-wide constants.
#
# This notebook evaluates the SuperGLUE "CB" task, so the task name and data
# directory are set for CB here. They previously pointed at GLUE/MNLI values
# that were never read: both names are assigned again further down (with these
# same CB values) before their first use, and the old GLUEDATA lookup only
# made a fresh run fail with KeyError when that unrelated variable was unset.
TASK_NAME = "CB"
# Root directory of the SuperGLUE data; raises KeyError if SUPERGLUEDATA is unset.
DATA_DIR = os.environ["SUPERGLUEDATA"]
# Pretrained BERT checkpoint name; also passed as the tokenizer name below.
BERT_MODEL_NAME = "bert-large-uncased"
# Batch size handed to the dataloader builder.
BATCH_SIZE = 32

# Initialize Emmental

In [5]:
# Assemble the Emmental configuration one section at a time (same sections and
# same order as a single nested literal), then initialize the framework with
# "logs/CB_finetune" as its first positional argument.
emmental_config = {}

# Meta section: fixed random seed.
emmental_config["meta_config"] = {"seed": 1}

# Model section: device index 0 with dataparallel switched on.
emmental_config["model_config"] = {"device": 0, "dataparallel": True}

# Learner section: epochs, validation split name, optimizer and LR schedule.
emmental_config["learner_config"] = {
    "n_epochs": 10,
    "valid_split": "val",
    "optimizer_config": {"optimizer": "adam", "lr": 1e-5},
    "lr_scheduler_config": {"lr_scheduler": "linear", "min_lr": 1e-7},
}

# Logging section: evaluate every half epoch and checkpoint on the metric
# "CB/SuperGLUE/val/accuracy" in "max" mode.
emmental_config["logging_config"] = {
    "counter_unit": "epoch",
    "evaluation_freq": 0.5,
    "checkpointing": True,
    "checkpointer_config": {
        "checkpoint_metric": {"CB/SuperGLUE/val/accuracy": "max"},
        "checkpoint_freq": 1,
    },
}

emmental.init("logs/CB_finetune", config=emmental_config)

[2019-05-31 07:56:16,973][INFO] emmental.meta:95 - Setting logging directory to: logs/CB_finetune/2019_05_31/07_56_16
[2019-05-31 07:56:16,983][INFO] emmental.meta:56 - Loading Emmental default config from /home/hazymturk/vincent/emmental/src/emmental/emmental-default-config.yaml.
[2019-05-31 07:56:16,984][INFO] emmental.meta:143 - Updating Emmental config from user provided config.


In [6]:
def ce_loss(task_name, immediate_ouput_dict, Y, active):
    """Cross-entropy loss for the prediction head of ``task_name``.

    Args:
        task_name: Task identifier; the head's outputs are looked up under the
            key ``"<task_name>_pred_head"``.
        immediate_ouput_dict: Mapping from module name to that module's
            outputs; element ``0`` of the head's entry is used as the logits.
        Y: Label tensor. It is flattened to 1-D and shifted down by one before
            use (presumably labels are stored 1-indexed -- TODO confirm).
        active: Index or mask applied to both logits and labels to select the
            examples that contribute to the loss.

    Returns:
        The value of ``F.cross_entropy`` over the selected examples.
    """
    head_key = f"{task_name}_pred_head"
    logits = immediate_ouput_dict[head_key][0]
    # Flatten the labels and move them down by one to get class indices.
    targets = Y.view(-1) - 1
    return F.cross_entropy(logits[active], targets[active])

In [7]:
def output(task_name, immediate_ouput_dict):
    """Turn the prediction head's logits for ``task_name`` into probabilities.

    Args:
        task_name: Task identifier; the head's outputs are looked up under the
            key ``"<task_name>_pred_head"``.
        immediate_ouput_dict: Mapping from module name to that module's
            outputs; element ``0`` of the head's entry is used as the logits.

    Returns:
        The logits passed through a softmax taken along dimension 1.
    """
    head_key = f"{task_name}_pred_head"
    logits = immediate_ouput_dict[head_key][0]
    return F.softmax(logits, dim=1)

In [8]:
from sklearn.metrics import f1_score
def macro_f1(golds, probs, preds):
    """Compute the macro-averaged F1 score of ``preds`` against ``golds``.

    Args:
        golds: Gold labels, passed to ``f1_score`` as the first argument.
        probs: Not used by this metric; kept in the signature.
        preds: Predicted labels, passed to ``f1_score`` as the second argument.

    Returns:
        A one-entry dict ``{"macro_f1": <score>}``.
    """
    score = f1_score(golds, preds, average="macro")
    return {"macro_f1": score}

In [9]:
# Create the model container. No task is passed here; the CB task is attached
# later through `mtl_model.add_task(...)`.
mtl_model = EmmentalModel(name="GLUE_single_task")

[2019-05-31 07:56:24,109][INFO] emmental.model:44 - Created emmental model GLUE_single_task that contains task set().
[2019-05-31 07:56:24,110][INFO] emmental.model:58 - Moving model to GPU (cuda:0).


In [10]:
# Load a previously saved model checkpoint into `mtl_model`.
# NOTE(review): the path points into one specific timestamped run directory
# (presumably an earlier fine-tuning run) -- update it when re-running.
# NOTE(review): this load happens before the task is added to the model;
# confirm that `add_task` keeps the loaded modules rather than re-initializing them.
mtl_model.load(
    "logs/CB_finetune/2019_05_31/07_44_08/best_model_CB_SuperGLUE_val_accuracy.pth"
)

[2019-05-31 07:56:31,873][INFO] emmental.model:412 - [GLUE_multi_task] Model loaded from logs/CB_finetune/2019_05_31/07_44_08/best_model_CB_SuperGLUE_val_accuracy.pth
[2019-05-31 07:56:31,875][INFO] emmental.model:58 - Moving model to GPU (cuda:0).


In [12]:
# Task evaluated by this notebook. Used below to index the SuperGLUE label and
# metric mappings and to build the "<TASK_NAME>_pred_head" module name.
TASK_NAME = "CB"

In [13]:
from superglue.parse_CB import get_CB_dataloaders

In [14]:
from superglue.task_config import SuperGLUE_LABEL_MAPPING, SuperGLUE_TASK_METRIC_MAPPING

In [15]:
# Input width of the prediction head: 768 when the checkpoint name contains
# "base", otherwise 1024.
if "base" in BERT_MODEL_NAME:
    BERT_OUTPUT_DIM = 768
else:
    BERT_OUTPUT_DIM = 1024

# Output width of the prediction head: one unit per entry in the task's label
# mapping, or a single unit when the task has no label mapping.
if SuperGLUE_LABEL_MAPPING[TASK_NAME] is not None:
    TASK_CARDINALITY = len(SuperGLUE_LABEL_MAPPING[TASK_NAME].keys())
else:
    TASK_CARDINALITY = 1

# Name shared by the head module and its step in the task flow; ce_loss and
# output look the head's outputs up under this same key.
_pred_head_name = f"{TASK_NAME}_pred_head"

# Modules owned by the task. The encoder is constructed before the linear head.
_module_pool = nn.ModuleDict(
    {
        "bert_module": BertModule(BERT_MODEL_NAME),
        _pred_head_name: nn.Linear(BERT_OUTPUT_DIM, TASK_CARDINALITY),
    }
)

# Step 1 ("input") feeds the batch's token ids and token segments to the
# encoder; step 2 feeds element 1 of that step's output to the head.
_task_flow = [
    {
        "name": "input",
        "module": "bert_module",
        "inputs": [("_input_", "token_ids"), ("_input_", "token_segments")],
    },
    {
        "name": _pred_head_name,
        "module": _pred_head_name,
        "inputs": [("input", 1)],
    },
]

# Bundle modules, flow, loss/output functions (bound to this task's name) and
# the task's scorer into a single EmmentalTask.
emmental_task = EmmentalTask(
    name=TASK_NAME,
    module_pool=_module_pool,
    task_flow=_task_flow,
    loss_func=partial(ce_loss, TASK_NAME),
    output_func=partial(output, TASK_NAME),
    scorer=Scorer(metrics=SuperGLUE_TASK_METRIC_MAPPING[TASK_NAME]),
)

[2019-05-31 07:56:32,806][INFO] pytorch_pretrained_bert.modeling:583 - loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased.tar.gz from cache at ./cache/214d4777e8e3eb234563136cd3a49f6bc34131de836848454373fa43f10adc5e.abfbb80ee795a608acbf35c7bf2d2d58574df3887cdd94b355fc67e03fddba05
[2019-05-31 07:56:32,808][INFO] pytorch_pretrained_bert.modeling:591 - extracting archive file ./cache/214d4777e8e3eb234563136cd3a49f6bc34131de836848454373fa43f10adc5e.abfbb80ee795a608acbf35c7bf2d2d58574df3887cdd94b355fc67e03fddba05 to temp dir /tmp/tmpnb2e40x3
[2019-05-31 07:56:44,029][INFO] pytorch_pretrained_bert.modeling:601 - Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "type_vocab_size": 2,
  "vocab_size":

In [16]:
# Attach the CB task defined above to the model that was loaded earlier.
mtl_model.add_task(emmental_task)

[2019-05-31 07:56:49,178][INFO] emmental.model:58 - Moving model to GPU (cuda:0).


In [17]:
# Root directory of the SuperGLUE data, read from the SUPERGLUEDATA environment
# variable (raises KeyError if it is not set).
DATA_DIR = os.environ["SUPERGLUEDATA"]
# Build the CB dataloaders for the "train" and "val" splits; the result is
# indexed by split name further down (`dataloaders["val"]`).
dataloaders = get_CB_dataloaders(
    data_dir=DATA_DIR,
    task_name=TASK_NAME,
    splits=["train", "val"],
    max_sequence_length=200,
    max_data_samples=None,  # presumably means "no cap on examples" -- TODO confirm
    tokenizer_name=BERT_MODEL_NAME,  # tokenizer name matches the BERT checkpoint name
    batch_size=BATCH_SIZE,
)

[2019-05-31 07:56:49,201][INFO] superglue.tokenizer:8 - Loading Tokenizer bert-large-uncased
[2019-05-31 07:56:50,002][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt from cache at /home/hazymturk/.cache/torch/pytorch_pretrained_bert/9b3c03a36e83b13d5ba95ac965c9f9074a99e14340c523ab405703179e79fc46.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084


/home/hazymturk/vincent/emmental-tutorials/superglue/data/CB/train.jsonl
{'premise': 'It was a complex language. Not written down but handed down. One might say it was peeled down.', 'hypothesis': 'the language was peeled down', 'label': 'entailment', 'idx': 0}


[2019-05-31 07:56:50,281][INFO] superglue.parse_CB:123 - Loaded train for CB.
[2019-05-31 07:56:50,344][INFO] superglue.parse_CB:123 - Loaded val for CB.


/home/hazymturk/vincent/emmental-tutorials/superglue/data/CB/val.jsonl
{'premise': "Valence the void-brain, Valence the virtuous valet. Why couldn't the figger choose his own portion of titanic anatomy to shaft? Did he think he was helping?", 'hypothesis': 'Valence was helping', 'label': 'contradiction', 'idx': 0}


In [18]:
# Score the model on the CB validation dataloader. As the last expression in
# the cell, the returned metrics are shown as the cell output.
mtl_model.score(dataloaders["val"])

{'CB/SuperGLUE/val/accuracy': 0.9107142857142857}