In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import logging
from functools import partial

import torch.nn.functional as F
from torch import nn
import torch
import emmental
from emmental import Meta
from emmental.learner import EmmentalLearner
from emmental.model import EmmentalModel
from emmental.scorer import Scorer
from emmental.task import EmmentalTask
from modules.bert_module import BertModule
from parse_COPA import get_COPA_dataloaders
from task_config import SuperGLUE_LABEL_MAPPING, SuperGLUE_TASK_METRIC_MAPPING
from sklearn.metrics import f1_score

In [3]:
# Module-level logger named after this notebook's module (__name__).
logger = logging.getLogger(__name__)

# Initialize Emmental

In [4]:
# Initialise Emmental with "logs" as the logging directory; the dict below
# overrides individual entries of the library's default configuration.
emmental.init(
    "logs",
    config={
        # NOTE(review): device 1 is a hard-coded GPU index (the model is later
        # moved to cuda:1); change it when running on a different machine.
        "model_config": {"device": 1, "dataparallel": False},
        "learner_config": {
            "n_epochs": 10,
            "valid_split": "val",
            "optimizer_config": {"optimizer": "adam", "lr": 1e-5},
            "lr_scheduler_config": {
                "warmup_percentage": 0.1,
                "lr_scheduler": None,  # alternative tried: "linear"
                # "min_lr": 1e-7,  (disabled together with the scheduler)
            },
        },
        "logging_config": {
            # Evaluate every 100 batches; no checkpoints are written.
            "counter_unit": "batch",
            "evaluation_freq": 100,
            "checkpointing": None,
        },
    },
)

[2019-05-28 02:36:05,843][INFO] emmental.meta:95 - Setting logging directory to: logs/2019_05_28/02_36_05
[2019-05-28 02:36:05,860][INFO] emmental.meta:56 - Loading Emmental default config from /dfs/scratch1/senwu/mmtl/emmental/src/emmental/emmental-default-config.yaml.
[2019-05-28 02:36:05,861][INFO] emmental.meta:143 - Updating Emmental config from user provided config.


In [5]:
# Show the effective configuration (library defaults merged with the overrides above).
Meta.config

{'meta_config': {'seed': 0, 'verbose': True, 'log_path': None},
 'model_config': {'model_path': None, 'device': 1, 'dataparallel': False},
 'learner_config': {'fp16': False,
  'n_epochs': 10,
  'train_split': 'train',
  'valid_split': 'val',
  'test_split': 'test',
  'ignore_index': -100,
  'optimizer_config': {'optimizer': 'adam',
   'lr': 1e-05,
   'l2': 0.0,
   'grad_clip': 1.0,
   'sgd_config': {'momentum': 0.9},
   'adam_config': {'betas': (0.9, 0.999)}},
  'lr_scheduler_config': {'lr_scheduler': None,
   'warmup_steps': None,
   'warmup_unit': 'batch',
   'warmup_percentage': 0.1,
   'min_lr': 0.0,
   'linear_config': {'min_lr': 0.0},
   'exponential_config': {'gamma': 0.9},
   'plateau_config': {'factor': 0.5, 'patience': 10, 'threshold': 0.0001}},
  'task_scheduler': 'round_robin',
  'global_evaluation_metric_dict': None},
 'logging_config': {'counter_unit': 'batch',
  'evaluation_freq': 100,
  'writer_config': {'writer': 'tensorboard', 'verbose': True},
  'checkpointing': None

In [6]:
# Task, data location and model settings shared by the cells below.
TASK_NAME = "COPA"
DATA_DIR = "data"
BERT_MODEL_NAME = "bert-large-cased"
BATCH_SIZE = 4

# Input width of the prediction head: 768 when the model name contains
# "base", otherwise 1024.
BERT_OUTPUT_DIM = 768 if "base" in BERT_MODEL_NAME else 1024

# Load the train/val/test dataloaders from file

In [7]:
# Build one dataloader per requested split. Later cells index the result by
# split name (dataloaders["val"]) and iterate it with dataloaders.values().
dataloaders = get_COPA_dataloaders(
    data_dir=DATA_DIR,
    task_name=TASK_NAME,
    splits=["train", "val", "test"],
    max_sequence_length=128,
    max_data_samples=None,  # presumably None means "no cap on examples" - TODO confirm in parse_COPA
    tokenizer_name=BERT_MODEL_NAME,
    batch_size=BATCH_SIZE,
)

[2019-05-28 02:36:05,997][INFO] tokenizer:8 - Loading Tokenizer bert-large-cased
[2019-05-28 02:36:06,270][INFO] pytorch_pretrained_bert.tokenization:190 - loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt from cache at /lfs/local/0/senwu/.pytorch_pretrained_bert/cee054f6aafe5e2cf816d2228704e326446785f940f5451a5b26033516a4ac3d.e13dbb970cb325137104fb2e5f36fe865f27746c6b526f6352861b1980eb80b1


data/COPA/train.jsonl
{'premise': 'My body cast a shadow over the grass.', 'choice1': 'The sun was rising.', 'choice2': 'The grass was cut.', 'question': 'cause', 'label': 0, 'idx': 0}


[2019-05-28 02:36:06,630][INFO] parse_COPA:123 - Loaded train for COPA.
[2019-05-28 02:36:06,712][INFO] parse_COPA:123 - Loaded val for COPA.
[2019-05-28 02:36:07,102][INFO] parse_COPA:123 - Loaded test for COPA.


max len 34
data/COPA/val.jsonl
{'premise': 'The man turned on the faucet.', 'choice1': 'The toilet filled with water.', 'choice2': 'Water flowed from the spout.', 'question': 'effect', 'label': 1, 'idx': 0}
max len 34
data/COPA/test.jsonl
{'premise': 'The item was packaged in bubble wrap.', 'choice1': 'It was fragile.', 'choice2': 'It was small.', 'question': 'cause', 'idx': 0}
max len 36


# Build Emmental task

In [8]:
def ce_loss(task_name, immediate_ouput_dict, Y, active):
    """Cross-entropy loss for one task, restricted to the active examples.

    Args:
        task_name: Task name; logits are read from the entry keyed
            ``"<task_name>_pred_head"``.
        immediate_ouput_dict: Mapping from module name to that module's
            outputs; element 0 of the prediction-head entry is used as logits.
        Y: Label tensor. It is flattened and shifted down by one before being
            used as class indices.
        active: Index or mask applied to both the logits and the labels.

    Returns:
        The result of ``F.cross_entropy`` on the selected logits and targets.
    """
    head_outputs = immediate_ouput_dict["{}_pred_head".format(task_name)]
    selected_logits = head_outputs[0][active]
    # Shift labels down by one so they can serve as class indices.
    shifted_labels = Y.view(-1) - 1
    selected_targets = shifted_labels[active]
    return F.cross_entropy(selected_logits, selected_targets)

In [9]:
def output(task_name, immediate_ouput_dict):
    """Turn the prediction head's logits into per-class probabilities.

    Args:
        task_name: Task name; logits are read from the entry keyed
            ``"<task_name>_pred_head"``.
        immediate_ouput_dict: Mapping from module name to that module's
            outputs; element 0 of the prediction-head entry is used as logits.

    Returns:
        Softmax of the logits taken along dimension 1.
    """
    head_key = "{}_pred_head".format(task_name)
    logits = immediate_ouput_dict[head_key][0]
    return F.softmax(logits, dim=1)

In [10]:
class ChoiceModule(nn.Module):
    """Prediction head that scores each answer choice with its own linear layer.

    For every choice ``i`` the module owns a separate ``nn.Linear(input_dim, 1)``
    stored under ``linears["linear<i>"]``. The per-choice scores are
    concatenated into a single logits tensor with one column per choice.

    Args:
        n_choices: Number of answer choices (and therefore linear layers).
        input_dim: Width of the feature vector fed to each linear layer.
            Defaults to the notebook-level ``BERT_OUTPUT_DIM`` when omitted.
    """

    def __init__(self, n_choices=2, input_dim=None):
        super().__init__()

        self.n_choices = n_choices
        # Keep the original behaviour (notebook global) unless a width is given.
        if input_dim is None:
            input_dim = BERT_OUTPUT_DIM
        self.linears = nn.ModuleDict(
            {f"linear{i}": nn.Linear(input_dim, 1) for i in range(n_choices)}
        )

    def forward(self, immediate_ouput_dict):
        """Score every choice and return the concatenated logits.

        Args:
            immediate_ouput_dict: Mapping that holds, under ``"choice<i>"``,
                the outputs of the step that encoded choice ``i``. The features
                are taken as ``entry[0][-1][:, 0, :]`` - presumably the first
                token of the last encoder layer; confirm against BertModule.

        Returns:
            Tensor with one column per choice (scores concatenated on dim 1).
        """
        # Call the layers through __call__ (not .forward) so that any hooks
        # registered on them are executed.
        logits = [
            self.linears[f"linear{i}"](
                immediate_ouput_dict[f"choice{i}"][0][-1][:, 0, :]
            )
            for i in range(self.n_choices)
        ]
        return torch.cat(logits, dim=1)

In [11]:
# Alternative ChoiceModule (disabled): a single linear layer shared by all choices.
# class ChoiceModule(nn.Module):
#     def __init__(self, n_choices=2):
#         super().__init__()

#         self.n_choices = n_choices
#         self.linear = nn.Linear(BERT_OUTPUT_DIM, 1)

#     def forward(self, immediate_ouput_dict):
#         logits = []

#         for i in range(self.n_choices):
#             logit = self.linear.forward(
#                 immediate_ouput_dict[f"choice{str(i)}"][0][-1][:,0,:]
#             )
#             logits.append(logit)

#         logits = torch.cat(logits, dim=1)

#         return logits

In [12]:
# Single-task definition for COPA: the same BERT module is applied once per
# answer choice and a ChoiceModule head combines the two results into logits.
emmental_task = EmmentalTask(
    name=TASK_NAME,
    module_pool=nn.ModuleDict(
        {
            # One BertModule instance, referenced by both "choice" steps below.
            "bert_module": BertModule(BERT_MODEL_NAME),
            f"{TASK_NAME}_pred_head": ChoiceModule(2),
        }
    ),
    task_flow=[
        # token1_ids / token2_ids presumably hold the token ids built for the
        # first / second choice - confirm in parse_COPA.
        {
            "name": "choice0",
            "module": "bert_module",
            "inputs": [("_input_", "token1_ids")],
        },
        {
            "name": "choice1",
            "module": "bert_module",
            "inputs": [("_input_", "token2_ids")],
        },
        {
            # No explicit inputs: ChoiceModule.forward looks up the "choice0"
            # and "choice1" entries in the dict it is handed.
            "name": f"{TASK_NAME}_pred_head",
            "module": f"{TASK_NAME}_pred_head",
            "inputs": [],
        },
    ],
    # ce_loss and output both read the "<TASK_NAME>_pred_head" entry, which
    # matches the name of the last step above.
    loss_func=partial(ce_loss, TASK_NAME),
    output_func=partial(output, TASK_NAME),
    scorer=Scorer(
        metrics=SuperGLUE_TASK_METRIC_MAPPING[TASK_NAME]
    ),
)

[2019-05-28 02:36:07,564][INFO] pytorch_pretrained_bert.modeling:580 - loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased.tar.gz from cache at ./cache/7fb0534b83c42daee7d3ddb0ebaa81387925b71665d6ea195c5447f1077454cd.eea60d9ebb03c75bb36302aa9d241d3b7a04bba39c360cf035e8bf8140816233
[2019-05-28 02:36:07,566][INFO] pytorch_pretrained_bert.modeling:588 - extracting archive file ./cache/7fb0534b83c42daee7d3ddb0ebaa81387925b71665d6ea195c5447f1077454cd.eea60d9ebb03c75bb36302aa9d241d3b7a04bba39c360cf035e8bf8140816233 to temp dir /tmp/tmp6osx68u9
[2019-05-28 02:36:24,890][INFO] pytorch_pretrained_bert.modeling:598 - Model config {
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "max_position_embeddings": 512,
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pooler_fc_size": 768,
  "pooler_num_a

In [13]:
# Wrap the single COPA task in an EmmentalModel.
mtl_model = EmmentalModel(name="SuperGLUE_single_task", tasks=[emmental_task])

[2019-05-28 02:36:56,092][INFO] emmental.model:58 - Moving model to GPU (cuda:1).
[2019-05-28 02:37:01,999][INFO] emmental.model:44 - Created emmental model SuperGLUE_single_task that contains task {'COPA'}.
[2019-05-28 02:37:02,001][INFO] emmental.model:58 - Moving model to GPU (cuda:1).


In [14]:
# Learner created with no arguments; it is used by the training call in the next cell.
emmental_learner = EmmentalLearner()

In [15]:
# Train the model. NOTE(review): the loaders of all three splits (train, val,
# test) are passed together - presumably the learner selects them via the
# train_split / valid_split settings; confirm.
emmental_learner.learn(mtl_model, dataloaders.values())

[2019-05-28 02:37:02,092][INFO] emmental.logging.logging_manager:33 - Evaluating every 100 batch.
[2019-05-28 02:37:02,093][INFO] emmental.logging.logging_manager:51 - No checkpointing.
[2019-05-28 02:37:02,138][INFO] root:123 - Generating grammar tables from /usr/lib/python3.6/lib2to3/Grammar.txt
[2019-05-28 02:37:02,185][INFO] root:123 - Generating grammar tables from /usr/lib/python3.6/lib2to3/PatternGrammar.txt
[2019-05-28 02:37:02,314][INFO] emmental.learner:152 - Warmup 100 batchs.
[2019-05-28 02:37:02,318][INFO] emmental.learner:298 - Start learning...


HBox(children=(IntProgress(value=0, description='Epoch 0:', style=ProgressStyle(description_width='initial')),…




HBox(children=(IntProgress(value=0, description='Epoch 1:', style=ProgressStyle(description_width='initial')),…




HBox(children=(IntProgress(value=0, description='Epoch 2:', style=ProgressStyle(description_width='initial')),…




HBox(children=(IntProgress(value=0, description='Epoch 3:', style=ProgressStyle(description_width='initial')),…




HBox(children=(IntProgress(value=0, description='Epoch 4:', style=ProgressStyle(description_width='initial')),…




HBox(children=(IntProgress(value=0, description='Epoch 5:', style=ProgressStyle(description_width='initial')),…




HBox(children=(IntProgress(value=0, description='Epoch 6:', style=ProgressStyle(description_width='initial')),…




HBox(children=(IntProgress(value=0, description='Epoch 7:', style=ProgressStyle(description_width='initial')),…




HBox(children=(IntProgress(value=0, description='Epoch 8:', style=ProgressStyle(description_width='initial')),…




HBox(children=(IntProgress(value=0, description='Epoch 9:', style=ProgressStyle(description_width='initial')),…




In [16]:
# Score the trained model on the validation split.
mtl_model.score(dataloaders["val"])

{'COPA/SuperGLUE/val/accuracy': 0.6}

In [17]:
# Score on the training split, for comparison with the validation score.
mtl_model.score(dataloaders["train"])

{'COPA/SuperGLUE/train/accuracy': 1.0}

In [18]:
# Repeat of the validation scoring call made earlier in the notebook.
mtl_model.score(dataloaders["val"])

{'COPA/SuperGLUE/val/accuracy': 0.6}

In [19]:
# Run prediction on the validation split. Judging by the values displayed in
# the following cells: a = gold labels, b = per-class probabilities,
# c = predicted labels, each keyed by task name.
a, b, c = mtl_model.predict(dataloaders["val"], return_preds=True)

In [20]:
# First value returned by predict(): per-task label array (values 1 or 2) - appears to be the gold labels.
a

defaultdict(list,
            {'COPA': array([2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 2,
                    1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 1,
                    2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1,
                    1, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1,
                    1, 1, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2])})

In [21]:
# Third value returned by predict(): per-task array of predicted labels (values 1 or 2).
c

defaultdict(list,
            {'COPA': array([1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1,
                    1, 1, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2,
                    2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2,
                    1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1,
                    1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 2])})

In [22]:
# Second value returned by predict(): per-task array with one probability per class for each example.
b

defaultdict(list, {'COPA': array([[9.99761522e-01, 2.38526700e-04],
                    [1.69803783e-01, 8.30196202e-01],
                    [5.43002822e-02, 9.45699692e-01],
                    [9.39647257e-01, 6.03526980e-02],
                    [5.59555786e-03, 9.94404435e-01],
                    [3.17611635e-01, 6.82388365e-01],
                    [9.99995947e-01, 4.09298946e-06],
                    [9.25400972e-01, 7.45989755e-02],
                    [5.81856608e-01, 4.18143421e-01],
                    [9.97979581e-01, 2.02048244e-03],
                    [1.68916536e-04, 9.99831080e-01],
                    [3.77696753e-02, 9.62230265e-01],
                    [1.21609941e-04, 9.99878407e-01],
                    [2.07558796e-02, 9.79244173e-01],
                    [1.82919607e-01, 8.17080379e-01],
                    [2.54585780e-03, 9.97454107e-01],
                    [9.98093426e-01, 1.90656923e-03],
                    [5.52746579e-02, 9.44725275e-01],
              