This notebook shows how to make ensembled predictions by summing the logits of several fine-tuned models and taking the argmax of the combined logits.

## Initial setup

In [1]:
# Use the %pip magic (rather than !pip) so the packages are installed into the
# environment of the running kernel, not whichever `pip` is first on the shell PATH.
%pip install -q datasets transformers

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/pytorch_p38/bin/python -m pip install --upgrade pip' command.

In [2]:
import torch

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

## Dataset loading

In [3]:
from datasets import load_dataset

# Load the "emotion" dataset. The tokenized copy of this object is later indexed
# by split name (`emotions_encoded[split]`), so it is presumably a mapping of
# split name -> dataset — TODO confirm which splits are available.
emotions = load_dataset("emotion")

Found cached dataset emotion (/home/ec2-user/.cache/huggingface/datasets/emotion/default/0.0.0/348f63ca8e27b3713b6c04d723efe6d824a56fb3d1449794716c0f0296072705)


  0%|          | 0/3 [00:00<?, ?it/s]

## Dataset preprocessing

In [4]:
from transformers import AutoTokenizer

# Base checkpoint name. It is used here to load the tokenizer and again further
# down as the prefix of the fine-tuned model IDs, so changing it affects both.
model_ckpt = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

In [5]:
def tokenize(batch):
    """Tokenize the ``"text"`` entry of ``batch`` with the module-level ``tokenizer``.

    Padding and truncation are both switched on. Whatever the tokenizer
    returns is handed back unchanged.
    """
    texts = batch["text"]
    encoded = tokenizer(texts, padding=True, truncation=True)
    return encoded


# Tokenize the dataset. With batched=True and batch_size=None the `datasets`
# library passes the data to `tokenize` as one single batch (per its docs), so
# the padding=True inside `tokenize` is applied across that whole batch.
emotions_encoded = emotions.map(tokenize, batched=True, batch_size=None)
# Switch the output format to torch and expose only the columns that the
# prediction and scoring code below reads.
emotions_encoded.set_format("torch", columns=["input_ids", "attention_mask", "label"])

Loading cached processed dataset at /home/ec2-user/.cache/huggingface/datasets/emotion/default/0.0.0/348f63ca8e27b3713b6c04d723efe6d824a56fb3d1449794716c0f0296072705/cache-19f1f721714bd971.arrow


  0%|          | 0/1 [00:00<?, ?ba/s]

Loading cached processed dataset at /home/ec2-user/.cache/huggingface/datasets/emotion/default/0.0.0/348f63ca8e27b3713b6c04d723efe6d824a56fb3d1449794716c0f0296072705/cache-11b6ba8196e5b164.arrow


## Metric computation utilities

In [6]:
def get_test_accuracy(models):
    """Build a batched prediction function that ensembles ``models`` by summing logits.

    Despite its name, the returned callable does not compute an accuracy: it
    maps a batch to the ensemble's predicted labels, and the caller is expected
    to compare those against the ground truth.

    The models are called as given. This function does not change their
    train/eval mode; it only disables gradient tracking around the forward passes.

    Args:
        models: Iterable of ensemble members. Each member is called as
            ``model(**inputs)`` and must return an object with a ``.logits``
            tensor. The iterable is copied into a list once, so generators are
            supported and later mutation of the caller's list has no effect.

    Returns:
        A function ``fn(batch)`` that takes a mapping of column name to tensor
        and returns ``{"predicted_label": <numpy array>}``, the argmax over
        the last axis of the summed logits.

    Raises:
        ValueError: If ``models`` is empty.
    """
    # Materialise once: the closure below runs once per batch, so a one-shot
    # iterable (e.g. a generator) would otherwise be exhausted after batch one.
    members = list(models)
    if not members:
        # Fail early with a clear message instead of an opaque torch.stack error
        # raised later, in the middle of a dataset map.
        raise ValueError("`models` must contain at least one model to ensemble.")

    def fn(batch):
        # Forward only the columns the tokenizer declares as model inputs
        # (this drops e.g. "label") and move them to the target device.
        inputs = {
            name: tensor.to(device)
            for name, tensor in batch.items()
            if name in tokenizer.model_input_names
        }
        # Ensembling: one forward pass per member, no gradients needed.
        with torch.no_grad():
            member_logits = [model(**inputs).logits for model in members]
        # Stack along a new leading "member" axis and sum it away.
        summed_logits = torch.stack(member_logits, dim=0).sum(dim=0)
        pred_label = torch.argmax(summed_logits, dim=-1)
        return {"predicted_label": pred_label.cpu().numpy()}

    return fn


def compute_test_accuracy(models, split="validation", batch_size=128):
    """Return the accuracy of the logit ensemble of ``models`` on one dataset split.

    Predictions are produced by mapping the function returned by
    ``get_test_accuracy(models)`` over ``emotions_encoded[split]`` and are then
    compared with the ``"label"`` column.

    Args:
        models: Ensemble members, forwarded to ``get_test_accuracy``.
        split: Key of the split in ``emotions_encoded`` to evaluate on.
            Note that the default is ``"validation"``, not ``"test"``.
        batch_size: Number of examples per prediction batch.

    Returns:
        The fraction of examples whose predicted label equals the true label,
        as a plain Python ``float``.

    Raises:
        ZeroDivisionError: If the selected split contains no examples.
    """
    predict_fn = get_test_accuracy(models)

    predictions = emotions_encoded[split].map(
        predict_fn, batched=True, batch_size=batch_size
    )
    # Only the mapped copy is switched to pandas; `emotions_encoded` keeps its format.
    predictions.set_format("pandas")

    df = predictions[:][["label", "predicted_label"]]
    # Vectorised count of matches; int() keeps the result a plain Python float
    # and preserves ZeroDivisionError for an empty split.
    num_correct = int((df["label"] == df["predicted_label"]).sum())
    return num_correct / len(df)

## Perform ensembling

In [7]:
from transformers import AutoModelForSequenceClassification

# Ensemble members, in the order of the hyperparameter pairs below.
models = []

# Learning rates and weight decays are paired by position (lrs[i] with wds[i])
# through the zip() below; each pair identifies one fine-tuned checkpoint.
lrs = [3e-5, 2e-5, 6e-4, 1e-5, 3e-4]
wds = [1e-2, 1e-3, 3e-3, 2e-3, 3e-2]

for lr, wd in zip(lrs, wds):
    # The checkpoint name embeds Python's default formatting of `lr` and the
    # str() of `wd` with the decimal point stripped, e.g. lr=3e-5, wd=1e-2
    # gives "...-lr-3e-05-wd-001". The IDs therefore depend on that exact
    # float formatting.
    model_name = f"{model_ckpt}-finetuned-emotion-lr-{lr}-wd-{str(wd).replace('.', '')}"
    model_id = f"sayakpaul/{model_name}"
    print(f"Loading checkpoint: {model_id}.")
    # Load each checkpoint and move it to the selected device right away.
    models.append(
        AutoModelForSequenceClassification.from_pretrained(model_id).to(device)
    )
    print("Checkpoint loaded.")

Loading checkpoint: sayakpaul/distilbert-base-uncased-finetuned-emotion-lr-3e-05-wd-001.
Checkpoint loaded.
Loading checkpoint: sayakpaul/distilbert-base-uncased-finetuned-emotion-lr-2e-05-wd-0001.
Checkpoint loaded.
Loading checkpoint: sayakpaul/distilbert-base-uncased-finetuned-emotion-lr-0.0006-wd-0003.
Checkpoint loaded.
Loading checkpoint: sayakpaul/distilbert-base-uncased-finetuned-emotion-lr-1e-05-wd-0002.
Checkpoint loaded.
Loading checkpoint: sayakpaul/distilbert-base-uncased-finetuned-emotion-lr-0.0003-wd-003.
Checkpoint loaded.


In [8]:
# Score growing ensembles: the first model alone, then the first two, and so on
# up to all loaded models. Keys are the ensemble sizes.
ensemble_scores = {}

for num_members, _ in enumerate(models, start=1):
    members = models[:num_members]
    accuracy = compute_test_accuracy(members, "validation")
    ensemble_scores[num_members] = accuracy

print(ensemble_scores)

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

  0%|          | 0/16 [00:00<?, ?ba/s]

{1: 0.919, 2: 0.914, 3: 0.9195, 4: 0.917, 5: 0.935}
