In [1]:
# Enable IPython's autoreload extension in mode 2 so that edited modules
# (e.g. the project's `src` package imported below) are re-imported
# automatically before each cell runs.
%load_ext autoreload
%autoreload 2
import os

# Set before `wandb` is imported in the next cell.
# NOTE(review): presumably this suppresses wandb's console output — confirm
# against the wandb environment-variable documentation.
os.environ["WANDB_SILENT"] = "true"

In [4]:
import argparse
import os
import statistics
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import yaml
from src import BertClassifier
from src import datasets as data_utils
from src import influence, train_utils, utils
from src.datasets import create_loo_dataset, create_test_sst2, create_train_sst2
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset, TensorDataset
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer

import wandb

# Resolve the compute device once; it is handed to the dataset builders below.
device = utils.get_device()

# Disabled alternative: build a config for a fresh training run instead of
# loading an existing model.
# config = utils.load_config(
#     "model_params/bert_classifier.yaml", epochs=5, num_training_examples=1000
# )

# YAML file passed to BertClassifier.load_model, which returns both the model
# and the config used throughout the rest of the notebook.
MODEL_CONFIG_PATH = "model_params/bert-epoch30-reg0.001-10000.yaml"
og_model, config = BertClassifier.load_model(MODEL_CONFIG_PATH)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_projector.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Create Datasets

In [5]:
# Flag forwarded to both dataset builders.
USE_BERT_EMBEDDINGS = True

# Keyword arguments that create_train_sst2 and create_test_sst2 have in common.
sst2_common_kwargs = {
    "tokenizer_name": config["bert_model_name"],
    "max_seq_len": config["max_sequence_length"],
    "device": device,
    "use_bert_embeddings": USE_BERT_EMBEDDINGS,
}

# The training split is additionally limited to the configured number of examples.
train_dataset = create_train_sst2(
    num_samples=config["num_training_examples"],
    **sst2_common_kwargs,
)
test_dataset = create_test_sst2(**sst2_common_kwargs)

# One example per batch, in fixed order.
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [00:00<00:00, 14321.44it/s]
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_projector.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████

## Train Model (or evaluate the loaded checkpoint)

In [6]:
# Training from scratch is disabled here; the model loaded into `og_model` is
# evaluated instead. The original training call is kept for reference:
# full_model, original_df, test_loss, test_acc = train_utils.train_bert_model(
#     train_dataset=train_dataset,
#     test_dataset=test_dataset,
#     config=config,
#     use_bert_embeddings=USE_BERT_EMBEDDINGS,
# )
# test_loss, test_acc

# Evaluate the loaded model on the test set. `fdf` is the per-example result
# frame (columns test_guid, logits, pred, label, loss) used by later cells.
# The embeddings flag comes from USE_BERT_EMBEDDINGS so evaluation always
# matches how the datasets were built, rather than a separate hard-coded True.
fdf, test_loss, test_acc = train_utils.evaluate_loss(
    og_model, test_dataloader, use_bert_embeddings=USE_BERT_EMBEDDINGS
)
test_loss, test_acc

(0.37703973579917965, 83.14220183486239)

## Adversarial Attack

In [42]:
def perturb_datapoint(dataset, data_guid, perturbation):
    """Add ``perturbation`` to one example's inputs, modifying ``dataset`` in place.

    Args:
        dataset: Object exposing a ``tensors`` sequence laid out as
            ``(guid, inputs, attn_mask, labels)`` (e.g. a ``TensorDataset``).
        data_guid: Row index of the example to perturb. The guid stored at that
            row must equal this value.
        perturbation: Tensor added to the example's inputs; it is moved to the
            device of the inputs before the addition.

    Returns:
        ``(inputs_before, inputs)``: a detached copy of the inputs taken before
        the update, and the updated inputs row.

    Raises:
        AssertionError: If the guid stored at row ``data_guid`` differs from
            ``data_guid``.
    """
    # Index the dataset that was passed in (not a notebook-level global), so the
    # function perturbs exactly the dataset the caller asked for.
    guid, inputs, _, _ = [t[data_guid] for t in dataset.tensors]
    assert guid.squeeze() == data_guid

    inputs_before = inputs.detach().clone()
    # Integer indexing yields a view, so the in-place add writes through to the
    # dataset's storage. The perturbation is moved to the inputs' own device.
    inputs += perturbation.to(inputs.device)
    return inputs_before, inputs


def perform_attack(
    model,
    config,
    train_dataset,
    test_dataset,
    target_test_guid,
    target_train_guid=None,
    alpha=2e-2,
):
    """Run one attack step: perturb a single training example, then retrain.

    When ``target_train_guid`` is not given, influence scores for
    ``target_test_guid`` are computed and the training index with the most
    negative score is selected. The input influence for that training example
    is then scaled by ``alpha`` and handed to ``perturb_datapoint`` (documented
    as modifying the dataset in place), after which a model is trained via
    ``train_utils.train_bert_model``.

    Args:
        model: Model whose ``classifier`` parameters are used for the influence
            computations.
        config: Config forwarded to ``train_utils.train_bert_model``.
        train_dataset: Training dataset; also the one that gets perturbed.
        test_dataset: Test dataset containing ``target_test_guid``.
        target_test_guid: Test example the influence is computed for.
        target_train_guid: Training example to perturb, or ``None`` to choose
            it from the influence scores.
        alpha: Multiplier applied to the input influence to form the perturbation.

    Returns:
        ``(model, df, infl, input_infl)``: the retrained model, the frame
        returned by training with an added ``perturbed_guid`` column, the
        influence scores (``None`` if ``target_train_guid`` was supplied), and
        the input influence result.
    """

    def influence_kwargs():
        # Built anew for every call so each influence routine receives its own
        # parameter list.
        return dict(
            param_influence=list(model.classifier.parameters()),
            train_dataset=train_dataset,
            test_dataset=test_dataset,
            use_bert_embeddings=True,
            lissa_r=1,
            lissa_depth=1,
            damping=5e-3,
            scale=100,
        )

    if target_train_guid is not None:
        infl = None
    else:
        print("---Computing Influence Function---")
        infl = influence.compute_influence(
            model, target_test_guid, **influence_kwargs()
        )
        # Most negative influence is most helpful, so take the first index of
        # the ascending sort.
        target_train_guid = np.argsort(infl)[0]

    print("---Computing Input Influence Function---")
    input_infl = influence.compute_input_influence(
        model,
        target_test_guid,
        training_indices=[target_train_guid],
        **influence_kwargs(),
    )

    print(f"---Perturbing training guid {target_train_guid}---")
    perturbation = alpha * input_infl[target_train_guid]
    perturb_datapoint(train_dataset, target_train_guid, perturbation)

    print("---Retraining on perturbed data---")
    # Train on the (now perturbed) training set; the aggregate test loss and
    # accuracy returned by training are not used here.
    retrained_model, df, _test_loss, _test_acc = train_utils.train_bert_model(
        train_dataset=train_dataset,
        test_dataset=test_dataset,
        config=config,
        use_bert_embeddings=True,
    )
    df["perturbed_guid"] = target_train_guid
    return retrained_model, df, infl, input_infl

In [52]:
# Keep only the test examples the model classifies correctly, order them from
# highest to lowest loss, and display the row at position 200 of that ranking.
correctly_classified = fdf[fdf.pred == fdf.label]
correctly_classified.sort_values("loss", ascending=False).iloc[200]

test_guid                        845
logits       [0.5202583, -0.6204333]
pred                               0
label                              0
loss                        0.277327
Name: 845, dtype: object

In [53]:
# Author's note: top 100 can be attacked.
# NOTE(review): presumably this refers to the loss ranking of correctly
# classified test examples shown in the previous cell — confirm.

# Test example targeted by the attack (the row displayed in the previous cell).
TEST_GUID = 845

# Loss of the evaluated model on the target example, taken from `fdf`.
baseline_test_loss = fdf.loc[fdf.test_guid == TEST_GUID, "loss"].squeeze()
baseline_test_loss

0.27732712030410767

In [54]:
# Iterated attack on TEST_GUID: repeat single-example perturbation + retraining
# until the model's prediction for the target test example no longer matches
# its label, or 15 iterations have run.
#
# Optional fixed training example to perturb (disabled; when not passed,
# perform_attack picks the example from the influence scores each iteration).
# target_train_guid = 262

# Per-iteration outputs of perform_attack, appended in iteration order.
hist = {
    "loss_df": [],
    "influence": [],
    "input_influence": [],
}
# The first iteration computes influence on the originally loaded model; every
# later iteration uses the model returned by the previous perform_attack call.
model = og_model
for i in range(15):
    model, loss_df, infl, input_infl = perform_attack(
        model=model,
        config=config,
        train_dataset=train_dataset,
        test_dataset=test_dataset,
        target_test_guid=TEST_GUID,
        # Overrides perform_attack's default alpha of 2e-2.
        alpha=5e-1,
        # target_train_guid=target_train_guid,
    )
    # Tag the rows with the iteration they came from.
    loss_df["iter"] = i

    hist["loss_df"].append(loss_df)
    hist["influence"].append(infl)
    hist["input_influence"].append(input_infl)
    
    # Stop as soon as the target test example is misclassified.
    test_df = loss_df[loss_df.test_guid == TEST_GUID]
    if test_df.pred.squeeze() != test_df.label.squeeze():
        break

---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 7.170006
Recursion at depth 100: norm is 108.369644
Recursion at depth 200: norm is 163.181732
Recursion at depth 300: norm is 192.374741
Recursion at depth 400: norm is 208.072952
Recursion at depth 500: norm is 218.451111
Recursion at depth 600: norm is 223.446838
Recursion at depth 700: norm is 226.833298
Recursion at depth 800: norm is 229.715286
Recursion at depth 900: norm is 230.460648
Recursion at depth 999: norm is 232.348038


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 147.71it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 7.114713
Recursion at depth 100: norm is 107.310768
Recursion at depth 200: norm is 163.592789
Recursion at depth 300: norm is 192.630981
Recursion at depth 400: norm is 209.661072
Recursion at depth 500: norm is 218.678299
Recursion at depth 600: norm is 223.884995
Recursion at depth 700: norm is 226.265518
Recursion at depth 800: norm is 229.755447
Recursion at depth 900: norm is 229.281372
Recursion at depth 999: norm is 230.660950


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:11<00:00, 89.16it/s]


---Perturbing training guid 146---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 27.76batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.15batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.79batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.32batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.50batch/s]


Final 0.4042343917438327, 81.19266055045871
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 3.812735
Recursion at depth 100: norm is 56.911823
Recursion at depth 200: norm is 85.974640
Recursion at depth 300: norm is 101.695328
Recursion at depth 400: norm is 110.046654
Recursion at depth 500: norm is 115.577110
Recursion at depth 600: norm is 118.231163
Recursion at depth 700: norm is 120.180870
Recursion at depth 800: norm is 121.836266
Recursion at depth 900: norm is 122.367760
Recursion at depth 999: norm is 123.418045


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 147.43it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 3.779460
Recursion at depth 100: norm is 56.005798
Recursion at depth 200: norm is 85.901062
Recursion at depth 300: norm is 101.519035
Recursion at depth 400: norm is 110.650879
Recursion at depth 500: norm is 115.933182
Recursion at depth 600: norm is 118.817421
Recursion at depth 700: norm is 119.999832
Recursion at depth 800: norm is 121.885422
Recursion at depth 900: norm is 122.058739
Recursion at depth 999: norm is 122.373367


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:08<00:00, 114.12it/s]


---Perturbing training guid 710---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.61batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.74batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.18batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.93batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.01batch/s]


Final 0.4043390411532761, 81.53669724770643
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 4.114989
Recursion at depth 100: norm is 61.329872
Recursion at depth 200: norm is 92.702011
Recursion at depth 300: norm is 109.599182
Recursion at depth 400: norm is 118.624924
Recursion at depth 500: norm is 124.582329
Recursion at depth 600: norm is 127.442528
Recursion at depth 700: norm is 129.551636
Recursion at depth 800: norm is 131.352310
Recursion at depth 900: norm is 131.897690
Recursion at depth 999: norm is 133.047333


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 147.24it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 4.075822
Recursion at depth 100: norm is 60.361996
Recursion at depth 200: norm is 92.578430
Recursion at depth 300: norm is 109.431740
Recursion at depth 400: norm is 119.318367
Recursion at depth 500: norm is 124.969841
Recursion at depth 600: norm is 128.114563
Recursion at depth 700: norm is 129.382355
Recursion at depth 800: norm is 131.407166
Recursion at depth 900: norm is 131.584976
Recursion at depth 999: norm is 131.951660


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:08<00:00, 116.89it/s]


---Perturbing training guid 761---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 28.12batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.02batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.68batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.91batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.03batch/s]


Final 0.40607961663677883, 81.65137614678899
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 4.200695
Recursion at depth 100: norm is 62.507938
Recursion at depth 200: norm is 94.504013
Recursion at depth 300: norm is 111.721375
Recursion at depth 400: norm is 120.909462
Recursion at depth 500: norm is 126.986595
Recursion at depth 600: norm is 129.881638
Recursion at depth 700: norm is 132.038437
Recursion at depth 800: norm is 133.879395
Recursion at depth 900: norm is 134.437546
Recursion at depth 999: norm is 135.648254


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 146.90it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 4.152007
Recursion at depth 100: norm is 61.538712
Recursion at depth 200: norm is 94.463455
Recursion at depth 300: norm is 111.585648
Recursion at depth 400: norm is 121.656265
Recursion at depth 500: norm is 127.390869
Recursion at depth 600: norm is 130.604050
Recursion at depth 700: norm is 131.884003
Recursion at depth 800: norm is 133.937057
Recursion at depth 900: norm is 134.109238
Recursion at depth 999: norm is 134.525131


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:08<00:00, 118.91it/s]


---Perturbing training guid 333---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.08batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.02batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.03batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.93batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.89batch/s]


Final 0.40578691094715147, 81.65137614678899
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 4.308875
Recursion at depth 100: norm is 64.083450
Recursion at depth 200: norm is 96.920120
Recursion at depth 300: norm is 114.562836
Recursion at depth 400: norm is 124.015320
Recursion at depth 500: norm is 130.261520
Recursion at depth 600: norm is 133.215820
Recursion at depth 700: norm is 135.428635
Recursion at depth 800: norm is 137.305130
Recursion at depth 900: norm is 137.890488
Recursion at depth 999: norm is 139.094238


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 155.14it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 4.254173
Recursion at depth 100: norm is 63.107948
Recursion at depth 200: norm is 96.878479
Recursion at depth 300: norm is 114.433388
Recursion at depth 400: norm is 124.787254
Recursion at depth 500: norm is 130.663773
Recursion at depth 600: norm is 133.969299
Recursion at depth 700: norm is 135.251434
Recursion at depth 800: norm is 137.358902
Recursion at depth 900: norm is 137.518433
Recursion at depth 999: norm is 138.036560


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:08<00:00, 115.74it/s]


---Perturbing training guid 537---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.21batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.80batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.69batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.61batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.56batch/s]


Final 0.405704633797207, 81.53669724770643
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 4.557305
Recursion at depth 100: norm is 67.802437
Recursion at depth 200: norm is 102.646263
Recursion at depth 300: norm is 121.372925
Recursion at depth 400: norm is 131.524567
Recursion at depth 500: norm is 138.124100
Recursion at depth 600: norm is 141.310989
Recursion at depth 700: norm is 143.652771
Recursion at depth 800: norm is 145.509705
Recursion at depth 900: norm is 146.265472
Recursion at depth 999: norm is 147.546951


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 156.27it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 4.498980
Recursion at depth 100: norm is 66.784882
Recursion at depth 200: norm is 102.601891
Recursion at depth 300: norm is 121.287964
Recursion at depth 400: norm is 132.217911
Recursion at depth 500: norm is 138.447220
Recursion at depth 600: norm is 141.922211
Recursion at depth 700: norm is 143.377274
Recursion at depth 800: norm is 145.739594
Recursion at depth 900: norm is 145.867691
Recursion at depth 999: norm is 146.317139


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:08<00:00, 115.46it/s]


---Perturbing training guid 911---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 31.17batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.74batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.53batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.65batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.53batch/s]


Final 0.4052045426429861, 81.65137614678899
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 4.767062
Recursion at depth 100: norm is 71.033165
Recursion at depth 200: norm is 107.521828
Recursion at depth 300: norm is 127.188332
Recursion at depth 400: norm is 137.848892
Recursion at depth 500: norm is 144.820190
Recursion at depth 600: norm is 148.367935
Recursion at depth 700: norm is 150.653336
Recursion at depth 800: norm is 152.634995
Recursion at depth 900: norm is 153.351837
Recursion at depth 999: norm is 154.744064


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 155.19it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 4.705225
Recursion at depth 100: norm is 70.038429
Recursion at depth 200: norm is 107.667404
Recursion at depth 300: norm is 127.245735
Recursion at depth 400: norm is 138.700851
Recursion at depth 500: norm is 145.083908
Recursion at depth 600: norm is 148.746140
Recursion at depth 700: norm is 150.630707
Recursion at depth 800: norm is 153.088013
Recursion at depth 900: norm is 153.075745
Recursion at depth 999: norm is 153.429398


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:07<00:00, 125.00it/s]


---Perturbing training guid 679---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.30batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 31.42batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.71batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.82batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.87batch/s]


Final 0.4050941927103065, 81.76605504587155
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 4.850146
Recursion at depth 100: norm is 72.404839
Recursion at depth 200: norm is 109.514061
Recursion at depth 300: norm is 129.563202
Recursion at depth 400: norm is 140.525574
Recursion at depth 500: norm is 147.537659
Recursion at depth 600: norm is 151.138229
Recursion at depth 700: norm is 153.619125
Recursion at depth 800: norm is 155.559677
Recursion at depth 900: norm is 156.385361
Recursion at depth 999: norm is 157.820190


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 155.38it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 4.788602
Recursion at depth 100: norm is 71.298431
Recursion at depth 200: norm is 109.732124
Recursion at depth 300: norm is 129.706207
Recursion at depth 400: norm is 141.408386
Recursion at depth 500: norm is 147.839096
Recursion at depth 600: norm is 151.732971
Recursion at depth 700: norm is 153.533951
Recursion at depth 800: norm is 156.184708
Recursion at depth 900: norm is 156.099716
Recursion at depth 999: norm is 156.503937


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:08<00:00, 119.70it/s]


---Perturbing training guid 617---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.49batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.64batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.65batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.50batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.86batch/s]


Final 0.4052877769350794, 81.88073394495413
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 5.383257
Recursion at depth 100: norm is 80.388725
Recursion at depth 200: norm is 121.889137
Recursion at depth 300: norm is 144.695862
Recursion at depth 400: norm is 156.655380
Recursion at depth 500: norm is 164.565475
Recursion at depth 600: norm is 168.850739
Recursion at depth 700: norm is 171.318008
Recursion at depth 800: norm is 173.514130
Recursion at depth 900: norm is 174.468628
Recursion at depth 999: norm is 175.921509


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 154.53it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 5.315162
Recursion at depth 100: norm is 79.369843
Recursion at depth 200: norm is 122.113235
Recursion at depth 300: norm is 144.626007
Recursion at depth 400: norm is 157.396133
Recursion at depth 500: norm is 164.886673
Recursion at depth 600: norm is 169.255020
Recursion at depth 700: norm is 171.504471
Recursion at depth 800: norm is 174.144165
Recursion at depth 900: norm is 173.828583
Recursion at depth 999: norm is 174.379166


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:07<00:00, 125.30it/s]


---Perturbing training guid 293---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.25batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.60batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.63batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.82batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.73batch/s]


Final 0.4046800692463592, 81.88073394495413
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 5.509279
Recursion at depth 100: norm is 82.287788
Recursion at depth 200: norm is 124.837006
Recursion at depth 300: norm is 148.151215
Recursion at depth 400: norm is 160.343307
Recursion at depth 500: norm is 168.454758
Recursion at depth 600: norm is 172.895065
Recursion at depth 700: norm is 175.449173
Recursion at depth 800: norm is 177.640167
Recursion at depth 900: norm is 178.640747
Recursion at depth 999: norm is 180.077225


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 153.61it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 5.445246
Recursion at depth 100: norm is 81.289658
Recursion at depth 200: norm is 125.013710
Recursion at depth 300: norm is 148.064499
Recursion at depth 400: norm is 161.111496
Recursion at depth 500: norm is 168.800980
Recursion at depth 600: norm is 173.255356
Recursion at depth 700: norm is 175.552261
Recursion at depth 800: norm is 178.308258
Recursion at depth 900: norm is 177.936935
Recursion at depth 999: norm is 178.517288


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:08<00:00, 119.02it/s]


---Perturbing training guid 843---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.28batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.57batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 31.20batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.81batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.65batch/s]


Final 0.40599794562017494, 81.76605504587155
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 5.723131
Recursion at depth 100: norm is 85.718185
Recursion at depth 200: norm is 129.875565
Recursion at depth 300: norm is 154.039871
Recursion at depth 400: norm is 166.901352
Recursion at depth 500: norm is 175.192505
Recursion at depth 600: norm is 179.940109
Recursion at depth 700: norm is 182.449600
Recursion at depth 800: norm is 184.754150
Recursion at depth 900: norm is 185.881989
Recursion at depth 999: norm is 187.404724


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 146.53it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 5.653793
Recursion at depth 100: norm is 84.523460
Recursion at depth 200: norm is 129.943069
Recursion at depth 300: norm is 153.993195
Recursion at depth 400: norm is 167.522720
Recursion at depth 500: norm is 175.483688
Recursion at depth 600: norm is 180.414780
Recursion at depth 700: norm is 182.603516
Recursion at depth 800: norm is 185.570145
Recursion at depth 900: norm is 185.186203
Recursion at depth 999: norm is 185.734680


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:07<00:00, 126.47it/s]


---Perturbing training guid 123---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 28.55batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 31.14batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.59batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 31.39batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.55batch/s]


Final 0.4061710361592129, 81.88073394495413
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 5.797697
Recursion at depth 100: norm is 86.834892
Recursion at depth 200: norm is 131.618866
Recursion at depth 300: norm is 156.152405
Recursion at depth 400: norm is 169.137146
Recursion at depth 500: norm is 177.535751
Recursion at depth 600: norm is 182.312698
Recursion at depth 700: norm is 184.850372
Recursion at depth 800: norm is 187.211441
Recursion at depth 900: norm is 188.378082
Recursion at depth 999: norm is 189.868942


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 162.42it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 5.729347
Recursion at depth 100: norm is 85.629501
Recursion at depth 200: norm is 131.678726
Recursion at depth 300: norm is 156.011093
Recursion at depth 400: norm is 169.728363
Recursion at depth 500: norm is 177.809952
Recursion at depth 600: norm is 182.788940
Recursion at depth 700: norm is 185.082199
Recursion at depth 800: norm is 188.024719
Recursion at depth 900: norm is 187.623642
Recursion at depth 999: norm is 188.206329


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:08<00:00, 124.29it/s]


---Perturbing training guid 198---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 28.76batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.36batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.76batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.98batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.78batch/s]


Final 0.40602399776891884, 81.88073394495413
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 5.947155
Recursion at depth 100: norm is 89.129723
Recursion at depth 200: norm is 135.055359
Recursion at depth 300: norm is 160.272842
Recursion at depth 400: norm is 173.608734
Recursion at depth 500: norm is 182.460327
Recursion at depth 600: norm is 187.183685
Recursion at depth 700: norm is 189.839966
Recursion at depth 800: norm is 192.263565
Recursion at depth 900: norm is 193.490829
Recursion at depth 999: norm is 194.938950


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 144.82it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 5.880088
Recursion at depth 100: norm is 87.954781
Recursion at depth 200: norm is 135.231644
Recursion at depth 300: norm is 160.180130
Recursion at depth 400: norm is 174.332626
Recursion at depth 500: norm is 182.563950
Recursion at depth 600: norm is 187.739471
Recursion at depth 700: norm is 190.031189
Recursion at depth 800: norm is 193.079681
Recursion at depth 900: norm is 192.645935
Recursion at depth 999: norm is 193.475754


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:07<00:00, 127.56it/s]


---Perturbing training guid 475---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.29batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.61batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.71batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.76batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.92batch/s]


Final 0.40711668229050285, 81.9954128440367
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 6.085615
Recursion at depth 100: norm is 91.222031
Recursion at depth 200: norm is 138.399170
Recursion at depth 300: norm is 164.389511
Recursion at depth 400: norm is 178.163376
Recursion at depth 500: norm is 187.030563
Recursion at depth 600: norm is 191.916031
Recursion at depth 700: norm is 194.520950
Recursion at depth 800: norm is 197.171127
Recursion at depth 900: norm is 198.346329
Recursion at depth 999: norm is 199.906982


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 153.24it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 6.019671
Recursion at depth 100: norm is 90.125275
Recursion at depth 200: norm is 138.642578
Recursion at depth 300: norm is 164.207565
Recursion at depth 400: norm is 178.624863
Recursion at depth 500: norm is 187.050903
Recursion at depth 600: norm is 192.388123
Recursion at depth 700: norm is 194.764008
Recursion at depth 800: norm is 197.904938
Recursion at depth 900: norm is 197.434250
Recursion at depth 999: norm is 198.388199


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:07<00:00, 127.19it/s]


---Perturbing training guid 468---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 28.69batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 31.38batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.74batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.63batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 31.07batch/s]


Final 0.4074564576307773, 82.11009174311927
---Computing Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 6.163350
Recursion at depth 100: norm is 92.453560
Recursion at depth 200: norm is 140.265411
Recursion at depth 300: norm is 166.610580
Recursion at depth 400: norm is 180.568741
Recursion at depth 500: norm is 189.556839
Recursion at depth 600: norm is 194.519928
Recursion at depth 700: norm is 197.134140
Recursion at depth 800: norm is 199.827347
Recursion at depth 900: norm is 201.067719
Recursion at depth 999: norm is 202.600281


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 149.28it/s]


---Computing Input Influence Function---
LiSSA reps: 1 and num_iterations: 1000
Recursion at depth 0: norm is 6.098562
Recursion at depth 100: norm is 91.304008
Recursion at depth 200: norm is 140.497284
Recursion at depth 300: norm is 166.437561
Recursion at depth 400: norm is 181.042679
Recursion at depth 500: norm is 189.545715
Recursion at depth 600: norm is 194.938980
Recursion at depth 700: norm is 197.387009
Recursion at depth 800: norm is 200.562332
Recursion at depth 900: norm is 200.110458
Recursion at depth 999: norm is 201.062729


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:07<00:00, 127.04it/s]


---Perturbing training guid 237---
---Retraining on perturbed data---


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Initial 0.41442395658465236, 80.27522935779817


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.45batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:01<00:00, 31.67batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.78batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.80batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.91batch/s]


Final 0.4088614702345444, 81.9954128440367


In [51]:
# Stack the loss frames collected under hist["loss_df"] into a single frame,
# then display only the rows recorded for the target test example.
df = pd.concat(hist["loss_df"])
df.loc[df["test_guid"] == TEST_GUID]

Unnamed: 0,test_guid,logits,pred,label,loss,perturbed_guid,iter
835,835,"[-0.2794927, 0.1258931]",1,1,0.510857,283,0
835,835,"[-0.23010865, 0.07639963]",1,1,0.551591,674,1
835,835,"[-0.21975267, 0.06545623]",1,1,0.560677,636,2
835,835,"[-0.22391771, 0.06937532]",1,1,0.557215,861,3
835,835,"[-0.19686149, 0.042281836]",1,1,0.580707,91,4
835,835,"[-0.18992358, 0.03470982]",1,1,0.587125,486,5
835,835,"[-0.15941639, 0.004119546]",1,1,0.614719,281,6
835,835,"[-0.1041629, -0.051154472]",1,1,0.666994,94,7
835,835,"[-0.0863075, -0.06904605]",1,1,0.684554,693,8
835,835,"[-0.028598854, -0.12658453]",0,1,0.74334,746,9


In [25]:
# Stack the loss frames collected under hist["loss_df"] into a single frame,
# then display only the rows recorded for the target test example.
df = pd.concat(hist["loss_df"])
df.loc[df["test_guid"] == TEST_GUID]

Unnamed: 0,test_guid,logits,pred,label,loss,perturbed_guid,iter
175,175,"[0.674652, -0.7781812]",0,1,1.663018,455,0
175,175,"[0.72075385, -0.82425386]",0,1,1.73836,861,1
175,175,"[0.745123, -0.84857845]",0,1,1.778663,693,2
175,175,"[0.7943765, -0.8980169]",0,1,1.861358,486,3
175,175,"[0.8068535, -0.90997225]",0,1,1.882031,94,4
175,175,"[0.81793123, -0.9210918]",0,1,1.90088,91,5
175,175,"[0.82862556, -0.93167734]",0,1,1.919008,636,6
175,175,"[0.8473495, -0.95056677]",0,1,1.95119,686,7
175,175,"[0.8550431, -0.9582516]",0,1,1.964397,910,8
175,175,"[0.86175734, -0.9652028]",0,1,1.976157,837,9


True

In [None]:
-

## Scratch

In [None]:
# Call perform_attack starting from `model`; it returns a model and a loss
# frame, and the rows for the target test example are displayed below.
model2, loss_df2 = perform_attack(
    model,
    config,
    train_dataset,
    test_dataset,
    TEST_GUID,
)
loss_df2.loc[loss_df2["test_guid"] == TEST_GUID]

In [None]:
# Call perform_attack again, this time starting from `model2` (the model
# returned by the previous scratch cell), and display the target example's rows.
model3, loss_df3 = perform_attack(
    model2,
    config,
    train_dataset,
    test_dataset,
    TEST_GUID,
)
loss_df3.loc[loss_df3["test_guid"] == TEST_GUID]

In [None]:
-

## Compute Influence Function

In [None]:
# Test example the influence values are computed for.
# NOTE(review): this rebinds TEST_GUID, which the cells above also read.
TEST_GUID = 716

# Compute influence values for TEST_GUID, restricting the parameters considered
# to those of full_model.classifier (passed as param_influence).
# NOTE(review): the lissa_r / lissa_depth / damping / scale values are repeated
# verbatim in the compute_input_influence call further down — keep them in sync.
infl = influence.compute_influence(
    full_model,
    TEST_GUID,
    param_influence=list(full_model.classifier.parameters()),
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    use_bert_embeddings=USE_BERT_EMBEDDINGS,
    lissa_r=2,
    lissa_depth=1,
    damping=5e-3,
    scale=100,
)

In [None]:
# Most negative influence is most helpful
helpful_idxs = np.argsort(infl)[:10]
helpful_idxs

In [None]:
# Look up the influence values at the selected indices (same order as helpful_idxs).
np.take(infl, helpful_idxs)

## Compute Input Influence Function

In [None]:
# Same arguments as the compute_influence call above, plus
# training_indices=helpful_idxs.
# NOTE(review): presumably training_indices limits the computation to those
# training examples — confirm against influence.compute_input_influence.
input_infl = influence.compute_input_influence(
    full_model,
    TEST_GUID,
    param_influence=list(full_model.classifier.parameters()),
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    use_bert_embeddings=USE_BERT_EMBEDDINGS,
    lissa_r=2,
    lissa_depth=1,
    damping=5e-3,
    scale=100,
    training_indices=helpful_idxs,
)

In [None]:
# helpful_idxs comes from an ascending argsort, so its first entry is the index
# with the smallest influence value.
best_idx = helpful_idxs[0]
best_idx

## Perturb the Best Idx

In [None]:
def get_guid(dataset, data_guid):
    """Unimplemented placeholder: ignores both arguments and returns ``None``."""
    return None


def perturb_datapoint(dataset, data_guid, perturbation):
    """Add ``perturbation`` to one example's inputs. This modifies the dataset in place.

    Args:
        dataset: Object exposing ``.tensors`` as exactly four tensors
            ``(guid, inputs, attn_mask, labels)``, each indexed by example
            along the first dimension.
        data_guid: Position of the example to perturb. The guid stored at that
            position must equal ``data_guid``.
        perturbation: Tensor that can be added in place to that example's
            ``inputs`` entry.

    Returns:
        Tuple ``(inputs_before, inputs_after)``: a detached copy of the
        example's inputs taken before the update, and the updated inputs
        (the same storage that now lives inside ``dataset``).

    Raises:
        AssertionError: If the guid stored at ``data_guid`` differs from
            ``data_guid``.
    """
    # Read from the `dataset` argument. The previous version ignored it and
    # always reached for the notebook-global `train_dataset`.
    guid, inputs, _attn_mask, _labels = [t[data_guid] for t in dataset.tensors]
    assert guid.squeeze() == data_guid, (
        f"guid stored at position {data_guid} is {guid}, expected {data_guid}"
    )

    # Snapshot first: the in-place add below overwrites the stored values.
    inputs_before = inputs.detach().clone()
    # Integer indexing returns a view, so `+=` writes through to the dataset.
    # Move the perturbation to the device of the row being updated rather than
    # to a globally looked-up device, so the add cannot hit a device mismatch.
    inputs += perturbation.to(inputs.device)
    return inputs_before, inputs

In [None]:
# Scale factor applied to the input-influence entry before it is added to the example.
alpha = 1e-2

perturb = alpha * input_infl[best_idx]
# NOTE: perturb_datapoint adds to the stored inputs in place, so re-running this
# cell stacks another perturbation on top of the previous one.
before, after = perturb_datapoint(train_dataset, best_idx, perturb)

## Retrain model with new dataset

In [None]:
# Retrain using train_dataset, which the perturbation cell above modified in place.
# NOTE(review): this rebinds `model` and `df`; `df` is filtered by test_guid and
# read for `loss` below, so it presumably holds per-test-example results — confirm
# against train_utils.train_bert_model.
model, df, full_test_loss, full_test_acc = train_utils.train_bert_model(
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    config=config,
    use_bert_embeddings=USE_BERT_EMBEDDINGS,
)

In [None]:
# Pull the `loss` entries of the rows matching the target test example and
# squeeze them (a single matching row collapses to a scalar).
test_loss = df.loc[df["test_guid"] == TEST_GUID, "loss"].squeeze()
test_loss

In [None]:
# Show every column of the rows recorded for the target test example.
df.loc[df["test_guid"] == TEST_GUID]

In [None]:
--

In [None]:
# First value returned by perturb_datapoint: the detached copy of the inputs
# taken before the perturbation was added.
before

In [None]:
# Second value returned by perturb_datapoint: the inputs after the in-place add.
after

In [None]:
# Unpack one example (guid, inputs, attention mask, label) from the dataset tensors.
# NOTE(review): `data_guid` is only visible as a function parameter in this
# notebook; unless it is defined at top level elsewhere, this cell raises
# NameError on a fresh kernel — confirm.
guid, inputs, attn_mask, labels = [t[data_guid] for t in train_dataset.tensors]

inputs[0][0]

In [None]:
# Unpack the example stored at position 262 of each dataset tensor.
# NOTE(review): 262 is hard-coded here and in the cells below.
guid, inputs, attn_mask, labels = [t[262] for t in train_dataset.tensors]

In [None]:
# Peek at the first element of the first row of the unpacked inputs.
inputs[0][0]

In [None]:
# inputs[0]

In [None]:
# Shape of the inputs entry for the unpacked example.
inputs.shape

In [None]:
# In-place add. `inputs` was obtained by indexing train_dataset.tensors, so this
# presumably writes through to the dataset itself, and re-running the cell
# applies the perturbation again — TODO confirm this mutation is intended.
inputs += perturb.to(device)

In [None]:
# Display the inputs after the in-place add above.
inputs

In [None]:
# Input-influence entry stored under key/index 262.
input_infl[262]

In [None]:
import pickle

# Serialize input_infl to input_infl.pkl in the current working directory.
with open("input_infl.pkl", mode="wb") as out_file:
    pickle.dump(input_infl, out_file)

In [None]:
# Check whether index 262 is among the selected helpful indices.
262 in helpful_idxs

In [None]:
-

## Word2Vec

In [None]:
# NOTE(review): Word2Vec is imported but not used in the cells visible here.
from gensim.models import KeyedVectors, Word2Vec

# Load previously saved keyed vectors from disk (judging by the file name,
# presumably 100-dimensional GloVe Twitter vectors — confirm).
w2v = KeyedVectors.load("word2vec/glove-twitter-100.kv")

In [None]:
# Look up the stored vector for the key "spielberg".
w2v["spielberg"]