In [1]:
%load_ext autoreload
%autoreload 2
import os

# os.environ["WANDB_SILENT"] = "true"

In [2]:
import argparse
import os
import statistics
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import yaml
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset, TensorDataset
from tqdm import tqdm

import wandb
from src import BertClassifier
from src import datasets as data_utils
from src import influence as inf_utils
from src import train_utils, utils
from src.datasets import create_loo_dataset, create_test_sst2, create_train_sst2

# Resolve the compute device once; it is handed to both dataset builders below.
device = utils.get_device()

# Load the classifier config from YAML, passing the epoch count and
# training-set size alongside the path.
config = utils.load_config(
    "model_params/bert_classifier.yaml",
    epochs=5,
    num_training_examples=1000,
)

# Tokenizer / sequence-length / device arguments shared by the train and test builders.
sst2_kwargs = dict(
    tokenizer_name=config["bert_model_name"],
    max_seq_len=config["max_sequence_length"],
    device=device,
)

train_dataset = create_train_sst2(
    num_samples=config["num_training_examples"], **sst2_kwargs
)
test_dataset = create_test_sst2(**sst2_kwargs)

# Unshuffled, one example per batch.
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 14311.86it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 872/872 [00:00<00:00, 10798.86it/s]


In [27]:
import os

import numpy as np
import pandas as pd

from src import influence as inf_utils
from src import train_utils, utils, BertClassifier
from src.datasets import (create_loo_dataset, create_test_sst2,
                          create_train_sst2)

# Resolve the compute device once; it is handed to both dataset builders below.
device = utils.get_device()

# load_model returns the model together with a config; both names are rebound here.
checkpoint_path = "model_params/bert-classifier-epoch5-1000.pt"
model, config = BertClassifier.load_model(checkpoint_path)

# Tokenizer / sequence-length / device arguments shared by the train and test builders.
sst2_kwargs = dict(
    tokenizer_name=config["bert_model_name"],
    max_seq_len=config["max_sequence_length"],
    device=device,
)

train_dataset = create_train_sst2(
    num_samples=config["num_training_examples"], **sst2_kwargs
)
test_dataset = create_test_sst2(**sst2_kwargs)

# TODO thread pool

# Directory receiving one CSV of influence scores per test example. It is
# defined (and created) here so the cell does not depend on leftover kernel state.
output_dir = "influence_scores"
os.makedirs(output_dir, exist_ok=True)

# Parameters of the classifier head, passed to compute_influence as `param_influence`.
param_infl = list(model.classifier.parameters())

# One frame per test example; collected so a later cell can pd.concat them.
influences = []
for test_guid in range(len(test_dataset)):
    infl = inf_utils.compute_influence(
        full_model=model,
        test_guid=test_guid,
        param_influence=param_infl,
        train_dataset=train_dataset,
        test_dataset=test_dataset,
        lissa_r=2,
        lissa_depth=1,
        damping=5e-3,
        scale=100,
    )
    # Long format: one row per training example, keyed by (train_guid, test_guid).
    df = pd.DataFrame(data=infl, index=range(len(infl)), columns=["influence"])
    df = df.rename_axis("train_guid").reset_index()
    df["test_guid"] = test_guid
    df.to_csv(f"{output_dir}/influence-testguid-{test_guid}", index=False)
    influences.append(df)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 9523.94it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████

LiSSA reps: 2 and num_iterations: 1000
Recursion at depth 0: norm is 0.674420
Recursion at depth 200: norm is 11.000627
Recursion at depth 400: norm is 14.677641
Recursion at depth 600: norm is 15.936044
Recursion at depth 800: norm is 16.382391
Recursion at depth 999: norm is 16.561853
Recursion at depth 0: norm is 0.658539
Recursion at depth 200: norm is 10.982577
Recursion at depth 400: norm is 14.674032
Recursion at depth 600: norm is 15.948548
Recursion at depth 800: norm is 16.419153
Recursion at depth 999: norm is 16.566097


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:07<00:00, 136.78it/s]


LiSSA reps: 2 and num_iterations: 1000
Recursion at depth 0: norm is 2.898361
Recursion at depth 200: norm is 48.623352
Recursion at depth 400: norm is 64.839676
Recursion at depth 600: norm is 69.729927
Recursion at depth 800: norm is 71.471901
Recursion at depth 999: norm is 72.188927
Recursion at depth 0: norm is 2.738004
Recursion at depth 200: norm is 48.691811
Recursion at depth 400: norm is 64.725838
Recursion at depth 600: norm is 69.571373
Recursion at depth 800: norm is 71.711586
Recursion at depth 999: norm is 72.354538


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 144.78it/s]


LiSSA reps: 2 and num_iterations: 1000
Recursion at depth 0: norm is 0.487176


KeyboardInterrupt: 

In [32]:
# Stack the per-test-example frames row-wise, then show every row that
# belongs to training example 0.
d = pd.concat(influences, axis=0)
d.loc[d["train_guid"] == 0]

Unnamed: 0,train_guid,influence,test_guid
0,0,0.000415,0
0,0,0.114924,1


## Train Model on Full Data

In [3]:
# Train on the full training set. Later cells read `fdf` through its
# `test_guid` and `loss` columns and use `full_model.classifier` for influence.
full_model, fdf, full_test_loss, full_test_acc = train_utils.train_bert_model(
    train_dataset, test_dataset, config
)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[34m[1mwandb[0m: Currently logged in as: [33mpatcao[0m. Use [1m`wandb login --relogin`[0m to force relogin


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 25.43batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 30.30batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.48batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.63batch/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:02<00:00, 29.72batch/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▅▆█
train/accuracy,▁▆███
train/batch_loss,▆▇▆█▅▅▄▅▄▄▄▃▄▄▃▂▃▃▄▆▄▂▂▄▃▃▁▄▁▂▄▁▅▁▅▃▃▂▃▅
train/loss,█▄▂▁▁

0,1
epoch,5.0
test/accuracy,80.27523
test/loss,0.41442
train/accuracy,85.21825
train/batch_loss,0.13145
train/loss,0.36353


In [7]:
# Reload the saved checkpoint; note this rebinds both `model` and `config`.
model, config = BertClassifier.load_model(
    "model_params/bert-classifier-epoch5-1000.pt"
)

# Evaluate on the test dataloader and display the returned triple.
# NOTE(review): `df` is reused for other frames further down the notebook.
df, loss, acc = train_utils.evaluate_loss(model, test_dataloader)
df, loss, acc

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


(     test_guid                     logits  pred  label      loss
 0            0     [-2.065672, 1.6855112]     1      1  0.023218
 1            1     [1.207626, -1.0739517]     0      0  0.097238
 2            2     [-2.231233, 1.8515978]     1      1  0.016719
 3            3    [-1.3567762, 1.1909542]     1      1  0.075348
 4            4  [0.46830642, -0.72701955]     0      0  0.264366
 ..         ...                        ...   ...    ...       ...
 867        867    [-0.9934867, 0.8819245]     1      0  2.018032
 868        868  [-0.49477586, 0.55733645]     1      1  0.299511
 869        869   [-0.8512296, 0.39946464]     1      0  1.502469
 870        870  [-0.0993969, -0.13528356]     0      0  0.675365
 871        871    [-1.6719589, 1.1752533]     1      1  0.056386
 
 [872 rows x 5 columns],
 0.41442395658465236,
 80.27522935779817)

In [6]:
# Scratch cell intentionally left without code: it previously held a lone "-",
# which raised a SyntaxError and halted a top-to-bottom run at this point.

In [None]:
# Build a classifier from the config: the four classifier_* entries are
# forwarded under their own names, and the BERT encoder is frozen.
classifier_settings = {
    key: config[key]
    for key in (
        "classifier_type",
        "classifier_hidden_size",
        "classifier_drop_out",
        "classifier_init_state_path",
    )
}
model = BertClassifier.create_bert_classifier(
    config["bert_model_name"],
    freeze_bert=True,
    **classifier_settings,
)

In [None]:
TEST_GUID = 716

fdf[fdf.test_guid == TEST_GUID]

In [None]:
np.argsort(fdf.loss)[300]

In [None]:
fdf[fdf.test_guid == 716]

## Compute Loss Influence

In [None]:
# Parameters of the fully trained model's classifier head, passed as `param_influence`.
param_infl = [*full_model.classifier.parameters()]

# Numeric settings forwarded unchanged to compute_influence.
influence_settings = dict(lissa_r=2, lissa_depth=1, damping=5e-3, scale=100)

# Scores for the single test example selected by TEST_GUID.
infl = inf_utils.compute_influence(
    full_model=full_model,
    test_guid=TEST_GUID,
    param_influence=param_infl,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    **influence_settings,
    # training_indices=list(range(15)),
)

## Compute LOO Chart

In [None]:
def centered_percentile_idxs(infl, remove_length):
    """Return indices of the ``remove_length`` entries around the middle rank of ``infl``.

    The entries are ranked in ascending order and a window of ``remove_length``
    ranks is taken, starting ``remove_length / 2`` (truncated) positions below
    the middle rank and clamped so it never starts before rank 0.

    Args:
        infl: One-dimensional sequence of scores.
        remove_length: Number of indices to return; fewer come back when the
            window runs past the end of the ranking.

    Returns:
        Array of positions into ``infl``, in ascending order of score.
    """
    ranking = np.argsort(infl)
    middle_rank = len(infl) // 2
    window_start = max(0, middle_rank - int(remove_length / 2))
    return ranking[window_start : window_start + remove_length]

In [None]:
def compute_loo_sweep(test_guid: int, remove_pcts=(0.1,)) -> pd.DataFrame:
    """Retrain with subsets of the training data removed and collect the results.

    For every fraction in ``remove_pcts`` the model is retrained four times,
    each time after removing a different group of training examples:

    * ``"rand"`` - distinct indices drawn uniformly at random,
    * ``"top"``  - the highest influence scores,
    * ``"bot"``  - the lowest influence scores,
    * ``"zero"`` - the middle of the influence ranking
      (see ``centered_percentile_idxs``).

    Args:
        test_guid: Test example the sweep is meant for.
            NOTE(review): this argument is not used to select scores. The
            function reads the notebook-level ``infl``, so the caller must
            have computed ``infl`` for this test example beforehand.
        remove_pcts: Fractions of the training set to remove, e.g.
            ``np.arange(0.05, 0.8, 0.05)``. Defaults to a single 10% run.

    Returns:
        The per-run result frames from ``train_utils.train_bert_model``
        concatenated row-wise, with a ``type`` column naming the removal
        strategy and a ``remove_pct`` column holding the fraction removed.
    """
    num_train = len(train_dataset)
    loo_dfs = []
    for remove_pct in remove_pcts:
        remove_length = int(remove_pct * num_train)

        # Indices to drop for each strategy, keyed by the label stored in "type".
        # The random baseline samples WITHOUT replacement so it removes exactly
        # remove_length distinct examples, like the other strategies.
        removal_plans = {
            "rand": np.random.choice(num_train, size=remove_length, replace=False),
            "top": np.argsort(-infl)[:remove_length],
            "bot": np.argsort(infl)[:remove_length],
            "zero": centered_percentile_idxs(infl, remove_length),
        }

        strategy_dfs = []
        for strategy, remove_idxs in removal_plans.items():
            loo_dataset = create_loo_dataset(train_dataset, remove_idxs)
            # Only the result frame is needed; model and aggregates are discarded.
            _, strategy_df, _, _ = train_utils.train_bert_model(
                loo_dataset, test_dataset, config
            )
            strategy_df["type"] = strategy
            strategy_dfs.append(strategy_df)

        df = pd.concat(strategy_dfs, axis=0)
        df["remove_pct"] = remove_pct
        loo_dfs.append(df)
    return pd.concat(loo_dfs)

In [None]:
# Run the sweep for TEST_GUID, the same test example the cells below filter on,
# so the two cannot drift apart if the constant is changed.
df = compute_loo_sweep(TEST_GUID)
# df.to_csv('loo_dfs_0.csv', index=False)

In [None]:
df[(df.type == "rand") & (df.test_guid == TEST_GUID)]

In [None]:
df[(df.type == "top") & (df.test_guid == TEST_GUID)]

In [None]:
df[(df.type == "bot") & (df.test_guid == TEST_GUID)]

In [None]:
df[(df.type == "zero") & (df.test_guid == TEST_GUID)]

### Plot

In [None]:
TEST_GUID = 716

# Loss of the fully trained model on the chosen test example.
base_line_loss = fdf.loc[fdf.test_guid == TEST_GUID, "loss"].squeeze()

# Row mask and column subset shared by the four removal strategies.
is_target = df.test_guid == TEST_GUID
curve_columns = ["remove_pct", "loss"]

rand = df[(df.type == "rand") & is_target][curve_columns]
zero = df[(df.type == "zero") & is_target][curve_columns]
top = df[(df.type == "top") & is_target][curve_columns]
bot = df[(df.type == "bot") & is_target][curve_columns]

In [None]:
base_line_loss

In [None]:
from matplotlib import pyplot as plt

fig, ax = plt.subplots()

# Markers are needed here: when the sweep holds a single remove_pct value each
# series is one point, and a line-only style would draw nothing.
ax.plot(rand.remove_pct, rand.loss, "go-", label="rand")
ax.plot(zero.remove_pct, zero.loss, "go--", label="zero (middle of ranking)")

# ax.plot(top.remove_pct, top.loss, "ro-", label="top")
ax.plot(bot.remove_pct, bot.loss, "bo-", label="bot")

ax.set(
    xlabel="fraction of training set removed",
    ylabel="loss on test example",
    title=f"Retraining loss for test_guid {TEST_GUID}",
)
ax.legend();

In [None]:
rand

## Remove Random 10%

In [None]:
remove_pct = 0.1
remove_length = int(remove_pct * len(train_dataset))

# Draw `remove_length` distinct training indices. Sampling without replacement
# guarantees that exactly 10% of the examples are removed.
remove_idxs = np.random.choice(
    len(train_dataset), size=remove_length, replace=False
)
loo_dataset = create_loo_dataset(train_dataset, remove_idxs)

# Retrain on the reduced dataset with the same entry point used for the full model.
rand_model, rdf, rad_test_loss, rand_test_acc = train_utils.train_bert_model(
    loo_dataset, test_dataset, config
)

In [None]:
rdf[rdf.test_guid == TEST_GUID]

## Remove Top 10% Influences

In [None]:
remove_pct = 0.1
remove_length = int(remove_pct * len(train_dataset))

# Negating the scores makes argsort rank the largest influence first.
top_indxs = np.argsort(-infl)[:remove_length]
loo_dataset = create_loo_dataset(train_dataset, top_indxs)

# Retrain on the reduced dataset with the same entry point used for the full model.
t_model, tdf, top_test_loss, top_test_acc = train_utils.train_bert_model(
    loo_dataset, test_dataset, config
)

In [None]:
tdf[tdf.test_guid == TEST_GUID]

## Remove Bottom 10% Influences

In [None]:
remove_pct = 0.1
remove_length = int(remove_pct * len(train_dataset))

# Ascending argsort puts the smallest influence scores first. The indices get
# their own name so they do not overwrite the top-influence selection.
bot_indxs = np.argsort(infl)[:remove_length]
loo_dataset = create_loo_dataset(train_dataset, bot_indxs)

# Retrain on the reduced dataset with the same entry point used for the full model.
b_model, bdf, bot_test_loss, bot_test_acc = train_utils.train_bert_model(
    loo_dataset, test_dataset, config
)

In [None]:
fdf[fdf.test_guid == TEST_GUID]

In [None]:
rdf[rdf.test_guid == TEST_GUID]

In [None]:
tdf[tdf.test_guid == TEST_GUID]

In [None]:
bdf[bdf.test_guid == TEST_GUID]

In [None]:
import src.datasets as datasets

datasets.get_test_example(TEST_GUID).sentence.squeeze()

In [None]:
fdf.sort_values("loss", ascending=False)