In [1]:
import captum
import torch
import thermostat
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from captum.attr import IntegratedGradients, FeatureAblation
import onnxruntime as ort
from tqdm import tqdm
from helpers import load_distilbert
import json
from baselines import ZeroBaselineFactory
from evaluators import ProportionalityEvaluator


In [2]:
# Load the sample records that the attribution loop later iterates over
# (it reads the "input_ids", "label" and "idx" keys of each record).
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale-dependent default encoding.
with open("../data/imdb-distilbert-1000.json", "r", encoding="utf-8") as fp:
    dataset = json.load(fp)

In [3]:
# Build the model wrapper first; the evaluator below is constructed around it.
# NOTE(review): `from_notebook=1` presumably switches the helper to
# notebook-relative paths -- confirm in helpers.load_distilbert.
model = load_distilbert(from_notebook=1)

# The baseline factory is handed over as the class itself, not an instance.
evaluator = ProportionalityEvaluator(
    baseline_factory=ZeroBaselineFactory,
    model=model,
)




In [9]:
def forward_func(x):
    """Adapt `model` to the tensor-in / tensor-out callable handed to Captum.

    Only the first row of `x` is forwarded to `model`; the result is wrapped
    in a one-element list and converted to a tensor.

    NOTE(review): assumes `model` accepts a NumPy array of token ids and
    returns one score per class -- confirm against helpers.load_distilbert.
    """
    first_row = np.array(x)[0]
    return torch.tensor([model(first_row)])


method = FeatureAblation(forward_func)

In [10]:
# For each of the first 10 samples: compute the attribution of the labelled
# class with `method`, score it with the evaluator's `compute_tpn` and
# `compute_tps`, and collect everything as one JSON-friendly dict per sample.
attributions = []
for sample in tqdm(dataset[:10]):
    # Wrapping the id list in another list adds a leading batch dimension of 1.
    input_ids = torch.tensor([sample["input_ids"]]).long()
    target_class = sample["label"]

    # No integration `method` is passed here: that option belongs to
    # IntegratedGradients, and FeatureAblation.attribute ignores extra
    # keyword arguments when used directly.
    # `[0]` drops the batch dimension again before converting to NumPy.
    attribution = (
        method.attribute(input_ids, target=target_class)[0].detach().numpy()
    )

    # Both evaluator calls receive the same un-batched token ids, so convert
    # them to NumPy once instead of once per call.
    observation = input_ids[0].detach().numpy()
    tpn = evaluator.compute_tpn(
        observation=observation,
        attribution_values=attribution,
    )
    tps = evaluator.compute_tps(
        observation=observation,
        attribution_values=attribution,
    )

    # `.tolist()` turns the attribution array into plain Python lists so the
    # record can be serialised with `json` later.
    attributions.append(
        dict(
            idx=sample["idx"],
            attribution_values=attribution.tolist(),
            tpn=tpn,
            tps=tps,
        )
    )

100%|██████████| 10/10 [00:02<00:00,  3.60it/s]


In [11]:
# Persist the collected per-sample attribution records as a single JSON array.
with open("../data/feature-ablation.json", "w") as out_file:
    json.dump(attributions, out_file)