In [1]:
WRITE_TOKEN = "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"
READ_TOKEN = "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"
PATH = 'saradiaz/distilbert-base-uncased-simpleEng-classifier'

# TruLens Observability

In [3]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
import torch 

# Wrap all of the necessary components.
class SimpleEnglishClassifier:
    model_name = PATH.split('/')[-1]

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 

    tokenizer = DistilBertTokenizer.from_pretrained(PATH, use_safetensors=True )
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})

    model = DistilBertForSequenceClassification.from_pretrained(PATH, use_safetensors=True).to(device)

    labels = ["simple", "complex"]

    SIMPLE = labels.index('simple')
    COMPLEX = labels.index('complex')

task = SimpleEnglishClassifier()

Model Wrapper

In [42]:
# %pip install trulens

In [4]:
from trulens.nn.models import get_model_wrapper
from trulens.nn.quantities import ClassQoI
from trulens.nn.attribution import IntegratedGradients
from trulens.nn.attribution import Cut, OutputCut
from trulens.utils.typing import ModelInputs
from IPython.display import display

task.wrapper = get_model_wrapper(task.model, device=task.device)

INFO: lib level=1
INFO: root level=30
INFO: Detected pytorch backend for <class 'transformers.models.distilbert.modeling_distilbert.DistilBertForSequenceClassification'>.
INFO: Changing backend from None to Backend.PYTORCH.
INFO: If this seems incorrect, you can force the correct backend by passing the `backend` parameter directly into your get_model_wrapper call.


Attributions

In [5]:
task.wrapper.print_layer_names()

'distilbert_embeddings_word_embeddings':	Embedding(30522, 768, padding_idx=0)
'distilbert_embeddings_position_embeddings':	Embedding(512, 768)
'distilbert_embeddings_LayerNorm':	LayerNorm((768,), eps=1e-12, elementwise_affine=True)
'distilbert_embeddings_dropout':	Dropout(p=0.1, inplace=False)
'distilbert_transformer_layer_0_attention_dropout':	Dropout(p=0.1, inplace=False)
'distilbert_transformer_layer_0_attention_q_lin':	Linear(in_features=768, out_features=768, bias=True)
'distilbert_transformer_layer_0_attention_k_lin':	Linear(in_features=768, out_features=768, bias=True)
'distilbert_transformer_layer_0_attention_v_lin':	Linear(in_features=768, out_features=768, bias=True)
'distilbert_transformer_layer_0_attention_out_lin':	Linear(in_features=768, out_features=768, bias=True)
'distilbert_transformer_layer_0_sa_layer_norm':	LayerNorm((768,), eps=1e-12, elementwise_affine=True)
'distilbert_transformer_layer_0_ffn_dropout':	Dropout(p=0.1, inplace=False)
'distilbert_transformer_layer_0

Parameters

In [6]:
infl_max = IntegratedGradients(
    model = task.wrapper,
    doi_cut=Cut('distilbert_embeddings_word_embeddings'),
    qoi_cut=OutputCut(accessor=lambda o: o['logits'])
)

# Alternatively we can look at a particular class:
infl_complex = IntegratedGradients(
    model = task.wrapper,
    doi_cut=Cut('distilbert_embeddings_word_embeddings'),
    qoi=ClassQoI(task.COMPLEX),
    qoi_cut=OutputCut(accessor=lambda o: o['logits'])
)

infl_simple = IntegratedGradients(
    model = task.wrapper,
    doi_cut=Cut('distilbert_embeddings_word_embeddings'),
    qoi=ClassQoI(task.SIMPLE),
    qoi_cut=OutputCut(accessor=lambda o: o['logits'])
)

Visualization

In [9]:
from trulens.visualizations import NLP

V = NLP(
    wrapper=task.wrapper,
    labels=task.labels,
    decode=lambda x: task.tokenizer.decode(x),
    tokenize=lambda sentences: ModelInputs(kwargs=task.tokenizer(sentences, padding=True, return_tensors='pt')).map(lambda t: t.to(task.device)),
    # huggingface models can take as input the keyword args as per produced by their tokenizers.

    input_accessor=lambda x: x.kwargs['input_ids'],
    # for huggingface models, input/token ids are under input_ids key in the input dictionary

    output_accessor=lambda x: x['logits'],
    # and logits under 'logits' key in the output dictionary

    hidden_tokens=set([task.tokenizer.pad_token_id])
    # do not display these tokens
)

In [10]:
texts = [
    "Gothic cathedrals are important examples of gothic architecture. Gothic architecture began in Western Europe in the Late Middle Ages. Gothic architecture started in the 12th century, and lasted until the 16th century. Important features of the style are pointed arches, ribbed vaults and flying buttresses.",
    "Gothic architecture is an architectural style that was prevalent in Europe from the late 12th to the 16th century, during the High and Late Middle Ages, surviving into the 17th and 18th centuries in some areas.[1] It evolved from Romanesque architecture and was succeeded by Renaissance architecture. It originated in the Île-de-France and Picardy regions of northern France. The style at the time was sometimes known as opus Francigenum (lit. 'French work');[2] the term Gothic was first applied contemptuously during the later Renaissance, by those ambitious to revive the architecture of classical antiquity."
    ]

print("QOI = MAX PREDICTION")
display(V.token_attribution(texts, infl_max))

print("QOI = COMPLEX")
display(V.token_attribution(texts, infl_complex))

print("QOI = SIMPLE")
display(V.token_attribution(texts, infl_simple))

QOI = MAX PREDICTION


: 

Baselines

In [None]:
from trulens.utils.nlp import token_baseline

inputs_baseline_ids, inputs_baseline_embeddings = token_baseline(
    keep_tokens=set([task.tokenizer.cls_token_id, task.tokenizer.sep_token_id]),
    # Which tokens to preserve.

    replacement_token=task.tokenizer.pad_token_id,
    # What to replace tokens with.

    input_accessor=lambda x: x.kwargs['input_ids'],

    ids_to_embeddings=task.model.get_input_embeddings()
    # Callable to produce embeddings from token ids.
    )


In [70]:
print("originals=", task.tokenizer.batch_decode(inputs['input_ids']))

baseline_word_ids = inputs_baseline_ids(model_inputs=ModelInputs(args=[], kwargs=inputs))
print("baselines=", task.tokenizer.batch_decode(baseline_word_ids))

originals= ['[CLS] however, ringe notes that while this etymology is semantically plausible, a word meaning " brown " of this form cannot be found in proto - indo - european. he suggests instead that " bear " is from the proto - indo - european word * gʰwer - ~ * gʰwer " wild animal ". [SEP]', '[CLS] the neolithic revolution was the first agricultural revolution. it was a gradual change from nomadic hunting and gathering communities to agriculture and settlement. [ 1 ] it changed the way of life of the communities which made the change. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]']
baselines= ['[CLS] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD

In [79]:
infl_complex_baseline = IntegratedGradients(
    model = task.wrapper,
    resolution=10,
    baseline = inputs_baseline_embeddings,
    doi_cut=Cut('distilbert_embeddings_word_embeddings'),
    qoi=ClassQoI(task.COMPLEX),
    qoi_cut=OutputCut(accessor=lambda o: o['logits'])
)

infl_simple_baseline = IntegratedGradients(
    model = task.wrapper,
    resolution=10,
    baseline = inputs_baseline_embeddings,
    doi_cut=Cut('distilbert_embeddings_word_embeddings'),
    qoi=ClassQoI(task.SIMPLE),
    qoi_cut=OutputCut(accessor=lambda o: o['logits'])
)

print("QOI = COMPLEX WITH BASELINE")
display(V.token_attribution(texts, infl_complex_baseline))

print("QOI = SIMPLE WITH BASELINE")
display(V.token_attribution(texts, infl_simple_baseline))

QOI = COMPLEX WITH BASELINE


QOI = SIMPLE WITH BASELINE
