# Install requirements / Clone repository

In [14]:
! git clone "https://github.com/mohsenfayyaz/DecompX"
! pip install datasets==1.18.3
! pip install transformers==4.18.0

fatal: destination path 'DecompX' already exists and is not an empty directory.


# Config (Change model and sentence here)

In [37]:
import torch
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import matplotlib
from IPython.display import display, HTML
from transformers import AutoTokenizer
from DecompX.src.decompx_utils import DecompXConfig
from DecompX.src.modeling_bert import BertForSequenceClassification
from DecompX.src.modeling_roberta import RobertaForSequenceClassification

# BERT-family checkpoints visited by the evaluation loop at the end of the notebook.
BERT_MODELS = [
    "lyeonii/bert-tiny",
    "lyeonii/bert-mini",
    "lyeonii/bert-small",
    "lyeonii/bert-medium",
    "google-bert/bert-base-uncased",
    "google-bert/bert-large-uncased",
]

# RoBERTa-family checkpoints visited by the same loop.
ROBERTA_MODELS = [
    "smallbenchnlp/roberta-small",
    "JackBAI/roberta-medium",
    "FacebookAI/roberta-base",
    "FacebookAI/roberta-large",
]

# Sample sentences (not referenced by the evaluation cells visible in this notebook).
SENTENCES = [
    "A deep and meaningful film.",
    "a good piece of work more often than not.",
]

# Named DecompX configurations; compute_decompx reads CONFIGS["DecompX"].
CONFIGS = {
    "DecompX": DecompXConfig(
        # Presumably: which model components take part in the decomposition
        # and how they are approximated -- confirm against DecompXConfig.
        include_biases=True,
        bias_decomp_type="absdot",
        include_LN1=True,
        include_FFN=True,
        FFN_approx_type="GeLU_ZO",
        include_LN2=True,
        aggregation="vector",
        include_classifier_w_pooler=True,
        tanh_approx_type="ZO",
        # Presumably: which intermediate results are returned (None = not
        # returned) -- confirm against DecompXConfig.
        output_all_layers=True,
        output_attention=None,
        output_res1=None,
        output_LN1=None,
        output_FFN=None,
        output_res2=None,
        output_encoder=None,
        output_aggregated="norm",
        output_pooler="norm",
        output_classifier=True,
    ),
}

# Load corresponding model/tokenizer

In [16]:
def load_model_and_tokenizer(model_name, input_sentences):
    """Load the DecompX model and tokenizer for `model_name` and tokenize the inputs.

    Args:
        model_name: Checkpoint identifier. Names containing "roberta" use
            RobertaForSequenceClassification, other names containing "bert"
            use BertForSequenceClassification (matching is case-insensitive).
        input_sentences: A sentence or list of sentences to tokenize as one batch.

    Returns:
        (model, tokenizer, tokenized_sentence, batch_lengths), where
        tokenized_sentence is the padded "pt" tokenizer output and
        batch_lengths is the per-sequence sum of the attention mask.

    Raises:
        ValueError: If `model_name` matches neither supported model family.
    """
    # Resolve the model class first so an unsupported name fails fast, before
    # anything is downloaded. "roberta" must be tested before "bert" because
    # every name containing "roberta" also contains "bert".
    lowered_name = model_name.lower()
    if "roberta" in lowered_name:
        model_cls = RobertaForSequenceClassification
    elif "bert" in lowered_name:
        model_cls = BertForSequenceClassification
    else:
        raise ValueError(f"Not implemented model: {model_name}")

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # truncation=True keeps long inputs within the tokenizer's maximum length
    # instead of producing sequences the model cannot embed.
    tokenized_sentence = tokenizer(
        input_sentences,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    batch_lengths = tokenized_sentence['attention_mask'].sum(dim=-1)
    model = model_cls.from_pretrained(model_name)
    return model, tokenizer, tokenized_sentence, batch_lengths

# Compute DecompX

In [30]:
def compute_decompx(model, tokenizer, tokenized_sentence, batch_lengths):
    """Run the model with CONFIGS["DecompX"] and collect per-example importances.

    Args:
        model: DecompX-enabled sequence classifier accepting `decompx_config`.
        tokenizer: Tokenizer used to turn input ids back into token strings.
        tokenized_sentence: Padded tokenizer output for the whole batch.
        batch_lengths: Number of non-padding tokens of each sequence.

    Returns:
        (decompx_outputs_df, importance): a DataFrame with one row per example
        (tokens, logits, cls, predictions and the importance_* columns), and
        the list stored in "importance_all_layers_aggregated".
    """
    # Example shapes for a batch of 8 sequences padded to 55 tokens:
    # logits ~ (8, 2)
    # hidden_states ~ (13, 8, 55, 768)
    # decompx_last_layer_outputs.aggregated ~ (1, 8, 55, 55)
    # decompx_last_layer_outputs.pooler ~ (1, 8, 55)
    # decompx_last_layer_outputs.classifier ~ (8, 55, 2)
    # decompx_all_layers_outputs.aggregated ~ (12, 8, 55, 55)
    with torch.no_grad():
        model.eval()
        logits, hidden_states, decompx_last_layer_outputs, decompx_all_layers_outputs = model(
            **tokenized_sentence,
            output_attentions=False,
            return_dict=False,
            output_hidden_states=True,
            decompx_config=CONFIGS["DecompX"]
        )

    # The batch size comes from the function's own inputs; the original read
    # the notebook-level `labels` variable, which is hidden state.
    lengths = [int(n) for n in batch_lengths]
    batch_size = len(lengths)

    predictions = torch.argmax(logits, dim=1).cpu().tolist()  # Predicted class
    decompx_outputs = {
        "tokens": [
            tokenizer.convert_ids_to_tokens(tokenized_sentence["input_ids"][i][:lengths[i]])
            for i in range(batch_size)
        ],
        "logits": logits.cpu().detach().numpy().tolist(),  # (batch, classes)
        "cls": hidden_states[-1][:, 0, :].cpu().detach().numpy().tolist(),  # Last layer & only CLS -> (batch, emb_dim)
        "predictions": predictions
    }

    # NOTE(review): the .squeeze() calls below drop every size-1 axis, so a
    # batch with a single example presumably loses its batch axis -- confirm
    # before calling this with one sentence.

    ### decompx_last_layer_outputs.aggregated ~ (1, 8, 55, 55) ###
    importance = np.array([g.squeeze().cpu().detach().numpy() for g in decompx_last_layer_outputs.aggregated]).squeeze()  # (batch, seq_len, seq_len)
    # Cut the padding off both token axes of every example.
    importance = [importance[j][:lengths[j], :lengths[j]] for j in range(len(importance))]
    decompx_outputs["importance_last_layer_aggregated"] = importance

    ### decompx_last_layer_outputs.pooler ~ (1, 8, 55) ###
    importance = np.array([g.squeeze().cpu().detach().numpy() for g in decompx_last_layer_outputs.pooler]).squeeze()  # (batch, seq_len)
    importance = [importance[j][:lengths[j]] for j in range(len(importance))]
    decompx_outputs["importance_last_layer_pooler"] = importance

    ### decompx_last_layer_outputs.classifier ~ (8, 55, 2) ###
    importance = np.array([g.squeeze().cpu().detach().numpy() for g in decompx_last_layer_outputs.classifier]).squeeze()  # (batch, seq_len, classes)
    importance = [importance[j][:lengths[j], :] for j in range(len(importance))]
    decompx_outputs["importance_last_layer_classifier"] = importance

    ### decompx_all_layers_outputs.aggregated ~ (12, 8, 55, 55) ###
    importance = np.array([g.squeeze().cpu().detach().numpy() for g in decompx_all_layers_outputs.aggregated])  # (layers, batch, seq_len, seq_len)
    importance = np.einsum('lbij->blij', importance)  # (batch, layers, seq_len, seq_len)
    importance = [importance[j][:, :lengths[j], :lengths[j]] for j in range(len(importance))]
    decompx_outputs["importance_all_layers_aggregated"] = importance

    decompx_outputs_df = pd.DataFrame(decompx_outputs)

    return decompx_outputs_df, importance

# Visualization

In [31]:
def print_importance(importance, tokenized_text, discrete=False, prefix="", no_cls_sep=False):
    """
    Display the tokens as an HTML strip whose background colors encode importance.

    importance: (sent_len) one score per token; non-negative scores are drawn
        with the 'Greens' colormap, negative scores with 'Reds'.
    tokenized_text: token strings aligned with `importance`.
    discrete: if True, color by the rank of each score instead of its value.
    prefix: text placed before the first token.
    no_cls_sep: if True, drop the first and last position of both inputs.

    Returns the HTML string that was displayed.
    """
    if no_cls_sep:
        importance = importance[1:-1]
        tokenized_text = tokenized_text[1:-1]

    importance = np.asarray(importance)
    # Scale so the largest magnitude maps to 1/1.5. Empty and all-zero inputs
    # skip the division, which would otherwise raise or fill the array with NaN.
    peak = np.abs(importance).max() if importance.size else 0
    if peak > 0:
        importance = importance / peak / 1.5
    if discrete:
        # Rank-based coloring: argsort of argsort yields each score's rank.
        importance = np.argsort(np.argsort(importance)) / len(importance) / 1.6

    # Look the colormaps up once instead of once per token.
    positive_cmap = matplotlib.colormaps.get_cmap('Greens')
    negative_cmap = matplotlib.colormaps.get_cmap('Reds')

    html = "<pre style='color:black; padding: 3px;'>"+prefix
    for i in range(len(tokenized_text)):
        if importance[i] >= 0:
            rgba = positive_cmap(importance[i])
        else:
            rgba = negative_cmap(np.abs(importance[i]))
        # White text only on very dark backgrounds.
        text_color = "color: rgba(255, 255, 255, 1.0); " if np.abs(importance[i]) > 0.9 else ""
        color = f"background-color: rgba({rgba[0]*255}, {rgba[1]*255}, {rgba[2]*255}, {rgba[3]}); " + text_color
        html += (f"<span style='"
                 f"{color}"
                 f"border-radius: 5px; padding: 3px;"
                 f"font-weight: 800;"
                 "'>")
        # Angle brackets (e.g. in special tokens) would be parsed as HTML tags.
        html += tokenized_text[i].replace('<', "[").replace(">", "]")
        html += "</span> "
    html += "</pre>"  # the opening <pre> was previously left unclosed
    display(HTML(html))
    return html

def print_preview(model, tokenizer, tokenized_sentence, batch_lengths, labels, idx=0, discrete=False):
    """Print actual/predicted labels and token-importance strips for one example.

    Runs compute_decompx on the whole batch, then for example `idx` shows:
      * the first row of the last-layer aggregated importance matrix, and
      * the last-layer classifier importance, one strip per class.

    Args:
        model, tokenizer, tokenized_sentence, batch_lengths: forwarded to
            compute_decompx.
        labels: Ground-truth labels, indexed by `idx`.
        idx: Position of the example within the batch.
        discrete: Forwarded to print_importance for the single-strip columns
            (the per-class classifier strips always use discrete=False).

    Returns:
        The DataFrame produced by compute_decompx.
    """
    NO_CLS_SEP = False
    df, _ = compute_decompx(model, tokenizer, tokenized_sentence, batch_lengths)

    actual_label = labels[idx]
    predicted_label = df["predictions"][idx]
    print(f"Actual Label: {actual_label} (Non-toxic: 0, Toxic: 1)")
    print(f"Predicted Label: {predicted_label} (Non-toxic: 0, Toxic: 1)")

    tokens = df["tokens"].iloc[idx]
    for col in ["importance_last_layer_aggregated", "importance_last_layer_classifier"]:
        if col not in df or df[col][idx] is None:
            continue
        scores = df[col].iloc[idx]
        kind = col.split('_')[-1]

        if "classifier" in col:
            # One strip per class; there is no single combined strip for this column.
            for label in range(scores.shape[-1]):
                print_importance(
                    scores[:, label],
                    tokens,
                    prefix=f"{kind} Label{label}:".ljust(20),
                    no_cls_sep=NO_CLS_SEP,
                    discrete=False
                )
            continue

        if "aggregated" in col:
            # Row 0 of the (seq_len, seq_len) matrix.
            sentence_importance = scores[0, :]
        elif "pooler" in col:
            sentence_importance = scores
        else:
            # Unknown column kind: nothing to draw.
            continue
        print_importance(
            sentence_importance,
            tokens,
            prefix=f"{kind}:".ljust(20),
            no_cls_sep=NO_CLS_SEP,
            discrete=discrete
        )
    print("------------------------------------")
    return df

In [19]:
from datasets import load_dataset

# Fetch the dataset used for the evaluation examples
dataset = load_dataset("shlomihod/civil-comments-wilds")

# Report how many rows each split holds
for split, split_rows in dataset.items():
    print(f"Split: {split}, Size: {len(split_rows)}")



  0%|          | 0/3 [00:00<?, ?it/s]

Split: train, Size: 269038
Split: validation, Size: 45180
Split: test, Size: 133782


In [20]:
# Peek at training rows 5-12; the slice is displayed as a dict of column lists.
dataset['train'][5:13]

{'text': ['JackJohnson5:\n"And they say Trump supporters are uneducated!"\n\n-\n\nNo, not necessarily uneducated, just too emotionally immature to admit that they\'ve bought a lie.\n\nBeing wrong hurts, so they choose to accept only the information that confirms what they already believe.',
  "Trump could accidently push a button and Canada \nwould be on it's back with a boot and knife on it's\nthroat. That will be the outcome of your wise crack.",
  "CNN fake News Network Jim Acosta is a douchebag the United States has five intelligence agencies and 17 branches there was three agencies said Russia did it CIA FBI NSA like I said he's a douchebag",
  'Are you a Pilgrim?\nWhy arn\'t you growing your own veggies? \nAnd do you haul them in a covered wagon?\n\nThankfully it isn\'t up to "white man" to determine what is traditional for the First Peoples of Alaska.\nBut they try to all the time and it looks pathetic.',
  'Any way you put it or say it,  or con-cock it,  word play, semantics, p

In [32]:
def visual_evaluation(model_name, examples, labels):
    """Visualize DecompX importances for each example and report accuracy.

    Args:
        model_name: Checkpoint identifier passed to load_model_and_tokenizer.
        examples: Input texts, tokenized together as one batch.
        labels: Ground-truth labels aligned with `examples`.

    Returns:
        The fraction of examples whose predicted label equals its label.

    Raises:
        ValueError: If `examples` is empty or has more entries than `labels`.
    """
    # Validate before loading anything: an empty batch would otherwise end in
    # a ZeroDivisionError, a short `labels` in an IndexError mid-evaluation.
    num_examples = len(examples)
    if num_examples == 0:
        raise ValueError("visual_evaluation needs at least one example")
    if len(labels) < num_examples:
        raise ValueError(
            f"Got {num_examples} examples but only {len(labels)} labels"
        )

    # Load the model and tokenizer
    model, tokenizer, tokenized_sentence, batch_lengths = load_model_and_tokenizer(model_name, examples)

    # Evaluate each example.
    # NOTE(review): print_preview calls compute_decompx on the whole batch
    # each time, so the batch is re-evaluated once per example.
    correct_predictions = 0
    for i in range(num_examples):
        df = print_preview(model, tokenizer, tokenized_sentence, batch_lengths, labels, idx=i)
        if df["predictions"][i] == labels[i]:
            correct_predictions += 1

    # Print accuracy
    accuracy = correct_predictions / num_examples
    print(f"Accuracy for {model_name}: {accuracy:.2%}")
    return accuracy

In [35]:
# Evaluation batch: the text and label of training rows 10 and 11.
examples = dataset['train'][10:12]['text']
labels = dataset['train'][10:12]['label']

In [38]:
# Run the visual evaluation once per configured BERT and RoBERTa checkpoint.
# NOTE(review): the recorded output for google-bert/bert-base-uncased warns
# that some BertForSequenceClassification weights were not initialized from
# the checkpoint, so its predictions presumably come from an untrained
# classification head -- confirm that fine-tuned checkpoints are intended.
for model_name in BERT_MODELS + ROBERTA_MODELS:
    print(f"Evaluating Model: {model_name}")
    visual_evaluation(model_name, examples, labels)

Evaluating Model: google-bert/bert-base-uncased


Some weights of the model checkpoint at google-bert/bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model c

Actual Label: 0 (Non-toxic: 0, Toxic: 1)
Predicted Label: 0 (Non-toxic: 0, Toxic: 1)


------------------------------------


KeyboardInterrupt: 