# Configs

In [1]:
# Run configuration: the model to load, an identifier for the analysed
# sequence, and the text whose attention pattern is inspected.
config = dict(
    model_id='Equall/Saul-7B-Base',
    sequence_id='facet-1',
    text_sequence="Section 2(s) of Industrial Disputes Act: 'workman' means any person (including an apprentice) employed in any industry to do any manual, unskilled, skilled, technical, operational, clerical or supervisory work for hire or reward",
)


def _strip_slashes(identifier):
    """Return ``identifier`` with every '/' removed, for use inside a file name."""
    return ''.join(identifier.split('/'))


# Slash-free variants used when building the output file names.
safe_model_id = _strip_slashes(config['model_id'])
safe_sequence_id = _strip_slashes(config['sequence_id'])

# Installs and Secrets

In [2]:
!pip install -U bertviz -q

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/157.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m153.6/157.6 kB[0m [31m11.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m157.6/157.6 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.5/12.5 MB[0m [31m90.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.7/82.7 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
import os

import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForCausalLM, utils

In [4]:
# Hugging Face access token. Read from the HF_TOKEN environment variable so the
# secret never has to be typed into (and saved with) the notebook; falls back to
# the previous empty-string default when the variable is not set.
hf_token = os.environ.get('HF_TOKEN', '')

# Helper Functions
From https://github.com/jessevig/bertviz/blob/master/bertviz/util.py, as implemented in Vig (2019), [A Multiscale Visualization of Attention in the Transformer Model](https://aclanthology.org/P19-3007.pdf) (ACL System Demonstrations 2019).

In [5]:
def num_layers(attention):
    """Return the number of entries in ``attention`` (one entry per layer)."""
    layer_count = len(attention)
    return layer_count


def num_heads(attention):
    """Return the size of the leading dimension of ``attention[0][0]``.

    With per-layer tensors laid out as 1 x num_heads x seq_len x seq_len, this
    is the number of attention heads.
    """
    first_layer = attention[0]
    first_batch_item = first_layer[0]
    return first_batch_item.size(0)


def format_special_chars(tokens):
    """Return a new list of tokens with tokenizer marker strings made readable.

    'Ġ' and '▁' are each replaced by a space and '</w>' is removed.
    """
    substitutions = (('Ġ', ' '), ('▁', ' '), ('</w>', ''))
    cleaned = []
    for token in tokens:
        for marker, replacement in substitutions:
            token = token.replace(marker, replacement)
        cleaned.append(token)
    return cleaned


def format_attention(attention, layers=None, heads=None):
    """Stack per-layer attention tensors into a single tensor.

    Args:
        attention: Sequence of per-layer attention tensors. Each must be 4-D
            (expected layout: 1 x num_heads x seq_len x seq_len).
        layers: Optional collection of layer indices to keep. A falsy value
            (None or empty) keeps every layer.
        heads: Optional list or tuple of head indices to keep. A falsy value
            (None or empty) keeps every head.

    Returns:
        Tensor of shape num_layers x num_heads x seq_len x seq_len, restricted
        to the selected layers/heads.

    Raises:
        ValueError: If a layer tensor is not 4-dimensional.
    """
    if layers:
        attention = [attention[layer_index] for layer_index in layers]
    if isinstance(heads, tuple):
        # torch reads a tuple index as one index per dimension (t[0, 1]), which
        # would pick a single row instead of several heads; a list selects
        # along the head dimension as intended.
        heads = list(heads)
    squeezed = []
    for layer_attention in attention:
        # 1 x num_heads x seq_len x seq_len
        if len(layer_attention.shape) != 4:
            raise ValueError("The attention tensor does not have the correct number of dimensions. Make sure you set "
                             "output_attentions=True when initializing your model.")
        # Drops the leading batch dimension when its size is 1.
        layer_attention = layer_attention.squeeze(0)
        if heads:
            layer_attention = layer_attention[heads]
        squeezed.append(layer_attention)
    # num_layers x num_heads x seq_len x seq_len
    return torch.stack(squeezed)

# Instantiate Model and Components

In [6]:
# Load the tokenizer and the causal LM named in the config.
# `token` replaces the deprecated `use_auth_token` keyword of `from_pretrained`.
# `output_attentions=True` makes the model return its attention weights, which
# the cells below analyse.
tokenizer = AutoTokenizer.from_pretrained(config['model_id'], token=hf_token)
model = AutoModelForCausalLM.from_pretrained(config['model_id'], output_attentions=True, token=hf_token)



tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/690 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/6 [00:00<?, ?it/s]

model-00001-of-00006.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00006.safetensors:   0%|          | 0.00/4.90G [00:00<?, ?B/s]

model-00003-of-00006.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00006.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00005-of-00006.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00006-of-00006.safetensors:   0%|          | 0.00/4.25G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

# Forward Pass on the Text Sequence

In [7]:
text_sequence = config['text_sequence']

# Convert the text into token IDs, returned as a PyTorch tensor.
inputs = tokenizer.encode(text_sequence, return_tensors='pt')

# Inference only: disable autograd so no computation graph is kept alive for
# the attention tensors.
with torch.no_grad():
    outputs = model(inputs)

# Read the attentions by name instead of by position in the output, then stack
# them into one (num_layers, num_heads, seq_len, seq_len) tensor.
attention_matrices = format_attention(outputs.attentions)

# Convert the token IDs back into their token strings,
# e.g. ['<s>', '▁Section', '▁', '2', ...] for the configured sequence.
output_tokens = tokenizer.convert_ids_to_tokens(inputs[0])



In [8]:
# Summarise the forward pass: tokens, sequence length and attention tensor shape.
report_lines = (
    f"Model Output Tokens: {output_tokens}\n",
    f"Model Output Sequence Length: {len(output_tokens)}\n",
    f"Model Attention Matrices: {type(attention_matrices)} of shape {attention_matrices.shape}\n",
    "Preview of attention scores in layer 0 head 0:\n",
)
for report_line in report_lines:
    print(report_line)
print(attention_matrices[0][0])

Model Output Tokens: ['<s>', '▁Section', '▁', '2', '(', 's', ')', '▁of', '▁Industrial', '▁Dis', 'put', 'es', '▁Act', ':', "▁'", 'work', 'man', "'", '▁means', '▁any', '▁person', '▁(', 'including', '▁an', '▁app', 'rent', 'ice', ')', '▁employed', '▁in', '▁any', '▁industry', '▁to', '▁do', '▁any', '▁manual', ',', '▁un', 'sk', 'illed', ',', '▁skilled', ',', '▁technical', ',', '▁operational', ',', '▁cler', 'ical', '▁or', '▁super', 'vis', 'ory', '▁work', '▁for', '▁hire', '▁or', '▁reward']

Model Output Sequence Length: 58

Model Attention Matrices: <class 'torch.Tensor'> of shape torch.Size([32, 32, 58, 58])

Preview of attention scores in layer 0 head 0:

tensor([[1.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [9.9615e-01, 3.8518e-03, 0.0000e+00,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [9.4380e-01, 3.0975e-03, 5.3103e-02,  ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        ...,
        [7.6195e-01, 6.3354e-04, 4.3106

### Save raw attention matrices

In [9]:
# Persist the unfiltered attention tensor as a NumPy array.
raw_attention_path = f'{safe_sequence_id}_{safe_model_id}_attentions_raw.npy'
raw_attention_array = attention_matrices.detach().numpy()
np.save(raw_attention_path, raw_attention_array)

# Exclude attention given to tokens of low semantic importance, such as punctuation and the start-of-sequence token

### Inspect Model Output Tokens

In [10]:
# Show the tokens again so the ones to exclude can be picked out.
token_listing = f"Model Output Tokens: {output_tokens}\n"
print(token_listing)

Model Output Tokens: ['<s>', '▁Section', '▁', '2', '(', 's', ')', '▁of', '▁Industrial', '▁Dis', 'put', 'es', '▁Act', ':', "▁'", 'work', 'man', "'", '▁means', '▁any', '▁person', '▁(', 'including', '▁an', '▁app', 'rent', 'ice', ')', '▁employed', '▁in', '▁any', '▁industry', '▁to', '▁do', '▁any', '▁manual', ',', '▁un', 'sk', 'illed', ',', '▁skilled', ',', '▁technical', ',', '▁operational', ',', '▁cler', 'ical', '▁or', '▁super', 'vis', 'ory', '▁work', '▁for', '▁hire', '▁or', '▁reward']



### Specify tokens to be excluded

In [11]:
# Tokens whose received attention is zeroed out in the cells below.
# NOTE(review): "▁'" (opening quote with the word-start marker) occurs in the
# token sequence but is not listed here, while "'" and '▁(' are — confirm
# whether that is intended.
tokens_to_exclude = (
    ["'", '(', ')', ',', '-', '.', ':']      # bare punctuation
    + ['<s>']                                # start-of-sequence token
    + ['▁', '▁(', '▁...', '▁[...]']          # word-start marker alone or followed by punctuation
)

### Identify indices of excluded tokens in the token sequence

In [12]:
# Positions in the token sequence that hold an excluded token.
indices_of_tokens_to_exclude = []
for position, candidate in enumerate(output_tokens):
    if candidate in tokens_to_exclude:
        indices_of_tokens_to_exclude.append(position)

# Independent count of the same occurrences, used as a sanity check below.
number_of_tokens_to_exclude = 0
for excluded_token in tokens_to_exclude:
    number_of_tokens_to_exclude += output_tokens.count(excluded_token)

print(indices_of_tokens_to_exclude)
assert len(indices_of_tokens_to_exclude) == number_of_tokens_to_exclude, "Number of indices retrieved does not match number of tokens to exclude."

[0, 2, 4, 6, 13, 17, 21, 27, 36, 40, 42, 44, 46]


### Reset attention scores of excluded token indices to 0

In [14]:
# Work on a copy so the raw attention matrices stay untouched.
attention_matrices_filtered = attention_matrices.clone()

# Zero the last-axis entries (the attended-to token positions) of every excluded
# token in one indexed assignment, across all layers, heads and query positions.
if indices_of_tokens_to_exclude:
    attention_matrices_filtered[..., indices_of_tokens_to_exclude] = 0

print(f"Filtered attention matrix of layer 0, head 0:\n")
print(attention_matrices_filtered[0][0])

Filtered attention matrix of layer 0, head 0:

tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0039, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0031, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0006, 0.0000,  ..., 0.0069, 0.0000, 0.0000],
        [0.0000, 0.0005, 0.0000,  ..., 0.0022, 0.0130, 0.0000],
        [0.0000, 0.0012, 0.0000,  ..., 0.0048, 0.0203, 0.0083]],
       grad_fn=<SelectBackward0>)


### Save filtered attention matrices

In [15]:
# Persist the filtered attention tensor as a NumPy array.
filtered_attention_path = f'{safe_sequence_id}_{safe_model_id}_attentions_filtered.npy'
filtered_attention_array = attention_matrices_filtered.detach().numpy()
np.save(filtered_attention_path, filtered_attention_array)

# Compute proportion of filtered attention given to tokens representing legal facets

### Inspect Tokens

In [16]:
# Show the tokens again so the legal-facet tokens can be picked out.
token_listing = f"Model Output Tokens: {output_tokens}\n"
print(token_listing)

Model Output Tokens: ['<s>', '▁Section', '▁', '2', '(', 's', ')', '▁of', '▁Industrial', '▁Dis', 'put', 'es', '▁Act', ':', "▁'", 'work', 'man', "'", '▁means', '▁any', '▁person', '▁(', 'including', '▁an', '▁app', 'rent', 'ice', ')', '▁employed', '▁in', '▁any', '▁industry', '▁to', '▁do', '▁any', '▁manual', ',', '▁un', 'sk', 'illed', ',', '▁skilled', ',', '▁technical', ',', '▁operational', ',', '▁cler', 'ical', '▁or', '▁super', 'vis', 'ory', '▁work', '▁for', '▁hire', '▁or', '▁reward']



### Specify tokens representing legal facets

In [17]:
# Token strings whose share of the filtered attention is measured below.
tokens_to_compute = [
    'work',
    'man',
]

In [18]:
# Positions in the token sequence that hold one of the tokens of interest.
indices_of_tokens_to_compute = [i for i, token in enumerate(output_tokens) if token in tokens_to_compute]
# Independent count of the same occurrences, used as a sanity check below.
number_of_tokens_to_compute = sum(output_tokens.count(token) for token in tokens_to_compute)
print(indices_of_tokens_to_compute)
# The message previously said "tokens to exclude" (copied from the exclusion
# cell); this check is about the tokens to compute.
assert len(indices_of_tokens_to_compute) == number_of_tokens_to_compute, "Number of indices retrieved does not match number of tokens to compute."

[15, 16]


### Compute proportion of attention given to specified token indices

In [19]:
# For every (layer, head): the share of that head's filtered attention mass that
# lands on the tokens of interest.
#
# The counts are bound to `n_layers` / `n_heads` rather than `num_layers` /
# `num_heads`, so the helper functions of those names defined in the Helper
# Functions cell are not overwritten with integers.
n_layers, n_heads, _, _ = attention_matrices_filtered.shape

# One proportion per layer and head.
proportion_matrix = np.zeros((n_layers, n_heads))

for layer in range(n_layers):
    for head in range(n_heads):
        # Attention matrix of the current layer and head.
        current_attention = attention_matrices_filtered[layer][head]

        # Attention directed to the indices of interest.
        attention_sum_indices = current_attention[:, indices_of_tokens_to_compute].sum().item()

        # All attention remaining in this head after filtering.
        total_attention_sum = current_attention.sum().item()

        # A head with no attention mass left has an undefined proportion; store
        # NaN instead of raising ZeroDivisionError.
        if total_attention_sum == 0:
            proportion = np.nan
        else:
            proportion = attention_sum_indices / total_attention_sum

        proportion_matrix[layer, head] = proportion

### Save proportion matrix to local disk

In [20]:
# Persist the per-layer, per-head proportion matrix as a NumPy array.
proportions_path = f'{safe_sequence_id}_{safe_model_id}_proportions.npy'
np.save(proportions_path, proportion_matrix)