In [3]:
import torch
import torch.nn as nn
from model_mq import Transformer
from config import get_config, get_weights_file_path
from train import get_model, get_ds, greedy_decode
# import altair as alt
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

AttributeError: module 'pyarrow.lib' has no attribute 'Bool8Type'

In [39]:
# Select the compute device: CUDA when PyTorch can see a GPU, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [None]:
# Build the dataloaders/vocabularies and a model sized to both vocabularies
config = get_config()
train_dataloader, val_dataloader, vocab_src, vocab_tgt = get_ds(config)
model = get_model(config, vocab_src.get_vocab_size(), vocab_tgt.get_vocab_size()).to(device)

# Load the pretrained weights.
# "03" is the checkpoint identifier handed to get_weights_file_path
# (presumably the epoch number — confirm against config.py).
model_filename = get_weights_file_path(config, "03")
# map_location remaps tensors that were saved on another device (e.g. a CUDA
# checkpoint opened on a CPU-only machine) onto the device selected above.
# NOTE(review): torch.load unpickles the file; only open checkpoints you trust.
state = torch.load(model_filename, map_location=device)
# This notebook only runs inference: put modules such as dropout into their
# evaluation behaviour so the captured attention maps are not randomly perturbed.
model.eval()
model.load_state_dict(state['model_state_dict'])

Max length of source sentence: 36
Max length of target sentence: 40


<All keys matched successfully>

In [41]:
def load_next_batch():
    """Fetch one validation batch, decode it greedily, and return its tokens.

    The decoded output itself is discarded; the forward passes are run because
    they are presumably what populates the ``attention_scores`` attributes read
    later by ``get_attn_map`` (confirm against the model implementation).

    Returns:
        tuple: ``(batch, encoder_input_tokens, decoder_input_tokens)`` where the
        token lists are the string tokens of the first (only) item in the batch.

    Raises:
        AssertionError: if the validation batch holds more than one item.
    """
    # NOTE(review): a fresh iterator is created on every call, so this returns
    # the first batch of a new pass over val_dataloader instead of advancing
    # through the set — verify this is intended.
    batch = next(iter(val_dataloader))
    encoder_input = batch["encoder_input"].to(device)
    encoder_mask = batch["encoder_mask"].to(device)
    decoder_input = batch["decoder_input"].to(device)

    # Validate up front, before item 0 is used as "the" sentence
    assert encoder_input.size(0) == 1, "Batch size must be 1 for validation"

    # Map ids back to readable tokens (plain Python ints via tolist())
    encoder_input_tokens = [vocab_src.id_to_token(idx) for idx in encoder_input[0].tolist()]
    decoder_input_tokens = [vocab_tgt.id_to_token(idx) for idx in decoder_input[0].tolist()]

    # Inference only: no autograd graph is needed for visualisation
    with torch.no_grad():
        greedy_decode(
            model, encoder_input, encoder_mask, vocab_src, vocab_tgt, config['seq_len'], device)

    return batch, encoder_input_tokens, decoder_input_tokens

In [42]:
def get_attn_map(attn_type: str, layer: int, head: int):
    """Return the stored attention scores of one head for the first batch item.

    Args:
        attn_type: ``"encoder"`` (encoder self-attention), ``"decoder"``
            (decoder self-attention) or ``"encoder-decoder"`` (decoder
            cross-attention).
        layer: index into the encoder/decoder ``layers`` list.
        head: index of the attention head.

    Returns:
        The ``attention_scores[0, head]`` tensor of the selected block,
        detached from the autograd graph.

    Raises:
        ValueError: if ``attn_type`` is not one of the three supported names.
    """
    if attn_type == "encoder":
        block = model.encoder.layers[layer].self_attention_block
    elif attn_type == "decoder":
        block = model.decoder.layers[layer].self_attention_block
    elif attn_type == "encoder-decoder":
        block = model.decoder.layers[layer].cross_attention_block
    else:
        # Previously an unknown name fell through to an UnboundLocalError
        raise ValueError(
            f"Unknown attn_type {attn_type!r}; expected 'encoder', 'decoder' or 'encoder-decoder'"
        )
    # Index 0 selects the single item of the batch
    return block.attention_scores[0, head].detach()

def attn_map_matplotlib(attn_type, layer, head, row_tokens, col_tokens, max_sentence_len):
    """Draw the attention scores of one head as a seaborn heatmap.

    Args:
        attn_type: attention kind understood by ``get_attn_map``.
        layer: layer index.
        head: head index.
        row_tokens: labels for the matrix rows (y axis).
        col_tokens: labels for the matrix columns (x axis).
        max_sentence_len: maximum number of rows/columns to display.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    # Extract the attention matrix and crop it to the displayed window
    attn_matrix = get_attn_map(attn_type, layer, head)
    attn_matrix = attn_matrix[:max_sentence_len, :max_sentence_len].cpu().numpy()

    # The stored map can be smaller than max_sentence_len; trim the labels to
    # the actual matrix shape so tick labels and cells always line up.
    n_rows, n_cols = attn_matrix.shape

    # Plot heatmap on an explicit figure/axes pair
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(
        attn_matrix,
        xticklabels=col_tokens[:n_cols],
        yticklabels=row_tokens[:n_rows],
        cmap="viridis",
        cbar=True,
        annot=False,
        ax=ax,
    )
    ax.set_title(f"Layer {layer} Head {head}", fontsize=14)
    # The old "Target"/"Source" labels were wrong for both self-attention cases.
    # NOTE(review): assumes the usual (query, key) score layout — confirm
    # against the model's attention block.
    ax.set_xlabel("Key Tokens (attended to)")
    ax.set_ylabel("Query Tokens (attending)")
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
    fig.tight_layout()
    plt.show()

def get_all_attention_maps_matplotlib(attn_type, layers, heads, row_tokens, col_tokens, max_sentence_len):
    """Render one heatmap per (layer, head) combination, layer-major order.

    Every argument other than ``layers`` and ``heads`` is forwarded unchanged to
    ``attn_map_matplotlib``. Nothing is returned.
    """
    # Lazily enumerate the combinations in the same order as a nested loop
    layer_head_pairs = (
        (layer_idx, head_idx) for layer_idx in layers for head_idx in heads
    )
    for layer_idx, head_idx in layer_head_pairs:
        attn_map_matplotlib(
            attn_type, layer_idx, head_idx, row_tokens, col_tokens, max_sentence_len
        )

In [43]:
# Pull one validation sentence and show the raw source/target text
batch, encoder_input_tokens, decoder_input_tokens = load_next_batch()
print(f'Source: {batch["src_text"][0]}')
print(f'Target: {batch["tgt_text"][0]}')
# Number of tokens before the first padding token. A sentence that fills the
# whole sequence contains no "[PAD]", in which case list.index would raise
# ValueError — fall back to the full length instead.
sentence_len = (
    encoder_input_tokens.index("[PAD]")
    if "[PAD]" in encoder_input_tokens
    else len(encoder_input_tokens)
)

Source: এক লোক রাস্তা পারাপারের জন্য ট্রাফিকের  অপেক্ষা করছে
Target: a man waiting for traffic to stop at road crossing


In [44]:
# Layer and head indices to visualise (first three layers, eight heads each)
layers = list(range(3))
heads = list(range(8))


In [1]:
# Encoder Self-Attention
print("Generating Encoder Self-Attention Maps...")
# The helper renders every heatmap itself and returns None, so there is no
# collection of figures to iterate over afterwards.
get_all_attention_maps_matplotlib(
    "encoder", layers, heads, encoder_input_tokens, encoder_input_tokens, min(20, sentence_len)
)

Generating Encoder Self-Attention Maps...


NameError: name 'get_all_attention_maps_matplotlib' is not defined

In [None]:
# Decoder Self-Attention
print("Generating Decoder Self-Attention Maps...")
# Use the matplotlib helper defined in this notebook (no plotly variant exists
# here). It renders the heatmaps itself and returns None.
get_all_attention_maps_matplotlib(
    "decoder", layers, heads, decoder_input_tokens, decoder_input_tokens, min(20, sentence_len)
)

In [None]:
# Encoder-Decoder Attention
print("Generating Encoder-Decoder Attention Maps...")
# Use the matplotlib helper defined in this notebook (no plotly variant exists
# here). It renders the heatmaps itself and returns None.
# Rows are labelled with decoder tokens and columns with encoder tokens: in
# cross-attention the decoder supplies the queries and the encoder the keys.
# NOTE(review): assumes the usual (query, key) score layout — confirm against
# the model's cross_attention_block.
get_all_attention_maps_matplotlib(
    "encoder-decoder", layers, heads, decoder_input_tokens, encoder_input_tokens, min(20, sentence_len)
)

In [None]:
# Decoder Self-Attention
# Calls the helper that is actually defined in this notebook; the bare
# `get_all_attention_maps` name does not exist here.
get_all_attention_maps_matplotlib("decoder", layers, heads, decoder_input_tokens, decoder_input_tokens, min(20, sentence_len))

In [None]:
# Encoder-Decoder Attention
# Calls the helper that is actually defined in this notebook; the bare
# `get_all_attention_maps` name does not exist here.
# Rows are labelled with decoder tokens and columns with encoder tokens: in
# cross-attention the decoder supplies the queries and the encoder the keys.
# NOTE(review): assumes the usual (query, key) score layout — confirm against
# the model's cross_attention_block.
get_all_attention_maps_matplotlib("encoder-decoder", layers, heads, decoder_input_tokens, encoder_input_tokens, min(20, sentence_len))