# See a document through BERT's eyes

## Parameters

In [5]:
# Set parameters
# Directory that must contain `tokens.pickle` and `activations.npz`
# (both paths are derived from it and checked in the validation cell).
dir_path = "../data/alice/sample3"

# Both values below are forwarded unchanged to acts_util.reduce_activations.
# NOTE(review): 'NMF' is presumably non-negative matrix factorization and
# `dimension` the number of output components — confirm against acts_util.
dimension = 6
dimensionality_reduction = "NMF"

In [6]:
# Imports
%load_ext autoreload
%autoreload 2
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pickle
import numpy as np
import os
from lucid.misc.io.showing import _display_html
from utils import acts_util, vis_util

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
# Checking parameters are valid
# Normalize before taking the last component so that a trailing separator
# (e.g. '../data/alice/sample3/') still yields the directory name instead of ''.
name = os.path.basename(os.path.normpath(dir_path))
print(f'Directory: \'{name}\'')

# Both input files must already exist inside dir_path; fail early and name the
# missing path rather than erroring later in the loading cell.
tokens_path = os.path.join(dir_path, "tokens.pickle")
print(f'Path to tokens: \'{tokens_path}\'')
assert os.path.isfile(tokens_path), f'File does not exist: {tokens_path}'

# NOTE(review): the label below says "reduced", but the arrays in this file are
# passed through acts_util.reduce_activations later in the notebook — confirm wording.
acts_path = os.path.join(dir_path, "activations.npz")
print(f'Path to reduced activations: \'{acts_path}\'')
assert os.path.isfile(acts_path), f'File does not exist: {acts_path}'

Directory: 'sample3'
Path to tokens: '../data/alice/sample3/tokens.pickle'
Path to reduced activations: '../data/alice/sample3/activations.npz'


## Load tokens and activations

In [9]:
# Load the token list.
# NOTE: pickle.load can execute arbitrary code while unpickling; only point
# dir_path at data you trust.
with open(tokens_path, 'rb') as f:
    tokens = pickle.load(f)

print('Tokens:')
print(tokens)

# Space-joined view of the tokens, printed for reference.
doc = ' '.join(tokens)
print('\nDocument:')
print(doc)

# np.load on an .npz archive returns a lazy NpzFile that keeps the file open.
# Read every array inside a `with` block so the handle is released, and keep the
# arrays in a plain dict (archive order preserved) so later cells can still call
# layer_to_acts.items().
with np.load(acts_path) as npz:
    layer_to_acts = {layer: npz[layer] for layer in npz.files}
layers = list(layer_to_acts)
print(f'\nLayers: {", ".join(layers)}')

Tokens:
['[CLS]', 'alice', 'was', 'beginning', 'to', 'get', 'very', 'tired', 'of', 'sitting', 'by', 'her', 'sister', 'on', 'the', 'bank', ',', 'and', 'of', 'having', 'nothing', 'to', 'do', ':', 'once', 'or', 'twice', 'she', 'had', 'pee', '##ped', 'into', 'the', 'book', 'her', 'sister', 'was', 'reading', ',', 'but', 'it', 'had', 'no', 'pictures', 'or', 'conversations', 'in', 'it', ',', "'", 'and', 'what', 'is', 'the', 'use', 'of', 'a', 'book', ',', "'", 'thought', 'alice', "'", 'without', 'pictures', 'or', 'conversation', '?', "'", '[SEP]', 'so', 'she', 'was', 'considering', 'in', 'her', 'own', 'mind', '(', 'as', 'well', 'as', 'she', 'could', ',', 'for', 'the', 'hot', 'day', 'made', 'her', 'feel', 'very', 'sleepy', 'and', 'stupid', ')', ',', 'whether', 'the', 'pleasure', 'of', 'making', 'a', 'daisy', '-', 'chain', 'would', 'be', 'worth', 'the', 'trouble', 'of', 'getting', 'up', 'and', 'picking', 'the', 'dai', '##sies', ',', 'when', 'suddenly', 'a', 'white', 'rabbit', 'with', 'pink', 'ey

## Reduce activations

In [10]:
# Pass each layer's activations through acts_util.reduce_activations together
# with the configured `dimensionality_reduction` and `dimension` settings.
# The result keeps the same layer-name keys as layer_to_acts.
layer_to_reduced_acts = {
    layer_name: acts_util.reduce_activations(
        layer_acts, dimensionality_reduction, dimension
    )
    for layer_name, layer_acts in layer_to_acts.items()
}


## Visualize activations

In [13]:
# Map every layer's reduced activations through vis_util.channels_to_rgbs.
# The HTML-building cell pairs each resulting entry with a token and reads its
# first three components as an rgba() background color.
layer_to_rgbs = {
    layer_name: vis_util.channels_to_rgbs(layer_reduced)
    for layer_name, layer_reduced in layer_to_reduced_acts.items()
}


In [14]:
from html import escape

# Build, for every layer, one HTML string in which each token is wrapped in a
# <span> whose background color comes from the matching entry of that layer's rgbs.
layer_to_html = {}
for layer, rgbs in layer_to_rgbs.items():
    # escape() keeps tokens such as '<', '>' or '&' from being parsed as markup.
    # zip() stops at the shorter input, so a length mismatch between `tokens`
    # and `rgbs` silently drops the surplus items.
    layer_to_html[layer] = ''.join(
        f"<span style='background-color: rgba({rgb[0]},{rgb[1]},{rgb[2]},1);'> {escape(tok)} </span>"
        for tok, rgb in zip(tokens, rgbs)
    )

In [15]:
# Show the layers in the order they were listed when the activations were
# loaded: first the layer's name as plain text, then its colored token markup.
for layer_name in layers:
    print(layer_name)
    layer_markup = layer_to_html[layer_name]
    _display_html(layer_markup)

arr_0


arr_1


arr_2


arr_3


arr_4


arr_5


arr_6


arr_7


arr_8


arr_9


arr_10


arr_11


arr_12
