# See a document through BERT's eyes

## Parameters

In [None]:
# Notebook parameters
# Directory that must contain `tokens.pickle` and `activations.npz`.
dir_path = "../../data/alice/sample3"
# Reduction method name, passed as the second argument to
# `acts_util.reduce_activations`.
dimensionality_reduction = "NMF"
# Passed as the third argument to `acts_util.reduce_activations`
# (presumably the number of output components -- confirm in acts_util).
dimension = 6

In [None]:
# Imports
%load_ext autoreload
%autoreload 2
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pickle
import numpy as np
import os
from lucid.misc.io.showing import _display_html
import sys
project_path = os.path.abspath('../..')
sys.path.insert(0, project_path)
from src.utils import acts_util, vis_util

In [None]:
# Check that the parameters point at the expected input files.
# normpath strips any trailing separator first, so a `dir_path` such as
# 'data/sample3/' still yields 'sample3' instead of an empty name.
name = os.path.basename(os.path.normpath(dir_path))
print(f'Directory: \'{name}\'')

tokens_path = os.path.join(dir_path, "tokens.pickle")
print(f'Path to tokens: \'{tokens_path}\'')
# Raise explicitly rather than `assert`, which is stripped under `python -O`.
if not os.path.exists(tokens_path):
    raise FileNotFoundError(f'File does not exist: {tokens_path}')

# This file holds the activations that are reduced later in the notebook,
# so the message no longer calls them "reduced".
acts_path = os.path.join(dir_path, "activations.npz")
print(f'Path to activations: \'{acts_path}\'')
if not os.path.exists(acts_path):
    raise FileNotFoundError(f'File does not exist: {acts_path}')

## Load tokens and activations

In [None]:
# Load the token list.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(tokens_path, 'rb') as tokens_file:
    tokens = pickle.load(tokens_file)

print('Tokens:', tokens, sep='\n')

# Space-joined view of the tokens, printed for a quick visual check.
doc = ' '.join(tokens)
print('\nDocument:', doc, sep='\n')

# The archive is left open on purpose: a later cell iterates over its
# entries. `.files` lists the names of the arrays stored in the archive.
layer_to_acts = np.load(acts_path)
layers = layer_to_acts.files
print('\nLayers: ' + ', '.join(layers))

## Reduce activations

In [None]:
# Apply the configured dimensionality reduction to every layer's activations,
# keyed by the same layer names as the loaded archive.
layer_to_reduced_acts = {
    layer_name: acts_util.reduce_activations(
        layer_acts, dimensionality_reduction, dimension
    )
    for layer_name, layer_acts in layer_to_acts.items()
}


## Visualize activations

In [None]:
# Convert each layer's reduced activations with `vis_util.channels_to_rgbs`;
# the result is zipped with the tokens below, one RGB entry per token.
layer_to_rgbs = {
    layer_name: vis_util.channels_to_rgbs(layer_reduced)
    for layer_name, layer_reduced in layer_to_reduced_acts.items()
}


In [None]:
# Build one HTML string per layer: each token becomes a <span> whose
# background colour is the RGB value computed for that token in that layer.
from html import escape  # local import; binds only `escape`

layer_to_html = {}
for layer, rgbs in layer_to_rgbs.items():
    # Escape the token text so tokens containing '<', '>' or '&' are shown
    # literally instead of being parsed as markup. quote=False leaves
    # apostrophes and quotes untouched; they are harmless in element text.
    spans = [
        f"<span style='background-color: rgba({rgb[0]},{rgb[1]},{rgb[2]},1);'> {escape(tok, quote=False)} </span>"
        for tok, rgb in zip(tokens, rgbs)
    ]
    # Join once instead of growing the string with += on every token.
    layer_to_html[layer] = ''.join(spans)

In [None]:
# Show the coloured document for each layer, in the archive's layer order,
# with the layer name printed above it.
for layer in layers:
    print(layer)
    rendered = layer_to_html[layer]
    _display_html(rendered)