In [1]:
import mlflow
import torch
from gpt import GPTLanguageModel
from autoencoder import Autoencoder
import os
from gpt_params import tokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# MLflow run ids of the two checkpoints loaded at the bottom of this cell.
transformer_run_id = "1631cdf63904427fb5833afa9372b625"
autoencoder_run_id = "59895464989d4ab488bf5d1cd9d77c9f"

# Set the credentials path and the tracking server before any artifact is fetched.
# NOTE(review): both values are hard-coded; consider moving them to configuration.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"
mlflow.set_tracking_uri(uri="http://34.176.189.11:5000")

# Pick the compute backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Load both models from their MLflow runs onto the selected device.
gpt = GPTLanguageModel.load_from_mlflow("Transformer", transformer_run_id, device)
autoencoder = Autoencoder.load_from_mlflow("Autoencoder", autoencoder_run_id, device)


Downloading artifacts: 100%|██████████| 1/1 [00:06<00:00,  6.96s/it]
  model = torch.load(local_model_path, map_location=device)
Downloading artifacts: 100%|██████████| 1/1 [00:01<00:00,  1.04s/it]
  model = torch.load(local_model_path, map_location=device)


In [3]:
# Switch both models to eval mode before running the forward passes.
autoencoder.eval()
gpt.eval()

# Tokenize the probe sentence into a long tensor and add a leading axis of
# size 1 before moving it to the selected device.
idx = (
    torch.tensor(tokenizer.encode("The quick brown fox jumps over the lazy dog"), dtype=torch.long)
    .unsqueeze(0)
    .to(device)
)

# This cell only inspects values, so run the forward passes without autograd:
# no graph is recorded and the results carry no grad_fn.
with torch.no_grad():
    x_embed = gpt.embed(idx)
    activations = autoencoder.encode(x_embed)
activations

tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.5510],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0781],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.5303]]],
       grad_fn=<ReluBackward0>)

In [5]:
# Per position of the last axis: how many entries of x_embed exceed 0.01 in
# absolute value, counted across the first two axes.
x_embed.abs().gt(0.01).sum(dim=(0, 1))

tensor([7, 9, 7, 9, 6, 6, 7, 8, 8, 6, 8, 6, 6, 6, 9, 7, 8, 6, 6, 9, 6, 7, 7, 8,
        8, 7, 6, 8, 7, 8, 9, 6, 7, 6, 8, 9, 5, 6, 6, 8, 9, 5, 5, 6, 7, 7, 8, 5,
        5, 8, 6, 8, 7, 9, 8, 7, 9, 8, 6, 8, 4, 5, 6, 7, 5, 8, 8, 7, 9, 6, 7, 9,
        7, 6, 7, 7, 8, 4, 8, 6, 8, 9, 6, 7, 5, 7, 9, 5, 7, 7, 7, 6, 7, 6, 7, 5,
        9, 8, 7, 7, 3, 8, 9, 5, 6, 6, 6, 7, 4, 6, 8, 8, 6, 9, 5, 7, 9, 8, 8, 7,
        6, 7, 8, 8, 6, 9, 6, 4])

In [9]:
# Threshold above which an activation is counted as active.
eps = 1e-5
# Per position of the last axis: number of entries above eps, summed over the
# first two axes.
activations_count = torch.sum(activations > eps, dim=(0, 1))
activations_count

tensor([6, 0, 0, 0, 0, 2, 1, 0, 9, 0, 0, 9, 0, 7, 0, 0, 9, 2, 0, 0, 9, 9, 9, 0,
        9, 2, 9, 0, 0, 9, 8, 0, 3, 0, 5, 9, 0, 7, 8, 0, 0, 9, 9, 5, 1, 0, 3, 0,
        0, 1, 0, 0, 9, 0, 4, 7, 0, 9, 0, 0, 0, 4, 0, 7, 9, 8, 3, 0, 7, 3, 3, 5,
        6, 9, 9, 4, 9, 0, 8, 0, 1, 9, 0, 7, 0, 2, 8, 0, 8, 1, 8, 0, 0, 0, 9, 9,
        0, 0, 0, 9, 1, 0, 6, 0, 0, 0, 0, 0, 0, 0, 9, 2, 9, 1, 0, 9, 0, 0, 4, 0,
        9, 0, 0, 0, 0, 6, 9, 8, 7, 9, 9, 0, 3, 2, 0, 0, 0, 1, 9, 0, 8, 6, 9, 4,
        0, 5, 0, 0, 0, 1, 0, 9, 7, 0, 9, 0, 0, 0, 0, 0, 9, 9, 0, 5, 9, 1, 0, 9,
        0, 0, 5, 1, 0, 8, 0, 9, 2, 0, 0, 9, 0, 0, 1, 9, 0, 0, 1, 1, 0, 1, 0, 0,
        3, 0, 8, 0, 6, 0, 9, 0, 9, 0, 9, 4, 1, 0, 0, 9, 4, 9, 0, 0, 0, 2, 5, 0,
        9, 8, 3, 0, 0, 7, 7, 0, 0, 9, 1, 0, 0, 8, 2, 2, 0, 7, 2, 5, 1, 9, 0, 0,
        0, 9, 6, 9, 0, 0, 0, 9, 0, 6, 0, 9, 0, 6, 3, 9, 9, 7, 0, 0, 9, 2, 0, 0,
        3, 0, 0, 0, 0, 0, 8, 0, 0, 1, 9, 8, 4, 0, 9, 0, 0, 1, 9, 1, 9, 0, 0, 0,
        0, 0, 0, 2, 0, 2, 0, 0, 1, 3, 0,