In [1]:
import numpy as np
import torch
from transformer_lens import HookedTransformer
from sae_lens import SAE
from src.agents.SAEAgent import sae_encode_text, load_or_compute_sae_embeddings, sae_retrieve
from src.agents.RAGAgent import embed_text


  from .autonotebook import tqdm as notebook_tqdm


# RAG

In [2]:
# Imported in this cell rather than in the notebook's first import cell.
from dotenv import load_dotenv

# Bare last expression: the notebook displays load_dotenv()'s return value
# (False in the recorded run).
# NOTE(review): presumably False means no .env file was found or nothing was
# loaded; confirm, and check whether embed_text needs those variables.
load_dotenv()

False

In [15]:
def rag_similarity(text_1, text_2, model='ollama/nomic-embed-text'):
    """Print and return the cosine similarity of two texts' embeddings.

    Args:
        text_1: First text, passed to ``embed_text``.
        text_2: Second text, passed to ``embed_text``.
        model: Embedding model identifier forwarded to ``embed_text``.
            Defaults to the value that used to be hard-coded here.

    Returns:
        The dot product of the two embeddings after each is divided by its
        own norm. The same value is printed with a ``RAG similarity:``
        prefix, so existing callers that ignore the return value see no change.
    """
    z1 = embed_text(text_1, model)
    z2 = embed_text(text_2, model)

    # Scale both embeddings to unit length so the dot product is the cosine.
    # A zero-norm embedding would yield nan here (division by zero).
    z1_norm = z1 / np.linalg.norm(z1)
    z2_norm = z2 / np.linalg.norm(z2)
    similarity = np.dot(z1_norm, z2_norm)
    print(f'RAG similarity: {similarity}')
    return similarity

# SAE

In [8]:
# Run on the GPU when torch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [9]:
# Identifiers used by the model-loading and SAE-loading cells.
sae_base_model_name = "EleutherAI/pythia-70m-deduped"
sae_release = "pythia-70m-deduped-res-sm"
sae_id = "blocks.5.hook_resid_post"
# In this configuration the hook name is the same string as the SAE id.
hook_name = sae_id

In [10]:
# Load the base model onto `device`, requesting bfloat16 as its dtype.
sae_base_model = HookedTransformer.from_pretrained_no_processing(
    sae_base_model_name, device=device, dtype=torch.bfloat16
)

`torch_dtype` is deprecated! Use `dtype` instead!


Loaded pretrained model EleutherAI/pythia-70m-deduped into HookedTransformer


In [11]:
# Load the pretrained SAE along with its config dict and sparsity.
# The recorded run emitted a warning pointing at the three-way unpacking of
# `SAE.from_pretrained(...)`; in current SAELens that call is meant to return
# only the SAE, and this loader is the supported way to get all three values.
sae, sae_cfg, sparsity = SAE.from_pretrained_with_cfg_and_sparsity(
    release=sae_release,
    sae_id=sae_id,
    device=device,
)
# Put the SAE into eval mode; as the last expression it also displays the module.
sae.eval()

  sae, sae_cfg, sparsity = SAE.from_pretrained(


StandardSAE(
  (activation_fn): ReLU()
  (hook_sae_input): HookPoint()
  (hook_sae_acts_pre): HookPoint()
  (hook_sae_acts_post): HookPoint()
  (hook_sae_output): HookPoint()
  (hook_sae_recons): HookPoint()
  (hook_sae_error): HookPoint()
)

In [18]:
def sae_similarity(text_1, text_2):
    """Print and return the cosine similarity of two texts' SAE encodings.

    Both texts are encoded with ``sae_encode_text`` using the notebook-level
    ``sae_base_model``, ``sae`` and ``hook_name``.

    Args:
        text_1: First text to encode.
        text_2: Second text to encode.

    Returns:
        The dot product of the two encodings after each is divided by its
        own norm. The same value is printed with an ``SAE similarity:``
        prefix, so existing callers that ignore the return value see no change.
    """
    z1 = sae_encode_text(sae_base_model, sae, text_1, hook_name)
    z2 = sae_encode_text(sae_base_model, sae, text_2, hook_name)

    # Scale both encodings to unit length so the dot product is the cosine.
    # An all-zero encoding would yield nan here (division by zero).
    z1_norm = z1 / np.linalg.norm(z1)
    z2_norm = z2 / np.linalg.norm(z2)
    similarity = np.dot(z1_norm, z2_norm)
    print(f'SAE similarity: {similarity}')
    return similarity

# SAE vs RAG

In [20]:
def compare(text_1, text_2):
    """Report the RAG similarity and then the SAE similarity for one text pair."""
    pair = (text_1, text_2)
    rag_similarity(*pair)  # printed first
    sae_similarity(*pair)  # printed second

In [21]:
# Paraphrase pair: closing a window against cold wind.
compare(
    text_1='She closed the window because the cold wind was coming in.',
    text_2='She shut the window to keep the chilly breeze out.',
)

RAG similarity: 0.9285133402558516
SAE similarity: 0.5873537063598633


In [22]:
# Paraphrase pair: rushing for the last train before midnight.
compare(
    text_1='He hurried to catch the last train before midnight.',
    text_2='He rushed to board the final train that left just before twelve.',
)

RAG similarity: 0.9019049688465273
SAE similarity: 0.6499947905540466


In [23]:
# Paraphrase pair: finishing a gripping book in one night.
compare(
    text_1='The book was so captivating that she finished it in one night.',
    text_2='She read the whole novel in a single evening because it was irresistible.',
)

RAG similarity: 0.8380246617842546
SAE similarity: 0.6573800444602966


In [24]:
# Paraphrase pair: a dog barking at the mail carrier.
compare(
    text_1='The dog barked loudly at the mail carrier.',
    text_2='The dog made a racket when the postal worker approached.',
)

RAG similarity: 0.8239608511256304
SAE similarity: 0.589231014251709


In [25]:
# Different topics (a cat on a radiator vs. quantum computers), both mentioning temperature.
compare(
    text_1='The cat slept on the warm radiator.',
    text_2='Quantum computers require extremely low temperatures to operate.',
)

RAG similarity: 0.5298888996725823
SAE similarity: 0.5248370170593262


In [26]:
# Unrelated pair: astronomy vs. grocery shopping.
compare(
    text_1='The nebula glowed faintly as the telescope adjusted its focus.',
    text_2='I forgot to buy onions for the pasta sauce.',
)

RAG similarity: 0.34518207932378536
SAE similarity: 0.383361279964447


In [27]:
# Unrelated pair: a café selling lemonade vs. Saturn's rings.
compare(
    text_1='The café down the street started selling homemade lemonade.',
    text_2='Saturn’s rings are mostly composed of ice particles.',
)

RAG similarity: 0.31755464184461646
SAE similarity: 0.31813913583755493


In [28]:
# Unrelated pair: soaked shoes vs. restarting a server.
compare(
    text_1='My shoes got soaked when it started raining unexpectedly.',
    text_2='The server must be restarted after applying the new kernel patch.',
)

RAG similarity: 0.3717168859381351
SAE similarity: 0.43883946537971497


In [29]:
# Unrelated pair of questions: parenting vs. counting camping days.
compare(
    text_1='What if my child just refuses to take responsibility for their actions? How do I get them to understand the importance of being accountable?',
    text_2='How many days did I spend on camping trips in the United States this year?',
)

RAG similarity: 0.3164769270993337
SAE similarity: 0.33727002143859863


Aun cuando las oraciones están relacionadas conceptualmente y no semánticamente, RAG es mejor que SAE.

In [30]:
# Analogy pair: an overfull backpack vs. a fixed-width neural network.
compare(
    text_1='If you pack too many tools into a small backpack, something essential will always get left behind.',
    text_2='A neural network with fixed width can’t memorize new patterns without forgetting some of the old ones.',
)

RAG similarity: 0.4902988564590743
SAE similarity: 0.3685685396194458


In [31]:
# Analogy pair: a spark becoming a wildfire vs. a cascade of replication errors.
compare(
    text_1='A single spark landing in dry grass can turn into a wildfire by nightfall.',
    text_2='One unstable nucleotide in a cell can initiate a cascade of replication errors.',
)

RAG similarity: 0.540741589430252
SAE similarity: 0.47643065452575684


In [32]:
# Analogy pair: watering a plant twice vs. doubling training epochs.
compare(
    text_1='Watering a plant twice doesn’t make it grow twice as fast.',
    text_2='Doubling your training epochs rarely doubles your model’s accuracy.',
)

RAG similarity: 0.5917122431442312
SAE similarity: 0.3543633222579956
