### Test notebook for development

In [1]:
try:
    import os
    from google.colab import drive
    drive.mount('/content/drive')
    os.chdir('/content/drive/MyDrive/School/DS-GA 1011/capstone')
except:
    pass

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# ! pip install -r requirements.txt

In [3]:
import os
try:
  from google.colab import userdata
  hf_token = userdata.get_secret('HUGGING_FACE_TOKEN')
except:
  import os
  hf_token = os.getenv('HUGGING_FACE_TOKEN')
hf_login_command = f'huggingface-cli login --token {hf_token} --add-to-git-credential'
os.system(hf_login_command)

256

**Testing the model**

In [4]:
from src.model import AutoencoderConfig, SparseAutoencoder
from src.trainer import TrainingConfig, MonosemanticityTrainer
import torch
from unittest.mock import Mock
import torch.multiprocessing as mp
from src.evaluation import plot_training_metrics
from src.dataset import TextDataset, DataConfig, GPT2ActivationExtractor, GemmaActivationExtractor

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_config = AutoencoderConfig(
     input_dim=768,
    hidden_dim=1024,
)
trainer_config = TrainingConfig(
    batch_size=64,
    num_epochs= 10,
    mixed_precision= 'fp16',
    run_path='tests/',
    learning_rate=1e-4
)

In [6]:
# sample data
normal_dist = torch.distributions.Normal(loc=0, scale=1)
samples = normal_dist.sample(sample_shape=torch.Size([512,model_config.input_dim])).to(device)
dataset = torch.utils.data.TensorDataset(samples)
train_loader = torch.utils.data.DataLoader(dataset, batch_size = trainer_config.batch_size, shuffle=True)
# Mock extractor
extractor_mock = Mock(name='BaseActivationExtractorMock')
extractor_mock.extract_activations.return_value = {'activations': next(iter(train_loader))[0]}

In [7]:
# model and optimizer
model = SparseAutoencoder(model_config)
optimizer = torch.optim.Adam(model.parameters(), lr=trainer_config.learning_rate)
trainer = MonosemanticityTrainer(model, optimizer=optimizer, extractor=extractor_mock, train_config=trainer_config)

In [8]:
# training
trainer.train(train_loader)

INFO:src.trainer:Saving model to: /content/drive/MyDrive/School/DS-GA 1011/capstone/tests/model.pkl
  0%|          | 0/10 [00:00<?, ?it/s]INFO:src.trainer:
Epoch 1/10
DEBUG:src.trainer:Training epoch
 10%|█         | 1/10 [00:00<00:04,  1.91it/s]INFO:src.trainer:
Epoch 2/10
DEBUG:src.trainer:Training epoch
INFO:src.trainer:
Epoch 3/10
DEBUG:src.trainer:Training epoch
 30%|███       | 3/10 [00:00<00:01,  5.02it/s]INFO:src.trainer:
Epoch 4/10
DEBUG:src.trainer:Training epoch
 40%|████      | 4/10 [00:00<00:01,  5.96it/s]INFO:src.trainer:
Epoch 5/10
DEBUG:src.trainer:Training epoch
 50%|█████     | 5/10 [00:00<00:00,  6.75it/s]INFO:src.trainer:
Epoch 6/10
DEBUG:src.trainer:Training epoch
 60%|██████    | 6/10 [00:01<00:00,  7.44it/s]INFO:src.trainer:
Epoch 7/10
DEBUG:src.trainer:Training epoch
INFO:src.trainer:
Epoch 8/10
DEBUG:src.trainer:Training epoch
 80%|████████  | 8/10 [00:01<00:00,  8.57it/s]INFO:src.trainer:
Epoch 9/10
DEBUG:src.trainer:Training epoch
 90%|█████████ | 9/10 [00:01

{'train_loss': [1.0213679075241089,
  0.913556694984436,
  0.8227568864822388,
  0.7365338802337646,
  0.648813009262085,
  0.557964563369751,
  0.4657718241214752,
  0.37623968720436096,
  0.29410064220428467,
  0.2233334332704544],
 'train_mse_loss': [0.9982592463493347,
  0.8908758163452148,
  0.7997006177902222,
  0.7123934030532837,
  0.6229921579360962,
  0.5300136804580688,
  0.43540245294570923,
  0.3433254063129425,
  0.25866568088531494,
  0.18553417921066284],
 'train_l1_regularization': [0.023108599707484245,
  0.02268088236451149,
  0.02305634878575802,
  0.02414051443338394,
  0.025820838287472725,
  0.02795090340077877,
  0.030369393527507782,
  0.03291428089141846,
  0.03543492406606674,
  0.03779926523566246]}

**Testing data generation**

In [9]:

dt_config = DataConfig(
    model_name='gpt2',
    use_flash_attention=False
)
extractor = GPT2ActivationExtractor(dt_config)


INFO:src.dataset:Using gpt2 extractor on cuda


In [10]:
gpt2_extractor = GPT2ActivationExtractor(dt_config)


INFO:src.dataset:Using gpt2 extractor on cuda


In [11]:
res = gpt2_extractor.extract_activations(['I love you'], 5)
res['activations'], res['activations'].shape

(tensor([[[0.0000, 0.0000, 0.0600,  ..., 0.0000, 0.0294, 0.0407],
          [0.0222, 0.0000, 0.0000,  ..., 0.0000, 0.1515, 0.1571],
          [0.6869, 0.3262, 0.0000,  ..., 0.0000, 0.1399, 0.3196]]],
        device='cuda:0'),
 torch.Size([1, 3, 768]))

In [12]:

try:
    mp.set_start_method('spawn')
except RuntimeError:
    pass # method has already been set


dataset = TextDataset(
    dataset_name="RealTimeData/wikitext_latest",
    split="train",
    text_column="text"
)
dataloader = dataset.get_dataloader(batch_size=64)

In [13]:
batch = next(iter(dataloader))
batch

['The 2024–25 Notre Dame Fighting Irish women\'s basketball team will represent the University of Notre Dame during the 2024–25 NCAA Division I women\'s basketball season. The Fighting Irish will be led by fifth-year head coach Niele Ivey and will play their home games at Purcell Pavilion in Notre Dame, Indiana as members of the Atlantic Coast Conference.\n\nPrevious season\n\nThe Fighting Irish finished the season 28–7 overall and 13–5 in ACC play to finish in a three-way tie for second place. As the fourth seed in the ACC tournament, they earned a bye into the Quarterfinals where they defeated fifth seed Louisville. They defeated first seed Virginia Tech in the Semifinals and second seed NC State to win the title. It was their sixth overall title and first since 2019. They received the ACC\'s automatic bid to the NCAA Tournament, marking the third straight time the Fighting Irish qualified for the tournament. As the second seed in the Albany 1 region they defeated fifteenth seed Kent

In [14]:
trainer = MonosemanticityTrainer(model, optimizer=optimizer, extractor=extractor_mock, train_config=trainer_config)
metrics = trainer.train(dataloader)
metrics

INFO:src.trainer:Saving model to: /content/drive/MyDrive/School/DS-GA 1011/capstone/tests/model.pkl
  0%|          | 0/10 [00:00<?, ?it/s]INFO:src.trainer:
Epoch 1/10
DEBUG:src.trainer:Training epoch
 10%|█         | 1/10 [00:00<00:04,  2.19it/s]INFO:src.trainer:
Epoch 2/10
DEBUG:src.trainer:Training epoch
 20%|██        | 2/10 [00:00<00:03,  2.33it/s]INFO:src.trainer:
Epoch 3/10
DEBUG:src.trainer:Training epoch
 30%|███       | 3/10 [00:01<00:03,  2.29it/s]INFO:src.trainer:
Epoch 4/10
DEBUG:src.trainer:Training epoch
 40%|████      | 4/10 [00:01<00:02,  2.37it/s]INFO:src.trainer:
Epoch 5/10
DEBUG:src.trainer:Training epoch
 50%|█████     | 5/10 [00:02<00:02,  2.38it/s]INFO:src.trainer:
Epoch 6/10
DEBUG:src.trainer:Training epoch
 60%|██████    | 6/10 [00:02<00:01,  2.22it/s]INFO:src.trainer:
Epoch 7/10
DEBUG:src.trainer:Training epoch
 70%|███████   | 7/10 [00:03<00:01,  2.03it/s]INFO:src.trainer:
Epoch 8/10
DEBUG:src.trainer:Training epoch
 80%|████████  | 8/10 [00:03<00:00,  2.15it/

{'train_loss': [0.15219615399837494,
  0.0948820412158966,
  0.06523624807596207,
  0.05233236774802208,
  0.04733245447278023,
  0.045296359807252884,
  0.04418759047985077,
  0.043347276747226715,
  0.04258923605084419,
  0.04186037927865982],
 'train_mse_loss': [0.11173000186681747,
  0.05195005238056183,
  0.02085583470761776,
  0.007387909572571516,
  0.0024646653328090906,
  0.0008908059680834413,
  0.0004300286527723074,
  0.00030132546089589596,
  0.00026368448743596673,
  0.00024866434978321195],
 'train_l1_regularization': [0.04046614095568657,
  0.04293198883533478,
  0.044380418956279755,
  0.04494445398449898,
  0.04486778751015663,
  0.04440555348992348,
  0.04375755786895752,
  0.043045952916145325,
  0.04232555255293846,
  0.04161171242594719]}

In [15]:
plot_training_metrics(metrics, 'test.png')