### Test notebook for development

In [1]:
try:
    import os
    from google.colab import drive
    drive.mount('/content/drive')
    os.chdir('/content/drive/MyDrive/School/DS-GA 1011/capstone')
except:
    pass

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# ! pip install -r requirements.txt

In [3]:
import os
try:
  from google.colab import userdata
  hf_token = userdata.get_secret('HUGGING_FACE_TOKEN')
except:
  import os
  hf_token = os.getenv('HUGGING_FACE_TOKEN')
hf_login_command = f'huggingface-cli login --token {hf_token} --add-to-git-credential'
os.system(hf_login_command)

256

**Testing the model**

In [4]:
from src.model import AutoencoderConfig, SparseAutoencoder
from src.trainer import TrainingConfig, MonosemanticityTrainer
import torch
from unittest.mock import Mock
import torch.multiprocessing as mp
from src.evaluation import plot_training_metrics
from src.dataset import TextDataset, DataConfig, GPT2ActivationExtractor, GemmaActivationExtractor

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_config = AutoencoderConfig(
     input_dim=768,
    hidden_dim=1024,
)
trainer_config = TrainingConfig(
    batch_size=64,
    num_epochs= 10,
    mixed_precision= 'fp16',
    run_path='tests/',
    learning_rate=1e-4
)

In [6]:
# sample data
normal_dist = torch.distributions.Normal(loc=0, scale=1)
samples = normal_dist.sample(sample_shape=torch.Size([512,model_config.input_dim])).to(device)
dataset = torch.utils.data.TensorDataset(samples)
train_loader = torch.utils.data.DataLoader(dataset, batch_size = trainer_config.batch_size, shuffle=True)
# Mock extractor
extractor_mock = Mock(name='BaseActivationExtractorMock')
extractor_mock.extract_activations.return_value = {'activations': next(iter(train_loader))[0]}

In [7]:
# model and optimizer
model = SparseAutoencoder(model_config)
optimizer = torch.optim.Adam(model.parameters(), lr=trainer_config.learning_rate)
trainer = MonosemanticityTrainer(model, optimizer=optimizer, extractor=extractor_mock, train_config=trainer_config)

In [8]:
# training
trainer.train(train_loader)

INFO:src.trainer:Saving model to: /content/drive/MyDrive/School/DS-GA 1011/capstone/tests/model.pkl
  0%|          | 0/10 [00:00<?, ?it/s]INFO:src.trainer:
Epoch 1/10
DEBUG:src.trainer:Training epoch
INFO:src.trainer:
Epoch 1/10Train - Loss: 2.3956, MSE: 1.0117, L1: 1.3839
 10%|█         | 1/10 [00:00<00:04,  2.01it/s]INFO:src.trainer:
Epoch 2/10
DEBUG:src.trainer:Training epoch
INFO:src.trainer:
Epoch 2/10Train - Loss: 2.0677, MSE: 0.9467, L1: 1.1210
INFO:src.trainer:
Epoch 3/10
DEBUG:src.trainer:Training epoch
INFO:src.trainer:
Epoch 3/10Train - Loss: 1.7944, MSE: 0.9058, L1: 0.8887
 30%|███       | 3/10 [00:00<00:01,  5.31it/s]INFO:src.trainer:
Epoch 4/10
DEBUG:src.trainer:Training epoch
INFO:src.trainer:
Epoch 4/10Train - Loss: 1.5685, MSE: 0.8812, L1: 0.6873
INFO:src.trainer:
Epoch 5/10
DEBUG:src.trainer:Training epoch
INFO:src.trainer:
Epoch 5/10Train - Loss: 1.3883, MSE: 0.8700, L1: 0.5183
 50%|█████     | 5/10 [00:00<00:00,  7.56it/s]INFO:src.trainer:
Epoch 6/10
DEBUG:src.train

{'train_loss': [2.3955655097961426,
  2.067685604095459,
  1.794413447380066,
  1.5684659481048584,
  1.3883330821990967,
  1.2520873546600342,
  1.1536712646484375,
  1.0861930847167969,
  1.042426586151123,
  1.0152056217193604],
 'train_mse_loss': [1.0116697549819946,
  0.9467068910598755,
  0.9057527184486389,
  0.8811991810798645,
  0.8700121641159058,
  0.8693130612373352,
  0.8756059408187866,
  0.8853538632392883,
  0.8952502012252808,
  0.9027339220046997],
 'train_l1_regularization': [1.3838958740234375,
  1.120978832244873,
  0.888660728931427,
  0.6872668266296387,
  0.5183208584785461,
  0.382774293422699,
  0.2780653238296509,
  0.2008393257856369,
  0.14717644453048706,
  0.11247175186872482]}

**Testing data generation**

In [9]:

dt_config = DataConfig(
    dataset_name="RealTimeData/wikitext_latest",
    split="train",
    text_column="text",
    model_name='gpt2',
    use_flash_attention=False
)
extractor = GPT2ActivationExtractor(dt_config)


INFO:src.dataset:Using gpt2 extractor on cuda


In [10]:
gpt2_extractor = GPT2ActivationExtractor(dt_config)


INFO:src.dataset:Using gpt2 extractor on cuda


In [11]:

try:
    mp.set_start_method('spawn')
except RuntimeError:
    pass # method has already been set


dataset = TextDataset(
    tokenizer=gpt2_extractor.tokenizer,
    config=dt_config,
)
dataloader = dataset.get_dataloader(batch_size=64)

In [12]:
batch = next(iter(dataloader))
batch['input_ids'].shape

torch.Size([64, 1, 1, 1024])

In [13]:
trainer = MonosemanticityTrainer(model, optimizer=optimizer, extractor=extractor_mock, train_config=trainer_config)
metrics = trainer.train(dataloader)
metrics

INFO:src.trainer:Saving model to: /content/drive/MyDrive/School/DS-GA 1011/capstone/tests/model.pkl
  0%|          | 0/10 [00:00<?, ?it/s]INFO:src.trainer:
Epoch 1/10
DEBUG:src.trainer:Training epoch
INFO:src.trainer:
Epoch 1/10Train - Loss: 0.9937, MSE: 0.9062, L1: 0.0876
 10%|█         | 1/10 [00:09<01:28,  9.84s/it]INFO:src.trainer:
Epoch 2/10
DEBUG:src.trainer:Training epoch
INFO:src.trainer:
Epoch 2/10Train - Loss: 0.9750, MSE: 0.8993, L1: 0.0757
 20%|██        | 2/10 [00:15<00:59,  7.44s/it]INFO:src.trainer:
Epoch 3/10
DEBUG:src.trainer:Training epoch
INFO:src.trainer:
Epoch 3/10Train - Loss: 0.9558, MSE: 0.8776, L1: 0.0782
 30%|███       | 3/10 [00:22<00:50,  7.20s/it]INFO:src.trainer:
Epoch 4/10
DEBUG:src.trainer:Training epoch
INFO:src.trainer:
Epoch 4/10Train - Loss: 0.9294, MSE: 0.8391, L1: 0.0903
 40%|████      | 4/10 [00:31<00:46,  7.71s/it]INFO:src.trainer:
Epoch 5/10
DEBUG:src.trainer:Training epoch
INFO:src.trainer:
Epoch 5/10Train - Loss: 0.8916, MSE: 0.7820, L1: 0.109

{'train_loss': [0.9937474727630615,
  0.9750198125839233,
  0.9557759761810303,
  0.9294005632400513,
  0.8915999531745911,
  0.8401167988777161,
  0.77577143907547,
  0.7030993700027466,
  0.6293515563011169,
  0.5617977380752563],
 'train_mse_loss': [0.9061938524246216,
  0.8992867469787598,
  0.877566397190094,
  0.8391162157058716,
  0.7820315361022949,
  0.7057018280029297,
  0.6126011610031128,
  0.5095096826553345,
  0.4063563048839569,
  0.31319576501846313],
 'train_l1_regularization': [0.08755367249250412,
  0.07573307305574417,
  0.07820960134267807,
  0.09028429538011551,
  0.10956841707229614,
  0.13441495597362518,
  0.16317030787467957,
  0.19358965754508972,
  0.22299526631832123,
  0.24860192835330963]}

In [14]:
plot_training_metrics(metrics, 'test.png')

In [15]:
extractor._get_final_layer().normalized_shape

(768,)

In [16]:
print(extractor.model)

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2SdpaAttention(
          (c_attn): Linear4bit(in_features=768, out_features=2304, bias=True)
          (c_proj): Linear4bit(in_features=768, out_features=768, bias=True)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Linear4bit(in_features=768, out_features=3072, bias=True)
          (c_proj): Linear4bit(in_features=3072, out_features=768, bias=True)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_a