# Setup

In [None]:
%pip install -r requirements.txt

In [31]:
import torch 
import argparse
from utils import dotdict
from activation_dataset import setup_token_data
import wandb
import json
from datetime import datetime
from tqdm import tqdm
from einops import rearrange
import matplotlib.pyplot as plt
import os
from baukit import Trace, TraceDict
import torch.nn as nn
from autoencoders.learned_dict import TiedSAE, UntiedSAE, AnthropicSAE

def init_cfg():
    """Build the baseline training configuration.

    Creates a new ``dotdict`` and assigns every default hyperparameter to it
    as an attribute, in the order listed in ``defaults`` below.

    Returns:
        The populated ``dotdict``. A new object (with new list values) is
        built on every call.
    """
    # Alternatives previously used for ``model_name``:
    #   "EleutherAI/pythia-6.9b", "lomahony/eleuther-pythia6.9b-hh-sft",
    #   "usvsnsp/pythia-6.9b-ppo", "Dahoas/gptj-rm-static",
    #   "reciprocate/dahoas-gptj-rm-static",
    #   "EleutherAI/pythia-70m", "lomahony/pythia-70m-helpful-sft",
    #   "lomahony/eleuther-pythia70m-hh-sft"
    # Alternatives for ``tensor_name``:
    #   "gpt_neox.layers.{layer}" or "transformer.h.{layer}"
    # Alternative for ``dataset_name``: "NeelNanda/pile-10k"
    # Alternative for ``device``: "cpu"
    #
    # NOTE(review): ``setting`` is "residual" while ``tensor_name`` points at
    # the ``.mlp`` sub-module -- confirm this combination is intended.
    defaults = (
        ("model_name", "EleutherAI/pythia-70m-deduped"),
        ("layers", [0, 1]),  # list several indices to run multiple layers
        ("setting", "residual"),
        ("tensor_name", "gpt_neox.layers.{layer}.mlp"),
        ("l1_alpha", 8e-4),
        ("l1_alphas", [8e-5, 1e-4, 2e-4, 4e-4, 8e-4, 1e-3, 2e-3, 4e-3, 8e-3]),
        ("sparsity", None),
        ("num_epochs", 2),
        ("model_batch_size", 8),
        ("lr", 1e-3),  # ORIGINAL: 1e-3
        ("kl", False),
        ("reconstruction", False),
        ("dataset_name", "Elriggs/openwebtext-100k"),
        ("device", "cuda:0"),
        ("ratio", 8),
        ("seed", 0),
    )

    cfg = dotdict()
    # Attribute assignment (not dict construction) keeps the exact semantics
    # of writing ``cfg.<field> = value`` one line at a time.
    for field, value in defaults:
        setattr(cfg, field, value)
    return cfg

# Main Code

In [35]:
def setup_execute_training(model_name,
                          dataset_name,
                          ratio,
                          layers,
                          seed,
                          wandb_log,
                          split,
                          epoches):
    """Configure one training run, execute it, and save one autoencoder per layer.

    Starts from ``init_cfg()``, overrides the fields given as arguments, then
    loads the model, builds the token loader and autoencoders, optionally sets
    up Weights & Biases, trains, and finally calls ``model_save`` once for
    every entry in ``cfg.layers``.

    Args:
        model_name: Stored as ``cfg.model_name``; also used (with "/" replaced
            by "_") as the first component of the storage path.
        dataset_name: Stored as ``cfg.dataset_name``; its last "/"-separated
            segment is used in the storage path.
        ratio: Stored as ``cfg.ratio``; multiplied by ``cfg.activation_size``
            to form the size that appears in the run name and file name.
        layers: Stored as ``cfg.layers``; one autoencoder is saved per entry.
        seed: Stored as ``cfg.seed`` and embedded in the run name and path.
        wandb_log: Stored as ``cfg.wandb_log``; when truthy, ``setup_wandb``
            is called before training.
        split: Forwarded to ``init_dataloader``.
        epoches: Stored as ``cfg.num_epochs``.

    Returns:
        None.
    """
    cfg = init_cfg()
    overrides = (
        ("num_epochs", epoches),
        ("model_name", model_name),
        ("dataset_name", dataset_name),
        ("ratio", ratio),
        ("layers", layers),
        ("seed", seed),
        ("wandb_log", wandb_log),
    )
    for field, value in overrides:
        setattr(cfg, field, value)

    model, tokenizer = load_model(cfg)
    # Presumably populates ``cfg.activation_size``, which is read just below
    # -- TODO confirm against the helper's definition.
    get_activation_size(cfg, model, tokenizer)

    # Naming: everything that identifies this run on disk / in wandb.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")[4:]  # drop the 4-digit year
    scaled_size = cfg.ratio * cfg.activation_size
    wandb_run_name = f"{cfg.model_name}_{cfg.dataset_name}_s{cfg.seed}_dim{scaled_size}_{timestamp}"
    model_dir = cfg.model_name.replace("/", "_")
    dataset_dir = cfg.dataset_name.split("/")[-1]
    storage_path = f"{model_dir}/{dataset_dir}_s{cfg.seed}"
    filename = f"{scaled_size}_{timestamp}"

    token_loader = init_dataloader(cfg, model, tokenizer, split)
    autoencoders, optimizers = init_autoencoder(cfg)

    if wandb_log:
        setup_wandb(cfg, wandb_run_name)

    training_run(cfg, model, optimizers, autoencoders, token_loader)

    # ``autoencoders`` is indexed by position in ``cfg.layers``, while the
    # layer id itself is passed through to ``model_save``.
    for position, layer_id in enumerate(cfg.layers):
        model_save(cfg, autoencoders[position], storage_path, filename, layer_id)

In [36]:
# Launch a full training run: pythia-160m on openwebtext-100k, ratio 32.
# NOTE(review): the output stored with this cell is a KeyboardInterrupt,
# i.e. the recorded execution did not run to completion.

# Model and data.
model_name = "EleutherAI/pythia-160m"
dataset_name = "Elriggs/openwebtext-100k"
split = "train"

# Run settings.
layers = list(range(6))  # layers 0 through 5
ratio = 32
seed = 0
epoches = 1
wandb_log = False

setup_execute_training(
    model_name=model_name,
    dataset_name=dataset_name,
    ratio=ratio,
    layers=layers,
    seed=seed,
    wandb_log=wandb_log,
    split=split,
    epoches=epoches,
)

KeyboardInterrupt: 

In [39]:
# Launch a full training run: pythia-160m on openwebtext-100k, ratio 16.
# NOTE(review): the output stored with this cell reports "Activation size: 512",
# a NeelNanda/pile-10k dataset cache, and files saved under
# trained_models/EleutherAI_pythia-70m/pile-10k_s0 -- none of which match the
# settings below. The stored output looks stale; re-run to confirm.

# Model and data.
model_name = "EleutherAI/pythia-160m"
dataset_name = "Elriggs/openwebtext-100k"
split = "train"

# Run settings.
layers = list(range(6))  # layers 0 through 5
ratio = 16
seed = 0
epoches = 1
wandb_log = False

setup_execute_training(
    model_name=model_name,
    dataset_name=dataset_name,
    ratio=ratio,
    layers=layers,
    seed=seed,
    wandb_log=wandb_log,
    split=split,
    epoches=epoches,
)

Activation size: 512


Found cached dataset parquet (/root/.cache/huggingface/datasets/NeelNanda___parquet/NeelNanda--pile-10k-72f566e9f7c464ab/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Loading cached processed dataset at /root/.cache/huggingface/datasets/NeelNanda___parquet/NeelNanda--pile-10k-72f566e9f7c464ab/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-c8791bdc71185487_*_of_00008.arrow


Number of tokens: 2441216


  0%|          | 3/1192 [00:00<00:47, 24.81it/s]

Sparsity: 4090.7 | Dead Features: 8192 | Total Loss: 0.3287 | Reconstruction Loss: 0.1124 | L1 Loss: 0.2163 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 1.0000
Sparsity: 4085.1 | Dead Features: 8192 | Total Loss: 0.2414 | Reconstruction Loss: 0.0772 | L1 Loss: 0.1643 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0003
Sparsity: 4072.4 | Dead Features: 8192 | Total Loss: 0.8384 | Reconstruction Loss: 0.6003 | L1 Loss: 0.2382 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0009
Sparsity: 4102.5 | Dead Features: 8192 | Total Loss: 0.6509 | Reconstruction Loss: 0.2983 | L1 Loss: 0.3525 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0004
Sparsity: 4076.5 | Dead Features: 8192 | Total Loss: 1.1337 | Reconstruction Loss: 0.6610 | L1 Loss: 0.4727 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0015
Sparsity: 4070.8 | Dead Features: 8192 | Total Loss: 9.9328 | Reconstruction Loss: 8.1406 | L1 Loss: 1.7922 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Sim

  9%|▉         | 105/1192 [00:04<00:44, 24.66it/s]

Sparsity: 80.4 | Dead Features: 0 | Total Loss: 0.0277 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0011
Sparsity: 32.8 | Dead Features: 0 | Total Loss: 0.0272 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0032 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0188
Sparsity: 40.5 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0302 | L1 Loss: 0.0098 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0165
Sparsity: 110.1 | Dead Features: 0 | Total Loss: 0.0693 | Reconstruction Loss: 0.0488 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0235
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.1034 | Reconstruction Loss: 0.0784 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0046
Sparsity: 206.8 | Dead Features: 0 | Total Loss: 0.3136 | Reconstruction Loss: 0.2266 | L1 Loss: 0.0869 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self

 17%|█▋        | 204/1192 [00:08<00:40, 24.59it/s]

Sparsity: 71.5 | Dead Features: 0 | Total Loss: 0.0217 | Reconstruction Loss: 0.0140 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0015
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0234 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0186
Sparsity: 43.5 | Dead Features: 0 | Total Loss: 0.0352 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0093 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0195
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0619 | Reconstruction Loss: 0.0389 | L1 Loss: 0.0229 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0241
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0920 | Reconstruction Loss: 0.0655 | L1 Loss: 0.0265 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0048
Sparsity: 136.4 | Dead Features: 0 | Total Loss: 0.2380 | Reconstruction Loss: 0.1695 | L1 Loss: 0.0685 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self

 25%|██▌       | 303/1192 [00:12<00:36, 24.51it/s]

Sparsity: 71.1 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0015
Sparsity: 57.2 | Dead Features: 0 | Total Loss: 0.0217 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0184
Sparsity: 50.4 | Dead Features: 0 | Total Loss: 0.0305 | Reconstruction Loss: 0.0220 | L1 Loss: 0.0084 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0218
Sparsity: 126.2 | Dead Features: 0 | Total Loss: 0.0573 | Reconstruction Loss: 0.0338 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0242
Sparsity: 126.5 | Dead Features: 0 | Total Loss: 0.0796 | Reconstruction Loss: 0.0522 | L1 Loss: 0.0274 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0048
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.2107 | Reconstruction Loss: 0.1467 | L1 Loss: 0.0640 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self

 34%|███▍      | 405/1192 [00:16<00:31, 25.02it/s]

Sparsity: 58.9 | Dead Features: 0 | Total Loss: 0.0164 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0016
Sparsity: 61.0 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0140 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0180
Sparsity: 52.6 | Dead Features: 0 | Total Loss: 0.0289 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0238
Sparsity: 128.2 | Dead Features: 0 | Total Loss: 0.0557 | Reconstruction Loss: 0.0319 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0245
Sparsity: 135.3 | Dead Features: 0 | Total Loss: 0.0770 | Reconstruction Loss: 0.0479 | L1 Loss: 0.0291 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0049
Sparsity: 124.2 | Dead Features: 0 | Total Loss: 0.2255 | Reconstruction Loss: 0.1610 | L1 Loss: 0.0645 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self

 42%|████▏     | 504/1192 [00:20<00:27, 24.93it/s]

Sparsity: 55.8 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0016
Sparsity: 60.4 | Dead Features: 0 | Total Loss: 0.0186 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0178
Sparsity: 52.3 | Dead Features: 0 | Total Loss: 0.0260 | Reconstruction Loss: 0.0177 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0258
Sparsity: 126.5 | Dead Features: 0 | Total Loss: 0.0513 | Reconstruction Loss: 0.0280 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0249
Sparsity: 136.0 | Dead Features: 0 | Total Loss: 0.0710 | Reconstruction Loss: 0.0423 | L1 Loss: 0.0287 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0050
Sparsity: 112.8 | Dead Features: 0 | Total Loss: 0.2005 | Reconstruction Loss: 0.1452 | L1 Loss: 0.0553 | l1_alpha: 8.0000e-04 | Tokens: 1024000 

 51%|█████     | 603/1192 [00:23<00:23, 24.93it/s]

Sparsity: 51.9 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0019
Sparsity: 61.2 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0175
Sparsity: 59.2 | Dead Features: 0 | Total Loss: 0.0271 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0088 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0281
Sparsity: 131.3 | Dead Features: 0 | Total Loss: 0.0535 | Reconstruction Loss: 0.0292 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0250
Sparsity: 145.5 | Dead Features: 0 | Total Loss: 0.0740 | Reconstruction Loss: 0.0432 | L1 Loss: 0.0309 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0051
Sparsity: 108.8 | Dead Features: 0 | Total Loss: 0.2079 | Reconstruction Loss: 0.1491 | L1 Loss: 0.0588 | l1_alpha: 8.0000e-04 | Tokens: 1228800 

 59%|█████▉    | 705/1192 [00:27<00:19, 25.06it/s]

Sparsity: 45.9 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0056 | L1 Loss: 0.0072 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0020
Sparsity: 60.0 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0110 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0168
Sparsity: 58.8 | Dead Features: 0 | Total Loss: 0.0253 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0302
Sparsity: 130.1 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0268 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0251
Sparsity: 145.1 | Dead Features: 0 | Total Loss: 0.0665 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0298 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0050
Sparsity: 90.2 | Dead Features: 0 | Total Loss: 0.1869 | Reconstruction Loss: 0.1271 | L1 Loss: 0.0598 | l1_alpha: 8.0000e-04 | Tokens: 1433600 |

 67%|██████▋   | 804/1192 [00:31<00:15, 24.90it/s]

Sparsity: 60.7 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0021
Sparsity: 65.8 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0163
Sparsity: 62.9 | Dead Features: 0 | Total Loss: 0.0251 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0089 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0322
Sparsity: 133.3 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0250
Sparsity: 149.8 | Dead Features: 0 | Total Loss: 0.0662 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0296 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0051
Sparsity: 100.2 | Dead Features: 0 | Total Loss: 0.1768 | Reconstruction Loss: 0.1227 | L1 Loss: 0.0541 | l1_alpha: 8.0000e-04 | Tokens: 1638400 

 76%|███████▌  | 903/1192 [00:35<00:11, 24.98it/s]

Sparsity: 50.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0024
Sparsity: 63.0 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0107 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0158
Sparsity: 60.9 | Dead Features: 0 | Total Loss: 0.0243 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0087 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0343
Sparsity: 134.0 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0252
Sparsity: 150.1 | Dead Features: 0 | Total Loss: 0.0654 | Reconstruction Loss: 0.0352 | L1 Loss: 0.0301 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0050
Sparsity: 93.1 | Dead Features: 0 | Total Loss: 0.1800 | Reconstruction Loss: 0.1240 | L1 Loss: 0.0560 | l1_alpha: 8.0000e-04 | Tokens: 1843200 |

 84%|████████▍ | 1005/1192 [00:39<00:07, 24.95it/s]

Sparsity: 51.7 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0025
Sparsity: 63.5 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0153
Sparsity: 64.0 | Dead Features: 0 | Total Loss: 0.0244 | Reconstruction Loss: 0.0155 | L1 Loss: 0.0089 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0368
Sparsity: 133.1 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0229 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0252
Sparsity: 151.1 | Dead Features: 0 | Total Loss: 0.0629 | Reconstruction Loss: 0.0338 | L1 Loss: 0.0291 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0050
Sparsity: 79.7 | Dead Features: 0 | Total Loss: 0.1665 | Reconstruction Loss: 0.1125 | L1 Loss: 0.0539 | l1_alpha: 8.0000e-04 | Tokens: 2048000 |

 93%|█████████▎| 1104/1192 [00:43<00:03, 24.92it/s]

Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0057 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0026
Sparsity: 66.0 | Dead Features: 0 | Total Loss: 0.0166 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0150
Sparsity: 67.0 | Dead Features: 0 | Total Loss: 0.0242 | Reconstruction Loss: 0.0150 | L1 Loss: 0.0092 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0394
Sparsity: 137.5 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0250
Sparsity: 158.8 | Dead Features: 0 | Total Loss: 0.0657 | Reconstruction Loss: 0.0347 | L1 Loss: 0.0310 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0050
Sparsity: 90.7 | Dead Features: 0 | Total Loss: 0.1870 | Reconstruction Loss: 0.1304 | L1 Loss: 0.0566 | l1_alpha: 8.0000e-04 | Tokens: 2252800 |

100%|██████████| 1192/1192 [00:47<00:00, 25.17it/s]
  0%|          | 3/1192 [00:00<00:48, 24.68it/s]

Sparsity: 61.3 | Dead Features: 8192 | Total Loss: 0.0155 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0026
Sparsity: 72.9 | Dead Features: 8192 | Total Loss: 0.0180 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0148
Sparsity: 75.1 | Dead Features: 8192 | Total Loss: 0.0258 | Reconstruction Loss: 0.0165 | L1 Loss: 0.0093 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0416
Sparsity: 144.7 | Dead Features: 8192 | Total Loss: 0.0524 | Reconstruction Loss: 0.0266 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0251
Sparsity: 164.0 | Dead Features: 8192 | Total Loss: 0.0683 | Reconstruction Loss: 0.0370 | L1 Loss: 0.0313 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0051
Sparsity: 134.3 | Dead Features: 8192 | Total Loss: 0.2029 | Reconstruction Loss: 0.1267 | L1 Loss: 0.0762 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity:

  9%|▉         | 105/1192 [00:04<00:43, 25.08it/s]

Sparsity: 68.6 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0084 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0027
Sparsity: 67.0 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0145
Sparsity: 69.0 | Dead Features: 0 | Total Loss: 0.0241 | Reconstruction Loss: 0.0151 | L1 Loss: 0.0089 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0437
Sparsity: 139.9 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0253
Sparsity: 158.2 | Dead Features: 0 | Total Loss: 0.0637 | Reconstruction Loss: 0.0339 | L1 Loss: 0.0299 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0050
Sparsity: 292.8 | Dead Features: 0 | Total Loss: 0.3259 | Reconstruction Loss: 0.1398 | L1 Loss: 0.1861 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self

 17%|█▋        | 204/1192 [00:08<00:37, 26.36it/s]

Sparsity: 50.2 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0062 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0028
Sparsity: 68.3 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0143
Sparsity: 70.2 | Dead Features: 0 | Total Loss: 0.0245 | Reconstruction Loss: 0.0155 | L1 Loss: 0.0090 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0464
Sparsity: 141.3 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0254
Sparsity: 165.5 | Dead Features: 0 | Total Loss: 0.0645 | Reconstruction Loss: 0.0345 | L1 Loss: 0.0300 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0051
Sparsity: 99.6 | Dead Features: 0 | Total Loss: 0.1747 | Reconstruction Loss: 0.1189 | L1 Loss: 0.0558 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self 

 25%|██▌       | 303/1192 [00:11<00:35, 24.82it/s]

Sparsity: 41.8 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0049 | L1 Loss: 0.0069 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0029
Sparsity: 59.0 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0135
Sparsity: 62.0 | Dead Features: 0 | Total Loss: 0.0221 | Reconstruction Loss: 0.0134 | L1 Loss: 0.0087 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0489
Sparsity: 131.7 | Dead Features: 0 | Total Loss: 0.0430 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0256
Sparsity: 155.3 | Dead Features: 0 | Total Loss: 0.0576 | Reconstruction Loss: 0.0291 | L1 Loss: 0.0285 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0053
Sparsity: 93.9 | Dead Features: 0 | Total Loss: 0.1644 | Reconstruction Loss: 0.1078 | L1 Loss: 0.0567 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self 

 34%|███▍      | 405/1192 [00:16<00:31, 24.99it/s]

Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0060 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0030
Sparsity: 68.9 | Dead Features: 0 | Total Loss: 0.0163 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0132
Sparsity: 70.7 | Dead Features: 0 | Total Loss: 0.0236 | Reconstruction Loss: 0.0147 | L1 Loss: 0.0089 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0515
Sparsity: 143.3 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0230 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0257
Sparsity: 164.9 | Dead Features: 0 | Total Loss: 0.0628 | Reconstruction Loss: 0.0328 | L1 Loss: 0.0300 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0054
Sparsity: 91.6 | Dead Features: 0 | Total Loss: 0.1699 | Reconstruction Loss: 0.1172 | L1 Loss: 0.0527 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self 

 42%|████▏     | 504/1192 [00:19<00:27, 25.20it/s]

Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0104 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0031
Sparsity: 54.8 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0128
Sparsity: 65.5 | Dead Features: 0 | Total Loss: 0.0217 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0088 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0538
Sparsity: 137.2 | Dead Features: 0 | Total Loss: 0.0445 | Reconstruction Loss: 0.0213 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0262
Sparsity: 152.4 | Dead Features: 0 | Total Loss: 0.0581 | Reconstruction Loss: 0.0292 | L1 Loss: 0.0289 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0054
Sparsity: 85.8 | Dead Features: 0 | Total Loss: 0.1664 | Reconstruction Loss: 0.1072 | L1 Loss: 0.0592 | l1_alpha: 8.0000e-04 | Tokens: 1024000 |

 51%|█████     | 603/1192 [00:23<00:23, 24.89it/s]

Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0110 | Reconstruction Loss: 0.0044 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0031
Sparsity: 60.9 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0125
Sparsity: 66.2 | Dead Features: 0 | Total Loss: 0.0217 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0086 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0562
Sparsity: 137.2 | Dead Features: 0 | Total Loss: 0.0437 | Reconstruction Loss: 0.0212 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0265
Sparsity: 153.8 | Dead Features: 0 | Total Loss: 0.0566 | Reconstruction Loss: 0.0291 | L1 Loss: 0.0275 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0054
Sparsity: 92.5 | Dead Features: 0 | Total Loss: 0.1588 | Reconstruction Loss: 0.1023 | L1 Loss: 0.0566 | l1_alpha: 8.0000e-04 | Tokens: 1228800 |

 59%|█████▉    | 705/1192 [00:27<00:19, 24.93it/s]

Sparsity: 38.5 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0046 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0032
Sparsity: 57.0 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0118
Sparsity: 67.0 | Dead Features: 0 | Total Loss: 0.0230 | Reconstruction Loss: 0.0136 | L1 Loss: 0.0094 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0582
Sparsity: 136.3 | Dead Features: 0 | Total Loss: 0.0450 | Reconstruction Loss: 0.0219 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0268
Sparsity: 161.0 | Dead Features: 0 | Total Loss: 0.0583 | Reconstruction Loss: 0.0288 | L1 Loss: 0.0295 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0055
Sparsity: 100.9 | Dead Features: 0 | Total Loss: 0.1667 | Reconstruction Loss: 0.1066 | L1 Loss: 0.0601 | l1_alpha: 8.0000e-04 | Tokens: 1433600 

 67%|██████▋   | 804/1192 [00:31<00:15, 24.87it/s]

Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0058 | L1 Loss: 0.0072 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0032
Sparsity: 64.7 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0115
Sparsity: 72.2 | Dead Features: 0 | Total Loss: 0.0225 | Reconstruction Loss: 0.0137 | L1 Loss: 0.0089 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0607
Sparsity: 143.0 | Dead Features: 0 | Total Loss: 0.0455 | Reconstruction Loss: 0.0225 | L1 Loss: 0.0230 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0279
Sparsity: 168.1 | Dead Features: 0 | Total Loss: 0.0608 | Reconstruction Loss: 0.0311 | L1 Loss: 0.0298 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0058
Sparsity: 90.1 | Dead Features: 0 | Total Loss: 0.1654 | Reconstruction Loss: 0.1128 | L1 Loss: 0.0526 | l1_alpha: 8.0000e-04 | Tokens: 1638400 |

 76%|███████▌  | 903/1192 [00:35<00:11, 24.98it/s]

Sparsity: 37.4 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0047 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0033
Sparsity: 62.4 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0107
Sparsity: 67.9 | Dead Features: 0 | Total Loss: 0.0218 | Reconstruction Loss: 0.0134 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0628
Sparsity: 142.0 | Dead Features: 0 | Total Loss: 0.0452 | Reconstruction Loss: 0.0219 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0286
Sparsity: 163.0 | Dead Features: 0 | Total Loss: 0.0588 | Reconstruction Loss: 0.0295 | L1 Loss: 0.0293 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0058
Sparsity: 88.4 | Dead Features: 0 | Total Loss: 0.1640 | Reconstruction Loss: 0.1076 | L1 Loss: 0.0564 | l1_alpha: 8.0000e-04 | Tokens: 1843200 |

 84%|████████▍ | 1005/1192 [00:39<00:07, 25.03it/s]

Sparsity: 39.9 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0050 | L1 Loss: 0.0068 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0034
Sparsity: 63.4 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0105
Sparsity: 72.1 | Dead Features: 0 | Total Loss: 0.0227 | Reconstruction Loss: 0.0136 | L1 Loss: 0.0091 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0649
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0451 | Reconstruction Loss: 0.0217 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0289
Sparsity: 165.4 | Dead Features: 0 | Total Loss: 0.0601 | Reconstruction Loss: 0.0301 | L1 Loss: 0.0300 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0057
Sparsity: 132.5 | Dead Features: 0 | Total Loss: 0.1736 | Reconstruction Loss: 0.1134 | L1 Loss: 0.0602 | l1_alpha: 8.0000e-04 | Tokens: 2048000 

 93%|█████████▎| 1104/1192 [00:43<00:03, 24.88it/s]

Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0106 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0035
Sparsity: 59.9 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0098
Sparsity: 69.2 | Dead Features: 0 | Total Loss: 0.0214 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0087 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0666
Sparsity: 141.1 | Dead Features: 0 | Total Loss: 0.0431 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0295
Sparsity: 159.0 | Dead Features: 0 | Total Loss: 0.0568 | Reconstruction Loss: 0.0289 | L1 Loss: 0.0279 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0056
Sparsity: 81.6 | Dead Features: 0 | Total Loss: 0.1599 | Reconstruction Loss: 0.1035 | L1 Loss: 0.0564 | l1_alpha: 8.0000e-04 | Tokens: 2252800 |

100%|██████████| 1192/1192 [00:47<00:00, 25.28it/s]


Saved file at: trained_models/EleutherAI_pythia-70m/pile-10k_s0/layer_0/L0_8192_1212-175906.pt
Saved file at: trained_models/EleutherAI_pythia-70m/pile-10k_s0/layer_1/L1_8192_1212-175906.pt
Saved file at: trained_models/EleutherAI_pythia-70m/pile-10k_s0/layer_2/L2_8192_1212-175906.pt
Saved file at: trained_models/EleutherAI_pythia-70m/pile-10k_s0/layer_3/L3_8192_1212-175906.pt
Saved file at: trained_models/EleutherAI_pythia-70m/pile-10k_s0/layer_4/L4_8192_1212-175906.pt
Saved file at: trained_models/EleutherAI_pythia-70m/pile-10k_s0/layer_5/L5_8192_1212-175906.pt


In [None]:
# Launch a full training run: pythia-160m on openwebtext-100k, ratio 2.

# Model and data.
model_name = "EleutherAI/pythia-160m"
dataset_name = "Elriggs/openwebtext-100k"
split = "train"

# Run settings.
layers = list(range(6))  # layers 0 through 5
ratio = 2
seed = 0
epoches = 1
wandb_log = False

setup_execute_training(
    model_name=model_name,
    dataset_name=dataset_name,
    ratio=ratio,
    layers=layers,
    seed=seed,
    wandb_log=wandb_log,
    split=split,
    epoches=epoches,
)

In [29]:
# Launch a full training run: pythia-70m on openwebtext-100k, ratio 2.

# Model and data.
model_name = "EleutherAI/pythia-70m"
dataset_name = "Elriggs/openwebtext-100k"
split = "train"

# Run settings.
layers = list(range(6))  # layers 0 through 5
ratio = 2
seed = 0
epoches = 1
wandb_log = False

setup_execute_training(
    model_name=model_name,
    dataset_name=dataset_name,
    ratio=ratio,
    layers=layers,
    seed=seed,
    wandb_log=wandb_log,
    split=split,
    epoches=epoches,
)

Activation size: 512


Found cached dataset parquet (/root/.cache/huggingface/datasets/Elriggs___parquet/Elriggs--openwebtext-100k-79076ecafee8a6d5/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Loading cached processed dataset at /root/.cache/huggingface/datasets/Elriggs___parquet/Elriggs--openwebtext-100k-79076ecafee8a6d5/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-d3de196f0681d12e_*_of_00008.arrow


Number of tokens: 112750592


  0%|          | 2/55054 [00:00<47:23, 19.36it/s]

Sparsity: 508.8 | Dead Features: 1024 | Total Loss: 0.1624 | Reconstruction Loss: 0.0987 | L1 Loss: 0.0637 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 1.0000
Sparsity: 507.4 | Dead Features: 1024 | Total Loss: 0.1081 | Reconstruction Loss: 0.0615 | L1 Loss: 0.0465 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0004
Sparsity: 511.9 | Dead Features: 1024 | Total Loss: 0.5248 | Reconstruction Loss: 0.4554 | L1 Loss: 0.0694 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0012
Sparsity: 515.2 | Dead Features: 1024 | Total Loss: 0.3679 | Reconstruction Loss: 0.2639 | L1 Loss: 0.1041 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0006
Sparsity: 527.3 | Dead Features: 1024 | Total Loss: 0.7415 | Reconstruction Loss: 0.5943 | L1 Loss: 0.1471 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0013
Sparsity: 518.9 | Dead Features: 1024 | Total Loss: 8.2464 | Reconstruction Loss: 7.6876 | L1 Loss: 0.5588 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similari

  0%|          | 106/55054 [00:02<24:32, 37.30it/s]

Sparsity: 44.3 | Dead Features: 0 | Total Loss: 0.0301 | Reconstruction Loss: 0.0224 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0004
Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0264 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0029 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0011
Sparsity: 26.8 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0302 | L1 Loss: 0.0109 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0016
Sparsity: 94.2 | Dead Features: 0 | Total Loss: 0.0706 | Reconstruction Loss: 0.0500 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0062
Sparsity: 90.8 | Dead Features: 0 | Total Loss: 0.1036 | Reconstruction Loss: 0.0750 | L1 Loss: 0.0287 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0044
Sparsity: 94.8 | Dead Features: 0 | Total Loss: 0.2881 | Reconstruction Loss: 0.1841 | L1 Loss: 0.1040 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Sim

  0%|          | 207/55054 [00:05<25:30, 35.83it/s]

Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0232 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0001
Sparsity: 30.3 | Dead Features: 0 | Total Loss: 0.0215 | Reconstruction Loss: 0.0173 | L1 Loss: 0.0043 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0012
Sparsity: 28.0 | Dead Features: 0 | Total Loss: 0.0323 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0084 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0051
Sparsity: 96.8 | Dead Features: 0 | Total Loss: 0.0586 | Reconstruction Loss: 0.0365 | L1 Loss: 0.0221 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0073
Sparsity: 86.4 | Dead Features: 0 | Total Loss: 0.0801 | Reconstruction Loss: 0.0542 | L1 Loss: 0.0259 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0038
Sparsity: 43.8 | Dead Features: 0 | Total Loss: 0.2030 | Reconstruction Loss: 0.1369 | L1 Loss: 0.0661 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Sim

  1%|          | 305/55054 [00:07<20:52, 43.70it/s]

Sparsity: 32.6 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0135 | L1 Loss: 0.0072 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0002
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0153 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0018
Sparsity: 30.8 | Dead Features: 0 | Total Loss: 0.0292 | Reconstruction Loss: 0.0220 | L1 Loss: 0.0072 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0086
Sparsity: 98.8 | Dead Features: 0 | Total Loss: 0.0557 | Reconstruction Loss: 0.0334 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0084
Sparsity: 92.3 | Dead Features: 0 | Total Loss: 0.0773 | Reconstruction Loss: 0.0504 | L1 Loss: 0.0268 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0036
Sparsity: 317.0 | Dead Features: 0 | Total Loss: 1.0259 | Reconstruction Loss: 0.5483 | L1 Loss: 0.4776 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self S

  1%|          | 405/55054 [00:10<22:21, 40.73it/s]

Sparsity: 31.4 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0008
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0139 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0022
Sparsity: 33.0 | Dead Features: 0 | Total Loss: 0.0284 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0119
Sparsity: 99.1 | Dead Features: 0 | Total Loss: 0.0533 | Reconstruction Loss: 0.0313 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0089
Sparsity: 94.0 | Dead Features: 0 | Total Loss: 0.0734 | Reconstruction Loss: 0.0466 | L1 Loss: 0.0267 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0034
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.2071 | Reconstruction Loss: 0.1479 | L1 Loss: 0.0592 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Si

  1%|          | 504/55054 [00:12<26:12, 34.70it/s]

Sparsity: 31.6 | Dead Features: 0 | Total Loss: 0.0185 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0072 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0014
Sparsity: 41.2 | Dead Features: 0 | Total Loss: 0.0186 | Reconstruction Loss: 0.0133 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0022
Sparsity: 34.4 | Dead Features: 0 | Total Loss: 0.0273 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0152
Sparsity: 102.2 | Dead Features: 0 | Total Loss: 0.0527 | Reconstruction Loss: 0.0302 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0090
Sparsity: 98.7 | Dead Features: 0 | Total Loss: 0.0721 | Reconstruction Loss: 0.0450 | L1 Loss: 0.0271 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0030
Sparsity: 30.8 | Dead Features: 0 | Total Loss: 0.1969 | Reconstruction Loss: 0.1413 | L1 Loss: 0.0555 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | 

  1%|          | 604/55054 [00:15<26:44, 33.93it/s]

Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.0189 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0016
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0133 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0025
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0278 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0184
Sparsity: 104.4 | Dead Features: 0 | Total Loss: 0.0536 | Reconstruction Loss: 0.0303 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0091
Sparsity: 101.2 | Dead Features: 0 | Total Loss: 0.0732 | Reconstruction Loss: 0.0452 | L1 Loss: 0.0280 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0025
Sparsity: 29.0 | Dead Features: 0 | Total Loss: 0.1889 | Reconstruction Loss: 0.1372 | L1 Loss: 0.0517 | l1_alpha: 8.0000e-04 | Tokens: 1228800 |

  1%|▏         | 707/55054 [00:18<26:26, 34.25it/s]

Sparsity: 30.8 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0071 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0020
Sparsity: 40.9 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0026
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0260 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0217
Sparsity: 102.5 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0272 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0091
Sparsity: 100.8 | Dead Features: 0 | Total Loss: 0.0672 | Reconstruction Loss: 0.0399 | L1 Loss: 0.0272 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0023
Sparsity: 29.8 | Dead Features: 0 | Total Loss: 0.1879 | Reconstruction Loss: 0.1361 | L1 Loss: 0.0517 | l1_alpha: 8.0000e-04 | Tokens: 1433600 |

  1%|▏         | 807/55054 [00:21<25:56, 34.85it/s]

Sparsity: 34.8 | Dead Features: 0 | Total Loss: 0.0185 | Reconstruction Loss: 0.0109 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0021
Sparsity: 45.6 | Dead Features: 0 | Total Loss: 0.0181 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0021
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0266 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0241
Sparsity: 104.3 | Dead Features: 0 | Total Loss: 0.0512 | Reconstruction Loss: 0.0281 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0097
Sparsity: 104.8 | Dead Features: 0 | Total Loss: 0.0710 | Reconstruction Loss: 0.0426 | L1 Loss: 0.0284 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0020
Sparsity: 30.0 | Dead Features: 0 | Total Loss: 0.1859 | Reconstruction Loss: 0.1352 | L1 Loss: 0.0507 | l1_alpha: 8.0000e-04 | Tokens: 1638400 |

  2%|▏         | 907/55054 [00:24<26:02, 34.66it/s]

Sparsity: 28.8 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0069 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0022
Sparsity: 42.1 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0018
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0255 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0261
Sparsity: 104.3 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0100
Sparsity: 104.5 | Dead Features: 0 | Total Loss: 0.0625 | Reconstruction Loss: 0.0353 | L1 Loss: 0.0272 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0016
Sparsity: 31.1 | Dead Features: 0 | Total Loss: 0.1845 | Reconstruction Loss: 0.1302 | L1 Loss: 0.0543 | l1_alpha: 8.0000e-04 | Tokens: 1843200 |

  2%|▏         | 1007/55054 [00:27<25:57, 34.71it/s]

Sparsity: 33.8 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0025
Sparsity: 44.5 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0009
Sparsity: 38.8 | Dead Features: 0 | Total Loss: 0.0251 | Reconstruction Loss: 0.0174 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0282
Sparsity: 104.7 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0102
Sparsity: 106.9 | Dead Features: 0 | Total Loss: 0.0662 | Reconstruction Loss: 0.0385 | L1 Loss: 0.0276 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0011
Sparsity: 30.5 | Dead Features: 0 | Total Loss: 0.1837 | Reconstruction Loss: 0.1336 | L1 Loss: 0.0501 | l1_alpha: 8.0000e-04 | Tokens: 2048000 |

  2%|▏         | 1105/55054 [00:29<25:40, 35.02it/s]

Sparsity: 31.6 | Dead Features: 0 | Total Loss: 0.0166 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0072 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0028
Sparsity: 46.5 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0002
Sparsity: 40.6 | Dead Features: 0 | Total Loss: 0.0254 | Reconstruction Loss: 0.0175 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0298
Sparsity: 105.3 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0105
Sparsity: 107.6 | Dead Features: 0 | Total Loss: 0.0666 | Reconstruction Loss: 0.0385 | L1 Loss: 0.0281 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0007
Sparsity: 30.6 | Dead Features: 0 | Total Loss: 0.1849 | Reconstruction Loss: 0.1348 | L1 Loss: 0.0501 | l1_alpha: 8.0000e-04 | Tokens: 2252800 |

  2%|▏         | 1207/55054 [00:32<22:02, 40.72it/s]

Sparsity: 31.1 | Dead Features: 0 | Total Loss: 0.0162 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0071 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0030
Sparsity: 43.4 | Dead Features: 0 | Total Loss: 0.0161 | Reconstruction Loss: 0.0107 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: 0.0005
Sparsity: 41.3 | Dead Features: 0 | Total Loss: 0.0247 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: 0.0309
Sparsity: 105.5 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0227 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: 0.0106
Sparsity: 109.3 | Dead Features: 0 | Total Loss: 0.0632 | Reconstruction Loss: 0.0356 | L1 Loss: 0.0276 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0003
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.1699 | Reconstruction Loss: 0.1205 | L1 Loss: 0.0494 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | 

  2%|▏         | 1305/55054 [00:35<25:47, 34.73it/s]

Sparsity: 31.1 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0070 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: -0.0031
Sparsity: 42.5 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0017
Sparsity: 40.8 | Dead Features: 0 | Total Loss: 0.0236 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0324
Sparsity: 103.8 | Dead Features: 0 | Total Loss: 0.0450 | Reconstruction Loss: 0.0226 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0108
Sparsity: 107.1 | Dead Features: 0 | Total Loss: 0.0600 | Reconstruction Loss: 0.0331 | L1 Loss: 0.0269 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0002
Sparsity: 27.1 | Dead Features: 0 | Total Loss: 0.1698 | Reconstruction Loss: 0.1224 | L1 Loss: 0.0474 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | S

  3%|▎         | 1405/55054 [00:38<25:42, 34.77it/s]

Sparsity: 28.1 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: -0.0033
Sparsity: 44.8 | Dead Features: 0 | Total Loss: 0.0161 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0030
Sparsity: 44.4 | Dead Features: 0 | Total Loss: 0.0250 | Reconstruction Loss: 0.0169 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0333
Sparsity: 107.9 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0109
Sparsity: 109.5 | Dead Features: 0 | Total Loss: 0.0644 | Reconstruction Loss: 0.0358 | L1 Loss: 0.0286 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0010
Sparsity: 33.1 | Dead Features: 0 | Total Loss: 0.1801 | Reconstruction Loss: 0.1280 | L1 Loss: 0.0521 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | S

  3%|▎         | 1504/55054 [00:40<21:50, 40.86it/s]

Sparsity: 30.4 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0069 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: -0.0035
Sparsity: 43.1 | Dead Features: 0 | Total Loss: 0.0156 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0041
Sparsity: 41.8 | Dead Features: 0 | Total Loss: 0.0233 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0342
Sparsity: 104.8 | Dead Features: 0 | Total Loss: 0.0446 | Reconstruction Loss: 0.0221 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0109
Sparsity: 109.1 | Dead Features: 0 | Total Loss: 0.0593 | Reconstruction Loss: 0.0323 | L1 Loss: 0.0270 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0016
Sparsity: 32.7 | Dead Features: 0 | Total Loss: 0.1653 | Reconstruction Loss: 0.1156 | L1 Loss: 0.0497 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | S

  3%|▎         | 1605/55054 [00:43<22:16, 39.99it/s]

Sparsity: 31.0 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0070 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: -0.0037
Sparsity: 45.4 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: 0.0057
Sparsity: 44.1 | Dead Features: 0 | Total Loss: 0.0241 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: 0.0348
Sparsity: 105.8 | Dead Features: 0 | Total Loss: 0.0457 | Reconstruction Loss: 0.0229 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: 0.0111
Sparsity: 110.9 | Dead Features: 0 | Total Loss: 0.0619 | Reconstruction Loss: 0.0342 | L1 Loss: 0.0277 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: 0.0024
Sparsity: 33.3 | Dead Features: 0 | Total Loss: 0.1770 | Reconstruction Loss: 0.1281 | L1 Loss: 0.0489 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | S

  3%|▎         | 1706/55054 [00:46<22:05, 40.25it/s]

Sparsity: 29.2 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0068 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: -0.0038
Sparsity: 42.7 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: 0.0071
Sparsity: 42.4 | Dead Features: 0 | Total Loss: 0.0228 | Reconstruction Loss: 0.0151 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: 0.0355
Sparsity: 104.9 | Dead Features: 0 | Total Loss: 0.0441 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: 0.0112
Sparsity: 109.0 | Dead Features: 0 | Total Loss: 0.0584 | Reconstruction Loss: 0.0318 | L1 Loss: 0.0266 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: 0.0033
Sparsity: 33.4 | Dead Features: 0 | Total Loss: 0.1655 | Reconstruction Loss: 0.1155 | L1 Loss: 0.0500 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | S

  3%|▎         | 1807/55054 [00:48<24:00, 36.96it/s]

Sparsity: 31.5 | Dead Features: 0 | Total Loss: 0.0159 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0071 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: -0.0040
Sparsity: 45.9 | Dead Features: 0 | Total Loss: 0.0160 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: 0.0085
Sparsity: 44.2 | Dead Features: 0 | Total Loss: 0.0239 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: 0.0356
Sparsity: 104.6 | Dead Features: 0 | Total Loss: 0.0445 | Reconstruction Loss: 0.0219 | L1 Loss: 0.0227 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: 0.0114
Sparsity: 109.3 | Dead Features: 0 | Total Loss: 0.0592 | Reconstruction Loss: 0.0322 | L1 Loss: 0.0270 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: 0.0041
Sparsity: 33.6 | Dead Features: 0 | Total Loss: 0.1646 | Reconstruction Loss: 0.1149 | L1 Loss: 0.0497 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | S

  3%|▎         | 1907/55054 [00:51<25:02, 35.37it/s]

Sparsity: 29.8 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: -0.0040
Sparsity: 45.2 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: 0.0101
Sparsity: 44.1 | Dead Features: 0 | Total Loss: 0.0227 | Reconstruction Loss: 0.0150 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: 0.0354
Sparsity: 105.5 | Dead Features: 0 | Total Loss: 0.0441 | Reconstruction Loss: 0.0217 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: 0.0118
Sparsity: 110.2 | Dead Features: 0 | Total Loss: 0.0590 | Reconstruction Loss: 0.0325 | L1 Loss: 0.0265 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: 0.0050
Sparsity: 34.1 | Dead Features: 0 | Total Loss: 0.1628 | Reconstruction Loss: 0.1120 | L1 Loss: 0.0507 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | S

  4%|▎         | 2006/55054 [00:53<22:10, 39.87it/s]

Sparsity: 29.8 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0068 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: -0.0037
Sparsity: 43.9 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: 0.0118
Sparsity: 43.5 | Dead Features: 0 | Total Loss: 0.0220 | Reconstruction Loss: 0.0144 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: 0.0351
Sparsity: 104.9 | Dead Features: 0 | Total Loss: 0.0429 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0221 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: 0.0120
Sparsity: 109.6 | Dead Features: 0 | Total Loss: 0.0565 | Reconstruction Loss: 0.0302 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: 0.0058
Sparsity: 32.9 | Dead Features: 0 | Total Loss: 0.1621 | Reconstruction Loss: 0.1142 | L1 Loss: 0.0479 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | S

  4%|▍         | 2109/55054 [00:56<20:41, 42.63it/s]

Sparsity: 29.1 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: -0.0039
Sparsity: 44.7 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: 0.0137
Sparsity: 45.5 | Dead Features: 0 | Total Loss: 0.0225 | Reconstruction Loss: 0.0147 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: 0.0355
Sparsity: 107.9 | Dead Features: 0 | Total Loss: 0.0441 | Reconstruction Loss: 0.0212 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: 0.0119
Sparsity: 111.0 | Dead Features: 0 | Total Loss: 0.0574 | Reconstruction Loss: 0.0304 | L1 Loss: 0.0270 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: 0.0065
Sparsity: 31.0 | Dead Features: 0 | Total Loss: 0.1733 | Reconstruction Loss: 0.1262 | L1 Loss: 0.0470 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | S

  4%|▍         | 2205/55054 [00:59<25:16, 34.85it/s]

Sparsity: 27.7 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: -0.0038
Sparsity: 42.7 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: 0.0154
Sparsity: 44.8 | Dead Features: 0 | Total Loss: 0.0227 | Reconstruction Loss: 0.0148 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: 0.0354
Sparsity: 106.3 | Dead Features: 0 | Total Loss: 0.0433 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: 0.0117
Sparsity: 109.1 | Dead Features: 0 | Total Loss: 0.0573 | Reconstruction Loss: 0.0313 | L1 Loss: 0.0260 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: 0.0076
Sparsity: 28.2 | Dead Features: 0 | Total Loss: 0.1865 | Reconstruction Loss: 0.1373 | L1 Loss: 0.0492 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | S

  4%|▍         | 2307/55054 [01:02<25:25, 34.57it/s]

Sparsity: 28.0 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: -0.0036
Sparsity: 44.3 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: 0.0168
Sparsity: 45.5 | Dead Features: 0 | Total Loss: 0.0227 | Reconstruction Loss: 0.0146 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: 0.0351
Sparsity: 107.3 | Dead Features: 0 | Total Loss: 0.0434 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: 0.0118
Sparsity: 111.8 | Dead Features: 0 | Total Loss: 0.0569 | Reconstruction Loss: 0.0304 | L1 Loss: 0.0265 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: 0.0083
Sparsity: 32.5 | Dead Features: 0 | Total Loss: 0.1656 | Reconstruction Loss: 0.1141 | L1 Loss: 0.0514 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | S

  4%|▍         | 2407/55054 [01:04<25:34, 34.30it/s]

Sparsity: 30.4 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0068 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: -0.0053
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0179
Sparsity: 45.9 | Dead Features: 0 | Total Loss: 0.0226 | Reconstruction Loss: 0.0146 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0347
Sparsity: 107.7 | Dead Features: 0 | Total Loss: 0.0434 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0226 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0119
Sparsity: 112.3 | Dead Features: 0 | Total Loss: 0.0574 | Reconstruction Loss: 0.0305 | L1 Loss: 0.0268 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0088
Sparsity: 33.7 | Dead Features: 0 | Total Loss: 0.1620 | Reconstruction Loss: 0.1116 | L1 Loss: 0.0505 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | S

  5%|▍         | 2506/55054 [01:07<21:43, 40.31it/s]

Sparsity: 26.3 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: -0.0052
Sparsity: 42.6 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0194
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0230 | Reconstruction Loss: 0.0148 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0351
Sparsity: 109.6 | Dead Features: 0 | Total Loss: 0.0443 | Reconstruction Loss: 0.0214 | L1 Loss: 0.0229 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0117
Sparsity: 114.6 | Dead Features: 0 | Total Loss: 0.0579 | Reconstruction Loss: 0.0310 | L1 Loss: 0.0269 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0096
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.1669 | Reconstruction Loss: 0.1171 | L1 Loss: 0.0499 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | S

  5%|▍         | 2604/55054 [01:10<23:21, 37.41it/s]

Sparsity: 27.4 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: -0.0053
Sparsity: 43.6 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0204
Sparsity: 45.3 | Dead Features: 0 | Total Loss: 0.0221 | Reconstruction Loss: 0.0143 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0352
Sparsity: 108.3 | Dead Features: 0 | Total Loss: 0.0428 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0116
Sparsity: 113.4 | Dead Features: 0 | Total Loss: 0.0558 | Reconstruction Loss: 0.0293 | L1 Loss: 0.0265 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0103
Sparsity: 34.2 | Dead Features: 0 | Total Loss: 0.1600 | Reconstruction Loss: 0.1093 | L1 Loss: 0.0507 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | S

  5%|▍         | 2704/55054 [01:13<24:15, 35.96it/s]

Sparsity: 28.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: -0.0054
Sparsity: 43.8 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0219
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0228 | Reconstruction Loss: 0.0147 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0348
Sparsity: 108.6 | Dead Features: 0 | Total Loss: 0.0432 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0115
Sparsity: 113.5 | Dead Features: 0 | Total Loss: 0.0569 | Reconstruction Loss: 0.0302 | L1 Loss: 0.0267 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0111
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.1633 | Reconstruction Loss: 0.1134 | L1 Loss: 0.0498 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | S

  5%|▌         | 2805/55054 [01:15<24:24, 35.69it/s]

Sparsity: 28.4 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: -0.0054
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0163 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0233
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0237 | Reconstruction Loss: 0.0155 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0344
Sparsity: 113.3 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0221 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0112
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0604 | Reconstruction Loss: 0.0315 | L1 Loss: 0.0289 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0117
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.1777 | Reconstruction Loss: 0.1249 | L1 Loss: 0.0528 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | S

  5%|▌         | 2909/55054 [01:18<20:35, 42.20it/s]

Sparsity: 28.3 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: -0.0053
Sparsity: 43.2 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0246
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0221 | Reconstruction Loss: 0.0142 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0340
Sparsity: 108.3 | Dead Features: 0 | Total Loss: 0.0424 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0110
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0559 | Reconstruction Loss: 0.0296 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0127
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.1590 | Reconstruction Loss: 0.1095 | L1 Loss: 0.0495 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | S

  5%|▌         | 3007/55054 [01:21<24:14, 35.79it/s]

Sparsity: 26.6 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: -0.0055
Sparsity: 42.0 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0260
Sparsity: 45.6 | Dead Features: 0 | Total Loss: 0.0226 | Reconstruction Loss: 0.0145 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0339
Sparsity: 109.6 | Dead Features: 0 | Total Loss: 0.0424 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0107
Sparsity: 113.8 | Dead Features: 0 | Total Loss: 0.0556 | Reconstruction Loss: 0.0295 | L1 Loss: 0.0260 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0137
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.1611 | Reconstruction Loss: 0.1126 | L1 Loss: 0.0485 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | S

  6%|▌         | 3107/55054 [01:24<24:01, 36.03it/s]

Sparsity: 28.6 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: -0.0057
Sparsity: 44.5 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: 0.0272
Sparsity: 45.7 | Dead Features: 0 | Total Loss: 0.0223 | Reconstruction Loss: 0.0143 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: 0.0334
Sparsity: 109.1 | Dead Features: 0 | Total Loss: 0.0424 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: 0.0103
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0550 | Reconstruction Loss: 0.0286 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: 0.0144
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.1591 | Reconstruction Loss: 0.1087 | L1 Loss: 0.0504 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | S

  6%|▌         | 3207/55054 [01:26<24:05, 35.86it/s]

Sparsity: 26.9 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: -0.0055
Sparsity: 44.3 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: 0.0284
Sparsity: 46.1 | Dead Features: 0 | Total Loss: 0.0227 | Reconstruction Loss: 0.0148 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: 0.0333
Sparsity: 105.7 | Dead Features: 0 | Total Loss: 0.0445 | Reconstruction Loss: 0.0220 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: 0.0102
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0565 | Reconstruction Loss: 0.0300 | L1 Loss: 0.0264 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: 0.0152
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.1631 | Reconstruction Loss: 0.1128 | L1 Loss: 0.0503 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | S

  6%|▌         | 3307/55054 [01:29<23:31, 36.65it/s]

Sparsity: 29.4 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: -0.0054
Sparsity: 45.7 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: 0.0292
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0223 | Reconstruction Loss: 0.0144 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: 0.0337
Sparsity: 109.7 | Dead Features: 0 | Total Loss: 0.0435 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: 0.0102
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0573 | Reconstruction Loss: 0.0300 | L1 Loss: 0.0273 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: 0.0156
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.1612 | Reconstruction Loss: 0.1117 | L1 Loss: 0.0495 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | S

  6%|▌         | 3407/55054 [01:32<23:56, 35.96it/s]

Sparsity: 28.4 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: -0.0054
Sparsity: 44.7 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: 0.0299
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0224 | Reconstruction Loss: 0.0145 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: 0.0340
Sparsity: 110.0 | Dead Features: 0 | Total Loss: 0.0432 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: 0.0108
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0555 | Reconstruction Loss: 0.0293 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: 0.0160
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.1563 | Reconstruction Loss: 0.1077 | L1 Loss: 0.0486 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | S

  6%|▋         | 3507/55054 [01:35<25:15, 34.02it/s]

Sparsity: 26.6 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: -0.0055
Sparsity: 43.4 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: 0.0311
Sparsity: 45.9 | Dead Features: 0 | Total Loss: 0.0219 | Reconstruction Loss: 0.0141 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: 0.0340
Sparsity: 110.2 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0221 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: 0.0102
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0547 | Reconstruction Loss: 0.0282 | L1 Loss: 0.0265 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: 0.0166
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.1523 | Reconstruction Loss: 0.1017 | L1 Loss: 0.0506 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | S

  7%|▋         | 3607/55054 [01:37<24:47, 34.59it/s]

Sparsity: 24.5 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: -0.0053
Sparsity: 40.8 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: 0.0323
Sparsity: 45.0 | Dead Features: 0 | Total Loss: 0.0214 | Reconstruction Loss: 0.0136 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: 0.0336
Sparsity: 109.8 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: 0.0098
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0522 | Reconstruction Loss: 0.0270 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: 0.0172
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.1501 | Reconstruction Loss: 0.0998 | L1 Loss: 0.0503 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | S

  7%|▋         | 3704/55054 [01:40<24:06, 35.50it/s]

Sparsity: 23.7 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: -0.0053
Sparsity: 40.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: 0.0334
Sparsity: 45.5 | Dead Features: 0 | Total Loss: 0.0219 | Reconstruction Loss: 0.0139 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: 0.0330
Sparsity: 110.5 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: 0.0094
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0530 | Reconstruction Loss: 0.0271 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: 0.0176
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.1566 | Reconstruction Loss: 0.1072 | L1 Loss: 0.0494 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | S

  7%|▋         | 3804/55054 [01:43<23:34, 36.23it/s]

Sparsity: 24.6 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: -0.0053
Sparsity: 43.4 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: 0.0342
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0220 | Reconstruction Loss: 0.0140 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: 0.0326
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0464 | Reconstruction Loss: 0.0217 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: 0.0090
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0546 | Reconstruction Loss: 0.0273 | L1 Loss: 0.0273 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: 0.0177
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.1712 | Reconstruction Loss: 0.1164 | L1 Loss: 0.0548 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | S

  7%|▋         | 3904/55054 [01:46<24:02, 35.47it/s]

Sparsity: 25.8 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: -0.0053
Sparsity: 43.6 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: 0.0347
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0227 | Reconstruction Loss: 0.0145 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: 0.0321
Sparsity: 112.8 | Dead Features: 0 | Total Loss: 0.0433 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: 0.0085
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0551 | Reconstruction Loss: 0.0277 | L1 Loss: 0.0274 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: 0.0183
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.1541 | Reconstruction Loss: 0.1049 | L1 Loss: 0.0492 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | S

  7%|▋         | 4004/55054 [01:49<23:29, 36.23it/s]

Sparsity: 27.6 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: -0.0052
Sparsity: 45.5 | Dead Features: 0 | Total Loss: 0.0156 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: 0.0351
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0228 | Reconstruction Loss: 0.0149 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: 0.0320
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0446 | Reconstruction Loss: 0.0213 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: 0.0079
Sparsity: 123.0 | Dead Features: 0 | Total Loss: 0.0585 | Reconstruction Loss: 0.0312 | L1 Loss: 0.0274 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: 0.0177
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.1658 | Reconstruction Loss: 0.1152 | L1 Loss: 0.0506 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | S

  7%|▋         | 4104/55054 [01:51<23:38, 35.92it/s]

Sparsity: 27.8 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: -0.0050
Sparsity: 44.2 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: 0.0354
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.0226 | Reconstruction Loss: 0.0145 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: 0.0315
Sparsity: 114.5 | Dead Features: 0 | Total Loss: 0.0430 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0226 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: 0.0072
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0546 | Reconstruction Loss: 0.0284 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: 0.0177
Sparsity: 39.2 | Dead Features: 0 | Total Loss: 0.1509 | Reconstruction Loss: 0.1020 | L1 Loss: 0.0489 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | S

  8%|▊         | 4204/55054 [01:54<24:00, 35.31it/s]

Sparsity: 23.7 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: -0.0051
Sparsity: 41.5 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: 0.0355
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0221 | Reconstruction Loss: 0.0139 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: 0.0308
Sparsity: 113.0 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: 0.0067
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0529 | Reconstruction Loss: 0.0272 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: 0.0171
Sparsity: 40.0 | Dead Features: 0 | Total Loss: 0.1516 | Reconstruction Loss: 0.1010 | L1 Loss: 0.0506 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | S

  8%|▊         | 4304/55054 [01:57<23:28, 36.03it/s]

Sparsity: 30.2 | Dead Features: 0 | Total Loss: 0.0163 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0070 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: -0.0051
Sparsity: 46.3 | Dead Features: 0 | Total Loss: 0.0165 | Reconstruction Loss: 0.0109 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: 0.0364
Sparsity: 50.8 | Dead Features: 0 | Total Loss: 0.0236 | Reconstruction Loss: 0.0152 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: 0.0305
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0449 | Reconstruction Loss: 0.0219 | L1 Loss: 0.0230 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: 0.0063
Sparsity: 127.5 | Dead Features: 0 | Total Loss: 0.0595 | Reconstruction Loss: 0.0314 | L1 Loss: 0.0281 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: 0.0171
Sparsity: 41.0 | Dead Features: 0 | Total Loss: 0.1673 | Reconstruction Loss: 0.1192 | L1 Loss: 0.0481 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | S

  8%|▊         | 4407/55054 [02:00<22:31, 37.48it/s]

Sparsity: 26.2 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: -0.0049
Sparsity: 45.1 | Dead Features: 0 | Total Loss: 0.0156 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: 0.0366
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0226 | Reconstruction Loss: 0.0146 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: 0.0300
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0448 | Reconstruction Loss: 0.0218 | L1 Loss: 0.0230 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: 0.0056
Sparsity: 122.7 | Dead Features: 0 | Total Loss: 0.0583 | Reconstruction Loss: 0.0306 | L1 Loss: 0.0277 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: 0.0165
Sparsity: 38.8 | Dead Features: 0 | Total Loss: 0.1666 | Reconstruction Loss: 0.1171 | L1 Loss: 0.0494 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | S

  8%|▊         | 4507/55054 [02:02<23:42, 35.52it/s]

Sparsity: 26.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: -0.0049
Sparsity: 44.1 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: 0.0369
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0219 | Reconstruction Loss: 0.0140 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: 0.0296
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0429 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: 0.0051
Sparsity: 123.4 | Dead Features: 0 | Total Loss: 0.0554 | Reconstruction Loss: 0.0284 | L1 Loss: 0.0269 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: 0.0165
Sparsity: 41.4 | Dead Features: 0 | Total Loss: 0.1583 | Reconstruction Loss: 0.1079 | L1 Loss: 0.0504 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | S

  8%|▊         | 4605/55054 [02:05<23:19, 36.06it/s]

Sparsity: 23.8 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: -0.0051
Sparsity: 41.7 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: 0.0365
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0217 | Reconstruction Loss: 0.0137 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: 0.0294
Sparsity: 114.1 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: 0.0046
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0525 | Reconstruction Loss: 0.0268 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: 0.0159
Sparsity: 41.9 | Dead Features: 0 | Total Loss: 0.1531 | Reconstruction Loss: 0.1029 | L1 Loss: 0.0502 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | S

  9%|▊         | 4705/55054 [02:08<23:08, 36.25it/s]

Sparsity: 24.7 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: -0.0051
Sparsity: 41.8 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: 0.0369
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0217 | Reconstruction Loss: 0.0138 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: 0.0290
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: 0.0038
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0524 | Reconstruction Loss: 0.0270 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: 0.0156
Sparsity: 43.2 | Dead Features: 0 | Total Loss: 0.1533 | Reconstruction Loss: 0.1010 | L1 Loss: 0.0524 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | S

  9%|▊         | 4805/55054 [02:11<23:20, 35.88it/s]

Sparsity: 25.4 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: -0.0048
Sparsity: 42.3 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: 0.0373
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0223 | Reconstruction Loss: 0.0143 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: 0.0284
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0427 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: 0.0029
Sparsity: 124.3 | Dead Features: 0 | Total Loss: 0.0538 | Reconstruction Loss: 0.0276 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: 0.0154
Sparsity: 42.4 | Dead Features: 0 | Total Loss: 0.1472 | Reconstruction Loss: 0.0969 | L1 Loss: 0.0503 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | S

  9%|▉         | 4905/55054 [02:13<23:18, 35.87it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: -0.0049
Sparsity: 39.0 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: 0.0372
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0215 | Reconstruction Loss: 0.0135 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: 0.0276
Sparsity: 113.6 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: 0.0019
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0510 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: 0.0146
Sparsity: 43.0 | Dead Features: 0 | Total Loss: 0.1531 | Reconstruction Loss: 0.1011 | L1 Loss: 0.0520 | l1_alpha: 8.0000e-04 | Tokens: 100352

  9%|▉         | 5006/55054 [02:16<21:31, 38.74it/s]

Sparsity: 24.0 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: -0.0047
Sparsity: 39.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: 0.0373
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0219 | Reconstruction Loss: 0.0138 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: 0.0270
Sparsity: 114.5 | Dead Features: 0 | Total Loss: 0.0415 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: 0.0014
Sparsity: 122.8 | Dead Features: 0 | Total Loss: 0.0521 | Reconstruction Loss: 0.0269 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: 0.0142
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.1791 | Reconstruction Loss: 0.1318 | L1 Loss: 0.0474 | l1_alpha: 8.0000e-04 | Tokens: 102400

  9%|▉         | 5106/55054 [02:19<23:04, 36.07it/s]

Sparsity: 23.5 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: -0.0048
Sparsity: 39.7 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: 0.0374
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0213 | Reconstruction Loss: 0.0136 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: 0.0267
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0414 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: 0.0005
Sparsity: 124.1 | Dead Features: 0 | Total Loss: 0.0527 | Reconstruction Loss: 0.0271 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: 0.0137
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.1567 | Reconstruction Loss: 0.1074 | L1 Loss: 0.0493 | l1_alpha: 8.0000e-04 | Tokens: 104448

  9%|▉         | 5204/55054 [02:22<23:51, 34.83it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: -0.0050
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: 0.0375
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: 0.0264
Sparsity: 114.5 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: -0.0002
Sparsity: 121.6 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: 0.0134
Sparsity: 41.5 | Dead Features: 0 | Total Loss: 0.1426 | Reconstruction Loss: 0.0928 | L1 Loss: 0.0498 | l1_alpha: 8.0000e-04 | Tokens: 10649

 10%|▉         | 5304/55054 [02:24<23:40, 35.03it/s]

Sparsity: 24.5 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: -0.0053
Sparsity: 40.6 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: 0.0379
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0132 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: 0.0259
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: -0.0005
Sparsity: 123.1 | Dead Features: 0 | Total Loss: 0.0517 | Reconstruction Loss: 0.0265 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: 0.0132
Sparsity: 42.7 | Dead Features: 0 | Total Loss: 0.1462 | Reconstruction Loss: 0.0981 | L1 Loss: 0.0480 | l1_alpha: 8.0000e-04 | Tokens: 10854

 10%|▉         | 5407/55054 [02:27<23:11, 35.68it/s]

Sparsity: 25.2 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: -0.0049
Sparsity: 41.9 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: 0.0371
Sparsity: 49.1 | Dead Features: 0 | Total Loss: 0.0221 | Reconstruction Loss: 0.0140 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: 0.0253
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0428 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: -0.0017
Sparsity: 125.5 | Dead Features: 0 | Total Loss: 0.0551 | Reconstruction Loss: 0.0286 | L1 Loss: 0.0265 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: 0.0130
Sparsity: 44.2 | Dead Features: 0 | Total Loss: 0.1567 | Reconstruction Loss: 0.1066 | L1 Loss: 0.0502 | l1_alpha: 8.0000e-04 | Tokens: 11059

 10%|█         | 5507/55054 [02:30<23:43, 34.80it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: -0.0046
Sparsity: 38.5 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: 0.0373
Sparsity: 45.9 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: 0.0246
Sparsity: 114.2 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: -0.0022
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: 0.0122
Sparsity: 44.5 | Dead Features: 0 | Total Loss: 0.1394 | Reconstruction Loss: 0.0898 | L1 Loss: 0.0496 | l1_alpha: 8.0000e-04 | Tokens: 11264

 10%|█         | 5607/55054 [02:33<23:40, 34.81it/s]

Sparsity: 24.7 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: -0.0047
Sparsity: 41.8 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: 0.0370
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0222 | Reconstruction Loss: 0.0141 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: 0.0243
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: -0.0025
Sparsity: 126.2 | Dead Features: 0 | Total Loss: 0.0539 | Reconstruction Loss: 0.0276 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: 0.0120
Sparsity: 45.3 | Dead Features: 0 | Total Loss: 0.1516 | Reconstruction Loss: 0.1018 | L1 Loss: 0.0498 | l1_alpha: 8.0000e-04 | Tokens: 11468

 10%|█         | 5707/55054 [02:36<23:43, 34.65it/s]

Sparsity: 24.3 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: -0.0049
Sparsity: 40.5 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: 0.0370
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0211 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: 0.0234
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: -0.0037
Sparsity: 123.8 | Dead Features: 0 | Total Loss: 0.0512 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: 0.0115
Sparsity: 44.1 | Dead Features: 0 | Total Loss: 0.1474 | Reconstruction Loss: 0.0972 | L1 Loss: 0.0501 | l1_alpha: 8.0000e-04 | Tokens: 11673

 11%|█         | 5807/55054 [02:39<23:17, 35.25it/s]

Sparsity: 26.2 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: -0.0047
Sparsity: 42.4 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: 0.0364
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0212 | Reconstruction Loss: 0.0135 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: 0.0230
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0417 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: -0.0043
Sparsity: 127.3 | Dead Features: 0 | Total Loss: 0.0527 | Reconstruction Loss: 0.0267 | L1 Loss: 0.0260 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: 0.0112
Sparsity: 44.8 | Dead Features: 0 | Total Loss: 0.1494 | Reconstruction Loss: 0.1005 | L1 Loss: 0.0488 | l1_alpha: 8.0000e-04 | Tokens: 11878

 11%|█         | 5907/55054 [02:42<23:37, 34.66it/s]

Sparsity: 23.6 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: -0.0047
Sparsity: 39.2 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: 0.0369
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0217 | Reconstruction Loss: 0.0134 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: 0.0225
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: -0.0050
Sparsity: 125.5 | Dead Features: 0 | Total Loss: 0.0514 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: 0.0105
Sparsity: 46.5 | Dead Features: 0 | Total Loss: 0.1462 | Reconstruction Loss: 0.0978 | L1 Loss: 0.0483 | l1_alpha: 8.0000e-04 | Tokens: 12083

 11%|█         | 6004/55054 [02:44<21:08, 38.66it/s]

Sparsity: 24.0 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: -0.0047
Sparsity: 40.1 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: 0.0364
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0211 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: 0.0219
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: -0.0055
Sparsity: 124.2 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: 0.0098
Sparsity: 44.0 | Dead Features: 0 | Total Loss: 0.1399 | Reconstruction Loss: 0.0907 | L1 Loss: 0.0492 | l1_alpha: 8.0000e-04 | Tokens: 12288

 11%|█         | 6106/55054 [02:47<16:42, 48.81it/s]

Sparsity: 23.9 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: -0.0048
Sparsity: 40.1 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: 0.0360
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0216 | Reconstruction Loss: 0.0134 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: 0.0208
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: -0.0062
Sparsity: 125.9 | Dead Features: 0 | Total Loss: 0.0513 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: 0.0094
Sparsity: 46.3 | Dead Features: 0 | Total Loss: 0.1397 | Reconstruction Loss: 0.0911 | L1 Loss: 0.0486 | l1_alpha: 8.0000e-04 | Tokens: 12492

 11%|█▏        | 6206/55054 [02:49<23:00, 35.37it/s]

Sparsity: 23.3 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: -0.0048
Sparsity: 38.8 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: 0.0355
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0213 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: 0.0204
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: -0.0072
Sparsity: 124.5 | Dead Features: 0 | Total Loss: 0.0511 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: 0.0088
Sparsity: 45.2 | Dead Features: 0 | Total Loss: 0.1479 | Reconstruction Loss: 0.0993 | L1 Loss: 0.0486 | l1_alpha: 8.0000e-04 | Tokens: 12697

 11%|█▏        | 6306/55054 [02:52<22:30, 36.10it/s]

Sparsity: 25.6 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: -0.0048
Sparsity: 40.9 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: 0.0355
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0217 | Reconstruction Loss: 0.0136 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: 0.0198
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0415 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: -0.0079
Sparsity: 127.7 | Dead Features: 0 | Total Loss: 0.0537 | Reconstruction Loss: 0.0268 | L1 Loss: 0.0269 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: 0.0088
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.1500 | Reconstruction Loss: 0.1017 | L1 Loss: 0.0484 | l1_alpha: 8.0000e-04 | Tokens: 12902

 12%|█▏        | 6406/55054 [02:55<22:33, 35.95it/s]

Sparsity: 23.2 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: -0.0047
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: 0.0354
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: 0.0190
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0401 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: -0.0084
Sparsity: 123.8 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: 0.0083
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.1358 | Reconstruction Loss: 0.0880 | L1 Loss: 0.0478 | l1_alpha: 8.0000e-04 | Tokens: 13107

 12%|█▏        | 6506/55054 [02:58<22:26, 36.07it/s]

Sparsity: 30.6 | Dead Features: 0 | Total Loss: 0.0185 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0070 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: -0.0049
Sparsity: 41.3 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: 0.0354
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0222 | Reconstruction Loss: 0.0139 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: 0.0185
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0423 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: -0.0091
Sparsity: 129.6 | Dead Features: 0 | Total Loss: 0.0528 | Reconstruction Loss: 0.0273 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: 0.0077
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.1474 | Reconstruction Loss: 0.1015 | L1 Loss: 0.0459 | l1_alpha: 8.0000e-04 | Tokens: 13312

 12%|█▏        | 6609/55054 [03:01<20:18, 39.76it/s]

Sparsity: 23.1 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: -0.0044
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: 0.0354
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0216 | Reconstruction Loss: 0.0137 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: 0.0177
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: -0.0094
Sparsity: 125.9 | Dead Features: 0 | Total Loss: 0.0534 | Reconstruction Loss: 0.0276 | L1 Loss: 0.0259 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: 0.0076
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.1489 | Reconstruction Loss: 0.1000 | L1 Loss: 0.0489 | l1_alpha: 8.0000e-04 | Tokens: 13516

 12%|█▏        | 6707/55054 [03:03<23:10, 34.78it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: -0.0046
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: 0.0350
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: 0.0176
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: -0.0096
Sparsity: 125.7 | Dead Features: 0 | Total Loss: 0.0506 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: 0.0072
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.1437 | Reconstruction Loss: 0.0940 | L1 Loss: 0.0497 | l1_alpha: 8.0000e-04 | Tokens: 13721

 12%|█▏        | 6807/55054 [03:06<22:58, 35.00it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: -0.0046
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: 0.0348
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0130 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: 0.0167
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: -0.0101
Sparsity: 124.4 | Dead Features: 0 | Total Loss: 0.0498 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: 0.0064
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.1374 | Reconstruction Loss: 0.0900 | L1 Loss: 0.0474 | l1_alpha: 8.0000e-04 | Tokens: 13926

 13%|█▎        | 6907/55054 [03:09<22:48, 35.19it/s]

Sparsity: 26.6 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: -0.0046
Sparsity: 41.1 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: 0.0349
Sparsity: 50.2 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: 0.0162
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: -0.0105
Sparsity: 128.6 | Dead Features: 0 | Total Loss: 0.0520 | Reconstruction Loss: 0.0262 | L1 Loss: 0.0259 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: 0.0061
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.1436 | Reconstruction Loss: 0.0967 | L1 Loss: 0.0470 | l1_alpha: 8.0000e-04 | Tokens: 14131

 13%|█▎        | 7007/55054 [03:12<22:57, 34.88it/s]

Sparsity: 25.6 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: -0.0050
Sparsity: 43.1 | Dead Features: 0 | Total Loss: 0.0156 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: 0.0344
Sparsity: 53.6 | Dead Features: 0 | Total Loss: 0.0221 | Reconstruction Loss: 0.0136 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: 0.0158
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0424 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: -0.0111
Sparsity: 130.1 | Dead Features: 0 | Total Loss: 0.0516 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0264 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: 0.0053
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.1420 | Reconstruction Loss: 0.0946 | L1 Loss: 0.0473 | l1_alpha: 8.0000e-04 | Tokens: 14336

 13%|█▎        | 7107/55054 [03:15<23:13, 34.40it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: -0.0051
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: 0.0343
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: 0.0155
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: -0.0118
Sparsity: 123.5 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: 0.0053
Sparsity: 53.6 | Dead Features: 0 | Total Loss: 0.1433 | Reconstruction Loss: 0.0913 | L1 Loss: 0.0520 | l1_alpha: 8.0000e-04 | Tokens: 14540

 13%|█▎        | 7205/55054 [03:17<20:07, 39.62it/s]

Sparsity: 25.8 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: -0.0052
Sparsity: 41.5 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: 0.0335
Sparsity: 50.9 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: 0.0154
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: -0.0119
Sparsity: 131.0 | Dead Features: 0 | Total Loss: 0.0521 | Reconstruction Loss: 0.0264 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: 0.0046
Sparsity: 52.6 | Dead Features: 0 | Total Loss: 0.1418 | Reconstruction Loss: 0.0948 | L1 Loss: 0.0470 | l1_alpha: 8.0000e-04 | Tokens: 14745

 13%|█▎        | 7305/55054 [03:20<21:57, 36.24it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: -0.0052
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: 0.0335
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: 0.0150
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: -0.0123
Sparsity: 125.4 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: 0.0045
Sparsity: 53.9 | Dead Features: 0 | Total Loss: 0.1392 | Reconstruction Loss: 0.0900 | L1 Loss: 0.0492 | l1_alpha: 8.0000e-04 | Tokens: 14950

 13%|█▎        | 7407/55054 [03:23<22:15, 35.68it/s]

Sparsity: 18.5 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0060 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: -0.0052
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: 0.0334
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: 0.0146
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: -0.0123
Sparsity: 123.7 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: 0.0040
Sparsity: 54.3 | Dead Features: 0 | Total Loss: 0.1310 | Reconstruction Loss: 0.0801 | L1 Loss: 0.0509 | l1_alpha: 8.0000e-04 | Tokens: 15155

 14%|█▎        | 7507/55054 [03:26<21:48, 36.33it/s]

Sparsity: 23.3 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: -0.0049
Sparsity: 38.9 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: 0.0333
Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0130 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: 0.0142
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: -0.0131
Sparsity: 126.3 | Dead Features: 0 | Total Loss: 0.0508 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: 0.0037
Sparsity: 54.1 | Dead Features: 0 | Total Loss: 0.1385 | Reconstruction Loss: 0.0914 | L1 Loss: 0.0472 | l1_alpha: 8.0000e-04 | Tokens: 15360

 14%|█▍        | 7607/55054 [03:28<21:59, 35.96it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: -0.0052
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: 0.0327
Sparsity: 50.5 | Dead Features: 0 | Total Loss: 0.0211 | Reconstruction Loss: 0.0132 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: 0.0137
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: -0.0133
Sparsity: 128.1 | Dead Features: 0 | Total Loss: 0.0520 | Reconstruction Loss: 0.0262 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: 0.0033
Sparsity: 53.5 | Dead Features: 0 | Total Loss: 0.1380 | Reconstruction Loss: 0.0888 | L1 Loss: 0.0492 | l1_alpha: 8.0000e-04 | Tokens: 15564

 14%|█▍        | 7707/55054 [03:31<22:32, 35.02it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: -0.0052
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: 0.0327
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: 0.0132
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: -0.0137
Sparsity: 126.7 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: 0.0030
Sparsity: 53.3 | Dead Features: 0 | Total Loss: 0.1330 | Reconstruction Loss: 0.0846 | L1 Loss: 0.0483 | l1_alpha: 8.0000e-04 | Tokens: 15769

 14%|█▍        | 7807/55054 [03:34<22:02, 35.73it/s]

Sparsity: 24.8 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: -0.0052
Sparsity: 39.5 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: 0.0324
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0213 | Reconstruction Loss: 0.0134 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: 0.0131
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0424 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: -0.0141
Sparsity: 129.1 | Dead Features: 0 | Total Loss: 0.0521 | Reconstruction Loss: 0.0269 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: 0.0030
Sparsity: 52.9 | Dead Features: 0 | Total Loss: 0.1489 | Reconstruction Loss: 0.0994 | L1 Loss: 0.0495 | l1_alpha: 8.0000e-04 | Tokens: 15974

 14%|█▍        | 7904/55054 [03:36<21:50, 35.99it/s]

Sparsity: 27.3 | Dead Features: 0 | Total Loss: 0.0162 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: -0.0055
Sparsity: 41.5 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: 0.0320
Sparsity: 52.5 | Dead Features: 0 | Total Loss: 0.0221 | Reconstruction Loss: 0.0138 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: 0.0125
Sparsity: 122.0 | Dead Features: 0 | Total Loss: 0.0429 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: -0.0141
Sparsity: 134.5 | Dead Features: 0 | Total Loss: 0.0545 | Reconstruction Loss: 0.0274 | L1 Loss: 0.0272 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: 0.0026
Sparsity: 53.5 | Dead Features: 0 | Total Loss: 0.1474 | Reconstruction Loss: 0.1009 | L1 Loss: 0.0465 | l1_alpha: 8.0000e-04 | Tokens: 16179

 15%|█▍        | 8004/55054 [03:39<21:49, 35.93it/s]

Sparsity: 23.8 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: -0.0054
Sparsity: 39.4 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: 0.0320
Sparsity: 50.5 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0130 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: 0.0121
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: -0.0149
Sparsity: 128.1 | Dead Features: 0 | Total Loss: 0.0509 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: 0.0020
Sparsity: 57.2 | Dead Features: 0 | Total Loss: 0.1263 | Reconstruction Loss: 0.0793 | L1 Loss: 0.0470 | l1_alpha: 8.0000e-04 | Tokens: 16384

 15%|█▍        | 8105/55054 [03:42<22:03, 35.48it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: -0.0055
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: 0.0320
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: 0.0117
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: -0.0152
Sparsity: 125.0 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: 0.0018
Sparsity: 55.1 | Dead Features: 0 | Total Loss: 0.1306 | Reconstruction Loss: 0.0835 | L1 Loss: 0.0471 | l1_alpha: 8.0000e-04 | Tokens: 16588

 15%|█▍        | 8205/55054 [03:45<21:29, 36.34it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: -0.0055
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: 0.0316
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: 0.0113
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: -0.0154
Sparsity: 126.8 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: 0.0014
Sparsity: 56.6 | Dead Features: 0 | Total Loss: 0.1339 | Reconstruction Loss: 0.0855 | L1 Loss: 0.0484 | l1_alpha: 8.0000e-04 | Tokens: 16793

 15%|█▌        | 8305/55054 [03:48<21:43, 35.86it/s]

Sparsity: 23.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: -0.0056
Sparsity: 39.5 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: 0.0311
Sparsity: 50.1 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: 0.0115
Sparsity: 122.3 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: -0.0155
Sparsity: 130.2 | Dead Features: 0 | Total Loss: 0.0511 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: 0.0016
Sparsity: 57.0 | Dead Features: 0 | Total Loss: 0.1374 | Reconstruction Loss: 0.0896 | L1 Loss: 0.0478 | l1_alpha: 8.0000e-04 | Tokens: 16998

 15%|█▌        | 8404/55054 [03:50<22:31, 34.53it/s]

Sparsity: 24.6 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: -0.0057
Sparsity: 38.9 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: 0.0309
Sparsity: 51.3 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: 0.0113
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: -0.0156
Sparsity: 128.3 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: 0.0015
Sparsity: 60.5 | Dead Features: 0 | Total Loss: 0.1324 | Reconstruction Loss: 0.0829 | L1 Loss: 0.0495 | l1_alpha: 8.0000e-04 | Tokens: 17203

 15%|█▌        | 8504/55054 [03:53<22:40, 34.21it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: -0.0057
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: 0.0304
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: 0.0113
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: -0.0162
Sparsity: 124.8 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: 0.0014
Sparsity: 60.4 | Dead Features: 0 | Total Loss: 0.1260 | Reconstruction Loss: 0.0780 | L1 Loss: 0.0480 | l1_alpha: 8.0000e-04 | Tokens: 17408

 16%|█▌        | 8604/55054 [03:56<22:17, 34.74it/s]

Sparsity: 23.4 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: -0.0058
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: 0.0304
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: 0.0109
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: -0.0162
Sparsity: 126.0 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: 0.0011
Sparsity: 60.8 | Dead Features: 0 | Total Loss: 0.1287 | Reconstruction Loss: 0.0804 | L1 Loss: 0.0483 | l1_alpha: 8.0000e-04 | Tokens: 17612

 16%|█▌        | 8704/55054 [03:59<22:14, 34.73it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: -0.0057
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: 0.0306
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: 0.0109
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: -0.0164
Sparsity: 124.2 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: 0.0012
Sparsity: 60.3 | Dead Features: 0 | Total Loss: 0.1264 | Reconstruction Loss: 0.0793 | L1 Loss: 0.0471 | l1_alpha: 8.0000e-04 | Tokens: 17817

 16%|█▌        | 8805/55054 [04:02<22:27, 34.33it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: -0.0056
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: 0.0301
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: 0.0104
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: -0.0166
Sparsity: 125.4 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: 0.0011
Sparsity: 42.3 | Dead Features: 0 | Total Loss: 0.1845 | Reconstruction Loss: 0.1443 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 18022

 16%|█▌        | 8905/55054 [04:05<22:22, 34.38it/s]

Sparsity: 24.4 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: -0.0059
Sparsity: 38.6 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: 0.0301
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: 0.0103
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: -0.0168
Sparsity: 125.7 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: 0.0009
Sparsity: 54.8 | Dead Features: 0 | Total Loss: 0.1429 | Reconstruction Loss: 0.0960 | L1 Loss: 0.0470 | l1_alpha: 8.0000e-04 | Tokens: 18227

 16%|█▋        | 9005/55054 [04:07<22:08, 34.67it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: -0.0062
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: 0.0296
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: 0.0103
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: -0.0170
Sparsity: 127.0 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: 0.0008
Sparsity: 57.4 | Dead Features: 0 | Total Loss: 0.1298 | Reconstruction Loss: 0.0825 | L1 Loss: 0.0473 | l1_alpha: 8.0000e-04 | Tokens: 18432

 17%|█▋        | 9105/55054 [04:10<22:21, 34.26it/s]

Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: -0.0067
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: 0.0294
Sparsity: 50.6 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: 0.0098
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: -0.0171
Sparsity: 127.8 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: 0.0007
Sparsity: 61.5 | Dead Features: 0 | Total Loss: 0.1309 | Reconstruction Loss: 0.0829 | L1 Loss: 0.0480 | l1_alpha: 8.0000e-04 | Tokens: 18636

 17%|█▋        | 9207/55054 [04:13<20:27, 37.35it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: -0.0069
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: 0.0289
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: 0.0099
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: -0.0176
Sparsity: 123.9 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: 0.0005
Sparsity: 59.8 | Dead Features: 0 | Total Loss: 0.1202 | Reconstruction Loss: 0.0754 | L1 Loss: 0.0448 | l1_alpha: 8.0000e-04 | Tokens: 18841

 17%|█▋        | 9307/55054 [04:16<21:57, 34.71it/s]

Sparsity: 25.5 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: -0.0071
Sparsity: 38.7 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: 0.0292
Sparsity: 50.4 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: 0.0098
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: -0.0175
Sparsity: 129.4 | Dead Features: 0 | Total Loss: 0.0516 | Reconstruction Loss: 0.0261 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: 0.0002
Sparsity: 63.0 | Dead Features: 0 | Total Loss: 0.1303 | Reconstruction Loss: 0.0833 | L1 Loss: 0.0470 | l1_alpha: 8.0000e-04 | Tokens: 19046

 17%|█▋        | 9407/55054 [04:19<20:56, 36.34it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: -0.0072
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: 0.0287
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: 0.0097
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: -0.0176
Sparsity: 127.3 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: 0.0003
Sparsity: 57.1 | Dead Features: 0 | Total Loss: 0.1299 | Reconstruction Loss: 0.0856 | L1 Loss: 0.0442 | l1_alpha: 8.0000e-04 | Tokens: 19251

 17%|█▋        | 9507/55054 [04:21<21:09, 35.87it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: -0.0073
Sparsity: 38.9 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: 0.0284
Sparsity: 52.2 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: 0.0092
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: -0.0174
Sparsity: 125.4 | Dead Features: 0 | Total Loss: 0.0510 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: 0.0003
Sparsity: 64.0 | Dead Features: 0 | Total Loss: 0.1283 | Reconstruction Loss: 0.0806 | L1 Loss: 0.0476 | l1_alpha: 8.0000e-04 | Tokens: 19456

 17%|█▋        | 9607/55054 [04:24<21:06, 35.89it/s]

Sparsity: 26.6 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: -0.0071
Sparsity: 44.0 | Dead Features: 0 | Total Loss: 0.0164 | Reconstruction Loss: 0.0107 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: 0.0281
Sparsity: 54.9 | Dead Features: 0 | Total Loss: 0.0220 | Reconstruction Loss: 0.0138 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: 0.0090
Sparsity: 127.2 | Dead Features: 0 | Total Loss: 0.0434 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: -0.0178
Sparsity: 134.8 | Dead Features: 0 | Total Loss: 0.0558 | Reconstruction Loss: 0.0284 | L1 Loss: 0.0274 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: -0.0001
Sparsity: 65.2 | Dead Features: 0 | Total Loss: 0.1399 | Reconstruction Loss: 0.0905 | L1 Loss: 0.0494 | l1_alpha: 8.0000e-04 | Tokens: 1966

 18%|█▊        | 9707/55054 [04:27<21:12, 35.63it/s]

Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: -0.0072
Sparsity: 37.4 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: 0.0283
Sparsity: 49.9 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: 0.0093
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: -0.0177
Sparsity: 127.5 | Dead Features: 0 | Total Loss: 0.0509 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: -0.0005
Sparsity: 53.7 | Dead Features: 0 | Total Loss: 0.1394 | Reconstruction Loss: 0.0959 | L1 Loss: 0.0435 | l1_alpha: 8.0000e-04 | Tokens: 1986

 18%|█▊        | 9807/55054 [04:30<21:04, 35.80it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: -0.0073
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: 0.0280
Sparsity: 53.2 | Dead Features: 0 | Total Loss: 0.0222 | Reconstruction Loss: 0.0135 | L1 Loss: 0.0087 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: 0.0093
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: -0.0173
Sparsity: 127.5 | Dead Features: 0 | Total Loss: 0.0517 | Reconstruction Loss: 0.0266 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: -0.0004
Sparsity: 65.0 | Dead Features: 0 | Total Loss: 0.1348 | Reconstruction Loss: 0.0884 | L1 Loss: 0.0465 | l1_alpha: 8.0000e-04 | Tokens: 2007

 18%|█▊        | 9907/55054 [04:32<20:41, 36.37it/s]

Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: -0.0075
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: 0.0276
Sparsity: 50.4 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: 0.0091
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: -0.0174
Sparsity: 127.5 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: -0.0001
Sparsity: 68.5 | Dead Features: 0 | Total Loss: 0.1264 | Reconstruction Loss: 0.0782 | L1 Loss: 0.0482 | l1_alpha: 8.0000e-04 | Tokens: 2027

 18%|█▊        | 10004/55054 [04:35<21:25, 35.05it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: -0.0076
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: 0.0279
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: 0.0088
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: -0.0178
Sparsity: 122.8 | Dead Features: 0 | Total Loss: 0.0467 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: -0.0002
Sparsity: 65.5 | Dead Features: 0 | Total Loss: 0.1190 | Reconstruction Loss: 0.0737 | L1 Loss: 0.0453 | l1_alpha: 8.0000e-04 | Tokens: 2048

 18%|█▊        | 10104/55054 [04:38<21:10, 35.38it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: -0.0076
Sparsity: 37.4 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: 0.0277
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: 0.0088
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: -0.0174
Sparsity: 126.1 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: -0.0003
Sparsity: 67.2 | Dead Features: 0 | Total Loss: 0.1230 | Reconstruction Loss: 0.0766 | L1 Loss: 0.0465 | l1_alpha: 8.0000e-04 | Tokens: 2068

 19%|█▊        | 10204/55054 [04:41<20:54, 35.75it/s]

Sparsity: 25.9 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: -0.0074
Sparsity: 40.8 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: 0.0270
Sparsity: 52.7 | Dead Features: 0 | Total Loss: 0.0213 | Reconstruction Loss: 0.0132 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: 0.0086
Sparsity: 124.7 | Dead Features: 0 | Total Loss: 0.0417 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: -0.0177
Sparsity: 128.8 | Dead Features: 0 | Total Loss: 0.0533 | Reconstruction Loss: 0.0269 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: -0.0001
Sparsity: 70.3 | Dead Features: 0 | Total Loss: 0.1284 | Reconstruction Loss: 0.0810 | L1 Loss: 0.0474 | l1_alpha: 8.0000e-04 | Tokens: 2088

 19%|█▊        | 10305/55054 [04:43<21:04, 35.39it/s]

Sparsity: 23.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: -0.0077
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: 0.0269
Sparsity: 50.5 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: 0.0083
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: -0.0173
Sparsity: 126.3 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: -0.0001
Sparsity: 71.3 | Dead Features: 0 | Total Loss: 0.1190 | Reconstruction Loss: 0.0720 | L1 Loss: 0.0470 | l1_alpha: 8.0000e-04 | Tokens: 2109

 19%|█▉        | 10405/55054 [04:46<20:38, 36.06it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: -0.0076
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: 0.0269
Sparsity: 52.3 | Dead Features: 0 | Total Loss: 0.0209 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: 0.0081
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: -0.0173
Sparsity: 126.5 | Dead Features: 0 | Total Loss: 0.0512 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: -0.0004
Sparsity: 69.7 | Dead Features: 0 | Total Loss: 0.1279 | Reconstruction Loss: 0.0794 | L1 Loss: 0.0485 | l1_alpha: 8.0000e-04 | Tokens: 2129

 19%|█▉        | 10504/55054 [04:49<18:31, 40.07it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: -0.0075
Sparsity: 38.6 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: 0.0270
Sparsity: 50.6 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: 0.0082
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: -0.0174
Sparsity: 125.6 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: -0.0004
Sparsity: 72.8 | Dead Features: 0 | Total Loss: 0.1213 | Reconstruction Loss: 0.0735 | L1 Loss: 0.0478 | l1_alpha: 8.0000e-04 | Tokens: 2150

 19%|█▉        | 10605/55054 [04:52<20:29, 36.14it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: -0.0074
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: 0.0268
Sparsity: 50.3 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: 0.0083
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: -0.0173
Sparsity: 127.3 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: -0.0004
Sparsity: 72.3 | Dead Features: 0 | Total Loss: 0.1234 | Reconstruction Loss: 0.0762 | L1 Loss: 0.0472 | l1_alpha: 8.0000e-04 | Tokens: 2170

 19%|█▉        | 10705/55054 [04:54<20:35, 35.89it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: -0.0075
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: 0.0266
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: 0.0080
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: -0.0173
Sparsity: 124.2 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: -0.0002
Sparsity: 71.3 | Dead Features: 0 | Total Loss: 0.1214 | Reconstruction Loss: 0.0738 | L1 Loss: 0.0476 | l1_alpha: 8.0000e-04 | Tokens: 2191

 20%|█▉        | 10805/55054 [04:57<20:46, 35.51it/s]

Sparsity: 23.8 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: -0.0076
Sparsity: 39.5 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: 0.0261
Sparsity: 50.9 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: 0.0080
Sparsity: 123.4 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: -0.0172
Sparsity: 125.7 | Dead Features: 0 | Total Loss: 0.0506 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: -0.0002
Sparsity: 72.3 | Dead Features: 0 | Total Loss: 0.1197 | Reconstruction Loss: 0.0733 | L1 Loss: 0.0464 | l1_alpha: 8.0000e-04 | Tokens: 2211

 20%|█▉        | 10904/55054 [05:00<20:12, 36.40it/s]

Sparsity: 23.7 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: -0.0078
Sparsity: 38.9 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: 0.0261
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: 0.0077
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: -0.0172
Sparsity: 128.9 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: -0.0003
Sparsity: 74.6 | Dead Features: 0 | Total Loss: 0.1183 | Reconstruction Loss: 0.0709 | L1 Loss: 0.0474 | l1_alpha: 8.0000e-04 | Tokens: 2232

 20%|█▉        | 11009/55054 [05:03<16:52, 43.48it/s]

Sparsity: 23.2 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: -0.0077
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: 0.0260
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: 0.0077
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: -0.0172
Sparsity: 128.5 | Dead Features: 0 | Total Loss: 0.0513 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: 0.0000
Sparsity: 72.1 | Dead Features: 0 | Total Loss: 0.1236 | Reconstruction Loss: 0.0775 | L1 Loss: 0.0461 | l1_alpha: 8.0000e-04 | Tokens: 22528

 20%|██        | 11107/55054 [05:05<20:07, 36.40it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: -0.0076
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: 0.0260
Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: 0.0078
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: -0.0170
Sparsity: 123.1 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: -0.0002
Sparsity: 70.8 | Dead Features: 0 | Total Loss: 0.1203 | Reconstruction Loss: 0.0758 | L1 Loss: 0.0446 | l1_alpha: 8.0000e-04 | Tokens: 2273

 20%|██        | 11207/55054 [05:08<20:27, 35.73it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: -0.0078
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: 0.0257
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: 0.0076
Sparsity: 123.1 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: -0.0171
Sparsity: 127.7 | Dead Features: 0 | Total Loss: 0.0512 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: -0.0003
Sparsity: 70.9 | Dead Features: 0 | Total Loss: 0.1251 | Reconstruction Loss: 0.0780 | L1 Loss: 0.0472 | l1_alpha: 8.0000e-04 | Tokens: 2293

 21%|██        | 11307/55054 [05:11<20:24, 35.73it/s]

Sparsity: 24.8 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: -0.0075
Sparsity: 40.0 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: 0.0256
Sparsity: 53.0 | Dead Features: 0 | Total Loss: 0.0216 | Reconstruction Loss: 0.0133 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: 0.0074
Sparsity: 124.0 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0211 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: -0.0170
Sparsity: 130.2 | Dead Features: 0 | Total Loss: 0.0540 | Reconstruction Loss: 0.0280 | L1 Loss: 0.0260 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: -0.0004
Sparsity: 76.9 | Dead Features: 0 | Total Loss: 0.1246 | Reconstruction Loss: 0.0783 | L1 Loss: 0.0463 | l1_alpha: 8.0000e-04 | Tokens: 2314

 21%|██        | 11407/55054 [05:14<20:16, 35.88it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: -0.0075
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: 0.0253
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: 0.0071
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: -0.0168
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: -0.0001
Sparsity: 77.5 | Dead Features: 0 | Total Loss: 0.1112 | Reconstruction Loss: 0.0645 | L1 Loss: 0.0468 | l1_alpha: 8.0000e-04 | Tokens: 2334

 21%|██        | 11504/55054 [05:16<19:14, 37.71it/s]

Sparsity: 23.4 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: -0.0077
Sparsity: 37.4 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: 0.0251
Sparsity: 50.6 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: 0.0073
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: -0.0166
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: 0.0000
Sparsity: 74.9 | Dead Features: 0 | Total Loss: 0.1141 | Reconstruction Loss: 0.0686 | L1 Loss: 0.0455 | l1_alpha: 8.0000e-04 | Tokens: 23552

 21%|██        | 11604/55054 [05:19<20:04, 36.07it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: -0.0076
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: 0.0247
Sparsity: 51.0 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: 0.0067
Sparsity: 121.9 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: -0.0161
Sparsity: 125.9 | Dead Features: 0 | Total Loss: 0.0504 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: 0.0000
Sparsity: 71.9 | Dead Features: 0 | Total Loss: 0.1205 | Reconstruction Loss: 0.0754 | L1 Loss: 0.0452 | l1_alpha: 8.0000e-04 | Tokens: 23756

 21%|██▏       | 11704/55054 [05:22<20:03, 36.01it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: -0.0077
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: 0.0245
Sparsity: 50.9 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: 0.0065
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: -0.0160
Sparsity: 124.3 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: 0.0002
Sparsity: 78.3 | Dead Features: 0 | Total Loss: 0.1138 | Reconstruction Loss: 0.0672 | L1 Loss: 0.0467 | l1_alpha: 8.0000e-04 | Tokens: 23961

 21%|██▏       | 11804/55054 [05:25<20:13, 35.65it/s]

Sparsity: 23.9 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: -0.0078
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: 0.0245
Sparsity: 49.9 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: 0.0066
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: -0.0161
Sparsity: 124.8 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: 0.0001
Sparsity: 78.0 | Dead Features: 0 | Total Loss: 0.1123 | Reconstruction Loss: 0.0667 | L1 Loss: 0.0457 | l1_alpha: 8.0000e-04 | Tokens: 24166

 22%|██▏       | 11904/55054 [05:27<20:05, 35.80it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: -0.0077
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: 0.0242
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: 0.0064
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: -0.0157
Sparsity: 126.4 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: 0.0000
Sparsity: 79.6 | Dead Features: 0 | Total Loss: 0.1155 | Reconstruction Loss: 0.0692 | L1 Loss: 0.0463 | l1_alpha: 8.0000e-04 | Tokens: 24371

 22%|██▏       | 12006/55054 [05:30<19:58, 35.92it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: -0.0076
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: 0.0240
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: 0.0063
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: -0.0155
Sparsity: 124.9 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: 0.0001
Sparsity: 83.0 | Dead Features: 0 | Total Loss: 0.1172 | Reconstruction Loss: 0.0693 | L1 Loss: 0.0479 | l1_alpha: 8.0000e-04 | Tokens: 24576

 22%|██▏       | 12106/55054 [05:33<20:02, 35.72it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: -0.0079
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: 0.0237
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: 0.0059
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: -0.0153
Sparsity: 122.3 | Dead Features: 0 | Total Loss: 0.0467 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: -0.0001
Sparsity: 74.7 | Dead Features: 0 | Total Loss: 0.1116 | Reconstruction Loss: 0.0679 | L1 Loss: 0.0437 | l1_alpha: 8.0000e-04 | Tokens: 2478

 22%|██▏       | 12206/55054 [05:36<19:50, 36.01it/s]

Sparsity: 23.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: -0.0080
Sparsity: 39.3 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: 0.0236
Sparsity: 51.4 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: 0.0058
Sparsity: 124.4 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: -0.0149
Sparsity: 130.2 | Dead Features: 0 | Total Loss: 0.0521 | Reconstruction Loss: 0.0263 | L1 Loss: 0.0259 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: 0.0001
Sparsity: 82.2 | Dead Features: 0 | Total Loss: 0.1202 | Reconstruction Loss: 0.0739 | L1 Loss: 0.0464 | l1_alpha: 8.0000e-04 | Tokens: 24985

 22%|██▏       | 12307/55054 [05:39<19:55, 35.77it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: -0.0083
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: 0.0235
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: 0.0057
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: -0.0149
Sparsity: 125.2 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: -0.0001
Sparsity: 86.3 | Dead Features: 0 | Total Loss: 0.1178 | Reconstruction Loss: 0.0695 | L1 Loss: 0.0483 | l1_alpha: 8.0000e-04 | Tokens: 2519

 23%|██▎       | 12404/55054 [05:41<19:42, 36.08it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: -0.0083
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: 0.0231
Sparsity: 50.6 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: 0.0055
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: -0.0150
Sparsity: 124.2 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: 0.0002
Sparsity: 81.3 | Dead Features: 0 | Total Loss: 0.1142 | Reconstruction Loss: 0.0683 | L1 Loss: 0.0458 | l1_alpha: 8.0000e-04 | Tokens: 25395

 23%|██▎       | 12507/55054 [05:44<20:24, 34.73it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: -0.0081
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: 0.0232
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: 0.0055
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: -0.0146
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: 0.0001
Sparsity: 80.1 | Dead Features: 0 | Total Loss: 0.1072 | Reconstruction Loss: 0.0625 | L1 Loss: 0.0447 | l1_alpha: 8.0000e-04 | Tokens: 25600

 23%|██▎       | 12607/55054 [05:47<20:18, 34.82it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: -0.0081
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: 0.0229
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: 0.0052
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: -0.0149
Sparsity: 124.9 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: -0.0000
Sparsity: 39.8 | Dead Features: 0 | Total Loss: 0.1658 | Reconstruction Loss: 0.1262 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 2580

 23%|██▎       | 12707/55054 [05:50<20:18, 34.75it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: -0.0077
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: 0.0225
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: 0.0052
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: -0.0147
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: 0.0003
Sparsity: 62.2 | Dead Features: 0 | Total Loss: 0.1146 | Reconstruction Loss: 0.0727 | L1 Loss: 0.0419 | l1_alpha: 8.0000e-04 | Tokens: 26009

 23%|██▎       | 12807/55054 [05:53<20:19, 34.65it/s]

Sparsity: 27.8 | Dead Features: 0 | Total Loss: 0.0163 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0069 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: -0.0084
Sparsity: 40.9 | Dead Features: 0 | Total Loss: 0.0160 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: 0.0227
Sparsity: 54.6 | Dead Features: 0 | Total Loss: 0.0220 | Reconstruction Loss: 0.0135 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: 0.0044
Sparsity: 125.2 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: -0.0147
Sparsity: 132.0 | Dead Features: 0 | Total Loss: 0.0545 | Reconstruction Loss: 0.0282 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: -0.0002
Sparsity: 73.2 | Dead Features: 0 | Total Loss: 0.1241 | Reconstruction Loss: 0.0796 | L1 Loss: 0.0445 | l1_alpha: 8.0000e-04 | Tokens: 2621

 23%|██▎       | 12907/55054 [05:55<20:16, 34.64it/s]

Sparsity: 26.3 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: -0.0085
Sparsity: 40.6 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: 0.0226
Sparsity: 53.4 | Dead Features: 0 | Total Loss: 0.0214 | Reconstruction Loss: 0.0133 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: 0.0048
Sparsity: 125.3 | Dead Features: 0 | Total Loss: 0.0423 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: -0.0145
Sparsity: 132.1 | Dead Features: 0 | Total Loss: 0.0527 | Reconstruction Loss: 0.0266 | L1 Loss: 0.0261 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: -0.0002
Sparsity: 75.5 | Dead Features: 0 | Total Loss: 0.1202 | Reconstruction Loss: 0.0759 | L1 Loss: 0.0443 | l1_alpha: 8.0000e-04 | Tokens: 2641

 24%|██▎       | 13007/55054 [05:58<20:04, 34.91it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: -0.0085
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: 0.0227
Sparsity: 50.3 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: 0.0051
Sparsity: 125.2 | Dead Features: 0 | Total Loss: 0.0414 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: -0.0144
Sparsity: 130.3 | Dead Features: 0 | Total Loss: 0.0512 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: -0.0001
Sparsity: 76.8 | Dead Features: 0 | Total Loss: 0.1144 | Reconstruction Loss: 0.0685 | L1 Loss: 0.0459 | l1_alpha: 8.0000e-04 | Tokens: 2662

 24%|██▍       | 13107/55054 [06:01<17:20, 40.30it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: -0.0084
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: 0.0231
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: 0.0050
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: -0.0143
Sparsity: 123.4 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: -0.0000
Sparsity: 81.7 | Dead Features: 0 | Total Loss: 0.1083 | Reconstruction Loss: 0.0628 | L1 Loss: 0.0456 | l1_alpha: 8.0000e-04 | Tokens: 2682

 24%|██▍       | 13207/55054 [06:04<19:21, 36.02it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: -0.0083
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: 0.0226
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: 0.0050
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: -0.0141
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: 0.0002
Sparsity: 77.5 | Dead Features: 0 | Total Loss: 0.1096 | Reconstruction Loss: 0.0662 | L1 Loss: 0.0433 | l1_alpha: 8.0000e-04 | Tokens: 27033

 24%|██▍       | 13307/55054 [06:07<19:17, 36.07it/s]

Sparsity: 23.8 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: -0.0085
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: 0.0227
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: 0.0048
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: -0.0139
Sparsity: 123.4 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: 0.0002
Sparsity: 79.6 | Dead Features: 0 | Total Loss: 0.1102 | Reconstruction Loss: 0.0669 | L1 Loss: 0.0432 | l1_alpha: 8.0000e-04 | Tokens: 27238

 24%|██▍       | 13407/55054 [06:09<19:11, 36.17it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: -0.0086
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: 0.0226
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: 0.0045
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: -0.0138
Sparsity: 121.6 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: -0.0002
Sparsity: 84.7 | Dead Features: 0 | Total Loss: 0.1110 | Reconstruction Loss: 0.0649 | L1 Loss: 0.0461 | l1_alpha: 8.0000e-04 | Tokens: 2744

 25%|██▍       | 13507/55054 [06:12<19:00, 36.42it/s]

Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: -0.0086
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: 0.0224
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: 0.0044
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: -0.0138
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: -0.0000
Sparsity: 81.6 | Dead Features: 0 | Total Loss: 0.1114 | Reconstruction Loss: 0.0670 | L1 Loss: 0.0445 | l1_alpha: 8.0000e-04 | Tokens: 2764

 25%|██▍       | 13605/55054 [06:15<19:01, 36.30it/s]

Sparsity: 25.9 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: -0.0086
Sparsity: 41.0 | Dead Features: 0 | Total Loss: 0.0160 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: 0.0220
Sparsity: 53.3 | Dead Features: 0 | Total Loss: 0.0216 | Reconstruction Loss: 0.0133 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: 0.0046
Sparsity: 124.6 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: -0.0136
Sparsity: 129.0 | Dead Features: 0 | Total Loss: 0.0532 | Reconstruction Loss: 0.0275 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: -0.0002
Sparsity: 87.0 | Dead Features: 0 | Total Loss: 0.1195 | Reconstruction Loss: 0.0734 | L1 Loss: 0.0460 | l1_alpha: 8.0000e-04 | Tokens: 2785

 25%|██▍       | 13705/55054 [06:18<19:08, 36.00it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: -0.0086
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: 0.0223
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: 0.0047
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: -0.0137
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: 0.0000
Sparsity: 88.0 | Dead Features: 0 | Total Loss: 0.1142 | Reconstruction Loss: 0.0664 | L1 Loss: 0.0478 | l1_alpha: 8.0000e-04 | Tokens: 28057

 25%|██▌       | 13808/55054 [06:20<16:52, 40.74it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: -0.0086
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: 0.0218
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: 0.0045
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: -0.0139
Sparsity: 122.0 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: -0.0001
Sparsity: 86.4 | Dead Features: 0 | Total Loss: 0.1100 | Reconstruction Loss: 0.0635 | L1 Loss: 0.0465 | l1_alpha: 8.0000e-04 | Tokens: 2826

 25%|██▌       | 13907/55054 [06:23<15:54, 43.09it/s]

Sparsity: 23.8 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: -0.0089
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: 0.0217
Sparsity: 50.1 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: 0.0044
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: -0.0136
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: 0.0001
Sparsity: 86.9 | Dead Features: 0 | Total Loss: 0.1081 | Reconstruction Loss: 0.0631 | L1 Loss: 0.0450 | l1_alpha: 8.0000e-04 | Tokens: 28467

 25%|██▌       | 14006/55054 [06:25<18:52, 36.24it/s]

Sparsity: 23.3 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: -0.0087
Sparsity: 38.5 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: 0.0218
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: 0.0044
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: -0.0134
Sparsity: 127.0 | Dead Features: 0 | Total Loss: 0.0515 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: 0.0001
Sparsity: 89.0 | Dead Features: 0 | Total Loss: 0.1126 | Reconstruction Loss: 0.0657 | L1 Loss: 0.0469 | l1_alpha: 8.0000e-04 | Tokens: 28672

 26%|██▌       | 14106/55054 [06:28<18:57, 36.00it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: -0.0088
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: 0.0218
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: 0.0047
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0376 | Reconstruction Loss: 0.0178 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: -0.0135
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0463 | Reconstruction Loss: 0.0229 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: 0.0003
Sparsity: 88.1 | Dead Features: 0 | Total Loss: 0.1007 | Reconstruction Loss: 0.0571 | L1 Loss: 0.0437 | l1_alpha: 8.0000e-04 | Tokens: 28876

 26%|██▌       | 14206/55054 [06:31<18:57, 35.92it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: -0.0088
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: 0.0218
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: 0.0045
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: -0.0134
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: 0.0002
Sparsity: 88.9 | Dead Features: 0 | Total Loss: 0.1008 | Reconstruction Loss: 0.0571 | L1 Loss: 0.0438 | l1_alpha: 8.0000e-04 | Tokens: 29081

 26%|██▌       | 14308/55054 [06:34<17:05, 39.72it/s]

Sparsity: 25.3 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: -0.0088
Sparsity: 40.6 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: 0.0215
Sparsity: 52.2 | Dead Features: 0 | Total Loss: 0.0209 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: 0.0046
Sparsity: 123.1 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: -0.0132
Sparsity: 124.7 | Dead Features: 0 | Total Loss: 0.0508 | Reconstruction Loss: 0.0262 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: 0.0001
Sparsity: 87.1 | Dead Features: 0 | Total Loss: 0.1133 | Reconstruction Loss: 0.0699 | L1 Loss: 0.0433 | l1_alpha: 8.0000e-04 | Tokens: 29286

 26%|██▌       | 14406/55054 [06:36<18:51, 35.93it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: -0.0087
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: 0.0213
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: 0.0044
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: -0.0134
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: 0.0001
Sparsity: 90.1 | Dead Features: 0 | Total Loss: 0.1041 | Reconstruction Loss: 0.0599 | L1 Loss: 0.0443 | l1_alpha: 8.0000e-04 | Tokens: 29491

 26%|██▋       | 14506/55054 [06:39<16:08, 41.86it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: -0.0088
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: 0.0208
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: 0.0040
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: -0.0132
Sparsity: 123.0 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: 0.0001
Sparsity: 84.2 | Dead Features: 0 | Total Loss: 0.1146 | Reconstruction Loss: 0.0706 | L1 Loss: 0.0440 | l1_alpha: 8.0000e-04 | Tokens: 29696

 27%|██▋       | 14606/55054 [06:41<18:02, 37.35it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: -0.0091
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: 0.0212
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: 0.0043
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: -0.0132
Sparsity: 123.9 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: -0.0002
Sparsity: 86.3 | Dead Features: 0 | Total Loss: 0.1049 | Reconstruction Loss: 0.0620 | L1 Loss: 0.0430 | l1_alpha: 8.0000e-04 | Tokens: 2990

 27%|██▋       | 14706/55054 [06:44<18:47, 35.80it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 30105600 | Self Similarity: -0.0092
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 30105600 | Self Similarity: 0.0214
Sparsity: 50.5 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 30105600 | Self Similarity: 0.0047
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 30105600 | Self Similarity: -0.0135
Sparsity: 125.8 | Dead Features: 0 | Total Loss: 0.0505 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 30105600 | Self Similarity: 0.0003
Sparsity: 93.3 | Dead Features: 0 | Total Loss: 0.1105 | Reconstruction Loss: 0.0652 | L1 Loss: 0.0453 | l1_alpha: 8.0000e-04 | Tokens: 30105

 27%|██▋       | 14806/55054 [06:47<18:38, 35.98it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 30310400 | Self Similarity: -0.0089
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 30310400 | Self Similarity: 0.0209
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 30310400 | Self Similarity: 0.0048
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 30310400 | Self Similarity: -0.0131
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 30310400 | Self Similarity: -0.0000
Sparsity: 90.8 | Dead Features: 0 | Total Loss: 0.1027 | Reconstruction Loss: 0.0578 | L1 Loss: 0.0449 | l1_alpha: 8.0000e-04 | Tokens: 3031

 27%|██▋       | 14906/55054 [06:50<18:42, 35.78it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 30515200 | Self Similarity: -0.0092
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 30515200 | Self Similarity: 0.0207
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 30515200 | Self Similarity: 0.0045
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 30515200 | Self Similarity: -0.0133
Sparsity: 122.3 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 30515200 | Self Similarity: -0.0001
Sparsity: 92.1 | Dead Features: 0 | Total Loss: 0.1044 | Reconstruction Loss: 0.0599 | L1 Loss: 0.0444 | l1_alpha: 8.0000e-04 | Tokens: 3051

 27%|██▋       | 15006/55054 [06:52<18:39, 35.77it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 30720000 | Self Similarity: -0.0088
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 30720000 | Self Similarity: 0.0206
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 30720000 | Self Similarity: 0.0041
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 30720000 | Self Similarity: -0.0132
Sparsity: 122.7 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 30720000 | Self Similarity: 0.0000
Sparsity: 93.1 | Dead Features: 0 | Total Loss: 0.1011 | Reconstruction Loss: 0.0574 | L1 Loss: 0.0437 | l1_alpha: 8.0000e-04 | Tokens: 30720

 27%|██▋       | 15106/55054 [06:55<18:30, 35.97it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 30924800 | Self Similarity: -0.0091
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 30924800 | Self Similarity: 0.0207
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 30924800 | Self Similarity: 0.0039
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 30924800 | Self Similarity: -0.0133
Sparsity: 125.6 | Dead Features: 0 | Total Loss: 0.0504 | Reconstruction Loss: 0.0254 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 30924800 | Self Similarity: -0.0002
Sparsity: 94.3 | Dead Features: 0 | Total Loss: 0.1070 | Reconstruction Loss: 0.0615 | L1 Loss: 0.0454 | l1_alpha: 8.0000e-04 | Tokens: 3092

 28%|██▊       | 15207/55054 [06:58<18:32, 35.81it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 31129600 | Self Similarity: -0.0087
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 31129600 | Self Similarity: 0.0202
Sparsity: 50.1 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 31129600 | Self Similarity: 0.0039
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 31129600 | Self Similarity: -0.0134
Sparsity: 125.2 | Dead Features: 0 | Total Loss: 0.0499 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 31129600 | Self Similarity: -0.0003
Sparsity: 94.7 | Dead Features: 0 | Total Loss: 0.1029 | Reconstruction Loss: 0.0579 | L1 Loss: 0.0450 | l1_alpha: 8.0000e-04 | Tokens: 3112

 28%|██▊       | 15306/55054 [07:01<18:32, 35.72it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 31334400 | Self Similarity: -0.0088
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 31334400 | Self Similarity: 0.0202
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 31334400 | Self Similarity: 0.0040
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 31334400 | Self Similarity: -0.0133
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 31334400 | Self Similarity: -0.0004
Sparsity: 97.0 | Dead Features: 0 | Total Loss: 0.1013 | Reconstruction Loss: 0.0566 | L1 Loss: 0.0447 | l1_alpha: 8.0000e-04 | Tokens: 3133

 28%|██▊       | 15404/55054 [07:03<17:57, 36.80it/s]

Sparsity: 23.8 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 31539200 | Self Similarity: -0.0087
Sparsity: 39.4 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 31539200 | Self Similarity: 0.0204
Sparsity: 50.9 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 31539200 | Self Similarity: 0.0036
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 31539200 | Self Similarity: -0.0129
Sparsity: 125.4 | Dead Features: 0 | Total Loss: 0.0499 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 31539200 | Self Similarity: -0.0000
Sparsity: 94.1 | Dead Features: 0 | Total Loss: 0.1065 | Reconstruction Loss: 0.0635 | L1 Loss: 0.0429 | l1_alpha: 8.0000e-04 | Tokens: 3153

 28%|██▊       | 15508/55054 [07:06<15:01, 43.86it/s]

Sparsity: 23.1 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 31744000 | Self Similarity: -0.0088
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 31744000 | Self Similarity: 0.0201
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 31744000 | Self Similarity: 0.0033
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 31744000 | Self Similarity: -0.0128
Sparsity: 123.3 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 31744000 | Self Similarity: -0.0005
Sparsity: 98.2 | Dead Features: 0 | Total Loss: 0.1011 | Reconstruction Loss: 0.0567 | L1 Loss: 0.0444 | l1_alpha: 8.0000e-04 | Tokens: 3174

 28%|██▊       | 15607/55054 [07:08<18:08, 36.22it/s]

Sparsity: 23.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 31948800 | Self Similarity: -0.0089
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 31948800 | Self Similarity: 0.0203
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 31948800 | Self Similarity: 0.0033
Sparsity: 114.1 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 31948800 | Self Similarity: -0.0130
Sparsity: 112.0 | Dead Features: 0 | Total Loss: 0.0499 | Reconstruction Loss: 0.0261 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 31948800 | Self Similarity: -0.0005
Sparsity: 90.9 | Dead Features: 0 | Total Loss: 0.1019 | Reconstruction Loss: 0.0595 | L1 Loss: 0.0424 | l1_alpha: 8.0000e-04 | Tokens: 3194

 29%|██▊       | 15704/55054 [07:11<17:43, 36.99it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 32153600 | Self Similarity: -0.0087
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 32153600 | Self Similarity: 0.0201
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 32153600 | Self Similarity: 0.0033
Sparsity: 115.8 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 32153600 | Self Similarity: -0.0128
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 32153600 | Self Similarity: -0.0006
Sparsity: 96.9 | Dead Features: 0 | Total Loss: 0.0982 | Reconstruction Loss: 0.0551 | L1 Loss: 0.0431 | l1_alpha: 8.0000e-04 | Tokens: 3215

 29%|██▊       | 15804/55054 [07:13<18:12, 35.93it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 32358400 | Self Similarity: -0.0087
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 32358400 | Self Similarity: 0.0195
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 32358400 | Self Similarity: 0.0033
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 32358400 | Self Similarity: -0.0132
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 32358400 | Self Similarity: -0.0005
Sparsity: 99.8 | Dead Features: 0 | Total Loss: 0.1018 | Reconstruction Loss: 0.0585 | L1 Loss: 0.0433 | l1_alpha: 8.0000e-04 | Tokens: 3235

 29%|██▉       | 15904/55054 [07:16<18:25, 35.41it/s]

Sparsity: 23.9 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 32563200 | Self Similarity: -0.0087
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 32563200 | Self Similarity: 0.0195
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 32563200 | Self Similarity: 0.0034
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 32563200 | Self Similarity: -0.0133
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 32563200 | Self Similarity: -0.0004
Sparsity: 98.8 | Dead Features: 0 | Total Loss: 0.0999 | Reconstruction Loss: 0.0573 | L1 Loss: 0.0426 | l1_alpha: 8.0000e-04 | Tokens: 3256

 29%|██▉       | 16004/55054 [07:19<17:53, 36.37it/s]

Sparsity: 23.6 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 32768000 | Self Similarity: -0.0087
Sparsity: 39.8 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 32768000 | Self Similarity: 0.0192
Sparsity: 50.8 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 32768000 | Self Similarity: 0.0034
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 32768000 | Self Similarity: -0.0133
Sparsity: 125.0 | Dead Features: 0 | Total Loss: 0.0516 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 32768000 | Self Similarity: -0.0003
Sparsity: 94.4 | Dead Features: 0 | Total Loss: 0.1114 | Reconstruction Loss: 0.0672 | L1 Loss: 0.0441 | l1_alpha: 8.0000e-04 | Tokens: 3276

 29%|██▉       | 16104/55054 [07:22<18:14, 35.58it/s]

Sparsity: 23.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 32972800 | Self Similarity: -0.0087
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 32972800 | Self Similarity: 0.0193
Sparsity: 49.9 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 32972800 | Self Similarity: 0.0032
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 32972800 | Self Similarity: -0.0132
Sparsity: 123.9 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 32972800 | Self Similarity: -0.0004
Sparsity: 97.4 | Dead Features: 0 | Total Loss: 0.1024 | Reconstruction Loss: 0.0600 | L1 Loss: 0.0424 | l1_alpha: 8.0000e-04 | Tokens: 3297

 29%|██▉       | 16206/55054 [07:24<16:03, 40.30it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 33177600 | Self Similarity: -0.0083
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 33177600 | Self Similarity: 0.0194
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 33177600 | Self Similarity: 0.0035
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 33177600 | Self Similarity: -0.0133
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 33177600 | Self Similarity: -0.0005
Sparsity: 97.4 | Dead Features: 0 | Total Loss: 0.1224 | Reconstruction Loss: 0.0762 | L1 Loss: 0.0462 | l1_alpha: 8.0000e-04 | Tokens: 3317

 30%|██▉       | 16304/55054 [07:27<17:57, 35.96it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 33382400 | Self Similarity: -0.0083
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 33382400 | Self Similarity: 0.0193
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 33382400 | Self Similarity: 0.0033
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 33382400 | Self Similarity: -0.0133
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 33382400 | Self Similarity: -0.0006
Sparsity: 98.1 | Dead Features: 0 | Total Loss: 0.0981 | Reconstruction Loss: 0.0551 | L1 Loss: 0.0430 | l1_alpha: 8.0000e-04 | Tokens: 3338

 30%|██▉       | 16407/55054 [07:30<16:48, 38.31it/s]

Sparsity: 27.0 | Dead Features: 0 | Total Loss: 0.0162 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 33587200 | Self Similarity: -0.0087
Sparsity: 42.5 | Dead Features: 0 | Total Loss: 0.0165 | Reconstruction Loss: 0.0110 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 33587200 | Self Similarity: 0.0192
Sparsity: 55.1 | Dead Features: 0 | Total Loss: 0.0219 | Reconstruction Loss: 0.0135 | L1 Loss: 0.0084 | l1_alpha: 8.0000e-04 | Tokens: 33587200 | Self Similarity: 0.0030
Sparsity: 125.3 | Dead Features: 0 | Total Loss: 0.0429 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 33587200 | Self Similarity: -0.0131
Sparsity: 131.0 | Dead Features: 0 | Total Loss: 0.0545 | Reconstruction Loss: 0.0283 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 33587200 | Self Similarity: -0.0005
Sparsity: 101.4 | Dead Features: 0 | Total Loss: 0.1092 | Reconstruction Loss: 0.0672 | L1 Loss: 0.0420 | l1_alpha: 8.0000e-04 | Tokens: 335

 30%|██▉       | 16507/55054 [07:33<17:47, 36.11it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 33792000 | Self Similarity: -0.0090
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 33792000 | Self Similarity: 0.0193
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 33792000 | Self Similarity: 0.0031
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 33792000 | Self Similarity: -0.0131
Sparsity: 122.0 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 33792000 | Self Similarity: -0.0005
Sparsity: 102.8 | Dead Features: 0 | Total Loss: 0.0998 | Reconstruction Loss: 0.0551 | L1 Loss: 0.0447 | l1_alpha: 8.0000e-04 | Tokens: 337

 30%|███       | 16606/55054 [07:35<17:25, 36.77it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 33996800 | Self Similarity: -0.0091
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 33996800 | Self Similarity: 0.0191
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 33996800 | Self Similarity: 0.0031
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 33996800 | Self Similarity: -0.0129
Sparsity: 122.8 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 33996800 | Self Similarity: -0.0004
Sparsity: 103.5 | Dead Features: 0 | Total Loss: 0.0976 | Reconstruction Loss: 0.0543 | L1 Loss: 0.0433 | l1_alpha: 8.0000e-04 | Tokens: 339

 30%|███       | 16706/55054 [07:38<17:45, 36.00it/s]

Sparsity: 26.2 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 34201600 | Self Similarity: -0.0087
Sparsity: 40.5 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 34201600 | Self Similarity: 0.0189
Sparsity: 53.8 | Dead Features: 0 | Total Loss: 0.0212 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 34201600 | Self Similarity: 0.0032
Sparsity: 126.5 | Dead Features: 0 | Total Loss: 0.0422 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0221 | l1_alpha: 8.0000e-04 | Tokens: 34201600 | Self Similarity: -0.0129
Sparsity: 131.6 | Dead Features: 0 | Total Loss: 0.0526 | Reconstruction Loss: 0.0268 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 34201600 | Self Similarity: -0.0006
Sparsity: 84.7 | Dead Features: 0 | Total Loss: 0.1138 | Reconstruction Loss: 0.0717 | L1 Loss: 0.0421 | l1_alpha: 8.0000e-04 | Tokens: 3420

 31%|███       | 16806/55054 [07:41<17:48, 35.80it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 34406400 | Self Similarity: -0.0091
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 34406400 | Self Similarity: 0.0186
Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 34406400 | Self Similarity: 0.0028
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 34406400 | Self Similarity: -0.0129
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 34406400 | Self Similarity: -0.0005
Sparsity: 93.7 | Dead Features: 0 | Total Loss: 0.0977 | Reconstruction Loss: 0.0566 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 3440

 31%|███       | 16906/55054 [07:43<17:43, 35.89it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 34611200 | Self Similarity: -0.0093
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 34611200 | Self Similarity: 0.0187
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 34611200 | Self Similarity: 0.0029
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 34611200 | Self Similarity: -0.0129
Sparsity: 122.6 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 34611200 | Self Similarity: -0.0004
Sparsity: 97.0 | Dead Features: 0 | Total Loss: 0.0973 | Reconstruction Loss: 0.0551 | L1 Loss: 0.0422 | l1_alpha: 8.0000e-04 | Tokens: 3461

 31%|███       | 17006/55054 [07:46<17:14, 36.79it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 34816000 | Self Similarity: -0.0093
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 34816000 | Self Similarity: 0.0189
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 34816000 | Self Similarity: 0.0029
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 34816000 | Self Similarity: -0.0128
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 34816000 | Self Similarity: -0.0003
Sparsity: 100.1 | Dead Features: 0 | Total Loss: 0.0939 | Reconstruction Loss: 0.0522 | L1 Loss: 0.0417 | l1_alpha: 8.0000e-04 | Tokens: 348

 31%|███       | 17104/55054 [07:49<17:35, 35.94it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 35020800 | Self Similarity: -0.0094
Sparsity: 37.4 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 35020800 | Self Similarity: 0.0189
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 35020800 | Self Similarity: 0.0027
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 35020800 | Self Similarity: -0.0125
Sparsity: 123.9 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 35020800 | Self Similarity: -0.0003
Sparsity: 101.7 | Dead Features: 0 | Total Loss: 0.0973 | Reconstruction Loss: 0.0548 | L1 Loss: 0.0425 | l1_alpha: 8.0000e-04 | Tokens: 350

 31%|███       | 17204/55054 [07:52<17:45, 35.51it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 35225600 | Self Similarity: -0.0092
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 35225600 | Self Similarity: 0.0190
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 35225600 | Self Similarity: 0.0030
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 35225600 | Self Similarity: -0.0126
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 35225600 | Self Similarity: -0.0003
Sparsity: 101.0 | Dead Features: 0 | Total Loss: 0.0961 | Reconstruction Loss: 0.0549 | L1 Loss: 0.0412 | l1_alpha: 8.0000e-04 | Tokens: 352

 31%|███▏      | 17304/55054 [07:54<17:32, 35.86it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 35430400 | Self Similarity: -0.0092
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 35430400 | Self Similarity: 0.0184
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 35430400 | Self Similarity: 0.0030
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 35430400 | Self Similarity: -0.0127
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 35430400 | Self Similarity: -0.0005
Sparsity: 99.9 | Dead Features: 0 | Total Loss: 0.0951 | Reconstruction Loss: 0.0550 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens: 3543

 32%|███▏      | 17404/55054 [07:57<17:39, 35.53it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 35635200 | Self Similarity: -0.0090
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 35635200 | Self Similarity: 0.0181
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 35635200 | Self Similarity: 0.0030
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 35635200 | Self Similarity: -0.0126
Sparsity: 124.2 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 35635200 | Self Similarity: -0.0004
Sparsity: 99.6 | Dead Features: 0 | Total Loss: 0.1003 | Reconstruction Loss: 0.0591 | L1 Loss: 0.0412 | l1_alpha: 8.0000e-04 | Tokens: 3563

 32%|███▏      | 17506/55054 [08:00<15:32, 40.28it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 35840000 | Self Similarity: -0.0093
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 35840000 | Self Similarity: 0.0185
Sparsity: 50.1 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 35840000 | Self Similarity: 0.0031
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 35840000 | Self Similarity: -0.0120
Sparsity: 124.3 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 35840000 | Self Similarity: -0.0005
Sparsity: 105.0 | Dead Features: 0 | Total Loss: 0.0951 | Reconstruction Loss: 0.0528 | L1 Loss: 0.0423 | l1_alpha: 8.0000e-04 | Tokens: 358

 32%|███▏      | 17606/55054 [08:02<17:13, 36.23it/s]

Sparsity: 24.4 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 36044800 | Self Similarity: -0.0091
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 36044800 | Self Similarity: 0.0184
Sparsity: 51.0 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 36044800 | Self Similarity: 0.0029
Sparsity: 122.3 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 36044800 | Self Similarity: -0.0118
Sparsity: 128.2 | Dead Features: 0 | Total Loss: 0.0517 | Reconstruction Loss: 0.0254 | L1 Loss: 0.0264 | l1_alpha: 8.0000e-04 | Tokens: 36044800 | Self Similarity: -0.0004
Sparsity: 108.1 | Dead Features: 0 | Total Loss: 0.1012 | Reconstruction Loss: 0.0571 | L1 Loss: 0.0441 | l1_alpha: 8.0000e-04 | Tokens: 360

 32%|███▏      | 17706/55054 [08:05<17:12, 36.17it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 36249600 | Self Similarity: -0.0091
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 36249600 | Self Similarity: 0.0183
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 36249600 | Self Similarity: 0.0030
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 36249600 | Self Similarity: -0.0123
Sparsity: 123.0 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 36249600 | Self Similarity: -0.0005
Sparsity: 102.7 | Dead Features: 0 | Total Loss: 0.0954 | Reconstruction Loss: 0.0540 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens: 362

 32%|███▏      | 17807/55054 [08:08<15:16, 40.65it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 36454400 | Self Similarity: -0.0091
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 36454400 | Self Similarity: 0.0183
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 36454400 | Self Similarity: 0.0030
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 36454400 | Self Similarity: -0.0124
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 36454400 | Self Similarity: -0.0003
Sparsity: 104.4 | Dead Features: 0 | Total Loss: 0.0907 | Reconstruction Loss: 0.0495 | L1 Loss: 0.0412 | l1_alpha: 8.0000e-04 | Tokens: 364

 33%|███▎      | 17904/55054 [08:11<17:19, 35.73it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 36659200 | Self Similarity: -0.0090
Sparsity: 38.7 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 36659200 | Self Similarity: 0.0182
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 36659200 | Self Similarity: 0.0032
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 36659200 | Self Similarity: -0.0123
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 36659200 | Self Similarity: -0.0004
Sparsity: 106.5 | Dead Features: 0 | Total Loss: 0.0966 | Reconstruction Loss: 0.0538 | L1 Loss: 0.0428 | l1_alpha: 8.0000e-04 | Tokens: 366

 33%|███▎      | 18004/55054 [08:13<16:59, 36.34it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 36864000 | Self Similarity: -0.0088
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 36864000 | Self Similarity: 0.0183
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 36864000 | Self Similarity: 0.0032
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 36864000 | Self Similarity: -0.0123
Sparsity: 124.6 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 36864000 | Self Similarity: -0.0003
Sparsity: 109.4 | Dead Features: 0 | Total Loss: 0.1015 | Reconstruction Loss: 0.0594 | L1 Loss: 0.0421 | l1_alpha: 8.0000e-04 | Tokens: 368

 33%|███▎      | 18104/55054 [08:16<17:12, 35.79it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 37068800 | Self Similarity: -0.0088
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 37068800 | Self Similarity: 0.0179
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 37068800 | Self Similarity: 0.0030
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 37068800 | Self Similarity: -0.0122
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 37068800 | Self Similarity: -0.0002
Sparsity: 107.9 | Dead Features: 0 | Total Loss: 0.0945 | Reconstruction Loss: 0.0531 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens: 370

 33%|███▎      | 18204/55054 [08:19<17:08, 35.83it/s]

Sparsity: 19.9 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 37273600 | Self Similarity: -0.0087
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 37273600 | Self Similarity: 0.0181
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 37273600 | Self Similarity: 0.0033
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 37273600 | Self Similarity: -0.0121
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 37273600 | Self Similarity: -0.0001
Sparsity: 110.7 | Dead Features: 0 | Total Loss: 0.0946 | Reconstruction Loss: 0.0519 | L1 Loss: 0.0427 | l1_alpha: 8.0000e-04 | Tokens: 372

 33%|███▎      | 18304/55054 [08:22<16:56, 36.14it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 37478400 | Self Similarity: -0.0085
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 37478400 | Self Similarity: 0.0176
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 37478400 | Self Similarity: 0.0036
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 37478400 | Self Similarity: -0.0121
Sparsity: 123.6 | Dead Features: 0 | Total Loss: 0.0505 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 37478400 | Self Similarity: -0.0006
Sparsity: 105.4 | Dead Features: 0 | Total Loss: 0.1009 | Reconstruction Loss: 0.0589 | L1 Loss: 0.0420 | l1_alpha: 8.0000e-04 | Tokens: 374

 33%|███▎      | 18405/55054 [08:24<17:03, 35.80it/s]

Sparsity: 24.2 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 37683200 | Self Similarity: -0.0085
Sparsity: 39.3 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 37683200 | Self Similarity: 0.0174
Sparsity: 51.0 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 37683200 | Self Similarity: 0.0035
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 37683200 | Self Similarity: -0.0119
Sparsity: 125.9 | Dead Features: 0 | Total Loss: 0.0505 | Reconstruction Loss: 0.0254 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 37683200 | Self Similarity: -0.0004
Sparsity: 108.2 | Dead Features: 0 | Total Loss: 0.0949 | Reconstruction Loss: 0.0533 | L1 Loss: 0.0416 | l1_alpha: 8.0000e-04 | Tokens: 376

 34%|███▎      | 18507/55054 [08:27<15:43, 38.75it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 37888000 | Self Similarity: -0.0085
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 37888000 | Self Similarity: 0.0173
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 37888000 | Self Similarity: 0.0037
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 37888000 | Self Similarity: -0.0117
Sparsity: 123.7 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 37888000 | Self Similarity: -0.0005
Sparsity: 108.5 | Dead Features: 0 | Total Loss: 0.0932 | Reconstruction Loss: 0.0511 | L1 Loss: 0.0421 | l1_alpha: 8.0000e-04 | Tokens: 378

 34%|███▍      | 18604/55054 [08:30<16:48, 36.15it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 38092800 | Self Similarity: -0.0087
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 38092800 | Self Similarity: 0.0177
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 38092800 | Self Similarity: 0.0034
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 38092800 | Self Similarity: -0.0116
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 38092800 | Self Similarity: -0.0004
Sparsity: 111.1 | Dead Features: 0 | Total Loss: 0.0972 | Reconstruction Loss: 0.0554 | L1 Loss: 0.0417 | l1_alpha: 8.0000e-04 | Tokens: 380

 34%|███▍      | 18704/55054 [08:33<16:54, 35.83it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 38297600 | Self Similarity: -0.0084
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 38297600 | Self Similarity: 0.0175
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 38297600 | Self Similarity: 0.0036
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 38297600 | Self Similarity: -0.0113
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 38297600 | Self Similarity: -0.0004
Sparsity: 108.2 | Dead Features: 0 | Total Loss: 0.0937 | Reconstruction Loss: 0.0525 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 382

 34%|███▍      | 18805/55054 [08:35<17:01, 35.47it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 38502400 | Self Similarity: -0.0086
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 38502400 | Self Similarity: 0.0175
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 38502400 | Self Similarity: 0.0035
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 38502400 | Self Similarity: -0.0114
Sparsity: 123.7 | Dead Features: 0 | Total Loss: 0.0506 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 38502400 | Self Similarity: -0.0002
Sparsity: 110.3 | Dead Features: 0 | Total Loss: 0.0960 | Reconstruction Loss: 0.0539 | L1 Loss: 0.0421 | l1_alpha: 8.0000e-04 | Tokens: 385

 34%|███▍      | 18905/55054 [08:38<16:39, 36.15it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 38707200 | Self Similarity: -0.0087
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 38707200 | Self Similarity: 0.0177
Sparsity: 50.6 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 38707200 | Self Similarity: 0.0032
Sparsity: 124.7 | Dead Features: 0 | Total Loss: 0.0417 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 38707200 | Self Similarity: -0.0119
Sparsity: 126.4 | Dead Features: 0 | Total Loss: 0.0519 | Reconstruction Loss: 0.0264 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 38707200 | Self Similarity: -0.0006
Sparsity: 112.3 | Dead Features: 0 | Total Loss: 0.0950 | Reconstruction Loss: 0.0523 | L1 Loss: 0.0427 | l1_alpha: 8.0000e-04 | Tokens: 387

 35%|███▍      | 19006/55054 [08:41<16:23, 36.66it/s]

Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 38912000 | Self Similarity: -0.0084
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 38912000 | Self Similarity: 0.0177
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 38912000 | Self Similarity: 0.0029
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 38912000 | Self Similarity: -0.0116
Sparsity: 123.3 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 38912000 | Self Similarity: -0.0007
Sparsity: 111.5 | Dead Features: 0 | Total Loss: 0.0942 | Reconstruction Loss: 0.0528 | L1 Loss: 0.0415 | l1_alpha: 8.0000e-04 | Tokens: 389

 35%|███▍      | 19106/55054 [08:44<17:27, 34.31it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 39116800 | Self Similarity: -0.0083
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 39116800 | Self Similarity: 0.0177
Sparsity: 50.4 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 39116800 | Self Similarity: 0.0031
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 39116800 | Self Similarity: -0.0114
Sparsity: 122.6 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 39116800 | Self Similarity: -0.0006
Sparsity: 111.6 | Dead Features: 0 | Total Loss: 0.0915 | Reconstruction Loss: 0.0508 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 391

 35%|███▍      | 19206/55054 [08:47<17:22, 34.38it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 39321600 | Self Similarity: -0.0084
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 39321600 | Self Similarity: 0.0176
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 39321600 | Self Similarity: 0.0032
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 39321600 | Self Similarity: -0.0110
Sparsity: 122.6 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 39321600 | Self Similarity: -0.0006
Sparsity: 113.4 | Dead Features: 0 | Total Loss: 0.0920 | Reconstruction Loss: 0.0493 | L1 Loss: 0.0427 | l1_alpha: 8.0000e-04 | Tokens: 393

 35%|███▌      | 19306/55054 [08:50<17:16, 34.49it/s]

Sparsity: 24.8 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 39526400 | Self Similarity: -0.0082
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 39526400 | Self Similarity: 0.0173
Sparsity: 51.1 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 39526400 | Self Similarity: 0.0034
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 39526400 | Self Similarity: -0.0110
Sparsity: 127.1 | Dead Features: 0 | Total Loss: 0.0512 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 39526400 | Self Similarity: -0.0007
Sparsity: 113.6 | Dead Features: 0 | Total Loss: 0.0961 | Reconstruction Loss: 0.0545 | L1 Loss: 0.0416 | l1_alpha: 8.0000e-04 | Tokens: 395

 35%|███▌      | 19406/55054 [08:52<17:02, 34.86it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 39731200 | Self Similarity: -0.0085
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 39731200 | Self Similarity: 0.0176
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 39731200 | Self Similarity: 0.0031
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 39731200 | Self Similarity: -0.0111
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 39731200 | Self Similarity: -0.0007
Sparsity: 114.2 | Dead Features: 0 | Total Loss: 0.0910 | Reconstruction Loss: 0.0499 | L1 Loss: 0.0410 | l1_alpha: 8.0000e-04 | Tokens: 397

 35%|███▌      | 19506/55054 [08:55<17:06, 34.64it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 39936000 | Self Similarity: -0.0084
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 39936000 | Self Similarity: 0.0175
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 39936000 | Self Similarity: 0.0031
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 39936000 | Self Similarity: -0.0111
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 39936000 | Self Similarity: -0.0007
Sparsity: 111.0 | Dead Features: 0 | Total Loss: 0.0891 | Reconstruction Loss: 0.0485 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens: 399

 36%|███▌      | 19606/55054 [08:58<16:57, 34.85it/s]

Sparsity: 19.1 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 40140800 | Self Similarity: -0.0082
Sparsity: 34.4 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 40140800 | Self Similarity: 0.0175
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 40140800 | Self Similarity: 0.0032
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 40140800 | Self Similarity: -0.0111
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0502 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 40140800 | Self Similarity: -0.0007
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0929 | Reconstruction Loss: 0.0505 | L1 Loss: 0.0424 | l1_alpha: 8.0000e-04 | Tokens: 401

 36%|███▌      | 19706/55054 [09:01<16:22, 35.97it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 40345600 | Self Similarity: -0.0080
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 40345600 | Self Similarity: 0.0173
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 40345600 | Self Similarity: 0.0031
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 40345600 | Self Similarity: -0.0113
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 40345600 | Self Similarity: -0.0008
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0936 | Reconstruction Loss: 0.0511 | L1 Loss: 0.0425 | l1_alpha: 8.0000e-04 | Tokens: 403

 36%|███▌      | 19807/55054 [09:04<16:07, 36.43it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 40550400 | Self Similarity: -0.0081
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 40550400 | Self Similarity: 0.0175
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 40550400 | Self Similarity: 0.0030
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 40550400 | Self Similarity: -0.0112
Sparsity: 124.9 | Dead Features: 0 | Total Loss: 0.0504 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 40550400 | Self Similarity: -0.0009
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0948 | Reconstruction Loss: 0.0542 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 405

 36%|███▌      | 19907/55054 [09:07<16:44, 34.98it/s]

Sparsity: 30.1 | Dead Features: 0 | Total Loss: 0.0187 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0071 | l1_alpha: 8.0000e-04 | Tokens: 40755200 | Self Similarity: -0.0082
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 40755200 | Self Similarity: 0.0175
Sparsity: 51.0 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 40755200 | Self Similarity: 0.0033
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 40755200 | Self Similarity: -0.0112
Sparsity: 127.1 | Dead Features: 0 | Total Loss: 0.0508 | Reconstruction Loss: 0.0262 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 40755200 | Self Similarity: -0.0009
Sparsity: 113.9 | Dead Features: 0 | Total Loss: 0.0931 | Reconstruction Loss: 0.0513 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 407

 36%|███▋      | 20007/55054 [09:09<16:54, 34.55it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 40960000 | Self Similarity: -0.0084
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 40960000 | Self Similarity: 0.0174
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 40960000 | Self Similarity: 0.0031
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 40960000 | Self Similarity: -0.0112
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 40960000 | Self Similarity: -0.0006
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0874 | Reconstruction Loss: 0.0472 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 409

 37%|███▋      | 20107/55054 [09:12<16:47, 34.70it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 41164800 | Self Similarity: -0.0084
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 41164800 | Self Similarity: 0.0175
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 41164800 | Self Similarity: 0.0034
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 41164800 | Self Similarity: -0.0115
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 41164800 | Self Similarity: -0.0007
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0921 | Reconstruction Loss: 0.0516 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 411

 37%|███▋      | 20207/55054 [09:15<16:54, 34.36it/s]

Sparsity: 26.3 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 41369600 | Self Similarity: -0.0088
Sparsity: 40.8 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 41369600 | Self Similarity: 0.0174
Sparsity: 50.7 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 41369600 | Self Similarity: 0.0033
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0401 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 41369600 | Self Similarity: -0.0115
Sparsity: 124.3 | Dead Features: 0 | Total Loss: 0.0506 | Reconstruction Loss: 0.0254 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 41369600 | Self Similarity: -0.0005
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0917 | Reconstruction Loss: 0.0495 | L1 Loss: 0.0422 | l1_alpha: 8.0000e-04 | Tokens: 413

 37%|███▋      | 20305/55054 [09:18<16:29, 35.13it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 41574400 | Self Similarity: -0.0086
Sparsity: 38.6 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 41574400 | Self Similarity: 0.0170
Sparsity: 50.7 | Dead Features: 0 | Total Loss: 0.0209 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 41574400 | Self Similarity: 0.0031
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0417 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 41574400 | Self Similarity: -0.0111
Sparsity: 126.8 | Dead Features: 0 | Total Loss: 0.0524 | Reconstruction Loss: 0.0269 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 41574400 | Self Similarity: -0.0003
Sparsity: 121.6 | Dead Features: 0 | Total Loss: 0.0969 | Reconstruction Loss: 0.0531 | L1 Loss: 0.0438 | l1_alpha: 8.0000e-04 | Tokens: 415

 37%|███▋      | 20405/55054 [09:21<16:47, 34.38it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 41779200 | Self Similarity: -0.0087
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 41779200 | Self Similarity: 0.0170
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 41779200 | Self Similarity: 0.0036
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 41779200 | Self Similarity: -0.0112
Sparsity: 123.7 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 41779200 | Self Similarity: -0.0003
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0881 | Reconstruction Loss: 0.0478 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 417

 37%|███▋      | 20506/55054 [09:24<16:52, 34.14it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 41984000 | Self Similarity: -0.0084
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 41984000 | Self Similarity: 0.0172
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 41984000 | Self Similarity: 0.0034
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 41984000 | Self Similarity: -0.0112
Sparsity: 123.3 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 41984000 | Self Similarity: -0.0006
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0887 | Reconstruction Loss: 0.0477 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 419

 37%|███▋      | 20606/55054 [09:27<16:33, 34.69it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 42188800 | Self Similarity: -0.0085
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 42188800 | Self Similarity: 0.0172
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 42188800 | Self Similarity: 0.0034
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 42188800 | Self Similarity: -0.0111
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 42188800 | Self Similarity: -0.0006
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0868 | Reconstruction Loss: 0.0472 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 421

 38%|███▊      | 20704/55054 [09:29<13:55, 41.09it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 42393600 | Self Similarity: -0.0086
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 42393600 | Self Similarity: 0.0171
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 42393600 | Self Similarity: 0.0035
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 42393600 | Self Similarity: -0.0108
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 42393600 | Self Similarity: -0.0007
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0913 | Reconstruction Loss: 0.0503 | L1 Loss: 0.0409 | l1_alpha: 8.0000e-04 | Tokens: 423

 38%|███▊      | 20804/55054 [09:32<16:08, 35.35it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 42598400 | Self Similarity: -0.0086
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 42598400 | Self Similarity: 0.0173
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 42598400 | Self Similarity: 0.0033
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 42598400 | Self Similarity: -0.0110
Sparsity: 122.8 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 42598400 | Self Similarity: -0.0007
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0896 | Reconstruction Loss: 0.0493 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 425

 38%|███▊      | 20904/55054 [09:35<15:56, 35.71it/s]

Sparsity: 23.4 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 42803200 | Self Similarity: -0.0089
Sparsity: 40.0 | Dead Features: 0 | Total Loss: 0.0156 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 42803200 | Self Similarity: 0.0169
Sparsity: 51.3 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 42803200 | Self Similarity: 0.0033
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 42803200 | Self Similarity: -0.0107
Sparsity: 126.6 | Dead Features: 0 | Total Loss: 0.0513 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 42803200 | Self Similarity: -0.0010
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0933 | Reconstruction Loss: 0.0511 | L1 Loss: 0.0422 | l1_alpha: 8.0000e-04 | Tokens: 428

 38%|███▊      | 21004/55054 [09:38<16:07, 35.18it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 43008000 | Self Similarity: -0.0091
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 43008000 | Self Similarity: 0.0171
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 43008000 | Self Similarity: 0.0030
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 43008000 | Self Similarity: -0.0109
Sparsity: 121.6 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 43008000 | Self Similarity: -0.0009
Sparsity: 121.9 | Dead Features: 0 | Total Loss: 0.0876 | Reconstruction Loss: 0.0461 | L1 Loss: 0.0415 | l1_alpha: 8.0000e-04 | Tokens: 430

 38%|███▊      | 21104/55054 [09:40<15:45, 35.89it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 43212800 | Self Similarity: -0.0086
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 43212800 | Self Similarity: 0.0169
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 43212800 | Self Similarity: 0.0032
Sparsity: 115.4 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 43212800 | Self Similarity: -0.0106
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 43212800 | Self Similarity: -0.0011
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0862 | Reconstruction Loss: 0.0465 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 432

 39%|███▊      | 21204/55054 [09:43<15:48, 35.67it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 43417600 | Self Similarity: -0.0086
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 43417600 | Self Similarity: 0.0165
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 43417600 | Self Similarity: 0.0028
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 43417600 | Self Similarity: -0.0104
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 43417600 | Self Similarity: -0.0012
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0860 | Reconstruction Loss: 0.0458 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 434

 39%|███▊      | 21306/55054 [09:46<15:36, 36.03it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 43622400 | Self Similarity: -0.0085
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 43622400 | Self Similarity: 0.0164
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 43622400 | Self Similarity: 0.0028
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 43622400 | Self Similarity: -0.0102
Sparsity: 124.6 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 43622400 | Self Similarity: -0.0014
Sparsity: 123.7 | Dead Features: 0 | Total Loss: 0.0898 | Reconstruction Loss: 0.0493 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 436

 39%|███▉      | 21406/55054 [09:49<15:34, 35.99it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 43827200 | Self Similarity: -0.0084
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 43827200 | Self Similarity: 0.0165
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 43827200 | Self Similarity: 0.0028
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 43827200 | Self Similarity: -0.0103
Sparsity: 121.6 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 43827200 | Self Similarity: -0.0013
Sparsity: 114.0 | Dead Features: 0 | Total Loss: 0.0902 | Reconstruction Loss: 0.0508 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 438

 39%|███▉      | 21506/55054 [09:51<15:35, 35.85it/s]

Sparsity: 24.6 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 44032000 | Self Similarity: -0.0090
Sparsity: 40.1 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 44032000 | Self Similarity: 0.0166
Sparsity: 49.9 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 44032000 | Self Similarity: 0.0027
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 44032000 | Self Similarity: -0.0102
Sparsity: 129.2 | Dead Features: 0 | Total Loss: 0.0525 | Reconstruction Loss: 0.0268 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 44032000 | Self Similarity: -0.0011
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0928 | Reconstruction Loss: 0.0522 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 440

 39%|███▉      | 21606/55054 [09:54<15:33, 35.81it/s]

Sparsity: 23.5 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 44236800 | Self Similarity: -0.0088
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 44236800 | Self Similarity: 0.0162
Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 44236800 | Self Similarity: 0.0028
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 44236800 | Self Similarity: -0.0101
Sparsity: 125.1 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 44236800 | Self Similarity: -0.0013
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0871 | Reconstruction Loss: 0.0469 | L1 Loss: 0.0403 | l1_alpha: 8.0000e-04 | Tokens: 442

 39%|███▉      | 21704/55054 [09:57<14:37, 38.00it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 44441600 | Self Similarity: -0.0087
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 44441600 | Self Similarity: 0.0163
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 44441600 | Self Similarity: 0.0026
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 44441600 | Self Similarity: -0.0101
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 44441600 | Self Similarity: -0.0013
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0822 | Reconstruction Loss: 0.0431 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens: 444

 40%|███▉      | 21806/55054 [09:59<13:33, 40.87it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 44646400 | Self Similarity: -0.0086
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 44646400 | Self Similarity: 0.0163
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 44646400 | Self Similarity: 0.0027
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 44646400 | Self Similarity: -0.0103
Sparsity: 124.4 | Dead Features: 0 | Total Loss: 0.0511 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 44646400 | Self Similarity: -0.0013
Sparsity: 125.4 | Dead Features: 0 | Total Loss: 0.0888 | Reconstruction Loss: 0.0470 | L1 Loss: 0.0419 | l1_alpha: 8.0000e-04 | Tokens: 446

 40%|███▉      | 21907/55054 [10:02<15:22, 35.95it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 44851200 | Self Similarity: -0.0087
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 44851200 | Self Similarity: 0.0162
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 44851200 | Self Similarity: 0.0030
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 44851200 | Self Similarity: -0.0101
Sparsity: 122.0 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 44851200 | Self Similarity: -0.0007
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0878 | Reconstruction Loss: 0.0495 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 448

 40%|███▉      | 22007/55054 [10:05<15:10, 36.30it/s]

Sparsity: 23.1 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 45056000 | Self Similarity: -0.0085
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 45056000 | Self Similarity: 0.0164
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 45056000 | Self Similarity: 0.0026
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 45056000 | Self Similarity: -0.0099
Sparsity: 125.1 | Dead Features: 0 | Total Loss: 0.0522 | Reconstruction Loss: 0.0273 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 45056000 | Self Similarity: -0.0009
Sparsity: 124.0 | Dead Features: 0 | Total Loss: 0.0921 | Reconstruction Loss: 0.0511 | L1 Loss: 0.0410 | l1_alpha: 8.0000e-04 | Tokens: 450

 40%|████      | 22104/55054 [10:08<14:55, 36.79it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 45260800 | Self Similarity: -0.0088
Sparsity: 37.4 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 45260800 | Self Similarity: 0.0165
Sparsity: 49.1 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 45260800 | Self Similarity: 0.0024
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 45260800 | Self Similarity: -0.0102
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0501 | Reconstruction Loss: 0.0254 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 45260800 | Self Similarity: -0.0007
Sparsity: 124.9 | Dead Features: 0 | Total Loss: 0.0891 | Reconstruction Loss: 0.0485 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 452

 40%|████      | 22204/55054 [10:10<15:08, 36.18it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 45465600 | Self Similarity: -0.0088
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 45465600 | Self Similarity: 0.0166
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 45465600 | Self Similarity: 0.0028
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 45465600 | Self Similarity: -0.0100
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 45465600 | Self Similarity: -0.0009
Sparsity: 111.8 | Dead Features: 0 | Total Loss: 0.0847 | Reconstruction Loss: 0.0474 | L1 Loss: 0.0373 | l1_alpha: 8.0000e-04 | Tokens: 454

 41%|████      | 22305/55054 [10:13<15:04, 36.22it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 45670400 | Self Similarity: -0.0089
Sparsity: 38.7 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 45670400 | Self Similarity: 0.0164
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 45670400 | Self Similarity: 0.0027
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 45670400 | Self Similarity: -0.0098
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 45670400 | Self Similarity: -0.0012
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0872 | Reconstruction Loss: 0.0465 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 456

 41%|████      | 22407/55054 [10:16<15:07, 35.95it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 45875200 | Self Similarity: -0.0089
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 45875200 | Self Similarity: 0.0163
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 45875200 | Self Similarity: 0.0031
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 45875200 | Self Similarity: -0.0099
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0499 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 45875200 | Self Similarity: -0.0012
Sparsity: 124.7 | Dead Features: 0 | Total Loss: 0.0866 | Reconstruction Loss: 0.0459 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 458

 41%|████      | 22507/55054 [10:19<14:54, 36.40it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 46080000 | Self Similarity: -0.0087
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 46080000 | Self Similarity: 0.0161
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 46080000 | Self Similarity: 0.0031
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 46080000 | Self Similarity: -0.0103
Sparsity: 123.9 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 46080000 | Self Similarity: -0.0012
Sparsity: 129.2 | Dead Features: 0 | Total Loss: 0.0877 | Reconstruction Loss: 0.0460 | L1 Loss: 0.0417 | l1_alpha: 8.0000e-04 | Tokens: 460

 41%|████      | 22607/55054 [10:21<15:03, 35.93it/s]

Sparsity: 18.0 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0060 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 46284800 | Self Similarity: -0.0087
Sparsity: 33.9 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 46284800 | Self Similarity: 0.0162
Sparsity: 45.9 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 46284800 | Self Similarity: 0.0028
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 46284800 | Self Similarity: -0.0100
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 46284800 | Self Similarity: -0.0012
Sparsity: 123.3 | Dead Features: 0 | Total Loss: 0.0835 | Reconstruction Loss: 0.0439 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 462

 41%|████      | 22707/55054 [10:24<14:53, 36.20it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 46489600 | Self Similarity: -0.0088
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 46489600 | Self Similarity: 0.0163
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 46489600 | Self Similarity: 0.0027
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0178 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 46489600 | Self Similarity: -0.0101
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0467 | Reconstruction Loss: 0.0231 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 46489600 | Self Similarity: -0.0013
Sparsity: 125.8 | Dead Features: 0 | Total Loss: 0.0833 | Reconstruction Loss: 0.0437 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 464

 41%|████▏     | 22807/55054 [10:27<14:42, 36.55it/s]

Sparsity: 27.9 | Dead Features: 0 | Total Loss: 0.0179 | Reconstruction Loss: 0.0110 | L1 Loss: 0.0069 | l1_alpha: 8.0000e-04 | Tokens: 46694400 | Self Similarity: -0.0088
Sparsity: 38.9 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 46694400 | Self Similarity: 0.0162
Sparsity: 51.7 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 46694400 | Self Similarity: 0.0025
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 46694400 | Self Similarity: -0.0101
Sparsity: 125.2 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 46694400 | Self Similarity: -0.0014
Sparsity: 124.0 | Dead Features: 0 | Total Loss: 0.0876 | Reconstruction Loss: 0.0456 | L1 Loss: 0.0420 | l1_alpha: 8.0000e-04 | Tokens: 466

 42%|████▏     | 22907/55054 [10:30<14:42, 36.42it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 46899200 | Self Similarity: -0.0087
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 46899200 | Self Similarity: 0.0161
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 46899200 | Self Similarity: 0.0029
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 46899200 | Self Similarity: -0.0103
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 46899200 | Self Similarity: -0.0017
Sparsity: 124.7 | Dead Features: 0 | Total Loss: 0.0802 | Reconstruction Loss: 0.0409 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 468

 42%|████▏     | 23007/55054 [10:32<14:42, 36.30it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 47104000 | Self Similarity: -0.0088
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 47104000 | Self Similarity: 0.0164
Sparsity: 50.5 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 47104000 | Self Similarity: 0.0028
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 47104000 | Self Similarity: -0.0100
Sparsity: 125.0 | Dead Features: 0 | Total Loss: 0.0504 | Reconstruction Loss: 0.0254 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 47104000 | Self Similarity: -0.0016
Sparsity: 129.2 | Dead Features: 0 | Total Loss: 0.0863 | Reconstruction Loss: 0.0453 | L1 Loss: 0.0410 | l1_alpha: 8.0000e-04 | Tokens: 471

 42%|████▏     | 23107/55054 [10:35<14:51, 35.82it/s]

Sparsity: 24.9 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 47308800 | Self Similarity: -0.0087
Sparsity: 40.1 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 47308800 | Self Similarity: 0.0163
Sparsity: 49.9 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 47308800 | Self Similarity: 0.0027
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 47308800 | Self Similarity: -0.0102
Sparsity: 125.4 | Dead Features: 0 | Total Loss: 0.0507 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 47308800 | Self Similarity: -0.0015
Sparsity: 128.4 | Dead Features: 0 | Total Loss: 0.0904 | Reconstruction Loss: 0.0469 | L1 Loss: 0.0435 | l1_alpha: 8.0000e-04 | Tokens: 473

 42%|████▏     | 23207/55054 [10:38<14:56, 35.52it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 47513600 | Self Similarity: -0.0090
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 47513600 | Self Similarity: 0.0164
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 47513600 | Self Similarity: 0.0028
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 47513600 | Self Similarity: -0.0102
Sparsity: 124.3 | Dead Features: 0 | Total Loss: 0.0514 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 47513600 | Self Similarity: -0.0014
Sparsity: 129.3 | Dead Features: 0 | Total Loss: 0.0896 | Reconstruction Loss: 0.0478 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 475

 42%|████▏     | 23307/55054 [10:41<14:54, 35.50it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 47718400 | Self Similarity: -0.0090
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 47718400 | Self Similarity: 0.0163
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 47718400 | Self Similarity: 0.0027
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 47718400 | Self Similarity: -0.0102
Sparsity: 124.6 | Dead Features: 0 | Total Loss: 0.0504 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 47718400 | Self Similarity: -0.0016
Sparsity: 129.3 | Dead Features: 0 | Total Loss: 0.0878 | Reconstruction Loss: 0.0464 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens: 477

 43%|████▎     | 23407/55054 [10:44<15:26, 34.16it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 47923200 | Self Similarity: -0.0091
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 47923200 | Self Similarity: 0.0163
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 47923200 | Self Similarity: 0.0029
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 47923200 | Self Similarity: -0.0099
Sparsity: 125.5 | Dead Features: 0 | Total Loss: 0.0499 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 47923200 | Self Similarity: -0.0017
Sparsity: 132.6 | Dead Features: 0 | Total Loss: 0.0888 | Reconstruction Loss: 0.0469 | L1 Loss: 0.0420 | l1_alpha: 8.0000e-04 | Tokens: 479

 43%|████▎     | 23507/55054 [10:46<15:00, 35.02it/s]

Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 48128000 | Self Similarity: -0.0089
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 48128000 | Self Similarity: 0.0161
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 48128000 | Self Similarity: 0.0027
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 48128000 | Self Similarity: -0.0097
Sparsity: 123.7 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 48128000 | Self Similarity: -0.0015
Sparsity: 130.2 | Dead Features: 0 | Total Loss: 0.0863 | Reconstruction Loss: 0.0472 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens: 481

 43%|████▎     | 23607/55054 [10:49<15:20, 34.17it/s]

Sparsity: 27.1 | Dead Features: 0 | Total Loss: 0.0163 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 48332800 | Self Similarity: -0.0090
Sparsity: 41.2 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 48332800 | Self Similarity: 0.0161
Sparsity: 51.7 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 48332800 | Self Similarity: 0.0029
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 48332800 | Self Similarity: -0.0097
Sparsity: 126.3 | Dead Features: 0 | Total Loss: 0.0513 | Reconstruction Loss: 0.0263 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 48332800 | Self Similarity: -0.0016
Sparsity: 132.6 | Dead Features: 0 | Total Loss: 0.0901 | Reconstruction Loss: 0.0480 | L1 Loss: 0.0422 | l1_alpha: 8.0000e-04 | Tokens: 483

 43%|████▎     | 23707/55054 [10:52<15:06, 34.58it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 48537600 | Self Similarity: -0.0089
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 48537600 | Self Similarity: 0.0159
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 48537600 | Self Similarity: 0.0030
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 48537600 | Self Similarity: -0.0097
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 48537600 | Self Similarity: -0.0016
Sparsity: 122.0 | Dead Features: 0 | Total Loss: 0.0822 | Reconstruction Loss: 0.0442 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 485

 43%|████▎     | 23807/55054 [10:55<15:01, 34.65it/s]

Sparsity: 23.6 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 48742400 | Self Similarity: -0.0090
Sparsity: 39.0 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 48742400 | Self Similarity: 0.0159
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 48742400 | Self Similarity: 0.0031
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0401 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 48742400 | Self Similarity: -0.0100
Sparsity: 124.4 | Dead Features: 0 | Total Loss: 0.0505 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 48742400 | Self Similarity: -0.0018
Sparsity: 128.9 | Dead Features: 0 | Total Loss: 0.0875 | Reconstruction Loss: 0.0461 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens: 487

 43%|████▎     | 23907/55054 [10:58<14:33, 35.67it/s]

Sparsity: 24.3 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 48947200 | Self Similarity: -0.0088
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 48947200 | Self Similarity: 0.0160
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 48947200 | Self Similarity: 0.0034
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0401 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 48947200 | Self Similarity: -0.0101
Sparsity: 124.1 | Dead Features: 0 | Total Loss: 0.0502 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 48947200 | Self Similarity: -0.0019
Sparsity: 126.6 | Dead Features: 0 | Total Loss: 0.0851 | Reconstruction Loss: 0.0445 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 489

 44%|████▎     | 24005/55054 [11:01<14:36, 35.44it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 49152000 | Self Similarity: -0.0088
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 49152000 | Self Similarity: 0.0160
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 49152000 | Self Similarity: 0.0036
Sparsity: 113.3 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 49152000 | Self Similarity: -0.0097
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 49152000 | Self Similarity: -0.0019
Sparsity: 128.5 | Dead Features: 0 | Total Loss: 0.0855 | Reconstruction Loss: 0.0448 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 491

 44%|████▍     | 24105/55054 [11:04<14:44, 34.99it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 49356800 | Self Similarity: -0.0089
Sparsity: 38.6 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 49356800 | Self Similarity: 0.0159
Sparsity: 49.9 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 49356800 | Self Similarity: 0.0034
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 49356800 | Self Similarity: -0.0096
Sparsity: 122.6 | Dead Features: 0 | Total Loss: 0.0506 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 49356800 | Self Similarity: -0.0021
Sparsity: 131.5 | Dead Features: 0 | Total Loss: 0.0856 | Reconstruction Loss: 0.0452 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 493

 44%|████▍     | 24206/55054 [11:06<13:28, 38.17it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 49561600 | Self Similarity: -0.0086
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 49561600 | Self Similarity: 0.0162
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 49561600 | Self Similarity: 0.0033
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 49561600 | Self Similarity: -0.0096
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 49561600 | Self Similarity: -0.0019
Sparsity: 132.1 | Dead Features: 0 | Total Loss: 0.0842 | Reconstruction Loss: 0.0438 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 495

 44%|████▍     | 24304/55054 [11:09<14:43, 34.82it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 49766400 | Self Similarity: -0.0083
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 49766400 | Self Similarity: 0.0160
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 49766400 | Self Similarity: 0.0033
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 49766400 | Self Similarity: -0.0094
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 49766400 | Self Similarity: -0.0021
Sparsity: 131.1 | Dead Features: 0 | Total Loss: 0.0835 | Reconstruction Loss: 0.0429 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 497

 44%|████▍     | 24405/55054 [11:12<14:54, 34.28it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 49971200 | Self Similarity: -0.0085
Sparsity: 38.6 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 49971200 | Self Similarity: 0.0158
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 49971200 | Self Similarity: 0.0033
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 49971200 | Self Similarity: -0.0099
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 49971200 | Self Similarity: -0.0016
Sparsity: 131.5 | Dead Features: 0 | Total Loss: 0.0838 | Reconstruction Loss: 0.0442 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 499

 45%|████▍     | 24504/55054 [11:15<14:54, 34.16it/s]

Sparsity: 27.8 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0069 | l1_alpha: 8.0000e-04 | Tokens: 50176000 | Self Similarity: -0.0085
Sparsity: 43.1 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0111 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 50176000 | Self Similarity: 0.0161
Sparsity: 54.2 | Dead Features: 0 | Total Loss: 0.0225 | Reconstruction Loss: 0.0140 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 50176000 | Self Similarity: 0.0032
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0431 | Reconstruction Loss: 0.0214 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 50176000 | Self Similarity: -0.0097
Sparsity: 128.8 | Dead Features: 0 | Total Loss: 0.0540 | Reconstruction Loss: 0.0278 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 50176000 | Self Similarity: -0.0017
Sparsity: 126.8 | Dead Features: 0 | Total Loss: 0.0916 | Reconstruction Loss: 0.0506 | L1 Loss: 0.0410 | l1_alpha: 8.0000e-04 | Tokens: 501

 45%|████▍     | 24604/55054 [11:18<15:02, 33.74it/s]

Sparsity: 24.5 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 50380800 | Self Similarity: -0.0085
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 50380800 | Self Similarity: 0.0157
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0211 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 50380800 | Self Similarity: 0.0032
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0431 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 50380800 | Self Similarity: -0.0098
Sparsity: 122.7 | Dead Features: 0 | Total Loss: 0.0541 | Reconstruction Loss: 0.0288 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 50380800 | Self Similarity: -0.0014
Sparsity: 99.1 | Dead Features: 0 | Total Loss: 0.0961 | Reconstruction Loss: 0.0590 | L1 Loss: 0.0370 | l1_alpha: 8.0000e-04 | Tokens: 5038

 45%|████▍     | 24704/55054 [11:21<14:48, 34.17it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 50585600 | Self Similarity: -0.0096
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 50585600 | Self Similarity: 0.0159
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 50585600 | Self Similarity: 0.0031
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 50585600 | Self Similarity: -0.0098
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 50585600 | Self Similarity: -0.0018
Sparsity: 112.5 | Dead Features: 0 | Total Loss: 0.0858 | Reconstruction Loss: 0.0472 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 505

 45%|████▌     | 24804/55054 [11:23<14:36, 34.51it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 50790400 | Self Similarity: -0.0091
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 50790400 | Self Similarity: 0.0159
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 50790400 | Self Similarity: 0.0032
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 50790400 | Self Similarity: -0.0097
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 50790400 | Self Similarity: -0.0016
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0809 | Reconstruction Loss: 0.0426 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 507

 45%|████▌     | 24904/55054 [11:26<14:27, 34.77it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 50995200 | Self Similarity: -0.0091
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 50995200 | Self Similarity: 0.0160
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 50995200 | Self Similarity: 0.0028
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 50995200 | Self Similarity: -0.0097
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 50995200 | Self Similarity: -0.0016
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0820 | Reconstruction Loss: 0.0429 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens: 509

 45%|████▌     | 25004/55054 [11:29<14:16, 35.07it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 51200000 | Self Similarity: -0.0087
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 51200000 | Self Similarity: 0.0159
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 51200000 | Self Similarity: 0.0031
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 51200000 | Self Similarity: -0.0096
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 51200000 | Self Similarity: -0.0019
Sparsity: 126.6 | Dead Features: 0 | Total Loss: 0.0847 | Reconstruction Loss: 0.0442 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 512

 46%|████▌     | 25104/55054 [11:32<14:21, 34.78it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 51404800 | Self Similarity: -0.0091
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 51404800 | Self Similarity: 0.0160
Sparsity: 46.5 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 51404800 | Self Similarity: 0.0029
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 51404800 | Self Similarity: -0.0097
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 51404800 | Self Similarity: -0.0016
Sparsity: 126.2 | Dead Features: 0 | Total Loss: 0.0803 | Reconstruction Loss: 0.0409 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 514

 46%|████▌     | 25204/55054 [11:35<13:53, 35.80it/s]

Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 51609600 | Self Similarity: -0.0089
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 51609600 | Self Similarity: 0.0155
Sparsity: 45.4 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 51609600 | Self Similarity: 0.0027
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 51609600 | Self Similarity: -0.0098
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 51609600 | Self Similarity: -0.0015
Sparsity: 127.9 | Dead Features: 0 | Total Loss: 0.0802 | Reconstruction Loss: 0.0412 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 516

 46%|████▌     | 25304/55054 [11:38<14:11, 34.95it/s]

Sparsity: 27.4 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0068 | l1_alpha: 8.0000e-04 | Tokens: 51814400 | Self Similarity: -0.0088
Sparsity: 42.3 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0111 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 51814400 | Self Similarity: 0.0155
Sparsity: 52.4 | Dead Features: 0 | Total Loss: 0.0220 | Reconstruction Loss: 0.0139 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 51814400 | Self Similarity: 0.0029
Sparsity: 124.3 | Dead Features: 0 | Total Loss: 0.0435 | Reconstruction Loss: 0.0213 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 51814400 | Self Similarity: -0.0095
Sparsity: 131.3 | Dead Features: 0 | Total Loss: 0.0542 | Reconstruction Loss: 0.0279 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 51814400 | Self Similarity: -0.0015
Sparsity: 135.9 | Dead Features: 0 | Total Loss: 0.0905 | Reconstruction Loss: 0.0487 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 518

 46%|████▌     | 25404/55054 [11:41<14:16, 34.62it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 52019200 | Self Similarity: -0.0087
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 52019200 | Self Similarity: 0.0155
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 52019200 | Self Similarity: 0.0029
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 52019200 | Self Similarity: -0.0096
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 52019200 | Self Similarity: -0.0018
Sparsity: 129.6 | Dead Features: 0 | Total Loss: 0.0802 | Reconstruction Loss: 0.0409 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 520

 46%|████▋     | 25504/55054 [11:43<14:17, 34.47it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 52224000 | Self Similarity: -0.0087
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 52224000 | Self Similarity: 0.0154
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 52224000 | Self Similarity: 0.0027
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 52224000 | Self Similarity: -0.0096
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 52224000 | Self Similarity: -0.0018
Sparsity: 130.1 | Dead Features: 0 | Total Loss: 0.0814 | Reconstruction Loss: 0.0423 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens: 522

 47%|████▋     | 25604/55054 [11:46<14:19, 34.26it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 52428800 | Self Similarity: -0.0086
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 52428800 | Self Similarity: 0.0157
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 52428800 | Self Similarity: 0.0028
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 52428800 | Self Similarity: -0.0095
Sparsity: 124.9 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 52428800 | Self Similarity: -0.0018
Sparsity: 132.3 | Dead Features: 0 | Total Loss: 0.0818 | Reconstruction Loss: 0.0412 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 524

 47%|████▋     | 25705/55054 [11:49<14:13, 34.37it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 52633600 | Self Similarity: -0.0085
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 52633600 | Self Similarity: 0.0154
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 52633600 | Self Similarity: 0.0028
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 52633600 | Self Similarity: -0.0096
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 52633600 | Self Similarity: -0.0017
Sparsity: 134.2 | Dead Features: 0 | Total Loss: 0.0829 | Reconstruction Loss: 0.0425 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 526

 47%|████▋     | 25806/55054 [11:52<13:04, 37.30it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 52838400 | Self Similarity: -0.0083
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 52838400 | Self Similarity: 0.0157
Sparsity: 50.2 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 52838400 | Self Similarity: 0.0032
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 52838400 | Self Similarity: -0.0097
Sparsity: 122.7 | Dead Features: 0 | Total Loss: 0.0522 | Reconstruction Loss: 0.0268 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 52838400 | Self Similarity: -0.0018
Sparsity: 135.0 | Dead Features: 0 | Total Loss: 0.0876 | Reconstruction Loss: 0.0464 | L1 Loss: 0.0412 | l1_alpha: 8.0000e-04 | Tokens: 528

 47%|████▋     | 25906/55054 [11:55<14:09, 34.32it/s]

Sparsity: 23.4 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 53043200 | Self Similarity: -0.0084
Sparsity: 38.6 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 53043200 | Self Similarity: 0.0161
Sparsity: 51.4 | Dead Features: 0 | Total Loss: 0.0211 | Reconstruction Loss: 0.0132 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 53043200 | Self Similarity: 0.0029
Sparsity: 123.8 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 53043200 | Self Similarity: -0.0096
Sparsity: 127.9 | Dead Features: 0 | Total Loss: 0.0533 | Reconstruction Loss: 0.0275 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 53043200 | Self Similarity: -0.0018
Sparsity: 139.5 | Dead Features: 0 | Total Loss: 0.0890 | Reconstruction Loss: 0.0467 | L1 Loss: 0.0424 | l1_alpha: 8.0000e-04 | Tokens: 530

 47%|████▋     | 26004/55054 [11:58<12:55, 37.47it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 53248000 | Self Similarity: -0.0085
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 53248000 | Self Similarity: 0.0161
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 53248000 | Self Similarity: 0.0030
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0417 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 53248000 | Self Similarity: -0.0096
Sparsity: 126.0 | Dead Features: 0 | Total Loss: 0.0522 | Reconstruction Loss: 0.0254 | L1 Loss: 0.0268 | l1_alpha: 8.0000e-04 | Tokens: 53248000 | Self Similarity: -0.0018
Sparsity: 138.0 | Dead Features: 0 | Total Loss: 0.0868 | Reconstruction Loss: 0.0440 | L1 Loss: 0.0428 | l1_alpha: 8.0000e-04 | Tokens: 532

 47%|████▋     | 26105/55054 [12:00<13:19, 36.22it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 53452800 | Self Similarity: -0.0085
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 53452800 | Self Similarity: 0.0157
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 53452800 | Self Similarity: 0.0032
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 53452800 | Self Similarity: -0.0097
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 53452800 | Self Similarity: -0.0019
Sparsity: 134.1 | Dead Features: 0 | Total Loss: 0.0807 | Reconstruction Loss: 0.0419 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 534

 48%|████▊     | 26207/55054 [12:03<13:25, 35.82it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 53657600 | Self Similarity: -0.0085
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 53657600 | Self Similarity: 0.0159
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 53657600 | Self Similarity: 0.0035
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 53657600 | Self Similarity: -0.0097
Sparsity: 124.8 | Dead Features: 0 | Total Loss: 0.0502 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 53657600 | Self Similarity: -0.0019
Sparsity: 138.9 | Dead Features: 0 | Total Loss: 0.0836 | Reconstruction Loss: 0.0428 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 536

 48%|████▊     | 26305/55054 [12:06<13:31, 35.43it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 53862400 | Self Similarity: -0.0084
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 53862400 | Self Similarity: 0.0157
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 53862400 | Self Similarity: 0.0033
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 53862400 | Self Similarity: -0.0095
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 53862400 | Self Similarity: -0.0017
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0805 | Reconstruction Loss: 0.0426 | L1 Loss: 0.0378 | l1_alpha: 8.0000e-04 | Tokens: 538

 48%|████▊     | 26406/55054 [12:09<13:12, 36.14it/s]

Sparsity: 24.3 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 54067200 | Self Similarity: -0.0084
Sparsity: 40.2 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 54067200 | Self Similarity: 0.0155
Sparsity: 51.3 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 54067200 | Self Similarity: 0.0031
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 54067200 | Self Similarity: -0.0097
Sparsity: 126.5 | Dead Features: 0 | Total Loss: 0.0527 | Reconstruction Loss: 0.0268 | L1 Loss: 0.0259 | l1_alpha: 8.0000e-04 | Tokens: 54067200 | Self Similarity: -0.0019
Sparsity: 133.6 | Dead Features: 0 | Total Loss: 0.0881 | Reconstruction Loss: 0.0462 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 540

 48%|████▊     | 26506/55054 [12:11<12:56, 36.76it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 54272000 | Self Similarity: -0.0081
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 54272000 | Self Similarity: 0.0156
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 54272000 | Self Similarity: 0.0032
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 54272000 | Self Similarity: -0.0093
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 54272000 | Self Similarity: -0.0019
Sparsity: 129.2 | Dead Features: 0 | Total Loss: 0.0786 | Reconstruction Loss: 0.0400 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 542

 48%|████▊     | 26606/55054 [12:14<13:35, 34.90it/s]

Sparsity: 24.9 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 54476800 | Self Similarity: -0.0080
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 54476800 | Self Similarity: 0.0155
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 54476800 | Self Similarity: 0.0033
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 54476800 | Self Similarity: -0.0093
Sparsity: 123.0 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 54476800 | Self Similarity: -0.0019
Sparsity: 128.5 | Dead Features: 0 | Total Loss: 0.0805 | Reconstruction Loss: 0.0415 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 544

 49%|████▊     | 26706/55054 [12:17<13:45, 34.33it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 54681600 | Self Similarity: -0.0081
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 54681600 | Self Similarity: 0.0155
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 54681600 | Self Similarity: 0.0031
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 54681600 | Self Similarity: -0.0093
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 54681600 | Self Similarity: -0.0020
Sparsity: 132.5 | Dead Features: 0 | Total Loss: 0.0811 | Reconstruction Loss: 0.0406 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens: 546

 49%|████▊     | 26806/55054 [12:20<13:13, 35.60it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 54886400 | Self Similarity: -0.0081
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 54886400 | Self Similarity: 0.0156
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 54886400 | Self Similarity: 0.0033
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 54886400 | Self Similarity: -0.0094
Sparsity: 127.0 | Dead Features: 0 | Total Loss: 0.0513 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0266 | l1_alpha: 8.0000e-04 | Tokens: 54886400 | Self Similarity: -0.0020
Sparsity: 137.0 | Dead Features: 0 | Total Loss: 0.0825 | Reconstruction Loss: 0.0427 | L1 Loss: 0.0398 | l1_alpha: 8.0000e-04 | Tokens: 548

 49%|████▉     | 26904/55054 [12:22<12:57, 36.20it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 55091200 | Self Similarity: -0.0080
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 55091200 | Self Similarity: 0.0153
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 55091200 | Self Similarity: 0.0033
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 55091200 | Self Similarity: -0.0093
Sparsity: 124.9 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 55091200 | Self Similarity: -0.0022
Sparsity: 135.9 | Dead Features: 0 | Total Loss: 0.0825 | Reconstruction Loss: 0.0435 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 550

 49%|████▉     | 27005/55054 [12:25<13:01, 35.88it/s]

Sparsity: 24.1 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 55296000 | Self Similarity: -0.0079
Sparsity: 38.7 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 55296000 | Self Similarity: 0.0156
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 55296000 | Self Similarity: 0.0031
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 55296000 | Self Similarity: -0.0092
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 55296000 | Self Similarity: -0.0021
Sparsity: 135.7 | Dead Features: 0 | Total Loss: 0.0806 | Reconstruction Loss: 0.0413 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 552

 49%|████▉     | 27105/55054 [12:28<13:04, 35.63it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 55500800 | Self Similarity: -0.0079
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 55500800 | Self Similarity: 0.0155
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 55500800 | Self Similarity: 0.0029
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 55500800 | Self Similarity: -0.0094
Sparsity: 121.9 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 55500800 | Self Similarity: -0.0020
Sparsity: 125.0 | Dead Features: 0 | Total Loss: 0.0781 | Reconstruction Loss: 0.0397 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 555

 49%|████▉     | 27205/55054 [12:31<12:41, 36.56it/s]

Sparsity: 21.0 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 55705600 | Self Similarity: -0.0081
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 55705600 | Self Similarity: 0.0158
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 55705600 | Self Similarity: 0.0028
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 55705600 | Self Similarity: -0.0095
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 55705600 | Self Similarity: -0.0019
Sparsity: 136.0 | Dead Features: 0 | Total Loss: 0.0795 | Reconstruction Loss: 0.0408 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 557

 50%|████▉     | 27305/55054 [12:34<13:00, 35.55it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 55910400 | Self Similarity: -0.0076
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 55910400 | Self Similarity: 0.0155
Sparsity: 46.1 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 55910400 | Self Similarity: 0.0032
Sparsity: 111.6 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 55910400 | Self Similarity: -0.0093
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 55910400 | Self Similarity: -0.0018
Sparsity: 128.6 | Dead Features: 0 | Total Loss: 0.0789 | Reconstruction Loss: 0.0410 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 559

 50%|████▉     | 27405/55054 [12:36<12:43, 36.19it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 56115200 | Self Similarity: -0.0079
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 56115200 | Self Similarity: 0.0153
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 56115200 | Self Similarity: 0.0030
Sparsity: 114.5 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 56115200 | Self Similarity: -0.0093
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 56115200 | Self Similarity: -0.0018
Sparsity: 136.3 | Dead Features: 0 | Total Loss: 0.0817 | Reconstruction Loss: 0.0411 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 561

 50%|████▉     | 27505/55054 [12:39<12:42, 36.11it/s]

Sparsity: 24.1 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 56320000 | Self Similarity: -0.0077
Sparsity: 40.3 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 56320000 | Self Similarity: 0.0155
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 56320000 | Self Similarity: 0.0028
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 56320000 | Self Similarity: -0.0093
Sparsity: 124.9 | Dead Features: 0 | Total Loss: 0.0510 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 56320000 | Self Similarity: -0.0017
Sparsity: 128.7 | Dead Features: 0 | Total Loss: 0.0864 | Reconstruction Loss: 0.0471 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 563

 50%|█████     | 27605/55054 [12:42<12:45, 35.88it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 56524800 | Self Similarity: -0.0077
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 56524800 | Self Similarity: 0.0152
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 56524800 | Self Similarity: 0.0032
Sparsity: 123.4 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 56524800 | Self Similarity: -0.0092
Sparsity: 127.8 | Dead Features: 0 | Total Loss: 0.0535 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0277 | l1_alpha: 8.0000e-04 | Tokens: 56524800 | Self Similarity: -0.0019
Sparsity: 141.2 | Dead Features: 0 | Total Loss: 0.0927 | Reconstruction Loss: 0.0481 | L1 Loss: 0.0447 | l1_alpha: 8.0000e-04 | Tokens: 565

 50%|█████     | 27705/55054 [12:45<12:46, 35.67it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 56729600 | Self Similarity: -0.0078
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 56729600 | Self Similarity: 0.0152
Sparsity: 46.3 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 56729600 | Self Similarity: 0.0030
Sparsity: 114.1 | Dead Features: 0 | Total Loss: 0.0372 | Reconstruction Loss: 0.0176 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 56729600 | Self Similarity: -0.0089
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0230 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 56729600 | Self Similarity: -0.0019
Sparsity: 133.5 | Dead Features: 0 | Total Loss: 0.0780 | Reconstruction Loss: 0.0391 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 567

 51%|█████     | 27808/55054 [12:47<11:03, 41.06it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 56934400 | Self Similarity: -0.0077
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 56934400 | Self Similarity: 0.0152
Sparsity: 46.3 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 56934400 | Self Similarity: 0.0027
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0377 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 56934400 | Self Similarity: -0.0091
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0466 | Reconstruction Loss: 0.0232 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 56934400 | Self Similarity: -0.0020
Sparsity: 134.2 | Dead Features: 0 | Total Loss: 0.0770 | Reconstruction Loss: 0.0388 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 569

 51%|█████     | 27905/55054 [12:50<12:32, 36.08it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 57139200 | Self Similarity: -0.0075
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 57139200 | Self Similarity: 0.0158
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 57139200 | Self Similarity: 0.0026
Sparsity: 114.8 | Dead Features: 0 | Total Loss: 0.0375 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 57139200 | Self Similarity: -0.0092
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0465 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 57139200 | Self Similarity: -0.0023
Sparsity: 130.6 | Dead Features: 0 | Total Loss: 0.0759 | Reconstruction Loss: 0.0387 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 571

 51%|█████     | 28006/55054 [12:53<12:31, 35.99it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 57344000 | Self Similarity: -0.0074
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 57344000 | Self Similarity: 0.0155
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 57344000 | Self Similarity: 0.0026
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 57344000 | Self Similarity: -0.0094
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0510 | Reconstruction Loss: 0.0261 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 57344000 | Self Similarity: -0.0021
Sparsity: 135.1 | Dead Features: 0 | Total Loss: 0.0833 | Reconstruction Loss: 0.0426 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 573

 51%|█████     | 28106/55054 [12:56<12:26, 36.09it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 57548800 | Self Similarity: -0.0074
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 57548800 | Self Similarity: 0.0156
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 57548800 | Self Similarity: 0.0024
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 57548800 | Self Similarity: -0.0095
Sparsity: 123.8 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 57548800 | Self Similarity: -0.0021
Sparsity: 140.9 | Dead Features: 0 | Total Loss: 0.0817 | Reconstruction Loss: 0.0415 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 575

 51%|█████     | 28206/55054 [12:58<12:29, 35.81it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 57753600 | Self Similarity: -0.0074
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 57753600 | Self Similarity: 0.0155
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 57753600 | Self Similarity: 0.0026
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 57753600 | Self Similarity: -0.0095
Sparsity: 123.1 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 57753600 | Self Similarity: -0.0021
Sparsity: 140.5 | Dead Features: 0 | Total Loss: 0.0807 | Reconstruction Loss: 0.0408 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 577

 51%|█████▏    | 28306/55054 [13:01<12:24, 35.92it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 57958400 | Self Similarity: -0.0073
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 57958400 | Self Similarity: 0.0154
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 57958400 | Self Similarity: 0.0029
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 57958400 | Self Similarity: -0.0094
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 57958400 | Self Similarity: -0.0020
Sparsity: 142.1 | Dead Features: 0 | Total Loss: 0.0835 | Reconstruction Loss: 0.0421 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens: 579

 52%|█████▏    | 28406/55054 [13:04<12:25, 35.77it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 58163200 | Self Similarity: -0.0071
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 58163200 | Self Similarity: 0.0154
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 58163200 | Self Similarity: 0.0025
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 58163200 | Self Similarity: -0.0093
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0502 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 58163200 | Self Similarity: -0.0021
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0827 | Reconstruction Loss: 0.0448 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 581

 52%|█████▏    | 28506/55054 [13:07<12:10, 36.33it/s]

Sparsity: 23.9 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 58368000 | Self Similarity: -0.0070
Sparsity: 39.4 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 58368000 | Self Similarity: 0.0151
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 58368000 | Self Similarity: 0.0026
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 58368000 | Self Similarity: -0.0093
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0509 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 58368000 | Self Similarity: -0.0023
Sparsity: 134.5 | Dead Features: 0 | Total Loss: 0.0850 | Reconstruction Loss: 0.0454 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 583

 52%|█████▏    | 28606/55054 [13:09<12:12, 36.11it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 58572800 | Self Similarity: -0.0070
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 58572800 | Self Similarity: 0.0150
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 58572800 | Self Similarity: 0.0025
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 58572800 | Self Similarity: -0.0090
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 58572800 | Self Similarity: -0.0019
Sparsity: 135.3 | Dead Features: 0 | Total Loss: 0.0827 | Reconstruction Loss: 0.0430 | L1 Loss: 0.0398 | l1_alpha: 8.0000e-04 | Tokens: 585

 52%|█████▏    | 28706/55054 [13:12<12:02, 36.49it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 58777600 | Self Similarity: -0.0068
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 58777600 | Self Similarity: 0.0154
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 58777600 | Self Similarity: 0.0023
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 58777600 | Self Similarity: -0.0089
Sparsity: 123.7 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 58777600 | Self Similarity: -0.0016
Sparsity: 136.7 | Dead Features: 0 | Total Loss: 0.0786 | Reconstruction Loss: 0.0391 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 587

 52%|█████▏    | 28806/55054 [13:15<12:05, 36.18it/s]

Sparsity: 18.9 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 58982400 | Self Similarity: -0.0070
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 58982400 | Self Similarity: 0.0154
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 58982400 | Self Similarity: 0.0024
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 58982400 | Self Similarity: -0.0089
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 58982400 | Self Similarity: -0.0018
Sparsity: 140.5 | Dead Features: 0 | Total Loss: 0.0799 | Reconstruction Loss: 0.0400 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 589

 53%|█████▎    | 28906/55054 [13:18<12:01, 36.22it/s]

Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 59187200 | Self Similarity: -0.0070
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 59187200 | Self Similarity: 0.0152
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 59187200 | Self Similarity: 0.0025
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 59187200 | Self Similarity: -0.0091
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 59187200 | Self Similarity: -0.0019
Sparsity: 135.2 | Dead Features: 0 | Total Loss: 0.0773 | Reconstruction Loss: 0.0386 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 591

 53%|█████▎    | 29008/55054 [13:21<10:48, 40.16it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 59392000 | Self Similarity: -0.0070
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 59392000 | Self Similarity: 0.0153
Sparsity: 46.3 | Dead Features: 0 | Total Loss: 0.0188 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 59392000 | Self Similarity: 0.0024
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 59392000 | Self Similarity: -0.0096
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 59392000 | Self Similarity: -0.0017
Sparsity: 133.6 | Dead Features: 0 | Total Loss: 0.0751 | Reconstruction Loss: 0.0376 | L1 Loss: 0.0375 | l1_alpha: 8.0000e-04 | Tokens: 593

 53%|█████▎    | 29107/55054 [13:23<11:07, 38.90it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 59596800 | Self Similarity: -0.0074
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 59596800 | Self Similarity: 0.0150
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 59596800 | Self Similarity: 0.0023
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 59596800 | Self Similarity: -0.0093
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 59596800 | Self Similarity: -0.0018
Sparsity: 135.8 | Dead Features: 0 | Total Loss: 0.0805 | Reconstruction Loss: 0.0405 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 595

 53%|█████▎    | 29207/55054 [13:26<11:53, 36.22it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 59801600 | Self Similarity: -0.0070
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 59801600 | Self Similarity: 0.0148
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 59801600 | Self Similarity: 0.0021
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 59801600 | Self Similarity: -0.0095
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 59801600 | Self Similarity: -0.0022
Sparsity: 133.2 | Dead Features: 0 | Total Loss: 0.0781 | Reconstruction Loss: 0.0397 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 598

 53%|█████▎    | 29307/55054 [13:29<11:44, 36.54it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 60006400 | Self Similarity: -0.0071
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 60006400 | Self Similarity: 0.0151
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 60006400 | Self Similarity: 0.0020
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 60006400 | Self Similarity: -0.0096
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 60006400 | Self Similarity: -0.0018
Sparsity: 138.0 | Dead Features: 0 | Total Loss: 0.0791 | Reconstruction Loss: 0.0398 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 600

 53%|█████▎    | 29407/55054 [13:31<11:56, 35.81it/s]

Sparsity: 24.1 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 60211200 | Self Similarity: -0.0071
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 60211200 | Self Similarity: 0.0152
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 60211200 | Self Similarity: 0.0023
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 60211200 | Self Similarity: -0.0097
Sparsity: 125.0 | Dead Features: 0 | Total Loss: 0.0513 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 60211200 | Self Similarity: -0.0019
Sparsity: 109.1 | Dead Features: 0 | Total Loss: 0.0899 | Reconstruction Loss: 0.0533 | L1 Loss: 0.0367 | l1_alpha: 8.0000e-04 | Tokens: 602

 54%|█████▎    | 29507/55054 [13:34<11:57, 35.60it/s]

Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 60416000 | Self Similarity: -0.0074
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 60416000 | Self Similarity: 0.0152
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 60416000 | Self Similarity: 0.0023
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 60416000 | Self Similarity: -0.0095
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 60416000 | Self Similarity: -0.0020
Sparsity: 124.1 | Dead Features: 0 | Total Loss: 0.0772 | Reconstruction Loss: 0.0400 | L1 Loss: 0.0371 | l1_alpha: 8.0000e-04 | Tokens: 604

 54%|█████▍    | 29607/55054 [13:37<11:26, 37.06it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 60620800 | Self Similarity: -0.0072
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 60620800 | Self Similarity: 0.0150
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 60620800 | Self Similarity: 0.0023
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 60620800 | Self Similarity: -0.0094
Sparsity: 125.3 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 60620800 | Self Similarity: -0.0019
Sparsity: 133.1 | Dead Features: 0 | Total Loss: 0.0808 | Reconstruction Loss: 0.0415 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 606

 54%|█████▍    | 29705/55054 [13:39<11:38, 36.30it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 60825600 | Self Similarity: -0.0070
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 60825600 | Self Similarity: 0.0151
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 60825600 | Self Similarity: 0.0022
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 60825600 | Self Similarity: -0.0095
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 60825600 | Self Similarity: -0.0020
Sparsity: 136.3 | Dead Features: 0 | Total Loss: 0.0791 | Reconstruction Loss: 0.0398 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 608

 54%|█████▍    | 29805/55054 [13:42<11:42, 35.94it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 61030400 | Self Similarity: -0.0067
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 61030400 | Self Similarity: 0.0151
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 61030400 | Self Similarity: 0.0024
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 61030400 | Self Similarity: -0.0095
Sparsity: 127.4 | Dead Features: 0 | Total Loss: 0.0511 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 61030400 | Self Similarity: -0.0021
Sparsity: 140.0 | Dead Features: 0 | Total Loss: 0.0824 | Reconstruction Loss: 0.0416 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens: 610

 54%|█████▍    | 29905/55054 [13:45<11:46, 35.60it/s]

Sparsity: 25.8 | Dead Features: 0 | Total Loss: 0.0161 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 61235200 | Self Similarity: -0.0066
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 61235200 | Self Similarity: 0.0149
Sparsity: 50.2 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 61235200 | Self Similarity: 0.0027
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 61235200 | Self Similarity: -0.0094
Sparsity: 126.3 | Dead Features: 0 | Total Loss: 0.0509 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 61235200 | Self Similarity: -0.0017
Sparsity: 138.4 | Dead Features: 0 | Total Loss: 0.0876 | Reconstruction Loss: 0.0422 | L1 Loss: 0.0454 | l1_alpha: 8.0000e-04 | Tokens: 612

 55%|█████▍    | 30005/55054 [13:48<11:30, 36.25it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 61440000 | Self Similarity: -0.0066
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 61440000 | Self Similarity: 0.0148
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 61440000 | Self Similarity: 0.0023
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 61440000 | Self Similarity: -0.0095
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 61440000 | Self Similarity: -0.0015
Sparsity: 137.5 | Dead Features: 0 | Total Loss: 0.0784 | Reconstruction Loss: 0.0395 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 614

 55%|█████▍    | 30105/55054 [13:50<11:36, 35.84it/s]

Sparsity: 24.2 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 61644800 | Self Similarity: -0.0070
Sparsity: 40.2 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 61644800 | Self Similarity: 0.0149
Sparsity: 51.7 | Dead Features: 0 | Total Loss: 0.0216 | Reconstruction Loss: 0.0133 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 61644800 | Self Similarity: 0.0024
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0415 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 61644800 | Self Similarity: -0.0096
Sparsity: 124.9 | Dead Features: 0 | Total Loss: 0.0525 | Reconstruction Loss: 0.0270 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 61644800 | Self Similarity: -0.0016
Sparsity: 141.0 | Dead Features: 0 | Total Loss: 0.0846 | Reconstruction Loss: 0.0432 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens: 616

 55%|█████▍    | 30205/55054 [13:53<11:25, 36.22it/s]

Sparsity: 18.5 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0062 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 61849600 | Self Similarity: -0.0069
Sparsity: 34.8 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 61849600 | Self Similarity: 0.0148
Sparsity: 46.4 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 61849600 | Self Similarity: 0.0025
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 61849600 | Self Similarity: -0.0096
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 61849600 | Self Similarity: -0.0016
Sparsity: 146.6 | Dead Features: 0 | Total Loss: 0.1230 | Reconstruction Loss: 0.0806 | L1 Loss: 0.0424 | l1_alpha: 8.0000e-04 | Tokens: 618

 55%|█████▌    | 30305/55054 [13:56<11:38, 35.42it/s]

Sparsity: 26.5 | Dead Features: 0 | Total Loss: 0.0161 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 62054400 | Self Similarity: -0.0071
Sparsity: 41.4 | Dead Features: 0 | Total Loss: 0.0160 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 62054400 | Self Similarity: 0.0147
Sparsity: 52.3 | Dead Features: 0 | Total Loss: 0.0215 | Reconstruction Loss: 0.0132 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 62054400 | Self Similarity: 0.0022
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0415 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 62054400 | Self Similarity: -0.0096
Sparsity: 128.5 | Dead Features: 0 | Total Loss: 0.0527 | Reconstruction Loss: 0.0264 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 62054400 | Self Similarity: -0.0017
Sparsity: 129.2 | Dead Features: 0 | Total Loss: 0.0862 | Reconstruction Loss: 0.0457 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens: 620

 55%|█████▌    | 30405/55054 [13:59<11:31, 35.63it/s]

Sparsity: 23.4 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 62259200 | Self Similarity: -0.0075
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 62259200 | Self Similarity: 0.0146
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 62259200 | Self Similarity: 0.0020
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 62259200 | Self Similarity: -0.0095
Sparsity: 122.3 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 62259200 | Self Similarity: -0.0017
Sparsity: 130.2 | Dead Features: 0 | Total Loss: 0.0779 | Reconstruction Loss: 0.0395 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 622

 55%|█████▌    | 30505/55054 [14:02<11:24, 35.87it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 62464000 | Self Similarity: -0.0072
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 62464000 | Self Similarity: 0.0147
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 62464000 | Self Similarity: 0.0020
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 62464000 | Self Similarity: -0.0097
Sparsity: 123.3 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 62464000 | Self Similarity: -0.0018
Sparsity: 131.3 | Dead Features: 0 | Total Loss: 0.0801 | Reconstruction Loss: 0.0426 | L1 Loss: 0.0376 | l1_alpha: 8.0000e-04 | Tokens: 624

 56%|█████▌    | 30605/55054 [14:04<11:19, 35.98it/s]

Sparsity: 19.4 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 62668800 | Self Similarity: -0.0073
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 62668800 | Self Similarity: 0.0145
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 62668800 | Self Similarity: 0.0022
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 62668800 | Self Similarity: -0.0097
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 62668800 | Self Similarity: -0.0018
Sparsity: 139.1 | Dead Features: 0 | Total Loss: 0.0796 | Reconstruction Loss: 0.0398 | L1 Loss: 0.0398 | l1_alpha: 8.0000e-04 | Tokens: 626

 56%|█████▌    | 30705/55054 [14:07<11:15, 36.06it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 62873600 | Self Similarity: -0.0072
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 62873600 | Self Similarity: 0.0148
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 62873600 | Self Similarity: 0.0024
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 62873600 | Self Similarity: -0.0094
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 62873600 | Self Similarity: -0.0017
Sparsity: 137.3 | Dead Features: 0 | Total Loss: 0.0777 | Reconstruction Loss: 0.0384 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 628

 56%|█████▌    | 30805/55054 [14:10<11:13, 36.00it/s]

Sparsity: 23.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 63078400 | Self Similarity: -0.0070
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 63078400 | Self Similarity: 0.0147
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 63078400 | Self Similarity: 0.0023
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 63078400 | Self Similarity: -0.0094
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 63078400 | Self Similarity: -0.0016
Sparsity: 139.8 | Dead Features: 0 | Total Loss: 0.0776 | Reconstruction Loss: 0.0386 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 630

 56%|█████▌    | 30906/55054 [14:13<11:00, 36.56it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 63283200 | Self Similarity: -0.0070
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 63283200 | Self Similarity: 0.0146
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 63283200 | Self Similarity: 0.0022
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 63283200 | Self Similarity: -0.0094
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 63283200 | Self Similarity: -0.0017
Sparsity: 140.7 | Dead Features: 0 | Total Loss: 0.0773 | Reconstruction Loss: 0.0382 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 632

 56%|█████▋    | 31006/55054 [14:15<11:22, 35.23it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 63488000 | Self Similarity: -0.0068
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 63488000 | Self Similarity: 0.0148
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 63488000 | Self Similarity: 0.0023
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 63488000 | Self Similarity: -0.0091
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 63488000 | Self Similarity: -0.0018
Sparsity: 137.2 | Dead Features: 0 | Total Loss: 0.0750 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 634

 57%|█████▋    | 31106/55054 [14:18<11:13, 35.56it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 63692800 | Self Similarity: -0.0071
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 63692800 | Self Similarity: 0.0146
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 63692800 | Self Similarity: 0.0025
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 63692800 | Self Similarity: -0.0093
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 63692800 | Self Similarity: -0.0017
Sparsity: 142.3 | Dead Features: 0 | Total Loss: 0.0769 | Reconstruction Loss: 0.0388 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 636

 57%|█████▋    | 31206/55054 [14:21<10:57, 36.29it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 63897600 | Self Similarity: -0.0070
Sparsity: 39.4 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 63897600 | Self Similarity: 0.0148
Sparsity: 50.2 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 63897600 | Self Similarity: 0.0025
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 63897600 | Self Similarity: -0.0091
Sparsity: 123.6 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 63897600 | Self Similarity: -0.0018
Sparsity: 141.5 | Dead Features: 0 | Total Loss: 0.0787 | Reconstruction Loss: 0.0391 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 638

 57%|█████▋    | 31306/55054 [14:24<11:02, 35.84it/s]

Sparsity: 27.0 | Dead Features: 0 | Total Loss: 0.0161 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0068 | l1_alpha: 8.0000e-04 | Tokens: 64102400 | Self Similarity: -0.0068
Sparsity: 43.2 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0109 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 64102400 | Self Similarity: 0.0150
Sparsity: 56.1 | Dead Features: 0 | Total Loss: 0.0230 | Reconstruction Loss: 0.0144 | L1 Loss: 0.0086 | l1_alpha: 8.0000e-04 | Tokens: 64102400 | Self Similarity: 0.0023
Sparsity: 126.2 | Dead Features: 0 | Total Loss: 0.0435 | Reconstruction Loss: 0.0214 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 64102400 | Self Similarity: -0.0094
Sparsity: 134.4 | Dead Features: 0 | Total Loss: 0.0572 | Reconstruction Loss: 0.0299 | L1 Loss: 0.0273 | l1_alpha: 8.0000e-04 | Tokens: 64102400 | Self Similarity: -0.0018
Sparsity: 147.5 | Dead Features: 0 | Total Loss: 0.0900 | Reconstruction Loss: 0.0466 | L1 Loss: 0.0434 | l1_alpha: 8.0000e-04 | Tokens: 641

 57%|█████▋    | 31406/55054 [14:27<10:55, 36.07it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 64307200 | Self Similarity: -0.0066
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 64307200 | Self Similarity: 0.0149
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 64307200 | Self Similarity: 0.0022
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 64307200 | Self Similarity: -0.0093
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 64307200 | Self Similarity: -0.0020
Sparsity: 137.6 | Dead Features: 0 | Total Loss: 0.0756 | Reconstruction Loss: 0.0376 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 643

 57%|█████▋    | 31506/55054 [14:29<10:53, 36.06it/s]

Sparsity: 23.1 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 64512000 | Self Similarity: -0.0065
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 64512000 | Self Similarity: 0.0150
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 64512000 | Self Similarity: 0.0020
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 64512000 | Self Similarity: -0.0092
Sparsity: 124.7 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 64512000 | Self Similarity: -0.0021
Sparsity: 140.7 | Dead Features: 0 | Total Loss: 0.0765 | Reconstruction Loss: 0.0376 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 645

 57%|█████▋    | 31606/55054 [14:32<11:08, 35.10it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 64716800 | Self Similarity: -0.0066
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 64716800 | Self Similarity: 0.0148
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 64716800 | Self Similarity: 0.0020
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 64716800 | Self Similarity: -0.0094
Sparsity: 123.4 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 64716800 | Self Similarity: -0.0018
Sparsity: 142.8 | Dead Features: 0 | Total Loss: 0.0782 | Reconstruction Loss: 0.0387 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 647

 58%|█████▊    | 31706/55054 [14:35<10:51, 35.84it/s]

Sparsity: 23.6 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 64921600 | Self Similarity: -0.0066
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 64921600 | Self Similarity: 0.0146
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 64921600 | Self Similarity: 0.0027
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 64921600 | Self Similarity: -0.0091
Sparsity: 124.0 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 64921600 | Self Similarity: -0.0018
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0782 | Reconstruction Loss: 0.0383 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 649

 58%|█████▊    | 31806/55054 [14:38<10:55, 35.47it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 65126400 | Self Similarity: -0.0066
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 65126400 | Self Similarity: 0.0145
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 65126400 | Self Similarity: 0.0026
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 65126400 | Self Similarity: -0.0091
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 65126400 | Self Similarity: -0.0021
Sparsity: 137.1 | Dead Features: 0 | Total Loss: 0.0753 | Reconstruction Loss: 0.0376 | L1 Loss: 0.0377 | l1_alpha: 8.0000e-04 | Tokens: 651

 58%|█████▊    | 31906/55054 [14:40<10:22, 37.18it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 65331200 | Self Similarity: -0.0066
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 65331200 | Self Similarity: 0.0144
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 65331200 | Self Similarity: 0.0028
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 65331200 | Self Similarity: -0.0091
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 65331200 | Self Similarity: -0.0020
Sparsity: 143.5 | Dead Features: 0 | Total Loss: 0.0795 | Reconstruction Loss: 0.0391 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 653

 58%|█████▊    | 32006/55054 [14:43<10:55, 35.19it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 65536000 | Self Similarity: -0.0064
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 65536000 | Self Similarity: 0.0145
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 65536000 | Self Similarity: 0.0025
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 65536000 | Self Similarity: -0.0092
Sparsity: 125.2 | Dead Features: 0 | Total Loss: 0.0509 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0260 | l1_alpha: 8.0000e-04 | Tokens: 65536000 | Self Similarity: -0.0021
Sparsity: 147.1 | Dead Features: 0 | Total Loss: 0.0809 | Reconstruction Loss: 0.0393 | L1 Loss: 0.0416 | l1_alpha: 8.0000e-04 | Tokens: 655

 58%|█████▊    | 32104/55054 [14:46<10:22, 36.88it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 65740800 | Self Similarity: -0.0062
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 65740800 | Self Similarity: 0.0145
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 65740800 | Self Similarity: 0.0023
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 65740800 | Self Similarity: -0.0092
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 65740800 | Self Similarity: -0.0018
Sparsity: 134.5 | Dead Features: 0 | Total Loss: 0.0770 | Reconstruction Loss: 0.0382 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 657

 58%|█████▊    | 32204/55054 [14:49<10:32, 36.14it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 65945600 | Self Similarity: -0.0063
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0160 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 65945600 | Self Similarity: 0.0143
Sparsity: 51.7 | Dead Features: 0 | Total Loss: 0.0211 | Reconstruction Loss: 0.0132 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 65945600 | Self Similarity: 0.0021
Sparsity: 122.6 | Dead Features: 0 | Total Loss: 0.0424 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 65945600 | Self Similarity: -0.0095
Sparsity: 130.5 | Dead Features: 0 | Total Loss: 0.0551 | Reconstruction Loss: 0.0272 | L1 Loss: 0.0279 | l1_alpha: 8.0000e-04 | Tokens: 65945600 | Self Similarity: -0.0019
Sparsity: 141.4 | Dead Features: 0 | Total Loss: 0.0849 | Reconstruction Loss: 0.0436 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens: 659

 59%|█████▊    | 32304/55054 [14:51<10:29, 36.14it/s]

Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 66150400 | Self Similarity: -0.0063
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 66150400 | Self Similarity: 0.0144
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 66150400 | Self Similarity: 0.0030
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 66150400 | Self Similarity: -0.0095
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 66150400 | Self Similarity: -0.0022
Sparsity: 142.7 | Dead Features: 0 | Total Loss: 0.0770 | Reconstruction Loss: 0.0384 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 661

 59%|█████▉    | 32404/55054 [14:54<10:37, 35.54it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 66355200 | Self Similarity: -0.0063
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 66355200 | Self Similarity: 0.0144
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 66355200 | Self Similarity: 0.0027
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 66355200 | Self Similarity: -0.0094
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 66355200 | Self Similarity: -0.0022
Sparsity: 138.3 | Dead Features: 0 | Total Loss: 0.0762 | Reconstruction Loss: 0.0372 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 663

 59%|█████▉    | 32506/55054 [14:57<10:16, 36.55it/s]

Sparsity: 23.7 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 66560000 | Self Similarity: -0.0062
Sparsity: 39.2 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 66560000 | Self Similarity: 0.0141
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 66560000 | Self Similarity: 0.0029
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 66560000 | Self Similarity: -0.0094
Sparsity: 125.4 | Dead Features: 0 | Total Loss: 0.0509 | Reconstruction Loss: 0.0254 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 66560000 | Self Similarity: -0.0023
Sparsity: 143.1 | Dead Features: 0 | Total Loss: 0.0790 | Reconstruction Loss: 0.0395 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 665

 59%|█████▉    | 32606/55054 [15:00<10:24, 35.95it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 66764800 | Self Similarity: -0.0062
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 66764800 | Self Similarity: 0.0143
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 66764800 | Self Similarity: 0.0026
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 66764800 | Self Similarity: -0.0093
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 66764800 | Self Similarity: -0.0021
Sparsity: 132.7 | Dead Features: 0 | Total Loss: 0.0761 | Reconstruction Loss: 0.0386 | L1 Loss: 0.0375 | l1_alpha: 8.0000e-04 | Tokens: 667

 59%|█████▉    | 32706/55054 [15:03<10:21, 35.94it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 66969600 | Self Similarity: -0.0062
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 66969600 | Self Similarity: 0.0142
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 66969600 | Self Similarity: 0.0025
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 66969600 | Self Similarity: -0.0094
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0499 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 66969600 | Self Similarity: -0.0019
Sparsity: 147.1 | Dead Features: 0 | Total Loss: 0.0818 | Reconstruction Loss: 0.0418 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 669

 60%|█████▉    | 32806/55054 [15:05<10:18, 35.98it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 67174400 | Self Similarity: -0.0061
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 67174400 | Self Similarity: 0.0142
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 67174400 | Self Similarity: 0.0024
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 67174400 | Self Similarity: -0.0093
Sparsity: 122.3 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 67174400 | Self Similarity: -0.0019
Sparsity: 142.4 | Dead Features: 0 | Total Loss: 0.0780 | Reconstruction Loss: 0.0378 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens: 671

 60%|█████▉    | 32906/55054 [15:08<10:22, 35.56it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 67379200 | Self Similarity: -0.0060
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 67379200 | Self Similarity: 0.0143
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 67379200 | Self Similarity: 0.0022
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 67379200 | Self Similarity: -0.0094
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0467 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 67379200 | Self Similarity: -0.0022
Sparsity: 134.8 | Dead Features: 0 | Total Loss: 0.0747 | Reconstruction Loss: 0.0383 | L1 Loss: 0.0364 | l1_alpha: 8.0000e-04 | Tokens: 673

 60%|█████▉    | 33006/55054 [15:11<10:15, 35.84it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 67584000 | Self Similarity: -0.0061
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 67584000 | Self Similarity: 0.0141
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 67584000 | Self Similarity: 0.0019
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 67584000 | Self Similarity: -0.0097
Sparsity: 125.9 | Dead Features: 0 | Total Loss: 0.0509 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 67584000 | Self Similarity: -0.0023
Sparsity: 142.3 | Dead Features: 0 | Total Loss: 0.0815 | Reconstruction Loss: 0.0409 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens: 675

 60%|██████    | 33106/55054 [15:14<10:07, 36.10it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 67788800 | Self Similarity: -0.0057
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 67788800 | Self Similarity: 0.0144
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 67788800 | Self Similarity: 0.0021
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 67788800 | Self Similarity: -0.0096
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 67788800 | Self Similarity: -0.0022
Sparsity: 141.4 | Dead Features: 0 | Total Loss: 0.0763 | Reconstruction Loss: 0.0371 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens: 677

 60%|██████    | 33206/55054 [15:17<10:26, 34.90it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 67993600 | Self Similarity: -0.0057
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 67993600 | Self Similarity: 0.0142
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 67993600 | Self Similarity: 0.0020
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 67993600 | Self Similarity: -0.0095
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 67993600 | Self Similarity: -0.0023
Sparsity: 142.3 | Dead Features: 0 | Total Loss: 0.0748 | Reconstruction Loss: 0.0363 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 679

 60%|██████    | 33306/55054 [15:19<10:23, 34.89it/s]

Sparsity: 23.7 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 68198400 | Self Similarity: -0.0056
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 68198400 | Self Similarity: 0.0141
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 68198400 | Self Similarity: 0.0020
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 68198400 | Self Similarity: -0.0091
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 68198400 | Self Similarity: -0.0019
Sparsity: 142.1 | Dead Features: 0 | Total Loss: 0.0765 | Reconstruction Loss: 0.0380 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 681

 61%|██████    | 33406/55054 [15:22<10:23, 34.72it/s]

Sparsity: 18.5 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 68403200 | Self Similarity: -0.0056
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 68403200 | Self Similarity: 0.0143
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 68403200 | Self Similarity: 0.0018
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 68403200 | Self Similarity: -0.0092
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 68403200 | Self Similarity: -0.0025
Sparsity: 132.6 | Dead Features: 0 | Total Loss: 0.0764 | Reconstruction Loss: 0.0381 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 684

 61%|██████    | 33506/55054 [15:25<10:21, 34.68it/s]

Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 68608000 | Self Similarity: -0.0059
Sparsity: 38.8 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 68608000 | Self Similarity: 0.0141
Sparsity: 50.5 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 68608000 | Self Similarity: 0.0021
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 68608000 | Self Similarity: -0.0097
Sparsity: 126.5 | Dead Features: 0 | Total Loss: 0.0514 | Reconstruction Loss: 0.0263 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 68608000 | Self Similarity: -0.0023
Sparsity: 140.8 | Dead Features: 0 | Total Loss: 0.0786 | Reconstruction Loss: 0.0399 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 686

 61%|██████    | 33606/55054 [15:28<10:25, 34.30it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 68812800 | Self Similarity: -0.0056
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 68812800 | Self Similarity: 0.0142
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 68812800 | Self Similarity: 0.0021
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 68812800 | Self Similarity: -0.0091
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 68812800 | Self Similarity: -0.0023
Sparsity: 141.5 | Dead Features: 0 | Total Loss: 0.0754 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 688

 61%|██████    | 33706/55054 [15:31<10:10, 34.98it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 69017600 | Self Similarity: -0.0054
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 69017600 | Self Similarity: 0.0143
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 69017600 | Self Similarity: 0.0022
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 69017600 | Self Similarity: -0.0095
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 69017600 | Self Similarity: -0.0022
Sparsity: 145.1 | Dead Features: 0 | Total Loss: 0.0791 | Reconstruction Loss: 0.0388 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 690

 61%|██████▏   | 33806/55054 [15:34<10:05, 35.09it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 69222400 | Self Similarity: -0.0053
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 69222400 | Self Similarity: 0.0145
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 69222400 | Self Similarity: 0.0024
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 69222400 | Self Similarity: -0.0092
Sparsity: 123.9 | Dead Features: 0 | Total Loss: 0.0504 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 69222400 | Self Similarity: -0.0021
Sparsity: 147.5 | Dead Features: 0 | Total Loss: 0.0794 | Reconstruction Loss: 0.0389 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens: 692

 62%|██████▏   | 33906/55054 [15:37<10:06, 34.85it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 69427200 | Self Similarity: -0.0052
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 69427200 | Self Similarity: 0.0145
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 69427200 | Self Similarity: 0.0022
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 69427200 | Self Similarity: -0.0091
Sparsity: 122.7 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 69427200 | Self Similarity: -0.0020
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0753 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 694

 62%|██████▏   | 34006/55054 [15:39<08:35, 40.83it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 69632000 | Self Similarity: -0.0052
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 69632000 | Self Similarity: 0.0143
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 69632000 | Self Similarity: 0.0020
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 69632000 | Self Similarity: -0.0092
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0501 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 69632000 | Self Similarity: -0.0022
Sparsity: 145.9 | Dead Features: 0 | Total Loss: 0.0776 | Reconstruction Loss: 0.0376 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens: 696

 62%|██████▏   | 34107/55054 [15:42<09:45, 35.76it/s]

Sparsity: 19.9 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 69836800 | Self Similarity: -0.0050
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 69836800 | Self Similarity: 0.0141
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 69836800 | Self Similarity: 0.0019
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 69836800 | Self Similarity: -0.0090
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 69836800 | Self Similarity: -0.0021
Sparsity: 144.5 | Dead Features: 0 | Total Loss: 0.0782 | Reconstruction Loss: 0.0385 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 698

 62%|██████▏   | 34207/55054 [15:45<09:57, 34.87it/s]

Sparsity: 19.1 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 70041600 | Self Similarity: -0.0050
Sparsity: 33.9 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 70041600 | Self Similarity: 0.0138
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 70041600 | Self Similarity: 0.0018
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 70041600 | Self Similarity: -0.0091
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 70041600 | Self Similarity: -0.0021
Sparsity: 134.7 | Dead Features: 0 | Total Loss: 0.0767 | Reconstruction Loss: 0.0393 | L1 Loss: 0.0374 | l1_alpha: 8.0000e-04 | Tokens: 700

 62%|██████▏   | 34307/55054 [15:48<09:39, 35.77it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 70246400 | Self Similarity: -0.0048
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 70246400 | Self Similarity: 0.0142
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0209 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 70246400 | Self Similarity: 0.0017
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 70246400 | Self Similarity: -0.0092
Sparsity: 123.6 | Dead Features: 0 | Total Loss: 0.0519 | Reconstruction Loss: 0.0266 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 70246400 | Self Similarity: -0.0015
Sparsity: 146.0 | Dead Features: 0 | Total Loss: 0.0815 | Reconstruction Loss: 0.0404 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 702

 62%|██████▏   | 34404/55054 [15:50<09:34, 35.96it/s]

Sparsity: 23.6 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 70451200 | Self Similarity: -0.0047
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 70451200 | Self Similarity: 0.0139
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 70451200 | Self Similarity: 0.0020
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 70451200 | Self Similarity: -0.0087
Sparsity: 123.2 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 70451200 | Self Similarity: -0.0018
Sparsity: 141.8 | Dead Features: 0 | Total Loss: 0.0750 | Reconstruction Loss: 0.0362 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 704

 63%|██████▎   | 34505/55054 [15:53<09:27, 36.20it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 70656000 | Self Similarity: -0.0046
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 70656000 | Self Similarity: 0.0139
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 70656000 | Self Similarity: 0.0022
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 70656000 | Self Similarity: -0.0087
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 70656000 | Self Similarity: -0.0016
Sparsity: 144.7 | Dead Features: 0 | Total Loss: 0.0786 | Reconstruction Loss: 0.0398 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 706

 63%|██████▎   | 34605/55054 [15:56<09:31, 35.76it/s]

Sparsity: 18.8 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 70860800 | Self Similarity: -0.0045
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 70860800 | Self Similarity: 0.0140
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 70860800 | Self Similarity: 0.0022
Sparsity: 113.2 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 70860800 | Self Similarity: -0.0084
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 70860800 | Self Similarity: -0.0020
Sparsity: 142.0 | Dead Features: 0 | Total Loss: 0.0763 | Reconstruction Loss: 0.0367 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 708

 63%|██████▎   | 34707/55054 [15:59<09:24, 36.06it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 71065600 | Self Similarity: -0.0043
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 71065600 | Self Similarity: 0.0138
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 71065600 | Self Similarity: 0.0020
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 71065600 | Self Similarity: -0.0086
Sparsity: 123.0 | Dead Features: 0 | Total Loss: 0.0498 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 71065600 | Self Similarity: -0.0021
Sparsity: 147.3 | Dead Features: 0 | Total Loss: 0.0798 | Reconstruction Loss: 0.0396 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 710

 63%|██████▎   | 34807/55054 [16:02<09:26, 35.72it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 71270400 | Self Similarity: -0.0043
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 71270400 | Self Similarity: 0.0140
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 71270400 | Self Similarity: 0.0020
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 71270400 | Self Similarity: -0.0085
Sparsity: 125.2 | Dead Features: 0 | Total Loss: 0.0520 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0261 | l1_alpha: 8.0000e-04 | Tokens: 71270400 | Self Similarity: -0.0019
Sparsity: 147.3 | Dead Features: 0 | Total Loss: 0.0832 | Reconstruction Loss: 0.0413 | L1 Loss: 0.0419 | l1_alpha: 8.0000e-04 | Tokens: 712

 63%|██████▎   | 34907/55054 [16:04<09:25, 35.66it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 71475200 | Self Similarity: -0.0044
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 71475200 | Self Similarity: 0.0142
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 71475200 | Self Similarity: 0.0023
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 71475200 | Self Similarity: -0.0086
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 71475200 | Self Similarity: -0.0019
Sparsity: 140.9 | Dead Features: 0 | Total Loss: 0.0748 | Reconstruction Loss: 0.0368 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 714

 64%|██████▎   | 35007/55054 [16:07<09:11, 36.33it/s]

Sparsity: 24.1 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 71680000 | Self Similarity: -0.0044
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 71680000 | Self Similarity: 0.0139
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 71680000 | Self Similarity: 0.0021
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 71680000 | Self Similarity: -0.0086
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 71680000 | Self Similarity: -0.0018
Sparsity: 143.6 | Dead Features: 0 | Total Loss: 0.0745 | Reconstruction Loss: 0.0360 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 716

 64%|██████▍   | 35105/55054 [16:10<09:11, 36.18it/s]

Sparsity: 18.9 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 71884800 | Self Similarity: -0.0043
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 71884800 | Self Similarity: 0.0142
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 71884800 | Self Similarity: 0.0019
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 71884800 | Self Similarity: -0.0088
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 71884800 | Self Similarity: -0.0019
Sparsity: 143.4 | Dead Features: 0 | Total Loss: 0.0898 | Reconstruction Loss: 0.0519 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 718

 64%|██████▍   | 35205/55054 [16:12<09:20, 35.39it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 72089600 | Self Similarity: -0.0042
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 72089600 | Self Similarity: 0.0140
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 72089600 | Self Similarity: 0.0020
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 72089600 | Self Similarity: -0.0090
Sparsity: 122.8 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 72089600 | Self Similarity: -0.0018
Sparsity: 141.6 | Dead Features: 0 | Total Loss: 0.0815 | Reconstruction Loss: 0.0416 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 720

 64%|██████▍   | 35305/55054 [16:15<09:17, 35.45it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 72294400 | Self Similarity: -0.0046
Sparsity: 37.4 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 72294400 | Self Similarity: 0.0142
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 72294400 | Self Similarity: 0.0019
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 72294400 | Self Similarity: -0.0088
Sparsity: 124.5 | Dead Features: 0 | Total Loss: 0.0506 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 72294400 | Self Similarity: -0.0021
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0798 | Reconstruction Loss: 0.0393 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens: 722

 64%|██████▍   | 35405/55054 [16:18<09:14, 35.43it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 72499200 | Self Similarity: -0.0043
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 72499200 | Self Similarity: 0.0141
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 72499200 | Self Similarity: 0.0020
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 72499200 | Self Similarity: -0.0085
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 72499200 | Self Similarity: -0.0021
Sparsity: 139.0 | Dead Features: 0 | Total Loss: 0.0773 | Reconstruction Loss: 0.0382 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 724

 64%|██████▍   | 35505/55054 [16:21<09:17, 35.08it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 72704000 | Self Similarity: -0.0042
Sparsity: 34.2 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 72704000 | Self Similarity: 0.0143
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 72704000 | Self Similarity: 0.0020
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 72704000 | Self Similarity: -0.0089
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 72704000 | Self Similarity: -0.0019
Sparsity: 138.6 | Dead Features: 0 | Total Loss: 0.0730 | Reconstruction Loss: 0.0358 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 727

 65%|██████▍   | 35605/55054 [16:24<09:30, 34.09it/s]

Sparsity: 23.4 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 72908800 | Self Similarity: -0.0043
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 72908800 | Self Similarity: 0.0144
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 72908800 | Self Similarity: 0.0017
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 72908800 | Self Similarity: -0.0085
Sparsity: 123.8 | Dead Features: 0 | Total Loss: 0.0505 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 72908800 | Self Similarity: -0.0023
Sparsity: 148.2 | Dead Features: 0 | Total Loss: 0.0782 | Reconstruction Loss: 0.0376 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 729

 65%|██████▍   | 35705/55054 [16:27<09:22, 34.38it/s]

Sparsity: 23.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 73113600 | Self Similarity: -0.0041
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 73113600 | Self Similarity: 0.0143
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 73113600 | Self Similarity: 0.0019
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 73113600 | Self Similarity: -0.0088
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 73113600 | Self Similarity: -0.0019
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0744 | Reconstruction Loss: 0.0353 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens: 731

 65%|██████▌   | 35805/55054 [16:29<09:16, 34.59it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 73318400 | Self Similarity: -0.0039
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 73318400 | Self Similarity: 0.0144
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 73318400 | Self Similarity: 0.0020
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 73318400 | Self Similarity: -0.0090
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 73318400 | Self Similarity: -0.0020
Sparsity: 144.2 | Dead Features: 0 | Total Loss: 0.0739 | Reconstruction Loss: 0.0360 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 733

 65%|██████▌   | 35905/55054 [16:32<09:15, 34.49it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 73523200 | Self Similarity: -0.0039
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 73523200 | Self Similarity: 0.0142
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 73523200 | Self Similarity: 0.0021
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 73523200 | Self Similarity: -0.0086
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 73523200 | Self Similarity: -0.0018
Sparsity: 142.9 | Dead Features: 0 | Total Loss: 0.0742 | Reconstruction Loss: 0.0360 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 735

 65%|██████▌   | 36005/55054 [16:35<09:15, 34.29it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 73728000 | Self Similarity: -0.0038
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 73728000 | Self Similarity: 0.0140
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 73728000 | Self Similarity: 0.0021
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 73728000 | Self Similarity: -0.0087
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 73728000 | Self Similarity: -0.0019
Sparsity: 145.6 | Dead Features: 0 | Total Loss: 0.0737 | Reconstruction Loss: 0.0357 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 737

 66%|██████▌   | 36107/55054 [16:38<07:44, 40.82it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 73932800 | Self Similarity: -0.0038
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 73932800 | Self Similarity: 0.0141
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0189 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 73932800 | Self Similarity: 0.0020
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0377 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 73932800 | Self Similarity: -0.0086
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0460 | Reconstruction Loss: 0.0230 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 73932800 | Self Similarity: -0.0019
Sparsity: 139.3 | Dead Features: 0 | Total Loss: 0.0716 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0368 | l1_alpha: 8.0000e-04 | Tokens: 739

 66%|██████▌   | 36205/55054 [16:41<08:57, 35.06it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 74137600 | Self Similarity: -0.0038
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 74137600 | Self Similarity: 0.0142
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 74137600 | Self Similarity: 0.0020
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 74137600 | Self Similarity: -0.0088
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0507 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 74137600 | Self Similarity: -0.0018
Sparsity: 146.4 | Dead Features: 0 | Total Loss: 0.0785 | Reconstruction Loss: 0.0387 | L1 Loss: 0.0398 | l1_alpha: 8.0000e-04 | Tokens: 741

 66%|██████▌   | 36305/55054 [16:44<08:58, 34.83it/s]

Sparsity: 23.2 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 74342400 | Self Similarity: -0.0033
Sparsity: 39.0 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 74342400 | Self Similarity: 0.0141
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 74342400 | Self Similarity: 0.0021
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 74342400 | Self Similarity: -0.0087
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 74342400 | Self Similarity: -0.0019
Sparsity: 142.6 | Dead Features: 0 | Total Loss: 0.0774 | Reconstruction Loss: 0.0384 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 743

 66%|██████▌   | 36406/55054 [16:46<08:20, 37.26it/s]

Sparsity: 23.4 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 74547200 | Self Similarity: -0.0033
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 74547200 | Self Similarity: 0.0139
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 74547200 | Self Similarity: 0.0018
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 74547200 | Self Similarity: -0.0088
Sparsity: 123.1 | Dead Features: 0 | Total Loss: 0.0517 | Reconstruction Loss: 0.0261 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 74547200 | Self Similarity: -0.0015
Sparsity: 139.1 | Dead Features: 0 | Total Loss: 0.0831 | Reconstruction Loss: 0.0427 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 745

 66%|██████▋   | 36506/55054 [16:49<08:34, 36.07it/s]

Sparsity: 19.4 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 74752000 | Self Similarity: -0.0033
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 74752000 | Self Similarity: 0.0137
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 74752000 | Self Similarity: 0.0017
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 74752000 | Self Similarity: -0.0086
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 74752000 | Self Similarity: -0.0015
Sparsity: 144.7 | Dead Features: 0 | Total Loss: 0.0771 | Reconstruction Loss: 0.0376 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 747

 66%|██████▋   | 36606/55054 [16:52<08:33, 35.90it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 74956800 | Self Similarity: -0.0035
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 74956800 | Self Similarity: 0.0137
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0211 | Reconstruction Loss: 0.0132 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 74956800 | Self Similarity: 0.0017
Sparsity: 123.6 | Dead Features: 0 | Total Loss: 0.0433 | Reconstruction Loss: 0.0211 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 74956800 | Self Similarity: -0.0087
Sparsity: 128.4 | Dead Features: 0 | Total Loss: 0.0545 | Reconstruction Loss: 0.0277 | L1 Loss: 0.0268 | l1_alpha: 8.0000e-04 | Tokens: 74956800 | Self Similarity: -0.0017
Sparsity: 150.2 | Dead Features: 0 | Total Loss: 0.0846 | Reconstruction Loss: 0.0428 | L1 Loss: 0.0419 | l1_alpha: 8.0000e-04 | Tokens: 749

 67%|██████▋   | 36706/55054 [16:55<08:22, 36.53it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 75161600 | Self Similarity: -0.0033
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 75161600 | Self Similarity: 0.0139
Sparsity: 46.4 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 75161600 | Self Similarity: 0.0019
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 75161600 | Self Similarity: -0.0085
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 75161600 | Self Similarity: -0.0015
Sparsity: 145.7 | Dead Features: 0 | Total Loss: 0.0774 | Reconstruction Loss: 0.0375 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 751

 67%|██████▋   | 36806/55054 [16:57<08:27, 35.98it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 75366400 | Self Similarity: -0.0033
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 75366400 | Self Similarity: 0.0135
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 75366400 | Self Similarity: 0.0021
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 75366400 | Self Similarity: -0.0083
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 75366400 | Self Similarity: -0.0017
Sparsity: 147.2 | Dead Features: 0 | Total Loss: 0.0781 | Reconstruction Loss: 0.0379 | L1 Loss: 0.0403 | l1_alpha: 8.0000e-04 | Tokens: 753

 67%|██████▋   | 36906/55054 [17:00<08:24, 35.98it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 75571200 | Self Similarity: -0.0036
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 75571200 | Self Similarity: 0.0137
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 75571200 | Self Similarity: 0.0025
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 75571200 | Self Similarity: -0.0084
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 75571200 | Self Similarity: -0.0018
Sparsity: 143.4 | Dead Features: 0 | Total Loss: 0.0755 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 755

 67%|██████▋   | 37006/55054 [17:03<08:22, 35.91it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 75776000 | Self Similarity: -0.0035
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 75776000 | Self Similarity: 0.0136
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 75776000 | Self Similarity: 0.0023
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 75776000 | Self Similarity: -0.0083
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 75776000 | Self Similarity: -0.0018
Sparsity: 140.5 | Dead Features: 0 | Total Loss: 0.0768 | Reconstruction Loss: 0.0385 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 757

 67%|██████▋   | 37105/55054 [17:06<08:07, 36.83it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 75980800 | Self Similarity: -0.0036
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 75980800 | Self Similarity: 0.0136
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 75980800 | Self Similarity: 0.0019
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 75980800 | Self Similarity: -0.0085
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 75980800 | Self Similarity: -0.0015
Sparsity: 139.7 | Dead Features: 0 | Total Loss: 0.0735 | Reconstruction Loss: 0.0356 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 759

 68%|██████▊   | 37205/55054 [17:08<08:17, 35.88it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 76185600 | Self Similarity: -0.0035
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 76185600 | Self Similarity: 0.0137
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 76185600 | Self Similarity: 0.0022
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 76185600 | Self Similarity: -0.0084
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 76185600 | Self Similarity: -0.0017
Sparsity: 144.7 | Dead Features: 0 | Total Loss: 0.0738 | Reconstruction Loss: 0.0354 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 761

 68%|██████▊   | 37305/55054 [17:11<08:11, 36.11it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 76390400 | Self Similarity: -0.0033
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 76390400 | Self Similarity: 0.0137
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 76390400 | Self Similarity: 0.0021
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 76390400 | Self Similarity: -0.0085
Sparsity: 125.3 | Dead Features: 0 | Total Loss: 0.0499 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 76390400 | Self Similarity: -0.0017
Sparsity: 151.1 | Dead Features: 0 | Total Loss: 0.0795 | Reconstruction Loss: 0.0392 | L1 Loss: 0.0403 | l1_alpha: 8.0000e-04 | Tokens: 763

 68%|██████▊   | 37405/55054 [17:14<08:06, 36.26it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 76595200 | Self Similarity: -0.0033
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 76595200 | Self Similarity: 0.0135
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 76595200 | Self Similarity: 0.0016
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 76595200 | Self Similarity: -0.0085
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 76595200 | Self Similarity: -0.0022
Sparsity: 128.6 | Dead Features: 0 | Total Loss: 0.0793 | Reconstruction Loss: 0.0418 | L1 Loss: 0.0375 | l1_alpha: 8.0000e-04 | Tokens: 765

 68%|██████▊   | 37507/55054 [17:17<08:03, 36.26it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 76800000 | Self Similarity: -0.0036
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 76800000 | Self Similarity: 0.0141
Sparsity: 46.5 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 76800000 | Self Similarity: 0.0019
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 76800000 | Self Similarity: -0.0088
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0465 | Reconstruction Loss: 0.0228 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 76800000 | Self Similarity: -0.0020
Sparsity: 127.1 | Dead Features: 0 | Total Loss: 0.1239 | Reconstruction Loss: 0.0778 | L1 Loss: 0.0461 | l1_alpha: 8.0000e-04 | Tokens: 768

 68%|██████▊   | 37607/55054 [17:19<08:00, 36.34it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 77004800 | Self Similarity: -0.0040
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 77004800 | Self Similarity: 0.0139
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 77004800 | Self Similarity: 0.0018
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 77004800 | Self Similarity: -0.0086
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 77004800 | Self Similarity: -0.0017
Sparsity: 109.1 | Dead Features: 0 | Total Loss: 0.0803 | Reconstruction Loss: 0.0441 | L1 Loss: 0.0362 | l1_alpha: 8.0000e-04 | Tokens: 770

 68%|██████▊   | 37707/55054 [17:22<08:00, 36.08it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 77209600 | Self Similarity: -0.0045
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 77209600 | Self Similarity: 0.0138
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 77209600 | Self Similarity: 0.0019
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 77209600 | Self Similarity: -0.0086
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 77209600 | Self Similarity: -0.0017
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0760 | Reconstruction Loss: 0.0392 | L1 Loss: 0.0367 | l1_alpha: 8.0000e-04 | Tokens: 772

 69%|██████▊   | 37805/55054 [17:25<07:49, 36.73it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 77414400 | Self Similarity: -0.0041
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 77414400 | Self Similarity: 0.0138
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 77414400 | Self Similarity: 0.0018
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 77414400 | Self Similarity: -0.0085
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 77414400 | Self Similarity: -0.0021
Sparsity: 130.3 | Dead Features: 0 | Total Loss: 0.0766 | Reconstruction Loss: 0.0385 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 774

 69%|██████▉   | 37905/55054 [17:28<07:58, 35.84it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 77619200 | Self Similarity: -0.0037
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 77619200 | Self Similarity: 0.0137
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 77619200 | Self Similarity: 0.0017
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 77619200 | Self Similarity: -0.0083
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 77619200 | Self Similarity: -0.0017
Sparsity: 132.1 | Dead Features: 0 | Total Loss: 0.0740 | Reconstruction Loss: 0.0361 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 776

 69%|██████▉   | 38006/55054 [17:30<07:55, 35.85it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 77824000 | Self Similarity: -0.0036
Sparsity: 38.6 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 77824000 | Self Similarity: 0.0136
Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 77824000 | Self Similarity: 0.0017
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 77824000 | Self Similarity: -0.0083
Sparsity: 123.7 | Dead Features: 0 | Total Loss: 0.0507 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 77824000 | Self Similarity: -0.0021
Sparsity: 141.0 | Dead Features: 0 | Total Loss: 0.0811 | Reconstruction Loss: 0.0405 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 778

 69%|██████▉   | 38104/55054 [17:33<07:54, 35.73it/s]

Sparsity: 18.3 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 78028800 | Self Similarity: -0.0032
Sparsity: 34.4 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 78028800 | Self Similarity: 0.0132
Sparsity: 45.3 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 78028800 | Self Similarity: 0.0017
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 78028800 | Self Similarity: -0.0087
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 78028800 | Self Similarity: -0.0024
Sparsity: 137.6 | Dead Features: 0 | Total Loss: 0.0789 | Reconstruction Loss: 0.0399 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 780

 69%|██████▉   | 38204/55054 [17:36<07:51, 35.71it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 78233600 | Self Similarity: -0.0030
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 78233600 | Self Similarity: 0.0135
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 78233600 | Self Similarity: 0.0017
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 78233600 | Self Similarity: -0.0084
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 78233600 | Self Similarity: -0.0019
Sparsity: 137.7 | Dead Features: 0 | Total Loss: 0.0777 | Reconstruction Loss: 0.0382 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 782

 70%|██████▉   | 38304/55054 [17:39<07:47, 35.82it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 78438400 | Self Similarity: -0.0032
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 78438400 | Self Similarity: 0.0139
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 78438400 | Self Similarity: 0.0017
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 78438400 | Self Similarity: -0.0084
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 78438400 | Self Similarity: -0.0018
Sparsity: 136.5 | Dead Features: 0 | Total Loss: 0.0720 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 784

 70%|██████▉   | 38404/55054 [17:41<07:42, 35.98it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 78643200 | Self Similarity: -0.0030
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 78643200 | Self Similarity: 0.0136
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 78643200 | Self Similarity: 0.0017
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 78643200 | Self Similarity: -0.0082
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 78643200 | Self Similarity: -0.0018
Sparsity: 141.0 | Dead Features: 0 | Total Loss: 0.0751 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 786

 70%|██████▉   | 38507/55054 [17:44<07:40, 35.97it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 78848000 | Self Similarity: -0.0029
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 78848000 | Self Similarity: 0.0138
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 78848000 | Self Similarity: 0.0020
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 78848000 | Self Similarity: -0.0082
Sparsity: 123.4 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 78848000 | Self Similarity: -0.0017
Sparsity: 143.0 | Dead Features: 0 | Total Loss: 0.0760 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 788

 70%|███████   | 38604/55054 [17:47<07:39, 35.81it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 79052800 | Self Similarity: -0.0027
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 79052800 | Self Similarity: 0.0139
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 79052800 | Self Similarity: 0.0015
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 79052800 | Self Similarity: -0.0082
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0511 | Reconstruction Loss: 0.0254 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 79052800 | Self Similarity: -0.0018
Sparsity: 147.6 | Dead Features: 0 | Total Loss: 0.0799 | Reconstruction Loss: 0.0384 | L1 Loss: 0.0415 | l1_alpha: 8.0000e-04 | Tokens: 790

 70%|███████   | 38705/55054 [17:49<06:32, 41.63it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 79257600 | Self Similarity: -0.0027
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 79257600 | Self Similarity: 0.0135
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 79257600 | Self Similarity: 0.0015
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 79257600 | Self Similarity: -0.0084
Sparsity: 126.5 | Dead Features: 0 | Total Loss: 0.0515 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0264 | l1_alpha: 8.0000e-04 | Tokens: 79257600 | Self Similarity: -0.0016
Sparsity: 147.8 | Dead Features: 0 | Total Loss: 0.0780 | Reconstruction Loss: 0.0372 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens: 792

 70%|███████   | 38806/55054 [17:52<06:08, 44.08it/s]

Sparsity: 19.2 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 79462400 | Self Similarity: -0.0025
Sparsity: 33.7 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 79462400 | Self Similarity: 0.0137
Sparsity: 45.8 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 79462400 | Self Similarity: 0.0017
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 79462400 | Self Similarity: -0.0082
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 79462400 | Self Similarity: -0.0022
Sparsity: 142.3 | Dead Features: 0 | Total Loss: 0.0741 | Reconstruction Loss: 0.0362 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 794

 71%|███████   | 38907/55054 [17:54<07:34, 35.55it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 79667200 | Self Similarity: -0.0026
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 79667200 | Self Similarity: 0.0139
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 79667200 | Self Similarity: 0.0023
Sparsity: 114.1 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 79667200 | Self Similarity: -0.0084
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 79667200 | Self Similarity: -0.0018
Sparsity: 143.6 | Dead Features: 0 | Total Loss: 0.0734 | Reconstruction Loss: 0.0350 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 796

 71%|███████   | 39007/55054 [17:57<07:17, 36.69it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 79872000 | Self Similarity: -0.0026
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 79872000 | Self Similarity: 0.0140
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 79872000 | Self Similarity: 0.0017
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 79872000 | Self Similarity: -0.0084
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 79872000 | Self Similarity: -0.0018
Sparsity: 145.7 | Dead Features: 0 | Total Loss: 0.0734 | Reconstruction Loss: 0.0347 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 798

 71%|███████   | 39107/55054 [18:00<07:21, 36.13it/s]

Sparsity: 25.3 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 80076800 | Self Similarity: -0.0026
Sparsity: 40.3 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 80076800 | Self Similarity: 0.0140
Sparsity: 50.3 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 80076800 | Self Similarity: 0.0022
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 80076800 | Self Similarity: -0.0083
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0518 | Reconstruction Loss: 0.0268 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 80076800 | Self Similarity: -0.0019
Sparsity: 141.9 | Dead Features: 0 | Total Loss: 0.1032 | Reconstruction Loss: 0.0617 | L1 Loss: 0.0415 | l1_alpha: 8.0000e-04 | Tokens: 800

 71%|███████   | 39205/55054 [18:03<07:18, 36.15it/s]

Sparsity: 19.4 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 80281600 | Self Similarity: -0.0027
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 80281600 | Self Similarity: 0.0139
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 80281600 | Self Similarity: 0.0021
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 80281600 | Self Similarity: -0.0083
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 80281600 | Self Similarity: -0.0019
Sparsity: 132.2 | Dead Features: 0 | Total Loss: 0.0775 | Reconstruction Loss: 0.0396 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 802

 71%|███████▏  | 39306/55054 [18:05<07:05, 36.97it/s]

Sparsity: 18.7 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 80486400 | Self Similarity: -0.0028
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 80486400 | Self Similarity: 0.0138
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 80486400 | Self Similarity: 0.0021
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 80486400 | Self Similarity: -0.0084
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0464 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 80486400 | Self Similarity: -0.0020
Sparsity: 135.6 | Dead Features: 0 | Total Loss: 0.0713 | Reconstruction Loss: 0.0343 | L1 Loss: 0.0370 | l1_alpha: 8.0000e-04 | Tokens: 804

 72%|███████▏  | 39406/55054 [18:08<07:12, 36.20it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 80691200 | Self Similarity: -0.0027
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 80691200 | Self Similarity: 0.0137
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 80691200 | Self Similarity: 0.0021
Sparsity: 115.8 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 80691200 | Self Similarity: -0.0086
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 80691200 | Self Similarity: -0.0022
Sparsity: 141.4 | Dead Features: 0 | Total Loss: 0.0740 | Reconstruction Loss: 0.0355 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 806

 72%|███████▏  | 39506/55054 [18:11<07:14, 35.79it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 80896000 | Self Similarity: -0.0025
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 80896000 | Self Similarity: 0.0137
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 80896000 | Self Similarity: 0.0022
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 80896000 | Self Similarity: -0.0084
Sparsity: 125.6 | Dead Features: 0 | Total Loss: 0.0522 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 80896000 | Self Similarity: -0.0020
Sparsity: 149.2 | Dead Features: 0 | Total Loss: 0.0823 | Reconstruction Loss: 0.0402 | L1 Loss: 0.0420 | l1_alpha: 8.0000e-04 | Tokens: 808

 72%|███████▏  | 39606/55054 [18:14<07:10, 35.88it/s]

Sparsity: 28.9 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0107 | L1 Loss: 0.0070 | l1_alpha: 8.0000e-04 | Tokens: 81100800 | Self Similarity: -0.0024
Sparsity: 39.8 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 81100800 | Self Similarity: 0.0137
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 81100800 | Self Similarity: 0.0019
Sparsity: 122.3 | Dead Features: 0 | Total Loss: 0.0418 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 81100800 | Self Similarity: -0.0082
Sparsity: 124.7 | Dead Features: 0 | Total Loss: 0.0539 | Reconstruction Loss: 0.0269 | L1 Loss: 0.0270 | l1_alpha: 8.0000e-04 | Tokens: 81100800 | Self Similarity: -0.0020
Sparsity: 148.4 | Dead Features: 0 | Total Loss: 0.0799 | Reconstruction Loss: 0.0378 | L1 Loss: 0.0421 | l1_alpha: 8.0000e-04 | Tokens: 811

 72%|███████▏  | 39706/55054 [18:16<07:06, 35.98it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 81305600 | Self Similarity: -0.0022
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 81305600 | Self Similarity: 0.0137
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 81305600 | Self Similarity: 0.0020
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 81305600 | Self Similarity: -0.0083
Sparsity: 121.6 | Dead Features: 0 | Total Loss: 0.0498 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 81305600 | Self Similarity: -0.0019
Sparsity: 149.9 | Dead Features: 0 | Total Loss: 0.0778 | Reconstruction Loss: 0.0374 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 813

 72%|███████▏  | 39806/55054 [18:19<07:07, 35.65it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 81510400 | Self Similarity: -0.0024
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 81510400 | Self Similarity: 0.0136
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 81510400 | Self Similarity: 0.0017
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 81510400 | Self Similarity: -0.0085
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 81510400 | Self Similarity: -0.0021
Sparsity: 141.3 | Dead Features: 0 | Total Loss: 0.0736 | Reconstruction Loss: 0.0359 | L1 Loss: 0.0377 | l1_alpha: 8.0000e-04 | Tokens: 815

 72%|███████▏  | 39906/55054 [18:22<06:57, 36.24it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 81715200 | Self Similarity: -0.0025
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 81715200 | Self Similarity: 0.0137
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 81715200 | Self Similarity: 0.0020
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 81715200 | Self Similarity: -0.0080
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 81715200 | Self Similarity: -0.0020
Sparsity: 146.2 | Dead Features: 0 | Total Loss: 0.0725 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 817

 73%|███████▎  | 40006/55054 [18:25<06:53, 36.40it/s]

Sparsity: 18.8 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 81920000 | Self Similarity: -0.0022
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 81920000 | Self Similarity: 0.0136
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 81920000 | Self Similarity: 0.0022
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 81920000 | Self Similarity: -0.0082
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 81920000 | Self Similarity: -0.0019
Sparsity: 146.6 | Dead Features: 0 | Total Loss: 0.0728 | Reconstruction Loss: 0.0341 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 819

 73%|███████▎  | 40106/55054 [18:28<07:00, 35.54it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 82124800 | Self Similarity: -0.0020
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 82124800 | Self Similarity: 0.0134
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 82124800 | Self Similarity: 0.0017
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0401 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 82124800 | Self Similarity: -0.0082
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0507 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 82124800 | Self Similarity: -0.0020
Sparsity: 152.2 | Dead Features: 0 | Total Loss: 0.0770 | Reconstruction Loss: 0.0377 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 821

 73%|███████▎  | 40206/55054 [18:30<06:49, 36.23it/s]

Sparsity: 18.4 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0062 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 82329600 | Self Similarity: -0.0021
Sparsity: 33.4 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 82329600 | Self Similarity: 0.0134
Sparsity: 44.5 | Dead Features: 0 | Total Loss: 0.0189 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 82329600 | Self Similarity: 0.0021
Sparsity: 113.8 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 82329600 | Self Similarity: -0.0078
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 82329600 | Self Similarity: -0.0019
Sparsity: 148.7 | Dead Features: 0 | Total Loss: 0.0721 | Reconstruction Loss: 0.0341 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 823

 73%|███████▎  | 40306/55054 [18:33<06:49, 35.98it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 82534400 | Self Similarity: -0.0021
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 82534400 | Self Similarity: 0.0138
Sparsity: 45.9 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 82534400 | Self Similarity: 0.0022
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 82534400 | Self Similarity: -0.0078
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 82534400 | Self Similarity: -0.0016
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0771 | Reconstruction Loss: 0.0404 | L1 Loss: 0.0367 | l1_alpha: 8.0000e-04 | Tokens: 825

 73%|███████▎  | 40406/55054 [18:36<06:52, 35.47it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 82739200 | Self Similarity: -0.0025
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 82739200 | Self Similarity: 0.0136
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 82739200 | Self Similarity: 0.0023
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 82739200 | Self Similarity: -0.0080
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 82739200 | Self Similarity: -0.0019
Sparsity: 131.9 | Dead Features: 0 | Total Loss: 0.0736 | Reconstruction Loss: 0.0359 | L1 Loss: 0.0377 | l1_alpha: 8.0000e-04 | Tokens: 827

 74%|███████▎  | 40506/55054 [18:39<06:39, 36.41it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 82944000 | Self Similarity: -0.0023
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 82944000 | Self Similarity: 0.0134
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 82944000 | Self Similarity: 0.0023
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 82944000 | Self Similarity: -0.0081
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 82944000 | Self Similarity: -0.0017
Sparsity: 135.4 | Dead Features: 0 | Total Loss: 0.0716 | Reconstruction Loss: 0.0350 | L1 Loss: 0.0366 | l1_alpha: 8.0000e-04 | Tokens: 829

 74%|███████▍  | 40608/55054 [18:41<06:13, 38.72it/s]

Sparsity: 18.4 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 83148800 | Self Similarity: -0.0022
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 83148800 | Self Similarity: 0.0136
Sparsity: 46.1 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 83148800 | Self Similarity: 0.0024
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 83148800 | Self Similarity: -0.0081
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 83148800 | Self Similarity: -0.0018
Sparsity: 138.4 | Dead Features: 0 | Total Loss: 0.0734 | Reconstruction Loss: 0.0357 | L1 Loss: 0.0377 | l1_alpha: 8.0000e-04 | Tokens: 831

 74%|███████▍  | 40704/55054 [18:44<06:55, 34.54it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 83353600 | Self Similarity: -0.0022
Sparsity: 34.4 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 83353600 | Self Similarity: 0.0137
Sparsity: 45.5 | Dead Features: 0 | Total Loss: 0.0188 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 83353600 | Self Similarity: 0.0021
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 83353600 | Self Similarity: -0.0082
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0465 | Reconstruction Loss: 0.0232 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 83353600 | Self Similarity: -0.0016
Sparsity: 137.1 | Dead Features: 0 | Total Loss: 0.0711 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0367 | l1_alpha: 8.0000e-04 | Tokens: 833

 74%|███████▍  | 40806/55054 [18:47<06:35, 36.01it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 83558400 | Self Similarity: -0.0022
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 83558400 | Self Similarity: 0.0136
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 83558400 | Self Similarity: 0.0019
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 83558400 | Self Similarity: -0.0082
Sparsity: 122.8 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 83558400 | Self Similarity: -0.0016
Sparsity: 145.5 | Dead Features: 0 | Total Loss: 0.0764 | Reconstruction Loss: 0.0370 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 835

 74%|███████▍  | 40906/55054 [18:50<06:44, 34.96it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 83763200 | Self Similarity: -0.0021
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 83763200 | Self Similarity: 0.0136
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 83763200 | Self Similarity: 0.0023
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 83763200 | Self Similarity: -0.0078
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 83763200 | Self Similarity: -0.0018
Sparsity: 148.0 | Dead Features: 0 | Total Loss: 0.0769 | Reconstruction Loss: 0.0370 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 837

 74%|███████▍  | 41006/55054 [18:53<06:39, 35.14it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 83968000 | Self Similarity: -0.0019
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 83968000 | Self Similarity: 0.0137
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 83968000 | Self Similarity: 0.0023
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 83968000 | Self Similarity: -0.0081
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 83968000 | Self Similarity: -0.0018
Sparsity: 144.6 | Dead Features: 0 | Total Loss: 0.0733 | Reconstruction Loss: 0.0351 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 839

 75%|███████▍  | 41106/55054 [18:55<06:39, 34.93it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 84172800 | Self Similarity: -0.0019
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 84172800 | Self Similarity: 0.0136
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 84172800 | Self Similarity: 0.0022
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 84172800 | Self Similarity: -0.0078
Sparsity: 123.7 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 84172800 | Self Similarity: -0.0021
Sparsity: 148.4 | Dead Features: 0 | Total Loss: 0.0746 | Reconstruction Loss: 0.0355 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens: 841

 75%|███████▍  | 41206/55054 [18:58<06:39, 34.70it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 84377600 | Self Similarity: -0.0020
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 84377600 | Self Similarity: 0.0137
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 84377600 | Self Similarity: 0.0022
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 84377600 | Self Similarity: -0.0079
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 84377600 | Self Similarity: -0.0021
Sparsity: 141.3 | Dead Features: 0 | Total Loss: 0.0746 | Reconstruction Loss: 0.0374 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 843

 75%|███████▌  | 41306/55054 [19:01<06:23, 35.89it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 84582400 | Self Similarity: -0.0021
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 84582400 | Self Similarity: 0.0139
Sparsity: 46.4 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 84582400 | Self Similarity: 0.0018
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 84582400 | Self Similarity: -0.0080
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0231 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 84582400 | Self Similarity: -0.0022
Sparsity: 144.4 | Dead Features: 0 | Total Loss: 0.0731 | Reconstruction Loss: 0.0350 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 845

 75%|███████▌  | 41406/55054 [19:04<06:20, 35.86it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 84787200 | Self Similarity: -0.0019
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 84787200 | Self Similarity: 0.0136
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 84787200 | Self Similarity: 0.0021
Sparsity: 114.6 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 84787200 | Self Similarity: -0.0080
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 84787200 | Self Similarity: -0.0019
Sparsity: 126.0 | Dead Features: 0 | Total Loss: 0.0824 | Reconstruction Loss: 0.0460 | L1 Loss: 0.0364 | l1_alpha: 8.0000e-04 | Tokens: 847

 75%|███████▌  | 41507/55054 [19:07<06:02, 37.34it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 84992000 | Self Similarity: -0.0024
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 84992000 | Self Similarity: 0.0134
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 84992000 | Self Similarity: 0.0020
Sparsity: 113.0 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 84992000 | Self Similarity: -0.0080
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0499 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 84992000 | Self Similarity: -0.0015
Sparsity: 140.6 | Dead Features: 0 | Total Loss: 0.0782 | Reconstruction Loss: 0.0396 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 849

 76%|███████▌  | 41607/55054 [19:09<06:16, 35.74it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 85196800 | Self Similarity: -0.0026
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 85196800 | Self Similarity: 0.0136
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 85196800 | Self Similarity: 0.0023
Sparsity: 113.7 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 85196800 | Self Similarity: -0.0078
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0467 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 85196800 | Self Similarity: -0.0018
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0741 | Reconstruction Loss: 0.0397 | L1 Loss: 0.0344 | l1_alpha: 8.0000e-04 | Tokens: 851

 76%|███████▌  | 41707/55054 [19:12<06:11, 35.89it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 85401600 | Self Similarity: -0.0027
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 85401600 | Self Similarity: 0.0137
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 85401600 | Self Similarity: 0.0025
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 85401600 | Self Similarity: -0.0079
Sparsity: 121.9 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 85401600 | Self Similarity: -0.0020
Sparsity: 139.3 | Dead Features: 0 | Total Loss: 0.0752 | Reconstruction Loss: 0.0373 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 854

 76%|███████▌  | 41805/55054 [19:15<06:00, 36.72it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 85606400 | Self Similarity: -0.0024
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 85606400 | Self Similarity: 0.0138
Sparsity: 49.9 | Dead Features: 0 | Total Loss: 0.0213 | Reconstruction Loss: 0.0132 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 85606400 | Self Similarity: 0.0024
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0414 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 85606400 | Self Similarity: -0.0079
Sparsity: 123.6 | Dead Features: 0 | Total Loss: 0.0506 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 85606400 | Self Similarity: -0.0020
Sparsity: 142.1 | Dead Features: 0 | Total Loss: 0.0789 | Reconstruction Loss: 0.0394 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 856

 76%|███████▌  | 41905/55054 [19:18<06:06, 35.90it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 85811200 | Self Similarity: -0.0022
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 85811200 | Self Similarity: 0.0137
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 85811200 | Self Similarity: 0.0019
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 85811200 | Self Similarity: -0.0079
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 85811200 | Self Similarity: -0.0020
Sparsity: 138.0 | Dead Features: 0 | Total Loss: 0.0776 | Reconstruction Loss: 0.0395 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 858

 76%|███████▋  | 42005/55054 [19:20<06:05, 35.69it/s]

Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 86016000 | Self Similarity: -0.0021
Sparsity: 38.7 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 86016000 | Self Similarity: 0.0136
Sparsity: 50.3 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 86016000 | Self Similarity: 0.0021
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 86016000 | Self Similarity: -0.0080
Sparsity: 126.8 | Dead Features: 0 | Total Loss: 0.0524 | Reconstruction Loss: 0.0266 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 86016000 | Self Similarity: -0.0021
Sparsity: 152.2 | Dead Features: 0 | Total Loss: 0.0826 | Reconstruction Loss: 0.0415 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 860

 76%|███████▋  | 42105/55054 [19:23<06:09, 35.01it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 86220800 | Self Similarity: -0.0021
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 86220800 | Self Similarity: 0.0136
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 86220800 | Self Similarity: 0.0021
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 86220800 | Self Similarity: -0.0078
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 86220800 | Self Similarity: -0.0018
Sparsity: 142.9 | Dead Features: 0 | Total Loss: 0.0760 | Reconstruction Loss: 0.0373 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 862

 77%|███████▋  | 42205/55054 [19:26<05:58, 35.80it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 86425600 | Self Similarity: -0.0021
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 86425600 | Self Similarity: 0.0137
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 86425600 | Self Similarity: 0.0020
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 86425600 | Self Similarity: -0.0085
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 86425600 | Self Similarity: -0.0021
Sparsity: 145.7 | Dead Features: 0 | Total Loss: 0.0739 | Reconstruction Loss: 0.0356 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 864

 77%|███████▋  | 42305/55054 [19:29<06:00, 35.38it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 86630400 | Self Similarity: -0.0018
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 86630400 | Self Similarity: 0.0135
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 86630400 | Self Similarity: 0.0023
Sparsity: 115.8 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 86630400 | Self Similarity: -0.0079
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 86630400 | Self Similarity: -0.0020
Sparsity: 139.3 | Dead Features: 0 | Total Loss: 0.0744 | Reconstruction Loss: 0.0361 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 866

 77%|███████▋  | 42409/55054 [19:31<04:59, 42.24it/s]

Sparsity: 23.7 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 86835200 | Self Similarity: -0.0021
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 86835200 | Self Similarity: 0.0134
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 86835200 | Self Similarity: 0.0024
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 86835200 | Self Similarity: -0.0078
Sparsity: 123.9 | Dead Features: 0 | Total Loss: 0.0506 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 86835200 | Self Similarity: -0.0019
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0792 | Reconstruction Loss: 0.0389 | L1 Loss: 0.0403 | l1_alpha: 8.0000e-04 | Tokens: 868

 77%|███████▋  | 42507/55054 [19:34<05:41, 36.75it/s]

Sparsity: 19.3 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 87040000 | Self Similarity: -0.0021
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 87040000 | Self Similarity: 0.0134
Sparsity: 46.1 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 87040000 | Self Similarity: 0.0022
Sparsity: 113.9 | Dead Features: 0 | Total Loss: 0.0376 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 87040000 | Self Similarity: -0.0079
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 87040000 | Self Similarity: -0.0019
Sparsity: 132.1 | Dead Features: 0 | Total Loss: 0.0725 | Reconstruction Loss: 0.0357 | L1 Loss: 0.0368 | l1_alpha: 8.0000e-04 | Tokens: 870

 77%|███████▋  | 42607/55054 [19:37<05:45, 36.05it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 87244800 | Self Similarity: -0.0020
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 87244800 | Self Similarity: 0.0133
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 87244800 | Self Similarity: 0.0021
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 87244800 | Self Similarity: -0.0079
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 87244800 | Self Similarity: -0.0020
Sparsity: 146.3 | Dead Features: 0 | Total Loss: 0.0768 | Reconstruction Loss: 0.0372 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 872

 78%|███████▊  | 42707/55054 [19:39<05:40, 36.21it/s]

Sparsity: 18.5 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0062 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 87449600 | Self Similarity: -0.0019
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 87449600 | Self Similarity: 0.0133
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 87449600 | Self Similarity: 0.0022
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 87449600 | Self Similarity: -0.0076
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 87449600 | Self Similarity: -0.0019
Sparsity: 146.5 | Dead Features: 0 | Total Loss: 0.0754 | Reconstruction Loss: 0.0359 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 874

 78%|███████▊  | 42807/55054 [19:42<05:44, 35.54it/s]

Sparsity: 23.9 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 87654400 | Self Similarity: -0.0019
Sparsity: 39.2 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 87654400 | Self Similarity: 0.0133
Sparsity: 49.1 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 87654400 | Self Similarity: 0.0020
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 87654400 | Self Similarity: -0.0074
Sparsity: 124.1 | Dead Features: 0 | Total Loss: 0.0510 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 87654400 | Self Similarity: -0.0020
Sparsity: 151.1 | Dead Features: 0 | Total Loss: 0.0767 | Reconstruction Loss: 0.0367 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 876

 78%|███████▊  | 42906/55054 [19:45<05:32, 36.57it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 87859200 | Self Similarity: -0.0018
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 87859200 | Self Similarity: 0.0135
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 87859200 | Self Similarity: 0.0022
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 87859200 | Self Similarity: -0.0076
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 87859200 | Self Similarity: -0.0019
Sparsity: 147.8 | Dead Features: 0 | Total Loss: 0.0735 | Reconstruction Loss: 0.0350 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 878

 78%|███████▊  | 43006/55054 [19:47<05:39, 35.53it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 88064000 | Self Similarity: -0.0017
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 88064000 | Self Similarity: 0.0135
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 88064000 | Self Similarity: 0.0023
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 88064000 | Self Similarity: -0.0078
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0467 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 88064000 | Self Similarity: -0.0019
Sparsity: 146.9 | Dead Features: 0 | Total Loss: 0.0720 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0376 | l1_alpha: 8.0000e-04 | Tokens: 880

 78%|███████▊  | 43106/55054 [19:50<05:31, 36.08it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 88268800 | Self Similarity: -0.0016
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 88268800 | Self Similarity: 0.0136
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 88268800 | Self Similarity: 0.0021
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 88268800 | Self Similarity: -0.0076
Sparsity: 122.0 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 88268800 | Self Similarity: -0.0018
Sparsity: 150.5 | Dead Features: 0 | Total Loss: 0.0731 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 882

 78%|███████▊  | 43206/55054 [19:53<05:27, 36.19it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 88473600 | Self Similarity: -0.0017
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 88473600 | Self Similarity: 0.0135
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 88473600 | Self Similarity: 0.0023
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 88473600 | Self Similarity: -0.0078
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 88473600 | Self Similarity: -0.0018
Sparsity: 148.4 | Dead Features: 0 | Total Loss: 0.0756 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 884

 79%|███████▊  | 43306/55054 [19:56<04:45, 41.16it/s]

Sparsity: 23.2 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 88678400 | Self Similarity: -0.0019
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 88678400 | Self Similarity: 0.0135
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 88678400 | Self Similarity: 0.0021
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 88678400 | Self Similarity: -0.0078
Sparsity: 121.6 | Dead Features: 0 | Total Loss: 0.0501 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 88678400 | Self Similarity: -0.0016
Sparsity: 144.1 | Dead Features: 0 | Total Loss: 0.0763 | Reconstruction Loss: 0.0374 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 886

 79%|███████▉  | 43405/55054 [19:58<05:24, 35.87it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 88883200 | Self Similarity: -0.0018
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 88883200 | Self Similarity: 0.0134
Sparsity: 46.1 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 88883200 | Self Similarity: 0.0018
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 88883200 | Self Similarity: -0.0075
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 88883200 | Self Similarity: -0.0020
Sparsity: 146.5 | Dead Features: 0 | Total Loss: 0.0763 | Reconstruction Loss: 0.0370 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 888

 79%|███████▉  | 43505/55054 [20:01<05:17, 36.37it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 89088000 | Self Similarity: -0.0015
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 89088000 | Self Similarity: 0.0133
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 89088000 | Self Similarity: 0.0019
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 89088000 | Self Similarity: -0.0075
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 89088000 | Self Similarity: -0.0020
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0790 | Reconstruction Loss: 0.0421 | L1 Loss: 0.0370 | l1_alpha: 8.0000e-04 | Tokens: 890

 79%|███████▉  | 43605/55054 [20:04<05:20, 35.75it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 89292800 | Self Similarity: -0.0024
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 89292800 | Self Similarity: 0.0131
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 89292800 | Self Similarity: 0.0018
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 89292800 | Self Similarity: -0.0077
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 89292800 | Self Similarity: -0.0019
Sparsity: 132.0 | Dead Features: 0 | Total Loss: 0.0745 | Reconstruction Loss: 0.0379 | L1 Loss: 0.0365 | l1_alpha: 8.0000e-04 | Tokens: 892

 79%|███████▉  | 43705/55054 [20:07<05:19, 35.54it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 89497600 | Self Similarity: -0.0022
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 89497600 | Self Similarity: 0.0130
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 89497600 | Self Similarity: 0.0021
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 89497600 | Self Similarity: -0.0079
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 89497600 | Self Similarity: -0.0016
Sparsity: 131.5 | Dead Features: 0 | Total Loss: 0.0769 | Reconstruction Loss: 0.0400 | L1 Loss: 0.0369 | l1_alpha: 8.0000e-04 | Tokens: 894

 80%|███████▉  | 43805/55054 [20:09<05:10, 36.25it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 89702400 | Self Similarity: -0.0021
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 89702400 | Self Similarity: 0.0132
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 89702400 | Self Similarity: 0.0022
Sparsity: 114.3 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 89702400 | Self Similarity: -0.0079
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 89702400 | Self Similarity: -0.0020
Sparsity: 134.4 | Dead Features: 0 | Total Loss: 0.0731 | Reconstruction Loss: 0.0358 | L1 Loss: 0.0373 | l1_alpha: 8.0000e-04 | Tokens: 897

 80%|███████▉  | 43905/55054 [20:12<05:07, 36.31it/s]

Sparsity: 25.7 | Dead Features: 0 | Total Loss: 0.0161 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 89907200 | Self Similarity: -0.0017
Sparsity: 42.2 | Dead Features: 0 | Total Loss: 0.0166 | Reconstruction Loss: 0.0109 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 89907200 | Self Similarity: 0.0131
Sparsity: 52.2 | Dead Features: 0 | Total Loss: 0.0219 | Reconstruction Loss: 0.0137 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 89907200 | Self Similarity: 0.0023
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 89907200 | Self Similarity: -0.0079
Sparsity: 128.6 | Dead Features: 0 | Total Loss: 0.0547 | Reconstruction Loss: 0.0279 | L1 Loss: 0.0268 | l1_alpha: 8.0000e-04 | Tokens: 89907200 | Self Similarity: -0.0017
Sparsity: 148.1 | Dead Features: 0 | Total Loss: 0.0842 | Reconstruction Loss: 0.0420 | L1 Loss: 0.0422 | l1_alpha: 8.0000e-04 | Tokens: 899

 80%|███████▉  | 44005/55054 [20:15<05:08, 35.81it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 90112000 | Self Similarity: -0.0017
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 90112000 | Self Similarity: 0.0130
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 90112000 | Self Similarity: 0.0025
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 90112000 | Self Similarity: -0.0080
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 90112000 | Self Similarity: -0.0022
Sparsity: 139.2 | Dead Features: 0 | Total Loss: 0.0739 | Reconstruction Loss: 0.0354 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 901

 80%|████████  | 44105/55054 [20:18<05:05, 35.87it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 90316800 | Self Similarity: -0.0019
Sparsity: 39.0 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 90316800 | Self Similarity: 0.0133
Sparsity: 50.2 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 90316800 | Self Similarity: 0.0024
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0419 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 90316800 | Self Similarity: -0.0082
Sparsity: 128.5 | Dead Features: 0 | Total Loss: 0.0541 | Reconstruction Loss: 0.0276 | L1 Loss: 0.0265 | l1_alpha: 8.0000e-04 | Tokens: 90316800 | Self Similarity: -0.0019
Sparsity: 152.4 | Dead Features: 0 | Total Loss: 0.0819 | Reconstruction Loss: 0.0401 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 903

 80%|████████  | 44205/55054 [20:20<05:00, 36.13it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 90521600 | Self Similarity: -0.0018
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 90521600 | Self Similarity: 0.0132
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 90521600 | Self Similarity: 0.0025
Sparsity: 114.7 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 90521600 | Self Similarity: -0.0082
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 90521600 | Self Similarity: -0.0017
Sparsity: 144.6 | Dead Features: 0 | Total Loss: 0.0744 | Reconstruction Loss: 0.0359 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 905

 80%|████████  | 44305/55054 [20:23<04:57, 36.07it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 90726400 | Self Similarity: -0.0018
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 90726400 | Self Similarity: 0.0135
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 90726400 | Self Similarity: 0.0024
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 90726400 | Self Similarity: -0.0078
Sparsity: 122.6 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 90726400 | Self Similarity: -0.0020
Sparsity: 143.2 | Dead Features: 0 | Total Loss: 0.0747 | Reconstruction Loss: 0.0350 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 907

 81%|████████  | 44405/55054 [20:26<04:54, 36.12it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 90931200 | Self Similarity: -0.0016
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 90931200 | Self Similarity: 0.0134
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 90931200 | Self Similarity: 0.0023
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 90931200 | Self Similarity: -0.0077
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0508 | Reconstruction Loss: 0.0263 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 90931200 | Self Similarity: -0.0021
Sparsity: 147.3 | Dead Features: 0 | Total Loss: 0.0761 | Reconstruction Loss: 0.0371 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 909

 81%|████████  | 44505/55054 [20:29<04:51, 36.23it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 91136000 | Self Similarity: -0.0014
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 91136000 | Self Similarity: 0.0133
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 91136000 | Self Similarity: 0.0024
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 91136000 | Self Similarity: -0.0075
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 91136000 | Self Similarity: -0.0019
Sparsity: 148.3 | Dead Features: 0 | Total Loss: 0.0727 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 911

 81%|████████  | 44607/55054 [20:31<04:51, 35.85it/s]

Sparsity: 23.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 91340800 | Self Similarity: -0.0013
Sparsity: 39.3 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 91340800 | Self Similarity: 0.0130
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 91340800 | Self Similarity: 0.0023
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 91340800 | Self Similarity: -0.0076
Sparsity: 123.5 | Dead Features: 0 | Total Loss: 0.0507 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 91340800 | Self Similarity: -0.0019
Sparsity: 154.3 | Dead Features: 0 | Total Loss: 0.0792 | Reconstruction Loss: 0.0384 | L1 Loss: 0.0409 | l1_alpha: 8.0000e-04 | Tokens: 913

 81%|████████  | 44708/55054 [20:34<04:28, 38.53it/s]

Sparsity: 18.5 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 91545600 | Self Similarity: -0.0015
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 91545600 | Self Similarity: 0.0130
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 91545600 | Self Similarity: 0.0026
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 91545600 | Self Similarity: -0.0075
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 91545600 | Self Similarity: -0.0017
Sparsity: 146.1 | Dead Features: 0 | Total Loss: 0.0749 | Reconstruction Loss: 0.0357 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 915

 81%|████████▏ | 44804/55054 [20:37<04:47, 35.69it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 91750400 | Self Similarity: -0.0013
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 91750400 | Self Similarity: 0.0132
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 91750400 | Self Similarity: 0.0022
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 91750400 | Self Similarity: -0.0077
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 91750400 | Self Similarity: -0.0017
Sparsity: 148.6 | Dead Features: 0 | Total Loss: 0.0760 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 917

 82%|████████▏ | 44904/55054 [20:40<04:39, 36.30it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 91955200 | Self Similarity: -0.0012
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 91955200 | Self Similarity: 0.0133
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 91955200 | Self Similarity: 0.0021
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0376 | Reconstruction Loss: 0.0178 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 91955200 | Self Similarity: -0.0076
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 91955200 | Self Similarity: -0.0019
Sparsity: 143.7 | Dead Features: 0 | Total Loss: 0.0726 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 919

 82%|████████▏ | 45004/55054 [20:42<04:41, 35.73it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 92160000 | Self Similarity: -0.0011
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 92160000 | Self Similarity: 0.0131
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 92160000 | Self Similarity: 0.0021
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 92160000 | Self Similarity: -0.0076
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 92160000 | Self Similarity: -0.0017
Sparsity: 145.3 | Dead Features: 0 | Total Loss: 0.0761 | Reconstruction Loss: 0.0372 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 921

 82%|████████▏ | 45104/55054 [20:45<04:35, 36.16it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 92364800 | Self Similarity: -0.0008
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 92364800 | Self Similarity: 0.0132
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 92364800 | Self Similarity: 0.0021
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 92364800 | Self Similarity: -0.0073
Sparsity: 121.9 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 92364800 | Self Similarity: -0.0015
Sparsity: 146.7 | Dead Features: 0 | Total Loss: 0.0732 | Reconstruction Loss: 0.0345 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 923

 82%|████████▏ | 45204/55054 [20:48<04:32, 36.16it/s]

Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 92569600 | Self Similarity: -0.0007
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 92569600 | Self Similarity: 0.0131
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 92569600 | Self Similarity: 0.0023
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 92569600 | Self Similarity: -0.0076
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 92569600 | Self Similarity: -0.0017
Sparsity: 144.7 | Dead Features: 0 | Total Loss: 0.0735 | Reconstruction Loss: 0.0345 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 925

 82%|████████▏ | 45304/55054 [20:51<04:28, 36.26it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 92774400 | Self Similarity: -0.0009
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 92774400 | Self Similarity: 0.0132
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 92774400 | Self Similarity: 0.0023
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 92774400 | Self Similarity: -0.0078
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 92774400 | Self Similarity: -0.0019
Sparsity: 146.0 | Dead Features: 0 | Total Loss: 0.0737 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 927

 82%|████████▏ | 45404/55054 [20:53<04:31, 35.53it/s]

Sparsity: 19.9 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 92979200 | Self Similarity: -0.0007
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 92979200 | Self Similarity: 0.0134
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 92979200 | Self Similarity: 0.0024
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 92979200 | Self Similarity: -0.0075
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 92979200 | Self Similarity: -0.0017
Sparsity: 142.4 | Dead Features: 0 | Total Loss: 0.0726 | Reconstruction Loss: 0.0342 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 929

 83%|████████▎ | 45504/55054 [20:56<04:24, 36.13it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 93184000 | Self Similarity: -0.0009
Sparsity: 39.3 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 93184000 | Self Similarity: 0.0134
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 93184000 | Self Similarity: 0.0022
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 93184000 | Self Similarity: -0.0076
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 93184000 | Self Similarity: -0.0019
Sparsity: 144.7 | Dead Features: 0 | Total Loss: 0.0777 | Reconstruction Loss: 0.0395 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 931

 83%|████████▎ | 45604/55054 [20:59<04:21, 36.10it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 93388800 | Self Similarity: -0.0009
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 93388800 | Self Similarity: 0.0135
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 93388800 | Self Similarity: 0.0025
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 93388800 | Self Similarity: -0.0074
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 93388800 | Self Similarity: -0.0018
Sparsity: 142.5 | Dead Features: 0 | Total Loss: 0.0738 | Reconstruction Loss: 0.0351 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 933

 83%|████████▎ | 45704/55054 [21:02<04:21, 35.73it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 93593600 | Self Similarity: -0.0008
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 93593600 | Self Similarity: 0.0134
Sparsity: 46.3 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 93593600 | Self Similarity: 0.0026
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 93593600 | Self Similarity: -0.0072
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 93593600 | Self Similarity: -0.0019
Sparsity: 148.5 | Dead Features: 0 | Total Loss: 0.0748 | Reconstruction Loss: 0.0354 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 935

 83%|████████▎ | 45804/55054 [21:04<04:15, 36.24it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 93798400 | Self Similarity: -0.0008
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 93798400 | Self Similarity: 0.0134
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 93798400 | Self Similarity: 0.0024
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0414 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 93798400 | Self Similarity: -0.0077
Sparsity: 125.2 | Dead Features: 0 | Total Loss: 0.0524 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0273 | l1_alpha: 8.0000e-04 | Tokens: 93798400 | Self Similarity: -0.0017
Sparsity: 153.8 | Dead Features: 0 | Total Loss: 0.0806 | Reconstruction Loss: 0.0377 | L1 Loss: 0.0429 | l1_alpha: 8.0000e-04 | Tokens: 937

 83%|████████▎ | 45904/55054 [21:07<04:14, 35.91it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 94003200 | Self Similarity: -0.0005
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 94003200 | Self Similarity: 0.0133
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 94003200 | Self Similarity: 0.0025
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 94003200 | Self Similarity: -0.0076
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 94003200 | Self Similarity: -0.0017
Sparsity: 148.3 | Dead Features: 0 | Total Loss: 0.0732 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 940

 84%|████████▎ | 46004/55054 [21:10<04:11, 35.93it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 94208000 | Self Similarity: -0.0007
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 94208000 | Self Similarity: 0.0132
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 94208000 | Self Similarity: 0.0022
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 94208000 | Self Similarity: -0.0076
Sparsity: 122.7 | Dead Features: 0 | Total Loss: 0.0499 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 94208000 | Self Similarity: -0.0019
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0880 | Reconstruction Loss: 0.0523 | L1 Loss: 0.0356 | l1_alpha: 8.0000e-04 | Tokens: 942

 84%|████████▎ | 46104/55054 [21:13<04:09, 35.90it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 94412800 | Self Similarity: -0.0011
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 94412800 | Self Similarity: 0.0129
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 94412800 | Self Similarity: 0.0023
Sparsity: 114.3 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 94412800 | Self Similarity: -0.0072
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 94412800 | Self Similarity: -0.0019
Sparsity: 130.8 | Dead Features: 0 | Total Loss: 0.0757 | Reconstruction Loss: 0.0376 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 944

 84%|████████▍ | 46204/55054 [21:15<04:07, 35.77it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 94617600 | Self Similarity: -0.0012
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 94617600 | Self Similarity: 0.0129
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 94617600 | Self Similarity: 0.0023
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 94617600 | Self Similarity: -0.0074
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 94617600 | Self Similarity: -0.0020
Sparsity: 137.6 | Dead Features: 0 | Total Loss: 0.0732 | Reconstruction Loss: 0.0355 | L1 Loss: 0.0377 | l1_alpha: 8.0000e-04 | Tokens: 946

 84%|████████▍ | 46305/55054 [21:18<04:00, 36.35it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 94822400 | Self Similarity: -0.0010
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 94822400 | Self Similarity: 0.0129
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 94822400 | Self Similarity: 0.0023
Sparsity: 115.8 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 94822400 | Self Similarity: -0.0072
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 94822400 | Self Similarity: -0.0019
Sparsity: 141.0 | Dead Features: 0 | Total Loss: 0.0747 | Reconstruction Loss: 0.0359 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 948

 84%|████████▍ | 46405/55054 [21:21<03:56, 36.60it/s]

Sparsity: 26.3 | Dead Features: 0 | Total Loss: 0.0160 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 95027200 | Self Similarity: -0.0009
Sparsity: 40.3 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 95027200 | Self Similarity: 0.0130
Sparsity: 52.0 | Dead Features: 0 | Total Loss: 0.0215 | Reconstruction Loss: 0.0133 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 95027200 | Self Similarity: 0.0022
Sparsity: 122.8 | Dead Features: 0 | Total Loss: 0.0423 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 95027200 | Self Similarity: -0.0073
Sparsity: 128.2 | Dead Features: 0 | Total Loss: 0.0529 | Reconstruction Loss: 0.0268 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 95027200 | Self Similarity: -0.0019
Sparsity: 149.6 | Dead Features: 0 | Total Loss: 0.0799 | Reconstruction Loss: 0.0385 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens: 950

 84%|████████▍ | 46505/55054 [21:24<03:28, 41.05it/s]

Sparsity: 26.0 | Dead Features: 0 | Total Loss: 0.0161 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 95232000 | Self Similarity: -0.0005
Sparsity: 39.6 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 95232000 | Self Similarity: 0.0132
Sparsity: 51.4 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 95232000 | Self Similarity: 0.0021
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 95232000 | Self Similarity: -0.0072
Sparsity: 126.2 | Dead Features: 0 | Total Loss: 0.0524 | Reconstruction Loss: 0.0263 | L1 Loss: 0.0260 | l1_alpha: 8.0000e-04 | Tokens: 95232000 | Self Similarity: -0.0018
Sparsity: 152.5 | Dead Features: 0 | Total Loss: 0.0804 | Reconstruction Loss: 0.0386 | L1 Loss: 0.0417 | l1_alpha: 8.0000e-04 | Tokens: 952

 85%|████████▍ | 46607/55054 [21:26<03:58, 35.48it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 95436800 | Self Similarity: -0.0004
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 95436800 | Self Similarity: 0.0131
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 95436800 | Self Similarity: 0.0021
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 95436800 | Self Similarity: -0.0074
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 95436800 | Self Similarity: -0.0020
Sparsity: 148.6 | Dead Features: 0 | Total Loss: 0.0746 | Reconstruction Loss: 0.0357 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 954

 85%|████████▍ | 46707/55054 [21:29<03:53, 35.68it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 95641600 | Self Similarity: -0.0005
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 95641600 | Self Similarity: 0.0130
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 95641600 | Self Similarity: 0.0022
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 95641600 | Self Similarity: -0.0072
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 95641600 | Self Similarity: -0.0019
Sparsity: 146.6 | Dead Features: 0 | Total Loss: 0.0708 | Reconstruction Loss: 0.0332 | L1 Loss: 0.0376 | l1_alpha: 8.0000e-04 | Tokens: 956

 85%|████████▌ | 46807/55054 [21:32<03:53, 35.30it/s]

Sparsity: 23.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 95846400 | Self Similarity: -0.0004
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 95846400 | Self Similarity: 0.0129
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 95846400 | Self Similarity: 0.0019
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 95846400 | Self Similarity: -0.0070
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 95846400 | Self Similarity: -0.0018
Sparsity: 145.5 | Dead Features: 0 | Total Loss: 0.0724 | Reconstruction Loss: 0.0342 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 958

 85%|████████▌ | 46907/55054 [21:35<03:46, 35.98it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 96051200 | Self Similarity: -0.0002
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 96051200 | Self Similarity: 0.0131
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 96051200 | Self Similarity: 0.0020
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 96051200 | Self Similarity: -0.0075
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0515 | Reconstruction Loss: 0.0269 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 96051200 | Self Similarity: -0.0018
Sparsity: 147.9 | Dead Features: 0 | Total Loss: 0.0792 | Reconstruction Loss: 0.0390 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 960

 85%|████████▌ | 47004/55054 [21:37<03:42, 36.17it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 96256000 | Self Similarity: -0.0002
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 96256000 | Self Similarity: 0.0128
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 96256000 | Self Similarity: 0.0019
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 96256000 | Self Similarity: -0.0076
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 96256000 | Self Similarity: -0.0018
Sparsity: 148.9 | Dead Features: 0 | Total Loss: 0.0762 | Reconstruction Loss: 0.0368 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 962

 86%|████████▌ | 47104/55054 [21:40<03:41, 35.82it/s]

Sparsity: 24.0 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 96460800 | Self Similarity: -0.0002
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 96460800 | Self Similarity: 0.0131
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 96460800 | Self Similarity: 0.0025
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 96460800 | Self Similarity: -0.0072
Sparsity: 125.0 | Dead Features: 0 | Total Loss: 0.0519 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 96460800 | Self Similarity: -0.0015
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0896 | Reconstruction Loss: 0.0516 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 964

 86%|████████▌ | 47204/55054 [21:43<03:39, 35.81it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 96665600 | Self Similarity: -0.0007
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 96665600 | Self Similarity: 0.0133
Sparsity: 45.3 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 96665600 | Self Similarity: 0.0025
Sparsity: 114.5 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 96665600 | Self Similarity: -0.0072
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 96665600 | Self Similarity: -0.0018
Sparsity: 135.4 | Dead Features: 0 | Total Loss: 0.0759 | Reconstruction Loss: 0.0380 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 966

 86%|████████▌ | 47304/55054 [21:45<03:37, 35.68it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 96870400 | Self Similarity: -0.0008
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 96870400 | Self Similarity: 0.0132
Sparsity: 45.9 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 96870400 | Self Similarity: 0.0023
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 96870400 | Self Similarity: -0.0071
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 96870400 | Self Similarity: -0.0015
Sparsity: 142.4 | Dead Features: 0 | Total Loss: 0.0740 | Reconstruction Loss: 0.0357 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 968

 86%|████████▌ | 47404/55054 [21:48<03:33, 35.86it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 97075200 | Self Similarity: -0.0007
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 97075200 | Self Similarity: 0.0131
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 97075200 | Self Similarity: 0.0026
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 97075200 | Self Similarity: -0.0074
Sparsity: 123.0 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 97075200 | Self Similarity: -0.0018
Sparsity: 141.3 | Dead Features: 0 | Total Loss: 0.0743 | Reconstruction Loss: 0.0357 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 970

 86%|████████▋ | 47504/55054 [21:51<03:29, 36.12it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 97280000 | Self Similarity: -0.0007
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 97280000 | Self Similarity: 0.0129
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 97280000 | Self Similarity: 0.0025
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 97280000 | Self Similarity: -0.0073
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 97280000 | Self Similarity: -0.0018
Sparsity: 142.6 | Dead Features: 0 | Total Loss: 0.0727 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 972

 86%|████████▋ | 47604/55054 [21:54<03:29, 35.64it/s]

Sparsity: 19.0 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 97484800 | Self Similarity: -0.0005
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 97484800 | Self Similarity: 0.0131
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 97484800 | Self Similarity: 0.0024
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 97484800 | Self Similarity: -0.0072
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 97484800 | Self Similarity: -0.0019
Sparsity: 147.4 | Dead Features: 0 | Total Loss: 0.0745 | Reconstruction Loss: 0.0351 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 974

 87%|████████▋ | 47704/55054 [21:57<03:22, 36.35it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 97689600 | Self Similarity: -0.0005
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 97689600 | Self Similarity: 0.0129
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 97689600 | Self Similarity: 0.0022
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 97689600 | Self Similarity: -0.0076
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 97689600 | Self Similarity: -0.0021
Sparsity: 149.5 | Dead Features: 0 | Total Loss: 0.0753 | Reconstruction Loss: 0.0358 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 976

 87%|████████▋ | 47804/55054 [21:59<03:21, 36.04it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 97894400 | Self Similarity: -0.0003
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 97894400 | Self Similarity: 0.0129
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 97894400 | Self Similarity: 0.0022
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 97894400 | Self Similarity: -0.0073
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 97894400 | Self Similarity: -0.0018
Sparsity: 150.0 | Dead Features: 0 | Total Loss: 0.0744 | Reconstruction Loss: 0.0354 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 978

 87%|████████▋ | 47904/55054 [22:02<03:18, 36.10it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 98099200 | Self Similarity: -0.0004
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 98099200 | Self Similarity: 0.0132
Sparsity: 45.9 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 98099200 | Self Similarity: 0.0023
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 98099200 | Self Similarity: -0.0073
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0461 | Reconstruction Loss: 0.0229 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 98099200 | Self Similarity: -0.0019
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0690 | Reconstruction Loss: 0.0321 | L1 Loss: 0.0369 | l1_alpha: 8.0000e-04 | Tokens: 980

 87%|████████▋ | 48004/55054 [22:05<03:16, 35.97it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 98304000 | Self Similarity: -0.0003
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 98304000 | Self Similarity: 0.0133
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 98304000 | Self Similarity: 0.0026
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 98304000 | Self Similarity: -0.0072
Sparsity: 124.7 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 98304000 | Self Similarity: -0.0020
Sparsity: 153.6 | Dead Features: 0 | Total Loss: 0.0761 | Reconstruction Loss: 0.0353 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens: 983

 87%|████████▋ | 48104/55054 [22:08<03:11, 36.27it/s]

Sparsity: 19.2 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 98508800 | Self Similarity: -0.0007
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 98508800 | Self Similarity: 0.0133
Sparsity: 46.1 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 98508800 | Self Similarity: 0.0027
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 98508800 | Self Similarity: -0.0071
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 98508800 | Self Similarity: -0.0019
Sparsity: 264.8 | Dead Features: 0 | Total Loss: 0.4707 | Reconstruction Loss: 0.3213 | L1 Loss: 0.1495 | l1_alpha: 8.0000e-04 | Tokens: 985

 88%|████████▊ | 48204/55054 [22:10<03:11, 35.72it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 98713600 | Self Similarity: -0.0008
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 98713600 | Self Similarity: 0.0133
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 98713600 | Self Similarity: 0.0024
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 98713600 | Self Similarity: -0.0073
Sparsity: 124.7 | Dead Features: 0 | Total Loss: 0.0501 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 98713600 | Self Similarity: -0.0017
Sparsity: 95.3 | Dead Features: 0 | Total Loss: 0.0854 | Reconstruction Loss: 0.0517 | L1 Loss: 0.0337 | l1_alpha: 8.0000e-04 | Tokens: 9871

 88%|████████▊ | 48304/55054 [22:13<03:09, 35.55it/s]

Sparsity: 23.3 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 98918400 | Self Similarity: -0.0020
Sparsity: 38.6 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 98918400 | Self Similarity: 0.0132
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 98918400 | Self Similarity: 0.0024
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 98918400 | Self Similarity: -0.0074
Sparsity: 124.3 | Dead Features: 0 | Total Loss: 0.0501 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 98918400 | Self Similarity: -0.0019
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0802 | Reconstruction Loss: 0.0434 | L1 Loss: 0.0368 | l1_alpha: 8.0000e-04 | Tokens: 989

 88%|████████▊ | 48404/55054 [22:16<03:04, 36.06it/s]

Sparsity: 23.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 99123200 | Self Similarity: -0.0019
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 99123200 | Self Similarity: 0.0131
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 99123200 | Self Similarity: 0.0022
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 99123200 | Self Similarity: -0.0072
Sparsity: 124.1 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 99123200 | Self Similarity: -0.0014
Sparsity: 124.0 | Dead Features: 0 | Total Loss: 0.0793 | Reconstruction Loss: 0.0415 | L1 Loss: 0.0378 | l1_alpha: 8.0000e-04 | Tokens: 991

 88%|████████▊ | 48504/55054 [22:19<03:02, 35.82it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 99328000 | Self Similarity: -0.0017
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 99328000 | Self Similarity: 0.0130
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 99328000 | Self Similarity: 0.0022
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 99328000 | Self Similarity: -0.0072
Sparsity: 122.6 | Dead Features: 0 | Total Loss: 0.0511 | Reconstruction Loss: 0.0266 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 99328000 | Self Similarity: -0.0018
Sparsity: 132.7 | Dead Features: 0 | Total Loss: 0.0796 | Reconstruction Loss: 0.0410 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 993

 88%|████████▊ | 48604/55054 [22:21<03:00, 35.73it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 99532800 | Self Similarity: -0.0016
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 99532800 | Self Similarity: 0.0131
Sparsity: 49.1 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 99532800 | Self Similarity: 0.0021
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 99532800 | Self Similarity: -0.0070
Sparsity: 123.9 | Dead Features: 0 | Total Loss: 0.0512 | Reconstruction Loss: 0.0263 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 99532800 | Self Similarity: -0.0018
Sparsity: 134.8 | Dead Features: 0 | Total Loss: 0.0792 | Reconstruction Loss: 0.0398 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 995

 88%|████████▊ | 48704/55054 [22:24<02:56, 35.94it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 99737600 | Self Similarity: -0.0014
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 99737600 | Self Similarity: 0.0131
Sparsity: 46.3 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 99737600 | Self Similarity: 0.0022
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 99737600 | Self Similarity: -0.0070
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 99737600 | Self Similarity: -0.0019
Sparsity: 131.0 | Dead Features: 0 | Total Loss: 0.0710 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0361 | l1_alpha: 8.0000e-04 | Tokens: 997

 89%|████████▊ | 48805/55054 [22:27<02:54, 35.87it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 99942400 | Self Similarity: -0.0013
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 99942400 | Self Similarity: 0.0130
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 99942400 | Self Similarity: 0.0021
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 99942400 | Self Similarity: -0.0070
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 99942400 | Self Similarity: -0.0018
Sparsity: 134.5 | Dead Features: 0 | Total Loss: 0.0732 | Reconstruction Loss: 0.0353 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 999

 89%|████████▉ | 48905/55054 [22:30<02:48, 36.51it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 100147200 | Self Similarity: -0.0013
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 100147200 | Self Similarity: 0.0128
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 100147200 | Self Similarity: 0.0025
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 100147200 | Self Similarity: -0.0070
Sparsity: 121.6 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 100147200 | Self Similarity: -0.0016
Sparsity: 138.1 | Dead Features: 0 | Total Loss: 0.0740 | Reconstruction Loss: 0.0355 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens

 89%|████████▉ | 49006/55054 [22:32<02:47, 36.03it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 100352000 | Self Similarity: -0.0010
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 100352000 | Self Similarity: 0.0128
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 100352000 | Self Similarity: 0.0024
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 100352000 | Self Similarity: -0.0072
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 100352000 | Self Similarity: -0.0018
Sparsity: 141.3 | Dead Features: 0 | Total Loss: 0.0735 | Reconstruction Loss: 0.0352 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens

 89%|████████▉ | 49105/55054 [22:35<02:44, 36.16it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 100556800 | Self Similarity: -0.0009
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 100556800 | Self Similarity: 0.0128
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 100556800 | Self Similarity: 0.0021
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 100556800 | Self Similarity: -0.0069
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0502 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 100556800 | Self Similarity: -0.0017
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0778 | Reconstruction Loss: 0.0378 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens

 89%|████████▉ | 49205/55054 [22:38<02:43, 35.76it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 100761600 | Self Similarity: -0.0007
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 100761600 | Self Similarity: 0.0130
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 100761600 | Self Similarity: 0.0024
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 100761600 | Self Similarity: -0.0071
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 100761600 | Self Similarity: -0.0017
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0755 | Reconstruction Loss: 0.0360 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens

 90%|████████▉ | 49306/55054 [22:40<02:37, 36.59it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 100966400 | Self Similarity: -0.0008
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 100966400 | Self Similarity: 0.0130
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 100966400 | Self Similarity: 0.0025
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 100966400 | Self Similarity: -0.0069
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0467 | Reconstruction Loss: 0.0231 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 100966400 | Self Similarity: -0.0018
Sparsity: 140.3 | Dead Features: 0 | Total Loss: 0.0700 | Reconstruction Loss: 0.0328 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens

 90%|████████▉ | 49406/55054 [22:43<02:38, 35.71it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 101171200 | Self Similarity: -0.0009
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 101171200 | Self Similarity: 0.0133
Sparsity: 46.3 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 101171200 | Self Similarity: 0.0023
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 101171200 | Self Similarity: -0.0070
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 101171200 | Self Similarity: -0.0017
Sparsity: 144.8 | Dead Features: 0 | Total Loss: 0.0733 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens

 90%|████████▉ | 49506/55054 [22:46<02:33, 36.08it/s]

Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 101376000 | Self Similarity: -0.0010
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 101376000 | Self Similarity: 0.0128
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 101376000 | Self Similarity: 0.0023
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 101376000 | Self Similarity: -0.0069
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 101376000 | Self Similarity: -0.0019
Sparsity: 145.1 | Dead Features: 0 | Total Loss: 0.0741 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens

 90%|█████████ | 49606/55054 [22:49<02:34, 35.17it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 101580800 | Self Similarity: -0.0010
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 101580800 | Self Similarity: 0.0128
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 101580800 | Self Similarity: 0.0023
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 101580800 | Self Similarity: -0.0070
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0229 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 101580800 | Self Similarity: -0.0013
Sparsity: 142.3 | Dead Features: 0 | Total Loss: 0.0708 | Reconstruction Loss: 0.0326 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens

 90%|█████████ | 49706/55054 [22:52<02:28, 36.12it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 101785600 | Self Similarity: -0.0010
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 101785600 | Self Similarity: 0.0129
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 101785600 | Self Similarity: 0.0022
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 101785600 | Self Similarity: -0.0070
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 101785600 | Self Similarity: -0.0016
Sparsity: 145.5 | Dead Features: 0 | Total Loss: 0.0725 | Reconstruction Loss: 0.0337 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens

 90%|█████████ | 49806/55054 [22:54<02:27, 35.69it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 101990400 | Self Similarity: -0.0010
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 101990400 | Self Similarity: 0.0126
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 101990400 | Self Similarity: 0.0020
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0401 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 101990400 | Self Similarity: -0.0069
Sparsity: 123.9 | Dead Features: 0 | Total Loss: 0.0506 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 101990400 | Self Similarity: -0.0016
Sparsity: 149.1 | Dead Features: 0 | Total Loss: 0.0782 | Reconstruction Loss: 0.0377 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens

 91%|█████████ | 49906/55054 [22:57<02:27, 34.82it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 102195200 | Self Similarity: -0.0010
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 102195200 | Self Similarity: 0.0128
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 102195200 | Self Similarity: 0.0021
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 102195200 | Self Similarity: -0.0067
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0465 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 102195200 | Self Similarity: -0.0015
Sparsity: 145.3 | Dead Features: 0 | Total Loss: 0.0712 | Reconstruction Loss: 0.0330 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens

 91%|█████████ | 50006/55054 [23:00<02:24, 34.97it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 102400000 | Self Similarity: -0.0009
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 102400000 | Self Similarity: 0.0128
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 102400000 | Self Similarity: 0.0021
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 102400000 | Self Similarity: -0.0072
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 102400000 | Self Similarity: -0.0013
Sparsity: 149.6 | Dead Features: 0 | Total Loss: 0.0739 | Reconstruction Loss: 0.0351 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens

 91%|█████████ | 50106/55054 [23:03<02:22, 34.79it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 102604800 | Self Similarity: -0.0007
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 102604800 | Self Similarity: 0.0126
Sparsity: 46.3 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 102604800 | Self Similarity: 0.0022
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 102604800 | Self Similarity: -0.0068
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 102604800 | Self Similarity: -0.0018
Sparsity: 147.3 | Dead Features: 0 | Total Loss: 0.0725 | Reconstruction Loss: 0.0343 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens

 91%|█████████ | 50206/55054 [23:06<02:20, 34.49it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 102809600 | Self Similarity: -0.0005
Sparsity: 37.4 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 102809600 | Self Similarity: 0.0126
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 102809600 | Self Similarity: 0.0022
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 102809600 | Self Similarity: -0.0069
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0504 | Reconstruction Loss: 0.0254 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 102809600 | Self Similarity: -0.0016
Sparsity: 153.3 | Dead Features: 0 | Total Loss: 0.0774 | Reconstruction Loss: 0.0365 | L1 Loss: 0.0409 | l1_alpha: 8.0000e-04 | Tokens

 91%|█████████▏| 50305/55054 [23:08<01:56, 40.90it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 103014400 | Self Similarity: -0.0005
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 103014400 | Self Similarity: 0.0127
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 103014400 | Self Similarity: 0.0023
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 103014400 | Self Similarity: -0.0068
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 103014400 | Self Similarity: -0.0018
Sparsity: 151.0 | Dead Features: 0 | Total Loss: 0.0732 | Reconstruction Loss: 0.0341 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens

 92%|█████████▏| 50407/55054 [23:11<02:09, 35.95it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 103219200 | Self Similarity: -0.0005
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 103219200 | Self Similarity: 0.0129
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 103219200 | Self Similarity: 0.0023
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 103219200 | Self Similarity: -0.0071
Sparsity: 124.5 | Dead Features: 0 | Total Loss: 0.0504 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 103219200 | Self Similarity: -0.0018
Sparsity: 156.5 | Dead Features: 0 | Total Loss: 0.0777 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0410 | l1_alpha: 8.0000e-04 | Tokens

 92%|█████████▏| 50507/55054 [23:14<02:06, 35.94it/s]

Sparsity: 24.8 | Dead Features: 0 | Total Loss: 0.0156 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 103424000 | Self Similarity: -0.0004
Sparsity: 40.2 | Dead Features: 0 | Total Loss: 0.0156 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 103424000 | Self Similarity: 0.0126
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0212 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 103424000 | Self Similarity: 0.0023
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 103424000 | Self Similarity: -0.0072
Sparsity: 127.6 | Dead Features: 0 | Total Loss: 0.0530 | Reconstruction Loss: 0.0269 | L1 Loss: 0.0261 | l1_alpha: 8.0000e-04 | Tokens: 103424000 | Self Similarity: -0.0015
Sparsity: 157.2 | Dead Features: 0 | Total Loss: 0.0787 | Reconstruction Loss: 0.0370 | L1 Loss: 0.0416 | l1_alpha: 8.0000e-04 | Tokens

 92%|█████████▏| 50607/55054 [23:17<02:03, 36.04it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 103628800 | Self Similarity: -0.0005
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 103628800 | Self Similarity: 0.0131
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 103628800 | Self Similarity: 0.0022
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 103628800 | Self Similarity: -0.0070
Sparsity: 122.8 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 103628800 | Self Similarity: -0.0016
Sparsity: 151.0 | Dead Features: 0 | Total Loss: 0.0734 | Reconstruction Loss: 0.0342 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens

 92%|█████████▏| 50707/55054 [23:19<02:01, 35.74it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 103833600 | Self Similarity: -0.0006
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 103833600 | Self Similarity: 0.0130
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 103833600 | Self Similarity: 0.0022
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 103833600 | Self Similarity: -0.0070
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 103833600 | Self Similarity: -0.0012
Sparsity: 150.9 | Dead Features: 0 | Total Loss: 0.0726 | Reconstruction Loss: 0.0339 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens

 92%|█████████▏| 50807/55054 [23:22<01:57, 36.15it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 104038400 | Self Similarity: -0.0005
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 104038400 | Self Similarity: 0.0128
Sparsity: 46.5 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 104038400 | Self Similarity: 0.0021
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 104038400 | Self Similarity: -0.0072
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 104038400 | Self Similarity: -0.0012
Sparsity: 146.8 | Dead Features: 0 | Total Loss: 0.0704 | Reconstruction Loss: 0.0331 | L1 Loss: 0.0373 | l1_alpha: 8.0000e-04 | Tokens

 92%|█████████▏| 50904/55054 [23:25<01:56, 35.72it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 104243200 | Self Similarity: -0.0007
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 104243200 | Self Similarity: 0.0130
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 104243200 | Self Similarity: 0.0022
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 104243200 | Self Similarity: -0.0071
Sparsity: 123.4 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 104243200 | Self Similarity: -0.0015
Sparsity: 153.0 | Dead Features: 0 | Total Loss: 0.0745 | Reconstruction Loss: 0.0340 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens

 93%|█████████▎| 51004/55054 [23:28<01:52, 35.90it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 104448000 | Self Similarity: -0.0006
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 104448000 | Self Similarity: 0.0128
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 104448000 | Self Similarity: 0.0024
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 104448000 | Self Similarity: -0.0070
Sparsity: 123.3 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 104448000 | Self Similarity: -0.0013
Sparsity: 155.0 | Dead Features: 0 | Total Loss: 0.0760 | Reconstruction Loss: 0.0357 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens

 93%|█████████▎| 51104/55054 [23:30<01:50, 35.59it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 104652800 | Self Similarity: -0.0006
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 104652800 | Self Similarity: 0.0125
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 104652800 | Self Similarity: 0.0021
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 104652800 | Self Similarity: -0.0070
Sparsity: 124.2 | Dead Features: 0 | Total Loss: 0.0512 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 104652800 | Self Similarity: -0.0012
Sparsity: 156.3 | Dead Features: 0 | Total Loss: 0.0781 | Reconstruction Loss: 0.0376 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens

 93%|█████████▎| 51204/55054 [23:33<01:46, 36.10it/s]

Sparsity: 23.2 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 104857600 | Self Similarity: -0.0004
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 104857600 | Self Similarity: 0.0129
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 104857600 | Self Similarity: 0.0021
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 104857600 | Self Similarity: -0.0070
Sparsity: 124.1 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 104857600 | Self Similarity: -0.0014
Sparsity: 152.8 | Dead Features: 0 | Total Loss: 0.0743 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens

 93%|█████████▎| 51304/55054 [23:36<01:44, 35.77it/s]

Sparsity: 23.5 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 105062400 | Self Similarity: -0.0006
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 105062400 | Self Similarity: 0.0131
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 105062400 | Self Similarity: 0.0022
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 105062400 | Self Similarity: -0.0068
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 105062400 | Self Similarity: -0.0013
Sparsity: 155.3 | Dead Features: 0 | Total Loss: 0.0748 | Reconstruction Loss: 0.0341 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens

 93%|█████████▎| 51404/55054 [23:39<01:40, 36.22it/s]

Sparsity: 25.0 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 105267200 | Self Similarity: -0.0006
Sparsity: 40.1 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 105267200 | Self Similarity: 0.0131
Sparsity: 50.3 | Dead Features: 0 | Total Loss: 0.0209 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 105267200 | Self Similarity: 0.0020
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 105267200 | Self Similarity: -0.0068
Sparsity: 128.1 | Dead Features: 0 | Total Loss: 0.0521 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 105267200 | Self Similarity: -0.0012
Sparsity: 155.7 | Dead Features: 0 | Total Loss: 0.0776 | Reconstruction Loss: 0.0362 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens

 94%|█████████▎| 51505/55054 [23:41<01:39, 35.78it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 105472000 | Self Similarity: -0.0005
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 105472000 | Self Similarity: 0.0130
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 105472000 | Self Similarity: 0.0018
Sparsity: 115.8 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 105472000 | Self Similarity: -0.0070
Sparsity: 122.3 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 105472000 | Self Similarity: -0.0014
Sparsity: 152.1 | Dead Features: 0 | Total Loss: 0.0750 | Reconstruction Loss: 0.0351 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens

 94%|█████████▎| 51605/55054 [23:44<01:35, 36.04it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 105676800 | Self Similarity: -0.0006
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 105676800 | Self Similarity: 0.0128
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 105676800 | Self Similarity: 0.0021
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 105676800 | Self Similarity: -0.0067
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 105676800 | Self Similarity: -0.0017
Sparsity: 153.0 | Dead Features: 0 | Total Loss: 0.0731 | Reconstruction Loss: 0.0342 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens

 94%|█████████▍| 51705/55054 [23:47<01:32, 36.09it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 105881600 | Self Similarity: -0.0005
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 105881600 | Self Similarity: 0.0129
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 105881600 | Self Similarity: 0.0022
Sparsity: 114.8 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 105881600 | Self Similarity: -0.0068
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 105881600 | Self Similarity: -0.0013
Sparsity: 152.6 | Dead Features: 0 | Total Loss: 0.0729 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens

 94%|█████████▍| 51805/55054 [23:50<01:29, 36.33it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 106086400 | Self Similarity: -0.0004
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 106086400 | Self Similarity: 0.0130
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 106086400 | Self Similarity: 0.0021
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 106086400 | Self Similarity: -0.0074
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0501 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 106086400 | Self Similarity: -0.0013
Sparsity: 149.4 | Dead Features: 0 | Total Loss: 0.0752 | Reconstruction Loss: 0.0352 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens

 94%|█████████▍| 51904/55054 [23:52<01:23, 37.95it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 106291200 | Self Similarity: -0.0006
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 106291200 | Self Similarity: 0.0130
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 106291200 | Self Similarity: 0.0021
Sparsity: 113.2 | Dead Features: 0 | Total Loss: 0.0376 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 106291200 | Self Similarity: -0.0071
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 106291200 | Self Similarity: -0.0014
Sparsity: 152.5 | Dead Features: 0 | Total Loss: 0.0725 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens

 94%|█████████▍| 52004/55054 [23:55<01:25, 35.77it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 106496000 | Self Similarity: -0.0005
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 106496000 | Self Similarity: 0.0129
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 106496000 | Self Similarity: 0.0020
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 106496000 | Self Similarity: -0.0073
Sparsity: 124.4 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 106496000 | Self Similarity: -0.0013
Sparsity: 154.2 | Dead Features: 0 | Total Loss: 0.0765 | Reconstruction Loss: 0.0359 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens

 95%|█████████▍| 52104/55054 [23:58<01:21, 35.99it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 106700800 | Self Similarity: -0.0006
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 106700800 | Self Similarity: 0.0130
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 106700800 | Self Similarity: 0.0024
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 106700800 | Self Similarity: -0.0072
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 106700800 | Self Similarity: -0.0015
Sparsity: 149.7 | Dead Features: 0 | Total Loss: 0.0704 | Reconstruction Loss: 0.0325 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens

 95%|█████████▍| 52206/55054 [24:01<01:19, 35.76it/s]

Sparsity: 18.8 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 106905600 | Self Similarity: -0.0004
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 106905600 | Self Similarity: 0.0130
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 106905600 | Self Similarity: 0.0023
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 106905600 | Self Similarity: -0.0071
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 106905600 | Self Similarity: -0.0013
Sparsity: 132.7 | Dead Features: 0 | Total Loss: 0.0800 | Reconstruction Loss: 0.0429 | L1 Loss: 0.0371 | l1_alpha: 8.0000e-04 | Tokens

 95%|█████████▌| 52307/55054 [24:03<01:09, 39.60it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 107110400 | Self Similarity: -0.0008
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 107110400 | Self Similarity: 0.0131
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 107110400 | Self Similarity: 0.0026
Sparsity: 114.4 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 107110400 | Self Similarity: -0.0071
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 107110400 | Self Similarity: -0.0011
Sparsity: 142.8 | Dead Features: 0 | Total Loss: 0.0737 | Reconstruction Loss: 0.0352 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens

 95%|█████████▌| 52406/55054 [24:06<01:14, 35.63it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 107315200 | Self Similarity: -0.0008
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 107315200 | Self Similarity: 0.0129
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 107315200 | Self Similarity: 0.0025
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 107315200 | Self Similarity: -0.0070
Sparsity: 123.3 | Dead Features: 0 | Total Loss: 0.0498 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 107315200 | Self Similarity: -0.0011
Sparsity: 146.3 | Dead Features: 0 | Total Loss: 0.0730 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens

 95%|█████████▌| 52506/55054 [24:09<01:11, 35.87it/s]

Sparsity: 23.4 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 107520000 | Self Similarity: -0.0007
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 107520000 | Self Similarity: 0.0129
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 107520000 | Self Similarity: 0.0025
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 107520000 | Self Similarity: -0.0071
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 107520000 | Self Similarity: -0.0013
Sparsity: 148.9 | Dead Features: 0 | Total Loss: 0.0755 | Reconstruction Loss: 0.0363 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens

 96%|█████████▌| 52606/55054 [24:11<01:07, 36.21it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 107724800 | Self Similarity: -0.0005
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 107724800 | Self Similarity: 0.0133
Sparsity: 46.4 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 107724800 | Self Similarity: 0.0024
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 107724800 | Self Similarity: -0.0069
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 107724800 | Self Similarity: -0.0013
Sparsity: 147.6 | Dead Features: 0 | Total Loss: 0.0702 | Reconstruction Loss: 0.0333 | L1 Loss: 0.0370 | l1_alpha: 8.0000e-04 | Tokens

 96%|█████████▌| 52706/55054 [24:14<01:04, 36.40it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 107929600 | Self Similarity: -0.0004
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 107929600 | Self Similarity: 0.0132
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 107929600 | Self Similarity: 0.0024
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 107929600 | Self Similarity: -0.0070
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0504 | Reconstruction Loss: 0.0262 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 107929600 | Self Similarity: -0.0012
Sparsity: 150.9 | Dead Features: 0 | Total Loss: 0.0780 | Reconstruction Loss: 0.0380 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens

 96%|█████████▌| 52806/55054 [24:17<01:02, 36.05it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 108134400 | Self Similarity: -0.0005
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 108134400 | Self Similarity: 0.0132
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 108134400 | Self Similarity: 0.0024
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 108134400 | Self Similarity: -0.0070
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 108134400 | Self Similarity: -0.0011
Sparsity: 133.6 | Dead Features: 0 | Total Loss: 0.0742 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0374 | l1_alpha: 8.0000e-04 | Tokens

 96%|█████████▌| 52906/55054 [24:20<00:58, 36.83it/s]

Sparsity: 19.5 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 108339200 | Self Similarity: -0.0009
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 108339200 | Self Similarity: 0.0130
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 108339200 | Self Similarity: 0.0022
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 108339200 | Self Similarity: -0.0071
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 108339200 | Self Similarity: -0.0009
Sparsity: 144.1 | Dead Features: 0 | Total Loss: 0.0747 | Reconstruction Loss: 0.0352 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens

 96%|█████████▋| 53007/55054 [24:22<00:56, 36.02it/s]

Sparsity: 25.6 | Dead Features: 0 | Total Loss: 0.0159 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 108544000 | Self Similarity: -0.0006
Sparsity: 40.9 | Dead Features: 0 | Total Loss: 0.0161 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 108544000 | Self Similarity: 0.0132
Sparsity: 50.7 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 108544000 | Self Similarity: 0.0021
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0431 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 108544000 | Self Similarity: -0.0072
Sparsity: 126.9 | Dead Features: 0 | Total Loss: 0.0561 | Reconstruction Loss: 0.0277 | L1 Loss: 0.0283 | l1_alpha: 8.0000e-04 | Tokens: 108544000 | Self Similarity: -0.0012
Sparsity: 149.4 | Dead Features: 0 | Total Loss: 0.0878 | Reconstruction Loss: 0.0407 | L1 Loss: 0.0472 | l1_alpha: 8.0000e-04 | Tokens

 96%|█████████▋| 53107/55054 [24:25<00:54, 35.78it/s]

Sparsity: 19.5 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 108748800 | Self Similarity: -0.0004
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 108748800 | Self Similarity: 0.0132
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 108748800 | Self Similarity: 0.0021
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 108748800 | Self Similarity: -0.0074
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 108748800 | Self Similarity: -0.0013
Sparsity: 144.0 | Dead Features: 0 | Total Loss: 0.0729 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens

 97%|█████████▋| 53207/55054 [24:28<00:51, 35.90it/s]

Sparsity: 19.9 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 108953600 | Self Similarity: -0.0005
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 108953600 | Self Similarity: 0.0132
Sparsity: 46.5 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 108953600 | Self Similarity: 0.0024
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 108953600 | Self Similarity: -0.0071
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0464 | Reconstruction Loss: 0.0230 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 108953600 | Self Similarity: -0.0013
Sparsity: 147.2 | Dead Features: 0 | Total Loss: 0.0710 | Reconstruction Loss: 0.0334 | L1 Loss: 0.0376 | l1_alpha: 8.0000e-04 | Tokens

 97%|█████████▋| 53307/55054 [24:31<00:48, 36.00it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 109158400 | Self Similarity: -0.0004
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 109158400 | Self Similarity: 0.0129
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 109158400 | Self Similarity: 0.0023
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 109158400 | Self Similarity: -0.0069
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 109158400 | Self Similarity: -0.0010
Sparsity: 148.1 | Dead Features: 0 | Total Loss: 0.0707 | Reconstruction Loss: 0.0329 | L1 Loss: 0.0378 | l1_alpha: 8.0000e-04 | Tokens

 97%|█████████▋| 53407/55054 [24:34<00:46, 35.75it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 109363200 | Self Similarity: -0.0004
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 109363200 | Self Similarity: 0.0127
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 109363200 | Self Similarity: 0.0019
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 109363200 | Self Similarity: -0.0066
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0498 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 109363200 | Self Similarity: -0.0009
Sparsity: 157.3 | Dead Features: 0 | Total Loss: 0.0818 | Reconstruction Loss: 0.0392 | L1 Loss: 0.0426 | l1_alpha: 8.0000e-04 | Tokens

 97%|█████████▋| 53507/55054 [24:36<00:42, 36.01it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 109568000 | Self Similarity: -0.0004
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 109568000 | Self Similarity: 0.0130
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 109568000 | Self Similarity: 0.0021
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 109568000 | Self Similarity: -0.0067
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0461 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 109568000 | Self Similarity: -0.0006
Sparsity: 99.7 | Dead Features: 0 | Total Loss: 0.0793 | Reconstruction Loss: 0.0478 | L1 Loss: 0.0315 | l1_alpha: 8.0000e-04 | Tokens:

 97%|█████████▋| 53607/55054 [24:39<00:40, 36.03it/s]

Sparsity: 19.3 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 109772800 | Self Similarity: -0.0016
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 109772800 | Self Similarity: 0.0130
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 109772800 | Self Similarity: 0.0023
Sparsity: 114.8 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 109772800 | Self Similarity: -0.0066
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 109772800 | Self Similarity: -0.0009
Sparsity: 129.0 | Dead Features: 0 | Total Loss: 0.0734 | Reconstruction Loss: 0.0364 | L1 Loss: 0.0370 | l1_alpha: 8.0000e-04 | Tokens

 98%|█████████▊| 53707/55054 [24:42<00:37, 35.52it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 109977600 | Self Similarity: -0.0013
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 109977600 | Self Similarity: 0.0130
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 109977600 | Self Similarity: 0.0024
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 109977600 | Self Similarity: -0.0065
Sparsity: 125.0 | Dead Features: 0 | Total Loss: 0.0524 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0264 | l1_alpha: 8.0000e-04 | Tokens: 109977600 | Self Similarity: -0.0004
Sparsity: 142.9 | Dead Features: 0 | Total Loss: 0.0787 | Reconstruction Loss: 0.0386 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens

 98%|█████████▊| 53807/55054 [24:45<00:34, 36.08it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 110182400 | Self Similarity: -0.0011
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 110182400 | Self Similarity: 0.0128
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 110182400 | Self Similarity: 0.0025
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 110182400 | Self Similarity: -0.0063
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 110182400 | Self Similarity: -0.0008
Sparsity: 136.9 | Dead Features: 0 | Total Loss: 0.0731 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens

 98%|█████████▊| 53907/55054 [24:47<00:31, 35.94it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 110387200 | Self Similarity: -0.0009
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 110387200 | Self Similarity: 0.0128
Sparsity: 49.1 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 110387200 | Self Similarity: 0.0024
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 110387200 | Self Similarity: -0.0066
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0254 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 110387200 | Self Similarity: -0.0010
Sparsity: 141.6 | Dead Features: 0 | Total Loss: 0.0747 | Reconstruction Loss: 0.0356 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens

 98%|█████████▊| 54007/55054 [24:50<00:28, 36.43it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 110592000 | Self Similarity: -0.0009
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 110592000 | Self Similarity: 0.0128
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 110592000 | Self Similarity: 0.0020
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 110592000 | Self Similarity: -0.0071
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 110592000 | Self Similarity: -0.0010
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0753 | Reconstruction Loss: 0.0362 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens

 98%|█████████▊| 54105/55054 [24:53<00:26, 35.88it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 110796800 | Self Similarity: -0.0008
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 110796800 | Self Similarity: 0.0129
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 110796800 | Self Similarity: 0.0024
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 110796800 | Self Similarity: -0.0070
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0510 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 110796800 | Self Similarity: -0.0009
Sparsity: 145.4 | Dead Features: 0 | Total Loss: 0.0758 | Reconstruction Loss: 0.0356 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens

 98%|█████████▊| 54205/55054 [24:56<00:23, 35.82it/s]

Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 111001600 | Self Similarity: -0.0009
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 111001600 | Self Similarity: 0.0131
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 111001600 | Self Similarity: 0.0024
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 111001600 | Self Similarity: -0.0068
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 111001600 | Self Similarity: -0.0011
Sparsity: 144.0 | Dead Features: 0 | Total Loss: 0.0746 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0398 | l1_alpha: 8.0000e-04 | Tokens

 99%|█████████▊| 54305/55054 [24:58<00:20, 35.99it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 111206400 | Self Similarity: -0.0007
Sparsity: 39.0 | Dead Features: 0 | Total Loss: 0.0156 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 111206400 | Self Similarity: 0.0133
Sparsity: 50.6 | Dead Features: 0 | Total Loss: 0.0209 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 111206400 | Self Similarity: 0.0023
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 111206400 | Self Similarity: -0.0070
Sparsity: 126.3 | Dead Features: 0 | Total Loss: 0.0528 | Reconstruction Loss: 0.0265 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 111206400 | Self Similarity: -0.0012
Sparsity: 152.9 | Dead Features: 0 | Total Loss: 0.0803 | Reconstruction Loss: 0.0388 | L1 Loss: 0.0415 | l1_alpha: 8.0000e-04 | Tokens

 99%|█████████▉| 54405/55054 [25:01<00:18, 36.05it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 111411200 | Self Similarity: -0.0007
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 111411200 | Self Similarity: 0.0130
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 111411200 | Self Similarity: 0.0021
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 111411200 | Self Similarity: -0.0067
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 111411200 | Self Similarity: -0.0009
Sparsity: 148.0 | Dead Features: 0 | Total Loss: 0.0705 | Reconstruction Loss: 0.0326 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens

 99%|█████████▉| 54505/55054 [25:04<00:15, 35.57it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 111616000 | Self Similarity: -0.0003
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 111616000 | Self Similarity: 0.0130
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 111616000 | Self Similarity: 0.0020
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 111616000 | Self Similarity: -0.0068
Sparsity: 123.4 | Dead Features: 0 | Total Loss: 0.0499 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 111616000 | Self Similarity: -0.0011
Sparsity: 155.3 | Dead Features: 0 | Total Loss: 0.0768 | Reconstruction Loss: 0.0364 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens

 99%|█████████▉| 54605/55054 [25:07<00:12, 35.44it/s]

Sparsity: 23.1 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 111820800 | Self Similarity: -0.0003
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 111820800 | Self Similarity: 0.0128
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 111820800 | Self Similarity: 0.0023
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 111820800 | Self Similarity: -0.0070
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 111820800 | Self Similarity: -0.0012
Sparsity: 148.2 | Dead Features: 0 | Total Loss: 0.0737 | Reconstruction Loss: 0.0345 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens

 99%|█████████▉| 54705/55054 [25:10<00:09, 35.95it/s]

Sparsity: 24.1 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 112025600 | Self Similarity: -0.0003
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 112025600 | Self Similarity: 0.0130
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 112025600 | Self Similarity: 0.0022
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 112025600 | Self Similarity: -0.0069
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 112025600 | Self Similarity: -0.0012
Sparsity: 156.9 | Dead Features: 0 | Total Loss: 0.0761 | Reconstruction Loss: 0.0360 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens

100%|█████████▉| 54806/55054 [25:12<00:07, 34.98it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 112230400 | Self Similarity: -0.0004
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 112230400 | Self Similarity: 0.0128
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 112230400 | Self Similarity: 0.0020
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 112230400 | Self Similarity: -0.0071
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 112230400 | Self Similarity: -0.0012
Sparsity: 148.7 | Dead Features: 0 | Total Loss: 0.0710 | Reconstruction Loss: 0.0329 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens

100%|█████████▉| 54904/55054 [25:15<00:04, 34.05it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 112435200 | Self Similarity: -0.0004
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 112435200 | Self Similarity: 0.0129
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 112435200 | Self Similarity: 0.0018
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 112435200 | Self Similarity: -0.0068
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 112435200 | Self Similarity: -0.0010
Sparsity: 148.3 | Dead Features: 0 | Total Loss: 0.0713 | Reconstruction Loss: 0.0331 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens

100%|█████████▉| 55004/55054 [25:18<00:01, 34.06it/s]

Sparsity: 24.1 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 112640000 | Self Similarity: -0.0003
Sparsity: 40.0 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 112640000 | Self Similarity: 0.0130
Sparsity: 50.3 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 112640000 | Self Similarity: 0.0020
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 112640000 | Self Similarity: -0.0066
Sparsity: 126.7 | Dead Features: 0 | Total Loss: 0.0527 | Reconstruction Loss: 0.0265 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 112640000 | Self Similarity: -0.0012
Sparsity: 159.8 | Dead Features: 0 | Total Loss: 0.0814 | Reconstruction Loss: 0.0392 | L1 Loss: 0.0423 | l1_alpha: 8.0000e-04 | Tokens

100%|██████████| 55054/55054 [25:20<00:00, 36.22it/s]
  0%|          | 9/55054 [00:00<23:02, 39.82it/s]

Sparsity: 23.5 | Dead Features: 1024 | Total Loss: 0.0143 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0001
Sparsity: 37.2 | Dead Features: 1024 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0129
Sparsity: 47.1 | Dead Features: 1024 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0019
Sparsity: 117.8 | Dead Features: 1024 | Total Loss: 0.0391 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0068
Sparsity: 121.7 | Dead Features: 1024 | Total Loss: 0.0482 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0011
Sparsity: 153.4 | Dead Features: 1024 | Total Loss: 0.0731 | Reconstruction Loss: 0.0342 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity:

  0%|          | 105/55054 [00:02<23:20, 39.22it/s]

Sparsity: 25.4 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0001
Sparsity: 40.0 | Dead Features: 0 | Total Loss: 0.0159 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0131
Sparsity: 51.3 | Dead Features: 0 | Total Loss: 0.0214 | Reconstruction Loss: 0.0132 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0021
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0432 | Reconstruction Loss: 0.0218 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0067
Sparsity: 129.3 | Dead Features: 0 | Total Loss: 0.0549 | Reconstruction Loss: 0.0281 | L1 Loss: 0.0268 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0012
Sparsity: 160.7 | Dead Features: 0 | Total Loss: 0.0806 | Reconstruction Loss: 0.0377 | L1 Loss: 0.0429 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self

  0%|          | 205/55054 [00:05<25:27, 35.92it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0001
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0128
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0020
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0065
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0014
Sparsity: 156.2 | Dead Features: 0 | Total Loss: 0.0760 | Reconstruction Loss: 0.0367 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self

  1%|          | 305/55054 [00:08<22:46, 40.05it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0000
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0127
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0019
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0065
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0013
Sparsity: 151.9 | Dead Features: 0 | Total Loss: 0.0719 | Reconstruction Loss: 0.0337 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self

  1%|          | 404/55054 [00:10<25:40, 35.47it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0001
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0127
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0022
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0068
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0010
Sparsity: 154.8 | Dead Features: 0 | Total Loss: 0.0737 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self 

  1%|          | 506/55054 [00:13<24:17, 37.42it/s]

Sparsity: 19.9 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0000
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0127
Sparsity: 46.4 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0023
Sparsity: 113.8 | Dead Features: 0 | Total Loss: 0.0377 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0069
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0464 | Reconstruction Loss: 0.0232 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0011
Sparsity: 155.6 | Dead Features: 0 | Total Loss: 0.0732 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 1024000 

  1%|          | 606/55054 [00:16<25:31, 35.54it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0002
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0126
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0022
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0068
Sparsity: 123.2 | Dead Features: 0 | Total Loss: 0.0510 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0008
Sparsity: 100.8 | Dead Features: 0 | Total Loss: 0.1018 | Reconstruction Loss: 0.0660 | L1 Loss: 0.0358 | l1_alpha: 8.0000e-04 | Tokens: 1228800 

  1%|▏         | 706/55054 [00:19<26:15, 34.50it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0011
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0126
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0020
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0064
Sparsity: 123.8 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0010
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0819 | Reconstruction Loss: 0.0444 | L1 Loss: 0.0375 | l1_alpha: 8.0000e-04 | Tokens: 1433600 

  1%|▏         | 807/55054 [00:22<26:06, 34.64it/s]

Sparsity: 19.2 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0011
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0126
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0021
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0066
Sparsity: 124.7 | Dead Features: 0 | Total Loss: 0.0507 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0013
Sparsity: 129.4 | Dead Features: 0 | Total Loss: 0.0795 | Reconstruction Loss: 0.0406 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 1638400 

  2%|▏         | 904/55054 [00:25<26:03, 34.63it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0010
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0126
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0020
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0067
Sparsity: 123.0 | Dead Features: 0 | Total Loss: 0.0507 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0011
Sparsity: 133.5 | Dead Features: 0 | Total Loss: 0.0795 | Reconstruction Loss: 0.0400 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 1843200 

  2%|▏         | 1004/55054 [00:27<25:57, 34.69it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0008
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0127
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0026
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0067
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0015
Sparsity: 133.0 | Dead Features: 0 | Total Loss: 0.0727 | Reconstruction Loss: 0.0354 | L1 Loss: 0.0373 | l1_alpha: 8.0000e-04 | Tokens: 2048000 

  2%|▏         | 1104/55054 [00:30<26:03, 34.51it/s]

Sparsity: 19.5 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0007
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0128
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0027
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0068
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0012
Sparsity: 134.2 | Dead Features: 0 | Total Loss: 0.0737 | Reconstruction Loss: 0.0353 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 2252800 

  2%|▏         | 1206/55054 [00:33<25:28, 35.22it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0005
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: 0.0124
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: 0.0023
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0068
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0013
Sparsity: 136.9 | Dead Features: 0 | Total Loss: 0.0750 | Reconstruction Loss: 0.0361 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 2457600 

  2%|▏         | 1304/55054 [00:36<22:45, 39.38it/s]

Sparsity: 23.8 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: -0.0003
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0124
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0024
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: -0.0067
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: -0.0014
Sparsity: 143.2 | Dead Features: 0 | Total Loss: 0.0742 | Reconstruction Loss: 0.0356 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 2662400 

  3%|▎         | 1405/55054 [00:39<23:31, 38.01it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: -0.0004
Sparsity: 37.4 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0125
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0025
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: -0.0069
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: -0.0009
Sparsity: 143.3 | Dead Features: 0 | Total Loss: 0.0734 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 2867200 

  3%|▎         | 1505/55054 [00:41<24:50, 35.92it/s]

Sparsity: 23.9 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: -0.0004
Sparsity: 39.6 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0126
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0023
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0401 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: -0.0070
Sparsity: 125.2 | Dead Features: 0 | Total Loss: 0.0514 | Reconstruction Loss: 0.0266 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: -0.0011
Sparsity: 149.9 | Dead Features: 0 | Total Loss: 0.0785 | Reconstruction Loss: 0.0386 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 3072000 

  3%|▎         | 1605/55054 [00:44<25:39, 34.72it/s]

Sparsity: 19.1 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: -0.0003
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: 0.0127
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: 0.0021
Sparsity: 114.3 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: -0.0066
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: -0.0007
Sparsity: 145.6 | Dead Features: 0 | Total Loss: 0.0741 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 3276800 

  3%|▎         | 1706/55054 [00:47<24:45, 35.92it/s]

Sparsity: 19.4 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: -0.0003
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: 0.0128
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: 0.0023
Sparsity: 114.0 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: -0.0067
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: -0.0009
Sparsity: 142.2 | Dead Features: 0 | Total Loss: 0.0703 | Reconstruction Loss: 0.0326 | L1 Loss: 0.0376 | l1_alpha: 8.0000e-04 | Tokens: 3481600 

  3%|▎         | 1805/55054 [00:49<23:37, 37.58it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: -0.0002
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: 0.0128
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: 0.0021
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: -0.0068
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: -0.0010
Sparsity: 147.5 | Dead Features: 0 | Total Loss: 0.0736 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 3686400 

  3%|▎         | 1905/55054 [00:52<21:48, 40.63it/s]

Sparsity: 23.7 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: -0.0000
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: 0.0126
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: 0.0023
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: -0.0069
Sparsity: 122.6 | Dead Features: 0 | Total Loss: 0.0505 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: -0.0009
Sparsity: 154.8 | Dead Features: 0 | Total Loss: 0.0783 | Reconstruction Loss: 0.0372 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 3891200 

  4%|▎         | 2007/55054 [00:55<24:16, 36.43it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: -0.0001
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: 0.0125
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: 0.0024
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: -0.0067
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: -0.0009
Sparsity: 147.7 | Dead Features: 0 | Total Loss: 0.0722 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0378 | l1_alpha: 8.0000e-04 | Tokens: 4096000 

  4%|▍         | 2107/55054 [00:57<24:48, 35.57it/s]

Sparsity: 21.0 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: 0.0001
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: 0.0124
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: 0.0024
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: -0.0070
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: -0.0009
Sparsity: 149.9 | Dead Features: 0 | Total Loss: 0.0726 | Reconstruction Loss: 0.0341 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 4300800 |

  4%|▍         | 2207/55054 [01:00<24:27, 36.00it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: 0.0001
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: 0.0124
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: 0.0023
Sparsity: 113.1 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: -0.0069
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: -0.0008
Sparsity: 151.3 | Dead Features: 0 | Total Loss: 0.0742 | Reconstruction Loss: 0.0354 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 4505600 |

  4%|▍         | 2304/55054 [01:03<22:56, 38.33it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: 0.0003
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: 0.0125
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: 0.0024
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: -0.0070
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0498 | Reconstruction Loss: 0.0254 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: -0.0009
Sparsity: 155.8 | Dead Features: 0 | Total Loss: 0.0772 | Reconstruction Loss: 0.0364 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens: 4710400 |

  4%|▍         | 2406/55054 [01:05<24:16, 36.15it/s]

Sparsity: 25.7 | Dead Features: 0 | Total Loss: 0.0162 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0002
Sparsity: 38.9 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0123
Sparsity: 50.4 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0025
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: -0.0070
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0511 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: -0.0008
Sparsity: 154.2 | Dead Features: 0 | Total Loss: 0.0792 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0426 | l1_alpha: 8.0000e-04 | Tokens: 4915200 |

  5%|▍         | 2507/55054 [01:08<24:07, 36.29it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0001
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0126
Sparsity: 49.9 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0024
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: -0.0070
Sparsity: 123.8 | Dead Features: 0 | Total Loss: 0.0512 | Reconstruction Loss: 0.0261 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: -0.0007
Sparsity: 158.7 | Dead Features: 0 | Total Loss: 0.0786 | Reconstruction Loss: 0.0368 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 5120000 |

  5%|▍         | 2605/55054 [01:11<23:13, 37.64it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0000
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0123
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0023
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: -0.0066
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: -0.0008
Sparsity: 147.7 | Dead Features: 0 | Total Loss: 0.0702 | Reconstruction Loss: 0.0324 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 5324800 |

  5%|▍         | 2705/55054 [01:14<24:25, 35.71it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0002
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0125
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0023
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: -0.0068
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: -0.0010
Sparsity: 152.7 | Dead Features: 0 | Total Loss: 0.0734 | Reconstruction Loss: 0.0337 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 5529600 |

  5%|▌         | 2805/55054 [01:16<24:08, 36.07it/s]

Sparsity: 24.4 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0003
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0124
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0026
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: -0.0067
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0506 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: -0.0009
Sparsity: 154.3 | Dead Features: 0 | Total Loss: 0.0794 | Reconstruction Loss: 0.0362 | L1 Loss: 0.0432 | l1_alpha: 8.0000e-04 | Tokens: 5734400 |

  5%|▌         | 2905/55054 [01:19<24:05, 36.08it/s]

Sparsity: 19.2 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0003
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0125
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0025
Sparsity: 114.3 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: -0.0069
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: -0.0009
Sparsity: 148.2 | Dead Features: 0 | Total Loss: 0.0695 | Reconstruction Loss: 0.0321 | L1 Loss: 0.0374 | l1_alpha: 8.0000e-04 | Tokens: 5939200 |

  5%|▌         | 3005/55054 [01:22<24:20, 35.65it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0002
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0123
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0028
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: -0.0066
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: -0.0012
Sparsity: 131.3 | Dead Features: 0 | Total Loss: 0.0747 | Reconstruction Loss: 0.0386 | L1 Loss: 0.0362 | l1_alpha: 8.0000e-04 | Tokens: 6144000 |

  6%|▌         | 3104/55054 [01:25<22:52, 37.86it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: -0.0006
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: 0.0126
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: 0.0025
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: -0.0066
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: -0.0011
Sparsity: 146.1 | Dead Features: 0 | Total Loss: 0.0742 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 6348800 

  6%|▌         | 3204/55054 [01:27<25:16, 34.19it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: -0.0004
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: 0.0127
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: 0.0026
Sparsity: 115.8 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: -0.0063
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: -0.0011
Sparsity: 148.9 | Dead Features: 0 | Total Loss: 0.0731 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 6553600 

  6%|▌         | 3307/55054 [01:30<24:27, 35.27it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: -0.0003
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: 0.0125
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: 0.0027
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: -0.0065
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: -0.0011
Sparsity: 151.1 | Dead Features: 0 | Total Loss: 0.0736 | Reconstruction Loss: 0.0341 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 6758400 

  6%|▌         | 3406/55054 [01:33<24:56, 34.51it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: -0.0000
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: 0.0122
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: 0.0029
Sparsity: 110.4 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: -0.0061
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: -0.0009
Sparsity: 142.4 | Dead Features: 0 | Total Loss: 0.0699 | Reconstruction Loss: 0.0328 | L1 Loss: 0.0371 | l1_alpha: 8.0000e-04 | Tokens: 6963200 

  6%|▋         | 3506/55054 [01:36<24:53, 34.52it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: -0.0001
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: 0.0125
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: 0.0025
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: -0.0063
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: -0.0010
Sparsity: 150.9 | Dead Features: 0 | Total Loss: 0.0731 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 7168000 

  7%|▋         | 3606/55054 [01:39<25:05, 34.16it/s]

Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: -0.0000
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: 0.0126
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: 0.0024
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: -0.0061
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: -0.0011
Sparsity: 152.6 | Dead Features: 0 | Total Loss: 0.0747 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 7372800 

  7%|▋         | 3706/55054 [01:42<24:55, 34.34it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: -0.0001
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: 0.0125
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: 0.0024
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: -0.0060
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: -0.0009
Sparsity: 152.1 | Dead Features: 0 | Total Loss: 0.0721 | Reconstruction Loss: 0.0333 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 7577600 

  7%|▋         | 3806/55054 [01:45<24:41, 34.60it/s]

Sparsity: 18.8 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: -0.0001
Sparsity: 33.5 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: 0.0127
Sparsity: 45.6 | Dead Features: 0 | Total Loss: 0.0189 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: 0.0026
Sparsity: 113.8 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: -0.0061
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0232 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: -0.0011
Sparsity: 149.4 | Dead Features: 0 | Total Loss: 0.0710 | Reconstruction Loss: 0.0331 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 7782400 

  7%|▋         | 3907/55054 [01:47<23:30, 36.27it/s]

Sparsity: 19.5 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: -0.0002
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: 0.0123
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: 0.0029
Sparsity: 114.7 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: -0.0063
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: -0.0011
Sparsity: 148.5 | Dead Features: 0 | Total Loss: 0.0711 | Reconstruction Loss: 0.0332 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 7987200 

  7%|▋         | 4007/55054 [01:50<23:29, 36.21it/s]

Sparsity: 18.9 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: -0.0001
Sparsity: 34.8 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: 0.0127
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0189 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: 0.0027
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: -0.0062
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: -0.0008
Sparsity: 150.4 | Dead Features: 0 | Total Loss: 0.0721 | Reconstruction Loss: 0.0329 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 8192000 

  7%|▋         | 4107/55054 [01:53<24:00, 35.38it/s]

Sparsity: 23.8 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: -0.0001
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: 0.0125
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: 0.0025
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0401 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: -0.0063
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: -0.0011
Sparsity: 156.4 | Dead Features: 0 | Total Loss: 0.0798 | Reconstruction Loss: 0.0385 | L1 Loss: 0.0413 | l1_alpha: 8.0000e-04 | Tokens: 8396800 

  8%|▊         | 4207/55054 [01:56<23:49, 35.57it/s]

Sparsity: 26.0 | Dead Features: 0 | Total Loss: 0.0159 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: -0.0001
Sparsity: 39.5 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: 0.0125
Sparsity: 51.1 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: 0.0025
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: -0.0064
Sparsity: 126.7 | Dead Features: 0 | Total Loss: 0.0524 | Reconstruction Loss: 0.0269 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: -0.0009
Sparsity: 157.8 | Dead Features: 0 | Total Loss: 0.0775 | Reconstruction Loss: 0.0367 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens: 8601600 

  8%|▊         | 4304/55054 [01:58<23:27, 36.05it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: 0.0001
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: 0.0126
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0130 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: 0.0026
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: -0.0067
Sparsity: 125.4 | Dead Features: 0 | Total Loss: 0.0530 | Reconstruction Loss: 0.0263 | L1 Loss: 0.0266 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: -0.0011
Sparsity: 158.9 | Dead Features: 0 | Total Loss: 0.0794 | Reconstruction Loss: 0.0368 | L1 Loss: 0.0426 | l1_alpha: 8.0000e-04 | Tokens: 8806400 |

  8%|▊         | 4404/55054 [02:01<23:53, 35.32it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: 0.0003
Sparsity: 33.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: 0.0129
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: 0.0024
Sparsity: 114.8 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: -0.0064
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0462 | Reconstruction Loss: 0.0229 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: -0.0009
Sparsity: 138.7 | Dead Features: 0 | Total Loss: 0.0735 | Reconstruction Loss: 0.0364 | L1 Loss: 0.0371 | l1_alpha: 8.0000e-04 | Tokens: 9011200 |

  8%|▊         | 4507/55054 [02:04<23:20, 36.10it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: 0.0001
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: 0.0121
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: 0.0025
Sparsity: 113.6 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: -0.0065
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: -0.0009
Sparsity: 147.7 | Dead Features: 0 | Total Loss: 0.0744 | Reconstruction Loss: 0.0357 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 9216000 |

  8%|▊         | 4607/55054 [02:07<24:03, 34.96it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: 0.0001
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: 0.0126
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: 0.0024
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: -0.0064
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: -0.0011
Sparsity: 148.1 | Dead Features: 0 | Total Loss: 0.0735 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 9420800 |

  9%|▊         | 4704/55054 [02:09<24:28, 34.29it/s]

Sparsity: 18.1 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0061 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: 0.0002
Sparsity: 33.9 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: 0.0124
Sparsity: 44.8 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: 0.0027
Sparsity: 113.0 | Dead Features: 0 | Total Loss: 0.0375 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0194 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: -0.0065
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0462 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: -0.0010
Sparsity: 144.2 | Dead Features: 0 | Total Loss: 0.0703 | Reconstruction Loss: 0.0332 | L1 Loss: 0.0371 | l1_alpha: 8.0000e-04 | Tokens: 9625600 |

  9%|▊         | 4804/55054 [02:12<24:32, 34.13it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: 0.0002
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: 0.0124
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: 0.0023
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: -0.0063
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: -0.0007
Sparsity: 149.6 | Dead Features: 0 | Total Loss: 0.0719 | Reconstruction Loss: 0.0334 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 9830400 |

  9%|▉         | 4904/55054 [02:15<23:37, 35.38it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: 0.0001
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: 0.0122
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: 0.0024
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: -0.0062
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: -0.0012
Sparsity: 149.3 | Dead Features: 0 | Total Loss: 0.0708 | Reconstruction Loss: 0.0326 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 1003

  9%|▉         | 5004/55054 [02:18<23:18, 35.78it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: 0.0004
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: 0.0124
Sparsity: 46.4 | Dead Features: 0 | Total Loss: 0.0189 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: 0.0019
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: -0.0063
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: -0.0008
Sparsity: 148.2 | Dead Features: 0 | Total Loss: 0.0716 | Reconstruction Loss: 0.0335 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 1024

  9%|▉         | 5107/55054 [02:21<20:28, 40.65it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: 0.0003
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: 0.0126
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: 0.0022
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: -0.0062
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: -0.0013
Sparsity: 151.4 | Dead Features: 0 | Total Loss: 0.0743 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 1044

  9%|▉         | 5204/55054 [02:23<23:20, 35.59it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: 0.0005
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: 0.0128
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: 0.0023
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0377 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: -0.0062
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0463 | Reconstruction Loss: 0.0229 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: -0.0010
Sparsity: 147.3 | Dead Features: 0 | Total Loss: 0.0700 | Reconstruction Loss: 0.0326 | L1 Loss: 0.0374 | l1_alpha: 8.0000e-04 | Tokens: 1064

 10%|▉         | 5304/55054 [02:26<23:29, 35.30it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: 0.0003
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: 0.0126
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: 0.0022
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: -0.0063
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: -0.0010
Sparsity: 151.1 | Dead Features: 0 | Total Loss: 0.0709 | Reconstruction Loss: 0.0331 | L1 Loss: 0.0377 | l1_alpha: 8.0000e-04 | Tokens: 1085

 10%|▉         | 5404/55054 [02:29<23:20, 35.46it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: 0.0003
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: 0.0124
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: 0.0022
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: -0.0062
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: -0.0010
Sparsity: 152.1 | Dead Features: 0 | Total Loss: 0.0709 | Reconstruction Loss: 0.0325 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 1105

 10%|▉         | 5504/55054 [02:32<23:06, 35.73it/s]

Sparsity: 23.8 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: 0.0004
Sparsity: 39.4 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: 0.0127
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: 0.0022
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: -0.0063
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: -0.0009
Sparsity: 152.2 | Dead Features: 0 | Total Loss: 0.0720 | Reconstruction Loss: 0.0325 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 1126

 10%|█         | 5605/55054 [02:35<22:25, 36.75it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: 0.0003
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: 0.0128
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: 0.0020
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: -0.0064
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: -0.0013
Sparsity: 153.8 | Dead Features: 0 | Total Loss: 0.0719 | Reconstruction Loss: 0.0333 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 1146

 10%|█         | 5705/55054 [02:37<22:44, 36.16it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: 0.0003
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: 0.0129
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: 0.0024
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: -0.0062
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: -0.0010
Sparsity: 155.2 | Dead Features: 0 | Total Loss: 0.0723 | Reconstruction Loss: 0.0328 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 1167

 11%|█         | 5805/55054 [02:40<23:13, 35.33it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: 0.0004
Sparsity: 34.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: 0.0127
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: 0.0025
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: -0.0061
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: -0.0011
Sparsity: 145.4 | Dead Features: 0 | Total Loss: 0.1270 | Reconstruction Loss: 0.0754 | L1 Loss: 0.0516 | l1_alpha: 8.0000e-04 | Tokens: 1187

 11%|█         | 5905/55054 [02:43<23:06, 35.45it/s]

Sparsity: 25.8 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: -0.0008
Sparsity: 40.0 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: 0.0127
Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: 0.0025
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: -0.0060
Sparsity: 123.9 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: -0.0009
Sparsity: 111.9 | Dead Features: 0 | Total Loss: 0.0796 | Reconstruction Loss: 0.0438 | L1 Loss: 0.0357 | l1_alpha: 8.0000e-04 | Tokens: 120

 11%|█         | 6005/55054 [02:46<22:42, 36.00it/s]

Sparsity: 19.1 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: -0.0010
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: 0.0124
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: 0.0025
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: -0.0061
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: -0.0011
Sparsity: 127.1 | Dead Features: 0 | Total Loss: 0.0749 | Reconstruction Loss: 0.0373 | L1 Loss: 0.0376 | l1_alpha: 8.0000e-04 | Tokens: 122

 11%|█         | 6106/55054 [02:49<23:15, 35.08it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: -0.0007
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: 0.0126
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: 0.0023
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: -0.0064
Sparsity: 121.6 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: -0.0008
Sparsity: 131.5 | Dead Features: 0 | Total Loss: 0.0746 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0377 | l1_alpha: 8.0000e-04 | Tokens: 124

 11%|█▏        | 6206/55054 [02:51<22:37, 35.98it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: -0.0006
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: 0.0127
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: 0.0023
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: -0.0062
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: -0.0006
Sparsity: 136.5 | Dead Features: 0 | Total Loss: 0.0729 | Reconstruction Loss: 0.0359 | L1 Loss: 0.0369 | l1_alpha: 8.0000e-04 | Tokens: 126

 11%|█▏        | 6306/55054 [02:54<23:02, 35.25it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: -0.0003
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: 0.0125
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: 0.0019
Sparsity: 113.4 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: -0.0063
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: -0.0009
Sparsity: 136.9 | Dead Features: 0 | Total Loss: 0.0725 | Reconstruction Loss: 0.0347 | L1 Loss: 0.0378 | l1_alpha: 8.0000e-04 | Tokens: 129

 12%|█▏        | 6407/55054 [02:57<22:48, 35.55it/s]

Sparsity: 19.2 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: -0.0003
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: 0.0127
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: 0.0021
Sparsity: 114.0 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: -0.0061
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: -0.0007
Sparsity: 137.8 | Dead Features: 0 | Total Loss: 0.0709 | Reconstruction Loss: 0.0336 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 131

 12%|█▏        | 6507/55054 [03:00<22:44, 35.58it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: -0.0001
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: 0.0125
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: 0.0023
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: -0.0061
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: -0.0009
Sparsity: 143.9 | Dead Features: 0 | Total Loss: 0.0743 | Reconstruction Loss: 0.0353 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 133

 12%|█▏        | 6607/55054 [03:02<22:42, 35.55it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: -0.0002
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: 0.0127
Sparsity: 46.5 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: 0.0025
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: -0.0064
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: -0.0008
Sparsity: 141.3 | Dead Features: 0 | Total Loss: 0.0728 | Reconstruction Loss: 0.0350 | L1 Loss: 0.0378 | l1_alpha: 8.0000e-04 | Tokens: 135

 12%|█▏        | 6704/55054 [03:05<22:07, 36.42it/s]

Sparsity: 23.3 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: -0.0003
Sparsity: 39.4 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: 0.0129
Sparsity: 50.5 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: 0.0021
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: -0.0063
Sparsity: 125.1 | Dead Features: 0 | Total Loss: 0.0529 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0272 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: -0.0011
Sparsity: 151.5 | Dead Features: 0 | Total Loss: 0.0786 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0417 | l1_alpha: 8.0000e-04 | Tokens: 137

 12%|█▏        | 6806/55054 [03:08<21:45, 36.96it/s]

Sparsity: 21.0 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: -0.0002
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: 0.0128
Sparsity: 49.1 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: 0.0020
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: -0.0064
Sparsity: 122.9 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: -0.0010
Sparsity: 149.4 | Dead Features: 0 | Total Loss: 0.0758 | Reconstruction Loss: 0.0364 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 139

 13%|█▎        | 6906/55054 [03:11<22:13, 36.11it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: -0.0001
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: 0.0131
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: 0.0021
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: -0.0064
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: -0.0009
Sparsity: 146.1 | Dead Features: 0 | Total Loss: 0.0699 | Reconstruction Loss: 0.0331 | L1 Loss: 0.0369 | l1_alpha: 8.0000e-04 | Tokens: 141

 13%|█▎        | 7007/55054 [03:13<21:17, 37.62it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: -0.0001
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: 0.0134
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: 0.0026
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: -0.0062
Sparsity: 124.2 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: -0.0008
Sparsity: 153.9 | Dead Features: 0 | Total Loss: 0.0787 | Reconstruction Loss: 0.0377 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 143

 13%|█▎        | 7105/55054 [03:16<22:00, 36.32it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: -0.0001
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: 0.0129
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: 0.0024
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: -0.0063
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: -0.0008
Sparsity: 141.0 | Dead Features: 0 | Total Loss: 0.0769 | Reconstruction Loss: 0.0386 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 145

 13%|█▎        | 7205/55054 [03:19<22:07, 36.03it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: -0.0002
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: 0.0128
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: 0.0026
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: -0.0063
Sparsity: 123.4 | Dead Features: 0 | Total Loss: 0.0511 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: -0.0011
Sparsity: 151.7 | Dead Features: 0 | Total Loss: 0.0784 | Reconstruction Loss: 0.0375 | L1 Loss: 0.0409 | l1_alpha: 8.0000e-04 | Tokens: 147

 13%|█▎        | 7305/55054 [03:21<22:31, 35.34it/s]

Sparsity: 18.1 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: -0.0000
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: 0.0125
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: 0.0025
Sparsity: 115.8 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: -0.0061
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: -0.0009
Sparsity: 148.1 | Dead Features: 0 | Total Loss: 0.0717 | Reconstruction Loss: 0.0335 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 149

 13%|█▎        | 7407/55054 [03:24<20:22, 38.98it/s]

Sparsity: 23.5 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: 0.0001
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: 0.0126
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: 0.0023
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: -0.0060
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: -0.0007
Sparsity: 151.1 | Dead Features: 0 | Total Loss: 0.0750 | Reconstruction Loss: 0.0354 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 1515

 14%|█▎        | 7507/55054 [03:27<22:15, 35.61it/s]

Sparsity: 24.2 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: 0.0003
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: 0.0124
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: 0.0025
Sparsity: 114.2 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: -0.0063
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: -0.0006
Sparsity: 152.5 | Dead Features: 0 | Total Loss: 0.0755 | Reconstruction Loss: 0.0353 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 1536

 14%|█▍        | 7607/55054 [03:30<22:06, 35.77it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: 0.0003
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: 0.0124
Sparsity: 46.3 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: 0.0026
Sparsity: 114.7 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: -0.0064
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: -0.0009
Sparsity: 167.5 | Dead Features: 0 | Total Loss: 0.0973 | Reconstruction Loss: 0.0536 | L1 Loss: 0.0437 | l1_alpha: 8.0000e-04 | Tokens: 1556

 14%|█▍        | 7707/55054 [03:32<21:53, 36.04it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: 0.0002
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: 0.0121
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: 0.0027
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: -0.0061
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0507 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0259 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: -0.0011
Sparsity: 110.0 | Dead Features: 0 | Total Loss: 0.0793 | Reconstruction Loss: 0.0421 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 1576

 14%|█▍        | 7804/55054 [03:35<21:52, 36.01it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: -0.0006
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: 0.0122
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: 0.0026
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: -0.0060
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: -0.0005
Sparsity: 130.3 | Dead Features: 0 | Total Loss: 0.0736 | Reconstruction Loss: 0.0361 | L1 Loss: 0.0375 | l1_alpha: 8.0000e-04 | Tokens: 159

 14%|█▍        | 7904/55054 [03:38<21:52, 35.91it/s]

Sparsity: 19.3 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: -0.0007
Sparsity: 34.8 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: 0.0123
Sparsity: 45.9 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: 0.0023
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: -0.0063
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: -0.0006
Sparsity: 132.6 | Dead Features: 0 | Total Loss: 0.0717 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0369 | l1_alpha: 8.0000e-04 | Tokens: 161

 15%|█▍        | 8004/55054 [03:41<22:31, 34.81it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: -0.0005
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: 0.0127
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: 0.0020
Sparsity: 115.8 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: -0.0061
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: -0.0010
Sparsity: 135.8 | Dead Features: 0 | Total Loss: 0.0743 | Reconstruction Loss: 0.0360 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 163

 15%|█▍        | 8104/55054 [03:44<22:36, 34.60it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: -0.0002
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: 0.0128
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: 0.0020
Sparsity: 114.3 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: -0.0058
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0467 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: -0.0009
Sparsity: 134.7 | Dead Features: 0 | Total Loss: 0.0700 | Reconstruction Loss: 0.0333 | L1 Loss: 0.0366 | l1_alpha: 8.0000e-04 | Tokens: 165

 15%|█▍        | 8204/55054 [03:46<22:34, 34.59it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: -0.0003
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: 0.0128
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: 0.0022
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: -0.0056
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: -0.0007
Sparsity: 138.4 | Dead Features: 0 | Total Loss: 0.0757 | Reconstruction Loss: 0.0363 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 167

 15%|█▌        | 8308/55054 [03:49<20:40, 37.69it/s]

Sparsity: 18.7 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: -0.0003
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: 0.0126
Sparsity: 46.1 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: 0.0021
Sparsity: 113.9 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: -0.0060
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: -0.0011
Sparsity: 140.5 | Dead Features: 0 | Total Loss: 0.0716 | Reconstruction Loss: 0.0336 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 169

 15%|█▌        | 8405/55054 [03:52<22:34, 34.44it/s]

Sparsity: 19.5 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: -0.0003
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: 0.0128
Sparsity: 46.3 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: 0.0024
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: -0.0058
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0465 | Reconstruction Loss: 0.0232 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: -0.0013
Sparsity: 137.0 | Dead Features: 0 | Total Loss: 0.0708 | Reconstruction Loss: 0.0345 | L1 Loss: 0.0363 | l1_alpha: 8.0000e-04 | Tokens: 172

 15%|█▌        | 8505/55054 [03:55<21:22, 36.31it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: -0.0004
Sparsity: 38.5 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: 0.0130
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: 0.0021
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: -0.0062
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: -0.0012
Sparsity: 145.4 | Dead Features: 0 | Total Loss: 0.0753 | Reconstruction Loss: 0.0358 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 174

 16%|█▌        | 8606/55054 [03:58<23:15, 33.29it/s]

Sparsity: 19.9 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: -0.0003
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: 0.0132
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: 0.0025
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: -0.0060
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: -0.0012
Sparsity: 145.6 | Dead Features: 0 | Total Loss: 0.0728 | Reconstruction Loss: 0.0343 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 176

 16%|█▌        | 8705/55054 [04:00<20:27, 37.75it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: -0.0002
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: 0.0131
Sparsity: 46.4 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: 0.0026
Sparsity: 114.8 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: -0.0060
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0467 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: -0.0010
Sparsity: 143.7 | Dead Features: 0 | Total Loss: 0.0697 | Reconstruction Loss: 0.0327 | L1 Loss: 0.0370 | l1_alpha: 8.0000e-04 | Tokens: 178

 16%|█▌        | 8807/55054 [04:03<21:23, 36.04it/s]

Sparsity: 24.4 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: 0.0000
Sparsity: 38.9 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: 0.0130
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: 0.0026
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: -0.0059
Sparsity: 122.0 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: -0.0011
Sparsity: 147.2 | Dead Features: 0 | Total Loss: 0.0712 | Reconstruction Loss: 0.0333 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 1802

 16%|█▌        | 8907/55054 [04:06<21:57, 35.04it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: 0.0002
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: 0.0132
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: 0.0024
Sparsity: 114.2 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: -0.0060
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: -0.0008
Sparsity: 149.2 | Dead Features: 0 | Total Loss: 0.0739 | Reconstruction Loss: 0.0350 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 1822

 16%|█▋        | 9004/55054 [04:09<21:40, 35.40it/s]

Sparsity: 19.5 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: 0.0002
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: 0.0130
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: 0.0023
Sparsity: 114.0 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: -0.0062
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: -0.0009
Sparsity: 147.8 | Dead Features: 0 | Total Loss: 0.0723 | Reconstruction Loss: 0.0340 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 1843

 17%|█▋        | 9104/55054 [04:11<21:36, 35.44it/s]

Sparsity: 26.4 | Dead Features: 0 | Total Loss: 0.0163 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: 0.0002
Sparsity: 41.8 | Dead Features: 0 | Total Loss: 0.0163 | Reconstruction Loss: 0.0107 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: 0.0134
Sparsity: 52.4 | Dead Features: 0 | Total Loss: 0.0214 | Reconstruction Loss: 0.0133 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: 0.0024
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0424 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: -0.0062
Sparsity: 128.4 | Dead Features: 0 | Total Loss: 0.0530 | Reconstruction Loss: 0.0270 | L1 Loss: 0.0261 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: -0.0010
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0868 | Reconstruction Loss: 0.0499 | L1 Loss: 0.0368 | l1_alpha: 8.0000e-04 | Tokens: 1863

 17%|█▋        | 9204/55054 [04:14<21:24, 35.70it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: -0.0004
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: 0.0133
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: 0.0024
Sparsity: 113.5 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: -0.0059
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: -0.0010
Sparsity: 136.4 | Dead Features: 0 | Total Loss: 0.0739 | Reconstruction Loss: 0.0362 | L1 Loss: 0.0377 | l1_alpha: 8.0000e-04 | Tokens: 188

 17%|█▋        | 9304/55054 [04:17<21:35, 35.31it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: -0.0005
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: 0.0130
Sparsity: 45.3 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: 0.0023
Sparsity: 111.6 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: -0.0062
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0466 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: -0.0012
Sparsity: 138.6 | Dead Features: 0 | Total Loss: 0.0705 | Reconstruction Loss: 0.0341 | L1 Loss: 0.0364 | l1_alpha: 8.0000e-04 | Tokens: 190

 17%|█▋        | 9404/55054 [04:20<22:01, 34.54it/s]

Sparsity: 23.7 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: -0.0004
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: 0.0130
Sparsity: 51.6 | Dead Features: 0 | Total Loss: 0.0217 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: 0.0021
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: -0.0061
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0527 | Reconstruction Loss: 0.0275 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: -0.0010
Sparsity: 147.7 | Dead Features: 0 | Total Loss: 0.0781 | Reconstruction Loss: 0.0367 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens: 192

 17%|█▋        | 9506/55054 [04:23<21:23, 35.48it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: -0.0002
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: 0.0130
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: 0.0022
Sparsity: 113.9 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: -0.0058
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: -0.0012
Sparsity: 146.1 | Dead Features: 0 | Total Loss: 0.0719 | Reconstruction Loss: 0.0340 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 194

 17%|█▋        | 9604/55054 [04:25<21:14, 35.65it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: 0.0001
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: 0.0133
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: 0.0021
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: -0.0057
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: -0.0009
Sparsity: 146.4 | Dead Features: 0 | Total Loss: 0.0722 | Reconstruction Loss: 0.0338 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 1966

 18%|█▊        | 9704/55054 [04:28<21:27, 35.23it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: 0.0002
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: 0.0131
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: 0.0020
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: -0.0060
Sparsity: 125.8 | Dead Features: 0 | Total Loss: 0.0520 | Reconstruction Loss: 0.0262 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: -0.0012
Sparsity: 152.4 | Dead Features: 0 | Total Loss: 0.0764 | Reconstruction Loss: 0.0363 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens: 1986

 18%|█▊        | 9805/55054 [04:31<21:20, 35.33it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: 0.0002
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: 0.0131
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: 0.0020
Sparsity: 114.7 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: -0.0060
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: -0.0013
Sparsity: 147.0 | Dead Features: 0 | Total Loss: 0.0711 | Reconstruction Loss: 0.0330 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 2007

 18%|█▊        | 9904/55054 [04:34<18:53, 39.82it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: 0.0002
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: 0.0131
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: 0.0021
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: -0.0060
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: -0.0012
Sparsity: 149.9 | Dead Features: 0 | Total Loss: 0.0736 | Reconstruction Loss: 0.0340 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 2027

 18%|█▊        | 10005/55054 [04:36<21:09, 35.48it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: 0.0001
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: 0.0133
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: 0.0025
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: -0.0061
Sparsity: 120.5 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: -0.0014
Sparsity: 138.0 | Dead Features: 0 | Total Loss: 0.0741 | Reconstruction Loss: 0.0365 | L1 Loss: 0.0376 | l1_alpha: 8.0000e-04 | Tokens: 2048

 18%|█▊        | 10105/55054 [04:39<21:12, 35.32it/s]

Sparsity: 24.7 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: 0.0000
Sparsity: 39.8 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: 0.0131
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: 0.0022
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: -0.0058
Sparsity: 124.2 | Dead Features: 0 | Total Loss: 0.0512 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: -0.0015
Sparsity: 149.8 | Dead Features: 0 | Total Loss: 0.0777 | Reconstruction Loss: 0.0375 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 2068

 19%|█▊        | 10205/55054 [04:42<21:32, 34.71it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: 0.0000
Sparsity: 38.6 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: 0.0129
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: 0.0018
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: -0.0058
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: -0.0011
Sparsity: 150.0 | Dead Features: 0 | Total Loss: 0.0739 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 2088

 19%|█▊        | 10305/55054 [04:45<20:54, 35.67it/s]

Sparsity: 23.8 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: 0.0003
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: 0.0126
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: 0.0022
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: -0.0060
Sparsity: 123.9 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: -0.0010
Sparsity: 152.0 | Dead Features: 0 | Total Loss: 0.0740 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 2109

 19%|█▉        | 10405/55054 [04:48<20:45, 35.85it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: 0.0004
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: 0.0126
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: 0.0024
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: -0.0062
Sparsity: 122.7 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: -0.0009
Sparsity: 143.6 | Dead Features: 0 | Total Loss: 0.0740 | Reconstruction Loss: 0.0356 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 2129

 19%|█▉        | 10505/55054 [04:50<20:53, 35.54it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: 0.0002
Sparsity: 38.8 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: 0.0127
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: 0.0025
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: -0.0061
Sparsity: 124.9 | Dead Features: 0 | Total Loss: 0.0513 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0261 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: -0.0012
Sparsity: 153.2 | Dead Features: 0 | Total Loss: 0.0771 | Reconstruction Loss: 0.0360 | L1 Loss: 0.0412 | l1_alpha: 8.0000e-04 | Tokens: 2150

 19%|█▉        | 10604/55054 [04:53<19:19, 38.35it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: 0.0004
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: 0.0126
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: 0.0024
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: -0.0063
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: -0.0009
Sparsity: 150.6 | Dead Features: 0 | Total Loss: 0.0721 | Reconstruction Loss: 0.0333 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 2170

 19%|█▉        | 10704/55054 [04:56<19:19, 38.24it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: 0.0004
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: 0.0124
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: 0.0024
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: -0.0063
Sparsity: 122.0 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: -0.0010
Sparsity: 150.9 | Dead Features: 0 | Total Loss: 0.0737 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 2191

 20%|█▉        | 10805/55054 [04:59<20:55, 35.23it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: 0.0004
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: 0.0123
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: 0.0021
Sparsity: 114.4 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: -0.0058
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: -0.0010
Sparsity: 151.1 | Dead Features: 0 | Total Loss: 0.0707 | Reconstruction Loss: 0.0334 | L1 Loss: 0.0373 | l1_alpha: 8.0000e-04 | Tokens: 2211

 20%|█▉        | 10905/55054 [05:01<20:31, 35.84it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: 0.0005
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: 0.0124
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: 0.0022
Sparsity: 115.4 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: -0.0058
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: -0.0009
Sparsity: 142.9 | Dead Features: 0 | Total Loss: 0.0721 | Reconstruction Loss: 0.0347 | L1 Loss: 0.0374 | l1_alpha: 8.0000e-04 | Tokens: 2232

 20%|█▉        | 11006/55054 [05:04<22:26, 32.72it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: 0.0004
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: 0.0126
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: 0.0020
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0375 | Reconstruction Loss: 0.0178 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: -0.0058
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0462 | Reconstruction Loss: 0.0231 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: -0.0009
Sparsity: 147.5 | Dead Features: 0 | Total Loss: 0.0685 | Reconstruction Loss: 0.0315 | L1 Loss: 0.0369 | l1_alpha: 8.0000e-04 | Tokens: 2252

 20%|██        | 11104/55054 [05:07<21:40, 33.80it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: 0.0003
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: 0.0129
Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: 0.0022
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: -0.0057
Sparsity: 125.3 | Dead Features: 0 | Total Loss: 0.0509 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: -0.0011
Sparsity: 153.9 | Dead Features: 0 | Total Loss: 0.0757 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens: 2273

 20%|██        | 11204/55054 [05:10<22:10, 32.97it/s]

Sparsity: 19.5 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: 0.0003
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: 0.0129
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: 0.0023
Sparsity: 115.4 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: -0.0058
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: -0.0010
Sparsity: 152.0 | Dead Features: 0 | Total Loss: 0.0735 | Reconstruction Loss: 0.0338 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 2293

 21%|██        | 11304/55054 [05:13<20:57, 34.78it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: 0.0003
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: 0.0130
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: 0.0023
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: -0.0056
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: -0.0008
Sparsity: 162.5 | Dead Features: 0 | Total Loss: 0.0864 | Reconstruction Loss: 0.0427 | L1 Loss: 0.0437 | l1_alpha: 8.0000e-04 | Tokens: 2314

 21%|██        | 11404/55054 [05:16<21:03, 34.54it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: 0.0005
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: 0.0128
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: 0.0023
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: -0.0056
Sparsity: 121.9 | Dead Features: 0 | Total Loss: 0.0504 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: -0.0011
Sparsity: 154.6 | Dead Features: 0 | Total Loss: 0.0766 | Reconstruction Loss: 0.0361 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens: 2334

 21%|██        | 11506/55054 [05:18<20:34, 35.26it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: 0.0002
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: 0.0130
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: 0.0024
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: -0.0056
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: -0.0008
Sparsity: 153.4 | Dead Features: 0 | Total Loss: 0.0746 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 2355

 21%|██        | 11606/55054 [05:21<20:31, 35.29it/s]

Sparsity: 21.0 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: 0.0002
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: 0.0128
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: 0.0025
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: -0.0056
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: -0.0008
Sparsity: 145.5 | Dead Features: 0 | Total Loss: 0.0801 | Reconstruction Loss: 0.0413 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 2375

 21%|██▏       | 11706/55054 [05:24<20:41, 34.91it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: 0.0004
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: 0.0127
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: 0.0025
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: -0.0056
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: -0.0009
Sparsity: 145.5 | Dead Features: 0 | Total Loss: 0.0714 | Reconstruction Loss: 0.0329 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 2396

 21%|██▏       | 11806/55054 [05:27<20:35, 35.01it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: 0.0003
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: 0.0129
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: 0.0024
Sparsity: 115.8 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: -0.0056
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: -0.0009
Sparsity: 151.6 | Dead Features: 0 | Total Loss: 0.0749 | Reconstruction Loss: 0.0356 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 2416

 22%|██▏       | 11906/55054 [05:30<21:18, 33.76it/s]

Sparsity: 24.4 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: 0.0005
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0153 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: 0.0127
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: 0.0024
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: -0.0059
Sparsity: 124.6 | Dead Features: 0 | Total Loss: 0.0510 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: -0.0010
Sparsity: 156.9 | Dead Features: 0 | Total Loss: 0.0777 | Reconstruction Loss: 0.0364 | L1 Loss: 0.0413 | l1_alpha: 8.0000e-04 | Tokens: 2437

 22%|██▏       | 12006/55054 [05:33<19:55, 36.02it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: 0.0005
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: 0.0129
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: 0.0024
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: -0.0064
Sparsity: 125.7 | Dead Features: 0 | Total Loss: 0.0502 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: -0.0007
Sparsity: 157.8 | Dead Features: 0 | Total Loss: 0.0759 | Reconstruction Loss: 0.0353 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 2457

 22%|██▏       | 12106/55054 [05:35<20:01, 35.74it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: 0.0005
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: 0.0129
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: 0.0021
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: -0.0061
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: -0.0010
Sparsity: 153.5 | Dead Features: 0 | Total Loss: 0.0738 | Reconstruction Loss: 0.0341 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 2478

 22%|██▏       | 12207/55054 [05:38<19:44, 36.19it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: 0.0005
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: 0.0131
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: 0.0024
Sparsity: 115.8 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: -0.0061
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: -0.0009
Sparsity: 151.5 | Dead Features: 0 | Total Loss: 0.0716 | Reconstruction Loss: 0.0335 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 2498

 22%|██▏       | 12307/55054 [05:41<19:54, 35.78it/s]

Sparsity: 27.0 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0068 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: 0.0006
Sparsity: 39.6 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: 0.0131
Sparsity: 52.8 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: 0.0020
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0422 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: -0.0060
Sparsity: 126.2 | Dead Features: 0 | Total Loss: 0.0523 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0267 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: -0.0011
Sparsity: 155.9 | Dead Features: 0 | Total Loss: 0.0798 | Reconstruction Loss: 0.0364 | L1 Loss: 0.0433 | l1_alpha: 8.0000e-04 | Tokens: 2519

 23%|██▎       | 12407/55054 [05:44<19:36, 36.24it/s]

Sparsity: 19.9 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: 0.0006
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: 0.0130
Sparsity: 45.4 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: 0.0023
Sparsity: 114.7 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: -0.0058
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: -0.0011
Sparsity: 144.5 | Dead Features: 0 | Total Loss: 0.0707 | Reconstruction Loss: 0.0338 | L1 Loss: 0.0369 | l1_alpha: 8.0000e-04 | Tokens: 2539

 23%|██▎       | 12507/55054 [05:46<18:29, 38.35it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: 0.0006
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: 0.0133
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: 0.0024
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: -0.0056
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: -0.0011
Sparsity: 152.8 | Dead Features: 0 | Total Loss: 0.0738 | Reconstruction Loss: 0.0343 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 2560

 23%|██▎       | 12607/55054 [05:49<19:46, 35.79it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: 0.0005
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: 0.0129
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: 0.0024
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: -0.0059
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: -0.0009
Sparsity: 154.1 | Dead Features: 0 | Total Loss: 0.0745 | Reconstruction Loss: 0.0354 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens: 2580

 23%|██▎       | 12707/55054 [05:52<19:58, 35.33it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: 0.0007
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: 0.0131
Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: 0.0025
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0401 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: -0.0059
Sparsity: 124.5 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: -0.0010
Sparsity: 156.3 | Dead Features: 0 | Total Loss: 0.0749 | Reconstruction Loss: 0.0352 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 2600

 23%|██▎       | 12806/55054 [05:54<18:58, 37.10it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: 0.0007
Sparsity: 38.8 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: 0.0131
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: 0.0025
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: -0.0060
Sparsity: 124.1 | Dead Features: 0 | Total Loss: 0.0525 | Reconstruction Loss: 0.0272 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: -0.0006
Sparsity: 154.9 | Dead Features: 0 | Total Loss: 0.0759 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 2621

 23%|██▎       | 12906/55054 [05:57<19:35, 35.85it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: 0.0004
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: 0.0129
Sparsity: 44.9 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: 0.0024
Sparsity: 113.6 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: -0.0058
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: -0.0006
Sparsity: 147.3 | Dead Features: 0 | Total Loss: 0.0728 | Reconstruction Loss: 0.0355 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 2641

 24%|██▎       | 13006/55054 [06:00<19:26, 36.04it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: 0.0007
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: 0.0127
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: 0.0027
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: -0.0058
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: -0.0007
Sparsity: 135.9 | Dead Features: 0 | Total Loss: 0.0744 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0375 | l1_alpha: 8.0000e-04 | Tokens: 2662

 24%|██▍       | 13106/55054 [06:03<19:33, 35.76it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: 0.0005
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: 0.0128
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: 0.0028
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: -0.0058
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: -0.0008
Sparsity: 141.0 | Dead Features: 0 | Total Loss: 0.0722 | Reconstruction Loss: 0.0335 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 2682

 24%|██▍       | 13206/55054 [06:05<19:33, 35.66it/s]

Sparsity: 21.0 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: 0.0004
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: 0.0128
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: 0.0030
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: -0.0054
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: -0.0008
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0698 | Reconstruction Loss: 0.0326 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 2703

 24%|██▍       | 13304/55054 [06:08<19:32, 35.62it/s]

Sparsity: 19.1 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: 0.0003
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: 0.0128
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: 0.0028
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: -0.0055
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: -0.0007
Sparsity: 148.5 | Dead Features: 0 | Total Loss: 0.0740 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 2723

 24%|██▍       | 13404/55054 [06:11<19:03, 36.41it/s]

Sparsity: 18.8 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: 0.0006
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: 0.0128
Sparsity: 46.1 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: 0.0024
Sparsity: 112.5 | Dead Features: 0 | Total Loss: 0.0374 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0193 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: -0.0055
Sparsity: 115.4 | Dead Features: 0 | Total Loss: 0.0458 | Reconstruction Loss: 0.0229 | L1 Loss: 0.0229 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: -0.0008
Sparsity: 145.4 | Dead Features: 0 | Total Loss: 0.0689 | Reconstruction Loss: 0.0320 | L1 Loss: 0.0369 | l1_alpha: 8.0000e-04 | Tokens: 2744

 25%|██▍       | 13504/55054 [06:14<19:11, 36.07it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: 0.0007
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: 0.0129
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: 0.0024
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: -0.0055
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: -0.0004
Sparsity: 150.1 | Dead Features: 0 | Total Loss: 0.0701 | Reconstruction Loss: 0.0324 | L1 Loss: 0.0376 | l1_alpha: 8.0000e-04 | Tokens: 2764

 25%|██▍       | 13604/55054 [06:16<19:26, 35.52it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: 0.0006
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: 0.0127
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: 0.0026
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: -0.0056
Sparsity: 121.6 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: -0.0004
Sparsity: 156.9 | Dead Features: 0 | Total Loss: 0.0754 | Reconstruction Loss: 0.0352 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 2785

 25%|██▍       | 13704/55054 [06:19<18:58, 36.33it/s]

Sparsity: 23.4 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: 0.0007
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: 0.0129
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: 0.0023
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: -0.0055
Sparsity: 123.3 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: -0.0004
Sparsity: 158.0 | Dead Features: 0 | Total Loss: 0.0801 | Reconstruction Loss: 0.0376 | L1 Loss: 0.0424 | l1_alpha: 8.0000e-04 | Tokens: 2805

 25%|██▌       | 13804/55054 [06:22<19:03, 36.08it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: 0.0006
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: 0.0125
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: 0.0021
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: -0.0056
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: -0.0005
Sparsity: 139.8 | Dead Features: 0 | Total Loss: 0.0748 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 2826

 25%|██▌       | 13904/55054 [06:25<19:00, 36.07it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: 0.0002
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: 0.0126
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: 0.0017
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: -0.0056
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: -0.0007
Sparsity: 150.7 | Dead Features: 0 | Total Loss: 0.0743 | Reconstruction Loss: 0.0351 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 2846

 25%|██▌       | 14004/55054 [06:27<19:15, 35.53it/s]

Sparsity: 18.5 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: 0.0007
Sparsity: 34.4 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: 0.0129
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0189 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: 0.0022
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: -0.0058
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: -0.0006
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0715 | Reconstruction Loss: 0.0339 | L1 Loss: 0.0376 | l1_alpha: 8.0000e-04 | Tokens: 2867

 26%|██▌       | 14108/55054 [06:30<16:42, 40.85it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: 0.0009
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: 0.0132
Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: 0.0019
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: -0.0055
Sparsity: 122.7 | Dead Features: 0 | Total Loss: 0.0523 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0271 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: -0.0006
Sparsity: 133.9 | Dead Features: 0 | Total Loss: 0.0885 | Reconstruction Loss: 0.0482 | L1 Loss: 0.0403 | l1_alpha: 8.0000e-04 | Tokens: 2887

 26%|██▌       | 14206/55054 [06:33<18:33, 36.69it/s]

Sparsity: 18.2 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: 0.0005
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: 0.0132
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: 0.0023
Sparsity: 115.4 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: -0.0055
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: -0.0007
Sparsity: 139.1 | Dead Features: 0 | Total Loss: 0.0726 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 2908

 26%|██▌       | 14306/55054 [06:35<18:56, 35.84it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: 0.0003
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: 0.0132
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: 0.0024
Sparsity: 114.6 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: -0.0055
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: -0.0005
Sparsity: 146.1 | Dead Features: 0 | Total Loss: 0.0732 | Reconstruction Loss: 0.0345 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 2928

 26%|██▌       | 14406/55054 [06:38<18:47, 36.06it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: 0.0006
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: 0.0132
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: 0.0021
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: -0.0057
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: -0.0007
Sparsity: 147.0 | Dead Features: 0 | Total Loss: 0.0722 | Reconstruction Loss: 0.0339 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 2949

 26%|██▋       | 14506/55054 [06:41<18:41, 36.17it/s]

Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: 0.0006
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: 0.0130
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: 0.0023
Sparsity: 117.3 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: -0.0059
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: -0.0007
Sparsity: 147.4 | Dead Features: 0 | Total Loss: 0.0741 | Reconstruction Loss: 0.0358 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 2969

 27%|██▋       | 14607/55054 [06:44<18:38, 36.15it/s]

Sparsity: 24.5 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: 0.0008
Sparsity: 40.0 | Dead Features: 0 | Total Loss: 0.0163 | Reconstruction Loss: 0.0108 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: 0.0129
Sparsity: 52.6 | Dead Features: 0 | Total Loss: 0.0219 | Reconstruction Loss: 0.0136 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: 0.0020
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0433 | Reconstruction Loss: 0.0218 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: -0.0058
Sparsity: 131.3 | Dead Features: 0 | Total Loss: 0.0549 | Reconstruction Loss: 0.0280 | L1 Loss: 0.0268 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: -0.0008
Sparsity: 160.1 | Dead Features: 0 | Total Loss: 0.0802 | Reconstruction Loss: 0.0384 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 2990

 27%|██▋       | 14707/55054 [06:46<18:22, 36.59it/s]

Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 30105600 | Self Similarity: 0.0008
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 30105600 | Self Similarity: 0.0130
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 30105600 | Self Similarity: 0.0022
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 30105600 | Self Similarity: -0.0058
Sparsity: 123.1 | Dead Features: 0 | Total Loss: 0.0510 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 30105600 | Self Similarity: -0.0005
Sparsity: 154.4 | Dead Features: 0 | Total Loss: 0.0761 | Reconstruction Loss: 0.0354 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 3010

 27%|██▋       | 14807/55054 [06:49<18:45, 35.76it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 30310400 | Self Similarity: 0.0007
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 30310400 | Self Similarity: 0.0131
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 30310400 | Self Similarity: 0.0023
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 30310400 | Self Similarity: -0.0057
Sparsity: 122.3 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 30310400 | Self Similarity: -0.0006
Sparsity: 153.2 | Dead Features: 0 | Total Loss: 0.0757 | Reconstruction Loss: 0.0351 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 3031

 27%|██▋       | 14904/55054 [06:52<18:37, 35.92it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 30515200 | Self Similarity: 0.0008
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 30515200 | Self Similarity: 0.0131
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 30515200 | Self Similarity: 0.0019
Sparsity: 115.4 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 30515200 | Self Similarity: -0.0058
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 30515200 | Self Similarity: -0.0006
Sparsity: 149.5 | Dead Features: 0 | Total Loss: 0.0728 | Reconstruction Loss: 0.0347 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 3051

 27%|██▋       | 15008/55054 [06:55<18:04, 36.91it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 30720000 | Self Similarity: 0.0009
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 30720000 | Self Similarity: 0.0129
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 30720000 | Self Similarity: 0.0016
Sparsity: 114.4 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 30720000 | Self Similarity: -0.0059
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 30720000 | Self Similarity: -0.0009
Sparsity: 146.5 | Dead Features: 0 | Total Loss: 0.0726 | Reconstruction Loss: 0.0342 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 3072

 27%|██▋       | 15104/55054 [06:57<18:22, 36.23it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 30924800 | Self Similarity: 0.0009
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 30924800 | Self Similarity: 0.0130
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 30924800 | Self Similarity: 0.0018
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 30924800 | Self Similarity: -0.0061
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 30924800 | Self Similarity: -0.0008
Sparsity: 153.6 | Dead Features: 0 | Total Loss: 0.0749 | Reconstruction Loss: 0.0352 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 3092

 28%|██▊       | 15204/55054 [07:00<18:07, 36.64it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 31129600 | Self Similarity: 0.0007
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 31129600 | Self Similarity: 0.0130
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 31129600 | Self Similarity: 0.0019
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 31129600 | Self Similarity: -0.0059
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 31129600 | Self Similarity: -0.0008
Sparsity: 153.0 | Dead Features: 0 | Total Loss: 0.0717 | Reconstruction Loss: 0.0331 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 3112

 28%|██▊       | 15304/55054 [07:03<18:30, 35.79it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 31334400 | Self Similarity: 0.0009
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 31334400 | Self Similarity: 0.0132
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 31334400 | Self Similarity: 0.0021
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 31334400 | Self Similarity: -0.0061
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 31334400 | Self Similarity: -0.0004
Sparsity: 97.5 | Dead Features: 0 | Total Loss: 0.1241 | Reconstruction Loss: 0.0852 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 31334

 28%|██▊       | 15404/55054 [07:06<18:13, 36.25it/s]

Sparsity: 19.3 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 31539200 | Self Similarity: 0.0002
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 31539200 | Self Similarity: 0.0128
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 31539200 | Self Similarity: 0.0020
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 31539200 | Self Similarity: -0.0058
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 31539200 | Self Similarity: -0.0006
Sparsity: 112.3 | Dead Features: 0 | Total Loss: 0.0795 | Reconstruction Loss: 0.0449 | L1 Loss: 0.0346 | l1_alpha: 8.0000e-04 | Tokens: 3153

 28%|██▊       | 15504/55054 [07:08<18:29, 35.66it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 31744000 | Self Similarity: 0.0001
Sparsity: 38.7 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 31744000 | Self Similarity: 0.0126
Sparsity: 48.9 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 31744000 | Self Similarity: 0.0021
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 31744000 | Self Similarity: -0.0060
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 31744000 | Self Similarity: -0.0008
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0783 | Reconstruction Loss: 0.0407 | L1 Loss: 0.0376 | l1_alpha: 8.0000e-04 | Tokens: 3174

 28%|██▊       | 15604/55054 [07:11<17:52, 36.78it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 31948800 | Self Similarity: 0.0001
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 31948800 | Self Similarity: 0.0124
Sparsity: 46.5 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 31948800 | Self Similarity: 0.0020
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 31948800 | Self Similarity: -0.0060
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 31948800 | Self Similarity: -0.0008
Sparsity: 126.5 | Dead Features: 0 | Total Loss: 0.0754 | Reconstruction Loss: 0.0386 | L1 Loss: 0.0368 | l1_alpha: 8.0000e-04 | Tokens: 3194

 29%|██▊       | 15704/55054 [07:14<18:11, 36.05it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 32153600 | Self Similarity: 0.0001
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 32153600 | Self Similarity: 0.0128
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 32153600 | Self Similarity: 0.0025
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 32153600 | Self Similarity: -0.0059
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0498 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 32153600 | Self Similarity: -0.0007
Sparsity: 133.7 | Dead Features: 0 | Total Loss: 0.0767 | Reconstruction Loss: 0.0378 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 3215

 29%|██▊       | 15804/55054 [07:17<18:24, 35.54it/s]

Sparsity: 18.5 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 32358400 | Self Similarity: 0.0001
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 32358400 | Self Similarity: 0.0129
Sparsity: 45.8 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 32358400 | Self Similarity: 0.0023
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 32358400 | Self Similarity: -0.0058
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0462 | Reconstruction Loss: 0.0230 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 32358400 | Self Similarity: -0.0009
Sparsity: 130.6 | Dead Features: 0 | Total Loss: 0.0690 | Reconstruction Loss: 0.0331 | L1 Loss: 0.0359 | l1_alpha: 8.0000e-04 | Tokens: 3235

 29%|██▉       | 15904/55054 [07:19<18:00, 36.24it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 32563200 | Self Similarity: 0.0001
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 32563200 | Self Similarity: 0.0131
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 32563200 | Self Similarity: 0.0023
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 32563200 | Self Similarity: -0.0058
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 32563200 | Self Similarity: -0.0006
Sparsity: 136.2 | Dead Features: 0 | Total Loss: 0.0717 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0373 | l1_alpha: 8.0000e-04 | Tokens: 3256

 29%|██▉       | 16004/55054 [07:22<18:14, 35.68it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 32768000 | Self Similarity: 0.0002
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 32768000 | Self Similarity: 0.0131
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 32768000 | Self Similarity: 0.0023
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 32768000 | Self Similarity: -0.0058
Sparsity: 119.4 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 32768000 | Self Similarity: -0.0007
Sparsity: 135.2 | Dead Features: 0 | Total Loss: 0.0709 | Reconstruction Loss: 0.0337 | L1 Loss: 0.0371 | l1_alpha: 8.0000e-04 | Tokens: 3276

 29%|██▉       | 16104/55054 [07:25<17:55, 36.21it/s]

Sparsity: 23.9 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 32972800 | Self Similarity: 0.0005
Sparsity: 39.5 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 32972800 | Self Similarity: 0.0131
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 32972800 | Self Similarity: 0.0024
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 32972800 | Self Similarity: -0.0061
Sparsity: 123.6 | Dead Features: 0 | Total Loss: 0.0514 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 32972800 | Self Similarity: -0.0007
Sparsity: 143.6 | Dead Features: 0 | Total Loss: 0.0776 | Reconstruction Loss: 0.0373 | L1 Loss: 0.0403 | l1_alpha: 8.0000e-04 | Tokens: 3297

 29%|██▉       | 16204/55054 [07:28<17:55, 36.11it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 33177600 | Self Similarity: 0.0004
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 33177600 | Self Similarity: 0.0131
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 33177600 | Self Similarity: 0.0021
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 33177600 | Self Similarity: -0.0059
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 33177600 | Self Similarity: -0.0009
Sparsity: 142.7 | Dead Features: 0 | Total Loss: 0.0740 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 3317

 30%|██▉       | 16304/55054 [07:30<17:54, 36.06it/s]

Sparsity: 18.3 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 33382400 | Self Similarity: 0.0004
Sparsity: 33.1 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 33382400 | Self Similarity: 0.0128
Sparsity: 45.0 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 33382400 | Self Similarity: 0.0019
Sparsity: 112.6 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0194 | l1_alpha: 8.0000e-04 | Tokens: 33382400 | Self Similarity: -0.0059
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0466 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0230 | l1_alpha: 8.0000e-04 | Tokens: 33382400 | Self Similarity: -0.0006
Sparsity: 143.2 | Dead Features: 0 | Total Loss: 0.0723 | Reconstruction Loss: 0.0341 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 3338

 30%|██▉       | 16404/55054 [07:33<17:53, 36.01it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 33587200 | Self Similarity: 0.0005
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 33587200 | Self Similarity: 0.0132
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 33587200 | Self Similarity: 0.0020
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 33587200 | Self Similarity: -0.0059
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 33587200 | Self Similarity: -0.0007
Sparsity: 146.6 | Dead Features: 0 | Total Loss: 0.0728 | Reconstruction Loss: 0.0347 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 3358

 30%|██▉       | 16506/55054 [07:36<17:14, 37.27it/s]

Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 33792000 | Self Similarity: 0.0005
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 33792000 | Self Similarity: 0.0130
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 33792000 | Self Similarity: 0.0022
Sparsity: 114.6 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 33792000 | Self Similarity: -0.0058
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 33792000 | Self Similarity: -0.0007
Sparsity: 144.7 | Dead Features: 0 | Total Loss: 0.0717 | Reconstruction Loss: 0.0331 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 3379

 30%|███       | 16606/55054 [07:39<17:45, 36.10it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 33996800 | Self Similarity: 0.0007
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 33996800 | Self Similarity: 0.0129
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 33996800 | Self Similarity: 0.0020
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 33996800 | Self Similarity: -0.0058
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 33996800 | Self Similarity: -0.0006
Sparsity: 147.8 | Dead Features: 0 | Total Loss: 0.0732 | Reconstruction Loss: 0.0340 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens: 3399

 30%|███       | 16706/55054 [07:41<17:37, 36.27it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 34201600 | Self Similarity: 0.0006
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 34201600 | Self Similarity: 0.0128
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 34201600 | Self Similarity: 0.0021
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 34201600 | Self Similarity: -0.0057
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 34201600 | Self Similarity: -0.0006
Sparsity: 144.2 | Dead Features: 0 | Total Loss: 0.0717 | Reconstruction Loss: 0.0333 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 3420

 31%|███       | 16804/55054 [07:44<17:07, 37.23it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 34406400 | Self Similarity: 0.0006
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 34406400 | Self Similarity: 0.0130
Sparsity: 49.9 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 34406400 | Self Similarity: 0.0020
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 34406400 | Self Similarity: -0.0059
Sparsity: 124.3 | Dead Features: 0 | Total Loss: 0.0501 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 34406400 | Self Similarity: -0.0007
Sparsity: 152.0 | Dead Features: 0 | Total Loss: 0.0763 | Reconstruction Loss: 0.0358 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens: 3440

 31%|███       | 16905/55054 [07:47<16:14, 39.14it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 34611200 | Self Similarity: 0.0008
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 34611200 | Self Similarity: 0.0130
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 34611200 | Self Similarity: 0.0023
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 34611200 | Self Similarity: -0.0060
Sparsity: 122.3 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 34611200 | Self Similarity: -0.0006
Sparsity: 148.6 | Dead Features: 0 | Total Loss: 0.0760 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 3461

 31%|███       | 17004/55054 [07:49<17:34, 36.09it/s]

Sparsity: 19.3 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 34816000 | Self Similarity: 0.0008
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 34816000 | Self Similarity: 0.0129
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 34816000 | Self Similarity: 0.0020
Sparsity: 114.1 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 34816000 | Self Similarity: -0.0056
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0465 | Reconstruction Loss: 0.0230 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 34816000 | Self Similarity: -0.0007
Sparsity: 146.5 | Dead Features: 0 | Total Loss: 0.0714 | Reconstruction Loss: 0.0332 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 3481

 31%|███       | 17104/55054 [07:52<17:38, 35.85it/s]

Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 35020800 | Self Similarity: 0.0007
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 35020800 | Self Similarity: 0.0127
Sparsity: 46.4 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 35020800 | Self Similarity: 0.0023
Sparsity: 114.5 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 35020800 | Self Similarity: -0.0056
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 35020800 | Self Similarity: -0.0007
Sparsity: 147.2 | Dead Features: 0 | Total Loss: 0.0709 | Reconstruction Loss: 0.0334 | L1 Loss: 0.0375 | l1_alpha: 8.0000e-04 | Tokens: 3502

 31%|███       | 17204/55054 [07:55<17:28, 36.09it/s]

Sparsity: 24.2 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 35225600 | Self Similarity: 0.0008
Sparsity: 40.5 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 35225600 | Self Similarity: 0.0129
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0213 | Reconstruction Loss: 0.0132 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 35225600 | Self Similarity: 0.0022
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 35225600 | Self Similarity: -0.0060
Sparsity: 126.2 | Dead Features: 0 | Total Loss: 0.0525 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0270 | l1_alpha: 8.0000e-04 | Tokens: 35225600 | Self Similarity: -0.0005
Sparsity: 159.2 | Dead Features: 0 | Total Loss: 0.0796 | Reconstruction Loss: 0.0365 | L1 Loss: 0.0430 | l1_alpha: 8.0000e-04 | Tokens: 3522

 31%|███▏      | 17308/55054 [07:57<15:49, 39.74it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 35430400 | Self Similarity: 0.0007
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 35430400 | Self Similarity: 0.0128
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 35430400 | Self Similarity: 0.0025
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 35430400 | Self Similarity: -0.0059
Sparsity: 121.9 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 35430400 | Self Similarity: -0.0006
Sparsity: 151.0 | Dead Features: 0 | Total Loss: 0.0735 | Reconstruction Loss: 0.0338 | L1 Loss: 0.0398 | l1_alpha: 8.0000e-04 | Tokens: 3543

 32%|███▏      | 17404/55054 [08:00<17:20, 36.17it/s]

Sparsity: 23.3 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 35635200 | Self Similarity: 0.0007
Sparsity: 39.4 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 35635200 | Self Similarity: 0.0125
Sparsity: 51.3 | Dead Features: 0 | Total Loss: 0.0211 | Reconstruction Loss: 0.0130 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 35635200 | Self Similarity: 0.0023
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0414 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 35635200 | Self Similarity: -0.0057
Sparsity: 123.2 | Dead Features: 0 | Total Loss: 0.0511 | Reconstruction Loss: 0.0265 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 35635200 | Self Similarity: -0.0005
Sparsity: 153.1 | Dead Features: 0 | Total Loss: 0.0760 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 3563

 32%|███▏      | 17505/55054 [08:03<15:06, 41.40it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 35840000 | Self Similarity: 0.0008
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 35840000 | Self Similarity: 0.0129
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 35840000 | Self Similarity: 0.0025
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 35840000 | Self Similarity: -0.0057
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 35840000 | Self Similarity: -0.0002
Sparsity: 154.4 | Dead Features: 0 | Total Loss: 0.0721 | Reconstruction Loss: 0.0334 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 3584

 32%|███▏      | 17607/55054 [08:05<17:16, 36.13it/s]

Sparsity: 23.2 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 36044800 | Self Similarity: 0.0008
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 36044800 | Self Similarity: 0.0127
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 36044800 | Self Similarity: 0.0027
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 36044800 | Self Similarity: -0.0056
Sparsity: 124.7 | Dead Features: 0 | Total Loss: 0.0501 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 36044800 | Self Similarity: -0.0003
Sparsity: 160.7 | Dead Features: 0 | Total Loss: 0.0766 | Reconstruction Loss: 0.0354 | L1 Loss: 0.0412 | l1_alpha: 8.0000e-04 | Tokens: 3604

 32%|███▏      | 17707/55054 [08:08<17:06, 36.40it/s]

Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 36249600 | Self Similarity: 0.0007
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 36249600 | Self Similarity: 0.0124
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 36249600 | Self Similarity: 0.0025
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 36249600 | Self Similarity: -0.0056
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 36249600 | Self Similarity: -0.0004
Sparsity: 151.9 | Dead Features: 0 | Total Loss: 0.0732 | Reconstruction Loss: 0.0340 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 3624

 32%|███▏      | 17807/55054 [08:11<17:09, 36.19it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 36454400 | Self Similarity: 0.0009
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 36454400 | Self Similarity: 0.0124
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 36454400 | Self Similarity: 0.0023
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 36454400 | Self Similarity: -0.0056
Sparsity: 124.2 | Dead Features: 0 | Total Loss: 0.0508 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 36454400 | Self Similarity: -0.0007
Sparsity: 157.2 | Dead Features: 0 | Total Loss: 0.0764 | Reconstruction Loss: 0.0361 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 3645

 33%|███▎      | 17907/55054 [08:14<17:14, 35.90it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 36659200 | Self Similarity: 0.0010
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 36659200 | Self Similarity: 0.0123
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 36659200 | Self Similarity: 0.0025
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 36659200 | Self Similarity: -0.0060
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 36659200 | Self Similarity: -0.0006
Sparsity: 136.5 | Dead Features: 0 | Total Loss: 0.0721 | Reconstruction Loss: 0.0357 | L1 Loss: 0.0364 | l1_alpha: 8.0000e-04 | Tokens: 3665

 33%|███▎      | 18007/55054 [08:16<17:09, 35.98it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 36864000 | Self Similarity: 0.0003
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 36864000 | Self Similarity: 0.0127
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 36864000 | Self Similarity: 0.0023
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 36864000 | Self Similarity: -0.0057
Sparsity: 122.1 | Dead Features: 0 | Total Loss: 0.0492 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 36864000 | Self Similarity: -0.0005
Sparsity: 143.7 | Dead Features: 0 | Total Loss: 0.0724 | Reconstruction Loss: 0.0343 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 3686

 33%|███▎      | 18107/55054 [08:19<16:58, 36.29it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 37068800 | Self Similarity: 0.0005
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 37068800 | Self Similarity: 0.0124
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 37068800 | Self Similarity: 0.0024
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 37068800 | Self Similarity: -0.0057
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 37068800 | Self Similarity: -0.0008
Sparsity: 148.3 | Dead Features: 0 | Total Loss: 0.0721 | Reconstruction Loss: 0.0334 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 3706

 33%|███▎      | 18207/55054 [08:22<17:06, 35.88it/s]

Sparsity: 24.3 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 37273600 | Self Similarity: 0.0007
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0156 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 37273600 | Self Similarity: 0.0127
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0209 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 37273600 | Self Similarity: 0.0021
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0415 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 37273600 | Self Similarity: -0.0057
Sparsity: 124.6 | Dead Features: 0 | Total Loss: 0.0534 | Reconstruction Loss: 0.0275 | L1 Loss: 0.0259 | l1_alpha: 8.0000e-04 | Tokens: 37273600 | Self Similarity: -0.0005
Sparsity: 153.5 | Dead Features: 0 | Total Loss: 0.0789 | Reconstruction Loss: 0.0374 | L1 Loss: 0.0415 | l1_alpha: 8.0000e-04 | Tokens: 3727

 33%|███▎      | 18307/55054 [08:25<17:03, 35.92it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 37478400 | Self Similarity: 0.0009
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 37478400 | Self Similarity: 0.0127
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 37478400 | Self Similarity: 0.0022
Sparsity: 114.6 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 37478400 | Self Similarity: -0.0058
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 37478400 | Self Similarity: -0.0003
Sparsity: 152.9 | Dead Features: 0 | Total Loss: 0.0751 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0403 | l1_alpha: 8.0000e-04 | Tokens: 3747

 33%|███▎      | 18407/55054 [08:27<16:54, 36.11it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 37683200 | Self Similarity: 0.0008
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 37683200 | Self Similarity: 0.0129
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 37683200 | Self Similarity: 0.0025
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 37683200 | Self Similarity: -0.0056
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 37683200 | Self Similarity: -0.0005
Sparsity: 148.8 | Dead Features: 0 | Total Loss: 0.0704 | Reconstruction Loss: 0.0322 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 3768

 34%|███▎      | 18504/55054 [08:30<17:02, 35.76it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 37888000 | Self Similarity: 0.0008
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 37888000 | Self Similarity: 0.0129
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 37888000 | Self Similarity: 0.0025
Sparsity: 114.8 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 37888000 | Self Similarity: -0.0057
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 37888000 | Self Similarity: -0.0004
Sparsity: 151.1 | Dead Features: 0 | Total Loss: 0.0712 | Reconstruction Loss: 0.0329 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 3788

 34%|███▍      | 18604/55054 [08:33<17:07, 35.49it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 38092800 | Self Similarity: 0.0008
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 38092800 | Self Similarity: 0.0129
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 38092800 | Self Similarity: 0.0026
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 38092800 | Self Similarity: -0.0057
Sparsity: 121.6 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 38092800 | Self Similarity: -0.0003
Sparsity: 154.1 | Dead Features: 0 | Total Loss: 0.0723 | Reconstruction Loss: 0.0329 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 3809

 34%|███▍      | 18704/55054 [08:36<17:00, 35.62it/s]

Sparsity: 19.0 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 38297600 | Self Similarity: 0.0007
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 38297600 | Self Similarity: 0.0128
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 38297600 | Self Similarity: 0.0023
Sparsity: 117.4 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 38297600 | Self Similarity: -0.0053
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 38297600 | Self Similarity: -0.0001
Sparsity: 153.2 | Dead Features: 0 | Total Loss: 0.0721 | Reconstruction Loss: 0.0327 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 3829

 34%|███▍      | 18804/55054 [08:38<16:55, 35.68it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 38502400 | Self Similarity: 0.0008
Sparsity: 34.8 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 38502400 | Self Similarity: 0.0128
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 38502400 | Self Similarity: 0.0022
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 38502400 | Self Similarity: -0.0055
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0231 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 38502400 | Self Similarity: -0.0005
Sparsity: 153.4 | Dead Features: 0 | Total Loss: 0.0697 | Reconstruction Loss: 0.0316 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 3850

 34%|███▍      | 18904/55054 [08:41<16:57, 35.54it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 38707200 | Self Similarity: 0.0008
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 38707200 | Self Similarity: 0.0126
Sparsity: 46.5 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 38707200 | Self Similarity: 0.0023
Sparsity: 112.3 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 38707200 | Self Similarity: -0.0053
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 38707200 | Self Similarity: -0.0006
Sparsity: 143.6 | Dead Features: 0 | Total Loss: 0.0695 | Reconstruction Loss: 0.0329 | L1 Loss: 0.0365 | l1_alpha: 8.0000e-04 | Tokens: 3870

 35%|███▍      | 19004/55054 [08:44<17:18, 34.72it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 38912000 | Self Similarity: 0.0006
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 38912000 | Self Similarity: 0.0129
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 38912000 | Self Similarity: 0.0023
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 38912000 | Self Similarity: -0.0055
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 38912000 | Self Similarity: -0.0006
Sparsity: 151.2 | Dead Features: 0 | Total Loss: 0.0737 | Reconstruction Loss: 0.0351 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 3891

 35%|███▍      | 19107/55054 [08:47<15:27, 38.77it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 39116800 | Self Similarity: 0.0010
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 39116800 | Self Similarity: 0.0129
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 39116800 | Self Similarity: 0.0020
Sparsity: 114.1 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 39116800 | Self Similarity: -0.0057
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 39116800 | Self Similarity: -0.0006
Sparsity: 150.0 | Dead Features: 0 | Total Loss: 0.0721 | Reconstruction Loss: 0.0335 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 3911

 35%|███▍      | 19206/55054 [08:50<16:20, 36.56it/s]

Sparsity: 19.0 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 39321600 | Self Similarity: 0.0008
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 39321600 | Self Similarity: 0.0129
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 39321600 | Self Similarity: 0.0024
Sparsity: 114.7 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 39321600 | Self Similarity: -0.0057
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 39321600 | Self Similarity: -0.0005
Sparsity: 151.0 | Dead Features: 0 | Total Loss: 0.0713 | Reconstruction Loss: 0.0331 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 3932

 35%|███▌      | 19306/55054 [08:52<17:08, 34.76it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 39526400 | Self Similarity: 0.0009
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 39526400 | Self Similarity: 0.0129
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 39526400 | Self Similarity: 0.0024
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 39526400 | Self Similarity: -0.0057
Sparsity: 120.9 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 39526400 | Self Similarity: -0.0006
Sparsity: 148.6 | Dead Features: 0 | Total Loss: 0.0750 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 3952

 35%|███▌      | 19406/55054 [08:55<17:12, 34.54it/s]

Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 39731200 | Self Similarity: 0.0008
Sparsity: 34.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 39731200 | Self Similarity: 0.0127
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 39731200 | Self Similarity: 0.0020
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 39731200 | Self Similarity: -0.0058
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0232 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 39731200 | Self Similarity: -0.0003
Sparsity: 150.5 | Dead Features: 0 | Total Loss: 0.0718 | Reconstruction Loss: 0.0327 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens: 3973

 35%|███▌      | 19506/55054 [08:58<17:19, 34.19it/s]

Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 39936000 | Self Similarity: 0.0008
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 39936000 | Self Similarity: 0.0129
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 39936000 | Self Similarity: 0.0022
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 39936000 | Self Similarity: -0.0056
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 39936000 | Self Similarity: -0.0006
Sparsity: 155.4 | Dead Features: 0 | Total Loss: 0.0757 | Reconstruction Loss: 0.0357 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 3993

 36%|███▌      | 19606/55054 [09:01<16:56, 34.87it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 40140800 | Self Similarity: 0.0007
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 40140800 | Self Similarity: 0.0129
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 40140800 | Self Similarity: 0.0021
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 40140800 | Self Similarity: -0.0058
Sparsity: 122.0 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 40140800 | Self Similarity: -0.0005
Sparsity: 148.6 | Dead Features: 0 | Total Loss: 0.0709 | Reconstruction Loss: 0.0320 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 4014

 36%|███▌      | 19704/55054 [09:04<17:09, 34.33it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 40345600 | Self Similarity: 0.0008
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 40345600 | Self Similarity: 0.0129
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 40345600 | Self Similarity: 0.0017
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 40345600 | Self Similarity: -0.0057
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0501 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 40345600 | Self Similarity: -0.0006
Sparsity: 148.4 | Dead Features: 0 | Total Loss: 0.0745 | Reconstruction Loss: 0.0355 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 4034

 36%|███▌      | 19804/55054 [09:07<16:33, 35.48it/s]

Sparsity: 26.1 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 40550400 | Self Similarity: 0.0011
Sparsity: 40.2 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 40550400 | Self Similarity: 0.0132
Sparsity: 51.1 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 40550400 | Self Similarity: 0.0020
Sparsity: 120.4 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 40550400 | Self Similarity: -0.0057
Sparsity: 125.5 | Dead Features: 0 | Total Loss: 0.0522 | Reconstruction Loss: 0.0264 | L1 Loss: 0.0259 | l1_alpha: 8.0000e-04 | Tokens: 40550400 | Self Similarity: -0.0008
Sparsity: 159.0 | Dead Features: 0 | Total Loss: 0.0793 | Reconstruction Loss: 0.0375 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 4055

 36%|███▌      | 19904/55054 [09:09<16:52, 34.72it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 40755200 | Self Similarity: 0.0011
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 40755200 | Self Similarity: 0.0129
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 40755200 | Self Similarity: 0.0023
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 40755200 | Self Similarity: -0.0058
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 40755200 | Self Similarity: -0.0003
Sparsity: 151.3 | Dead Features: 0 | Total Loss: 0.0729 | Reconstruction Loss: 0.0343 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 4075

 36%|███▋      | 20004/55054 [09:12<16:45, 34.85it/s]

Sparsity: 21.0 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 40960000 | Self Similarity: 0.0011
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 40960000 | Self Similarity: 0.0132
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 40960000 | Self Similarity: 0.0021
Sparsity: 114.0 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 40960000 | Self Similarity: -0.0055
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 40960000 | Self Similarity: -0.0005
Sparsity: 151.4 | Dead Features: 0 | Total Loss: 0.0711 | Reconstruction Loss: 0.0327 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 4096

 37%|███▋      | 20105/55054 [09:15<15:58, 36.47it/s]

Sparsity: 19.2 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 41164800 | Self Similarity: 0.0013
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 41164800 | Self Similarity: 0.0131
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 41164800 | Self Similarity: 0.0021
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 41164800 | Self Similarity: -0.0059
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 41164800 | Self Similarity: -0.0007
Sparsity: 151.3 | Dead Features: 0 | Total Loss: 0.0730 | Reconstruction Loss: 0.0340 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 4116

 37%|███▋      | 20206/55054 [09:18<14:55, 38.90it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 41369600 | Self Similarity: 0.0013
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 41369600 | Self Similarity: 0.0133
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 41369600 | Self Similarity: 0.0020
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 41369600 | Self Similarity: -0.0059
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 41369600 | Self Similarity: -0.0008
Sparsity: 149.7 | Dead Features: 0 | Total Loss: 0.0718 | Reconstruction Loss: 0.0336 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 4136

 37%|███▋      | 20306/55054 [09:21<16:46, 34.53it/s]

Sparsity: 22.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 41574400 | Self Similarity: 0.0010
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 41574400 | Self Similarity: 0.0128
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 41574400 | Self Similarity: 0.0017
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 41574400 | Self Similarity: -0.0057
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 41574400 | Self Similarity: -0.0006
Sparsity: 153.7 | Dead Features: 0 | Total Loss: 0.0715 | Reconstruction Loss: 0.0330 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 4157

 37%|███▋      | 20406/55054 [09:23<16:41, 34.58it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 41779200 | Self Similarity: 0.0011
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 41779200 | Self Similarity: 0.0130
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 41779200 | Self Similarity: 0.0022
Sparsity: 114.0 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 41779200 | Self Similarity: -0.0060
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 41779200 | Self Similarity: -0.0006
Sparsity: 152.6 | Dead Features: 0 | Total Loss: 0.0712 | Reconstruction Loss: 0.0326 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 4177

 37%|███▋      | 20506/55054 [09:26<16:05, 35.77it/s]

Sparsity: 25.1 | Dead Features: 0 | Total Loss: 0.0156 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 41984000 | Self Similarity: 0.0012
Sparsity: 40.5 | Dead Features: 0 | Total Loss: 0.0156 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 41984000 | Self Similarity: 0.0128
Sparsity: 51.1 | Dead Features: 0 | Total Loss: 0.0217 | Reconstruction Loss: 0.0134 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 41984000 | Self Similarity: 0.0020
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 41984000 | Self Similarity: -0.0059
Sparsity: 126.4 | Dead Features: 0 | Total Loss: 0.0527 | Reconstruction Loss: 0.0265 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 41984000 | Self Similarity: -0.0006
Sparsity: 154.2 | Dead Features: 0 | Total Loss: 0.0771 | Reconstruction Loss: 0.0365 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 4198

 37%|███▋      | 20606/55054 [09:29<16:04, 35.73it/s]

Sparsity: 17.7 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0062 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 42188800 | Self Similarity: 0.0013
Sparsity: 33.3 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 42188800 | Self Similarity: 0.0127
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 42188800 | Self Similarity: 0.0016
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 42188800 | Self Similarity: -0.0055
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 42188800 | Self Similarity: -0.0006
Sparsity: 137.7 | Dead Features: 0 | Total Loss: 0.1078 | Reconstruction Loss: 0.0650 | L1 Loss: 0.0428 | l1_alpha: 8.0000e-04 | Tokens: 4218

 38%|███▊      | 20706/55054 [09:32<16:11, 35.35it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 42393600 | Self Similarity: 0.0001
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 42393600 | Self Similarity: 0.0127
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 42393600 | Self Similarity: 0.0019
Sparsity: 114.3 | Dead Features: 0 | Total Loss: 0.0375 | Reconstruction Loss: 0.0178 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 42393600 | Self Similarity: -0.0056
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0461 | Reconstruction Loss: 0.0230 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 42393600 | Self Similarity: -0.0007
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0728 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0359 | l1_alpha: 8.0000e-04 | Tokens: 4239

 38%|███▊      | 20807/55054 [09:35<15:59, 35.70it/s]

Sparsity: 24.8 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 42598400 | Self Similarity: -0.0002
Sparsity: 40.8 | Dead Features: 0 | Total Loss: 0.0160 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 42598400 | Self Similarity: 0.0131
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0213 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 42598400 | Self Similarity: 0.0021
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 42598400 | Self Similarity: -0.0057
Sparsity: 126.0 | Dead Features: 0 | Total Loss: 0.0524 | Reconstruction Loss: 0.0262 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 42598400 | Self Similarity: -0.0006
Sparsity: 140.8 | Dead Features: 0 | Total Loss: 0.0819 | Reconstruction Loss: 0.0404 | L1 Loss: 0.0415 | l1_alpha: 8.0000e-04 | Tokens: 425

 38%|███▊      | 20905/55054 [09:37<15:44, 36.16it/s]

Sparsity: 23.9 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 42803200 | Self Similarity: 0.0001
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 42803200 | Self Similarity: 0.0131
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 42803200 | Self Similarity: 0.0021
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 42803200 | Self Similarity: -0.0055
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 42803200 | Self Similarity: -0.0007
Sparsity: 137.5 | Dead Features: 0 | Total Loss: 0.0733 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 4280

 38%|███▊      | 21005/55054 [09:40<15:43, 36.10it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 43008000 | Self Similarity: 0.0003
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 43008000 | Self Similarity: 0.0130
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 43008000 | Self Similarity: 0.0022
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 43008000 | Self Similarity: -0.0055
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 43008000 | Self Similarity: -0.0008
Sparsity: 140.4 | Dead Features: 0 | Total Loss: 0.0729 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 4300

 38%|███▊      | 21107/55054 [09:43<15:49, 35.76it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 43212800 | Self Similarity: 0.0002
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 43212800 | Self Similarity: 0.0130
Sparsity: 44.9 | Dead Features: 0 | Total Loss: 0.0188 | Reconstruction Loss: 0.0112 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 43212800 | Self Similarity: 0.0021
Sparsity: 113.6 | Dead Features: 0 | Total Loss: 0.0377 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 43212800 | Self Similarity: -0.0056
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0232 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 43212800 | Self Similarity: -0.0005
Sparsity: 133.6 | Dead Features: 0 | Total Loss: 0.0731 | Reconstruction Loss: 0.0359 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 4321

 39%|███▊      | 21206/55054 [09:45<16:26, 34.31it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 43417600 | Self Similarity: 0.0002
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 43417600 | Self Similarity: 0.0128
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 43417600 | Self Similarity: 0.0025
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 43417600 | Self Similarity: -0.0056
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 43417600 | Self Similarity: -0.0007
Sparsity: 141.2 | Dead Features: 0 | Total Loss: 0.0714 | Reconstruction Loss: 0.0342 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 4341

 39%|███▊      | 21306/55054 [09:48<16:23, 34.30it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 43622400 | Self Similarity: 0.0002
Sparsity: 37.4 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 43622400 | Self Similarity: 0.0130
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 43622400 | Self Similarity: 0.0025
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 43622400 | Self Similarity: -0.0056
Sparsity: 122.3 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 43622400 | Self Similarity: -0.0008
Sparsity: 147.8 | Dead Features: 0 | Total Loss: 0.0745 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 4362

 39%|███▉      | 21406/55054 [09:51<16:21, 34.30it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 43827200 | Self Similarity: 0.0005
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 43827200 | Self Similarity: 0.0125
Sparsity: 46.4 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 43827200 | Self Similarity: 0.0022
Sparsity: 114.6 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 43827200 | Self Similarity: -0.0054
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 43827200 | Self Similarity: -0.0007
Sparsity: 143.3 | Dead Features: 0 | Total Loss: 0.0720 | Reconstruction Loss: 0.0335 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 4382

 39%|███▉      | 21506/55054 [09:54<16:05, 34.74it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 44032000 | Self Similarity: 0.0007
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 44032000 | Self Similarity: 0.0127
Sparsity: 46.5 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 44032000 | Self Similarity: 0.0024
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0376 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 44032000 | Self Similarity: -0.0055
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0462 | Reconstruction Loss: 0.0230 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 44032000 | Self Similarity: -0.0004
Sparsity: 141.6 | Dead Features: 0 | Total Loss: 0.0695 | Reconstruction Loss: 0.0328 | L1 Loss: 0.0367 | l1_alpha: 8.0000e-04 | Tokens: 4403

 39%|███▉      | 21607/55054 [09:57<16:18, 34.18it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 44236800 | Self Similarity: 0.0007
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 44236800 | Self Similarity: 0.0127
Sparsity: 46.1 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 44236800 | Self Similarity: 0.0028
Sparsity: 114.7 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 44236800 | Self Similarity: -0.0053
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 44236800 | Self Similarity: -0.0006
Sparsity: 147.5 | Dead Features: 0 | Total Loss: 0.0729 | Reconstruction Loss: 0.0335 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 4423

 39%|███▉      | 21707/55054 [10:00<15:28, 35.93it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 44441600 | Self Similarity: 0.0007
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 44441600 | Self Similarity: 0.0129
Sparsity: 45.6 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 44441600 | Self Similarity: 0.0027
Sparsity: 114.2 | Dead Features: 0 | Total Loss: 0.0377 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 44441600 | Self Similarity: -0.0058
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 44441600 | Self Similarity: -0.0005
Sparsity: 145.2 | Dead Features: 0 | Total Loss: 0.0710 | Reconstruction Loss: 0.0331 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 4444

 40%|███▉      | 21807/55054 [10:03<15:47, 35.08it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 44646400 | Self Similarity: 0.0007
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 44646400 | Self Similarity: 0.0130
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 44646400 | Self Similarity: 0.0026
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 44646400 | Self Similarity: -0.0057
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 44646400 | Self Similarity: -0.0009
Sparsity: 151.9 | Dead Features: 0 | Total Loss: 0.0747 | Reconstruction Loss: 0.0354 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 4464

 40%|███▉      | 21907/55054 [10:06<16:05, 34.34it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 44851200 | Self Similarity: 0.0007
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 44851200 | Self Similarity: 0.0131
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 44851200 | Self Similarity: 0.0028
Sparsity: 113.9 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 44851200 | Self Similarity: -0.0056
Sparsity: 117.5 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 44851200 | Self Similarity: -0.0010
Sparsity: 149.2 | Dead Features: 0 | Total Loss: 0.0706 | Reconstruction Loss: 0.0328 | L1 Loss: 0.0378 | l1_alpha: 8.0000e-04 | Tokens: 4485

 40%|███▉      | 22005/55054 [10:08<15:56, 34.56it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 45056000 | Self Similarity: 0.0007
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 45056000 | Self Similarity: 0.0126
Sparsity: 46.5 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 45056000 | Self Similarity: 0.0024
Sparsity: 110.3 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 45056000 | Self Similarity: -0.0056
Sparsity: 114.6 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 45056000 | Self Similarity: -0.0009
Sparsity: 144.5 | Dead Features: 0 | Total Loss: 0.0712 | Reconstruction Loss: 0.0336 | L1 Loss: 0.0376 | l1_alpha: 8.0000e-04 | Tokens: 4505

 40%|████      | 22104/55054 [10:11<15:51, 34.63it/s]

Sparsity: 19.4 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 45260800 | Self Similarity: 0.0008
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 45260800 | Self Similarity: 0.0128
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 45260800 | Self Similarity: 0.0026
Sparsity: 112.9 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 45260800 | Self Similarity: -0.0057
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 45260800 | Self Similarity: -0.0008
Sparsity: 147.3 | Dead Features: 0 | Total Loss: 0.0708 | Reconstruction Loss: 0.0329 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 4526

 40%|████      | 22205/55054 [10:14<14:22, 38.11it/s]

Sparsity: 24.4 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 45465600 | Self Similarity: 0.0009
Sparsity: 39.9 | Dead Features: 0 | Total Loss: 0.0156 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 45465600 | Self Similarity: 0.0127
Sparsity: 50.5 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 45465600 | Self Similarity: 0.0024
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 45465600 | Self Similarity: -0.0056
Sparsity: 124.7 | Dead Features: 0 | Total Loss: 0.0515 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 45465600 | Self Similarity: -0.0006
Sparsity: 151.9 | Dead Features: 0 | Total Loss: 0.0757 | Reconstruction Loss: 0.0358 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 4546

 41%|████      | 22306/55054 [10:16<13:32, 40.30it/s]

Sparsity: 23.5 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 45670400 | Self Similarity: 0.0010
Sparsity: 38.5 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 45670400 | Self Similarity: 0.0129
Sparsity: 50.1 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 45670400 | Self Similarity: 0.0026
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 45670400 | Self Similarity: -0.0055
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0510 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 45670400 | Self Similarity: -0.0005
Sparsity: 154.7 | Dead Features: 0 | Total Loss: 0.0764 | Reconstruction Loss: 0.0354 | L1 Loss: 0.0410 | l1_alpha: 8.0000e-04 | Tokens: 4567

 41%|████      | 22408/55054 [10:19<13:17, 40.96it/s]

Sparsity: 23.2 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 45875200 | Self Similarity: 0.0011
Sparsity: 38.5 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 45875200 | Self Similarity: 0.0131
Sparsity: 50.2 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 45875200 | Self Similarity: 0.0024
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0401 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 45875200 | Self Similarity: -0.0056
Sparsity: 123.2 | Dead Features: 0 | Total Loss: 0.0502 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 45875200 | Self Similarity: -0.0010
Sparsity: 153.7 | Dead Features: 0 | Total Loss: 0.0752 | Reconstruction Loss: 0.0343 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens: 4587

 41%|████      | 22507/55054 [10:22<13:31, 40.12it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 46080000 | Self Similarity: 0.0011
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 46080000 | Self Similarity: 0.0135
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 46080000 | Self Similarity: 0.0024
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 46080000 | Self Similarity: -0.0059
Sparsity: 120.6 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 46080000 | Self Similarity: -0.0008
Sparsity: 160.9 | Dead Features: 0 | Total Loss: 0.0839 | Reconstruction Loss: 0.0428 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 4608

 41%|████      | 22606/55054 [10:24<13:40, 39.53it/s]

Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 46284800 | Self Similarity: 0.0008
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 46284800 | Self Similarity: 0.0137
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 46284800 | Self Similarity: 0.0022
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 46284800 | Self Similarity: -0.0059
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 46284800 | Self Similarity: -0.0008
Sparsity: 147.8 | Dead Features: 0 | Total Loss: 0.0711 | Reconstruction Loss: 0.0327 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 4628

 41%|████      | 22707/55054 [10:27<14:06, 38.20it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 46489600 | Self Similarity: 0.0006
Sparsity: 38.5 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 46489600 | Self Similarity: 0.0132
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 46489600 | Self Similarity: 0.0024
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 46489600 | Self Similarity: -0.0063
Sparsity: 125.4 | Dead Features: 0 | Total Loss: 0.0515 | Reconstruction Loss: 0.0263 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 46489600 | Self Similarity: -0.0011
Sparsity: 158.3 | Dead Features: 0 | Total Loss: 0.0770 | Reconstruction Loss: 0.0370 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 4648

 41%|████▏     | 22806/55054 [10:29<15:00, 35.83it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 46694400 | Self Similarity: 0.0009
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 46694400 | Self Similarity: 0.0132
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 46694400 | Self Similarity: 0.0024
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 46694400 | Self Similarity: -0.0061
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 46694400 | Self Similarity: -0.0011
Sparsity: 149.1 | Dead Features: 0 | Total Loss: 0.0729 | Reconstruction Loss: 0.0343 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 4669

 42%|████▏     | 22904/55054 [10:32<14:32, 36.85it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 46899200 | Self Similarity: 0.0007
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 46899200 | Self Similarity: 0.0133
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 46899200 | Self Similarity: 0.0025
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 46899200 | Self Similarity: -0.0061
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 46899200 | Self Similarity: -0.0013
Sparsity: 155.7 | Dead Features: 0 | Total Loss: 0.0742 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 4689

 42%|████▏     | 23004/55054 [10:35<14:53, 35.85it/s]

Sparsity: 19.4 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 47104000 | Self Similarity: 0.0007
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 47104000 | Self Similarity: 0.0133
Sparsity: 45.2 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 47104000 | Self Similarity: 0.0023
Sparsity: 114.0 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 47104000 | Self Similarity: -0.0059
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0232 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 47104000 | Self Similarity: -0.0010
Sparsity: 150.8 | Dead Features: 0 | Total Loss: 0.0704 | Reconstruction Loss: 0.0324 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 4710

 42%|████▏     | 23104/55054 [10:38<14:45, 36.09it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 47308800 | Self Similarity: 0.0008
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 47308800 | Self Similarity: 0.0129
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 47308800 | Self Similarity: 0.0025
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 47308800 | Self Similarity: -0.0058
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 47308800 | Self Similarity: -0.0009
Sparsity: 151.7 | Dead Features: 0 | Total Loss: 0.0703 | Reconstruction Loss: 0.0323 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 4730

 42%|████▏     | 23204/55054 [10:40<14:51, 35.72it/s]

Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 47513600 | Self Similarity: 0.0007
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 47513600 | Self Similarity: 0.0129
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 47513600 | Self Similarity: 0.0029
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 47513600 | Self Similarity: -0.0059
Sparsity: 123.6 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 47513600 | Self Similarity: -0.0008
Sparsity: 157.7 | Dead Features: 0 | Total Loss: 0.0747 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens: 4751

 42%|████▏     | 23304/55054 [10:43<14:48, 35.74it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 47718400 | Self Similarity: 0.0008
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 47718400 | Self Similarity: 0.0130
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 47718400 | Self Similarity: 0.0023
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 47718400 | Self Similarity: -0.0060
Sparsity: 122.3 | Dead Features: 0 | Total Loss: 0.0499 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 47718400 | Self Similarity: -0.0005
Sparsity: 159.9 | Dead Features: 0 | Total Loss: 0.0761 | Reconstruction Loss: 0.0350 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 4771

 43%|████▎     | 23404/55054 [10:46<16:09, 32.66it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 47923200 | Self Similarity: 0.0011
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 47923200 | Self Similarity: 0.0130
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 47923200 | Self Similarity: 0.0025
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 47923200 | Self Similarity: -0.0060
Sparsity: 122.8 | Dead Features: 0 | Total Loss: 0.0501 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 47923200 | Self Similarity: -0.0006
Sparsity: 152.9 | Dead Features: 0 | Total Loss: 0.0825 | Reconstruction Loss: 0.0409 | L1 Loss: 0.0417 | l1_alpha: 8.0000e-04 | Tokens: 4792

 43%|████▎     | 23507/55054 [10:49<15:28, 33.97it/s]

Sparsity: 19.9 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 48128000 | Self Similarity: 0.0009
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 48128000 | Self Similarity: 0.0132
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 48128000 | Self Similarity: 0.0025
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 48128000 | Self Similarity: -0.0057
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 48128000 | Self Similarity: -0.0007
Sparsity: 128.0 | Dead Features: 0 | Total Loss: 0.0735 | Reconstruction Loss: 0.0380 | L1 Loss: 0.0354 | l1_alpha: 8.0000e-04 | Tokens: 4812

 43%|████▎     | 23607/55054 [10:52<15:38, 33.51it/s]

Sparsity: 23.7 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 48332800 | Self Similarity: 0.0003
Sparsity: 38.9 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 48332800 | Self Similarity: 0.0132
Sparsity: 51.0 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0130 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 48332800 | Self Similarity: 0.0023
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0423 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 48332800 | Self Similarity: -0.0058
Sparsity: 124.8 | Dead Features: 0 | Total Loss: 0.0529 | Reconstruction Loss: 0.0276 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 48332800 | Self Similarity: -0.0005
Sparsity: 151.7 | Dead Features: 0 | Total Loss: 0.0800 | Reconstruction Loss: 0.0387 | L1 Loss: 0.0413 | l1_alpha: 8.0000e-04 | Tokens: 4833

 43%|████▎     | 23707/55054 [10:55<15:15, 34.26it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 48537600 | Self Similarity: 0.0002
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 48537600 | Self Similarity: 0.0133
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 48537600 | Self Similarity: 0.0025
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 48537600 | Self Similarity: -0.0060
Sparsity: 122.8 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 48537600 | Self Similarity: -0.0008
Sparsity: 148.7 | Dead Features: 0 | Total Loss: 0.0736 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 4853

 43%|████▎     | 23807/55054 [10:58<15:10, 34.32it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 48742400 | Self Similarity: 0.0004
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 48742400 | Self Similarity: 0.0130
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 48742400 | Self Similarity: 0.0027
Sparsity: 114.5 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 48742400 | Self Similarity: -0.0060
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 48742400 | Self Similarity: -0.0007
Sparsity: 147.2 | Dead Features: 0 | Total Loss: 0.0707 | Reconstruction Loss: 0.0327 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 4874

 43%|████▎     | 23907/55054 [11:00<14:51, 34.94it/s]

Sparsity: 17.9 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0062 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 48947200 | Self Similarity: 0.0004
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 48947200 | Self Similarity: 0.0134
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 48947200 | Self Similarity: 0.0026
Sparsity: 115.7 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 48947200 | Self Similarity: -0.0059
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 48947200 | Self Similarity: -0.0004
Sparsity: 145.9 | Dead Features: 0 | Total Loss: 0.0728 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 4894

 44%|████▎     | 24007/55054 [11:03<14:20, 36.06it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 49152000 | Self Similarity: 0.0007
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 49152000 | Self Similarity: 0.0132
Sparsity: 50.9 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 49152000 | Self Similarity: 0.0027
Sparsity: 114.6 | Dead Features: 0 | Total Loss: 0.0433 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 49152000 | Self Similarity: -0.0057
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0523 | Reconstruction Loss: 0.0263 | L1 Loss: 0.0260 | l1_alpha: 8.0000e-04 | Tokens: 49152000 | Self Similarity: -0.0002
Sparsity: 150.4 | Dead Features: 0 | Total Loss: 0.0770 | Reconstruction Loss: 0.0363 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 4915

 44%|████▍     | 24107/55054 [11:06<14:12, 36.31it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 49356800 | Self Similarity: 0.0006
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 49356800 | Self Similarity: 0.0130
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 49356800 | Self Similarity: 0.0027
Sparsity: 113.4 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 49356800 | Self Similarity: -0.0060
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0245 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 49356800 | Self Similarity: -0.0001
Sparsity: 149.3 | Dead Features: 0 | Total Loss: 0.0740 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 4935

 44%|████▍     | 24207/55054 [11:09<14:14, 36.11it/s]

Sparsity: 25.6 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 49561600 | Self Similarity: 0.0006
Sparsity: 42.6 | Dead Features: 0 | Total Loss: 0.0165 | Reconstruction Loss: 0.0108 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 49561600 | Self Similarity: 0.0131
Sparsity: 53.5 | Dead Features: 0 | Total Loss: 0.0216 | Reconstruction Loss: 0.0135 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 49561600 | Self Similarity: 0.0027
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 49561600 | Self Similarity: -0.0057
Sparsity: 130.0 | Dead Features: 0 | Total Loss: 0.0542 | Reconstruction Loss: 0.0271 | L1 Loss: 0.0272 | l1_alpha: 8.0000e-04 | Tokens: 49561600 | Self Similarity: -0.0003
Sparsity: 159.8 | Dead Features: 0 | Total Loss: 0.0784 | Reconstruction Loss: 0.0367 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 4956

 44%|████▍     | 24307/55054 [11:12<14:10, 36.16it/s]

Sparsity: 21.0 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 49766400 | Self Similarity: 0.0007
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 49766400 | Self Similarity: 0.0130
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 49766400 | Self Similarity: 0.0027
Sparsity: 114.1 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 49766400 | Self Similarity: -0.0054
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 49766400 | Self Similarity: -0.0006
Sparsity: 157.7 | Dead Features: 0 | Total Loss: 0.0773 | Reconstruction Loss: 0.0360 | L1 Loss: 0.0413 | l1_alpha: 8.0000e-04 | Tokens: 4976

 44%|████▍     | 24407/55054 [11:14<14:06, 36.19it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 49971200 | Self Similarity: 0.0007
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 49971200 | Self Similarity: 0.0129
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 49971200 | Self Similarity: 0.0025
Sparsity: 115.0 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 49971200 | Self Similarity: -0.0057
Sparsity: 121.7 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 49971200 | Self Similarity: -0.0004
Sparsity: 156.6 | Dead Features: 0 | Total Loss: 0.0759 | Reconstruction Loss: 0.0360 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 4997

 45%|████▍     | 24507/55054 [11:17<14:03, 36.20it/s]

Sparsity: 19.9 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 50176000 | Self Similarity: 0.0008
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 50176000 | Self Similarity: 0.0129
Sparsity: 46.3 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 50176000 | Self Similarity: 0.0024
Sparsity: 113.2 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 50176000 | Self Similarity: -0.0056
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0466 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 50176000 | Self Similarity: -0.0004
Sparsity: 149.3 | Dead Features: 0 | Total Loss: 0.0685 | Reconstruction Loss: 0.0312 | L1 Loss: 0.0373 | l1_alpha: 8.0000e-04 | Tokens: 5017

 45%|████▍     | 24607/55054 [11:20<14:06, 35.99it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 50380800 | Self Similarity: 0.0010
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 50380800 | Self Similarity: 0.0125
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 50380800 | Self Similarity: 0.0026
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 50380800 | Self Similarity: -0.0059
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 50380800 | Self Similarity: -0.0009
Sparsity: 149.6 | Dead Features: 0 | Total Loss: 0.0721 | Reconstruction Loss: 0.0338 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 5038

 45%|████▍     | 24707/55054 [11:23<14:03, 35.98it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 50585600 | Self Similarity: 0.0011
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 50585600 | Self Similarity: 0.0126
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 50585600 | Self Similarity: 0.0025
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 50585600 | Self Similarity: -0.0059
Sparsity: 125.6 | Dead Features: 0 | Total Loss: 0.0511 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 50585600 | Self Similarity: -0.0008
Sparsity: 175.8 | Dead Features: 0 | Total Loss: 0.1325 | Reconstruction Loss: 0.0802 | L1 Loss: 0.0523 | l1_alpha: 8.0000e-04 | Tokens: 5058

 45%|████▌     | 24807/55054 [11:25<13:53, 36.30it/s]

Sparsity: 23.6 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 50790400 | Self Similarity: 0.0008
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 50790400 | Self Similarity: 0.0127
Sparsity: 49.9 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 50790400 | Self Similarity: 0.0025
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 50790400 | Self Similarity: -0.0062
Sparsity: 125.2 | Dead Features: 0 | Total Loss: 0.0508 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 50790400 | Self Similarity: -0.0006
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0786 | Reconstruction Loss: 0.0413 | L1 Loss: 0.0373 | l1_alpha: 8.0000e-04 | Tokens: 5079

 45%|████▌     | 24907/55054 [11:28<14:04, 35.70it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 50995200 | Self Similarity: 0.0001
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 50995200 | Self Similarity: 0.0130
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 50995200 | Self Similarity: 0.0025
Sparsity: 114.0 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 50995200 | Self Similarity: -0.0061
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 50995200 | Self Similarity: -0.0008
Sparsity: 131.9 | Dead Features: 0 | Total Loss: 0.0747 | Reconstruction Loss: 0.0367 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 5099

 45%|████▌     | 25007/55054 [11:31<13:47, 36.30it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 51200000 | Self Similarity: 0.0003
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 51200000 | Self Similarity: 0.0129
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 51200000 | Self Similarity: 0.0023
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 51200000 | Self Similarity: -0.0059
Sparsity: 120.7 | Dead Features: 0 | Total Loss: 0.0481 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 51200000 | Self Similarity: -0.0007
Sparsity: 133.8 | Dead Features: 0 | Total Loss: 0.0720 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0371 | l1_alpha: 8.0000e-04 | Tokens: 5120

 46%|████▌     | 25107/55054 [11:34<13:39, 36.56it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 51404800 | Self Similarity: 0.0003
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 51404800 | Self Similarity: 0.0131
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 51404800 | Self Similarity: 0.0024
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 51404800 | Self Similarity: -0.0056
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 51404800 | Self Similarity: -0.0007
Sparsity: 138.5 | Dead Features: 0 | Total Loss: 0.0733 | Reconstruction Loss: 0.0350 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 5140

 46%|████▌     | 25207/55054 [11:37<13:50, 35.94it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 51609600 | Self Similarity: 0.0004
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 51609600 | Self Similarity: 0.0129
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 51609600 | Self Similarity: 0.0026
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 51609600 | Self Similarity: -0.0056
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 51609600 | Self Similarity: -0.0007
Sparsity: 142.1 | Dead Features: 0 | Total Loss: 0.0732 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 5160

 46%|████▌     | 25307/55054 [11:39<14:57, 33.14it/s]

Sparsity: 19.1 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 51814400 | Self Similarity: 0.0006
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 51814400 | Self Similarity: 0.0131
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 51814400 | Self Similarity: 0.0023
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 51814400 | Self Similarity: -0.0059
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 51814400 | Self Similarity: -0.0005
Sparsity: 139.9 | Dead Features: 0 | Total Loss: 0.0712 | Reconstruction Loss: 0.0330 | L1 Loss: 0.0382 | l1_alpha: 8.0000e-04 | Tokens: 5181

 46%|████▌     | 25407/55054 [11:42<13:50, 35.68it/s]

Sparsity: 21.0 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 52019200 | Self Similarity: 0.0005
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 52019200 | Self Similarity: 0.0129
Sparsity: 46.1 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 52019200 | Self Similarity: 0.0025
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 52019200 | Self Similarity: -0.0059
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0466 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 52019200 | Self Similarity: -0.0006
Sparsity: 143.7 | Dead Features: 0 | Total Loss: 0.0708 | Reconstruction Loss: 0.0336 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 5201

 46%|████▋     | 25507/55054 [11:45<13:53, 35.46it/s]

Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 52224000 | Self Similarity: 0.0005
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 52224000 | Self Similarity: 0.0130
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 52224000 | Self Similarity: 0.0022
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 52224000 | Self Similarity: -0.0055
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 52224000 | Self Similarity: -0.0007
Sparsity: 144.6 | Dead Features: 0 | Total Loss: 0.0704 | Reconstruction Loss: 0.0327 | L1 Loss: 0.0377 | l1_alpha: 8.0000e-04 | Tokens: 5222

 47%|████▋     | 25607/55054 [11:48<13:41, 35.85it/s]

Sparsity: 23.6 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 52428800 | Self Similarity: 0.0006
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0154 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 52428800 | Self Similarity: 0.0132
Sparsity: 50.1 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 52428800 | Self Similarity: 0.0027
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 52428800 | Self Similarity: -0.0053
Sparsity: 124.9 | Dead Features: 0 | Total Loss: 0.0516 | Reconstruction Loss: 0.0264 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 52428800 | Self Similarity: -0.0008
Sparsity: 153.4 | Dead Features: 0 | Total Loss: 0.0769 | Reconstruction Loss: 0.0364 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 5242

 47%|████▋     | 25707/55054 [11:51<13:38, 35.87it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 52633600 | Self Similarity: 0.0005
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 52633600 | Self Similarity: 0.0131
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 52633600 | Self Similarity: 0.0022
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 52633600 | Self Similarity: -0.0059
Sparsity: 123.9 | Dead Features: 0 | Total Loss: 0.0523 | Reconstruction Loss: 0.0271 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 52633600 | Self Similarity: -0.0005
Sparsity: 155.7 | Dead Features: 0 | Total Loss: 0.0794 | Reconstruction Loss: 0.0379 | L1 Loss: 0.0415 | l1_alpha: 8.0000e-04 | Tokens: 5263

 47%|████▋     | 25807/55054 [11:53<13:47, 35.35it/s]

Sparsity: 19.5 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 52838400 | Self Similarity: 0.0005
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 52838400 | Self Similarity: 0.0128
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 52838400 | Self Similarity: 0.0024
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 52838400 | Self Similarity: -0.0056
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0465 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 52838400 | Self Similarity: -0.0008
Sparsity: 145.1 | Dead Features: 0 | Total Loss: 0.0693 | Reconstruction Loss: 0.0322 | L1 Loss: 0.0371 | l1_alpha: 8.0000e-04 | Tokens: 5283

 47%|████▋     | 25907/55054 [11:56<14:03, 34.54it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 53043200 | Self Similarity: 0.0007
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 53043200 | Self Similarity: 0.0128
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 53043200 | Self Similarity: 0.0024
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 53043200 | Self Similarity: -0.0056
Sparsity: 122.4 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 53043200 | Self Similarity: -0.0008
Sparsity: 148.4 | Dead Features: 0 | Total Loss: 0.0733 | Reconstruction Loss: 0.0343 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 5304

 47%|████▋     | 26007/55054 [11:59<13:58, 34.66it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 53248000 | Self Similarity: 0.0007
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 53248000 | Self Similarity: 0.0134
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 53248000 | Self Similarity: 0.0026
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 53248000 | Self Similarity: -0.0054
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 53248000 | Self Similarity: -0.0008
Sparsity: 151.0 | Dead Features: 0 | Total Loss: 0.0723 | Reconstruction Loss: 0.0334 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 5324

 47%|████▋     | 26107/55054 [12:02<13:51, 34.81it/s]

Sparsity: 23.6 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 53452800 | Self Similarity: 0.0009
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0144 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 53452800 | Self Similarity: 0.0133
Sparsity: 48.4 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 53452800 | Self Similarity: 0.0028
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 53452800 | Self Similarity: -0.0052
Sparsity: 123.8 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 53452800 | Self Similarity: -0.0008
Sparsity: 155.7 | Dead Features: 0 | Total Loss: 0.0742 | Reconstruction Loss: 0.0343 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 5345

 48%|████▊     | 26204/55054 [12:05<13:16, 36.20it/s]

Sparsity: 16.7 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0061 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 53657600 | Self Similarity: 0.0008
Sparsity: 33.4 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 53657600 | Self Similarity: 0.0132
Sparsity: 45.8 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 53657600 | Self Similarity: 0.0024
Sparsity: 114.6 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 53657600 | Self Similarity: -0.0050
Sparsity: 117.9 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 53657600 | Self Similarity: -0.0005
Sparsity: 104.3 | Dead Features: 0 | Total Loss: 0.0849 | Reconstruction Loss: 0.0509 | L1 Loss: 0.0340 | l1_alpha: 8.0000e-04 | Tokens: 5365

 48%|████▊     | 26304/55054 [12:08<13:58, 34.28it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 53862400 | Self Similarity: -0.0007
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 53862400 | Self Similarity: 0.0131
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 53862400 | Self Similarity: 0.0024
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 53862400 | Self Similarity: -0.0053
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0499 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 53862400 | Self Similarity: -0.0004
Sparsity: 123.3 | Dead Features: 0 | Total Loss: 0.0776 | Reconstruction Loss: 0.0399 | L1 Loss: 0.0377 | l1_alpha: 8.0000e-04 | Tokens: 538

 48%|████▊     | 26405/55054 [12:10<13:35, 35.14it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 54067200 | Self Similarity: -0.0004
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 54067200 | Self Similarity: 0.0130
Sparsity: 46.1 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 54067200 | Self Similarity: 0.0026
Sparsity: 113.9 | Dead Features: 0 | Total Loss: 0.0377 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 54067200 | Self Similarity: -0.0052
Sparsity: 118.3 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0232 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 54067200 | Self Similarity: -0.0009
Sparsity: 129.2 | Dead Features: 0 | Total Loss: 0.0733 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0368 | l1_alpha: 8.0000e-04 | Tokens: 540

 48%|████▊     | 26505/55054 [12:13<13:34, 35.07it/s]

Sparsity: 19.1 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 54272000 | Self Similarity: -0.0001
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 54272000 | Self Similarity: 0.0129
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 54272000 | Self Similarity: 0.0025
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 54272000 | Self Similarity: -0.0054
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 54272000 | Self Similarity: -0.0007
Sparsity: 134.1 | Dead Features: 0 | Total Loss: 0.0717 | Reconstruction Loss: 0.0347 | L1 Loss: 0.0370 | l1_alpha: 8.0000e-04 | Tokens: 542

 48%|████▊     | 26605/55054 [12:16<13:32, 35.01it/s]

Sparsity: 23.0 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 54476800 | Self Similarity: 0.0001
Sparsity: 38.5 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 54476800 | Self Similarity: 0.0131
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0129 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 54476800 | Self Similarity: 0.0024
Sparsity: 120.8 | Dead Features: 0 | Total Loss: 0.0419 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 54476800 | Self Similarity: -0.0053
Sparsity: 126.2 | Dead Features: 0 | Total Loss: 0.0532 | Reconstruction Loss: 0.0267 | L1 Loss: 0.0265 | l1_alpha: 8.0000e-04 | Tokens: 54476800 | Self Similarity: -0.0005
Sparsity: 144.2 | Dead Features: 0 | Total Loss: 0.0820 | Reconstruction Loss: 0.0394 | L1 Loss: 0.0425 | l1_alpha: 8.0000e-04 | Tokens: 5447

 49%|████▊     | 26705/55054 [12:19<13:26, 35.14it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 54681600 | Self Similarity: 0.0001
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 54681600 | Self Similarity: 0.0130
Sparsity: 50.4 | Dead Features: 0 | Total Loss: 0.0207 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 54681600 | Self Similarity: 0.0026
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 54681600 | Self Similarity: -0.0051
Sparsity: 124.5 | Dead Features: 0 | Total Loss: 0.0516 | Reconstruction Loss: 0.0261 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 54681600 | Self Similarity: -0.0005
Sparsity: 141.2 | Dead Features: 0 | Total Loss: 0.0752 | Reconstruction Loss: 0.0352 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 5468

 49%|████▊     | 26805/55054 [12:22<13:33, 34.72it/s]

Sparsity: 19.0 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 54886400 | Self Similarity: 0.0001
Sparsity: 34.8 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 54886400 | Self Similarity: 0.0132
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 54886400 | Self Similarity: 0.0024
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 54886400 | Self Similarity: -0.0053
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 54886400 | Self Similarity: -0.0006
Sparsity: 140.3 | Dead Features: 0 | Total Loss: 0.0713 | Reconstruction Loss: 0.0333 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 5488

 49%|████▉     | 26905/55054 [12:25<13:35, 34.51it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 55091200 | Self Similarity: 0.0001
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 55091200 | Self Similarity: 0.0131
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 55091200 | Self Similarity: 0.0026
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 55091200 | Self Similarity: -0.0053
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 55091200 | Self Similarity: -0.0007
Sparsity: 142.1 | Dead Features: 0 | Total Loss: 0.0718 | Reconstruction Loss: 0.0338 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 5509

 49%|████▉     | 27005/55054 [12:27<13:05, 35.72it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 55296000 | Self Similarity: 0.0004
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0152 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 55296000 | Self Similarity: 0.0134
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 55296000 | Self Similarity: 0.0022
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 55296000 | Self Similarity: -0.0054
Sparsity: 121.4 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 55296000 | Self Similarity: -0.0006
Sparsity: 148.6 | Dead Features: 0 | Total Loss: 0.0754 | Reconstruction Loss: 0.0359 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 5529

 49%|████▉     | 27105/55054 [12:30<12:58, 35.91it/s]

Sparsity: 18.7 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 55500800 | Self Similarity: 0.0005
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 55500800 | Self Similarity: 0.0131
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 55500800 | Self Similarity: 0.0023
Sparsity: 113.6 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 55500800 | Self Similarity: -0.0052
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 55500800 | Self Similarity: -0.0009
Sparsity: 142.9 | Dead Features: 0 | Total Loss: 0.0715 | Reconstruction Loss: 0.0330 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 5550

 49%|████▉     | 27206/55054 [12:33<12:55, 35.93it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 55705600 | Self Similarity: 0.0004
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 55705600 | Self Similarity: 0.0132
Sparsity: 49.2 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 55705600 | Self Similarity: 0.0026
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 55705600 | Self Similarity: -0.0053
Sparsity: 117.1 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 55705600 | Self Similarity: -0.0006
Sparsity: 144.1 | Dead Features: 0 | Total Loss: 0.0742 | Reconstruction Loss: 0.0342 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 5570

 50%|████▉     | 27307/55054 [12:36<12:46, 36.20it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 55910400 | Self Similarity: 0.0004
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 55910400 | Self Similarity: 0.0134
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 55910400 | Self Similarity: 0.0026
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 55910400 | Self Similarity: -0.0053
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 55910400 | Self Similarity: -0.0008
Sparsity: 147.7 | Dead Features: 0 | Total Loss: 0.0732 | Reconstruction Loss: 0.0340 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 5591

 50%|████▉     | 27407/55054 [12:39<12:56, 35.60it/s]

Sparsity: 23.6 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 56115200 | Self Similarity: 0.0006
Sparsity: 38.9 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 56115200 | Self Similarity: 0.0134
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0215 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 56115200 | Self Similarity: 0.0028
Sparsity: 120.3 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 56115200 | Self Similarity: -0.0052
Sparsity: 125.7 | Dead Features: 0 | Total Loss: 0.0522 | Reconstruction Loss: 0.0264 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 56115200 | Self Similarity: -0.0005
Sparsity: 155.0 | Dead Features: 0 | Total Loss: 0.0790 | Reconstruction Loss: 0.0371 | L1 Loss: 0.0419 | l1_alpha: 8.0000e-04 | Tokens: 5611

 50%|████▉     | 27507/55054 [12:41<11:27, 40.08it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 56320000 | Self Similarity: 0.0006
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 56320000 | Self Similarity: 0.0132
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 56320000 | Self Similarity: 0.0026
Sparsity: 117.2 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 56320000 | Self Similarity: -0.0052
Sparsity: 122.0 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 56320000 | Self Similarity: -0.0007
Sparsity: 152.2 | Dead Features: 0 | Total Loss: 0.0754 | Reconstruction Loss: 0.0355 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 5632

 50%|█████     | 27604/55054 [12:44<12:40, 36.10it/s]

Sparsity: 22.6 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 56524800 | Self Similarity: 0.0006
Sparsity: 38.7 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 56524800 | Self Similarity: 0.0128
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 56524800 | Self Similarity: 0.0027
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0395 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 56524800 | Self Similarity: -0.0054
Sparsity: 122.7 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 56524800 | Self Similarity: -0.0007
Sparsity: 152.4 | Dead Features: 0 | Total Loss: 0.0749 | Reconstruction Loss: 0.0351 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 5652

 50%|█████     | 27704/55054 [12:47<12:41, 35.93it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 56729600 | Self Similarity: 0.0004
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 56729600 | Self Similarity: 0.0130
Sparsity: 47.5 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 56729600 | Self Similarity: 0.0025
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 56729600 | Self Similarity: -0.0058
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 56729600 | Self Similarity: -0.0007
Sparsity: 149.3 | Dead Features: 0 | Total Loss: 0.0703 | Reconstruction Loss: 0.0321 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 5672

 51%|█████     | 27804/55054 [12:49<12:44, 35.65it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 56934400 | Self Similarity: 0.0005
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 56934400 | Self Similarity: 0.0131
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 56934400 | Self Similarity: 0.0027
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 56934400 | Self Similarity: -0.0056
Sparsity: 122.5 | Dead Features: 0 | Total Loss: 0.0495 | Reconstruction Loss: 0.0250 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 56934400 | Self Similarity: -0.0007
Sparsity: 150.4 | Dead Features: 0 | Total Loss: 0.0733 | Reconstruction Loss: 0.0350 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 5693

 51%|█████     | 27904/55054 [12:52<11:22, 39.77it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 57139200 | Self Similarity: 0.0005
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 57139200 | Self Similarity: 0.0132
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 57139200 | Self Similarity: 0.0028
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 57139200 | Self Similarity: -0.0057
Sparsity: 117.8 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 57139200 | Self Similarity: -0.0005
Sparsity: 150.9 | Dead Features: 0 | Total Loss: 0.0723 | Reconstruction Loss: 0.0335 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 5713

 51%|█████     | 28004/55054 [12:55<12:36, 35.76it/s]

Sparsity: 21.9 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 57344000 | Self Similarity: 0.0004
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 57344000 | Self Similarity: 0.0132
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 57344000 | Self Similarity: 0.0029
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 57344000 | Self Similarity: -0.0057
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 57344000 | Self Similarity: -0.0004
Sparsity: 150.6 | Dead Features: 0 | Total Loss: 0.0705 | Reconstruction Loss: 0.0328 | L1 Loss: 0.0377 | l1_alpha: 8.0000e-04 | Tokens: 5734

 51%|█████     | 28105/55054 [12:58<12:35, 35.65it/s]

Sparsity: 19.3 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 57548800 | Self Similarity: 0.0005
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 57548800 | Self Similarity: 0.0131
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 57548800 | Self Similarity: 0.0028
Sparsity: 114.8 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 57548800 | Self Similarity: -0.0061
Sparsity: 120.2 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 57548800 | Self Similarity: -0.0005
Sparsity: 150.2 | Dead Features: 0 | Total Loss: 0.0712 | Reconstruction Loss: 0.0324 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 5754

 51%|█████     | 28205/55054 [13:00<12:30, 35.80it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 57753600 | Self Similarity: 0.0005
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 57753600 | Self Similarity: 0.0130
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 57753600 | Self Similarity: 0.0029
Sparsity: 116.4 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 57753600 | Self Similarity: -0.0060
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 57753600 | Self Similarity: -0.0007
Sparsity: 153.3 | Dead Features: 0 | Total Loss: 0.0713 | Reconstruction Loss: 0.0324 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 5775

 51%|█████▏    | 28305/55054 [13:03<12:19, 36.16it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 57958400 | Self Similarity: 0.0008
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 57958400 | Self Similarity: 0.0130
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 57958400 | Self Similarity: 0.0028
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 57958400 | Self Similarity: -0.0061
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 57958400 | Self Similarity: -0.0005
Sparsity: 157.8 | Dead Features: 0 | Total Loss: 0.0773 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 5795

 52%|█████▏    | 28405/55054 [13:06<12:16, 36.19it/s]

Sparsity: 24.5 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 58163200 | Self Similarity: 0.0006
Sparsity: 38.8 | Dead Features: 0 | Total Loss: 0.0151 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 58163200 | Self Similarity: 0.0129
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0209 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 58163200 | Self Similarity: 0.0028
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0417 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 58163200 | Self Similarity: -0.0058
Sparsity: 125.9 | Dead Features: 0 | Total Loss: 0.0525 | Reconstruction Loss: 0.0267 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 58163200 | Self Similarity: -0.0004
Sparsity: 153.7 | Dead Features: 0 | Total Loss: 0.0765 | Reconstruction Loss: 0.0361 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 5816

 52%|█████▏    | 28505/55054 [13:09<12:21, 35.80it/s]

Sparsity: 19.9 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 58368000 | Self Similarity: 0.0005
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 58368000 | Self Similarity: 0.0128
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 58368000 | Self Similarity: 0.0026
Sparsity: 114.1 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 58368000 | Self Similarity: -0.0059
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0470 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 58368000 | Self Similarity: -0.0007
Sparsity: 149.4 | Dead Features: 0 | Total Loss: 0.0685 | Reconstruction Loss: 0.0317 | L1 Loss: 0.0368 | l1_alpha: 8.0000e-04 | Tokens: 5836

 52%|█████▏    | 28605/55054 [13:12<12:25, 35.47it/s]

Sparsity: 19.3 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 58572800 | Self Similarity: 0.0006
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 58572800 | Self Similarity: 0.0130
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 58572800 | Self Similarity: 0.0030
Sparsity: 115.4 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 58572800 | Self Similarity: -0.0059
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 58572800 | Self Similarity: -0.0008
Sparsity: 150.8 | Dead Features: 0 | Total Loss: 0.0694 | Reconstruction Loss: 0.0314 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 5857

 52%|█████▏    | 28705/55054 [13:14<12:08, 36.18it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 58777600 | Self Similarity: 0.0006
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 58777600 | Self Similarity: 0.0130
Sparsity: 47.4 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 58777600 | Self Similarity: 0.0028
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 58777600 | Self Similarity: -0.0060
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 58777600 | Self Similarity: -0.0004
Sparsity: 138.6 | Dead Features: 0 | Total Loss: 0.0709 | Reconstruction Loss: 0.0336 | L1 Loss: 0.0373 | l1_alpha: 8.0000e-04 | Tokens: 5877

 52%|█████▏    | 28805/55054 [13:17<12:29, 35.01it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 58982400 | Self Similarity: 0.0004
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 58982400 | Self Similarity: 0.0130
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 58982400 | Self Similarity: 0.0025
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 58982400 | Self Similarity: -0.0059
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 58982400 | Self Similarity: -0.0007
Sparsity: 144.4 | Dead Features: 0 | Total Loss: 0.0700 | Reconstruction Loss: 0.0324 | L1 Loss: 0.0376 | l1_alpha: 8.0000e-04 | Tokens: 5898

 53%|█████▎    | 28907/55054 [13:20<11:31, 37.80it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 59187200 | Self Similarity: 0.0004
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 59187200 | Self Similarity: 0.0129
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 59187200 | Self Similarity: 0.0024
Sparsity: 117.6 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 59187200 | Self Similarity: -0.0056
Sparsity: 124.6 | Dead Features: 0 | Total Loss: 0.0521 | Reconstruction Loss: 0.0262 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 59187200 | Self Similarity: -0.0008
Sparsity: 155.3 | Dead Features: 0 | Total Loss: 0.0783 | Reconstruction Loss: 0.0365 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 5918

 53%|█████▎    | 29004/55054 [13:23<11:58, 36.25it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 59392000 | Self Similarity: 0.0004
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 59392000 | Self Similarity: 0.0128
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0122 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 59392000 | Self Similarity: 0.0023
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 59392000 | Self Similarity: -0.0059
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0498 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 59392000 | Self Similarity: -0.0005
Sparsity: 148.2 | Dead Features: 0 | Total Loss: 0.0751 | Reconstruction Loss: 0.0351 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 5939

 53%|█████▎    | 29104/55054 [13:25<12:00, 36.03it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 59596800 | Self Similarity: 0.0006
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 59596800 | Self Similarity: 0.0129
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 0.0189 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 59596800 | Self Similarity: 0.0024
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 59596800 | Self Similarity: -0.0057
Sparsity: 118.5 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 59596800 | Self Similarity: -0.0006
Sparsity: 150.4 | Dead Features: 0 | Total Loss: 0.0723 | Reconstruction Loss: 0.0332 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens: 5959

 53%|█████▎    | 29206/55054 [13:28<12:02, 35.80it/s]

Sparsity: 18.4 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 59801600 | Self Similarity: 0.0006
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 59801600 | Self Similarity: 0.0128
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 59801600 | Self Similarity: 0.0022
Sparsity: 115.8 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 59801600 | Self Similarity: -0.0055
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 59801600 | Self Similarity: -0.0006
Sparsity: 146.8 | Dead Features: 0 | Total Loss: 0.0703 | Reconstruction Loss: 0.0323 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 5980

 53%|█████▎    | 29305/55054 [13:31<12:08, 35.35it/s]

Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 60006400 | Self Similarity: 0.0007
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 60006400 | Self Similarity: 0.0131
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 60006400 | Self Similarity: 0.0025
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 60006400 | Self Similarity: -0.0057
Sparsity: 122.2 | Dead Features: 0 | Total Loss: 0.0485 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 60006400 | Self Similarity: -0.0007
Sparsity: 153.7 | Dead Features: 0 | Total Loss: 0.0717 | Reconstruction Loss: 0.0324 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 6000

 53%|█████▎    | 29407/55054 [13:33<12:11, 35.04it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 60211200 | Self Similarity: 0.0009
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 60211200 | Self Similarity: 0.0129
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 60211200 | Self Similarity: 0.0027
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 60211200 | Self Similarity: -0.0055
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 60211200 | Self Similarity: -0.0006
Sparsity: 151.6 | Dead Features: 0 | Total Loss: 0.0707 | Reconstruction Loss: 0.0334 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 6021

 54%|█████▎    | 29506/55054 [13:36<11:19, 37.59it/s]

Sparsity: 19.1 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 60416000 | Self Similarity: 0.0010
Sparsity: 34.2 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 60416000 | Self Similarity: 0.0130
Sparsity: 45.9 | Dead Features: 0 | Total Loss: 0.0188 | Reconstruction Loss: 0.0111 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 60416000 | Self Similarity: 0.0026
Sparsity: 113.9 | Dead Features: 0 | Total Loss: 0.0376 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 60416000 | Self Similarity: -0.0052
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0456 | Reconstruction Loss: 0.0226 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 60416000 | Self Similarity: -0.0005
Sparsity: 148.9 | Dead Features: 0 | Total Loss: 0.0672 | Reconstruction Loss: 0.0305 | L1 Loss: 0.0367 | l1_alpha: 8.0000e-04 | Tokens: 6041

 54%|█████▍    | 29606/55054 [13:39<12:13, 34.69it/s]

Sparsity: 21.1 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 60620800 | Self Similarity: 0.0010
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 60620800 | Self Similarity: 0.0130
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 60620800 | Self Similarity: 0.0021
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 60620800 | Self Similarity: -0.0055
Sparsity: 120.0 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 60620800 | Self Similarity: -0.0009
Sparsity: 154.8 | Dead Features: 0 | Total Loss: 0.0728 | Reconstruction Loss: 0.0339 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 6062

 54%|█████▍    | 29707/55054 [13:42<10:29, 40.26it/s]

Sparsity: 21.7 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 60825600 | Self Similarity: 0.0009
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 60825600 | Self Similarity: 0.0128
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 60825600 | Self Similarity: 0.0022
Sparsity: 112.1 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 60825600 | Self Similarity: -0.0052
Sparsity: 118.1 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 60825600 | Self Similarity: -0.0004
Sparsity: 152.5 | Dead Features: 0 | Total Loss: 0.0717 | Reconstruction Loss: 0.0323 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 6082

 54%|█████▍    | 29804/55054 [13:44<11:53, 35.39it/s]

Sparsity: 19.9 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 61030400 | Self Similarity: 0.0008
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 61030400 | Self Similarity: 0.0131
Sparsity: 45.7 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 61030400 | Self Similarity: 0.0020
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 61030400 | Self Similarity: -0.0052
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 61030400 | Self Similarity: -0.0006
Sparsity: 122.0 | Dead Features: 0 | Total Loss: 0.0798 | Reconstruction Loss: 0.0448 | L1 Loss: 0.0350 | l1_alpha: 8.0000e-04 | Tokens: 6103

 54%|█████▍    | 29904/55054 [13:47<11:34, 36.20it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 61235200 | Self Similarity: -0.0001
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 61235200 | Self Similarity: 0.0128
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 61235200 | Self Similarity: 0.0022
Sparsity: 118.4 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 61235200 | Self Similarity: -0.0052
Sparsity: 121.3 | Dead Features: 0 | Total Loss: 0.0505 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 61235200 | Self Similarity: -0.0008
Sparsity: 138.5 | Dead Features: 0 | Total Loss: 0.0798 | Reconstruction Loss: 0.0410 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 612

 55%|█████▍    | 30005/55054 [13:50<12:16, 33.99it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 61440000 | Self Similarity: -0.0001
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 61440000 | Self Similarity: 0.0127
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 61440000 | Self Similarity: 0.0020
Sparsity: 116.5 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 61440000 | Self Similarity: -0.0055
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0487 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 61440000 | Self Similarity: -0.0008
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0733 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 614

 55%|█████▍    | 30105/55054 [13:53<12:04, 34.45it/s]

Sparsity: 19.1 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 61644800 | Self Similarity: 0.0002
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 61644800 | Self Similarity: 0.0129
Sparsity: 46.0 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 61644800 | Self Similarity: 0.0020
Sparsity: 113.3 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 61644800 | Self Similarity: -0.0054
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0465 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 61644800 | Self Similarity: -0.0010
Sparsity: 139.5 | Dead Features: 0 | Total Loss: 0.0710 | Reconstruction Loss: 0.0337 | L1 Loss: 0.0373 | l1_alpha: 8.0000e-04 | Tokens: 6164

 55%|█████▍    | 30205/55054 [13:56<11:50, 34.99it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 61849600 | Self Similarity: 0.0003
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 61849600 | Self Similarity: 0.0127
Sparsity: 46.9 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 61849600 | Self Similarity: 0.0021
Sparsity: 113.8 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 61849600 | Self Similarity: -0.0054
Sparsity: 119.3 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 61849600 | Self Similarity: -0.0008
Sparsity: 142.4 | Dead Features: 0 | Total Loss: 0.0710 | Reconstruction Loss: 0.0332 | L1 Loss: 0.0378 | l1_alpha: 8.0000e-04 | Tokens: 6184

 55%|█████▌    | 30307/55054 [13:59<12:05, 34.10it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 62054400 | Self Similarity: 0.0003
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 62054400 | Self Similarity: 0.0127
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 62054400 | Self Similarity: 0.0022
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 62054400 | Self Similarity: -0.0055
Sparsity: 120.1 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 62054400 | Self Similarity: -0.0009
Sparsity: 146.0 | Dead Features: 0 | Total Loss: 0.0696 | Reconstruction Loss: 0.0325 | L1 Loss: 0.0371 | l1_alpha: 8.0000e-04 | Tokens: 6205

 55%|█████▌    | 30406/55054 [14:01<11:52, 34.61it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 62259200 | Self Similarity: 0.0003
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 62259200 | Self Similarity: 0.0128
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 62259200 | Self Similarity: 0.0026
Sparsity: 119.0 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 62259200 | Self Similarity: -0.0055
Sparsity: 121.5 | Dead Features: 0 | Total Loss: 0.0509 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 62259200 | Self Similarity: -0.0004
Sparsity: 152.1 | Dead Features: 0 | Total Loss: 0.0745 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 6225

 55%|█████▌    | 30508/55054 [14:04<10:04, 40.61it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 62464000 | Self Similarity: 0.0002
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 62464000 | Self Similarity: 0.0127
Sparsity: 47.9 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 62464000 | Self Similarity: 0.0022
Sparsity: 118.0 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 62464000 | Self Similarity: -0.0056
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0494 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 62464000 | Self Similarity: -0.0008
Sparsity: 154.2 | Dead Features: 0 | Total Loss: 0.0728 | Reconstruction Loss: 0.0344 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 6246

 56%|█████▌    | 30605/55054 [14:07<11:57, 34.07it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 62668800 | Self Similarity: 0.0003
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 62668800 | Self Similarity: 0.0128
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 62668800 | Self Similarity: 0.0021
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0186 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 62668800 | Self Similarity: -0.0054
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0243 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 62668800 | Self Similarity: -0.0008
Sparsity: 151.3 | Dead Features: 0 | Total Loss: 0.0722 | Reconstruction Loss: 0.0335 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 6266

 56%|█████▌    | 30707/55054 [14:10<11:40, 34.77it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 62873600 | Self Similarity: 0.0004
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 62873600 | Self Similarity: 0.0128
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0188 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 62873600 | Self Similarity: 0.0023
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 62873600 | Self Similarity: -0.0054
Sparsity: 119.6 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 62873600 | Self Similarity: -0.0006
Sparsity: 153.2 | Dead Features: 0 | Total Loss: 0.0707 | Reconstruction Loss: 0.0326 | L1 Loss: 0.0381 | l1_alpha: 8.0000e-04 | Tokens: 6287

 56%|█████▌    | 30805/55054 [14:12<11:26, 35.33it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 63078400 | Self Similarity: 0.0004
Sparsity: 34.4 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 63078400 | Self Similarity: 0.0128
Sparsity: 45.8 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 63078400 | Self Similarity: 0.0023
Sparsity: 114.2 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 63078400 | Self Similarity: -0.0055
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0477 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 63078400 | Self Similarity: -0.0007
Sparsity: 109.8 | Dead Features: 0 | Total Loss: 0.0874 | Reconstruction Loss: 0.0533 | L1 Loss: 0.0341 | l1_alpha: 8.0000e-04 | Tokens: 6307

 56%|█████▌    | 30905/55054 [14:15<11:41, 34.42it/s]

Sparsity: 19.3 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 63283200 | Self Similarity: -0.0001
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 63283200 | Self Similarity: 0.0129
Sparsity: 51.6 | Dead Features: 0 | Total Loss: 0.0223 | Reconstruction Loss: 0.0138 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 63283200 | Self Similarity: 0.0026
Sparsity: 121.9 | Dead Features: 0 | Total Loss: 0.0429 | Reconstruction Loss: 0.0213 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 63283200 | Self Similarity: -0.0057
Sparsity: 126.8 | Dead Features: 0 | Total Loss: 0.0537 | Reconstruction Loss: 0.0278 | L1 Loss: 0.0258 | l1_alpha: 8.0000e-04 | Tokens: 63283200 | Self Similarity: -0.0008
Sparsity: 140.1 | Dead Features: 0 | Total Loss: 0.0815 | Reconstruction Loss: 0.0394 | L1 Loss: 0.0421 | l1_alpha: 8.0000e-04 | Tokens: 632

 56%|█████▋    | 31005/55054 [14:18<11:31, 34.80it/s]

Sparsity: 19.3 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 63488000 | Self Similarity: 0.0001
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 63488000 | Self Similarity: 0.0128
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0206 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 63488000 | Self Similarity: 0.0025
Sparsity: 119.7 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 63488000 | Self Similarity: -0.0057
Sparsity: 122.7 | Dead Features: 0 | Total Loss: 0.0506 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 63488000 | Self Similarity: -0.0009
Sparsity: 142.7 | Dead Features: 0 | Total Loss: 0.0752 | Reconstruction Loss: 0.0366 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 6348

 56%|█████▋    | 31105/55054 [14:21<11:31, 34.61it/s]

Sparsity: 21.8 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 63692800 | Self Similarity: 0.0002
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 63692800 | Self Similarity: 0.0131
Sparsity: 48.5 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 63692800 | Self Similarity: 0.0024
Sparsity: 114.4 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 63692800 | Self Similarity: -0.0056
Sparsity: 122.0 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 63692800 | Self Similarity: -0.0010
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0754 | Reconstruction Loss: 0.0360 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 6369

 57%|█████▋    | 31206/55054 [14:24<11:15, 35.28it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 63897600 | Self Similarity: 0.0002
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 63897600 | Self Similarity: 0.0129
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 63897600 | Self Similarity: 0.0025
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 63897600 | Self Similarity: -0.0056
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 63897600 | Self Similarity: -0.0008
Sparsity: 147.3 | Dead Features: 0 | Total Loss: 0.0735 | Reconstruction Loss: 0.0345 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 6389

 57%|█████▋    | 31306/55054 [14:27<11:06, 35.66it/s]

Sparsity: 18.2 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 64102400 | Self Similarity: 0.0002
Sparsity: 33.7 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 64102400 | Self Similarity: 0.0129
Sparsity: 45.0 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 64102400 | Self Similarity: 0.0022
Sparsity: 112.2 | Dead Features: 0 | Total Loss: 0.0374 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0194 | l1_alpha: 8.0000e-04 | Tokens: 64102400 | Self Similarity: -0.0054
Sparsity: 116.0 | Dead Features: 0 | Total Loss: 0.0464 | Reconstruction Loss: 0.0229 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 64102400 | Self Similarity: -0.0007
Sparsity: 142.9 | Dead Features: 0 | Total Loss: 0.0697 | Reconstruction Loss: 0.0322 | L1 Loss: 0.0375 | l1_alpha: 8.0000e-04 | Tokens: 6410

 57%|█████▋    | 31407/55054 [14:30<11:03, 35.67it/s]

Sparsity: 22.7 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 64307200 | Self Similarity: 0.0002
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 64307200 | Self Similarity: 0.0129
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 64307200 | Self Similarity: 0.0023
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 64307200 | Self Similarity: -0.0055
Sparsity: 123.7 | Dead Features: 0 | Total Loss: 0.0506 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 64307200 | Self Similarity: -0.0011
Sparsity: 150.6 | Dead Features: 0 | Total Loss: 0.0773 | Reconstruction Loss: 0.0364 | L1 Loss: 0.0409 | l1_alpha: 8.0000e-04 | Tokens: 6430

 57%|█████▋    | 31507/55054 [14:32<11:04, 35.44it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 64512000 | Self Similarity: 0.0003
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 64512000 | Self Similarity: 0.0127
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0200 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 64512000 | Self Similarity: 0.0019
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 64512000 | Self Similarity: -0.0055
Sparsity: 118.7 | Dead Features: 0 | Total Loss: 0.0489 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 64512000 | Self Similarity: -0.0008
Sparsity: 146.5 | Dead Features: 0 | Total Loss: 0.0713 | Reconstruction Loss: 0.0328 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 6451

 57%|█████▋    | 31607/55054 [14:35<10:50, 36.06it/s]

Sparsity: 21.5 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 64716800 | Self Similarity: 0.0003
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 64716800 | Self Similarity: 0.0129
Sparsity: 46.4 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 64716800 | Self Similarity: 0.0019
Sparsity: 113.9 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 64716800 | Self Similarity: -0.0056
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 64716800 | Self Similarity: -0.0008
Sparsity: 150.0 | Dead Features: 0 | Total Loss: 0.0719 | Reconstruction Loss: 0.0334 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 6471

 58%|█████▊    | 31707/55054 [14:38<10:51, 35.85it/s]

Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 64921600 | Self Similarity: 0.0004
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 64921600 | Self Similarity: 0.0127
Sparsity: 46.8 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 64921600 | Self Similarity: 0.0021
Sparsity: 114.6 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 64921600 | Self Similarity: -0.0054
Sparsity: 119.2 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 64921600 | Self Similarity: -0.0006
Sparsity: 149.2 | Dead Features: 0 | Total Loss: 0.0704 | Reconstruction Loss: 0.0323 | L1 Loss: 0.0380 | l1_alpha: 8.0000e-04 | Tokens: 6492

 58%|█████▊    | 31807/55054 [14:41<11:01, 35.14it/s]

Sparsity: 23.3 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 65126400 | Self Similarity: 0.0006
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 65126400 | Self Similarity: 0.0130
Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 65126400 | Self Similarity: 0.0021
Sparsity: 119.8 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 65126400 | Self Similarity: -0.0053
Sparsity: 124.9 | Dead Features: 0 | Total Loss: 0.0513 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 65126400 | Self Similarity: -0.0004
Sparsity: 159.6 | Dead Features: 0 | Total Loss: 0.0772 | Reconstruction Loss: 0.0360 | L1 Loss: 0.0413 | l1_alpha: 8.0000e-04 | Tokens: 6512

 58%|█████▊    | 31905/55054 [14:43<10:20, 37.30it/s]

Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 65331200 | Self Similarity: 0.0005
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 65331200 | Self Similarity: 0.0127
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 65331200 | Self Similarity: 0.0017
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 65331200 | Self Similarity: -0.0051
Sparsity: 121.1 | Dead Features: 0 | Total Loss: 0.0493 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 65331200 | Self Similarity: -0.0006
Sparsity: 149.4 | Dead Features: 0 | Total Loss: 0.0751 | Reconstruction Loss: 0.0362 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 6533

 58%|█████▊    | 32005/55054 [14:46<10:35, 36.28it/s]

Sparsity: 22.2 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 65536000 | Self Similarity: 0.0005
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 65536000 | Self Similarity: 0.0131
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 65536000 | Self Similarity: 0.0017
Sparsity: 116.8 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 65536000 | Self Similarity: -0.0053
Sparsity: 121.8 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 65536000 | Self Similarity: -0.0005
Sparsity: 152.7 | Dead Features: 0 | Total Loss: 0.0752 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 6553

 58%|█████▊    | 32105/55054 [14:49<11:05, 34.48it/s]

Sparsity: 21.0 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 65740800 | Self Similarity: 0.0005
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 65740800 | Self Similarity: 0.0129
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 65740800 | Self Similarity: 0.0020
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 65740800 | Self Similarity: -0.0054
Sparsity: 122.6 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 65740800 | Self Similarity: -0.0008
Sparsity: 152.9 | Dead Features: 0 | Total Loss: 0.0747 | Reconstruction Loss: 0.0351 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 6574

 58%|█████▊    | 32205/55054 [14:52<10:45, 35.40it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 65945600 | Self Similarity: 0.0005
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 65945600 | Self Similarity: 0.0132
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.0195 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 65945600 | Self Similarity: 0.0022
Sparsity: 116.3 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 65945600 | Self Similarity: -0.0054
Sparsity: 121.9 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 65945600 | Self Similarity: -0.0007
Sparsity: 141.8 | Dead Features: 0 | Total Loss: 0.0735 | Reconstruction Loss: 0.0347 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 6594

 59%|█████▊    | 32305/55054 [14:54<10:38, 35.63it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 66150400 | Self Similarity: 0.0003
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 66150400 | Self Similarity: 0.0131
Sparsity: 48.8 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 66150400 | Self Similarity: 0.0022
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 66150400 | Self Similarity: -0.0052
Sparsity: 121.0 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 66150400 | Self Similarity: -0.0006
Sparsity: 146.4 | Dead Features: 0 | Total Loss: 0.0724 | Reconstruction Loss: 0.0341 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 6615

 59%|█████▉    | 32407/55054 [14:57<10:25, 36.23it/s]

Sparsity: 20.5 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 66355200 | Self Similarity: 0.0004
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 66355200 | Self Similarity: 0.0132
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 66355200 | Self Similarity: 0.0024
Sparsity: 115.6 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 66355200 | Self Similarity: -0.0053
Sparsity: 118.8 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 66355200 | Self Similarity: -0.0007
Sparsity: 148.7 | Dead Features: 0 | Total Loss: 0.0711 | Reconstruction Loss: 0.0325 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 6635

 59%|█████▉    | 32507/55054 [15:00<10:30, 35.74it/s]

Sparsity: 21.6 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 66560000 | Self Similarity: 0.0004
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 66560000 | Self Similarity: 0.0130
Sparsity: 50.9 | Dead Features: 0 | Total Loss: 0.0212 | Reconstruction Loss: 0.0130 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 66560000 | Self Similarity: 0.0023
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 66560000 | Self Similarity: -0.0055
Sparsity: 124.3 | Dead Features: 0 | Total Loss: 0.0516 | Reconstruction Loss: 0.0261 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 66560000 | Self Similarity: -0.0007
Sparsity: 158.9 | Dead Features: 0 | Total Loss: 0.0790 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0421 | l1_alpha: 8.0000e-04 | Tokens: 6656

 59%|█████▉    | 32607/55054 [15:03<10:23, 35.98it/s]

Sparsity: 23.4 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 66764800 | Self Similarity: 0.0005
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 66764800 | Self Similarity: 0.0131
Sparsity: 48.3 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 66764800 | Self Similarity: 0.0024
Sparsity: 116.7 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 66764800 | Self Similarity: -0.0054
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0244 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 66764800 | Self Similarity: -0.0006
Sparsity: 151.5 | Dead Features: 0 | Total Loss: 0.0733 | Reconstruction Loss: 0.0335 | L1 Loss: 0.0398 | l1_alpha: 8.0000e-04 | Tokens: 6676

 59%|█████▉    | 32707/55054 [15:06<10:18, 36.12it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 66969600 | Self Similarity: 0.0003
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 66969600 | Self Similarity: 0.0130
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 66969600 | Self Similarity: 0.0027
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 66969600 | Self Similarity: -0.0052
Sparsity: 119.9 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 66969600 | Self Similarity: -0.0007
Sparsity: 149.3 | Dead Features: 0 | Total Loss: 0.0735 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 6696

 60%|█████▉    | 32807/55054 [15:08<10:25, 35.56it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 67174400 | Self Similarity: 0.0005
Sparsity: 34.8 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 67174400 | Self Similarity: 0.0132
Sparsity: 45.4 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 67174400 | Self Similarity: 0.0027
Sparsity: 115.3 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 67174400 | Self Similarity: -0.0053
Sparsity: 119.5 | Dead Features: 0 | Total Loss: 0.0488 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 67174400 | Self Similarity: -0.0006
Sparsity: 150.6 | Dead Features: 0 | Total Loss: 0.0728 | Reconstruction Loss: 0.0332 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 6717

 60%|█████▉    | 32907/55054 [15:11<10:18, 35.81it/s]

Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 67379200 | Self Similarity: 0.0004
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0141 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 67379200 | Self Similarity: 0.0129
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 67379200 | Self Similarity: 0.0022
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 67379200 | Self Similarity: -0.0055
Sparsity: 119.1 | Dead Features: 0 | Total Loss: 0.0476 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 67379200 | Self Similarity: -0.0010
Sparsity: 148.7 | Dead Features: 0 | Total Loss: 0.0698 | Reconstruction Loss: 0.0324 | L1 Loss: 0.0374 | l1_alpha: 8.0000e-04 | Tokens: 6737

 60%|█████▉    | 33004/55054 [15:14<10:46, 34.08it/s]

Sparsity: 18.9 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 67584000 | Self Similarity: 0.0004
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 67584000 | Self Similarity: 0.0131
Sparsity: 45.7 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 67584000 | Self Similarity: 0.0025
Sparsity: 114.1 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 67584000 | Self Similarity: -0.0055
Sparsity: 118.2 | Dead Features: 0 | Total Loss: 0.0469 | Reconstruction Loss: 0.0231 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 67584000 | Self Similarity: -0.0007
Sparsity: 146.7 | Dead Features: 0 | Total Loss: 0.0699 | Reconstruction Loss: 0.0326 | L1 Loss: 0.0372 | l1_alpha: 8.0000e-04 | Tokens: 6758

 60%|██████    | 33104/55054 [15:17<11:02, 33.15it/s]

Sparsity: 22.4 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 67788800 | Self Similarity: 0.0002
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 67788800 | Self Similarity: 0.0130
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 67788800 | Self Similarity: 0.0025
Sparsity: 115.9 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 67788800 | Self Similarity: -0.0054
Sparsity: 121.9 | Dead Features: 0 | Total Loss: 0.0496 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0248 | l1_alpha: 8.0000e-04 | Tokens: 67788800 | Self Similarity: -0.0004
Sparsity: 155.5 | Dead Features: 0 | Total Loss: 0.0742 | Reconstruction Loss: 0.0346 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 6778

 60%|██████    | 33165/55054 [15:19<10:25, 34.98it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [30]:
# Kick off a full training run using the settings declared in this cell.

# --- Model and data selection ---
model_name = "EleutherAI/pythia-70m"
dataset_name = "Elriggs/openwebtext-100k"
split = "train"

# --- Run settings ---
layers = [0, 1, 2, 3, 4, 5]  # layer indices handed to the training setup
ratio = 8                    # copied onto cfg.ratio by setup_execute_training
seed = 0
epoches = 1                  # copied onto cfg.num_epochs by setup_execute_training
wandb_log = False

# Every argument is passed by keyword so the call does not depend on the
# positional order of setup_execute_training's parameters.
setup_execute_training(
    model_name=model_name,
    dataset_name=dataset_name,
    ratio=ratio,
    layers=layers,
    seed=seed,
    wandb_log=wandb_log,
    split=split,
    epoches=epoches,
)

Activation size: 512


Found cached dataset parquet (/root/.cache/huggingface/datasets/Elriggs___parquet/Elriggs--openwebtext-100k-79076ecafee8a6d5/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Loading cached processed dataset at /root/.cache/huggingface/datasets/Elriggs___parquet/Elriggs--openwebtext-100k-79076ecafee8a6d5/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-d3de196f0681d12e_*_of_00008.arrow


Number of tokens: 112750592


  0%|          | 3/55054 [00:00<35:28, 25.87it/s]

Sparsity: 2047.0 | Dead Features: 4096 | Total Loss: 0.2620 | Reconstruction Loss: 0.1138 | L1 Loss: 0.1482 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 1.0000
Sparsity: 2022.9 | Dead Features: 4096 | Total Loss: 0.1765 | Reconstruction Loss: 0.0692 | L1 Loss: 0.1073 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0000
Sparsity: 2058.0 | Dead Features: 4096 | Total Loss: 0.6955 | Reconstruction Loss: 0.5341 | L1 Loss: 0.1614 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0009
Sparsity: 2013.3 | Dead Features: 4096 | Total Loss: 0.5138 | Reconstruction Loss: 0.2826 | L1 Loss: 0.2312 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0005
Sparsity: 2054.3 | Dead Features: 4096 | Total Loss: 1.0107 | Reconstruction Loss: 0.6723 | L1 Loss: 0.3384 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0005
Sparsity: 2037.6 | Dead Features: 4096 | Total Loss: 10.2929 | Reconstruction Loss: 8.9718 | L1 Loss: 1.3211 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Sim

  0%|          | 106/55054 [00:02<24:39, 37.14it/s]

Sparsity: 69.6 | Dead Features: 0 | Total Loss: 0.0263 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0000
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0247 | Reconstruction Loss: 0.0213 | L1 Loss: 0.0034 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0017
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0283 | L1 Loss: 0.0099 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0061
Sparsity: 144.4 | Dead Features: 0 | Total Loss: 0.0633 | Reconstruction Loss: 0.0414 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0136
Sparsity: 137.0 | Dead Features: 0 | Total Loss: 0.0932 | Reconstruction Loss: 0.0653 | L1 Loss: 0.0280 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0104
Sparsity: 171.9 | Dead Features: 0 | Total Loss: 0.2911 | Reconstruction Loss: 0.1955 | L1 Loss: 0.0956 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self 

  0%|          | 207/55054 [00:05<25:19, 36.11it/s]

Sparsity: 58.7 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0014
Sparsity: 42.6 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0154 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0031
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0294 | Reconstruction Loss: 0.0221 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0090
Sparsity: 144.8 | Dead Features: 0 | Total Loss: 0.0539 | Reconstruction Loss: 0.0311 | L1 Loss: 0.0227 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0153
Sparsity: 132.1 | Dead Features: 0 | Total Loss: 0.0738 | Reconstruction Loss: 0.0472 | L1 Loss: 0.0267 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0111
Sparsity: 58.4 | Dead Features: 0 | Total Loss: 0.1968 | Reconstruction Loss: 0.1355 | L1 Loss: 0.0613 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self 

  1%|          | 307/55054 [00:08<27:25, 33.28it/s]

Sparsity: 53.2 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0015
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0138 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0046
Sparsity: 41.0 | Dead Features: 0 | Total Loss: 0.0274 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0114
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0510 | Reconstruction Loss: 0.0283 | L1 Loss: 0.0227 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0168
Sparsity: 140.9 | Dead Features: 0 | Total Loss: 0.0711 | Reconstruction Loss: 0.0430 | L1 Loss: 0.0281 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0115
Sparsity: 1974.3 | Dead Features: 0 | Total Loss: 272.5262 | Reconstruction Loss: 266.3733 | L1 Loss: 6.1529 | l1_alpha: 8.0000e-04 | Tokens: 614400 |

  1%|          | 407/55054 [00:11<27:24, 33.23it/s]

Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0164 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0037
Sparsity: 50.8 | Dead Features: 0 | Total Loss: 0.0180 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0051
Sparsity: 44.6 | Dead Features: 0 | Total Loss: 0.0267 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0142
Sparsity: 143.0 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0262 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0174
Sparsity: 140.2 | Dead Features: 0 | Total Loss: 0.0673 | Reconstruction Loss: 0.0393 | L1 Loss: 0.0280 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0114
Sparsity: 251.9 | Dead Features: 0 | Total Loss: 0.5232 | Reconstruction Loss: 0.1862 | L1 Loss: 0.3370 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self

  1%|          | 503/55054 [00:14<28:17, 32.13it/s]

Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0035
Sparsity: 54.2 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0047
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0257 | Reconstruction Loss: 0.0178 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0177
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0226 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0179
Sparsity: 146.9 | Dead Features: 0 | Total Loss: 0.0667 | Reconstruction Loss: 0.0381 | L1 Loss: 0.0287 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0111
Sparsity: 118.9 | Dead Features: 0 | Total Loss: 0.3008 | Reconstruction Loss: 0.1541 | L1 Loss: 0.1467 | l1_alpha: 8.0000e-04 | Tokens: 1024000 

  1%|          | 604/55054 [00:17<26:18, 34.50it/s]

Sparsity: 52.5 | Dead Features: 0 | Total Loss: 0.0162 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0037
Sparsity: 59.5 | Dead Features: 0 | Total Loss: 0.0180 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0046
Sparsity: 51.0 | Dead Features: 0 | Total Loss: 0.0262 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0207
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0184
Sparsity: 147.1 | Dead Features: 0 | Total Loss: 0.0674 | Reconstruction Loss: 0.0382 | L1 Loss: 0.0292 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0107
Sparsity: 75.8 | Dead Features: 0 | Total Loss: 0.2337 | Reconstruction Loss: 0.1461 | L1 Loss: 0.0876 | l1_alpha: 8.0000e-04 | Tokens: 1228800 |

  1%|▏         | 704/55054 [00:20<28:13, 32.10it/s]

Sparsity: 44.7 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0038
Sparsity: 51.6 | Dead Features: 0 | Total Loss: 0.0160 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0049
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0243 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0239
Sparsity: 142.0 | Dead Features: 0 | Total Loss: 0.0455 | Reconstruction Loss: 0.0232 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0188
Sparsity: 142.5 | Dead Features: 0 | Total Loss: 0.0621 | Reconstruction Loss: 0.0342 | L1 Loss: 0.0279 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0103
Sparsity: 60.8 | Dead Features: 0 | Total Loss: 0.2142 | Reconstruction Loss: 0.1428 | L1 Loss: 0.0714 | l1_alpha: 8.0000e-04 | Tokens: 1433600 |

  1%|▏         | 804/55054 [00:23<27:24, 32.98it/s]

Sparsity: 50.7 | Dead Features: 0 | Total Loss: 0.0157 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0038
Sparsity: 57.6 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0112 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0049
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0248 | Reconstruction Loss: 0.0166 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0267
Sparsity: 141.7 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0227 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0194
Sparsity: 149.1 | Dead Features: 0 | Total Loss: 0.0657 | Reconstruction Loss: 0.0362 | L1 Loss: 0.0295 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0098
Sparsity: 52.5 | Dead Features: 0 | Total Loss: 0.2010 | Reconstruction Loss: 0.1387 | L1 Loss: 0.0623 | l1_alpha: 8.0000e-04 | Tokens: 1638400 |

  2%|▏         | 904/55054 [00:26<28:41, 31.45it/s]

Sparsity: 39.7 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0062 | L1 Loss: 0.0070 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0039
Sparsity: 51.0 | Dead Features: 0 | Total Loss: 0.0155 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0048
Sparsity: 50.5 | Dead Features: 0 | Total Loss: 0.0236 | Reconstruction Loss: 0.0154 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0294
Sparsity: 139.8 | Dead Features: 0 | Total Loss: 0.0442 | Reconstruction Loss: 0.0222 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0202
Sparsity: 142.8 | Dead Features: 0 | Total Loss: 0.0580 | Reconstruction Loss: 0.0306 | L1 Loss: 0.0274 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0094
Sparsity: 43.2 | Dead Features: 0 | Total Loss: 0.2012 | Reconstruction Loss: 0.1361 | L1 Loss: 0.0652 | l1_alpha: 8.0000e-04 | Tokens: 1843200 |

  2%|▏         | 1004/55054 [00:29<27:42, 32.50it/s]

Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0040
Sparsity: 55.5 | Dead Features: 0 | Total Loss: 0.0158 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0047
Sparsity: 51.7 | Dead Features: 0 | Total Loss: 0.0233 | Reconstruction Loss: 0.0152 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0322
Sparsity: 138.9 | Dead Features: 0 | Total Loss: 0.0444 | Reconstruction Loss: 0.0224 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0206
Sparsity: 148.8 | Dead Features: 0 | Total Loss: 0.0612 | Reconstruction Loss: 0.0330 | L1 Loss: 0.0282 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0090
Sparsity: 40.2 | Dead Features: 0 | Total Loss: 0.1918 | Reconstruction Loss: 0.1388 | L1 Loss: 0.0530 | l1_alpha: 8.0000e-04 | Tokens: 2048000 |

  2%|▏         | 1104/55054 [00:32<27:18, 32.92it/s]

Sparsity: 42.8 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0040
Sparsity: 58.4 | Dead Features: 0 | Total Loss: 0.0163 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0049
Sparsity: 54.3 | Dead Features: 0 | Total Loss: 0.0237 | Reconstruction Loss: 0.0152 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0352
Sparsity: 138.6 | Dead Features: 0 | Total Loss: 0.0444 | Reconstruction Loss: 0.0225 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0215
Sparsity: 149.4 | Dead Features: 0 | Total Loss: 0.0613 | Reconstruction Loss: 0.0325 | L1 Loss: 0.0287 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0087
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.1924 | Reconstruction Loss: 0.1395 | L1 Loss: 0.0529 | l1_alpha: 8.0000e-04 | Tokens: 2252800 |

  2%|▏         | 1204/55054 [00:35<24:38, 36.41it/s]

Sparsity: 41.2 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0071 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0041
Sparsity: 53.3 | Dead Features: 0 | Total Loss: 0.0150 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0049
Sparsity: 53.8 | Dead Features: 0 | Total Loss: 0.0230 | Reconstruction Loss: 0.0146 | L1 Loss: 0.0084 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: 0.0379
Sparsity: 139.1 | Dead Features: 0 | Total Loss: 0.0437 | Reconstruction Loss: 0.0220 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: 0.0224
Sparsity: 148.5 | Dead Features: 0 | Total Loss: 0.0583 | Reconstruction Loss: 0.0306 | L1 Loss: 0.0277 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0083
Sparsity: 41.9 | Dead Features: 0 | Total Loss: 0.1761 | Reconstruction Loss: 0.1247 | L1 Loss: 0.0515 | l1_alpha: 8.0000e-04 | Tokens: 2457600 |

  2%|▏         | 1304/55054 [00:38<27:36, 32.45it/s]

Sparsity: 40.1 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0060 | L1 Loss: 0.0070 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: -0.0040
Sparsity: 51.4 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: -0.0045
Sparsity: 51.6 | Dead Features: 0 | Total Loss: 0.0219 | Reconstruction Loss: 0.0138 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0410
Sparsity: 135.7 | Dead Features: 0 | Total Loss: 0.0418 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0232
Sparsity: 144.6 | Dead Features: 0 | Total Loss: 0.0554 | Reconstruction Loss: 0.0285 | L1 Loss: 0.0269 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: -0.0079
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.1728 | Reconstruction Loss: 0.1208 | L1 Loss: 0.0520 | l1_alpha: 8.0000e-04 | Tokens: 2662400 |

  3%|▎         | 1404/55054 [00:41<27:29, 32.52it/s]

Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0052 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: -0.0040
Sparsity: 54.5 | Dead Features: 0 | Total Loss: 0.0149 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: -0.0040
Sparsity: 56.7 | Dead Features: 0 | Total Loss: 0.0231 | Reconstruction Loss: 0.0146 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0433
Sparsity: 142.7 | Dead Features: 0 | Total Loss: 0.0450 | Reconstruction Loss: 0.0221 | L1 Loss: 0.0229 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0241
Sparsity: 148.9 | Dead Features: 0 | Total Loss: 0.0593 | Reconstruction Loss: 0.0304 | L1 Loss: 0.0289 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: -0.0071
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.1904 | Reconstruction Loss: 0.1366 | L1 Loss: 0.0538 | l1_alpha: 8.0000e-04 | Tokens: 2867200 |

  3%|▎         | 1504/55054 [00:44<26:18, 33.93it/s]

Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0055 | L1 Loss: 0.0068 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: -0.0042
Sparsity: 52.0 | Dead Features: 0 | Total Loss: 0.0145 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: -0.0035
Sparsity: 52.5 | Dead Features: 0 | Total Loss: 0.0217 | Reconstruction Loss: 0.0136 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0454
Sparsity: 136.8 | Dead Features: 0 | Total Loss: 0.0415 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0250
Sparsity: 145.3 | Dead Features: 0 | Total Loss: 0.0546 | Reconstruction Loss: 0.0277 | L1 Loss: 0.0269 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: -0.0065
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.1834 | Reconstruction Loss: 0.1304 | L1 Loss: 0.0529 | l1_alpha: 8.0000e-04 | Tokens: 3072000 |

  3%|▎         | 1607/55054 [00:47<23:26, 37.99it/s]

Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0056 | L1 Loss: 0.0069 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: -0.0041
Sparsity: 55.5 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: -0.0033
Sparsity: 56.3 | Dead Features: 0 | Total Loss: 0.0224 | Reconstruction Loss: 0.0140 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: 0.0476
Sparsity: 137.9 | Dead Features: 0 | Total Loss: 0.0425 | Reconstruction Loss: 0.0211 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: 0.0260
Sparsity: 149.9 | Dead Features: 0 | Total Loss: 0.0570 | Reconstruction Loss: 0.0291 | L1 Loss: 0.0280 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: -0.0055
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.1830 | Reconstruction Loss: 0.1315 | L1 Loss: 0.0515 | l1_alpha: 8.0000e-04 | Tokens: 3276800 |

  3%|▎         | 1707/55054 [00:50<23:52, 37.24it/s]

Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0053 | L1 Loss: 0.0067 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: -0.0039
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: -0.0034
Sparsity: 53.4 | Dead Features: 0 | Total Loss: 0.0211 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: 0.0495
Sparsity: 136.1 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: 0.0269
Sparsity: 144.4 | Dead Features: 0 | Total Loss: 0.0538 | Reconstruction Loss: 0.0274 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: -0.0044
Sparsity: 33.8 | Dead Features: 0 | Total Loss: 0.1718 | Reconstruction Loss: 0.1212 | L1 Loss: 0.0506 | l1_alpha: 8.0000e-04 | Tokens: 3481600 |

  3%|▎         | 1807/55054 [00:53<25:54, 34.25it/s]

Sparsity: 38.9 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0059 | L1 Loss: 0.0071 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: -0.0041
Sparsity: 55.4 | Dead Features: 0 | Total Loss: 0.0148 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: -0.0031
Sparsity: 55.3 | Dead Features: 0 | Total Loss: 0.0221 | Reconstruction Loss: 0.0137 | L1 Loss: 0.0084 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: 0.0509
Sparsity: 135.9 | Dead Features: 0 | Total Loss: 0.0414 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: 0.0276
Sparsity: 143.6 | Dead Features: 0 | Total Loss: 0.0543 | Reconstruction Loss: 0.0276 | L1 Loss: 0.0268 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: -0.0033
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.1701 | Reconstruction Loss: 0.1187 | L1 Loss: 0.0513 | l1_alpha: 8.0000e-04 | Tokens: 3686400 |

  3%|▎         | 1907/55054 [00:56<26:08, 33.89it/s]

Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0053 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: -0.0040
Sparsity: 54.0 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0087 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: -0.0023
Sparsity: 55.0 | Dead Features: 0 | Total Loss: 0.0211 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: 0.0523
Sparsity: 137.8 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: 0.0289
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0542 | Reconstruction Loss: 0.0280 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: -0.0021
Sparsity: 33.8 | Dead Features: 0 | Total Loss: 0.1687 | Reconstruction Loss: 0.1168 | L1 Loss: 0.0519 | l1_alpha: 8.0000e-04 | Tokens: 3891200 |

  4%|▎         | 2007/55054 [00:59<26:40, 33.14it/s]

Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0052 | L1 Loss: 0.0066 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: -0.0039
Sparsity: 53.1 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: -0.0020
Sparsity: 54.6 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: 0.0535
Sparsity: 134.6 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: 0.0297
Sparsity: 144.2 | Dead Features: 0 | Total Loss: 0.0520 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0260 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: -0.0002
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.1663 | Reconstruction Loss: 0.1136 | L1 Loss: 0.0526 | l1_alpha: 8.0000e-04 | Tokens: 4096000 |

  4%|▍         | 2107/55054 [01:02<26:08, 33.77it/s]

Sparsity: 33.8 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0051 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: -0.0039
Sparsity: 53.1 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: -0.0016
Sparsity: 57.7 | Dead Features: 0 | Total Loss: 0.0209 | Reconstruction Loss: 0.0128 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: 0.0548
Sparsity: 139.7 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: 0.0304
Sparsity: 146.6 | Dead Features: 0 | Total Loss: 0.0528 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0268 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: 0.0011
Sparsity: 34.0 | Dead Features: 0 | Total Loss: 0.1623 | Reconstruction Loss: 0.1103 | L1 Loss: 0.0520 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | 

  4%|▍         | 2207/55054 [01:05<26:21, 33.41it/s]

Sparsity: 30.6 | Dead Features: 0 | Total Loss: 0.0109 | Reconstruction Loss: 0.0046 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: -0.0039
Sparsity: 49.9 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0084 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: -0.0008
Sparsity: 56.0 | Dead Features: 0 | Total Loss: 0.0209 | Reconstruction Loss: 0.0127 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: 0.0559
Sparsity: 136.6 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: 0.0309
Sparsity: 141.4 | Dead Features: 0 | Total Loss: 0.0528 | Reconstruction Loss: 0.0271 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: 0.0029
Sparsity: 86.0 | Dead Features: 0 | Total Loss: 0.2306 | Reconstruction Loss: 0.1662 | L1 Loss: 0.0644 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | 

  4%|▍         | 2307/55054 [01:08<25:20, 34.68it/s]

Sparsity: 31.2 | Dead Features: 0 | Total Loss: 0.0110 | Reconstruction Loss: 0.0047 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: -0.0042
Sparsity: 52.6 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: -0.0002
Sparsity: 57.0 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: 0.0565
Sparsity: 137.0 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: 0.0314
Sparsity: 142.8 | Dead Features: 0 | Total Loss: 0.0522 | Reconstruction Loss: 0.0264 | L1 Loss: 0.0259 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: 0.0045
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.1831 | Reconstruction Loss: 0.1297 | L1 Loss: 0.0534 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | 

  4%|▍         | 2407/55054 [01:10<26:02, 33.70it/s]

Sparsity: 33.4 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0053 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: -0.0048
Sparsity: 55.4 | Dead Features: 0 | Total Loss: 0.0142 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0004
Sparsity: 58.2 | Dead Features: 0 | Total Loss: 0.0208 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0568
Sparsity: 138.3 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0320
Sparsity: 144.6 | Dead Features: 0 | Total Loss: 0.0527 | Reconstruction Loss: 0.0264 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0060
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.1689 | Reconstruction Loss: 0.1169 | L1 Loss: 0.0520 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | S

  5%|▍         | 2507/55054 [01:13<26:03, 33.60it/s]

Sparsity: 29.0 | Dead Features: 0 | Total Loss: 0.0106 | Reconstruction Loss: 0.0044 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: -0.0048
Sparsity: 50.5 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0011
Sparsity: 58.1 | Dead Features: 0 | Total Loss: 0.0210 | Reconstruction Loss: 0.0126 | L1 Loss: 0.0084 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0578
Sparsity: 140.1 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0326
Sparsity: 145.2 | Dead Features: 0 | Total Loss: 0.0532 | Reconstruction Loss: 0.0271 | L1 Loss: 0.0261 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0081
Sparsity: 40.5 | Dead Features: 0 | Total Loss: 0.1737 | Reconstruction Loss: 0.1228 | L1 Loss: 0.0509 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | S

  5%|▍         | 2607/55054 [01:16<26:03, 33.54it/s]

Sparsity: 30.7 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0049 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: -0.0049
Sparsity: 51.6 | Dead Features: 0 | Total Loss: 0.0137 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0013
Sparsity: 56.8 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0584
Sparsity: 140.0 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0331
Sparsity: 143.9 | Dead Features: 0 | Total Loss: 0.0514 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0097
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.1662 | Reconstruction Loss: 0.1147 | L1 Loss: 0.0515 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | S

  5%|▍         | 2707/55054 [01:19<26:00, 33.55it/s]

Sparsity: 29.9 | Dead Features: 0 | Total Loss: 0.0109 | Reconstruction Loss: 0.0047 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: -0.0047
Sparsity: 52.4 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0025
Sparsity: 59.7 | Dead Features: 0 | Total Loss: 0.0209 | Reconstruction Loss: 0.0125 | L1 Loss: 0.0084 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0591
Sparsity: 139.6 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0336
Sparsity: 146.6 | Dead Features: 0 | Total Loss: 0.0522 | Reconstruction Loss: 0.0259 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0116
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.1697 | Reconstruction Loss: 0.1184 | L1 Loss: 0.0513 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | S

  5%|▌         | 2807/55054 [01:22<26:07, 33.34it/s]

Sparsity: 30.6 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0050 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: -0.0049
Sparsity: 58.5 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0032
Sparsity: 62.9 | Dead Features: 0 | Total Loss: 0.0217 | Reconstruction Loss: 0.0131 | L1 Loss: 0.0086 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0593
Sparsity: 147.9 | Dead Features: 0 | Total Loss: 0.0438 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0335
Sparsity: 149.7 | Dead Features: 0 | Total Loss: 0.0557 | Reconstruction Loss: 0.0275 | L1 Loss: 0.0282 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0136
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.1839 | Reconstruction Loss: 0.1296 | L1 Loss: 0.0543 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | S

  5%|▌         | 2907/55054 [01:25<25:57, 33.48it/s]

Sparsity: 30.8 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0048 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: -0.0049
Sparsity: 51.5 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0041
Sparsity: 60.0 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0119 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0596
Sparsity: 138.2 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0337
Sparsity: 147.0 | Dead Features: 0 | Total Loss: 0.0513 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0154
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.1642 | Reconstruction Loss: 0.1139 | L1 Loss: 0.0502 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | S

  5%|▌         | 3003/55054 [01:28<26:12, 33.10it/s]

Sparsity: 26.9 | Dead Features: 0 | Total Loss: 0.0103 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: -0.0048
Sparsity: 49.4 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0046
Sparsity: 59.4 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0084 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0596
Sparsity: 139.9 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0334
Sparsity: 143.2 | Dead Features: 0 | Total Loss: 0.0508 | Reconstruction Loss: 0.0256 | L1 Loss: 0.0252 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0174
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.1687 | Reconstruction Loss: 0.1197 | L1 Loss: 0.0489 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | S

  6%|▌         | 3107/55054 [01:31<25:34, 33.85it/s]

Sparsity: 30.7 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0048 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: -0.0049
Sparsity: 54.0 | Dead Features: 0 | Total Loss: 0.0139 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: 0.0055
Sparsity: 59.9 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0084 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: 0.0597
Sparsity: 139.7 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: 0.0335
Sparsity: 147.1 | Dead Features: 0 | Total Loss: 0.0504 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: 0.0194
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.1640 | Reconstruction Loss: 0.1124 | L1 Loss: 0.0516 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | S

  6%|▌         | 3203/55054 [01:34<26:43, 32.33it/s]

Sparsity: 28.3 | Dead Features: 0 | Total Loss: 0.0107 | Reconstruction Loss: 0.0045 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: -0.0047
Sparsity: 52.0 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: 0.0063
Sparsity: 60.0 | Dead Features: 0 | Total Loss: 0.0205 | Reconstruction Loss: 0.0123 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: 0.0602
Sparsity: 128.8 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: 0.0341
Sparsity: 146.0 | Dead Features: 0 | Total Loss: 0.0518 | Reconstruction Loss: 0.0263 | L1 Loss: 0.0255 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: 0.0209
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.1669 | Reconstruction Loss: 0.1159 | L1 Loss: 0.0510 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | S

  6%|▌         | 3303/55054 [01:37<26:41, 32.32it/s]

Sparsity: 31.1 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0050 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: -0.0046
Sparsity: 55.2 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: 0.0069
Sparsity: 62.0 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0120 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: 0.0603
Sparsity: 137.6 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: 0.0335
Sparsity: 150.5 | Dead Features: 0 | Total Loss: 0.0527 | Reconstruction Loss: 0.0260 | L1 Loss: 0.0267 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: 0.0229
Sparsity: 39.6 | Dead Features: 0 | Total Loss: 0.1679 | Reconstruction Loss: 0.1162 | L1 Loss: 0.0517 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | S

  6%|▌         | 3403/55054 [01:40<26:18, 32.72it/s]

Sparsity: 29.4 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0049 | L1 Loss: 0.0062 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: -0.0049
Sparsity: 53.3 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: 0.0074
Sparsity: 61.3 | Dead Features: 0 | Total Loss: 0.0203 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: 0.0604
Sparsity: 138.2 | Dead Features: 0 | Total Loss: 0.0401 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: 0.0339
Sparsity: 148.8 | Dead Features: 0 | Total Loss: 0.0509 | Reconstruction Loss: 0.0255 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: 0.0250
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.1624 | Reconstruction Loss: 0.1125 | L1 Loss: 0.0498 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | S

  6%|▋         | 3507/55054 [01:43<25:37, 33.54it/s]

Sparsity: 28.1 | Dead Features: 0 | Total Loss: 0.0107 | Reconstruction Loss: 0.0046 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: -0.0047
Sparsity: 51.6 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: 0.0086
Sparsity: 60.8 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0116 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: 0.0605
Sparsity: 138.5 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: 0.0335
Sparsity: 145.6 | Dead Features: 0 | Total Loss: 0.0505 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0256 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: 0.0275
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.1577 | Reconstruction Loss: 0.1048 | L1 Loss: 0.0529 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | S

  7%|▋         | 3607/55054 [01:46<25:45, 33.30it/s]

Sparsity: 24.4 | Dead Features: 0 | Total Loss: 0.0099 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: -0.0046
Sparsity: 48.2 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: 0.0094
Sparsity: 58.5 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0112 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: 0.0603
Sparsity: 140.1 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: 0.0331
Sparsity: 144.3 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: 0.0291
Sparsity: 39.4 | Dead Features: 0 | Total Loss: 0.1561 | Reconstruction Loss: 0.1033 | L1 Loss: 0.0528 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | S

  7%|▋         | 3707/55054 [01:49<25:32, 33.50it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0094 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: -0.0047
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: 0.0098
Sparsity: 57.8 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: 0.0601
Sparsity: 139.9 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: 0.0327
Sparsity: 146.0 | Dead Features: 0 | Total Loss: 0.0484 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: 0.0307
Sparsity: 54.0 | Dead Features: 0 | Total Loss: 0.1727 | Reconstruction Loss: 0.1196 | L1 Loss: 0.0531 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | S

  7%|▋         | 3807/55054 [01:52<25:26, 33.57it/s]

Sparsity: 24.0 | Dead Features: 0 | Total Loss: 0.0099 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: -0.0048
Sparsity: 51.7 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: 0.0111
Sparsity: 62.6 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: 0.0599
Sparsity: 146.8 | Dead Features: 0 | Total Loss: 0.0423 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: 0.0324
Sparsity: 145.7 | Dead Features: 0 | Total Loss: 0.0503 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0262 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: 0.0322
Sparsity: 38.5 | Dead Features: 0 | Total Loss: 0.1735 | Reconstruction Loss: 0.1178 | L1 Loss: 0.0557 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | S

  7%|▋         | 3907/55054 [01:55<25:21, 33.62it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0098 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: -0.0048
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: 0.0116
Sparsity: 62.3 | Dead Features: 0 | Total Loss: 0.0201 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0086 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: 0.0595
Sparsity: 143.5 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: 0.0316
Sparsity: 140.8 | Dead Features: 0 | Total Loss: 0.0507 | Reconstruction Loss: 0.0247 | L1 Loss: 0.0260 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: 0.0342
Sparsity: 43.1 | Dead Features: 0 | Total Loss: 0.1582 | Reconstruction Loss: 0.1071 | L1 Loss: 0.0511 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | S

  7%|▋         | 4007/55054 [01:58<25:24, 33.49it/s]

Sparsity: 27.4 | Dead Features: 0 | Total Loss: 0.0108 | Reconstruction Loss: 0.0047 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: -0.0049
Sparsity: 54.0 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: 0.0123
Sparsity: 65.3 | Dead Features: 0 | Total Loss: 0.0204 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: 0.0592
Sparsity: 150.1 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: 0.0312
Sparsity: 152.6 | Dead Features: 0 | Total Loss: 0.0539 | Reconstruction Loss: 0.0276 | L1 Loss: 0.0263 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: 0.0359
Sparsity: 43.0 | Dead Features: 0 | Total Loss: 0.1711 | Reconstruction Loss: 0.1184 | L1 Loss: 0.0526 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | S

  7%|▋         | 4107/55054 [02:01<25:18, 33.56it/s]

Sparsity: 26.6 | Dead Features: 0 | Total Loss: 0.0107 | Reconstruction Loss: 0.0046 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: -0.0048
Sparsity: 53.3 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: 0.0132
Sparsity: 63.4 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0118 | L1 Loss: 0.0084 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: 0.0586
Sparsity: 148.6 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: 0.0304
Sparsity: 150.4 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0249 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: 0.0373
Sparsity: 41.5 | Dead Features: 0 | Total Loss: 0.1575 | Reconstruction Loss: 0.1061 | L1 Loss: 0.0514 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | S

  8%|▊         | 4207/55054 [02:04<25:19, 33.45it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0094 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: -0.0049
Sparsity: 49.1 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: 0.0134
Sparsity: 59.9 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: 0.0581
Sparsity: 145.1 | Dead Features: 0 | Total Loss: 0.0385 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: 0.0298
Sparsity: 150.2 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: 0.0388
Sparsity: 38.9 | Dead Features: 0 | Total Loss: 0.1574 | Reconstruction Loss: 0.1055 | L1 Loss: 0.0519 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | S

  8%|▊         | 4307/55054 [02:07<25:07, 33.67it/s]

Sparsity: 29.5 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0054 | L1 Loss: 0.0064 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: -0.0046
Sparsity: 56.6 | Dead Features: 0 | Total Loss: 0.0147 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: 0.0143
Sparsity: 66.5 | Dead Features: 0 | Total Loss: 0.0211 | Reconstruction Loss: 0.0124 | L1 Loss: 0.0087 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: 0.0579
Sparsity: 149.0 | Dead Features: 0 | Total Loss: 0.0414 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: 0.0289
Sparsity: 159.3 | Dead Features: 0 | Total Loss: 0.0540 | Reconstruction Loss: 0.0269 | L1 Loss: 0.0271 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: 0.0400
Sparsity: 42.2 | Dead Features: 0 | Total Loss: 0.1740 | Reconstruction Loss: 0.1238 | L1 Loss: 0.0502 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | S

  8%|▊         | 4407/55054 [02:10<25:11, 33.52it/s]

Sparsity: 26.0 | Dead Features: 0 | Total Loss: 0.0107 | Reconstruction Loss: 0.0046 | L1 Loss: 0.0061 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: -0.0045
Sparsity: 54.8 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: 0.0151
Sparsity: 60.9 | Dead Features: 0 | Total Loss: 0.0202 | Reconstruction Loss: 0.0121 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: 0.0574
Sparsity: 148.2 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: 0.0278
Sparsity: 152.6 | Dead Features: 0 | Total Loss: 0.0538 | Reconstruction Loss: 0.0271 | L1 Loss: 0.0267 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: 0.0414
Sparsity: 40.5 | Dead Features: 0 | Total Loss: 0.1687 | Reconstruction Loss: 0.1146 | L1 Loss: 0.0542 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | S

  8%|▊         | 4503/55054 [02:13<25:37, 32.88it/s]

Sparsity: 23.6 | Dead Features: 0 | Total Loss: 0.0109 | Reconstruction Loss: 0.0050 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: -0.0045
Sparsity: 51.9 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: 0.0155
Sparsity: 61.6 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: 0.0568
Sparsity: 146.7 | Dead Features: 0 | Total Loss: 0.0396 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: 0.0269
Sparsity: 152.9 | Dead Features: 0 | Total Loss: 0.0507 | Reconstruction Loss: 0.0251 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: 0.0422
Sparsity: 40.9 | Dead Features: 0 | Total Loss: 0.1656 | Reconstruction Loss: 0.1130 | L1 Loss: 0.0526 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | S

  8%|▊         | 4607/55054 [02:16<25:27, 33.02it/s]

Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: -0.0043
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: 0.0159
Sparsity: 59.2 | Dead Features: 0 | Total Loss: 0.0192 | Reconstruction Loss: 0.0112 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: 0.0566
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: 0.0259
Sparsity: 151.2 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: 0.0431
Sparsity: 41.4 | Dead Features: 0 | Total Loss: 0.1595 | Reconstruction Loss: 0.1079 | L1 Loss: 0.0516 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | S

  9%|▊         | 4703/55054 [02:19<25:57, 32.34it/s]

Sparsity: 22.1 | Dead Features: 0 | Total Loss: 0.0099 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: -0.0044
Sparsity: 49.0 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: 0.0167
Sparsity: 61.2 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0112 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: 0.0563
Sparsity: 151.8 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0178 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: 0.0245
Sparsity: 152.0 | Dead Features: 0 | Total Loss: 0.0479 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: 0.0439
Sparsity: 42.3 | Dead Features: 0 | Total Loss: 0.1584 | Reconstruction Loss: 0.1039 | L1 Loss: 0.0544 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | S

  9%|▊         | 4807/55054 [02:22<25:08, 33.32it/s]

Sparsity: 23.5 | Dead Features: 0 | Total Loss: 0.0100 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: -0.0044
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: 0.0170
Sparsity: 61.4 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: 0.0558
Sparsity: 151.0 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: 0.0231
Sparsity: 153.0 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0242 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: 0.0445
Sparsity: 43.8 | Dead Features: 0 | Total Loss: 0.1588 | Reconstruction Loss: 0.1048 | L1 Loss: 0.0541 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | S

  9%|▉         | 4907/55054 [02:25<24:55, 33.53it/s]

Sparsity: 18.8 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: -0.0044
Sparsity: 45.2 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: 0.0175
Sparsity: 58.1 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0109 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: 0.0551
Sparsity: 145.4 | Dead Features: 0 | Total Loss: 0.0370 | Reconstruction Loss: 0.0173 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: 0.0219
Sparsity: 147.9 | Dead Features: 0 | Total Loss: 0.0466 | Reconstruction Loss: 0.0227 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: 0.0449
Sparsity: 43.9 | Dead Features: 0 | Total Loss: 0.1576 | Reconstruction Loss: 0.1031 | L1 Loss: 0.0545 | l1_alpha: 8.0000e-04 | Tokens: 100352

  9%|▉         | 5007/55054 [02:28<25:02, 33.31it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: -0.0044
Sparsity: 48.1 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: 0.0175
Sparsity: 59.9 | Dead Features: 0 | Total Loss: 0.0194 | Reconstruction Loss: 0.0111 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: 0.0546
Sparsity: 144.9 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: 0.0207
Sparsity: 152.6 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0238 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: 0.0460
Sparsity: 305.4 | Dead Features: 0 | Total Loss: 1.2161 | Reconstruction Loss: 0.8792 | L1 Loss: 0.3369 | l1_alpha: 8.0000e-04 | Tokens: 10240

  9%|▉         | 5107/55054 [02:31<24:44, 33.65it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0096 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: -0.0046
Sparsity: 46.1 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: 0.0183
Sparsity: 59.2 | Dead Features: 0 | Total Loss: 0.0188 | Reconstruction Loss: 0.0110 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: 0.0545
Sparsity: 145.1 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: 0.0191
Sparsity: 152.1 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0245 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: 0.0462
Sparsity: 57.8 | Dead Features: 0 | Total Loss: 0.2201 | Reconstruction Loss: 0.1618 | L1 Loss: 0.0583 | l1_alpha: 8.0000e-04 | Tokens: 104448

  9%|▉         | 5207/55054 [02:34<24:43, 33.59it/s]

Sparsity: 17.9 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: -0.0048
Sparsity: 42.1 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: 0.0190
Sparsity: 57.5 | Dead Features: 0 | Total Loss: 0.0183 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: 0.0544
Sparsity: 145.4 | Dead Features: 0 | Total Loss: 0.0368 | Reconstruction Loss: 0.0171 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: 0.0178
Sparsity: 149.0 | Dead Features: 0 | Total Loss: 0.0455 | Reconstruction Loss: 0.0219 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: 0.0468
Sparsity: 30.4 | Dead Features: 0 | Total Loss: 0.1594 | Reconstruction Loss: 0.1081 | L1 Loss: 0.0513 | l1_alpha: 8.0000e-04 | Tokens: 106496

 10%|▉         | 5307/55054 [02:37<24:47, 33.44it/s]

Sparsity: 20.6 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: -0.0050
Sparsity: 47.3 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: 0.0197
Sparsity: 61.0 | Dead Features: 0 | Total Loss: 0.0187 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: 0.0539
Sparsity: 145.7 | Dead Features: 0 | Total Loss: 0.0374 | Reconstruction Loss: 0.0177 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: 0.0160
Sparsity: 148.7 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: 0.0474
Sparsity: 32.8 | Dead Features: 0 | Total Loss: 0.1600 | Reconstruction Loss: 0.1093 | L1 Loss: 0.0506 | l1_alpha: 8.0000e-04 | Tokens: 108544

 10%|▉         | 5407/55054 [02:40<24:39, 33.56it/s]

Sparsity: 22.5 | Dead Features: 0 | Total Loss: 0.0103 | Reconstruction Loss: 0.0044 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: -0.0048
Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: 0.0197
Sparsity: 62.1 | Dead Features: 0 | Total Loss: 0.0197 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: 0.0535
Sparsity: 147.6 | Dead Features: 0 | Total Loss: 0.0392 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: 0.0138
Sparsity: 149.7 | Dead Features: 0 | Total Loss: 0.0509 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: 0.0474
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.1700 | Reconstruction Loss: 0.1167 | L1 Loss: 0.0532 | l1_alpha: 8.0000e-04 | Tokens: 110592

 10%|█         | 5507/55054 [02:43<24:35, 33.59it/s]

Sparsity: 19.0 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: -0.0048
Sparsity: 45.5 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: 0.0202
Sparsity: 57.1 | Dead Features: 0 | Total Loss: 0.0184 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: 0.0529
Sparsity: 143.5 | Dead Features: 0 | Total Loss: 0.0364 | Reconstruction Loss: 0.0172 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: 0.0126
Sparsity: 147.6 | Dead Features: 0 | Total Loss: 0.0451 | Reconstruction Loss: 0.0221 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: 0.0476
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.1522 | Reconstruction Loss: 0.0994 | L1 Loss: 0.0528 | l1_alpha: 8.0000e-04 | Tokens: 112640

 10%|█         | 5607/55054 [02:46<24:42, 33.34it/s]

Sparsity: 20.7 | Dead Features: 0 | Total Loss: 0.0101 | Reconstruction Loss: 0.0043 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: -0.0046
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: 0.0205
Sparsity: 62.2 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0114 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: 0.0526
Sparsity: 150.9 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: 0.0109
Sparsity: 152.5 | Dead Features: 0 | Total Loss: 0.0491 | Reconstruction Loss: 0.0240 | L1 Loss: 0.0250 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: 0.0480
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.1648 | Reconstruction Loss: 0.1125 | L1 Loss: 0.0524 | l1_alpha: 8.0000e-04 | Tokens: 114688

 10%|█         | 5707/55054 [02:49<24:45, 33.22it/s]

Sparsity: 19.2 | Dead Features: 0 | Total Loss: 0.0094 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: -0.0045
Sparsity: 46.2 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: 0.0214
Sparsity: 59.9 | Dead Features: 0 | Total Loss: 0.0186 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: 0.0518
Sparsity: 147.9 | Dead Features: 0 | Total Loss: 0.0374 | Reconstruction Loss: 0.0173 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: 0.0091
Sparsity: 149.7 | Dead Features: 0 | Total Loss: 0.0467 | Reconstruction Loss: 0.0227 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: 0.0480
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.1599 | Reconstruction Loss: 0.1067 | L1 Loss: 0.0532 | l1_alpha: 8.0000e-04 | Tokens: 116736

 11%|█         | 5807/55054 [02:52<24:43, 33.19it/s]

Sparsity: 23.6 | Dead Features: 0 | Total Loss: 0.0107 | Reconstruction Loss: 0.0048 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: -0.0047
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: 0.0217
Sparsity: 60.1 | Dead Features: 0 | Total Loss: 0.0187 | Reconstruction Loss: 0.0109 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: 0.0514
Sparsity: 147.2 | Dead Features: 0 | Total Loss: 0.0382 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: 0.0069
Sparsity: 154.4 | Dead Features: 0 | Total Loss: 0.0483 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: 0.0481
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.1628 | Reconstruction Loss: 0.1085 | L1 Loss: 0.0542 | l1_alpha: 8.0000e-04 | Tokens: 118784

 11%|█         | 5907/55054 [02:55<24:36, 33.29it/s]

Sparsity: 18.6 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: -0.0047
Sparsity: 44.7 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: 0.0223
Sparsity: 61.5 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0108 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: 0.0512
Sparsity: 148.9 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0176 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: 0.0051
Sparsity: 152.4 | Dead Features: 0 | Total Loss: 0.0467 | Reconstruction Loss: 0.0224 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: 0.0481
Sparsity: 39.4 | Dead Features: 0 | Total Loss: 0.1591 | Reconstruction Loss: 0.1071 | L1 Loss: 0.0521 | l1_alpha: 8.0000e-04 | Tokens: 120832

 11%|█         | 6007/55054 [02:58<24:42, 33.09it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0098 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: -0.0046
Sparsity: 46.4 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: 0.0231
Sparsity: 60.1 | Dead Features: 0 | Total Loss: 0.0188 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: 0.0506
Sparsity: 149.0 | Dead Features: 0 | Total Loss: 0.0368 | Reconstruction Loss: 0.0170 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: 0.0030
Sparsity: 151.7 | Dead Features: 0 | Total Loss: 0.0459 | Reconstruction Loss: 0.0221 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: 0.0478
Sparsity: 38.8 | Dead Features: 0 | Total Loss: 0.1502 | Reconstruction Loss: 0.0968 | L1 Loss: 0.0534 | l1_alpha: 8.0000e-04 | Tokens: 122880

 11%|█         | 6107/55054 [03:01<24:19, 33.54it/s]

Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0096 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: -0.0047
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: 0.0235
Sparsity: 61.3 | Dead Features: 0 | Total Loss: 0.0191 | Reconstruction Loss: 0.0109 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: 0.0504
Sparsity: 149.7 | Dead Features: 0 | Total Loss: 0.0374 | Reconstruction Loss: 0.0173 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: 0.0009
Sparsity: 153.4 | Dead Features: 0 | Total Loss: 0.0467 | Reconstruction Loss: 0.0227 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: 0.0476
Sparsity: 39.9 | Dead Features: 0 | Total Loss: 0.1520 | Reconstruction Loss: 0.0997 | L1 Loss: 0.0523 | l1_alpha: 8.0000e-04 | Tokens: 124928

 11%|█▏        | 6207/55054 [03:04<24:17, 33.53it/s]

Sparsity: 17.9 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: -0.0048
Sparsity: 43.7 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: 0.0238
Sparsity: 59.3 | Dead Features: 0 | Total Loss: 0.0186 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: 0.0502
Sparsity: 146.3 | Dead Features: 0 | Total Loss: 0.0367 | Reconstruction Loss: 0.0171 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: -0.0012
Sparsity: 151.5 | Dead Features: 0 | Total Loss: 0.0463 | Reconstruction Loss: 0.0222 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: 0.0471
Sparsity: 39.8 | Dead Features: 0 | Total Loss: 0.1598 | Reconstruction Loss: 0.1073 | L1 Loss: 0.0525 | l1_alpha: 8.0000e-04 | Tokens: 12697

 11%|█▏        | 6307/55054 [03:07<24:18, 33.41it/s]

Sparsity: 20.8 | Dead Features: 0 | Total Loss: 0.0101 | Reconstruction Loss: 0.0043 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: -0.0047
Sparsity: 47.1 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: 0.0242
Sparsity: 61.0 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0111 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: 0.0495
Sparsity: 150.8 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0176 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: -0.0034
Sparsity: 156.2 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0257 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: 0.0471
Sparsity: 40.7 | Dead Features: 0 | Total Loss: 0.1633 | Reconstruction Loss: 0.1102 | L1 Loss: 0.0531 | l1_alpha: 8.0000e-04 | Tokens: 12902

 12%|█▏        | 6407/55054 [03:10<24:11, 33.53it/s]

Sparsity: 18.4 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: -0.0047
Sparsity: 42.6 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: 0.0248
Sparsity: 58.4 | Dead Features: 0 | Total Loss: 0.0182 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: 0.0491
Sparsity: 147.7 | Dead Features: 0 | Total Loss: 0.0366 | Reconstruction Loss: 0.0170 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: -0.0056
Sparsity: 150.1 | Dead Features: 0 | Total Loss: 0.0445 | Reconstruction Loss: 0.0215 | L1 Loss: 0.0230 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: 0.0465
Sparsity: 39.9 | Dead Features: 0 | Total Loss: 0.1490 | Reconstruction Loss: 0.0977 | L1 Loss: 0.0513 | l1_alpha: 8.0000e-04 | Tokens: 13107

 12%|█▏        | 6507/55054 [03:13<23:48, 34.00it/s]

Sparsity: 26.9 | Dead Features: 0 | Total Loss: 0.0146 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: -0.0045
Sparsity: 49.3 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: 0.0254
Sparsity: 64.7 | Dead Features: 0 | Total Loss: 0.0199 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: 0.0486
Sparsity: 152.9 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0183 | L1 Loss: 0.0205 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: -0.0078
Sparsity: 157.5 | Dead Features: 0 | Total Loss: 0.0482 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: 0.0462
Sparsity: 44.0 | Dead Features: 0 | Total Loss: 0.1605 | Reconstruction Loss: 0.1106 | L1 Loss: 0.0499 | l1_alpha: 8.0000e-04 | Tokens: 13312

 12%|█▏        | 6607/55054 [03:16<24:13, 33.32it/s]

Sparsity: 18.7 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: -0.0047
Sparsity: 43.7 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: 0.0254
Sparsity: 59.2 | Dead Features: 0 | Total Loss: 0.0189 | Reconstruction Loss: 0.0109 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: 0.0481
Sparsity: 147.3 | Dead Features: 0 | Total Loss: 0.0378 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: -0.0101
Sparsity: 153.3 | Dead Features: 0 | Total Loss: 0.0486 | Reconstruction Loss: 0.0239 | L1 Loss: 0.0247 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: 0.0456
Sparsity: 42.6 | Dead Features: 0 | Total Loss: 0.1614 | Reconstruction Loss: 0.1072 | L1 Loss: 0.0542 | l1_alpha: 8.0000e-04 | Tokens: 13516

 12%|█▏        | 6707/55054 [03:18<23:56, 33.66it/s]

Sparsity: 14.6 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: -0.0049
Sparsity: 40.3 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: 0.0259
Sparsity: 58.7 | Dead Features: 0 | Total Loss: 0.0182 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: 0.0483
Sparsity: 147.5 | Dead Features: 0 | Total Loss: 0.0372 | Reconstruction Loss: 0.0174 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: -0.0123
Sparsity: 152.9 | Dead Features: 0 | Total Loss: 0.0460 | Reconstruction Loss: 0.0217 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: 0.0452
Sparsity: 40.6 | Dead Features: 0 | Total Loss: 0.1566 | Reconstruction Loss: 0.1035 | L1 Loss: 0.0531 | l1_alpha: 8.0000e-04 | Tokens: 13721

 12%|█▏        | 6807/55054 [03:21<23:57, 33.56it/s]

Sparsity: 16.0 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: -0.0048
Sparsity: 44.6 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: 0.0263
Sparsity: 59.2 | Dead Features: 0 | Total Loss: 0.0185 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: 0.0478
Sparsity: 147.2 | Dead Features: 0 | Total Loss: 0.0364 | Reconstruction Loss: 0.0169 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: -0.0144
Sparsity: 150.3 | Dead Features: 0 | Total Loss: 0.0453 | Reconstruction Loss: 0.0220 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: 0.0442
Sparsity: 41.5 | Dead Features: 0 | Total Loss: 0.1510 | Reconstruction Loss: 0.0983 | L1 Loss: 0.0527 | l1_alpha: 8.0000e-04 | Tokens: 13926

 13%|█▎        | 6907/55054 [03:24<23:56, 33.52it/s]

Sparsity: 21.2 | Dead Features: 0 | Total Loss: 0.0104 | Reconstruction Loss: 0.0046 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: -0.0049
Sparsity: 47.7 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: 0.0265
Sparsity: 61.5 | Dead Features: 0 | Total Loss: 0.0188 | Reconstruction Loss: 0.0108 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: 0.0476
Sparsity: 147.0 | Dead Features: 0 | Total Loss: 0.0376 | Reconstruction Loss: 0.0176 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: -0.0162
Sparsity: 153.5 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0231 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: 0.0438
Sparsity: 44.9 | Dead Features: 0 | Total Loss: 0.1554 | Reconstruction Loss: 0.1031 | L1 Loss: 0.0524 | l1_alpha: 8.0000e-04 | Tokens: 14131

 13%|█▎        | 7007/55054 [03:27<23:56, 33.45it/s]

Sparsity: 18.6 | Dead Features: 0 | Total Loss: 0.0098 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: -0.0048
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: 0.0269
Sparsity: 65.7 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0111 | L1 Loss: 0.0086 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: 0.0471
Sparsity: 153.8 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: -0.0184
Sparsity: 159.1 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0222 | L1 Loss: 0.0251 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: 0.0428
Sparsity: 43.5 | Dead Features: 0 | Total Loss: 0.1522 | Reconstruction Loss: 0.0986 | L1 Loss: 0.0536 | l1_alpha: 8.0000e-04 | Tokens: 14336

 13%|█▎        | 7107/55054 [03:30<23:52, 33.48it/s]

Sparsity: 16.0 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: -0.0048
Sparsity: 44.0 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: 0.0274
Sparsity: 57.5 | Dead Features: 0 | Total Loss: 0.0179 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: 0.0469
Sparsity: 147.3 | Dead Features: 0 | Total Loss: 0.0363 | Reconstruction Loss: 0.0167 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: -0.0211
Sparsity: 146.9 | Dead Features: 0 | Total Loss: 0.0450 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: 0.0417
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.1561 | Reconstruction Loss: 0.1039 | L1 Loss: 0.0522 | l1_alpha: 8.0000e-04 | Tokens: 14540

 13%|█▎        | 7207/55054 [03:33<24:00, 33.22it/s]

Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0100 | Reconstruction Loss: 0.0043 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: -0.0048
Sparsity: 48.6 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: 0.0276
Sparsity: 61.6 | Dead Features: 0 | Total Loss: 0.0187 | Reconstruction Loss: 0.0108 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: 0.0467
Sparsity: 151.4 | Dead Features: 0 | Total Loss: 0.0371 | Reconstruction Loss: 0.0172 | L1 Loss: 0.0199 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: -0.0237
Sparsity: 157.0 | Dead Features: 0 | Total Loss: 0.0472 | Reconstruction Loss: 0.0232 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: 0.0408
Sparsity: 44.7 | Dead Features: 0 | Total Loss: 0.1558 | Reconstruction Loss: 0.1030 | L1 Loss: 0.0527 | l1_alpha: 8.0000e-04 | Tokens: 14745

 13%|█▎        | 7307/55054 [03:36<23:52, 33.34it/s]

Sparsity: 15.6 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: -0.0048
Sparsity: 41.6 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: 0.0282
Sparsity: 58.4 | Dead Features: 0 | Total Loss: 0.0181 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: 0.0462
Sparsity: 144.6 | Dead Features: 0 | Total Loss: 0.0359 | Reconstruction Loss: 0.0169 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: -0.0260
Sparsity: 151.9 | Dead Features: 0 | Total Loss: 0.0453 | Reconstruction Loss: 0.0217 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: 0.0401
Sparsity: 44.7 | Dead Features: 0 | Total Loss: 0.1549 | Reconstruction Loss: 0.0989 | L1 Loss: 0.0560 | l1_alpha: 8.0000e-04 | Tokens: 14950

 13%|█▎        | 7407/55054 [03:39<24:01, 33.05it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0080 | Reconstruction Loss: 0.0031 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: -0.0049
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: 0.0282
Sparsity: 55.1 | Dead Features: 0 | Total Loss: 0.0181 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: 0.0460
Sparsity: 146.4 | Dead Features: 0 | Total Loss: 0.0362 | Reconstruction Loss: 0.0170 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: -0.0288
Sparsity: 148.2 | Dead Features: 0 | Total Loss: 0.0435 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0227 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: 0.0392
Sparsity: 45.1 | Dead Features: 0 | Total Loss: 0.1468 | Reconstruction Loss: 0.0900 | L1 Loss: 0.0568 | l1_alpha: 8.0000e-04 | Tokens: 15155

 14%|█▎        | 7507/55054 [03:42<23:38, 33.51it/s]

Sparsity: 17.0 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: -0.0049
Sparsity: 44.2 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: 0.0288
Sparsity: 59.0 | Dead Features: 0 | Total Loss: 0.0187 | Reconstruction Loss: 0.0107 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: 0.0454
Sparsity: 148.1 | Dead Features: 0 | Total Loss: 0.0367 | Reconstruction Loss: 0.0172 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: -0.0308
Sparsity: 151.8 | Dead Features: 0 | Total Loss: 0.0462 | Reconstruction Loss: 0.0227 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: 0.0381
Sparsity: 47.8 | Dead Features: 0 | Total Loss: 0.1552 | Reconstruction Loss: 0.1015 | L1 Loss: 0.0537 | l1_alpha: 8.0000e-04 | Tokens: 15360

 14%|█▍        | 7607/55054 [03:45<24:00, 32.93it/s]

Sparsity: 17.8 | Dead Features: 0 | Total Loss: 0.0096 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: -0.0047
Sparsity: 44.2 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: 0.0285
Sparsity: 59.6 | Dead Features: 0 | Total Loss: 0.0187 | Reconstruction Loss: 0.0109 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: 0.0449
Sparsity: 152.5 | Dead Features: 0 | Total Loss: 0.0381 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: -0.0330
Sparsity: 153.0 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0234 | L1 Loss: 0.0240 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: 0.0371
Sparsity: 54.9 | Dead Features: 0 | Total Loss: 0.1756 | Reconstruction Loss: 0.1148 | L1 Loss: 0.0608 | l1_alpha: 8.0000e-04 | Tokens: 15564

 14%|█▍        | 7703/55054 [03:48<23:08, 34.11it/s]

Sparsity: 15.6 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: -0.0047
Sparsity: 40.6 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: 0.0292
Sparsity: 58.4 | Dead Features: 0 | Total Loss: 0.0180 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: 0.0444
Sparsity: 148.8 | Dead Features: 0 | Total Loss: 0.0365 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: -0.0356
Sparsity: 153.6 | Dead Features: 0 | Total Loss: 0.0447 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0238 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: 0.0363
Sparsity: 43.3 | Dead Features: 0 | Total Loss: 0.1488 | Reconstruction Loss: 0.0942 | L1 Loss: 0.0546 | l1_alpha: 8.0000e-04 | Tokens: 15769

 14%|█▍        | 7803/55054 [03:51<24:11, 32.55it/s]

Sparsity: 19.3 | Dead Features: 0 | Total Loss: 0.0106 | Reconstruction Loss: 0.0048 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: -0.0047
Sparsity: 43.7 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: 0.0292
Sparsity: 60.9 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0111 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: 0.0436
Sparsity: 149.5 | Dead Features: 0 | Total Loss: 0.0386 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: -0.0379
Sparsity: 153.0 | Dead Features: 0 | Total Loss: 0.0471 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: 0.0352
Sparsity: 46.6 | Dead Features: 0 | Total Loss: 1.2198 | Reconstruction Loss: 1.1502 | L1 Loss: 0.0697 | l1_alpha: 8.0000e-04 | Tokens: 15974

 14%|█▍        | 7903/55054 [03:54<24:50, 31.63it/s]

Sparsity: 20.9 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0057 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: -0.0047
Sparsity: 47.2 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: 0.0293
Sparsity: 63.3 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0113 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: 0.0430
Sparsity: 153.0 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: -0.0397
Sparsity: 160.7 | Dead Features: 0 | Total Loss: 0.0490 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: 0.0341
Sparsity: 42.9 | Dead Features: 0 | Total Loss: 0.1807 | Reconstruction Loss: 0.1218 | L1 Loss: 0.0589 | l1_alpha: 8.0000e-04 | Tokens: 16179

 15%|█▍        | 8003/55054 [03:57<24:31, 31.96it/s]

Sparsity: 17.7 | Dead Features: 0 | Total Loss: 0.0098 | Reconstruction Loss: 0.0043 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: -0.0046
Sparsity: 45.4 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: 0.0299
Sparsity: 59.6 | Dead Features: 0 | Total Loss: 0.0186 | Reconstruction Loss: 0.0109 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: 0.0428
Sparsity: 151.2 | Dead Features: 0 | Total Loss: 0.0374 | Reconstruction Loss: 0.0176 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: -0.0421
Sparsity: 153.6 | Dead Features: 0 | Total Loss: 0.0460 | Reconstruction Loss: 0.0224 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: 0.0331
Sparsity: 45.7 | Dead Features: 0 | Total Loss: 0.1430 | Reconstruction Loss: 0.0890 | L1 Loss: 0.0540 | l1_alpha: 8.0000e-04 | Tokens: 16384

 15%|█▍        | 8107/55054 [04:00<22:28, 34.81it/s]

Sparsity: 15.5 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: -0.0047
Sparsity: 40.5 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: 0.0303
Sparsity: 57.9 | Dead Features: 0 | Total Loss: 0.0179 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: 0.0426
Sparsity: 147.5 | Dead Features: 0 | Total Loss: 0.0358 | Reconstruction Loss: 0.0166 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: -0.0443
Sparsity: 150.3 | Dead Features: 0 | Total Loss: 0.0439 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0230 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: 0.0320
Sparsity: 46.7 | Dead Features: 0 | Total Loss: 0.1464 | Reconstruction Loss: 0.0913 | L1 Loss: 0.0551 | l1_alpha: 8.0000e-04 | Tokens: 16588

 15%|█▍        | 8207/55054 [04:03<22:39, 34.46it/s]

Sparsity: 16.4 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: -0.0045
Sparsity: 40.9 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: 0.0304
Sparsity: 58.1 | Dead Features: 0 | Total Loss: 0.0179 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: 0.0420
Sparsity: 145.6 | Dead Features: 0 | Total Loss: 0.0358 | Reconstruction Loss: 0.0170 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: -0.0468
Sparsity: 151.3 | Dead Features: 0 | Total Loss: 0.0449 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: 0.0303
Sparsity: 47.6 | Dead Features: 0 | Total Loss: 0.1510 | Reconstruction Loss: 0.0958 | L1 Loss: 0.0551 | l1_alpha: 8.0000e-04 | Tokens: 16793

 15%|█▌        | 8303/55054 [04:06<24:07, 32.29it/s]

Sparsity: 17.0 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: -0.0046
Sparsity: 44.0 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: 0.0308
Sparsity: 60.2 | Dead Features: 0 | Total Loss: 0.0184 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: 0.0419
Sparsity: 152.5 | Dead Features: 0 | Total Loss: 0.0374 | Reconstruction Loss: 0.0174 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: -0.0487
Sparsity: 156.6 | Dead Features: 0 | Total Loss: 0.0460 | Reconstruction Loss: 0.0222 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: 0.0296
Sparsity: 50.8 | Dead Features: 0 | Total Loss: 0.1751 | Reconstruction Loss: 0.1146 | L1 Loss: 0.0605 | l1_alpha: 8.0000e-04 | Tokens: 16998

 15%|█▌        | 8407/55054 [04:09<23:19, 33.34it/s]

Sparsity: 18.0 | Dead Features: 0 | Total Loss: 0.0097 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: -0.0046
Sparsity: 43.5 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: 0.0308
Sparsity: 62.6 | Dead Features: 0 | Total Loss: 0.0184 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: 0.0417
Sparsity: 150.2 | Dead Features: 0 | Total Loss: 0.0361 | Reconstruction Loss: 0.0167 | L1 Loss: 0.0194 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: -0.0510
Sparsity: 153.9 | Dead Features: 0 | Total Loss: 0.0448 | Reconstruction Loss: 0.0217 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: 0.0286
Sparsity: 50.7 | Dead Features: 0 | Total Loss: 0.1519 | Reconstruction Loss: 0.0952 | L1 Loss: 0.0566 | l1_alpha: 8.0000e-04 | Tokens: 17203

 15%|█▌        | 8503/55054 [04:12<23:42, 32.73it/s]

Sparsity: 16.1 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: -0.0046
Sparsity: 41.7 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: 0.0310
Sparsity: 58.6 | Dead Features: 0 | Total Loss: 0.0181 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: 0.0413
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0371 | Reconstruction Loss: 0.0174 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: -0.0527
Sparsity: 148.2 | Dead Features: 0 | Total Loss: 0.0456 | Reconstruction Loss: 0.0226 | L1 Loss: 0.0230 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: 0.0274
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.1421 | Reconstruction Loss: 0.0872 | L1 Loss: 0.0548 | l1_alpha: 8.0000e-04 | Tokens: 17408

 16%|█▌        | 8607/55054 [04:15<23:18, 33.20it/s]

Sparsity: 16.8 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: -0.0046
Sparsity: 41.6 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: 0.0309
Sparsity: 59.3 | Dead Features: 0 | Total Loss: 0.0180 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: 0.0411
Sparsity: 146.2 | Dead Features: 0 | Total Loss: 0.0357 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0193 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: -0.0544
Sparsity: 150.9 | Dead Features: 0 | Total Loss: 0.0439 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: 0.0266
Sparsity: 45.7 | Dead Features: 0 | Total Loss: 0.1436 | Reconstruction Loss: 0.0905 | L1 Loss: 0.0531 | l1_alpha: 8.0000e-04 | Tokens: 17612

 16%|█▌        | 8703/55054 [04:18<23:57, 32.25it/s]

Sparsity: 13.9 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: -0.0047
Sparsity: 40.4 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: 0.0311
Sparsity: 56.0 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: 0.0410
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0354 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: -0.0569
Sparsity: 149.1 | Dead Features: 0 | Total Loss: 0.0431 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: 0.0258
Sparsity: 42.8 | Dead Features: 0 | Total Loss: 0.1555 | Reconstruction Loss: 0.1040 | L1 Loss: 0.0516 | l1_alpha: 8.0000e-04 | Tokens: 17817

 16%|█▌        | 8807/55054 [04:21<23:36, 32.65it/s]

Sparsity: 15.6 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: -0.0045
Sparsity: 41.1 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: 0.0314
Sparsity: 58.6 | Dead Features: 0 | Total Loss: 0.0179 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: 0.0406
Sparsity: 145.7 | Dead Features: 0 | Total Loss: 0.0357 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: -0.0595
Sparsity: 150.9 | Dead Features: 0 | Total Loss: 0.0440 | Reconstruction Loss: 0.0213 | L1 Loss: 0.0227 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: 0.0244
Sparsity: 128.1 | Dead Features: 0 | Total Loss: 0.7203 | Reconstruction Loss: 0.4894 | L1 Loss: 0.2309 | l1_alpha: 8.0000e-04 | Tokens: 1802

 16%|█▌        | 8903/55054 [04:24<23:30, 32.71it/s]

Sparsity: 18.2 | Dead Features: 0 | Total Loss: 0.0100 | Reconstruction Loss: 0.0044 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: -0.0049
Sparsity: 43.8 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: 0.0316
Sparsity: 56.6 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: 0.0398
Sparsity: 144.5 | Dead Features: 0 | Total Loss: 0.0356 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: -0.0615
Sparsity: 150.1 | Dead Features: 0 | Total Loss: 0.0444 | Reconstruction Loss: 0.0214 | L1 Loss: 0.0230 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: 0.0235
Sparsity: 46.5 | Dead Features: 0 | Total Loss: 0.3359 | Reconstruction Loss: 0.2560 | L1 Loss: 0.0799 | l1_alpha: 8.0000e-04 | Tokens: 18227

 16%|█▋        | 9007/55054 [04:27<23:01, 33.34it/s]

Sparsity: 15.7 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: -0.0055
Sparsity: 40.6 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: 0.0319
Sparsity: 55.9 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: 0.0396
Sparsity: 142.0 | Dead Features: 0 | Total Loss: 0.0358 | Reconstruction Loss: 0.0166 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: -0.0626
Sparsity: 151.4 | Dead Features: 0 | Total Loss: 0.0435 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0230 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: 0.0227
Sparsity: 29.4 | Dead Features: 0 | Total Loss: 0.1725 | Reconstruction Loss: 0.1176 | L1 Loss: 0.0549 | l1_alpha: 8.0000e-04 | Tokens: 18432

 17%|█▋        | 9107/55054 [04:30<21:26, 35.71it/s]

Sparsity: 16.4 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: -0.0057
Sparsity: 42.1 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: 0.0320
Sparsity: 59.9 | Dead Features: 0 | Total Loss: 0.0182 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: 0.0395
Sparsity: 146.9 | Dead Features: 0 | Total Loss: 0.0369 | Reconstruction Loss: 0.0175 | L1 Loss: 0.0194 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: -0.0637
Sparsity: 154.1 | Dead Features: 0 | Total Loss: 0.0453 | Reconstruction Loss: 0.0220 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: 0.0219
Sparsity: 32.1 | Dead Features: 0 | Total Loss: 0.1655 | Reconstruction Loss: 0.1105 | L1 Loss: 0.0549 | l1_alpha: 8.0000e-04 | Tokens: 18636

 17%|█▋        | 9207/55054 [04:33<23:28, 32.55it/s]

Sparsity: 15.1 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: -0.0057
Sparsity: 40.3 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: 0.0322
Sparsity: 56.8 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: 0.0391
Sparsity: 143.7 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: -0.0657
Sparsity: 148.9 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: 0.0210
Sparsity: 31.9 | Dead Features: 0 | Total Loss: 0.1512 | Reconstruction Loss: 0.0980 | L1 Loss: 0.0532 | l1_alpha: 8.0000e-04 | Tokens: 18841

 17%|█▋        | 9307/55054 [04:36<22:22, 34.07it/s]

Sparsity: 18.6 | Dead Features: 0 | Total Loss: 0.0100 | Reconstruction Loss: 0.0043 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: -0.0057
Sparsity: 43.4 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: 0.0325
Sparsity: 59.8 | Dead Features: 0 | Total Loss: 0.0184 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: 0.0386
Sparsity: 148.3 | Dead Features: 0 | Total Loss: 0.0368 | Reconstruction Loss: 0.0171 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: -0.0673
Sparsity: 152.5 | Dead Features: 0 | Total Loss: 0.0464 | Reconstruction Loss: 0.0226 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: 0.0197
Sparsity: 34.2 | Dead Features: 0 | Total Loss: 0.1613 | Reconstruction Loss: 0.1065 | L1 Loss: 0.0548 | l1_alpha: 8.0000e-04 | Tokens: 19046

 17%|█▋        | 9407/55054 [04:39<23:11, 32.80it/s]

Sparsity: 15.7 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: -0.0057
Sparsity: 41.9 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: 0.0325
Sparsity: 57.9 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: 0.0383
Sparsity: 149.6 | Dead Features: 0 | Total Loss: 0.0354 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: -0.0698
Sparsity: 153.9 | Dead Features: 0 | Total Loss: 0.0436 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: 0.0187
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.1532 | Reconstruction Loss: 0.0973 | L1 Loss: 0.0559 | l1_alpha: 8.0000e-04 | Tokens: 19251

 17%|█▋        | 9507/55054 [04:42<22:41, 33.46it/s]

Sparsity: 15.4 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: -0.0057
Sparsity: 42.3 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: 0.0329
Sparsity: 61.0 | Dead Features: 0 | Total Loss: 0.0183 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: 0.0381
Sparsity: 149.6 | Dead Features: 0 | Total Loss: 0.0367 | Reconstruction Loss: 0.0166 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: -0.0720
Sparsity: 144.7 | Dead Features: 0 | Total Loss: 0.0465 | Reconstruction Loss: 0.0230 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: 0.0176
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.1599 | Reconstruction Loss: 0.1043 | L1 Loss: 0.0556 | l1_alpha: 8.0000e-04 | Tokens: 19456

 17%|█▋        | 9607/55054 [04:45<21:44, 34.84it/s]

Sparsity: 18.7 | Dead Features: 0 | Total Loss: 0.0108 | Reconstruction Loss: 0.0050 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: -0.0057
Sparsity: 51.1 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: 0.0328
Sparsity: 65.6 | Dead Features: 0 | Total Loss: 0.0198 | Reconstruction Loss: 0.0117 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: 0.0376
Sparsity: 157.7 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: -0.0745
Sparsity: 158.0 | Dead Features: 0 | Total Loss: 0.0505 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0253 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: 0.0165
Sparsity: 39.4 | Dead Features: 0 | Total Loss: 0.2016 | Reconstruction Loss: 0.1231 | L1 Loss: 0.0785 | l1_alpha: 8.0000e-04 | Tokens: 19660

 18%|█▊        | 9707/55054 [04:48<22:49, 33.12it/s]

Sparsity: 16.0 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: -0.0058
Sparsity: 41.1 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: 0.0330
Sparsity: 57.9 | Dead Features: 0 | Total Loss: 0.0181 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: 0.0373
Sparsity: 149.0 | Dead Features: 0 | Total Loss: 0.0361 | Reconstruction Loss: 0.0167 | L1 Loss: 0.0194 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: -0.0761
Sparsity: 151.0 | Dead Features: 0 | Total Loss: 0.0454 | Reconstruction Loss: 0.0221 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: 0.0155
Sparsity: 38.6 | Dead Features: 0 | Total Loss: 0.1639 | Reconstruction Loss: 0.1079 | L1 Loss: 0.0561 | l1_alpha: 8.0000e-04 | Tokens: 19865

 18%|█▊        | 9807/55054 [04:51<22:27, 33.57it/s]

Sparsity: 13.4 | Dead Features: 0 | Total Loss: 0.0084 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: -0.0059
Sparsity: 40.0 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: 0.0330
Sparsity: 62.9 | Dead Features: 0 | Total Loss: 0.0196 | Reconstruction Loss: 0.0109 | L1 Loss: 0.0087 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: 0.0372
Sparsity: 148.0 | Dead Features: 0 | Total Loss: 0.0375 | Reconstruction Loss: 0.0180 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: -0.0780
Sparsity: 149.3 | Dead Features: 0 | Total Loss: 0.0465 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: 0.0145
Sparsity: 38.9 | Dead Features: 0 | Total Loss: 0.1574 | Reconstruction Loss: 0.1054 | L1 Loss: 0.0520 | l1_alpha: 8.0000e-04 | Tokens: 20070

 18%|█▊        | 9907/55054 [04:54<23:00, 32.70it/s]

Sparsity: 16.4 | Dead Features: 0 | Total Loss: 0.0096 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: -0.0059
Sparsity: 43.9 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: 0.0327
Sparsity: 60.1 | Dead Features: 0 | Total Loss: 0.0179 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: 0.0370
Sparsity: 148.7 | Dead Features: 0 | Total Loss: 0.0362 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0194 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: -0.0796
Sparsity: 153.0 | Dead Features: 0 | Total Loss: 0.0450 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: 0.0134
Sparsity: 41.0 | Dead Features: 0 | Total Loss: 0.1563 | Reconstruction Loss: 0.0983 | L1 Loss: 0.0580 | l1_alpha: 8.0000e-04 | Tokens: 20275

 18%|█▊        | 10007/55054 [04:57<22:34, 33.26it/s]

Sparsity: 14.7 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: -0.0058
Sparsity: 39.3 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: 0.0331
Sparsity: 56.4 | Dead Features: 0 | Total Loss: 0.0173 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: 0.0366
Sparsity: 147.6 | Dead Features: 0 | Total Loss: 0.0348 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: -0.0818
Sparsity: 148.4 | Dead Features: 0 | Total Loss: 0.0418 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: 0.0121
Sparsity: 41.6 | Dead Features: 0 | Total Loss: 0.1471 | Reconstruction Loss: 0.0887 | L1 Loss: 0.0584 | l1_alpha: 8.0000e-04 | Tokens: 20480

 18%|█▊        | 10107/55054 [05:00<22:16, 33.62it/s]

Sparsity: 15.6 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: -0.0058
Sparsity: 41.6 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: 0.0333
Sparsity: 57.8 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: 0.0362
Sparsity: 152.1 | Dead Features: 0 | Total Loss: 0.0358 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: -0.0833
Sparsity: 151.7 | Dead Features: 0 | Total Loss: 0.0438 | Reconstruction Loss: 0.0211 | L1 Loss: 0.0227 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: 0.0109
Sparsity: 42.4 | Dead Features: 0 | Total Loss: 0.1513 | Reconstruction Loss: 0.0946 | L1 Loss: 0.0567 | l1_alpha: 8.0000e-04 | Tokens: 20684

 19%|█▊        | 10207/55054 [05:03<22:14, 33.61it/s]

Sparsity: 18.9 | Dead Features: 0 | Total Loss: 0.0104 | Reconstruction Loss: 0.0046 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: -0.0059
Sparsity: 45.5 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: 0.0333
Sparsity: 62.0 | Dead Features: 0 | Total Loss: 0.0190 | Reconstruction Loss: 0.0109 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: 0.0359
Sparsity: 157.0 | Dead Features: 0 | Total Loss: 0.0380 | Reconstruction Loss: 0.0173 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: -0.0853
Sparsity: 154.9 | Dead Features: 0 | Total Loss: 0.0480 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0246 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: 0.0098
Sparsity: 43.4 | Dead Features: 0 | Total Loss: 0.1567 | Reconstruction Loss: 0.1000 | L1 Loss: 0.0566 | l1_alpha: 8.0000e-04 | Tokens: 20889

 19%|█▊        | 10307/55054 [05:06<22:10, 33.64it/s]

Sparsity: 16.4 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: -0.0060
Sparsity: 41.5 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: 0.0332
Sparsity: 59.2 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: 0.0359
Sparsity: 149.9 | Dead Features: 0 | Total Loss: 0.0352 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: -0.0866
Sparsity: 150.2 | Dead Features: 0 | Total Loss: 0.0433 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: 0.0090
Sparsity: 43.8 | Dead Features: 0 | Total Loss: 0.1476 | Reconstruction Loss: 0.0910 | L1 Loss: 0.0566 | l1_alpha: 8.0000e-04 | Tokens: 21094

 19%|█▉        | 10407/55054 [05:09<22:12, 33.50it/s]

Sparsity: 13.9 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: -0.0060
Sparsity: 40.6 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: 0.0335
Sparsity: 60.3 | Dead Features: 0 | Total Loss: 0.0185 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0082 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: 0.0356
Sparsity: 144.6 | Dead Features: 0 | Total Loss: 0.0373 | Reconstruction Loss: 0.0176 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: -0.0873
Sparsity: 151.9 | Dead Features: 0 | Total Loss: 0.0457 | Reconstruction Loss: 0.0220 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: 0.0088
Sparsity: 40.2 | Dead Features: 0 | Total Loss: 0.1651 | Reconstruction Loss: 0.1084 | L1 Loss: 0.0568 | l1_alpha: 8.0000e-04 | Tokens: 21299

 19%|█▉        | 10507/55054 [05:12<22:05, 33.61it/s]

Sparsity: 15.6 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: -0.0062
Sparsity: 42.7 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: 0.0338
Sparsity: 58.6 | Dead Features: 0 | Total Loss: 0.0180 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: 0.0351
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0359 | Reconstruction Loss: 0.0169 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: -0.0885
Sparsity: 151.8 | Dead Features: 0 | Total Loss: 0.0439 | Reconstruction Loss: 0.0214 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: 0.0076
Sparsity: 42.6 | Dead Features: 0 | Total Loss: 0.1495 | Reconstruction Loss: 0.0930 | L1 Loss: 0.0565 | l1_alpha: 8.0000e-04 | Tokens: 21504

 19%|█▉        | 10607/55054 [05:15<22:18, 33.21it/s]

Sparsity: 15.5 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: -0.0059
Sparsity: 41.2 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: 0.0338
Sparsity: 58.6 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: 0.0350
Sparsity: 147.8 | Dead Features: 0 | Total Loss: 0.0362 | Reconstruction Loss: 0.0169 | L1 Loss: 0.0193 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: -0.0897
Sparsity: 152.9 | Dead Features: 0 | Total Loss: 0.0444 | Reconstruction Loss: 0.0213 | L1 Loss: 0.0230 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: 0.0064
Sparsity: 43.3 | Dead Features: 0 | Total Loss: 0.1511 | Reconstruction Loss: 0.0952 | L1 Loss: 0.0559 | l1_alpha: 8.0000e-04 | Tokens: 21708

 19%|█▉        | 10707/55054 [05:18<22:02, 33.53it/s]

Sparsity: 13.5 | Dead Features: 0 | Total Loss: 0.0084 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: -0.0060
Sparsity: 39.7 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: 0.0337
Sparsity: 55.9 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: 0.0345
Sparsity: 146.7 | Dead Features: 0 | Total Loss: 0.0351 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: -0.0909
Sparsity: 146.9 | Dead Features: 0 | Total Loss: 0.0429 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: 0.0057
Sparsity: 45.8 | Dead Features: 0 | Total Loss: 0.1538 | Reconstruction Loss: 0.0951 | L1 Loss: 0.0587 | l1_alpha: 8.0000e-04 | Tokens: 21913

 20%|█▉        | 10807/55054 [05:21<22:03, 33.42it/s]

Sparsity: 17.0 | Dead Features: 0 | Total Loss: 0.0099 | Reconstruction Loss: 0.0044 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: -0.0060
Sparsity: 44.2 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: 0.0334
Sparsity: 60.1 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: 0.0341
Sparsity: 152.6 | Dead Features: 0 | Total Loss: 0.0367 | Reconstruction Loss: 0.0169 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: -0.0925
Sparsity: 151.2 | Dead Features: 0 | Total Loss: 0.0454 | Reconstruction Loss: 0.0221 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: 0.0048
Sparsity: 45.6 | Dead Features: 0 | Total Loss: 0.1485 | Reconstruction Loss: 0.0911 | L1 Loss: 0.0575 | l1_alpha: 8.0000e-04 | Tokens: 22118

 20%|█▉        | 10907/55054 [05:24<22:11, 33.15it/s]

Sparsity: 15.7 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: -0.0059
Sparsity: 42.8 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: 0.0335
Sparsity: 61.5 | Dead Features: 0 | Total Loss: 0.0181 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: 0.0337
Sparsity: 152.1 | Dead Features: 0 | Total Loss: 0.0366 | Reconstruction Loss: 0.0171 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: -0.0940
Sparsity: 156.0 | Dead Features: 0 | Total Loss: 0.0447 | Reconstruction Loss: 0.0212 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: 0.0036
Sparsity: 45.8 | Dead Features: 0 | Total Loss: 0.1463 | Reconstruction Loss: 0.0892 | L1 Loss: 0.0572 | l1_alpha: 8.0000e-04 | Tokens: 22323

 20%|█▉        | 11007/55054 [05:27<22:01, 33.33it/s]

Sparsity: 15.8 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: -0.0058
Sparsity: 41.8 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: 0.0338
Sparsity: 60.2 | Dead Features: 0 | Total Loss: 0.0181 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: 0.0333
Sparsity: 149.7 | Dead Features: 0 | Total Loss: 0.0363 | Reconstruction Loss: 0.0170 | L1 Loss: 0.0194 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: -0.0951
Sparsity: 153.4 | Dead Features: 0 | Total Loss: 0.0453 | Reconstruction Loss: 0.0218 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: 0.0028
Sparsity: 45.1 | Dead Features: 0 | Total Loss: 0.1523 | Reconstruction Loss: 0.0965 | L1 Loss: 0.0558 | l1_alpha: 8.0000e-04 | Tokens: 22528

 20%|██        | 11107/55054 [05:30<22:45, 32.18it/s]

Sparsity: 15.7 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: -0.0059
Sparsity: 40.4 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: 0.0338
Sparsity: 58.2 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: 0.0328
Sparsity: 146.4 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0164 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: -0.0964
Sparsity: 148.1 | Dead Features: 0 | Total Loss: 0.0428 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: 0.0016
Sparsity: 48.0 | Dead Features: 0 | Total Loss: 0.1481 | Reconstruction Loss: 0.0908 | L1 Loss: 0.0574 | l1_alpha: 8.0000e-04 | Tokens: 22732

 20%|██        | 11207/55054 [05:33<22:23, 32.65it/s]

Sparsity: 13.5 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: -0.0060
Sparsity: 39.0 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: 0.0339
Sparsity: 57.2 | Dead Features: 0 | Total Loss: 0.0182 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: 0.0326
Sparsity: 153.1 | Dead Features: 0 | Total Loss: 0.0370 | Reconstruction Loss: 0.0171 | L1 Loss: 0.0200 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: -0.0979
Sparsity: 155.5 | Dead Features: 0 | Total Loss: 0.0453 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0237 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: 0.0010
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.1650 | Reconstruction Loss: 0.1072 | L1 Loss: 0.0578 | l1_alpha: 8.0000e-04 | Tokens: 22937

 21%|██        | 11303/55054 [05:36<22:30, 32.39it/s]

Sparsity: 17.3 | Dead Features: 0 | Total Loss: 0.0102 | Reconstruction Loss: 0.0045 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: -0.0060
Sparsity: 43.9 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0080 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: 0.0343
Sparsity: 62.1 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0110 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: 0.0325
Sparsity: 154.2 | Dead Features: 0 | Total Loss: 0.0387 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: -0.0992
Sparsity: 159.2 | Dead Features: 0 | Total Loss: 0.0474 | Reconstruction Loss: 0.0230 | L1 Loss: 0.0243 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: 0.0002
Sparsity: 44.0 | Dead Features: 0 | Total Loss: 0.1553 | Reconstruction Loss: 0.1005 | L1 Loss: 0.0547 | l1_alpha: 8.0000e-04 | Tokens: 23142

 21%|██        | 11407/55054 [05:39<22:01, 33.04it/s]

Sparsity: 13.3 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: -0.0060
Sparsity: 38.6 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: 0.0340
Sparsity: 57.6 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: 0.0320
Sparsity: 144.9 | Dead Features: 0 | Total Loss: 0.0354 | Reconstruction Loss: 0.0164 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: -0.0998
Sparsity: 148.0 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: -0.0004
Sparsity: 45.1 | Dead Features: 0 | Total Loss: 0.1405 | Reconstruction Loss: 0.0847 | L1 Loss: 0.0558 | l1_alpha: 8.0000e-04 | Tokens: 2334

 21%|██        | 11507/55054 [05:42<21:49, 33.26it/s]

Sparsity: 14.7 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: -0.0061
Sparsity: 39.9 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: 0.0339
Sparsity: 58.6 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: 0.0315
Sparsity: 148.2 | Dead Features: 0 | Total Loss: 0.0353 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: -0.1009
Sparsity: 147.1 | Dead Features: 0 | Total Loss: 0.0429 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: -0.0013
Sparsity: 47.0 | Dead Features: 0 | Total Loss: 0.1433 | Reconstruction Loss: 0.0882 | L1 Loss: 0.0551 | l1_alpha: 8.0000e-04 | Tokens: 2355

 21%|██        | 11523/55054 [05:42<21:45, 33.35it/s]IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

 86%|████████▌ | 47105/55054 [23:28<04:05, 32.34it/s]

Sparsity: 14.6 | Dead Features: 0 | Total Loss: 0.0097 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 96460800 | Self Similarity: -0.0119
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 96460800 | Self Similarity: 0.0320
Sparsity: 54.8 | Dead Features: 0 | Total Loss: 0.0180 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 96460800 | Self Similarity: 0.0024
Sparsity: 144.2 | Dead Features: 0 | Total Loss: 0.0365 | Reconstruction Loss: 0.0174 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 96460800 | Self Similarity: -0.0877
Sparsity: 140.6 | Dead Features: 0 | Total Loss: 0.0454 | Reconstruction Loss: 0.0221 | L1 Loss: 0.0234 | l1_alpha: 8.0000e-04 | Tokens: 96460800 | Self Similarity: -0.0082
Sparsity: 66.9 | Dead Features: 0 | Total Loss: 0.1258 | Reconstruction Loss: 0.0794 | L1 Loss: 0.0464 | l1_alpha: 8.0000e-04 | Tokens: 9646

 86%|████████▌ | 47205/55054 [23:31<04:02, 32.39it/s]

Sparsity: 12.9 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 96665600 | Self Similarity: -0.0119
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 96665600 | Self Similarity: 0.0322
Sparsity: 50.5 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 96665600 | Self Similarity: 0.0025
Sparsity: 142.0 | Dead Features: 0 | Total Loss: 0.0341 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 96665600 | Self Similarity: -0.0876
Sparsity: 139.5 | Dead Features: 0 | Total Loss: 0.0417 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 96665600 | Self Similarity: -0.0080
Sparsity: 76.2 | Dead Features: 0 | Total Loss: 0.1192 | Reconstruction Loss: 0.0694 | L1 Loss: 0.0498 | l1_alpha: 8.0000e-04 | Tokens: 9666

 86%|████████▌ | 47305/55054 [23:34<03:59, 32.35it/s]

Sparsity: 11.3 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 96870400 | Self Similarity: -0.0119
Sparsity: 33.9 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 96870400 | Self Similarity: 0.0323
Sparsity: 51.0 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 96870400 | Self Similarity: 0.0021
Sparsity: 145.1 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 96870400 | Self Similarity: -0.0877
Sparsity: 139.1 | Dead Features: 0 | Total Loss: 0.0417 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 96870400 | Self Similarity: -0.0079
Sparsity: 80.3 | Dead Features: 0 | Total Loss: 0.1175 | Reconstruction Loss: 0.0660 | L1 Loss: 0.0515 | l1_alpha: 8.0000e-04 | Tokens: 9687

 86%|████████▌ | 47405/55054 [23:37<03:55, 32.45it/s]

Sparsity: 13.4 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 97075200 | Self Similarity: -0.0120
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 97075200 | Self Similarity: 0.0324
Sparsity: 53.0 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 97075200 | Self Similarity: 0.0020
Sparsity: 143.9 | Dead Features: 0 | Total Loss: 0.0349 | Reconstruction Loss: 0.0164 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 97075200 | Self Similarity: -0.0877
Sparsity: 143.1 | Dead Features: 0 | Total Loss: 0.0430 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0226 | l1_alpha: 8.0000e-04 | Tokens: 97075200 | Self Similarity: -0.0078
Sparsity: 74.4 | Dead Features: 0 | Total Loss: 0.1170 | Reconstruction Loss: 0.0683 | L1 Loss: 0.0488 | l1_alpha: 8.0000e-04 | Tokens: 9707

 86%|████████▋ | 47505/55054 [23:40<03:54, 32.16it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 97280000 | Self Similarity: -0.0119
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 97280000 | Self Similarity: 0.0324
Sparsity: 53.1 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 97280000 | Self Similarity: 0.0024
Sparsity: 143.1 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 97280000 | Self Similarity: -0.0875
Sparsity: 139.8 | Dead Features: 0 | Total Loss: 0.0422 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 97280000 | Self Similarity: -0.0081
Sparsity: 77.4 | Dead Features: 0 | Total Loss: 0.1119 | Reconstruction Loss: 0.0625 | L1 Loss: 0.0494 | l1_alpha: 8.0000e-04 | Tokens: 9728

 86%|████████▋ | 47605/55054 [23:43<03:49, 32.39it/s]

Sparsity: 11.0 | Dead Features: 0 | Total Loss: 0.0081 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 97484800 | Self Similarity: -0.0120
Sparsity: 34.0 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 97484800 | Self Similarity: 0.0325
Sparsity: 52.3 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 97484800 | Self Similarity: 0.0021
Sparsity: 142.4 | Dead Features: 0 | Total Loss: 0.0346 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 97484800 | Self Similarity: -0.0875
Sparsity: 134.7 | Dead Features: 0 | Total Loss: 0.0418 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 97484800 | Self Similarity: -0.0079
Sparsity: 67.6 | Dead Features: 0 | Total Loss: 0.1282 | Reconstruction Loss: 0.0814 | L1 Loss: 0.0468 | l1_alpha: 8.0000e-04 | Tokens: 9748

 87%|████████▋ | 47705/55054 [23:46<03:47, 32.36it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 97689600 | Self Similarity: -0.0120
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 97689600 | Self Similarity: 0.0325
Sparsity: 53.2 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 97689600 | Self Similarity: 0.0021
Sparsity: 142.4 | Dead Features: 0 | Total Loss: 0.0352 | Reconstruction Loss: 0.0166 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 97689600 | Self Similarity: -0.0874
Sparsity: 141.8 | Dead Features: 0 | Total Loss: 0.0429 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 97689600 | Self Similarity: -0.0078
Sparsity: 76.3 | Dead Features: 0 | Total Loss: 0.1153 | Reconstruction Loss: 0.0676 | L1 Loss: 0.0477 | l1_alpha: 8.0000e-04 | Tokens: 9768

 87%|████████▋ | 47805/55054 [23:49<03:43, 32.44it/s]

Sparsity: 14.0 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 97894400 | Self Similarity: -0.0120
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 97894400 | Self Similarity: 0.0323
Sparsity: 53.0 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 97894400 | Self Similarity: 0.0019
Sparsity: 147.2 | Dead Features: 0 | Total Loss: 0.0351 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 97894400 | Self Similarity: -0.0875
Sparsity: 140.1 | Dead Features: 0 | Total Loss: 0.0424 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 97894400 | Self Similarity: -0.0075
Sparsity: 65.9 | Dead Features: 0 | Total Loss: 0.1321 | Reconstruction Loss: 0.0886 | L1 Loss: 0.0436 | l1_alpha: 8.0000e-04 | Tokens: 9789

 87%|████████▋ | 47905/55054 [23:52<03:32, 33.66it/s]

Sparsity: 12.7 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 98099200 | Self Similarity: -0.0121
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 98099200 | Self Similarity: 0.0322
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 98099200 | Self Similarity: 0.0021
Sparsity: 142.1 | Dead Features: 0 | Total Loss: 0.0339 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 98099200 | Self Similarity: -0.0870
Sparsity: 136.7 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 98099200 | Self Similarity: -0.0078
Sparsity: 81.5 | Dead Features: 0 | Total Loss: 0.1089 | Reconstruction Loss: 0.0589 | L1 Loss: 0.0500 | l1_alpha: 8.0000e-04 | Tokens: 9809

 87%|████████▋ | 48005/55054 [23:55<03:31, 33.35it/s]

Sparsity: 13.0 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 98304000 | Self Similarity: -0.0120
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 98304000 | Self Similarity: 0.0323
Sparsity: 52.8 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 98304000 | Self Similarity: 0.0021
Sparsity: 145.3 | Dead Features: 0 | Total Loss: 0.0351 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 98304000 | Self Similarity: -0.0870
Sparsity: 142.7 | Dead Features: 0 | Total Loss: 0.0432 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 98304000 | Self Similarity: -0.0076
Sparsity: 78.5 | Dead Features: 0 | Total Loss: 0.1166 | Reconstruction Loss: 0.0686 | L1 Loss: 0.0480 | l1_alpha: 8.0000e-04 | Tokens: 9830

 87%|████████▋ | 48105/55054 [23:58<03:28, 33.29it/s]

Sparsity: 11.4 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 98508800 | Self Similarity: -0.0121
Sparsity: 33.8 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 98508800 | Self Similarity: 0.0320
Sparsity: 50.8 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 98508800 | Self Similarity: 0.0020
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 98508800 | Self Similarity: -0.0871
Sparsity: 138.5 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 98508800 | Self Similarity: -0.0076
Sparsity: 76.8 | Dead Features: 0 | Total Loss: 0.1101 | Reconstruction Loss: 0.0625 | L1 Loss: 0.0475 | l1_alpha: 8.0000e-04 | Tokens: 9850

 88%|████████▊ | 48205/55054 [24:01<03:26, 33.14it/s]

Sparsity: 14.0 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 98713600 | Self Similarity: -0.0121
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 98713600 | Self Similarity: 0.0320
Sparsity: 54.4 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 98713600 | Self Similarity: 0.0019
Sparsity: 148.3 | Dead Features: 0 | Total Loss: 0.0355 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 98713600 | Self Similarity: -0.0872
Sparsity: 144.4 | Dead Features: 0 | Total Loss: 0.0432 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 98713600 | Self Similarity: -0.0077
Sparsity: 72.7 | Dead Features: 0 | Total Loss: 0.1150 | Reconstruction Loss: 0.0691 | L1 Loss: 0.0458 | l1_alpha: 8.0000e-04 | Tokens: 9871

 88%|████████▊ | 48305/55054 [24:04<03:21, 33.47it/s]

Sparsity: 13.1 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 98918400 | Self Similarity: -0.0122
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 98918400 | Self Similarity: 0.0322
Sparsity: 53.3 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 98918400 | Self Similarity: 0.0020
Sparsity: 150.2 | Dead Features: 0 | Total Loss: 0.0361 | Reconstruction Loss: 0.0165 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 98918400 | Self Similarity: -0.0873
Sparsity: 146.5 | Dead Features: 0 | Total Loss: 0.0437 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0230 | l1_alpha: 8.0000e-04 | Tokens: 98918400 | Self Similarity: -0.0077
Sparsity: 79.6 | Dead Features: 0 | Total Loss: 0.1141 | Reconstruction Loss: 0.0652 | L1 Loss: 0.0489 | l1_alpha: 8.0000e-04 | Tokens: 9891

 88%|████████▊ | 48405/55054 [24:07<03:22, 32.84it/s]

Sparsity: 13.9 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 99123200 | Self Similarity: -0.0122
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 99123200 | Self Similarity: 0.0320
Sparsity: 54.1 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 99123200 | Self Similarity: 0.0019
Sparsity: 146.9 | Dead Features: 0 | Total Loss: 0.0354 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 99123200 | Self Similarity: -0.0871
Sparsity: 143.3 | Dead Features: 0 | Total Loss: 0.0428 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 99123200 | Self Similarity: -0.0077
Sparsity: 78.4 | Dead Features: 0 | Total Loss: 0.1165 | Reconstruction Loss: 0.0691 | L1 Loss: 0.0474 | l1_alpha: 8.0000e-04 | Tokens: 9912

 88%|████████▊ | 48505/55054 [24:10<03:16, 33.29it/s]

Sparsity: 13.3 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 99328000 | Self Similarity: -0.0123
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 99328000 | Self Similarity: 0.0320
Sparsity: 54.0 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 99328000 | Self Similarity: 0.0017
Sparsity: 148.0 | Dead Features: 0 | Total Loss: 0.0355 | Reconstruction Loss: 0.0165 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 99328000 | Self Similarity: -0.0871
Sparsity: 138.6 | Dead Features: 0 | Total Loss: 0.0441 | Reconstruction Loss: 0.0222 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 99328000 | Self Similarity: -0.0083
Sparsity: 82.4 | Dead Features: 0 | Total Loss: 0.1289 | Reconstruction Loss: 0.0777 | L1 Loss: 0.0512 | l1_alpha: 8.0000e-04 | Tokens: 9932

 88%|████████▊ | 48605/55054 [24:13<03:13, 33.38it/s]

Sparsity: 12.9 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 99532800 | Self Similarity: -0.0124
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 99532800 | Self Similarity: 0.0319
Sparsity: 55.0 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 99532800 | Self Similarity: 0.0015
Sparsity: 149.4 | Dead Features: 0 | Total Loss: 0.0365 | Reconstruction Loss: 0.0170 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 99532800 | Self Similarity: -0.0870
Sparsity: 142.2 | Dead Features: 0 | Total Loss: 0.0444 | Reconstruction Loss: 0.0220 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 99532800 | Self Similarity: -0.0078
Sparsity: 67.9 | Dead Features: 0 | Total Loss: 0.1260 | Reconstruction Loss: 0.0793 | L1 Loss: 0.0467 | l1_alpha: 8.0000e-04 | Tokens: 9953

 88%|████████▊ | 48705/55054 [24:16<03:11, 33.19it/s]

Sparsity: 12.9 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 99737600 | Self Similarity: -0.0125
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 99737600 | Self Similarity: 0.0321
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 99737600 | Self Similarity: 0.0014
Sparsity: 146.6 | Dead Features: 0 | Total Loss: 0.0343 | Reconstruction Loss: 0.0155 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 99737600 | Self Similarity: -0.0869
Sparsity: 139.8 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 99737600 | Self Similarity: -0.0075
Sparsity: 71.8 | Dead Features: 0 | Total Loss: 0.1100 | Reconstruction Loss: 0.0643 | L1 Loss: 0.0457 | l1_alpha: 8.0000e-04 | Tokens: 9973

 89%|████████▊ | 48805/55054 [24:19<02:58, 35.05it/s]

Sparsity: 12.0 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 99942400 | Self Similarity: -0.0122
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 99942400 | Self Similarity: 0.0320
Sparsity: 53.0 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 99942400 | Self Similarity: 0.0016
Sparsity: 145.7 | Dead Features: 0 | Total Loss: 0.0352 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 99942400 | Self Similarity: -0.0868
Sparsity: 139.0 | Dead Features: 0 | Total Loss: 0.0423 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 99942400 | Self Similarity: -0.0075
Sparsity: 69.2 | Dead Features: 0 | Total Loss: 0.1243 | Reconstruction Loss: 0.0779 | L1 Loss: 0.0464 | l1_alpha: 8.0000e-04 | Tokens: 9994

 89%|████████▉ | 48907/55054 [24:22<02:37, 39.13it/s]

Sparsity: 13.3 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 100147200 | Self Similarity: -0.0124
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 100147200 | Self Similarity: 0.0319
Sparsity: 53.4 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 100147200 | Self Similarity: 0.0018
Sparsity: 144.6 | Dead Features: 0 | Total Loss: 0.0349 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 100147200 | Self Similarity: -0.0870
Sparsity: 140.8 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 100147200 | Self Similarity: -0.0075
Sparsity: 72.4 | Dead Features: 0 | Total Loss: 0.1131 | Reconstruction Loss: 0.0671 | L1 Loss: 0.0460 | l1_alpha: 8.0000e-04 | Tokens:

 89%|████████▉ | 49004/55054 [24:25<03:01, 33.40it/s]

Sparsity: 11.1 | Dead Features: 0 | Total Loss: 0.0081 | Reconstruction Loss: 0.0031 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 100352000 | Self Similarity: -0.0124
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 100352000 | Self Similarity: 0.0318
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 100352000 | Self Similarity: 0.0016
Sparsity: 144.8 | Dead Features: 0 | Total Loss: 0.0349 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 100352000 | Self Similarity: -0.0870
Sparsity: 137.2 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 100352000 | Self Similarity: -0.0076
Sparsity: 77.8 | Dead Features: 0 | Total Loss: 0.1131 | Reconstruction Loss: 0.0649 | L1 Loss: 0.0483 | l1_alpha: 8.0000e-04 | Tokens:

 89%|████████▉ | 49104/55054 [24:28<02:58, 33.39it/s]

Sparsity: 12.9 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 100556800 | Self Similarity: -0.0123
Sparsity: 34.8 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 100556800 | Self Similarity: 0.0319
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 100556800 | Self Similarity: 0.0015
Sparsity: 143.9 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 100556800 | Self Similarity: -0.0867
Sparsity: 141.2 | Dead Features: 0 | Total Loss: 0.0432 | Reconstruction Loss: 0.0211 | L1 Loss: 0.0221 | l1_alpha: 8.0000e-04 | Tokens: 100556800 | Self Similarity: -0.0076
Sparsity: 74.7 | Dead Features: 0 | Total Loss: 0.1221 | Reconstruction Loss: 0.0753 | L1 Loss: 0.0469 | l1_alpha: 8.0000e-04 | Tokens:

 89%|████████▉ | 49208/55054 [24:31<02:32, 38.32it/s]

Sparsity: 11.4 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 100761600 | Self Similarity: -0.0124
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 100761600 | Self Similarity: 0.0318
Sparsity: 52.6 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 100761600 | Self Similarity: 0.0015
Sparsity: 145.1 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 100761600 | Self Similarity: -0.0866
Sparsity: 140.7 | Dead Features: 0 | Total Loss: 0.0425 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 100761600 | Self Similarity: -0.0076
Sparsity: 77.4 | Dead Features: 0 | Total Loss: 0.1153 | Reconstruction Loss: 0.0682 | L1 Loss: 0.0471 | l1_alpha: 8.0000e-04 | Tokens:

 90%|████████▉ | 49304/55054 [24:34<02:46, 34.48it/s]

Sparsity: 12.9 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 100966400 | Self Similarity: -0.0124
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 100966400 | Self Similarity: 0.0319
Sparsity: 52.1 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 100966400 | Self Similarity: 0.0016
Sparsity: 145.1 | Dead Features: 0 | Total Loss: 0.0340 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 100966400 | Self Similarity: -0.0865
Sparsity: 140.2 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0190 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 100966400 | Self Similarity: -0.0077
Sparsity: 78.4 | Dead Features: 0 | Total Loss: 0.1073 | Reconstruction Loss: 0.0600 | L1 Loss: 0.0473 | l1_alpha: 8.0000e-04 | Tokens:

 90%|████████▉ | 49404/55054 [24:37<02:49, 33.38it/s]

Sparsity: 11.0 | Dead Features: 0 | Total Loss: 0.0081 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 101171200 | Self Similarity: -0.0124
Sparsity: 33.6 | Dead Features: 0 | Total Loss: 0.0111 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 101171200 | Self Similarity: 0.0318
Sparsity: 51.3 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 101171200 | Self Similarity: 0.0015
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0341 | Reconstruction Loss: 0.0153 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 101171200 | Self Similarity: -0.0867
Sparsity: 136.8 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 101171200 | Self Similarity: -0.0077
Sparsity: 79.1 | Dead Features: 0 | Total Loss: 0.1120 | Reconstruction Loss: 0.0640 | L1 Loss: 0.0480 | l1_alpha: 8.0000e-04 | Tokens:

 90%|████████▉ | 49504/55054 [24:40<02:47, 33.04it/s]

Sparsity: 11.4 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 101376000 | Self Similarity: -0.0123
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 101376000 | Self Similarity: 0.0319
Sparsity: 51.6 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 101376000 | Self Similarity: 0.0014
Sparsity: 146.6 | Dead Features: 0 | Total Loss: 0.0348 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 101376000 | Self Similarity: -0.0866
Sparsity: 141.1 | Dead Features: 0 | Total Loss: 0.0417 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 101376000 | Self Similarity: -0.0076
Sparsity: 78.3 | Dead Features: 0 | Total Loss: 0.1156 | Reconstruction Loss: 0.0672 | L1 Loss: 0.0484 | l1_alpha: 8.0000e-04 | Tokens:

 90%|█████████ | 49604/55054 [24:43<02:39, 34.23it/s]

Sparsity: 11.7 | Dead Features: 0 | Total Loss: 0.0084 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 101580800 | Self Similarity: -0.0124
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 101580800 | Self Similarity: 0.0319
Sparsity: 51.6 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 101580800 | Self Similarity: 0.0015
Sparsity: 146.2 | Dead Features: 0 | Total Loss: 0.0341 | Reconstruction Loss: 0.0153 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 101580800 | Self Similarity: -0.0865
Sparsity: 140.0 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0188 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 101580800 | Self Similarity: -0.0074
Sparsity: 76.7 | Dead Features: 0 | Total Loss: 0.1082 | Reconstruction Loss: 0.0615 | L1 Loss: 0.0467 | l1_alpha: 8.0000e-04 | Tokens:

 90%|█████████ | 49704/55054 [24:45<02:41, 33.18it/s]

Sparsity: 13.3 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 101785600 | Self Similarity: -0.0125
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 101785600 | Self Similarity: 0.0319
Sparsity: 54.1 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 101785600 | Self Similarity: 0.0015
Sparsity: 143.5 | Dead Features: 0 | Total Loss: 0.0346 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 101785600 | Self Similarity: -0.0865
Sparsity: 137.3 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 101785600 | Self Similarity: -0.0072
Sparsity: 79.3 | Dead Features: 0 | Total Loss: 0.1094 | Reconstruction Loss: 0.0621 | L1 Loss: 0.0473 | l1_alpha: 8.0000e-04 | Tokens:

 90%|█████████ | 49804/55054 [24:48<02:37, 33.32it/s]

Sparsity: 12.6 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 101990400 | Self Similarity: -0.0126
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 101990400 | Self Similarity: 0.0316
Sparsity: 54.4 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 101990400 | Self Similarity: 0.0013
Sparsity: 146.3 | Dead Features: 0 | Total Loss: 0.0358 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 101990400 | Self Similarity: -0.0864
Sparsity: 141.6 | Dead Features: 0 | Total Loss: 0.0439 | Reconstruction Loss: 0.0215 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 101990400 | Self Similarity: -0.0074
Sparsity: 76.0 | Dead Features: 0 | Total Loss: 0.1198 | Reconstruction Loss: 0.0737 | L1 Loss: 0.0461 | l1_alpha: 8.0000e-04 | Tokens:

 91%|█████████ | 49904/55054 [24:51<02:32, 33.87it/s]

Sparsity: 11.4 | Dead Features: 0 | Total Loss: 0.0084 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 102195200 | Self Similarity: -0.0126
Sparsity: 33.8 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 102195200 | Self Similarity: 0.0318
Sparsity: 52.2 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 102195200 | Self Similarity: 0.0013
Sparsity: 141.9 | Dead Features: 0 | Total Loss: 0.0340 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0181 | l1_alpha: 8.0000e-04 | Tokens: 102195200 | Self Similarity: -0.0864
Sparsity: 137.8 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 102195200 | Self Similarity: -0.0076
Sparsity: 82.4 | Dead Features: 0 | Total Loss: 0.1105 | Reconstruction Loss: 0.0626 | L1 Loss: 0.0480 | l1_alpha: 8.0000e-04 | Tokens:

 91%|█████████ | 50004/55054 [24:54<02:31, 33.38it/s]

Sparsity: 12.7 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 102400000 | Self Similarity: -0.0125
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 102400000 | Self Similarity: 0.0319
Sparsity: 52.1 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 102400000 | Self Similarity: 0.0014
Sparsity: 145.6 | Dead Features: 0 | Total Loss: 0.0346 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 102400000 | Self Similarity: -0.0865
Sparsity: 135.4 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 102400000 | Self Similarity: -0.0077
Sparsity: 79.1 | Dead Features: 0 | Total Loss: 0.1112 | Reconstruction Loss: 0.0644 | L1 Loss: 0.0468 | l1_alpha: 8.0000e-04 | Tokens:

 91%|█████████ | 50104/55054 [24:57<02:28, 33.36it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 102604800 | Self Similarity: -0.0127
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 102604800 | Self Similarity: 0.0318
Sparsity: 52.1 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 102604800 | Self Similarity: 0.0015
Sparsity: 146.3 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 102604800 | Self Similarity: -0.0862
Sparsity: 140.7 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 102604800 | Self Similarity: -0.0075
Sparsity: 79.6 | Dead Features: 0 | Total Loss: 0.1094 | Reconstruction Loss: 0.0620 | L1 Loss: 0.0473 | l1_alpha: 8.0000e-04 | Tokens:

 91%|█████████ | 50204/55054 [25:00<02:24, 33.57it/s]

Sparsity: 13.0 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 102809600 | Self Similarity: -0.0128
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 102809600 | Self Similarity: 0.0319
Sparsity: 53.1 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 102809600 | Self Similarity: 0.0015
Sparsity: 146.8 | Dead Features: 0 | Total Loss: 0.0361 | Reconstruction Loss: 0.0170 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 102809600 | Self Similarity: -0.0862
Sparsity: 139.9 | Dead Features: 0 | Total Loss: 0.0442 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0226 | l1_alpha: 8.0000e-04 | Tokens: 102809600 | Self Similarity: -0.0075
Sparsity: 80.9 | Dead Features: 0 | Total Loss: 0.1163 | Reconstruction Loss: 0.0690 | L1 Loss: 0.0473 | l1_alpha: 8.0000e-04 | Tokens:

 91%|█████████▏| 50304/55054 [25:03<02:22, 33.37it/s]

Sparsity: 13.5 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 103014400 | Self Similarity: -0.0127
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 103014400 | Self Similarity: 0.0319
Sparsity: 53.5 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 103014400 | Self Similarity: 0.0016
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0342 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 103014400 | Self Similarity: -0.0863
Sparsity: 139.8 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 103014400 | Self Similarity: -0.0073
Sparsity: 76.7 | Dead Features: 0 | Total Loss: 0.1125 | Reconstruction Loss: 0.0670 | L1 Loss: 0.0455 | l1_alpha: 8.0000e-04 | Tokens:

 92%|█████████▏| 50404/55054 [25:06<02:21, 32.76it/s]

Sparsity: 13.1 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 103219200 | Self Similarity: -0.0128
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 103219200 | Self Similarity: 0.0320
Sparsity: 55.3 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 103219200 | Self Similarity: 0.0014
Sparsity: 147.4 | Dead Features: 0 | Total Loss: 0.0354 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 103219200 | Self Similarity: -0.0863
Sparsity: 143.7 | Dead Features: 0 | Total Loss: 0.0437 | Reconstruction Loss: 0.0212 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 103219200 | Self Similarity: -0.0067
Sparsity: 80.4 | Dead Features: 0 | Total Loss: 0.1161 | Reconstruction Loss: 0.0701 | L1 Loss: 0.0461 | l1_alpha: 8.0000e-04 | Tokens:

 92%|█████████▏| 50504/55054 [25:09<02:15, 33.63it/s]

Sparsity: 15.3 | Dead Features: 0 | Total Loss: 0.0103 | Reconstruction Loss: 0.0046 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 103424000 | Self Similarity: -0.0130
Sparsity: 39.2 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 103424000 | Self Similarity: 0.0319
Sparsity: 57.2 | Dead Features: 0 | Total Loss: 0.0186 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 103424000 | Self Similarity: 0.0014
Sparsity: 148.8 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0182 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 103424000 | Self Similarity: -0.0866
Sparsity: 146.4 | Dead Features: 0 | Total Loss: 0.0454 | Reconstruction Loss: 0.0222 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 103424000 | Self Similarity: -0.0070
Sparsity: 80.4 | Dead Features: 0 | Total Loss: 0.1160 | Reconstruction Loss: 0.0705 | L1 Loss: 0.0455 | l1_alpha: 8.0000e-04 | Tokens:

 92%|█████████▏| 50608/55054 [25:12<02:05, 35.30it/s]

Sparsity: 13.5 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 103628800 | Self Similarity: -0.0129
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 103628800 | Self Similarity: 0.0319
Sparsity: 53.4 | Dead Features: 0 | Total Loss: 0.0173 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 103628800 | Self Similarity: 0.0012
Sparsity: 147.0 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 103628800 | Self Similarity: -0.0862
Sparsity: 143.9 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0221 | l1_alpha: 8.0000e-04 | Tokens: 103628800 | Self Similarity: -0.0073
Sparsity: 78.0 | Dead Features: 0 | Total Loss: 0.1108 | Reconstruction Loss: 0.0657 | L1 Loss: 0.0451 | l1_alpha: 8.0000e-04 | Tokens:

 92%|█████████▏| 50704/55054 [25:15<02:09, 33.62it/s]

Sparsity: 13.3 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 103833600 | Self Similarity: -0.0128
Sparsity: 36.3 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 103833600 | Self Similarity: 0.0318
Sparsity: 52.8 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 103833600 | Self Similarity: 0.0011
Sparsity: 149.0 | Dead Features: 0 | Total Loss: 0.0352 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 103833600 | Self Similarity: -0.0861
Sparsity: 141.4 | Dead Features: 0 | Total Loss: 0.0419 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 103833600 | Self Similarity: -0.0069
Sparsity: 80.1 | Dead Features: 0 | Total Loss: 0.1088 | Reconstruction Loss: 0.0631 | L1 Loss: 0.0457 | l1_alpha: 8.0000e-04 | Tokens:

 92%|█████████▏| 50804/55054 [25:18<02:07, 33.27it/s]

Sparsity: 11.7 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 104038400 | Self Similarity: -0.0130
Sparsity: 33.3 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 104038400 | Self Similarity: 0.0318
Sparsity: 51.9 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 104038400 | Self Similarity: 0.0013
Sparsity: 144.8 | Dead Features: 0 | Total Loss: 0.0343 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 104038400 | Self Similarity: -0.0861
Sparsity: 135.7 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 104038400 | Self Similarity: -0.0071
Sparsity: 79.2 | Dead Features: 0 | Total Loss: 0.1056 | Reconstruction Loss: 0.0607 | L1 Loss: 0.0449 | l1_alpha: 8.0000e-04 | Tokens:

 92%|█████████▏| 50904/55054 [25:21<02:04, 33.41it/s]

Sparsity: 11.7 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 104243200 | Self Similarity: -0.0129
Sparsity: 34.4 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 104243200 | Self Similarity: 0.0318
Sparsity: 52.8 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 104243200 | Self Similarity: 0.0013
Sparsity: 146.3 | Dead Features: 0 | Total Loss: 0.0353 | Reconstruction Loss: 0.0164 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 104243200 | Self Similarity: -0.0864
Sparsity: 142.8 | Dead Features: 0 | Total Loss: 0.0438 | Reconstruction Loss: 0.0211 | L1 Loss: 0.0227 | l1_alpha: 8.0000e-04 | Tokens: 104243200 | Self Similarity: -0.0071
Sparsity: 78.1 | Dead Features: 0 | Total Loss: 0.1112 | Reconstruction Loss: 0.0658 | L1 Loss: 0.0454 | l1_alpha: 8.0000e-04 | Tokens:

 93%|█████████▎| 51004/55054 [25:24<02:01, 33.30it/s]

Sparsity: 13.1 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 104448000 | Self Similarity: -0.0130
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 104448000 | Self Similarity: 0.0317
Sparsity: 53.5 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 104448000 | Self Similarity: 0.0012
Sparsity: 147.0 | Dead Features: 0 | Total Loss: 0.0353 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 104448000 | Self Similarity: -0.0861
Sparsity: 140.9 | Dead Features: 0 | Total Loss: 0.0431 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 104448000 | Self Similarity: -0.0070
Sparsity: 77.2 | Dead Features: 0 | Total Loss: 0.1147 | Reconstruction Loss: 0.0710 | L1 Loss: 0.0437 | l1_alpha: 8.0000e-04 | Tokens:

 93%|█████████▎| 51104/55054 [25:27<01:58, 33.22it/s]

Sparsity: 11.6 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 104652800 | Self Similarity: -0.0129
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 104652800 | Self Similarity: 0.0316
Sparsity: 53.5 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 104652800 | Self Similarity: 0.0012
Sparsity: 145.2 | Dead Features: 0 | Total Loss: 0.0364 | Reconstruction Loss: 0.0176 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 104652800 | Self Similarity: -0.0862
Sparsity: 142.7 | Dead Features: 0 | Total Loss: 0.0442 | Reconstruction Loss: 0.0217 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 104652800 | Self Similarity: -0.0066
Sparsity: 82.7 | Dead Features: 0 | Total Loss: 0.1160 | Reconstruction Loss: 0.0683 | L1 Loss: 0.0477 | l1_alpha: 8.0000e-04 | Tokens:

 93%|█████████▎| 51204/55054 [25:30<01:55, 33.28it/s]

Sparsity: 13.8 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 104857600 | Self Similarity: -0.0129
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 104857600 | Self Similarity: 0.0316
Sparsity: 54.6 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 104857600 | Self Similarity: 0.0011
Sparsity: 148.7 | Dead Features: 0 | Total Loss: 0.0357 | Reconstruction Loss: 0.0164 | L1 Loss: 0.0193 | l1_alpha: 8.0000e-04 | Tokens: 104857600 | Self Similarity: -0.0863
Sparsity: 143.9 | Dead Features: 0 | Total Loss: 0.0431 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0226 | l1_alpha: 8.0000e-04 | Tokens: 104857600 | Self Similarity: -0.0066
Sparsity: 81.3 | Dead Features: 0 | Total Loss: 0.1115 | Reconstruction Loss: 0.0653 | L1 Loss: 0.0463 | l1_alpha: 8.0000e-04 | Tokens:

 93%|█████████▎| 51304/55054 [25:33<01:53, 33.15it/s]

Sparsity: 14.2 | Dead Features: 0 | Total Loss: 0.0098 | Reconstruction Loss: 0.0043 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 105062400 | Self Similarity: -0.0129
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 105062400 | Self Similarity: 0.0315
Sparsity: 53.7 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 105062400 | Self Similarity: 0.0011
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0348 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 105062400 | Self Similarity: -0.0861
Sparsity: 140.3 | Dead Features: 0 | Total Loss: 0.0431 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 105062400 | Self Similarity: -0.0066
Sparsity: 82.0 | Dead Features: 0 | Total Loss: 0.1110 | Reconstruction Loss: 0.0636 | L1 Loss: 0.0473 | l1_alpha: 8.0000e-04 | Tokens:

 93%|█████████▎| 51404/55054 [25:36<01:49, 33.36it/s]

Sparsity: 15.5 | Dead Features: 0 | Total Loss: 0.0103 | Reconstruction Loss: 0.0046 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 105267200 | Self Similarity: -0.0129
Sparsity: 39.8 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 105267200 | Self Similarity: 0.0318
Sparsity: 55.6 | Dead Features: 0 | Total Loss: 0.0184 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 105267200 | Self Similarity: 0.0010
Sparsity: 147.8 | Dead Features: 0 | Total Loss: 0.0371 | Reconstruction Loss: 0.0176 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 105267200 | Self Similarity: -0.0861
Sparsity: 148.6 | Dead Features: 0 | Total Loss: 0.0452 | Reconstruction Loss: 0.0217 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 105267200 | Self Similarity: -0.0066
Sparsity: 80.9 | Dead Features: 0 | Total Loss: 0.1164 | Reconstruction Loss: 0.0699 | L1 Loss: 0.0465 | l1_alpha: 8.0000e-04 | Tokens:

 94%|█████████▎| 51504/55054 [25:39<01:48, 32.71it/s]

Sparsity: 13.1 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 105472000 | Self Similarity: -0.0130
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 105472000 | Self Similarity: 0.0319
Sparsity: 52.5 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 105472000 | Self Similarity: 0.0010
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0164 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 105472000 | Self Similarity: -0.0859
Sparsity: 142.1 | Dead Features: 0 | Total Loss: 0.0427 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0221 | l1_alpha: 8.0000e-04 | Tokens: 105472000 | Self Similarity: -0.0068
Sparsity: 81.4 | Dead Features: 0 | Total Loss: 0.1104 | Reconstruction Loss: 0.0635 | L1 Loss: 0.0469 | l1_alpha: 8.0000e-04 | Tokens:

 94%|█████████▎| 51604/55054 [25:42<01:45, 32.64it/s]

Sparsity: 13.0 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 105676800 | Self Similarity: -0.0128
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 105676800 | Self Similarity: 0.0317
Sparsity: 53.7 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 105676800 | Self Similarity: 0.0012
Sparsity: 144.0 | Dead Features: 0 | Total Loss: 0.0343 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 105676800 | Self Similarity: -0.0857
Sparsity: 138.0 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 105676800 | Self Similarity: -0.0069
Sparsity: 81.4 | Dead Features: 0 | Total Loss: 0.1084 | Reconstruction Loss: 0.0635 | L1 Loss: 0.0449 | l1_alpha: 8.0000e-04 | Tokens:

 94%|█████████▍| 51704/55054 [25:45<01:41, 33.11it/s]

Sparsity: 12.2 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 105881600 | Self Similarity: -0.0129
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 105881600 | Self Similarity: 0.0318
Sparsity: 51.4 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 105881600 | Self Similarity: 0.0012
Sparsity: 141.4 | Dead Features: 0 | Total Loss: 0.0342 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0182 | l1_alpha: 8.0000e-04 | Tokens: 105881600 | Self Similarity: -0.0856
Sparsity: 137.8 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 105881600 | Self Similarity: -0.0066
Sparsity: 82.9 | Dead Features: 0 | Total Loss: 0.1077 | Reconstruction Loss: 0.0625 | L1 Loss: 0.0453 | l1_alpha: 8.0000e-04 | Tokens:

 94%|█████████▍| 51808/55054 [25:48<01:30, 35.82it/s]

Sparsity: 11.4 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 106086400 | Self Similarity: -0.0129
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 106086400 | Self Similarity: 0.0317
Sparsity: 53.7 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 106086400 | Self Similarity: 0.0010
Sparsity: 143.3 | Dead Features: 0 | Total Loss: 0.0362 | Reconstruction Loss: 0.0169 | L1 Loss: 0.0193 | l1_alpha: 8.0000e-04 | Tokens: 106086400 | Self Similarity: -0.0857
Sparsity: 136.2 | Dead Features: 0 | Total Loss: 0.0438 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 106086400 | Self Similarity: -0.0067
Sparsity: 79.0 | Dead Features: 0 | Total Loss: 0.1122 | Reconstruction Loss: 0.0664 | L1 Loss: 0.0458 | l1_alpha: 8.0000e-04 | Tokens:

 94%|█████████▍| 51904/55054 [25:51<01:34, 33.25it/s]

Sparsity: 12.0 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 106291200 | Self Similarity: -0.0129
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 106291200 | Self Similarity: 0.0316
Sparsity: 51.1 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 106291200 | Self Similarity: 0.0009
Sparsity: 140.5 | Dead Features: 0 | Total Loss: 0.0338 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0182 | l1_alpha: 8.0000e-04 | Tokens: 106291200 | Self Similarity: -0.0855
Sparsity: 138.5 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 106291200 | Self Similarity: -0.0067
Sparsity: 82.8 | Dead Features: 0 | Total Loss: 0.1073 | Reconstruction Loss: 0.0631 | L1 Loss: 0.0441 | l1_alpha: 8.0000e-04 | Tokens:

 94%|█████████▍| 52004/55054 [25:54<01:31, 33.19it/s]

Sparsity: 13.1 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 106496000 | Self Similarity: -0.0130
Sparsity: 37.6 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 106496000 | Self Similarity: 0.0316
Sparsity: 54.3 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 106496000 | Self Similarity: 0.0008
Sparsity: 145.4 | Dead Features: 0 | Total Loss: 0.0355 | Reconstruction Loss: 0.0167 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 106496000 | Self Similarity: -0.0855
Sparsity: 145.6 | Dead Features: 0 | Total Loss: 0.0434 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0226 | l1_alpha: 8.0000e-04 | Tokens: 106496000 | Self Similarity: -0.0067
Sparsity: 79.5 | Dead Features: 0 | Total Loss: 0.1138 | Reconstruction Loss: 0.0682 | L1 Loss: 0.0456 | l1_alpha: 8.0000e-04 | Tokens:

 95%|█████████▍| 52104/55054 [25:57<01:28, 33.39it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 106700800 | Self Similarity: -0.0130
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 106700800 | Self Similarity: 0.0317
Sparsity: 52.2 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 106700800 | Self Similarity: 0.0007
Sparsity: 140.8 | Dead Features: 0 | Total Loss: 0.0343 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 106700800 | Self Similarity: -0.0853
Sparsity: 135.0 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 106700800 | Self Similarity: -0.0067
Sparsity: 79.4 | Dead Features: 0 | Total Loss: 0.1028 | Reconstruction Loss: 0.0599 | L1 Loss: 0.0428 | l1_alpha: 8.0000e-04 | Tokens:

 95%|█████████▍| 52204/55054 [26:00<01:25, 33.16it/s]

Sparsity: 10.1 | Dead Features: 0 | Total Loss: 0.0078 | Reconstruction Loss: 0.0030 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 106905600 | Self Similarity: -0.0130
Sparsity: 33.5 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 106905600 | Self Similarity: 0.0318
Sparsity: 51.1 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 106905600 | Self Similarity: 0.0007
Sparsity: 142.9 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 106905600 | Self Similarity: -0.0850
Sparsity: 135.8 | Dead Features: 0 | Total Loss: 0.0414 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 106905600 | Self Similarity: -0.0067
Sparsity: 79.7 | Dead Features: 0 | Total Loss: 0.1075 | Reconstruction Loss: 0.0638 | L1 Loss: 0.0438 | l1_alpha: 8.0000e-04 | Tokens:

 95%|█████████▌| 52304/55054 [26:03<01:22, 33.39it/s]

Sparsity: 12.6 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 107110400 | Self Similarity: -0.0130
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 107110400 | Self Similarity: 0.0317
Sparsity: 53.2 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 107110400 | Self Similarity: 0.0006
Sparsity: 141.6 | Dead Features: 0 | Total Loss: 0.0345 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 107110400 | Self Similarity: -0.0850
Sparsity: 137.0 | Dead Features: 0 | Total Loss: 0.0415 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 107110400 | Self Similarity: -0.0065
Sparsity: 78.0 | Dead Features: 0 | Total Loss: 0.1073 | Reconstruction Loss: 0.0646 | L1 Loss: 0.0427 | l1_alpha: 8.0000e-04 | Tokens:

 95%|█████████▌| 52404/55054 [26:06<01:19, 33.34it/s]

Sparsity: 13.7 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 107315200 | Self Similarity: -0.0130
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 107315200 | Self Similarity: 0.0317
Sparsity: 53.2 | Dead Features: 0 | Total Loss: 0.0173 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 107315200 | Self Similarity: 0.0007
Sparsity: 146.7 | Dead Features: 0 | Total Loss: 0.0353 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 107315200 | Self Similarity: -0.0849
Sparsity: 141.0 | Dead Features: 0 | Total Loss: 0.0436 | Reconstruction Loss: 0.0213 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 107315200 | Self Similarity: -0.0064
Sparsity: 83.4 | Dead Features: 0 | Total Loss: 0.1067 | Reconstruction Loss: 0.0611 | L1 Loss: 0.0456 | l1_alpha: 8.0000e-04 | Tokens:

 95%|█████████▌| 52504/55054 [26:09<01:17, 32.99it/s]

Sparsity: 14.0 | Dead Features: 0 | Total Loss: 0.0094 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 107520000 | Self Similarity: -0.0131
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 107520000 | Self Similarity: 0.0316
Sparsity: 53.7 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 107520000 | Self Similarity: 0.0006
Sparsity: 142.5 | Dead Features: 0 | Total Loss: 0.0348 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 107520000 | Self Similarity: -0.0850
Sparsity: 139.6 | Dead Features: 0 | Total Loss: 0.0424 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 107520000 | Self Similarity: -0.0065
Sparsity: 73.9 | Dead Features: 0 | Total Loss: 0.1124 | Reconstruction Loss: 0.0705 | L1 Loss: 0.0419 | l1_alpha: 8.0000e-04 | Tokens:

 96%|█████████▌| 52604/55054 [26:12<01:13, 33.40it/s]

Sparsity: 12.0 | Dead Features: 0 | Total Loss: 0.0084 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 107724800 | Self Similarity: -0.0132
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 107724800 | Self Similarity: 0.0319
Sparsity: 51.5 | Dead Features: 0 | Total Loss: 0.0166 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 107724800 | Self Similarity: 0.0006
Sparsity: 144.5 | Dead Features: 0 | Total Loss: 0.0340 | Reconstruction Loss: 0.0155 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 107724800 | Self Similarity: -0.0850
Sparsity: 137.1 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 107724800 | Self Similarity: -0.0064
Sparsity: 81.8 | Dead Features: 0 | Total Loss: 0.1030 | Reconstruction Loss: 0.0597 | L1 Loss: 0.0433 | l1_alpha: 8.0000e-04 | Tokens:

 96%|█████████▌| 52704/55054 [26:15<01:10, 33.47it/s]

Sparsity: 12.3 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 107929600 | Self Similarity: -0.0132
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 107929600 | Self Similarity: 0.0317
Sparsity: 53.1 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 107929600 | Self Similarity: 0.0008
Sparsity: 142.5 | Dead Features: 0 | Total Loss: 0.0359 | Reconstruction Loss: 0.0171 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 107929600 | Self Similarity: -0.0851
Sparsity: 134.2 | Dead Features: 0 | Total Loss: 0.0440 | Reconstruction Loss: 0.0224 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 107929600 | Self Similarity: -0.0063
Sparsity: 80.6 | Dead Features: 0 | Total Loss: 0.1148 | Reconstruction Loss: 0.0697 | L1 Loss: 0.0451 | l1_alpha: 8.0000e-04 | Tokens:

 96%|█████████▌| 52804/55054 [26:18<01:08, 32.82it/s]

Sparsity: 11.6 | Dead Features: 0 | Total Loss: 0.0084 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 108134400 | Self Similarity: -0.0131
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 108134400 | Self Similarity: 0.0318
Sparsity: 51.9 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 108134400 | Self Similarity: 0.0007
Sparsity: 143.1 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 108134400 | Self Similarity: -0.0850
Sparsity: 135.1 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 108134400 | Self Similarity: -0.0063
Sparsity: 80.9 | Dead Features: 0 | Total Loss: 0.1047 | Reconstruction Loss: 0.0618 | L1 Loss: 0.0430 | l1_alpha: 8.0000e-04 | Tokens:

 96%|█████████▌| 52904/55054 [26:21<01:04, 33.48it/s]

Sparsity: 11.4 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 108339200 | Self Similarity: -0.0132
Sparsity: 34.4 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 108339200 | Self Similarity: 0.0319
Sparsity: 50.4 | Dead Features: 0 | Total Loss: 0.0166 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 108339200 | Self Similarity: 0.0007
Sparsity: 145.3 | Dead Features: 0 | Total Loss: 0.0348 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 108339200 | Self Similarity: -0.0849
Sparsity: 141.0 | Dead Features: 0 | Total Loss: 0.0427 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 108339200 | Self Similarity: -0.0063
Sparsity: 83.5 | Dead Features: 0 | Total Loss: 0.1076 | Reconstruction Loss: 0.0622 | L1 Loss: 0.0454 | l1_alpha: 8.0000e-04 | Tokens:

 96%|█████████▋| 53004/55054 [26:24<01:01, 33.44it/s]

Sparsity: 16.6 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0054 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 108544000 | Self Similarity: -0.0132
Sparsity: 40.0 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 108544000 | Self Similarity: 0.0317
Sparsity: 56.5 | Dead Features: 0 | Total Loss: 0.0184 | Reconstruction Loss: 0.0107 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 108544000 | Self Similarity: 0.0007
Sparsity: 150.7 | Dead Features: 0 | Total Loss: 0.0389 | Reconstruction Loss: 0.0184 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 108544000 | Self Similarity: -0.0849
Sparsity: 146.1 | Dead Features: 0 | Total Loss: 0.0500 | Reconstruction Loss: 0.0246 | L1 Loss: 0.0254 | l1_alpha: 8.0000e-04 | Tokens: 108544000 | Self Similarity: -0.0064
Sparsity: 82.8 | Dead Features: 0 | Total Loss: 0.1412 | Reconstruction Loss: 0.0886 | L1 Loss: 0.0525 | l1_alpha: 8.0000e-04 | Tokens:

 96%|█████████▋| 53104/55054 [26:27<00:58, 33.47it/s]

Sparsity: 10.9 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 108748800 | Self Similarity: -0.0132
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 108748800 | Self Similarity: 0.0316
Sparsity: 52.5 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 108748800 | Self Similarity: 0.0006
Sparsity: 144.7 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 108748800 | Self Similarity: -0.0848
Sparsity: 134.1 | Dead Features: 0 | Total Loss: 0.0414 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 108748800 | Self Similarity: -0.0063
Sparsity: 83.2 | Dead Features: 0 | Total Loss: 0.1058 | Reconstruction Loss: 0.0612 | L1 Loss: 0.0447 | l1_alpha: 8.0000e-04 | Tokens:

 97%|█████████▋| 53204/55054 [26:30<00:55, 33.48it/s]

Sparsity: 10.6 | Dead Features: 0 | Total Loss: 0.0080 | Reconstruction Loss: 0.0031 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 108953600 | Self Similarity: -0.0133
Sparsity: 33.0 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 108953600 | Self Similarity: 0.0317
Sparsity: 50.3 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 108953600 | Self Similarity: 0.0006
Sparsity: 143.1 | Dead Features: 0 | Total Loss: 0.0337 | Reconstruction Loss: 0.0155 | L1 Loss: 0.0182 | l1_alpha: 8.0000e-04 | Tokens: 108953600 | Self Similarity: -0.0846
Sparsity: 137.1 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 108953600 | Self Similarity: -0.0062
Sparsity: 83.2 | Dead Features: 0 | Total Loss: 0.1028 | Reconstruction Loss: 0.0593 | L1 Loss: 0.0435 | l1_alpha: 8.0000e-04 | Tokens:

 97%|█████████▋| 53304/55054 [26:33<00:52, 33.35it/s]

Sparsity: 10.9 | Dead Features: 0 | Total Loss: 0.0080 | Reconstruction Loss: 0.0031 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 109158400 | Self Similarity: -0.0132
Sparsity: 33.7 | Dead Features: 0 | Total Loss: 0.0110 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 109158400 | Self Similarity: 0.0317
Sparsity: 51.7 | Dead Features: 0 | Total Loss: 0.0166 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 109158400 | Self Similarity: 0.0007
Sparsity: 146.3 | Dead Features: 0 | Total Loss: 0.0339 | Reconstruction Loss: 0.0154 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 109158400 | Self Similarity: -0.0848
Sparsity: 140.3 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 109158400 | Self Similarity: -0.0060
Sparsity: 81.2 | Dead Features: 0 | Total Loss: 0.1025 | Reconstruction Loss: 0.0607 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens:

 97%|█████████▋| 53404/55054 [26:36<00:49, 33.50it/s]

Sparsity: 11.8 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 109363200 | Self Similarity: -0.0132
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 109363200 | Self Similarity: 0.0317
Sparsity: 53.7 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 109363200 | Self Similarity: 0.0005
Sparsity: 148.1 | Dead Features: 0 | Total Loss: 0.0357 | Reconstruction Loss: 0.0164 | L1 Loss: 0.0193 | l1_alpha: 8.0000e-04 | Tokens: 109363200 | Self Similarity: -0.0847
Sparsity: 141.1 | Dead Features: 0 | Total Loss: 0.0433 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 109363200 | Self Similarity: -0.0061
Sparsity: 81.6 | Dead Features: 0 | Total Loss: 0.1113 | Reconstruction Loss: 0.0680 | L1 Loss: 0.0433 | l1_alpha: 8.0000e-04 | Tokens:

 97%|█████████▋| 53504/55054 [26:39<00:47, 32.89it/s]

Sparsity: 11.8 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 109568000 | Self Similarity: -0.0133
Sparsity: 34.1 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 109568000 | Self Similarity: 0.0316
Sparsity: 51.7 | Dead Features: 0 | Total Loss: 0.0166 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 109568000 | Self Similarity: 0.0003
Sparsity: 143.7 | Dead Features: 0 | Total Loss: 0.0340 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 109568000 | Self Similarity: -0.0847
Sparsity: 132.9 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0204 | l1_alpha: 8.0000e-04 | Tokens: 109568000 | Self Similarity: -0.0061
Sparsity: 78.2 | Dead Features: 0 | Total Loss: 0.1038 | Reconstruction Loss: 0.0620 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens:

 97%|█████████▋| 53604/55054 [26:42<00:44, 32.51it/s]

Sparsity: 9.8 | Dead Features: 0 | Total Loss: 0.0077 | Reconstruction Loss: 0.0028 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 109772800 | Self Similarity: -0.0134
Sparsity: 33.7 | Dead Features: 0 | Total Loss: 0.0111 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 109772800 | Self Similarity: 0.0314
Sparsity: 50.9 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 109772800 | Self Similarity: 0.0005
Sparsity: 142.0 | Dead Features: 0 | Total Loss: 0.0341 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 109772800 | Self Similarity: -0.0845
Sparsity: 135.3 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 109772800 | Self Similarity: -0.0064
Sparsity: 83.4 | Dead Features: 0 | Total Loss: 0.1042 | Reconstruction Loss: 0.0604 | L1 Loss: 0.0438 | l1_alpha: 8.0000e-04 | Tokens: 

 98%|█████████▊| 53704/55054 [26:45<00:41, 32.47it/s]

Sparsity: 13.5 | Dead Features: 0 | Total Loss: 0.0097 | Reconstruction Loss: 0.0043 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 109977600 | Self Similarity: -0.0134
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 109977600 | Self Similarity: 0.0313
Sparsity: 54.2 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 109977600 | Self Similarity: 0.0004
Sparsity: 147.8 | Dead Features: 0 | Total Loss: 0.0377 | Reconstruction Loss: 0.0174 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 109977600 | Self Similarity: -0.0845
Sparsity: 143.0 | Dead Features: 0 | Total Loss: 0.0458 | Reconstruction Loss: 0.0222 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 109977600 | Self Similarity: -0.0060
Sparsity: 84.2 | Dead Features: 0 | Total Loss: 0.1106 | Reconstruction Loss: 0.0663 | L1 Loss: 0.0443 | l1_alpha: 8.0000e-04 | Tokens:

 98%|█████████▊| 53804/55054 [26:48<00:39, 31.89it/s]

Sparsity: 12.3 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 110182400 | Self Similarity: -0.0134
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 110182400 | Self Similarity: 0.0313
Sparsity: 52.3 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 110182400 | Self Similarity: 0.0006
Sparsity: 141.3 | Dead Features: 0 | Total Loss: 0.0346 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 110182400 | Self Similarity: -0.0842
Sparsity: 137.4 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 110182400 | Self Similarity: -0.0059
Sparsity: 81.4 | Dead Features: 0 | Total Loss: 0.1029 | Reconstruction Loss: 0.0605 | L1 Loss: 0.0424 | l1_alpha: 8.0000e-04 | Tokens:

 98%|█████████▊| 53904/55054 [26:51<00:35, 32.40it/s]

Sparsity: 13.4 | Dead Features: 0 | Total Loss: 0.0097 | Reconstruction Loss: 0.0043 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 110387200 | Self Similarity: -0.0133
Sparsity: 37.4 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 110387200 | Self Similarity: 0.0314
Sparsity: 55.5 | Dead Features: 0 | Total Loss: 0.0179 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 110387200 | Self Similarity: 0.0004
Sparsity: 143.7 | Dead Features: 0 | Total Loss: 0.0359 | Reconstruction Loss: 0.0171 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 110387200 | Self Similarity: -0.0844
Sparsity: 131.0 | Dead Features: 0 | Total Loss: 0.0443 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 110387200 | Self Similarity: -0.0060
Sparsity: 82.3 | Dead Features: 0 | Total Loss: 0.1055 | Reconstruction Loss: 0.0624 | L1 Loss: 0.0431 | l1_alpha: 8.0000e-04 | Tokens:

 98%|█████████▊| 54004/55054 [26:54<00:32, 32.17it/s]

Sparsity: 12.3 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 110592000 | Self Similarity: -0.0132
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 110592000 | Self Similarity: 0.0312
Sparsity: 52.5 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 110592000 | Self Similarity: 0.0006
Sparsity: 141.2 | Dead Features: 0 | Total Loss: 0.0348 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 110592000 | Self Similarity: -0.0840
Sparsity: 133.8 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 110592000 | Self Similarity: -0.0061
Sparsity: 81.8 | Dead Features: 0 | Total Loss: 0.1076 | Reconstruction Loss: 0.0652 | L1 Loss: 0.0424 | l1_alpha: 8.0000e-04 | Tokens:

 98%|█████████▊| 54104/55054 [26:57<00:29, 32.49it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 110796800 | Self Similarity: -0.0131
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 110796800 | Self Similarity: 0.0312
Sparsity: 53.2 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 110796800 | Self Similarity: 0.0005
Sparsity: 144.9 | Dead Features: 0 | Total Loss: 0.0364 | Reconstruction Loss: 0.0169 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 110796800 | Self Similarity: -0.0839
Sparsity: 136.7 | Dead Features: 0 | Total Loss: 0.0451 | Reconstruction Loss: 0.0218 | L1 Loss: 0.0233 | l1_alpha: 8.0000e-04 | Tokens: 110796800 | Self Similarity: -0.0057
Sparsity: 80.4 | Dead Features: 0 | Total Loss: 0.1083 | Reconstruction Loss: 0.0650 | L1 Loss: 0.0434 | l1_alpha: 8.0000e-04 | Tokens:

 98%|█████████▊| 54204/55054 [27:00<00:26, 32.34it/s]

Sparsity: 11.5 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 111001600 | Self Similarity: -0.0133
Sparsity: 33.8 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 111001600 | Self Similarity: 0.0313
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 111001600 | Self Similarity: 0.0006
Sparsity: 144.9 | Dead Features: 0 | Total Loss: 0.0353 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 111001600 | Self Similarity: -0.0840
Sparsity: 137.2 | Dead Features: 0 | Total Loss: 0.0428 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 111001600 | Self Similarity: -0.0058
Sparsity: 82.0 | Dead Features: 0 | Total Loss: 0.1059 | Reconstruction Loss: 0.0621 | L1 Loss: 0.0438 | l1_alpha: 8.0000e-04 | Tokens:

 99%|█████████▊| 54304/55054 [27:03<00:20, 36.08it/s]

Sparsity: 12.5 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 111206400 | Self Similarity: -0.0132
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 111206400 | Self Similarity: 0.0315
Sparsity: 56.0 | Dead Features: 0 | Total Loss: 0.0183 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 111206400 | Self Similarity: 0.0007
Sparsity: 148.3 | Dead Features: 0 | Total Loss: 0.0372 | Reconstruction Loss: 0.0176 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 111206400 | Self Similarity: -0.0839
Sparsity: 140.7 | Dead Features: 0 | Total Loss: 0.0457 | Reconstruction Loss: 0.0224 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 111206400 | Self Similarity: -0.0059
Sparsity: 84.2 | Dead Features: 0 | Total Loss: 0.1143 | Reconstruction Loss: 0.0700 | L1 Loss: 0.0442 | l1_alpha: 8.0000e-04 | Tokens:

 99%|█████████▉| 54404/55054 [27:06<00:19, 32.89it/s]

Sparsity: 11.0 | Dead Features: 0 | Total Loss: 0.0081 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 111411200 | Self Similarity: -0.0132
Sparsity: 34.5 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 111411200 | Self Similarity: 0.0314
Sparsity: 51.4 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 111411200 | Self Similarity: 0.0007
Sparsity: 142.1 | Dead Features: 0 | Total Loss: 0.0340 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 111411200 | Self Similarity: -0.0840
Sparsity: 136.4 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 111411200 | Self Similarity: -0.0058
Sparsity: 82.3 | Dead Features: 0 | Total Loss: 0.1006 | Reconstruction Loss: 0.0589 | L1 Loss: 0.0416 | l1_alpha: 8.0000e-04 | Tokens:

 99%|█████████▉| 54504/55054 [27:09<00:16, 32.56it/s]

Sparsity: 12.7 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 111616000 | Self Similarity: -0.0132
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 111616000 | Self Similarity: 0.0314
Sparsity: 53.7 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 111616000 | Self Similarity: 0.0006
Sparsity: 147.1 | Dead Features: 0 | Total Loss: 0.0358 | Reconstruction Loss: 0.0166 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 111616000 | Self Similarity: -0.0840
Sparsity: 141.1 | Dead Features: 0 | Total Loss: 0.0434 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 111616000 | Self Similarity: -0.0058
Sparsity: 85.5 | Dead Features: 0 | Total Loss: 0.1088 | Reconstruction Loss: 0.0656 | L1 Loss: 0.0432 | l1_alpha: 8.0000e-04 | Tokens:

 99%|█████████▉| 54604/55054 [27:12<00:13, 34.01it/s]

Sparsity: 14.2 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 111820800 | Self Similarity: -0.0131
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 111820800 | Self Similarity: 0.0315
Sparsity: 54.2 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 111820800 | Self Similarity: 0.0005
Sparsity: 146.8 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 111820800 | Self Similarity: -0.0841
Sparsity: 139.4 | Dead Features: 0 | Total Loss: 0.0429 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0221 | l1_alpha: 8.0000e-04 | Tokens: 111820800 | Self Similarity: -0.0058
Sparsity: 75.7 | Dead Features: 0 | Total Loss: 0.1046 | Reconstruction Loss: 0.0636 | L1 Loss: 0.0409 | l1_alpha: 8.0000e-04 | Tokens:

 99%|█████████▉| 54704/55054 [27:15<00:10, 33.23it/s]

Sparsity: 14.4 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 112025600 | Self Similarity: -0.0132
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 112025600 | Self Similarity: 0.0316
Sparsity: 53.9 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 112025600 | Self Similarity: 0.0005
Sparsity: 147.3 | Dead Features: 0 | Total Loss: 0.0355 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 112025600 | Self Similarity: -0.0841
Sparsity: 142.1 | Dead Features: 0 | Total Loss: 0.0427 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0221 | l1_alpha: 8.0000e-04 | Tokens: 112025600 | Self Similarity: -0.0055
Sparsity: 86.2 | Dead Features: 0 | Total Loss: 0.1095 | Reconstruction Loss: 0.0664 | L1 Loss: 0.0431 | l1_alpha: 8.0000e-04 | Tokens:

100%|█████████▉| 54804/55054 [27:18<00:07, 32.67it/s]

Sparsity: 12.5 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 112230400 | Self Similarity: -0.0132
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 112230400 | Self Similarity: 0.0315
Sparsity: 52.4 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 112230400 | Self Similarity: 0.0006
Sparsity: 142.8 | Dead Features: 0 | Total Loss: 0.0343 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 112230400 | Self Similarity: -0.0842
Sparsity: 136.8 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 112230400 | Self Similarity: -0.0057
Sparsity: 82.0 | Dead Features: 0 | Total Loss: 0.1014 | Reconstruction Loss: 0.0603 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens:

100%|█████████▉| 54904/55054 [27:21<00:04, 32.57it/s]

Sparsity: 13.0 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 112435200 | Self Similarity: -0.0132
Sparsity: 34.8 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 112435200 | Self Similarity: 0.0316
Sparsity: 52.4 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 112435200 | Self Similarity: 0.0006
Sparsity: 143.6 | Dead Features: 0 | Total Loss: 0.0343 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 112435200 | Self Similarity: -0.0843
Sparsity: 135.1 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 112435200 | Self Similarity: -0.0057
Sparsity: 80.2 | Dead Features: 0 | Total Loss: 0.1016 | Reconstruction Loss: 0.0618 | L1 Loss: 0.0398 | l1_alpha: 8.0000e-04 | Tokens:

100%|█████████▉| 55004/55054 [27:24<00:01, 32.76it/s]

Sparsity: 15.2 | Dead Features: 0 | Total Loss: 0.0101 | Reconstruction Loss: 0.0045 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 112640000 | Self Similarity: -0.0131
Sparsity: 39.3 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 112640000 | Self Similarity: 0.0319
Sparsity: 56.9 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 112640000 | Self Similarity: 0.0006
Sparsity: 149.8 | Dead Features: 0 | Total Loss: 0.0368 | Reconstruction Loss: 0.0172 | L1 Loss: 0.0196 | l1_alpha: 8.0000e-04 | Tokens: 112640000 | Self Similarity: -0.0841
Sparsity: 148.0 | Dead Features: 0 | Total Loss: 0.0457 | Reconstruction Loss: 0.0218 | L1 Loss: 0.0239 | l1_alpha: 8.0000e-04 | Tokens: 112640000 | Self Similarity: -0.0059
Sparsity: 86.5 | Dead Features: 0 | Total Loss: 0.1142 | Reconstruction Loss: 0.0700 | L1 Loss: 0.0442 | l1_alpha: 8.0000e-04 | Tokens:

100%|██████████| 55054/55054 [27:25<00:00, 33.45it/s]
  0%|          | 7/55054 [00:00<30:24, 30.18it/s]

Sparsity: 14.6 | Dead Features: 4096 | Total Loss: 0.0097 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0130
Sparsity: 36.9 | Dead Features: 4096 | Total Loss: 0.0120 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0320
Sparsity: 53.1 | Dead Features: 4096 | Total Loss: 0.0170 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0005
Sparsity: 147.6 | Dead Features: 4096 | Total Loss: 0.0352 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0840
Sparsity: 142.4 | Dead Features: 4096 | Total Loss: 0.0421 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0056
Sparsity: 85.3 | Dead Features: 4096 | Total Loss: 0.1019 | Reconstruction Loss: 0.0603 | L1 Loss: 0.0417 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 

  0%|          | 103/55054 [00:03<27:32, 33.25it/s]

Sparsity: 16.1 | Dead Features: 0 | Total Loss: 0.0107 | Reconstruction Loss: 0.0050 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0130
Sparsity: 39.9 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0081 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0319
Sparsity: 57.6 | Dead Features: 0 | Total Loss: 0.0188 | Reconstruction Loss: 0.0108 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0004
Sparsity: 147.1 | Dead Features: 0 | Total Loss: 0.0390 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0842
Sparsity: 149.1 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0232 | L1 Loss: 0.0241 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0055
Sparsity: 86.7 | Dead Features: 0 | Total Loss: 0.1122 | Reconstruction Loss: 0.0689 | L1 Loss: 0.0433 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self 

  0%|          | 207/55054 [00:06<27:16, 33.52it/s]

Sparsity: 12.3 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0132
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0319
Sparsity: 53.4 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0004
Sparsity: 144.4 | Dead Features: 0 | Total Loss: 0.0354 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0839
Sparsity: 140.0 | Dead Features: 0 | Total Loss: 0.0427 | Reconstruction Loss: 0.0211 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0053
Sparsity: 87.9 | Dead Features: 0 | Total Loss: 0.1059 | Reconstruction Loss: 0.0632 | L1 Loss: 0.0428 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self 

  1%|          | 307/55054 [00:09<27:33, 33.10it/s]

Sparsity: 11.7 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0133
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0319
Sparsity: 52.6 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0003
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0839
Sparsity: 139.4 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0053
Sparsity: 85.5 | Dead Features: 0 | Total Loss: 0.1000 | Reconstruction Loss: 0.0581 | L1 Loss: 0.0419 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self 

  1%|          | 403/55054 [00:11<27:40, 32.92it/s]

Sparsity: 11.6 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0132
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0317
Sparsity: 54.7 | Dead Features: 0 | Total Loss: 0.0173 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0004
Sparsity: 148.7 | Dead Features: 0 | Total Loss: 0.0353 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0840
Sparsity: 140.5 | Dead Features: 0 | Total Loss: 0.0424 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0052
Sparsity: 86.1 | Dead Features: 0 | Total Loss: 0.1016 | Reconstruction Loss: 0.0606 | L1 Loss: 0.0410 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self 

  1%|          | 507/55054 [00:15<27:31, 33.02it/s]

Sparsity: 11.2 | Dead Features: 0 | Total Loss: 0.0081 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0132
Sparsity: 33.7 | Dead Features: 0 | Total Loss: 0.0111 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0318
Sparsity: 51.1 | Dead Features: 0 | Total Loss: 0.0166 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0003
Sparsity: 141.9 | Dead Features: 0 | Total Loss: 0.0337 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0181 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0841
Sparsity: 135.6 | Dead Features: 0 | Total Loss: 0.0400 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0053
Sparsity: 81.7 | Dead Features: 0 | Total Loss: 0.0991 | Reconstruction Loss: 0.0601 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 1024000 |

  1%|          | 607/55054 [00:18<27:10, 33.40it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0132
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0319
Sparsity: 54.0 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0005
Sparsity: 148.9 | Dead Features: 0 | Total Loss: 0.0360 | Reconstruction Loss: 0.0167 | L1 Loss: 0.0193 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0840
Sparsity: 144.9 | Dead Features: 0 | Total Loss: 0.0442 | Reconstruction Loss: 0.0213 | L1 Loss: 0.0229 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0052
Sparsity: 84.5 | Dead Features: 0 | Total Loss: 0.1085 | Reconstruction Loss: 0.0661 | L1 Loss: 0.0424 | l1_alpha: 8.0000e-04 | Tokens: 1228800 |

  1%|▏         | 707/55054 [00:21<26:56, 33.63it/s]

Sparsity: 12.7 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0133
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0318
Sparsity: 55.6 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0003
Sparsity: 145.5 | Dead Features: 0 | Total Loss: 0.0352 | Reconstruction Loss: 0.0165 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0839
Sparsity: 143.4 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0049
Sparsity: 85.2 | Dead Features: 0 | Total Loss: 0.1045 | Reconstruction Loss: 0.0620 | L1 Loss: 0.0425 | l1_alpha: 8.0000e-04 | Tokens: 1433600 |

  1%|▏         | 807/55054 [00:24<26:56, 33.55it/s]

Sparsity: 11.4 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0133
Sparsity: 34.4 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0316
Sparsity: 51.5 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0002
Sparsity: 147.9 | Dead Features: 0 | Total Loss: 0.0356 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0193 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0837
Sparsity: 145.2 | Dead Features: 0 | Total Loss: 0.0437 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0230 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0050
Sparsity: 88.7 | Dead Features: 0 | Total Loss: 0.1065 | Reconstruction Loss: 0.0625 | L1 Loss: 0.0440 | l1_alpha: 8.0000e-04 | Tokens: 1638400 |

  2%|▏         | 907/55054 [00:27<27:16, 33.10it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0131
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0316
Sparsity: 53.6 | Dead Features: 0 | Total Loss: 0.0179 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0001
Sparsity: 150.7 | Dead Features: 0 | Total Loss: 0.0364 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0840
Sparsity: 141.2 | Dead Features: 0 | Total Loss: 0.0438 | Reconstruction Loss: 0.0218 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0052
Sparsity: 86.9 | Dead Features: 0 | Total Loss: 0.1059 | Reconstruction Loss: 0.0637 | L1 Loss: 0.0423 | l1_alpha: 8.0000e-04 | Tokens: 1843200 |

  2%|▏         | 1007/55054 [00:30<27:15, 33.05it/s]

Sparsity: 11.2 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0131
Sparsity: 33.5 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0315
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0002
Sparsity: 143.9 | Dead Features: 0 | Total Loss: 0.0341 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0839
Sparsity: 135.3 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0052
Sparsity: 82.6 | Dead Features: 0 | Total Loss: 0.0988 | Reconstruction Loss: 0.0587 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens: 2048000 |

  2%|▏         | 1107/55054 [00:33<26:54, 33.40it/s]

Sparsity: 11.9 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0132
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0316
Sparsity: 53.1 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0003
Sparsity: 147.3 | Dead Features: 0 | Total Loss: 0.0360 | Reconstruction Loss: 0.0166 | L1 Loss: 0.0194 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0838
Sparsity: 142.5 | Dead Features: 0 | Total Loss: 0.0429 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0052
Sparsity: 85.6 | Dead Features: 0 | Total Loss: 0.1034 | Reconstruction Loss: 0.0605 | L1 Loss: 0.0429 | l1_alpha: 8.0000e-04 | Tokens: 2252800 |

  2%|▏         | 1207/55054 [00:36<26:49, 33.45it/s]

Sparsity: 12.2 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0132
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: 0.0316
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: 0.0003
Sparsity: 143.1 | Dead Features: 0 | Total Loss: 0.0346 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0837
Sparsity: 136.5 | Dead Features: 0 | Total Loss: 0.0419 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0052
Sparsity: 89.1 | Dead Features: 0 | Total Loss: 0.1053 | Reconstruction Loss: 0.0607 | L1 Loss: 0.0446 | l1_alpha: 8.0000e-04 | Tokens: 2457600 |

  2%|▏         | 1307/55054 [00:39<26:52, 33.33it/s]

Sparsity: 13.6 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: -0.0132
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0316
Sparsity: 55.1 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0000
Sparsity: 148.5 | Dead Features: 0 | Total Loss: 0.0351 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: -0.0836
Sparsity: 140.9 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: -0.0051
Sparsity: 87.6 | Dead Features: 0 | Total Loss: 0.0991 | Reconstruction Loss: 0.0586 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens: 2662400 |

  3%|▎         | 1407/55054 [00:41<25:43, 34.75it/s]

Sparsity: 12.5 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: -0.0132
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0317
Sparsity: 53.1 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0002
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0339 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: -0.0837
Sparsity: 139.8 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: -0.0051
Sparsity: 87.3 | Dead Features: 0 | Total Loss: 0.1000 | Reconstruction Loss: 0.0588 | L1 Loss: 0.0413 | l1_alpha: 8.0000e-04 | Tokens: 2867200 |

  3%|▎         | 1507/55054 [00:45<27:21, 32.61it/s]

Sparsity: 14.4 | Dead Features: 0 | Total Loss: 0.0097 | Reconstruction Loss: 0.0043 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: -0.0132
Sparsity: 39.4 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0316
Sparsity: 54.7 | Dead Features: 0 | Total Loss: 0.0180 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0004
Sparsity: 148.7 | Dead Features: 0 | Total Loss: 0.0360 | Reconstruction Loss: 0.0170 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: -0.0839
Sparsity: 145.5 | Dead Features: 0 | Total Loss: 0.0444 | Reconstruction Loss: 0.0221 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: -0.0050
Sparsity: 88.0 | Dead Features: 0 | Total Loss: 0.1055 | Reconstruction Loss: 0.0650 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens: 3072000 |

  3%|▎         | 1607/55054 [00:48<26:45, 33.29it/s]

Sparsity: 10.7 | Dead Features: 0 | Total Loss: 0.0080 | Reconstruction Loss: 0.0031 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: -0.0132
Sparsity: 34.2 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: 0.0314
Sparsity: 51.0 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: 0.0003
Sparsity: 141.7 | Dead Features: 0 | Total Loss: 0.0343 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: -0.0837
Sparsity: 135.8 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 3276800 | Self Similarity: -0.0051
Sparsity: 88.1 | Dead Features: 0 | Total Loss: 0.1010 | Reconstruction Loss: 0.0593 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 3276800 |

  3%|▎         | 1707/55054 [00:51<26:28, 33.59it/s]

Sparsity: 10.7 | Dead Features: 0 | Total Loss: 0.0080 | Reconstruction Loss: 0.0030 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: -0.0131
Sparsity: 33.8 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: 0.0315
Sparsity: 51.0 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: 0.0002
Sparsity: 141.7 | Dead Features: 0 | Total Loss: 0.0340 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0182 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: -0.0836
Sparsity: 137.2 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 3481600 | Self Similarity: -0.0050
Sparsity: 86.0 | Dead Features: 0 | Total Loss: 0.0947 | Reconstruction Loss: 0.0555 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 3481600 |

  3%|▎         | 1807/55054 [00:53<26:34, 33.39it/s]

Sparsity: 13.9 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: -0.0130
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: 0.0315
Sparsity: 54.1 | Dead Features: 0 | Total Loss: 0.0179 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: -0.0000
Sparsity: 146.2 | Dead Features: 0 | Total Loss: 0.0349 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: -0.0836
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 3686400 | Self Similarity: -0.0050
Sparsity: 88.3 | Dead Features: 0 | Total Loss: 0.0999 | Reconstruction Loss: 0.0587 | L1 Loss: 0.0412 | l1_alpha: 8.0000e-04 | Tokens: 3686400 

  3%|▎         | 1907/55054 [00:56<24:16, 36.49it/s]

Sparsity: 14.1 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: -0.0132
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: 0.0313
Sparsity: 55.3 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: 0.0002
Sparsity: 147.4 | Dead Features: 0 | Total Loss: 0.0356 | Reconstruction Loss: 0.0165 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: -0.0836
Sparsity: 141.4 | Dead Features: 0 | Total Loss: 0.0440 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 3891200 | Self Similarity: -0.0049
Sparsity: 88.7 | Dead Features: 0 | Total Loss: 0.1074 | Reconstruction Loss: 0.0653 | L1 Loss: 0.0422 | l1_alpha: 8.0000e-04 | Tokens: 3891200 |

  4%|▎         | 2007/55054 [00:59<26:57, 32.79it/s]

Sparsity: 12.7 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: -0.0129
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: 0.0314
Sparsity: 52.6 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: 0.0001
Sparsity: 146.6 | Dead Features: 0 | Total Loss: 0.0346 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: -0.0834
Sparsity: 136.8 | Dead Features: 0 | Total Loss: 0.0419 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 4096000 | Self Similarity: -0.0050
Sparsity: 87.2 | Dead Features: 0 | Total Loss: 0.0978 | Reconstruction Loss: 0.0585 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 4096000 |

  4%|▍         | 2107/55054 [01:02<26:35, 33.19it/s]

Sparsity: 11.2 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: -0.0132
Sparsity: 33.3 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: 0.0314
Sparsity: 52.7 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: 0.0003
Sparsity: 147.4 | Dead Features: 0 | Total Loss: 0.0349 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: -0.0835
Sparsity: 138.0 | Dead Features: 0 | Total Loss: 0.0414 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 4300800 | Self Similarity: -0.0049
Sparsity: 88.4 | Dead Features: 0 | Total Loss: 0.0997 | Reconstruction Loss: 0.0586 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 4300800 |

  4%|▍         | 2207/55054 [01:05<23:28, 37.53it/s]

Sparsity: 13.2 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: -0.0131
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: 0.0315
Sparsity: 52.6 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: 0.0003
Sparsity: 139.4 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0167 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: -0.0832
Sparsity: 126.2 | Dead Features: 0 | Total Loss: 0.0436 | Reconstruction Loss: 0.0222 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 4505600 | Self Similarity: -0.0050
Sparsity: 86.4 | Dead Features: 0 | Total Loss: 0.0995 | Reconstruction Loss: 0.0595 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 4505600 |

  4%|▍         | 2307/55054 [01:08<26:39, 32.98it/s]

Sparsity: 11.4 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: -0.0133
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: 0.0314
Sparsity: 53.3 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: 0.0003
Sparsity: 144.7 | Dead Features: 0 | Total Loss: 0.0357 | Reconstruction Loss: 0.0165 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: -0.0832
Sparsity: 128.3 | Dead Features: 0 | Total Loss: 0.0433 | Reconstruction Loss: 0.0218 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 4710400 | Self Similarity: -0.0047
Sparsity: 88.7 | Dead Features: 0 | Total Loss: 0.1043 | Reconstruction Loss: 0.0629 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens: 4710400 |

  4%|▍         | 2404/55054 [01:11<26:09, 33.56it/s]

Sparsity: 16.6 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0055 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: -0.0132
Sparsity: 39.0 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0313
Sparsity: 55.7 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: 0.0004
Sparsity: 143.5 | Dead Features: 0 | Total Loss: 0.0362 | Reconstruction Loss: 0.0174 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: -0.0833
Sparsity: 134.8 | Dead Features: 0 | Total Loss: 0.0446 | Reconstruction Loss: 0.0222 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 4915200 | Self Similarity: -0.0049
Sparsity: 87.1 | Dead Features: 0 | Total Loss: 0.1056 | Reconstruction Loss: 0.0628 | L1 Loss: 0.0428 | l1_alpha: 8.0000e-04 | Tokens: 4915200 |

  5%|▍         | 2504/55054 [01:14<26:20, 33.26it/s]

Sparsity: 12.0 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: -0.0132
Sparsity: 36.6 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0314
Sparsity: 54.8 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: 0.0002
Sparsity: 149.6 | Dead Features: 0 | Total Loss: 0.0363 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: -0.0834
Sparsity: 137.3 | Dead Features: 0 | Total Loss: 0.0444 | Reconstruction Loss: 0.0221 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 5120000 | Self Similarity: -0.0049
Sparsity: 90.5 | Dead Features: 0 | Total Loss: 0.1060 | Reconstruction Loss: 0.0642 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 5120000 |

  5%|▍         | 2607/55054 [01:17<26:19, 33.21it/s]

Sparsity: 10.9 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: -0.0134
Sparsity: 33.7 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0315
Sparsity: 51.4 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: 0.0002
Sparsity: 142.2 | Dead Features: 0 | Total Loss: 0.0342 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: -0.0832
Sparsity: 132.2 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 5324800 | Self Similarity: -0.0050
Sparsity: 87.8 | Dead Features: 0 | Total Loss: 0.0942 | Reconstruction Loss: 0.0543 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 5324800 |

  5%|▍         | 2707/55054 [01:20<25:42, 33.93it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: -0.0133
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0317
Sparsity: 53.0 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: 0.0001
Sparsity: 147.6 | Dead Features: 0 | Total Loss: 0.0356 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: -0.0832
Sparsity: 137.5 | Dead Features: 0 | Total Loss: 0.0422 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 5529600 | Self Similarity: -0.0049
Sparsity: 88.5 | Dead Features: 0 | Total Loss: 0.0983 | Reconstruction Loss: 0.0575 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens: 5529600 |

  5%|▌         | 2807/55054 [01:23<25:53, 33.63it/s]

Sparsity: 15.3 | Dead Features: 0 | Total Loss: 0.0104 | Reconstruction Loss: 0.0048 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: -0.0131
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0317
Sparsity: 54.5 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: 0.0003
Sparsity: 144.3 | Dead Features: 0 | Total Loss: 0.0359 | Reconstruction Loss: 0.0172 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: -0.0832
Sparsity: 135.6 | Dead Features: 0 | Total Loss: 0.0443 | Reconstruction Loss: 0.0221 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 5734400 | Self Similarity: -0.0050
Sparsity: 88.5 | Dead Features: 0 | Total Loss: 0.1068 | Reconstruction Loss: 0.0626 | L1 Loss: 0.0442 | l1_alpha: 8.0000e-04 | Tokens: 5734400 |

  5%|▌         | 2907/55054 [01:26<26:03, 33.34it/s]

Sparsity: 11.1 | Dead Features: 0 | Total Loss: 0.0080 | Reconstruction Loss: 0.0031 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: -0.0133
Sparsity: 33.1 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0315
Sparsity: 50.9 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: 0.0002
Sparsity: 139.5 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: -0.0831
Sparsity: 133.3 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 5939200 | Self Similarity: -0.0046
Sparsity: 88.6 | Dead Features: 0 | Total Loss: 0.0939 | Reconstruction Loss: 0.0544 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 5939200 |

  5%|▌         | 3007/55054 [01:29<25:38, 33.83it/s]

Sparsity: 11.0 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: -0.0135
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0314
Sparsity: 52.5 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: 0.0003
Sparsity: 143.7 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: -0.0832
Sparsity: 137.9 | Dead Features: 0 | Total Loss: 0.0418 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 6144000 | Self Similarity: -0.0050
Sparsity: 60.7 | Dead Features: 0 | Total Loss: 0.1039 | Reconstruction Loss: 0.0705 | L1 Loss: 0.0334 | l1_alpha: 8.0000e-04 | Tokens: 6144000 |

  6%|▌         | 3104/55054 [01:31<23:16, 37.21it/s]

Sparsity: 11.8 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: -0.0135
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: 0.0313
Sparsity: 53.8 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: 0.0003
Sparsity: 145.6 | Dead Features: 0 | Total Loss: 0.0351 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: -0.0831
Sparsity: 137.7 | Dead Features: 0 | Total Loss: 0.0427 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 6348800 | Self Similarity: -0.0048
Sparsity: 77.2 | Dead Features: 0 | Total Loss: 0.0992 | Reconstruction Loss: 0.0610 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 6348800 |

  6%|▌         | 3204/55054 [01:34<25:55, 33.34it/s]

Sparsity: 12.5 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: -0.0136
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: 0.0316
Sparsity: 53.2 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: 0.0002
Sparsity: 143.0 | Dead Features: 0 | Total Loss: 0.0345 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: -0.0830
Sparsity: 138.9 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 6553600 | Self Similarity: -0.0048
Sparsity: 83.1 | Dead Features: 0 | Total Loss: 0.0979 | Reconstruction Loss: 0.0584 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 6553600 |

  6%|▌         | 3304/55054 [01:37<25:29, 33.84it/s]

Sparsity: 10.9 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: -0.0135
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: 0.0315
Sparsity: 51.7 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: 0.0002
Sparsity: 143.5 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: -0.0830
Sparsity: 138.7 | Dead Features: 0 | Total Loss: 0.0415 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 6758400 | Self Similarity: -0.0047
Sparsity: 84.9 | Dead Features: 0 | Total Loss: 0.0990 | Reconstruction Loss: 0.0588 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 6758400 |

  6%|▌         | 3404/55054 [01:40<25:52, 33.26it/s]

Sparsity: 13.2 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: -0.0135
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: 0.0315
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0179 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: -0.0000
Sparsity: 139.4 | Dead Features: 0 | Total Loss: 0.0342 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: -0.0828
Sparsity: 135.2 | Dead Features: 0 | Total Loss: 0.0406 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 6963200 | Self Similarity: -0.0047
Sparsity: 83.0 | Dead Features: 0 | Total Loss: 0.0917 | Reconstruction Loss: 0.0534 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 6963200 

  6%|▋         | 3504/55054 [01:43<25:29, 33.71it/s]

Sparsity: 13.8 | Dead Features: 0 | Total Loss: 0.0096 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: -0.0134
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: 0.0316
Sparsity: 53.9 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: 0.0001
Sparsity: 144.5 | Dead Features: 0 | Total Loss: 0.0353 | Reconstruction Loss: 0.0164 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: -0.0831
Sparsity: 139.1 | Dead Features: 0 | Total Loss: 0.0425 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 7168000 | Self Similarity: -0.0050
Sparsity: 86.9 | Dead Features: 0 | Total Loss: 0.0971 | Reconstruction Loss: 0.0577 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 7168000 |

  7%|▋         | 3604/55054 [01:46<25:36, 33.49it/s]

Sparsity: 13.4 | Dead Features: 0 | Total Loss: 0.0094 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: -0.0135
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: 0.0316
Sparsity: 54.0 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: 0.0001
Sparsity: 147.1 | Dead Features: 0 | Total Loss: 0.0351 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: -0.0829
Sparsity: 140.4 | Dead Features: 0 | Total Loss: 0.0422 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 7372800 | Self Similarity: -0.0047
Sparsity: 88.3 | Dead Features: 0 | Total Loss: 0.0991 | Reconstruction Loss: 0.0582 | L1 Loss: 0.0409 | l1_alpha: 8.0000e-04 | Tokens: 7372800 |

  7%|▋         | 3704/55054 [01:49<25:34, 33.46it/s]

Sparsity: 11.7 | Dead Features: 0 | Total Loss: 0.0084 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: -0.0134
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: 0.0314
Sparsity: 50.8 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: -0.0001
Sparsity: 145.6 | Dead Features: 0 | Total Loss: 0.0345 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: -0.0829
Sparsity: 138.5 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 7577600 | Self Similarity: -0.0050
Sparsity: 88.8 | Dead Features: 0 | Total Loss: 0.0955 | Reconstruction Loss: 0.0558 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 7577600 

  7%|▋         | 3804/55054 [01:52<25:31, 33.46it/s]

Sparsity: 10.6 | Dead Features: 0 | Total Loss: 0.0080 | Reconstruction Loss: 0.0030 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: -0.0133
Sparsity: 31.3 | Dead Features: 0 | Total Loss: 0.0108 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0046 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: 0.0316
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0164 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: 0.0001
Sparsity: 141.0 | Dead Features: 0 | Total Loss: 0.0338 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0181 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: -0.0831
Sparsity: 135.9 | Dead Features: 0 | Total Loss: 0.0402 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 7782400 | Self Similarity: -0.0052
Sparsity: 88.1 | Dead Features: 0 | Total Loss: 0.0936 | Reconstruction Loss: 0.0543 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 7782400 |

  7%|▋         | 3908/55054 [01:55<22:45, 37.46it/s]

Sparsity: 11.1 | Dead Features: 0 | Total Loss: 0.0081 | Reconstruction Loss: 0.0031 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: -0.0132
Sparsity: 34.2 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: 0.0316
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: 0.0001
Sparsity: 143.2 | Dead Features: 0 | Total Loss: 0.0341 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: -0.0828
Sparsity: 138.5 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 7987200 | Self Similarity: -0.0049
Sparsity: 89.7 | Dead Features: 0 | Total Loss: 0.0942 | Reconstruction Loss: 0.0544 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 7987200 |

  7%|▋         | 4004/55054 [01:58<25:42, 33.10it/s]

Sparsity: 10.3 | Dead Features: 0 | Total Loss: 0.0080 | Reconstruction Loss: 0.0030 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: -0.0134
Sparsity: 33.2 | Dead Features: 0 | Total Loss: 0.0110 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: 0.0314
Sparsity: 51.3 | Dead Features: 0 | Total Loss: 0.0164 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: 0.0000
Sparsity: 146.7 | Dead Features: 0 | Total Loss: 0.0349 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0193 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: -0.0828
Sparsity: 139.8 | Dead Features: 0 | Total Loss: 0.0428 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 8192000 | Self Similarity: -0.0049
Sparsity: 90.3 | Dead Features: 0 | Total Loss: 0.0945 | Reconstruction Loss: 0.0538 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 8192000 |

  7%|▋         | 4104/55054 [02:01<25:24, 33.43it/s]

Sparsity: 14.1 | Dead Features: 0 | Total Loss: 0.0096 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: -0.0132
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: 0.0314
Sparsity: 54.8 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: -0.0000
Sparsity: 150.3 | Dead Features: 0 | Total Loss: 0.0361 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: -0.0828
Sparsity: 143.2 | Dead Features: 0 | Total Loss: 0.0440 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 8396800 | Self Similarity: -0.0051
Sparsity: 90.6 | Dead Features: 0 | Total Loss: 0.1043 | Reconstruction Loss: 0.0632 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 8396800 

  8%|▊         | 4204/55054 [02:04<25:40, 33.01it/s]

Sparsity: 15.6 | Dead Features: 0 | Total Loss: 0.0105 | Reconstruction Loss: 0.0048 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: -0.0134
Sparsity: 39.2 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0077 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: 0.0314
Sparsity: 56.5 | Dead Features: 0 | Total Loss: 0.0184 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: 0.0000
Sparsity: 149.2 | Dead Features: 0 | Total Loss: 0.0371 | Reconstruction Loss: 0.0173 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: -0.0828
Sparsity: 146.5 | Dead Features: 0 | Total Loss: 0.0453 | Reconstruction Loss: 0.0224 | L1 Loss: 0.0229 | l1_alpha: 8.0000e-04 | Tokens: 8601600 | Self Similarity: -0.0051
Sparsity: 91.0 | Dead Features: 0 | Total Loss: 0.1015 | Reconstruction Loss: 0.0615 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 8601600 |

  8%|▊         | 4304/55054 [02:07<25:47, 32.80it/s]

Sparsity: 13.9 | Dead Features: 0 | Total Loss: 0.0097 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: -0.0136
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: 0.0314
Sparsity: 55.0 | Dead Features: 0 | Total Loss: 0.0183 | Reconstruction Loss: 0.0107 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: -0.0000
Sparsity: 150.5 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0176 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: -0.0828
Sparsity: 142.5 | Dead Features: 0 | Total Loss: 0.0461 | Reconstruction Loss: 0.0226 | L1 Loss: 0.0235 | l1_alpha: 8.0000e-04 | Tokens: 8806400 | Self Similarity: -0.0049
Sparsity: 90.1 | Dead Features: 0 | Total Loss: 0.1010 | Reconstruction Loss: 0.0596 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens: 8806400 

  8%|▊         | 4404/55054 [02:10<26:09, 32.27it/s]

Sparsity: 10.9 | Dead Features: 0 | Total Loss: 0.0081 | Reconstruction Loss: 0.0030 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: -0.0136
Sparsity: 32.7 | Dead Features: 0 | Total Loss: 0.0110 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0046 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: 0.0313
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: -0.0001
Sparsity: 142.0 | Dead Features: 0 | Total Loss: 0.0339 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: -0.0827
Sparsity: 135.5 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 9011200 | Self Similarity: -0.0047
Sparsity: 87.5 | Dead Features: 0 | Total Loss: 0.0919 | Reconstruction Loss: 0.0539 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 9011200 

  8%|▊         | 4504/55054 [02:13<25:41, 32.78it/s]

Sparsity: 13.4 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: -0.0139
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: 0.0312
Sparsity: 52.2 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: -0.0003
Sparsity: 137.9 | Dead Features: 0 | Total Loss: 0.0348 | Reconstruction Loss: 0.0164 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: -0.0823
Sparsity: 136.3 | Dead Features: 0 | Total Loss: 0.0422 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 9216000 | Self Similarity: -0.0046
Sparsity: 94.0 | Dead Features: 0 | Total Loss: 0.0969 | Reconstruction Loss: 0.0561 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens: 9216000 

  8%|▊         | 4604/55054 [02:16<25:59, 32.35it/s]

Sparsity: 12.6 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: -0.0135
Sparsity: 35.6 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: 0.0313
Sparsity: 51.4 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: -0.0003
Sparsity: 141.9 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: -0.0823
Sparsity: 135.7 | Dead Features: 0 | Total Loss: 0.0418 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 9420800 | Self Similarity: -0.0044
Sparsity: 91.6 | Dead Features: 0 | Total Loss: 0.0949 | Reconstruction Loss: 0.0557 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 9420800 

  9%|▊         | 4704/55054 [02:19<25:56, 32.36it/s]

Sparsity: 10.0 | Dead Features: 0 | Total Loss: 0.0077 | Reconstruction Loss: 0.0029 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: -0.0136
Sparsity: 32.3 | Dead Features: 0 | Total Loss: 0.0110 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: 0.0312
Sparsity: 48.7 | Dead Features: 0 | Total Loss: 0.0165 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: -0.0003
Sparsity: 137.5 | Dead Features: 0 | Total Loss: 0.0335 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0179 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: -0.0821
Sparsity: 131.7 | Dead Features: 0 | Total Loss: 0.0401 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 9625600 | Self Similarity: -0.0041
Sparsity: 90.9 | Dead Features: 0 | Total Loss: 0.0921 | Reconstruction Loss: 0.0532 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 9625600 

  9%|▊         | 4804/55054 [02:22<25:03, 33.42it/s]

Sparsity: 12.9 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: -0.0135
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: 0.0313
Sparsity: 52.3 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: -0.0003
Sparsity: 143.2 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: -0.0824
Sparsity: 138.4 | Dead Features: 0 | Total Loss: 0.0417 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 9830400 | Self Similarity: -0.0043
Sparsity: 91.9 | Dead Features: 0 | Total Loss: 0.0930 | Reconstruction Loss: 0.0538 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 9830400 

  9%|▉         | 4904/55054 [02:25<25:12, 33.15it/s]

Sparsity: 11.3 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: -0.0135
Sparsity: 33.4 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: 0.0313
Sparsity: 51.9 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: -0.0003
Sparsity: 144.9 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: -0.0823
Sparsity: 136.3 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 10035200 | Self Similarity: -0.0045
Sparsity: 92.8 | Dead Features: 0 | Total Loss: 0.0918 | Reconstruction Loss: 0.0526 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 100

  9%|▉         | 5004/55054 [02:28<25:01, 33.33it/s]

Sparsity: 11.9 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: -0.0135
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: 0.0312
Sparsity: 51.3 | Dead Features: 0 | Total Loss: 0.0165 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: -0.0004
Sparsity: 143.5 | Dead Features: 0 | Total Loss: 0.0343 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: -0.0825
Sparsity: 139.6 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 10240000 | Self Similarity: -0.0043
Sparsity: 93.9 | Dead Features: 0 | Total Loss: 0.0926 | Reconstruction Loss: 0.0531 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 102

  9%|▉         | 5108/55054 [02:31<22:45, 36.58it/s]

Sparsity: 12.7 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: -0.0136
Sparsity: 37.3 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: 0.0313
Sparsity: 52.5 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: -0.0001
Sparsity: 144.5 | Dead Features: 0 | Total Loss: 0.0348 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: -0.0823
Sparsity: 138.4 | Dead Features: 0 | Total Loss: 0.0418 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 10444800 | Self Similarity: -0.0043
Sparsity: 94.4 | Dead Features: 0 | Total Loss: 0.0955 | Reconstruction Loss: 0.0549 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 104

  9%|▉         | 5204/55054 [02:34<22:09, 37.50it/s]

Sparsity: 10.9 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: -0.0135
Sparsity: 33.9 | Dead Features: 0 | Total Loss: 0.0109 | Reconstruction Loss: 0.0062 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: 0.0311
Sparsity: 50.7 | Dead Features: 0 | Total Loss: 0.0165 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: -0.0004
Sparsity: 144.2 | Dead Features: 0 | Total Loss: 0.0337 | Reconstruction Loss: 0.0153 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: -0.0824
Sparsity: 137.2 | Dead Features: 0 | Total Loss: 0.0399 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 10649600 | Self Similarity: -0.0043
Sparsity: 94.3 | Dead Features: 0 | Total Loss: 0.0900 | Reconstruction Loss: 0.0513 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 106

 10%|▉         | 5304/55054 [02:37<24:42, 33.57it/s]

Sparsity: 12.0 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: -0.0136
Sparsity: 35.2 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: 0.0311
Sparsity: 52.4 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: -0.0003
Sparsity: 144.1 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: -0.0824
Sparsity: 138.9 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 10854400 | Self Similarity: -0.0043
Sparsity: 94.8 | Dead Features: 0 | Total Loss: 0.0908 | Reconstruction Loss: 0.0523 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 108

 10%|▉         | 5404/55054 [02:40<24:56, 33.17it/s]

Sparsity: 12.0 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: -0.0137
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: 0.0313
Sparsity: 52.6 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: -0.0003
Sparsity: 143.2 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: -0.0822
Sparsity: 138.0 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 11059200 | Self Similarity: -0.0042
Sparsity: 95.3 | Dead Features: 0 | Total Loss: 0.0916 | Reconstruction Loss: 0.0525 | L1 Loss: 0.0391 | l1_alpha: 8.0000e-04 | Tokens: 110

 10%|▉         | 5504/55054 [02:43<24:36, 33.55it/s]

Sparsity: 13.5 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: -0.0137
Sparsity: 38.6 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: 0.0312
Sparsity: 54.8 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: -0.0005
Sparsity: 148.6 | Dead Features: 0 | Total Loss: 0.0354 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0194 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: -0.0824
Sparsity: 135.7 | Dead Features: 0 | Total Loss: 0.0446 | Reconstruction Loss: 0.0225 | L1 Loss: 0.0221 | l1_alpha: 8.0000e-04 | Tokens: 11264000 | Self Similarity: -0.0042
Sparsity: 94.8 | Dead Features: 0 | Total Loss: 0.0916 | Reconstruction Loss: 0.0517 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 112

 10%|█         | 5604/55054 [02:46<24:37, 33.47it/s]

Sparsity: 11.7 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: -0.0137
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: 0.0313
Sparsity: 52.4 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: -0.0007
Sparsity: 143.0 | Dead Features: 0 | Total Loss: 0.0346 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: -0.0823
Sparsity: 133.2 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 11468800 | Self Similarity: -0.0045
Sparsity: 97.1 | Dead Features: 0 | Total Loss: 0.0916 | Reconstruction Loss: 0.0521 | L1 Loss: 0.0395 | l1_alpha: 8.0000e-04 | Tokens: 114

 10%|█         | 5704/55054 [02:48<24:53, 33.04it/s]

Sparsity: 13.3 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: -0.0136
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: 0.0314
Sparsity: 54.2 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: -0.0004
Sparsity: 145.3 | Dead Features: 0 | Total Loss: 0.0352 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: -0.0822
Sparsity: 137.6 | Dead Features: 0 | Total Loss: 0.0422 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0223 | l1_alpha: 8.0000e-04 | Tokens: 11673600 | Self Similarity: -0.0042
Sparsity: 97.6 | Dead Features: 0 | Total Loss: 0.0914 | Reconstruction Loss: 0.0513 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens: 116

 11%|█         | 5804/55054 [02:51<24:37, 33.34it/s]

Sparsity: 11.6 | Dead Features: 0 | Total Loss: 0.0084 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: -0.0137
Sparsity: 34.2 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: 0.0314
Sparsity: 52.4 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: -0.0005
Sparsity: 148.2 | Dead Features: 0 | Total Loss: 0.0352 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: -0.0821
Sparsity: 137.7 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 11878400 | Self Similarity: -0.0042
Sparsity: 90.3 | Dead Features: 0 | Total Loss: 0.0938 | Reconstruction Loss: 0.0553 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 118

 11%|█         | 5908/55054 [02:54<21:26, 38.19it/s]

Sparsity: 16.0 | Dead Features: 0 | Total Loss: 0.0104 | Reconstruction Loss: 0.0046 | L1 Loss: 0.0057 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: -0.0139
Sparsity: 40.2 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: 0.0313
Sparsity: 55.2 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: -0.0004
Sparsity: 150.0 | Dead Features: 0 | Total Loss: 0.0355 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0193 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: -0.0820
Sparsity: 141.9 | Dead Features: 0 | Total Loss: 0.0430 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 12083200 | Self Similarity: -0.0041
Sparsity: 95.7 | Dead Features: 0 | Total Loss: 0.0934 | Reconstruction Loss: 0.0537 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 120

 11%|█         | 6005/55054 [02:57<24:48, 32.94it/s]

Sparsity: 10.8 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: -0.0137
Sparsity: 34.1 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: 0.0313
Sparsity: 50.9 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: -0.0003
Sparsity: 145.9 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: -0.0820
Sparsity: 136.4 | Dead Features: 0 | Total Loss: 0.0419 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 12288000 | Self Similarity: -0.0042
Sparsity: 98.0 | Dead Features: 0 | Total Loss: 0.0924 | Reconstruction Loss: 0.0517 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 122

 11%|█         | 6105/55054 [03:00<24:28, 33.33it/s]

Sparsity: 12.7 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: -0.0136
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: 0.0311
Sparsity: 53.0 | Dead Features: 0 | Total Loss: 0.0173 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: -0.0003
Sparsity: 148.1 | Dead Features: 0 | Total Loss: 0.0352 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0193 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: -0.0819
Sparsity: 141.5 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 12492800 | Self Similarity: -0.0039
Sparsity: 98.1 | Dead Features: 0 | Total Loss: 0.0921 | Reconstruction Loss: 0.0518 | L1 Loss: 0.0403 | l1_alpha: 8.0000e-04 | Tokens: 124

 11%|█▏        | 6205/55054 [03:03<24:15, 33.56it/s]

Sparsity: 13.2 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: -0.0136
Sparsity: 34.1 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: 0.0312
Sparsity: 52.3 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: -0.0002
Sparsity: 142.9 | Dead Features: 0 | Total Loss: 0.0352 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: -0.0819
Sparsity: 138.7 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 12697600 | Self Similarity: -0.0037
Sparsity: 97.4 | Dead Features: 0 | Total Loss: 0.1243 | Reconstruction Loss: 0.0856 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 126

 11%|█▏        | 6305/55054 [03:06<24:05, 33.72it/s]

Sparsity: 12.5 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: -0.0138
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: 0.0312
Sparsity: 52.5 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: -0.0005
Sparsity: 140.0 | Dead Features: 0 | Total Loss: 0.0345 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: -0.0815
Sparsity: 138.2 | Dead Features: 0 | Total Loss: 0.0417 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 12902400 | Self Similarity: -0.0037
Sparsity: 84.7 | Dead Features: 0 | Total Loss: 0.0927 | Reconstruction Loss: 0.0553 | L1 Loss: 0.0375 | l1_alpha: 8.0000e-04 | Tokens: 129

 12%|█▏        | 6405/55054 [03:09<23:22, 34.68it/s]

Sparsity: 11.6 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: -0.0139
Sparsity: 34.0 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: 0.0312
Sparsity: 51.3 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: -0.0005
Sparsity: 140.7 | Dead Features: 0 | Total Loss: 0.0346 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: -0.0818
Sparsity: 138.0 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 13107200 | Self Similarity: -0.0041
Sparsity: 94.0 | Dead Features: 0 | Total Loss: 0.0887 | Reconstruction Loss: 0.0499 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 131

 12%|█▏        | 6505/55054 [03:12<23:44, 34.08it/s]

Sparsity: 11.8 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: -0.0138
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: 0.0313
Sparsity: 51.4 | Dead Features: 0 | Total Loss: 0.0166 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: -0.0006
Sparsity: 144.6 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: -0.0818
Sparsity: 139.5 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 13312000 | Self Similarity: -0.0042
Sparsity: 99.2 | Dead Features: 0 | Total Loss: 0.0942 | Reconstruction Loss: 0.0535 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 133

 12%|█▏        | 6605/55054 [03:15<22:51, 35.32it/s]

Sparsity: 13.1 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: -0.0138
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: 0.0313
Sparsity: 52.2 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: -0.0006
Sparsity: 146.9 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: -0.0818
Sparsity: 140.7 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 13516800 | Self Similarity: -0.0039
Sparsity: 97.1 | Dead Features: 0 | Total Loss: 0.0905 | Reconstruction Loss: 0.0518 | L1 Loss: 0.0386 | l1_alpha: 8.0000e-04 | Tokens: 135

 12%|█▏        | 6705/55054 [03:18<24:47, 32.51it/s]

Sparsity: 14.4 | Dead Features: 0 | Total Loss: 0.0099 | Reconstruction Loss: 0.0045 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: -0.0139
Sparsity: 38.8 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: 0.0313
Sparsity: 56.4 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: -0.0006
Sparsity: 151.1 | Dead Features: 0 | Total Loss: 0.0377 | Reconstruction Loss: 0.0175 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: -0.0818
Sparsity: 144.5 | Dead Features: 0 | Total Loss: 0.0473 | Reconstruction Loss: 0.0225 | L1 Loss: 0.0249 | l1_alpha: 8.0000e-04 | Tokens: 13721600 | Self Similarity: -0.0040
Sparsity: 101.9 | Dead Features: 0 | Total Loss: 0.0988 | Reconstruction Loss: 0.0569 | L1 Loss: 0.0419 | l1_alpha: 8.0000e-04 | Tokens: 13

 12%|█▏        | 6805/55054 [03:21<24:45, 32.49it/s]

Sparsity: 12.3 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: -0.0139
Sparsity: 35.5 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: 0.0311
Sparsity: 54.5 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: -0.0006
Sparsity: 148.1 | Dead Features: 0 | Total Loss: 0.0361 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0194 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: -0.0817
Sparsity: 140.4 | Dead Features: 0 | Total Loss: 0.0439 | Reconstruction Loss: 0.0215 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 13926400 | Self Similarity: -0.0040
Sparsity: 103.4 | Dead Features: 0 | Total Loss: 0.0951 | Reconstruction Loss: 0.0540 | L1 Loss: 0.0410 | l1_alpha: 8.0000e-04 | Tokens: 13

 13%|█▎        | 6905/55054 [03:24<24:10, 33.19it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: -0.0139
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: 0.0312
Sparsity: 52.9 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: -0.0006
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0345 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: -0.0816
Sparsity: 140.1 | Dead Features: 0 | Total Loss: 0.0412 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 14131200 | Self Similarity: -0.0040
Sparsity: 100.9 | Dead Features: 0 | Total Loss: 0.0875 | Reconstruction Loss: 0.0492 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 14

 13%|█▎        | 7005/55054 [03:27<24:57, 32.08it/s]

Sparsity: 11.6 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: -0.0138
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: 0.0312
Sparsity: 53.8 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: -0.0005
Sparsity: 148.5 | Dead Features: 0 | Total Loss: 0.0356 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0192 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: -0.0814
Sparsity: 144.6 | Dead Features: 0 | Total Loss: 0.0435 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0226 | l1_alpha: 8.0000e-04 | Tokens: 14336000 | Self Similarity: -0.0039
Sparsity: 105.4 | Dead Features: 0 | Total Loss: 0.0962 | Reconstruction Loss: 0.0544 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 14

 13%|█▎        | 7105/55054 [03:30<24:26, 32.71it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: -0.0139
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: 0.0312
Sparsity: 54.6 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: -0.0006
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: -0.0815
Sparsity: 141.1 | Dead Features: 0 | Total Loss: 0.0423 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 14540800 | Self Similarity: -0.0039
Sparsity: 101.9 | Dead Features: 0 | Total Loss: 0.0958 | Reconstruction Loss: 0.0555 | L1 Loss: 0.0403 | l1_alpha: 8.0000e-04 | Tokens: 14

 13%|█▎        | 7205/55054 [03:33<24:35, 32.42it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: -0.0139
Sparsity: 35.8 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: 0.0313
Sparsity: 53.8 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: -0.0007
Sparsity: 148.8 | Dead Features: 0 | Total Loss: 0.0360 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: -0.0813
Sparsity: 142.3 | Dead Features: 0 | Total Loss: 0.0443 | Reconstruction Loss: 0.0214 | L1 Loss: 0.0228 | l1_alpha: 8.0000e-04 | Tokens: 14745600 | Self Similarity: -0.0037
Sparsity: 104.1 | Dead Features: 0 | Total Loss: 0.0977 | Reconstruction Loss: 0.0559 | L1 Loss: 0.0418 | l1_alpha: 8.0000e-04 | Tokens: 14

 13%|█▎        | 7305/55054 [03:36<24:24, 32.60it/s]

Sparsity: 10.5 | Dead Features: 0 | Total Loss: 0.0079 | Reconstruction Loss: 0.0031 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: -0.0140
Sparsity: 33.0 | Dead Features: 0 | Total Loss: 0.0111 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: 0.0312
Sparsity: 50.7 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: -0.0007
Sparsity: 144.4 | Dead Features: 0 | Total Loss: 0.0343 | Reconstruction Loss: 0.0155 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: -0.0814
Sparsity: 138.5 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 14950400 | Self Similarity: -0.0039
Sparsity: 103.6 | Dead Features: 0 | Total Loss: 0.0896 | Reconstruction Loss: 0.0496 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 14

 13%|█▎        | 7405/55054 [03:39<23:52, 33.25it/s]

Sparsity: 14.2 | Dead Features: 0 | Total Loss: 0.0096 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: -0.0139
Sparsity: 37.4 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: 0.0312
Sparsity: 53.3 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: -0.0007
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0348 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: -0.0814
Sparsity: 140.6 | Dead Features: 0 | Total Loss: 0.0428 | Reconstruction Loss: 0.0211 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 15155200 | Self Similarity: -0.0037
Sparsity: 103.8 | Dead Features: 0 | Total Loss: 0.0943 | Reconstruction Loss: 0.0536 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 15

 14%|█▎        | 7505/55054 [03:42<24:28, 32.38it/s]

Sparsity: 14.1 | Dead Features: 0 | Total Loss: 0.0096 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: -0.0145
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: 0.0312
Sparsity: 50.1 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: -0.0007
Sparsity: 134.6 | Dead Features: 0 | Total Loss: 0.0358 | Reconstruction Loss: 0.0175 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: -0.0807
Sparsity: 133.5 | Dead Features: 0 | Total Loss: 0.0433 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 15360000 | Self Similarity: -0.0039
Sparsity: 99.1 | Dead Features: 0 | Total Loss: 0.0934 | Reconstruction Loss: 0.0535 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 153

 14%|█▍        | 7605/55054 [03:45<24:24, 32.41it/s]

Sparsity: 12.4 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: -0.0146
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: 0.0310
Sparsity: 50.6 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: -0.0006
Sparsity: 135.9 | Dead Features: 0 | Total Loss: 0.0349 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0181 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: -0.0804
Sparsity: 139.1 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 15564800 | Self Similarity: -0.0037
Sparsity: 105.1 | Dead Features: 0 | Total Loss: 0.0921 | Reconstruction Loss: 0.0522 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 15

 14%|█▍        | 7705/55054 [03:48<21:08, 37.32it/s]

Sparsity: 13.7 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: -0.0146
Sparsity: 38.0 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: 0.0312
Sparsity: 52.8 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: -0.0005
Sparsity: 140.6 | Dead Features: 0 | Total Loss: 0.0355 | Reconstruction Loss: 0.0165 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: -0.0805
Sparsity: 135.5 | Dead Features: 0 | Total Loss: 0.0441 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 15769600 | Self Similarity: -0.0038
Sparsity: 102.4 | Dead Features: 0 | Total Loss: 0.0916 | Reconstruction Loss: 0.0499 | L1 Loss: 0.0417 | l1_alpha: 8.0000e-04 | Tokens: 15

 14%|█▍        | 7804/55054 [03:51<21:51, 36.02it/s]

Sparsity: 11.3 | Dead Features: 0 | Total Loss: 0.0084 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: -0.0146
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: 0.0310
Sparsity: 51.4 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: -0.0005
Sparsity: 145.2 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: -0.0805
Sparsity: 131.9 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 15974400 | Self Similarity: -0.0039
Sparsity: 104.2 | Dead Features: 0 | Total Loss: 0.0883 | Reconstruction Loss: 0.0489 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 15

 14%|█▍        | 7904/55054 [03:54<23:12, 33.85it/s]

Sparsity: 10.5 | Dead Features: 0 | Total Loss: 0.0080 | Reconstruction Loss: 0.0031 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: -0.0145
Sparsity: 33.8 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: 0.0311
Sparsity: 49.7 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: -0.0005
Sparsity: 139.5 | Dead Features: 0 | Total Loss: 0.0338 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0181 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: -0.0806
Sparsity: 133.9 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 16179200 | Self Similarity: -0.0040
Sparsity: 102.5 | Dead Features: 0 | Total Loss: 0.0872 | Reconstruction Loss: 0.0488 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 16

 15%|█▍        | 8004/55054 [03:57<23:46, 32.99it/s]

Sparsity: 12.2 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: -0.0145
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: 0.0313
Sparsity: 52.0 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: -0.0006
Sparsity: 141.4 | Dead Features: 0 | Total Loss: 0.0346 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: -0.0808
Sparsity: 135.3 | Dead Features: 0 | Total Loss: 0.0414 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 16384000 | Self Similarity: -0.0039
Sparsity: 105.4 | Dead Features: 0 | Total Loss: 0.0910 | Reconstruction Loss: 0.0507 | L1 Loss: 0.0403 | l1_alpha: 8.0000e-04 | Tokens: 16

 15%|█▍        | 8104/55054 [04:00<24:06, 32.45it/s]

Sparsity: 11.3 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: -0.0143
Sparsity: 34.1 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: 0.0312
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: -0.0006
Sparsity: 140.6 | Dead Features: 0 | Total Loss: 0.0339 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0182 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: -0.0808
Sparsity: 130.3 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0206 | l1_alpha: 8.0000e-04 | Tokens: 16588800 | Self Similarity: -0.0040
Sparsity: 102.9 | Dead Features: 0 | Total Loss: 0.0858 | Reconstruction Loss: 0.0473 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 16

 15%|█▍        | 8204/55054 [04:03<24:04, 32.42it/s]

Sparsity: 12.3 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: -0.0143
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: 0.0313
Sparsity: 52.4 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: -0.0005
Sparsity: 144.1 | Dead Features: 0 | Total Loss: 0.0356 | Reconstruction Loss: 0.0167 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: -0.0807
Sparsity: 135.4 | Dead Features: 0 | Total Loss: 0.0439 | Reconstruction Loss: 0.0218 | L1 Loss: 0.0221 | l1_alpha: 8.0000e-04 | Tokens: 16793600 | Self Similarity: -0.0039
Sparsity: 94.6 | Dead Features: 0 | Total Loss: 0.0947 | Reconstruction Loss: 0.0553 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 167

 15%|█▌        | 8304/55054 [04:06<24:09, 32.26it/s]

Sparsity: 10.6 | Dead Features: 0 | Total Loss: 0.0079 | Reconstruction Loss: 0.0030 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: -0.0144
Sparsity: 32.8 | Dead Features: 0 | Total Loss: 0.0110 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: 0.0313
Sparsity: 49.6 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: -0.0005
Sparsity: 141.2 | Dead Features: 0 | Total Loss: 0.0340 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0182 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: -0.0808
Sparsity: 135.0 | Dead Features: 0 | Total Loss: 0.0420 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 16998400 | Self Similarity: -0.0035
Sparsity: 100.9 | Dead Features: 0 | Total Loss: 0.0891 | Reconstruction Loss: 0.0504 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 16

 15%|█▌        | 8404/55054 [04:09<24:14, 32.06it/s]

Sparsity: 11.1 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: -0.0142
Sparsity: 32.7 | Dead Features: 0 | Total Loss: 0.0111 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0046 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: 0.0315
Sparsity: 50.6 | Dead Features: 0 | Total Loss: 0.0165 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: -0.0006
Sparsity: 142.9 | Dead Features: 0 | Total Loss: 0.0338 | Reconstruction Loss: 0.0155 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: -0.0808
Sparsity: 132.6 | Dead Features: 0 | Total Loss: 0.0404 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 17203200 | Self Similarity: -0.0035
Sparsity: 90.7 | Dead Features: 0 | Total Loss: 0.0870 | Reconstruction Loss: 0.0513 | L1 Loss: 0.0357 | l1_alpha: 8.0000e-04 | Tokens: 172

 15%|█▌        | 8504/55054 [04:12<23:57, 32.37it/s]

Sparsity: 12.4 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: -0.0144
Sparsity: 38.1 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: 0.0314
Sparsity: 51.7 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: -0.0005
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: -0.0810
Sparsity: 138.7 | Dead Features: 0 | Total Loss: 0.0429 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0227 | l1_alpha: 8.0000e-04 | Tokens: 17408000 | Self Similarity: -0.0036
Sparsity: 105.5 | Dead Features: 0 | Total Loss: 0.0926 | Reconstruction Loss: 0.0516 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 17

 16%|█▌        | 8604/55054 [04:15<21:33, 35.92it/s]

Sparsity: 10.9 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: -0.0143
Sparsity: 33.8 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: 0.0314
Sparsity: 52.2 | Dead Features: 0 | Total Loss: 0.0173 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: -0.0005
Sparsity: 145.1 | Dead Features: 0 | Total Loss: 0.0349 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: -0.0808
Sparsity: 133.4 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0210 | l1_alpha: 8.0000e-04 | Tokens: 17612800 | Self Similarity: -0.0035
Sparsity: 105.7 | Dead Features: 0 | Total Loss: 0.0893 | Reconstruction Loss: 0.0493 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 17

 16%|█▌        | 8704/55054 [04:18<23:34, 32.76it/s]

Sparsity: 11.8 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: -0.0143
Sparsity: 34.2 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: 0.0315
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: -0.0004
Sparsity: 142.7 | Dead Features: 0 | Total Loss: 0.0338 | Reconstruction Loss: 0.0157 | L1 Loss: 0.0181 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: -0.0808
Sparsity: 136.2 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0208 | l1_alpha: 8.0000e-04 | Tokens: 17817600 | Self Similarity: -0.0033
Sparsity: 105.3 | Dead Features: 0 | Total Loss: 0.0853 | Reconstruction Loss: 0.0466 | L1 Loss: 0.0387 | l1_alpha: 8.0000e-04 | Tokens: 17

 16%|█▌        | 8804/55054 [04:21<24:01, 32.09it/s]

Sparsity: 14.0 | Dead Features: 0 | Total Loss: 0.0096 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: -0.0144
Sparsity: 38.2 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: 0.0315
Sparsity: 55.0 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: -0.0005
Sparsity: 146.8 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: -0.0808
Sparsity: 139.4 | Dead Features: 0 | Total Loss: 0.0419 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 18022400 | Self Similarity: -0.0033
Sparsity: 105.7 | Dead Features: 0 | Total Loss: 0.0865 | Reconstruction Loss: 0.0476 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 18

 16%|█▌        | 8904/55054 [04:24<23:24, 32.86it/s]

Sparsity: 12.0 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: -0.0145
Sparsity: 35.0 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: 0.0315
Sparsity: 51.8 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: -0.0005
Sparsity: 143.0 | Dead Features: 0 | Total Loss: 0.0352 | Reconstruction Loss: 0.0164 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: -0.0810
Sparsity: 138.3 | Dead Features: 0 | Total Loss: 0.0423 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 18227200 | Self Similarity: -0.0031
Sparsity: 102.4 | Dead Features: 0 | Total Loss: 0.0910 | Reconstruction Loss: 0.0517 | L1 Loss: 0.0393 | l1_alpha: 8.0000e-04 | Tokens: 18

 16%|█▋        | 9004/55054 [04:27<21:01, 36.52it/s]

Sparsity: 11.5 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: -0.0146
Sparsity: 34.1 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: 0.0314
Sparsity: 51.9 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: -0.0006
Sparsity: 142.7 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: -0.0809
Sparsity: 137.7 | Dead Features: 0 | Total Loss: 0.0415 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 18432000 | Self Similarity: -0.0034
Sparsity: 106.0 | Dead Features: 0 | Total Loss: 0.0889 | Reconstruction Loss: 0.0493 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 18

 17%|█▋        | 9104/55054 [04:30<23:31, 32.56it/s]

Sparsity: 16.4 | Dead Features: 0 | Total Loss: 0.0109 | Reconstruction Loss: 0.0050 | L1 Loss: 0.0059 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: -0.0148
Sparsity: 42.0 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0082 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: 0.0315
Sparsity: 59.6 | Dead Features: 0 | Total Loss: 0.0187 | Reconstruction Loss: 0.0107 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: -0.0005
Sparsity: 151.7 | Dead Features: 0 | Total Loss: 0.0379 | Reconstruction Loss: 0.0176 | L1 Loss: 0.0203 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: -0.0808
Sparsity: 147.1 | Dead Features: 0 | Total Loss: 0.0454 | Reconstruction Loss: 0.0222 | L1 Loss: 0.0232 | l1_alpha: 8.0000e-04 | Tokens: 18636800 | Self Similarity: -0.0034
Sparsity: 102.8 | Dead Features: 0 | Total Loss: 0.0960 | Reconstruction Loss: 0.0551 | L1 Loss: 0.0409 | l1_alpha: 8.0000e-04 | Tokens: 18

 17%|█▋        | 9204/55054 [04:33<23:15, 32.86it/s]

Sparsity: 12.1 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: -0.0147
Sparsity: 37.0 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: 0.0314
Sparsity: 54.4 | Dead Features: 0 | Total Loss: 0.0175 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: -0.0006
Sparsity: 141.9 | Dead Features: 0 | Total Loss: 0.0354 | Reconstruction Loss: 0.0167 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: -0.0807
Sparsity: 142.0 | Dead Features: 0 | Total Loss: 0.0422 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 18841600 | Self Similarity: -0.0033
Sparsity: 107.4 | Dead Features: 0 | Total Loss: 0.0888 | Reconstruction Loss: 0.0489 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 18

 17%|█▋        | 9304/55054 [04:36<23:12, 32.86it/s]

Sparsity: 11.9 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: -0.0147
Sparsity: 33.5 | Dead Features: 0 | Total Loss: 0.0110 | Reconstruction Loss: 0.0063 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: 0.0311
Sparsity: 49.8 | Dead Features: 0 | Total Loss: 0.0165 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: -0.0007
Sparsity: 139.3 | Dead Features: 0 | Total Loss: 0.0339 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0182 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: -0.0806
Sparsity: 135.6 | Dead Features: 0 | Total Loss: 0.0403 | Reconstruction Loss: 0.0193 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 19046400 | Self Similarity: -0.0032
Sparsity: 107.2 | Dead Features: 0 | Total Loss: 0.0858 | Reconstruction Loss: 0.0473 | L1 Loss: 0.0385 | l1_alpha: 8.0000e-04 | Tokens: 19

 17%|█▋        | 9404/55054 [04:39<21:54, 34.74it/s]

Sparsity: 13.5 | Dead Features: 0 | Total Loss: 0.0096 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: -0.0147
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0075 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: 0.0313
Sparsity: 57.3 | Dead Features: 0 | Total Loss: 0.0189 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0083 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: -0.0008
Sparsity: 144.0 | Dead Features: 0 | Total Loss: 0.0374 | Reconstruction Loss: 0.0179 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: -0.0808
Sparsity: 133.7 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0252 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 19251200 | Self Similarity: -0.0028
Sparsity: 109.1 | Dead Features: 0 | Total Loss: 0.0950 | Reconstruction Loss: 0.0526 | L1 Loss: 0.0424 | l1_alpha: 8.0000e-04 | Tokens: 19

 17%|█▋        | 9504/55054 [04:42<22:40, 33.49it/s]

Sparsity: 11.2 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: -0.0149
Sparsity: 34.7 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: 0.0315
Sparsity: 51.3 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: -0.0009
Sparsity: 141.5 | Dead Features: 0 | Total Loss: 0.0341 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: -0.0807
Sparsity: 135.4 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 19456000 | Self Similarity: -0.0032
Sparsity: 108.3 | Dead Features: 0 | Total Loss: 0.0872 | Reconstruction Loss: 0.0482 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 19

 17%|█▋        | 9604/55054 [04:45<22:52, 33.12it/s]

Sparsity: 13.0 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: -0.0149
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: 0.0315
Sparsity: 52.7 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: -0.0012
Sparsity: 146.2 | Dead Features: 0 | Total Loss: 0.0351 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: -0.0807
Sparsity: 139.9 | Dead Features: 0 | Total Loss: 0.0419 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 19660800 | Self Similarity: -0.0033
Sparsity: 110.2 | Dead Features: 0 | Total Loss: 0.0877 | Reconstruction Loss: 0.0476 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens: 19

 18%|█▊        | 9704/55054 [04:48<22:44, 33.24it/s]

Sparsity: 13.4 | Dead Features: 0 | Total Loss: 0.0095 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: -0.0151
Sparsity: 36.8 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: 0.0310
Sparsity: 55.4 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: -0.0008
Sparsity: 148.4 | Dead Features: 0 | Total Loss: 0.0373 | Reconstruction Loss: 0.0176 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: -0.0807
Sparsity: 142.1 | Dead Features: 0 | Total Loss: 0.0455 | Reconstruction Loss: 0.0224 | L1 Loss: 0.0231 | l1_alpha: 8.0000e-04 | Tokens: 19865600 | Self Similarity: -0.0036
Sparsity: 110.7 | Dead Features: 0 | Total Loss: 0.0920 | Reconstruction Loss: 0.0509 | L1 Loss: 0.0411 | l1_alpha: 8.0000e-04 | Tokens: 19

 18%|█▊        | 9804/55054 [04:51<22:32, 33.45it/s]

Sparsity: 11.7 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: -0.0149
Sparsity: 33.4 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0065 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: 0.0311
Sparsity: 51.6 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: -0.0008
Sparsity: 141.5 | Dead Features: 0 | Total Loss: 0.0339 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: -0.0807
Sparsity: 133.6 | Dead Features: 0 | Total Loss: 0.0405 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 20070400 | Self Similarity: -0.0034
Sparsity: 105.6 | Dead Features: 0 | Total Loss: 0.0859 | Reconstruction Loss: 0.0470 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 20

 18%|█▊        | 9904/55054 [04:54<22:31, 33.41it/s]

Sparsity: 11.8 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: -0.0149
Sparsity: 37.1 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: 0.0313
Sparsity: 53.0 | Dead Features: 0 | Total Loss: 0.0173 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: -0.0007
Sparsity: 143.5 | Dead Features: 0 | Total Loss: 0.0351 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: -0.0805
Sparsity: 135.2 | Dead Features: 0 | Total Loss: 0.0425 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 20275200 | Self Similarity: -0.0035
Sparsity: 110.3 | Dead Features: 0 | Total Loss: 0.0892 | Reconstruction Loss: 0.0484 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens: 20

 18%|█▊        | 10004/55054 [04:57<22:09, 33.88it/s]

Sparsity: 12.7 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: -0.0149
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: 0.0313
Sparsity: 51.7 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: -0.0007
Sparsity: 144.7 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: -0.0808
Sparsity: 137.3 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 20480000 | Self Similarity: -0.0032
Sparsity: 109.0 | Dead Features: 0 | Total Loss: 0.0887 | Reconstruction Loss: 0.0488 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 20

 18%|█▊        | 10104/55054 [05:00<22:27, 33.36it/s]

Sparsity: 14.3 | Dead Features: 0 | Total Loss: 0.0099 | Reconstruction Loss: 0.0044 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: -0.0149
Sparsity: 39.1 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0078 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: 0.0314
Sparsity: 55.1 | Dead Features: 0 | Total Loss: 0.0182 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: -0.0007
Sparsity: 150.3 | Dead Features: 0 | Total Loss: 0.0369 | Reconstruction Loss: 0.0171 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: -0.0808
Sparsity: 142.7 | Dead Features: 0 | Total Loss: 0.0444 | Reconstruction Loss: 0.0218 | L1 Loss: 0.0226 | l1_alpha: 8.0000e-04 | Tokens: 20684800 | Self Similarity: -0.0035
Sparsity: 111.8 | Dead Features: 0 | Total Loss: 0.0939 | Reconstruction Loss: 0.0522 | L1 Loss: 0.0417 | l1_alpha: 8.0000e-04 | Tokens: 20

 19%|█▊        | 10204/55054 [05:03<22:19, 33.49it/s]

Sparsity: 12.3 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: -0.0150
Sparsity: 37.7 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: 0.0315
Sparsity: 53.7 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0100 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: -0.0008
Sparsity: 145.1 | Dead Features: 0 | Total Loss: 0.0356 | Reconstruction Loss: 0.0166 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: -0.0807
Sparsity: 136.9 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 20889600 | Self Similarity: -0.0033
Sparsity: 111.7 | Dead Features: 0 | Total Loss: 0.0906 | Reconstruction Loss: 0.0500 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 20

 19%|█▊        | 10304/55054 [05:06<22:33, 33.06it/s]

Sparsity: 14.2 | Dead Features: 0 | Total Loss: 0.0097 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: -0.0148
Sparsity: 37.8 | Dead Features: 0 | Total Loss: 0.0122 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: 0.0314
Sparsity: 54.5 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: -0.0008
Sparsity: 146.0 | Dead Features: 0 | Total Loss: 0.0348 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: -0.0809
Sparsity: 142.9 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0204 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 21094400 | Self Similarity: -0.0032
Sparsity: 112.4 | Dead Features: 0 | Total Loss: 0.0892 | Reconstruction Loss: 0.0489 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 21

 19%|█▉        | 10404/55054 [05:09<22:12, 33.50it/s]

Sparsity: 13.6 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: -0.0147
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: 0.0313
Sparsity: 54.1 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: -0.0008
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: -0.0807
Sparsity: 141.9 | Dead Features: 0 | Total Loss: 0.0423 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 21299200 | Self Similarity: -0.0032
Sparsity: 111.2 | Dead Features: 0 | Total Loss: 0.0886 | Reconstruction Loss: 0.0483 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 21

 19%|█▉        | 10504/55054 [05:12<22:14, 33.38it/s]

Sparsity: 13.1 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: -0.0147
Sparsity: 38.3 | Dead Features: 0 | Total Loss: 0.0126 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: 0.0310
Sparsity: 54.8 | Dead Features: 0 | Total Loss: 0.0182 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: -0.0010
Sparsity: 149.6 | Dead Features: 0 | Total Loss: 0.0368 | Reconstruction Loss: 0.0171 | L1 Loss: 0.0197 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: -0.0808
Sparsity: 143.9 | Dead Features: 0 | Total Loss: 0.0446 | Reconstruction Loss: 0.0211 | L1 Loss: 0.0236 | l1_alpha: 8.0000e-04 | Tokens: 21504000 | Self Similarity: -0.0033
Sparsity: 108.8 | Dead Features: 0 | Total Loss: 0.0934 | Reconstruction Loss: 0.0526 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens: 21

 19%|█▉        | 10604/55054 [05:15<22:12, 33.36it/s]

Sparsity: 12.0 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: -0.0147
Sparsity: 35.4 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: 0.0313
Sparsity: 53.7 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0079 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: -0.0010
Sparsity: 144.1 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: -0.0807
Sparsity: 138.7 | Dead Features: 0 | Total Loss: 0.0416 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 21708800 | Self Similarity: -0.0032
Sparsity: 108.0 | Dead Features: 0 | Total Loss: 0.0862 | Reconstruction Loss: 0.0470 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 21

 19%|█▉        | 10704/55054 [05:18<22:04, 33.49it/s]

Sparsity: 12.3 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: -0.0148
Sparsity: 36.2 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: 0.0313
Sparsity: 52.6 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: -0.0009
Sparsity: 146.4 | Dead Features: 0 | Total Loss: 0.0353 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: -0.0806
Sparsity: 140.4 | Dead Features: 0 | Total Loss: 0.0428 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 21913600 | Self Similarity: -0.0033
Sparsity: 110.6 | Dead Features: 0 | Total Loss: 0.0882 | Reconstruction Loss: 0.0483 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 21

 20%|█▉        | 10808/55054 [05:21<19:32, 37.74it/s]

Sparsity: 11.1 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: -0.0148
Sparsity: 34.0 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: 0.0311
Sparsity: 50.7 | Dead Features: 0 | Total Loss: 0.0166 | Reconstruction Loss: 0.0093 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: -0.0010
Sparsity: 141.0 | Dead Features: 0 | Total Loss: 0.0343 | Reconstruction Loss: 0.0164 | L1 Loss: 0.0180 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: -0.0807
Sparsity: 139.9 | Dead Features: 0 | Total Loss: 0.0409 | Reconstruction Loss: 0.0194 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 22118400 | Self Similarity: -0.0033
Sparsity: 112.3 | Dead Features: 0 | Total Loss: 0.0851 | Reconstruction Loss: 0.0461 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 22

 20%|█▉        | 10904/55054 [05:23<22:08, 33.23it/s]

Sparsity: 12.6 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: -0.0147
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: 0.0314
Sparsity: 52.8 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: -0.0009
Sparsity: 143.5 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: -0.0806
Sparsity: 135.6 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0202 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 22323200 | Self Similarity: -0.0033
Sparsity: 108.4 | Dead Features: 0 | Total Loss: 0.0856 | Reconstruction Loss: 0.0464 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 22

 20%|█▉        | 11004/55054 [05:26<22:01, 33.34it/s]

Sparsity: 12.1 | Dead Features: 0 | Total Loss: 0.0084 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: -0.0147
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: 0.0314
Sparsity: 50.8 | Dead Features: 0 | Total Loss: 0.0165 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: -0.0010
Sparsity: 143.0 | Dead Features: 0 | Total Loss: 0.0335 | Reconstruction Loss: 0.0152 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: -0.0808
Sparsity: 135.7 | Dead Features: 0 | Total Loss: 0.0397 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0207 | l1_alpha: 8.0000e-04 | Tokens: 22528000 | Self Similarity: -0.0033
Sparsity: 110.2 | Dead Features: 0 | Total Loss: 0.0823 | Reconstruction Loss: 0.0439 | L1 Loss: 0.0384 | l1_alpha: 8.0000e-04 | Tokens: 22

 20%|██        | 11104/55054 [05:29<21:26, 34.18it/s]

Sparsity: 13.2 | Dead Features: 0 | Total Loss: 0.0094 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: -0.0146
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0123 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: 0.0315
Sparsity: 55.6 | Dead Features: 0 | Total Loss: 0.0179 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: -0.0009
Sparsity: 149.0 | Dead Features: 0 | Total Loss: 0.0367 | Reconstruction Loss: 0.0172 | L1 Loss: 0.0195 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: -0.0806
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0440 | Reconstruction Loss: 0.0217 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 22732800 | Self Similarity: -0.0031
Sparsity: 112.7 | Dead Features: 0 | Total Loss: 0.0955 | Reconstruction Loss: 0.0523 | L1 Loss: 0.0432 | l1_alpha: 8.0000e-04 | Tokens: 22

 20%|██        | 11204/55054 [05:32<21:51, 33.42it/s]

Sparsity: 10.6 | Dead Features: 0 | Total Loss: 0.0079 | Reconstruction Loss: 0.0030 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: -0.0151
Sparsity: 34.2 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: 0.0314
Sparsity: 52.6 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: -0.0008
Sparsity: 142.8 | Dead Features: 0 | Total Loss: 0.0346 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: -0.0807
Sparsity: 137.9 | Dead Features: 0 | Total Loss: 0.0413 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 22937600 | Self Similarity: -0.0032
Sparsity: 113.2 | Dead Features: 0 | Total Loss: 0.0881 | Reconstruction Loss: 0.0469 | L1 Loss: 0.0413 | l1_alpha: 8.0000e-04 | Tokens: 22

 21%|██        | 11304/55054 [05:35<21:54, 33.29it/s]

Sparsity: 12.9 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: -0.0149
Sparsity: 36.7 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: 0.0315
Sparsity: 53.5 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: -0.0010
Sparsity: 144.0 | Dead Features: 0 | Total Loss: 0.0349 | Reconstruction Loss: 0.0165 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: -0.0807
Sparsity: 138.5 | Dead Features: 0 | Total Loss: 0.0427 | Reconstruction Loss: 0.0213 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 23142400 | Self Similarity: -0.0034
Sparsity: 106.1 | Dead Features: 0 | Total Loss: 0.0928 | Reconstruction Loss: 0.0527 | L1 Loss: 0.0400 | l1_alpha: 8.0000e-04 | Tokens: 23

 21%|██        | 11404/55054 [05:38<21:52, 33.26it/s]

Sparsity: 11.8 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: -0.0150
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0119 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: 0.0316
Sparsity: 53.1 | Dead Features: 0 | Total Loss: 0.0177 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: -0.0010
Sparsity: 145.0 | Dead Features: 0 | Total Loss: 0.0359 | Reconstruction Loss: 0.0169 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: -0.0807
Sparsity: 139.1 | Dead Features: 0 | Total Loss: 0.0441 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 23347200 | Self Similarity: -0.0032
Sparsity: 115.1 | Dead Features: 0 | Total Loss: 0.0913 | Reconstruction Loss: 0.0498 | L1 Loss: 0.0415 | l1_alpha: 8.0000e-04 | Tokens: 23

 21%|██        | 11504/55054 [05:41<21:40, 33.49it/s]

Sparsity: 11.6 | Dead Features: 0 | Total Loss: 0.0084 | Reconstruction Loss: 0.0034 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: -0.0149
Sparsity: 35.1 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: 0.0314
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: -0.0011
Sparsity: 145.9 | Dead Features: 0 | Total Loss: 0.0351 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: -0.0805
Sparsity: 139.1 | Dead Features: 0 | Total Loss: 0.0431 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0222 | l1_alpha: 8.0000e-04 | Tokens: 23552000 | Self Similarity: -0.0030
Sparsity: 114.3 | Dead Features: 0 | Total Loss: 0.0889 | Reconstruction Loss: 0.0479 | L1 Loss: 0.0410 | l1_alpha: 8.0000e-04 | Tokens: 23

 21%|██        | 11604/55054 [05:44<21:44, 33.30it/s]

Sparsity: 12.4 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: -0.0148
Sparsity: 35.7 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: 0.0313
Sparsity: 52.4 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: -0.0011
Sparsity: 144.5 | Dead Features: 0 | Total Loss: 0.0345 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0186 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: -0.0806
Sparsity: 136.6 | Dead Features: 0 | Total Loss: 0.0419 | Reconstruction Loss: 0.0203 | L1 Loss: 0.0216 | l1_alpha: 8.0000e-04 | Tokens: 23756800 | Self Similarity: -0.0029
Sparsity: 111.5 | Dead Features: 0 | Total Loss: 0.0858 | Reconstruction Loss: 0.0466 | L1 Loss: 0.0392 | l1_alpha: 8.0000e-04 | Tokens: 23

 21%|██▏       | 11704/55054 [05:47<21:00, 34.39it/s]

Sparsity: 12.9 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0040 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: -0.0149
Sparsity: 36.9 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: 0.0312
Sparsity: 52.5 | Dead Features: 0 | Total Loss: 0.0168 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: -0.0010
Sparsity: 142.5 | Dead Features: 0 | Total Loss: 0.0348 | Reconstruction Loss: 0.0165 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: -0.0806
Sparsity: 134.9 | Dead Features: 0 | Total Loss: 0.0419 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 23961600 | Self Similarity: -0.0030
Sparsity: 109.3 | Dead Features: 0 | Total Loss: 0.0858 | Reconstruction Loss: 0.0463 | L1 Loss: 0.0396 | l1_alpha: 8.0000e-04 | Tokens: 23

 21%|██▏       | 11804/55054 [05:50<21:23, 33.71it/s]

Sparsity: 13.0 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: -0.0149
Sparsity: 35.9 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: 0.0314
Sparsity: 52.6 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: -0.0012
Sparsity: 144.9 | Dead Features: 0 | Total Loss: 0.0348 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: -0.0806
Sparsity: 136.6 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0212 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 24166400 | Self Similarity: -0.0032
Sparsity: 113.5 | Dead Features: 0 | Total Loss: 0.0890 | Reconstruction Loss: 0.0485 | L1 Loss: 0.0405 | l1_alpha: 8.0000e-04 | Tokens: 24

 22%|██▏       | 11904/55054 [05:53<21:35, 33.30it/s]

Sparsity: 14.2 | Dead Features: 0 | Total Loss: 0.0097 | Reconstruction Loss: 0.0041 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: -0.0148
Sparsity: 38.4 | Dead Features: 0 | Total Loss: 0.0127 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: 0.0313
Sparsity: 54.8 | Dead Features: 0 | Total Loss: 0.0180 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: -0.0011
Sparsity: 149.1 | Dead Features: 0 | Total Loss: 0.0366 | Reconstruction Loss: 0.0168 | L1 Loss: 0.0198 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: -0.0806
Sparsity: 142.2 | Dead Features: 0 | Total Loss: 0.0438 | Reconstruction Loss: 0.0214 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 24371200 | Self Similarity: -0.0031
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0921 | Reconstruction Loss: 0.0497 | L1 Loss: 0.0424 | l1_alpha: 8.0000e-04 | Tokens: 24

 22%|██▏       | 12004/55054 [05:56<21:36, 33.22it/s]

Sparsity: 13.0 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: -0.0148
Sparsity: 36.0 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: 0.0314
Sparsity: 52.3 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: -0.0012
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0354 | Reconstruction Loss: 0.0165 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: -0.0808
Sparsity: 145.8 | Dead Features: 0 | Total Loss: 0.0435 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0227 | l1_alpha: 8.0000e-04 | Tokens: 24576000 | Self Similarity: -0.0035
Sparsity: 114.6 | Dead Features: 0 | Total Loss: 0.0913 | Reconstruction Loss: 0.0499 | L1 Loss: 0.0414 | l1_alpha: 8.0000e-04 | Tokens: 24

 22%|██▏       | 12104/55054 [05:59<21:26, 33.38it/s]

Sparsity: 13.2 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: -0.0150
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: 0.0313
Sparsity: 52.5 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: -0.0013
Sparsity: 145.1 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: -0.0806
Sparsity: 136.0 | Dead Features: 0 | Total Loss: 0.0418 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 24780800 | Self Similarity: -0.0032
Sparsity: 111.3 | Dead Features: 0 | Total Loss: 0.0851 | Reconstruction Loss: 0.0454 | L1 Loss: 0.0397 | l1_alpha: 8.0000e-04 | Tokens: 24

 22%|██▏       | 12204/55054 [06:02<21:27, 33.28it/s]

Sparsity: 11.1 | Dead Features: 0 | Total Loss: 0.0082 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: -0.0151
Sparsity: 34.9 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0067 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: 0.0315
Sparsity: 51.9 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: -0.0012
Sparsity: 143.5 | Dead Features: 0 | Total Loss: 0.0342 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: -0.0806
Sparsity: 137.4 | Dead Features: 0 | Total Loss: 0.0408 | Reconstruction Loss: 0.0197 | L1 Loss: 0.0211 | l1_alpha: 8.0000e-04 | Tokens: 24985600 | Self Similarity: -0.0031
Sparsity: 114.3 | Dead Features: 0 | Total Loss: 0.0859 | Reconstruction Loss: 0.0460 | L1 Loss: 0.0399 | l1_alpha: 8.0000e-04 | Tokens: 24

 22%|██▏       | 12304/55054 [06:05<21:18, 33.44it/s]

Sparsity: 16.7 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0052 | L1 Loss: 0.0060 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: -0.0148
Sparsity: 40.3 | Dead Features: 0 | Total Loss: 0.0128 | Reconstruction Loss: 0.0076 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: 0.0314
Sparsity: 59.1 | Dead Features: 0 | Total Loss: 0.0183 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0080 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: -0.0013
Sparsity: 151.5 | Dead Features: 0 | Total Loss: 0.0376 | Reconstruction Loss: 0.0175 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: -0.0805
Sparsity: 147.7 | Dead Features: 0 | Total Loss: 0.0457 | Reconstruction Loss: 0.0215 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 25190400 | Self Similarity: -0.0032
Sparsity: 116.1 | Dead Features: 0 | Total Loss: 0.0941 | Reconstruction Loss: 0.0497 | L1 Loss: 0.0443 | l1_alpha: 8.0000e-04 | Tokens: 25

 23%|██▎       | 12404/55054 [06:08<21:39, 32.83it/s]

Sparsity: 11.8 | Dead Features: 0 | Total Loss: 0.0086 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: -0.0147
Sparsity: 33.1 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: 0.0316
Sparsity: 50.0 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: -0.0014
Sparsity: 143.0 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: -0.0802
Sparsity: 134.1 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0213 | l1_alpha: 8.0000e-04 | Tokens: 25395200 | Self Similarity: -0.0031
Sparsity: 107.5 | Dead Features: 0 | Total Loss: 0.0841 | Reconstruction Loss: 0.0462 | L1 Loss: 0.0379 | l1_alpha: 8.0000e-04 | Tokens: 25

 23%|██▎       | 12504/55054 [06:11<21:06, 33.59it/s]

Sparsity: 12.6 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: -0.0149
Sparsity: 36.4 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: 0.0311
Sparsity: 53.6 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: -0.0013
Sparsity: 143.0 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0165 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: -0.0804
Sparsity: 137.5 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0207 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 25600000 | Self Similarity: -0.0031
Sparsity: 114.5 | Dead Features: 0 | Total Loss: 0.0873 | Reconstruction Loss: 0.0465 | L1 Loss: 0.0408 | l1_alpha: 8.0000e-04 | Tokens: 25

 23%|██▎       | 12604/55054 [06:14<21:03, 33.60it/s]

Sparsity: 13.2 | Dead Features: 0 | Total Loss: 0.0092 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: -0.0150
Sparsity: 37.5 | Dead Features: 0 | Total Loss: 0.0124 | Reconstruction Loss: 0.0073 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: 0.0313
Sparsity: 54.8 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0099 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: -0.0012
Sparsity: 147.1 | Dead Features: 0 | Total Loss: 0.0354 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0193 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: -0.0802
Sparsity: 138.3 | Dead Features: 0 | Total Loss: 0.0426 | Reconstruction Loss: 0.0206 | L1 Loss: 0.0221 | l1_alpha: 8.0000e-04 | Tokens: 25804800 | Self Similarity: -0.0032
Sparsity: 116.9 | Dead Features: 0 | Total Loss: 0.0892 | Reconstruction Loss: 0.0484 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 25

 23%|██▎       | 12704/55054 [06:17<21:57, 32.14it/s]

Sparsity: 13.4 | Dead Features: 0 | Total Loss: 0.0093 | Reconstruction Loss: 0.0039 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: -0.0148
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0072 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: 0.0314
Sparsity: 55.5 | Dead Features: 0 | Total Loss: 0.0179 | Reconstruction Loss: 0.0103 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: -0.0012
Sparsity: 146.1 | Dead Features: 0 | Total Loss: 0.0361 | Reconstruction Loss: 0.0170 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: -0.0804
Sparsity: 143.2 | Dead Features: 0 | Total Loss: 0.0438 | Reconstruction Loss: 0.0212 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 26009600 | Self Similarity: -0.0031
Sparsity: 116.6 | Dead Features: 0 | Total Loss: 0.0888 | Reconstruction Loss: 0.0478 | L1 Loss: 0.0410 | l1_alpha: 8.0000e-04 | Tokens: 26

 23%|██▎       | 12804/55054 [06:20<21:36, 32.59it/s]

Sparsity: 13.6 | Dead Features: 0 | Total Loss: 0.0096 | Reconstruction Loss: 0.0042 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: -0.0146
Sparsity: 37.9 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0079 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: 0.0313
Sparsity: 54.6 | Dead Features: 0 | Total Loss: 0.0183 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: -0.0012
Sparsity: 141.9 | Dead Features: 0 | Total Loss: 0.0370 | Reconstruction Loss: 0.0181 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: -0.0802
Sparsity: 140.5 | Dead Features: 0 | Total Loss: 0.0452 | Reconstruction Loss: 0.0227 | L1 Loss: 0.0225 | l1_alpha: 8.0000e-04 | Tokens: 26214400 | Self Similarity: -0.0027
Sparsity: 114.9 | Dead Features: 0 | Total Loss: 0.0900 | Reconstruction Loss: 0.0495 | L1 Loss: 0.0404 | l1_alpha: 8.0000e-04 | Tokens: 26

 23%|██▎       | 12904/55054 [06:23<21:39, 32.43it/s]

Sparsity: 12.5 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: -0.0147
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: 0.0314
Sparsity: 50.3 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: -0.0013
Sparsity: 139.2 | Dead Features: 0 | Total Loss: 0.0339 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0181 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: -0.0799
Sparsity: 132.4 | Dead Features: 0 | Total Loss: 0.0410 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 26419200 | Self Similarity: -0.0028
Sparsity: 114.0 | Dead Features: 0 | Total Loss: 0.0852 | Reconstruction Loss: 0.0462 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 26

 24%|██▎       | 13004/55054 [06:26<21:31, 32.56it/s]

Sparsity: 12.5 | Dead Features: 0 | Total Loss: 0.0089 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: -0.0148
Sparsity: 34.3 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: 0.0314
Sparsity: 52.4 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: -0.0012
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: -0.0800
Sparsity: 134.9 | Dead Features: 0 | Total Loss: 0.0414 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0209 | l1_alpha: 8.0000e-04 | Tokens: 26624000 | Self Similarity: -0.0028
Sparsity: 99.0 | Dead Features: 0 | Total Loss: 0.0881 | Reconstruction Loss: 0.0498 | L1 Loss: 0.0383 | l1_alpha: 8.0000e-04 | Tokens: 266

 24%|██▍       | 13104/55054 [06:29<21:31, 32.48it/s]

Sparsity: 11.6 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0036 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: -0.0147
Sparsity: 34.1 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: 0.0314
Sparsity: 50.9 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: -0.0012
Sparsity: 146.6 | Dead Features: 0 | Total Loss: 0.0354 | Reconstruction Loss: 0.0160 | L1 Loss: 0.0194 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: -0.0800
Sparsity: 132.2 | Dead Features: 0 | Total Loss: 0.0436 | Reconstruction Loss: 0.0212 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 26828800 | Self Similarity: -0.0029
Sparsity: 108.2 | Dead Features: 0 | Total Loss: 0.0855 | Reconstruction Loss: 0.0448 | L1 Loss: 0.0406 | l1_alpha: 8.0000e-04 | Tokens: 26

 24%|██▍       | 13204/55054 [06:32<21:15, 32.80it/s]

Sparsity: 12.1 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: -0.0146
Sparsity: 33.9 | Dead Features: 0 | Total Loss: 0.0111 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: 0.0313
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0167 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: -0.0012
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0338 | Reconstruction Loss: 0.0153 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: -0.0800
Sparsity: 134.8 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 27033600 | Self Similarity: -0.0028
Sparsity: 110.8 | Dead Features: 0 | Total Loss: 0.0824 | Reconstruction Loss: 0.0434 | L1 Loss: 0.0390 | l1_alpha: 8.0000e-04 | Tokens: 27

 24%|██▍       | 13304/55054 [06:35<21:31, 32.34it/s]

Sparsity: 10.9 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: -0.0147
Sparsity: 34.1 | Dead Features: 0 | Total Loss: 0.0114 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: 0.0312
Sparsity: 51.3 | Dead Features: 0 | Total Loss: 0.0172 | Reconstruction Loss: 0.0097 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: -0.0013
Sparsity: 144.4 | Dead Features: 0 | Total Loss: 0.0356 | Reconstruction Loss: 0.0166 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: -0.0802
Sparsity: 136.9 | Dead Features: 0 | Total Loss: 0.0428 | Reconstruction Loss: 0.0208 | L1 Loss: 0.0220 | l1_alpha: 8.0000e-04 | Tokens: 27238400 | Self Similarity: -0.0029
Sparsity: 114.6 | Dead Features: 0 | Total Loss: 0.0883 | Reconstruction Loss: 0.0470 | L1 Loss: 0.0413 | l1_alpha: 8.0000e-04 | Tokens: 27

 24%|██▍       | 13404/55054 [06:38<21:31, 32.26it/s]

Sparsity: 10.9 | Dead Features: 0 | Total Loss: 0.0081 | Reconstruction Loss: 0.0031 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: -0.0147
Sparsity: 32.9 | Dead Features: 0 | Total Loss: 0.0112 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: 0.0312
Sparsity: 49.5 | Dead Features: 0 | Total Loss: 0.0166 | Reconstruction Loss: 0.0091 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: -0.0012
Sparsity: 139.5 | Dead Features: 0 | Total Loss: 0.0333 | Reconstruction Loss: 0.0153 | L1 Loss: 0.0179 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: -0.0801
Sparsity: 127.7 | Dead Features: 0 | Total Loss: 0.0394 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0202 | l1_alpha: 8.0000e-04 | Tokens: 27443200 | Self Similarity: -0.0029
Sparsity: 111.5 | Dead Features: 0 | Total Loss: 0.0818 | Reconstruction Loss: 0.0431 | L1 Loss: 0.0388 | l1_alpha: 8.0000e-04 | Tokens: 27

 25%|██▍       | 13504/55054 [06:41<21:38, 31.99it/s]

Sparsity: 12.4 | Dead Features: 0 | Total Loss: 0.0087 | Reconstruction Loss: 0.0035 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: -0.0146
Sparsity: 34.8 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0068 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: 0.0312
Sparsity: 51.2 | Dead Features: 0 | Total Loss: 0.0171 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: -0.0013
Sparsity: 142.6 | Dead Features: 0 | Total Loss: 0.0341 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0183 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: -0.0802
Sparsity: 135.6 | Dead Features: 0 | Total Loss: 0.0407 | Reconstruction Loss: 0.0195 | L1 Loss: 0.0212 | l1_alpha: 8.0000e-04 | Tokens: 27648000 | Self Similarity: -0.0027
Sparsity: 113.3 | Dead Features: 0 | Total Loss: 0.0826 | Reconstruction Loss: 0.0437 | L1 Loss: 0.0389 | l1_alpha: 8.0000e-04 | Tokens: 27

 25%|██▍       | 13604/55054 [06:45<21:20, 32.36it/s]

Sparsity: 13.1 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: -0.0145
Sparsity: 34.4 | Dead Features: 0 | Total Loss: 0.0117 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: 0.0313
Sparsity: 52.1 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0096 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: -0.0013
Sparsity: 146.4 | Dead Features: 0 | Total Loss: 0.0350 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0189 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: -0.0801
Sparsity: 137.5 | Dead Features: 0 | Total Loss: 0.0428 | Reconstruction Loss: 0.0209 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 27852800 | Self Similarity: -0.0027
Sparsity: 117.7 | Dead Features: 0 | Total Loss: 0.0897 | Reconstruction Loss: 0.0482 | L1 Loss: 0.0415 | l1_alpha: 8.0000e-04 | Tokens: 27

 25%|██▍       | 13704/55054 [06:48<20:49, 33.10it/s]

Sparsity: 12.7 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: -0.0144
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0120 | Reconstruction Loss: 0.0070 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: 0.0313
Sparsity: 54.2 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: -0.0014
Sparsity: 144.8 | Dead Features: 0 | Total Loss: 0.0349 | Reconstruction Loss: 0.0162 | L1 Loss: 0.0187 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: -0.0803
Sparsity: 140.3 | Dead Features: 0 | Total Loss: 0.0429 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0224 | l1_alpha: 8.0000e-04 | Tokens: 28057600 | Self Similarity: -0.0029
Sparsity: 118.6 | Dead Features: 0 | Total Loss: 0.0927 | Reconstruction Loss: 0.0500 | L1 Loss: 0.0427 | l1_alpha: 8.0000e-04 | Tokens: 28

 25%|██▌       | 13804/55054 [06:50<20:36, 33.35it/s]

Sparsity: 12.7 | Dead Features: 0 | Total Loss: 0.0090 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: -0.0144
Sparsity: 36.1 | Dead Features: 0 | Total Loss: 0.0118 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: 0.0313
Sparsity: 53.5 | Dead Features: 0 | Total Loss: 0.0176 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0078 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: -0.0014
Sparsity: 144.8 | Dead Features: 0 | Total Loss: 0.0351 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: -0.0801
Sparsity: 136.6 | Dead Features: 0 | Total Loss: 0.0427 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 28262400 | Self Similarity: -0.0028
Sparsity: 110.9 | Dead Features: 0 | Total Loss: 0.0875 | Reconstruction Loss: 0.0472 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 28

 25%|██▌       | 13904/55054 [06:53<20:34, 33.34it/s]

Sparsity: 11.2 | Dead Features: 0 | Total Loss: 0.0083 | Reconstruction Loss: 0.0032 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: -0.0145
Sparsity: 34.0 | Dead Features: 0 | Total Loss: 0.0116 | Reconstruction Loss: 0.0069 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: 0.0315
Sparsity: 52.9 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0094 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: -0.0015
Sparsity: 147.6 | Dead Features: 0 | Total Loss: 0.0349 | Reconstruction Loss: 0.0158 | L1 Loss: 0.0191 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: -0.0801
Sparsity: 139.1 | Dead Features: 0 | Total Loss: 0.0419 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0219 | l1_alpha: 8.0000e-04 | Tokens: 28467200 | Self Similarity: -0.0029
Sparsity: 116.2 | Dead Features: 0 | Total Loss: 0.0879 | Reconstruction Loss: 0.0470 | L1 Loss: 0.0409 | l1_alpha: 8.0000e-04 | Tokens: 28

 25%|██▌       | 14004/55054 [06:56<20:21, 33.61it/s]

Sparsity: 10.1 | Dead Features: 0 | Total Loss: 0.0078 | Reconstruction Loss: 0.0030 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: -0.0144
Sparsity: 32.2 | Dead Features: 0 | Total Loss: 0.0110 | Reconstruction Loss: 0.0064 | L1 Loss: 0.0046 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: 0.0311
Sparsity: 50.6 | Dead Features: 0 | Total Loss: 0.0166 | Reconstruction Loss: 0.0092 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: -0.0013
Sparsity: 144.5 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0156 | L1 Loss: 0.0188 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: -0.0801
Sparsity: 135.2 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0196 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 28672000 | Self Similarity: -0.0028
Sparsity: 113.4 | Dead Features: 0 | Total Loss: 0.0844 | Reconstruction Loss: 0.0450 | L1 Loss: 0.0394 | l1_alpha: 8.0000e-04 | Tokens: 28

 26%|██▌       | 14104/55054 [06:59<20:36, 33.13it/s]

Sparsity: 11.8 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: -0.0144
Sparsity: 37.2 | Dead Features: 0 | Total Loss: 0.0125 | Reconstruction Loss: 0.0074 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: 0.0313
Sparsity: 55.0 | Dead Features: 0 | Total Loss: 0.0178 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: -0.0015
Sparsity: 151.4 | Dead Features: 0 | Total Loss: 0.0371 | Reconstruction Loss: 0.0171 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: -0.0801
Sparsity: 140.8 | Dead Features: 0 | Total Loss: 0.0460 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0244 | l1_alpha: 8.0000e-04 | Tokens: 28876800 | Self Similarity: -0.0025
Sparsity: 117.0 | Dead Features: 0 | Total Loss: 0.0960 | Reconstruction Loss: 0.0529 | L1 Loss: 0.0431 | l1_alpha: 8.0000e-04 | Tokens: 28

 26%|██▌       | 14204/55054 [07:02<20:30, 33.19it/s]

Sparsity: 10.1 | Dead Features: 0 | Total Loss: 0.0078 | Reconstruction Loss: 0.0029 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: -0.0144
Sparsity: 33.2 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: 0.0311
Sparsity: 50.9 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: -0.0014
Sparsity: 141.4 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0184 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: -0.0800
Sparsity: 134.4 | Dead Features: 0 | Total Loss: 0.0415 | Reconstruction Loss: 0.0200 | L1 Loss: 0.0215 | l1_alpha: 8.0000e-04 | Tokens: 29081600 | Self Similarity: -0.0025
Sparsity: 114.1 | Dead Features: 0 | Total Loss: 0.0854 | Reconstruction Loss: 0.0447 | L1 Loss: 0.0407 | l1_alpha: 8.0000e-04 | Tokens: 29

 26%|██▌       | 14304/55054 [07:05<20:18, 33.45it/s]

Sparsity: 11.7 | Dead Features: 0 | Total Loss: 0.0085 | Reconstruction Loss: 0.0033 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: -0.0145
Sparsity: 34.6 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0047 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: 0.0311
Sparsity: 52.0 | Dead Features: 0 | Total Loss: 0.0169 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0074 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: -0.0014
Sparsity: 141.3 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0163 | L1 Loss: 0.0181 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: -0.0798
Sparsity: 139.8 | Dead Features: 0 | Total Loss: 0.0415 | Reconstruction Loss: 0.0198 | L1 Loss: 0.0217 | l1_alpha: 8.0000e-04 | Tokens: 29286400 | Self Similarity: -0.0025
Sparsity: 114.1 | Dead Features: 0 | Total Loss: 0.0859 | Reconstruction Loss: 0.0458 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens: 29

 26%|██▌       | 14404/55054 [07:08<20:19, 33.33it/s]

Sparsity: 11.7 | Dead Features: 0 | Total Loss: 0.0088 | Reconstruction Loss: 0.0037 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: -0.0145
Sparsity: 35.3 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0066 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: 0.0311
Sparsity: 52.2 | Dead Features: 0 | Total Loss: 0.0170 | Reconstruction Loss: 0.0095 | L1 Loss: 0.0075 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: -0.0014
Sparsity: 143.8 | Dead Features: 0 | Total Loss: 0.0346 | Reconstruction Loss: 0.0161 | L1 Loss: 0.0185 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: -0.0798
Sparsity: 137.6 | Dead Features: 0 | Total Loss: 0.0415 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0214 | l1_alpha: 8.0000e-04 | Tokens: 29491200 | Self Similarity: -0.0027
Sparsity: 115.2 | Dead Features: 0 | Total Loss: 0.0847 | Reconstruction Loss: 0.0446 | L1 Loss: 0.0401 | l1_alpha: 8.0000e-04 | Tokens: 29

 26%|██▋       | 14504/55054 [07:11<20:19, 33.24it/s]

Sparsity: 12.8 | Dead Features: 0 | Total Loss: 0.0091 | Reconstruction Loss: 0.0038 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: -0.0145
Sparsity: 36.5 | Dead Features: 0 | Total Loss: 0.0121 | Reconstruction Loss: 0.0071 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: 0.0310
Sparsity: 54.2 | Dead Features: 0 | Total Loss: 0.0174 | Reconstruction Loss: 0.0098 | L1 Loss: 0.0076 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: -0.0013
Sparsity: 147.9 | Dead Features: 0 | Total Loss: 0.0349 | Reconstruction Loss: 0.0159 | L1 Loss: 0.0190 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: -0.0799
Sparsity: 141.9 | Dead Features: 0 | Total Loss: 0.0423 | Reconstruction Loss: 0.0205 | L1 Loss: 0.0218 | l1_alpha: 8.0000e-04 | Tokens: 29696000 | Self Similarity: -0.0027
Sparsity: 115.5 | Dead Features: 0 | Total Loss: 0.0864 | Reconstruction Loss: 0.0462 | L1 Loss: 0.0402 | l1_alpha: 8.0000e-04 | Tokens: 29

 27%|██▋       | 14604/55054 [07:14<20:19, 33.16it/s]

Sparsity: 14.9 | Dead Features: 0 | Total Loss: 0.0102 | Reconstruction Loss: 0.0046 | L1 Loss: 0.0056 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: -0.0145
Sparsity: 39.6 | Dead Features: 0 | Total Loss: 0.0136 | Reconstruction Loss: 0.0083 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: 0.0309
Sparsity: 58.6 | Dead Features: 0 | Total Loss: 0.0193 | Reconstruction Loss: 0.0112 | L1 Loss: 0.0081 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: -0.0012
Sparsity: 151.1 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0187 | L1 Loss: 0.0201 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: -0.0798
Sparsity: 153.6 | Dead Features: 0 | Total Loss: 0.0475 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0242 | l1_alpha: 8.0000e-04 | Tokens: 29900800 | Self Similarity: -0.0029
Sparsity: 121.2 | Dead Features: 0 | Total Loss: 0.0948 | Reconstruction Loss: 0.0517 | L1 Loss: 0.0431 | l1_alpha: 8.0000e-04 | Tokens: 29

 27%|██▋       | 14645/55054 [07:15<20:02, 33.60it/s]


KeyboardInterrupt: 

In [None]:
# Launch a training run through setup_execute_training (defined earlier in this notebook).
# This cell: pythia-70m, ratio 1, layers 0-5, split "train", one epoch.

# --- model and data ---
model_name = "EleutherAI/pythia-70m"
dataset_name = "Elriggs/openwebtext-100k"
split = "train"

# --- run settings ---
layers = list(range(6))  # [0, 1, 2, 3, 4, 5]
ratio = 1
epoches = 1  # copied into cfg.num_epochs by setup_execute_training
seed = 0
wandb_log = False

# Every argument is passed by keyword so the call cannot drift out of sync
# with the parameter order of setup_execute_training.
setup_execute_training(
    model_name=model_name,
    dataset_name=dataset_name,
    ratio=ratio,
    layers=layers,
    seed=seed,
    wandb_log=wandb_log,
    split=split,
    epoches=epoches,
)

In [None]:
# Release GPU memory left over from earlier training cells.
# gc.collect() runs first so that tensors kept alive only by reference cycles are
# destroyed; empty_cache() can only hand back cached blocks that no live tensor occupies.
# No torch.no_grad() wrapper is needed: nothing in this cell is recorded by autograd.
import gc

gc.collect()
torch.cuda.empty_cache()

In [None]:
# Launch a training run through setup_execute_training (defined earlier in this notebook).
# This cell: pythia-70m, ratio 4, layers 0-5, split "train[:50000]", two epochs.

# --- model and data ---
model_name = "EleutherAI/pythia-70m"
dataset_name = "Elriggs/openwebtext-100k"
# NOTE(review): presumably slice syntax selecting the first 50000 examples of the
# train split — confirm against how setup_execute_training consumes `split`.
split = "train[:50000]"

# --- run settings ---
layers = list(range(6))  # [0, 1, 2, 3, 4, 5]
ratio = 4
epoches = 2  # copied into cfg.num_epochs by setup_execute_training
seed = 0
wandb_log = False

# Every argument is passed by keyword so the call cannot drift out of sync
# with the parameter order of setup_execute_training.
setup_execute_training(
    model_name=model_name,
    dataset_name=dataset_name,
    ratio=ratio,
    layers=layers,
    seed=seed,
    wandb_log=wandb_log,
    split=split,
    epoches=epoches,
)

In [None]:
# Launch a training run through setup_execute_training (defined earlier in this notebook).
# This cell: pythia-70m, ratio 4, layers 0-5, split "train[50000:]", two epochs.

# --- model and data ---
model_name = "EleutherAI/pythia-70m"
dataset_name = "Elriggs/openwebtext-100k"
# NOTE(review): presumably slice syntax selecting everything after the first 50000
# examples of the train split — confirm against how setup_execute_training consumes `split`.
split = "train[50000:]"

# --- run settings ---
layers = list(range(6))  # [0, 1, 2, 3, 4, 5]
ratio = 4
epoches = 2  # copied into cfg.num_epochs by setup_execute_training
seed = 0
wandb_log = False

# Every argument is passed by keyword so the call cannot drift out of sync
# with the parameter order of setup_execute_training.
setup_execute_training(
    model_name=model_name,
    dataset_name=dataset_name,
    ratio=ratio,
    layers=layers,
    seed=seed,
    wandb_log=wandb_log,
    split=split,
    epoches=epoches,
)

In [None]:
# Launch a training run through setup_execute_training (defined earlier in this notebook).
# This cell: pythia-160m, ratio 4, layers 0-5, split "train[:50000]", two epochs.

# --- model and data ---
model_name = "EleutherAI/pythia-160m"
dataset_name = "Elriggs/openwebtext-100k"
# NOTE(review): presumably slice syntax selecting the first 50000 examples of the
# train split — confirm against how setup_execute_training consumes `split`.
split = "train[:50000]"

# --- run settings ---
# NOTE(review): same six layer indices as the pythia-70m cells; confirm that
# covering only layers 0-5 is intended for this larger model.
layers = list(range(6))  # [0, 1, 2, 3, 4, 5]
ratio = 4
epoches = 2  # copied into cfg.num_epochs by setup_execute_training
seed = 0
wandb_log = False

# Every argument is passed by keyword so the call cannot drift out of sync
# with the parameter order of setup_execute_training.
setup_execute_training(
    model_name=model_name,
    dataset_name=dataset_name,
    ratio=ratio,
    layers=layers,
    seed=seed,
    wandb_log=wandb_log,
    split=split,
    epoches=epoches,
)

In [None]:
# Launch a training run through setup_execute_training (defined earlier in this notebook).
# This cell: pythia-160m, ratio 4, layers 0-5, split "train[50000:]", two epochs.

# --- model and data ---
model_name = "EleutherAI/pythia-160m"
dataset_name = "Elriggs/openwebtext-100k"
# NOTE(review): presumably slice syntax selecting everything after the first 50000
# examples of the train split — confirm against how setup_execute_training consumes `split`.
split = "train[50000:]"

# --- run settings ---
# NOTE(review): same six layer indices as the pythia-70m cells; confirm that
# covering only layers 0-5 is intended for this larger model.
layers = list(range(6))  # [0, 1, 2, 3, 4, 5]
ratio = 4
epoches = 2  # copied into cfg.num_epochs by setup_execute_training
seed = 0
wandb_log = False

# Every argument is passed by keyword so the call cannot drift out of sync
# with the parameter order of setup_execute_training.
setup_execute_training(
    model_name=model_name,
    dataset_name=dataset_name,
    ratio=ratio,
    layers=layers,
    seed=seed,
    wandb_log=wandb_log,
    split=split,
    epoches=epoches,
)

In [None]:
# Launch a training run through setup_execute_training (defined earlier in this notebook).
# This cell: pythia-410m, ratio 2, layers 0-5, split "train", one epoch.

# --- model and data ---
model_name = "EleutherAI/pythia-410m"
dataset_name = "Elriggs/openwebtext-100k"
split = "train"

# --- run settings ---
# NOTE(review): same six layer indices as the pythia-70m cells; confirm that
# covering only layers 0-5 is intended for this larger model.
layers = list(range(6))  # [0, 1, 2, 3, 4, 5]
ratio = 2
epoches = 1  # copied into cfg.num_epochs by setup_execute_training
seed = 0
wandb_log = False

# Every argument is passed by keyword so the call cannot drift out of sync
# with the parameter order of setup_execute_training.
setup_execute_training(
    model_name=model_name,
    dataset_name=dataset_name,
    ratio=ratio,
    layers=layers,
    seed=seed,
    wandb_log=wandb_log,
    split=split,
    epoches=epoches,
)

Activation size: 1024


Found cached dataset parquet (/root/.cache/huggingface/datasets/Elriggs___parquet/Elriggs--openwebtext-100k-79076ecafee8a6d5/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
Loading cached processed dataset at /root/.cache/huggingface/datasets/Elriggs___parquet/Elriggs--openwebtext-100k-79076ecafee8a6d5/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-62b1708338b4f701_*_of_00008.arrow


Number of tokens: 112750592


  0%|          | 1/55054 [00:00<1:51:54,  8.20it/s]

Sparsity: 1016.9 | Dead Features: 2048 | Total Loss: 0.4088 | Reconstruction Loss: 0.2168 | L1 Loss: 0.1920 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 1.0000
Sparsity: 1031.2 | Dead Features: 2048 | Total Loss: 0.1260 | Reconstruction Loss: 0.0422 | L1 Loss: 0.0837 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: -0.0000
Sparsity: 1023.0 | Dead Features: 2048 | Total Loss: 0.1994 | Reconstruction Loss: 0.0835 | L1 Loss: 0.1158 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0009
Sparsity: 1012.4 | Dead Features: 2048 | Total Loss: 0.2024 | Reconstruction Loss: 0.0876 | L1 Loss: 0.1147 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0006
Sparsity: 1037.7 | Dead Features: 2048 | Total Loss: 0.2873 | Reconstruction Loss: 0.1550 | L1 Loss: 0.1324 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Similarity: 0.0005
Sparsity: 1023.0 | Dead Features: 2048 | Total Loss: 2.1262 | Reconstruction Loss: 1.9278 | L1 Loss: 0.1984 | l1_alpha: 8.0000e-04 | Tokens: 0 | Self Simi

  0%|          | 103/55054 [00:07<1:03:43, 14.37it/s]

Sparsity: 23.1 | Dead Features: 0 | Total Loss: 0.0691 | Reconstruction Loss: 0.0589 | L1 Loss: 0.0102 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0007
Sparsity: 4.0 | Dead Features: 0 | Total Loss: 0.0143 | Reconstruction Loss: 0.0115 | L1 Loss: 0.0028 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: -0.0375
Sparsity: 6.9 | Dead Features: 0 | Total Loss: 0.0366 | Reconstruction Loss: 0.0332 | L1 Loss: 0.0035 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0062
Sparsity: 5.4 | Dead Features: 0 | Total Loss: 0.0422 | Reconstruction Loss: 0.0394 | L1 Loss: 0.0028 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0168
Sparsity: 9.2 | Dead Features: 0 | Total Loss: 0.0578 | Reconstruction Loss: 0.0536 | L1 Loss: 0.0041 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similarity: 0.0096
Sparsity: 13.9 | Dead Features: 0 | Total Loss: 0.0899 | Reconstruction Loss: 0.0614 | L1 Loss: 0.0285 | l1_alpha: 8.0000e-04 | Tokens: 204800 | Self Similar

  0%|          | 203/55054 [00:14<1:08:09, 13.41it/s]

Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0549 | Reconstruction Loss: 0.0438 | L1 Loss: 0.0111 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0088
Sparsity: 3.9 | Dead Features: 0 | Total Loss: 0.0138 | Reconstruction Loss: 0.0111 | L1 Loss: 0.0027 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: -0.0400
Sparsity: 6.0 | Dead Features: 0 | Total Loss: 0.0323 | Reconstruction Loss: 0.0284 | L1 Loss: 0.0039 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0066
Sparsity: 3.2 | Dead Features: 0 | Total Loss: 0.0375 | Reconstruction Loss: 0.0349 | L1 Loss: 0.0026 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0190
Sparsity: 7.0 | Dead Features: 0 | Total Loss: 0.0478 | Reconstruction Loss: 0.0440 | L1 Loss: 0.0038 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similarity: 0.0108
Sparsity: 9.3 | Dead Features: 0 | Total Loss: 0.0701 | Reconstruction Loss: 0.0549 | L1 Loss: 0.0152 | l1_alpha: 8.0000e-04 | Tokens: 409600 | Self Similari

  1%|          | 303/55054 [00:21<1:03:06, 14.46it/s]

Sparsity: 22.9 | Dead Features: 0 | Total Loss: 0.0497 | Reconstruction Loss: 0.0376 | L1 Loss: 0.0121 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0110
Sparsity: 4.4 | Dead Features: 0 | Total Loss: 0.0133 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0027 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: -0.0419
Sparsity: 7.9 | Dead Features: 0 | Total Loss: 0.0317 | Reconstruction Loss: 0.0273 | L1 Loss: 0.0045 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0054
Sparsity: 6.6 | Dead Features: 0 | Total Loss: 0.0371 | Reconstruction Loss: 0.0338 | L1 Loss: 0.0033 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0194
Sparsity: 11.4 | Dead Features: 0 | Total Loss: 0.0468 | Reconstruction Loss: 0.0424 | L1 Loss: 0.0045 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Similarity: 0.0106
Sparsity: 10.1 | Dead Features: 0 | Total Loss: 0.0624 | Reconstruction Loss: 0.0538 | L1 Loss: 0.0086 | l1_alpha: 8.0000e-04 | Tokens: 614400 | Self Simila

  1%|          | 403/55054 [00:29<1:08:22, 13.32it/s]

Sparsity: 23.3 | Dead Features: 0 | Total Loss: 0.0456 | Reconstruction Loss: 0.0332 | L1 Loss: 0.0124 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0120
Sparsity: 5.5 | Dead Features: 0 | Total Loss: 0.0132 | Reconstruction Loss: 0.0105 | L1 Loss: 0.0027 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: -0.0441
Sparsity: 10.4 | Dead Features: 0 | Total Loss: 0.0301 | Reconstruction Loss: 0.0253 | L1 Loss: 0.0048 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0056
Sparsity: 8.9 | Dead Features: 0 | Total Loss: 0.0355 | Reconstruction Loss: 0.0318 | L1 Loss: 0.0037 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0204
Sparsity: 14.1 | Dead Features: 0 | Total Loss: 0.0446 | Reconstruction Loss: 0.0396 | L1 Loss: 0.0050 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Similarity: 0.0116
Sparsity: 11.8 | Dead Features: 0 | Total Loss: 0.0605 | Reconstruction Loss: 0.0520 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 819200 | Self Simil

  1%|          | 503/55054 [00:36<1:08:13, 13.33it/s]

Sparsity: 25.3 | Dead Features: 0 | Total Loss: 0.0430 | Reconstruction Loss: 0.0302 | L1 Loss: 0.0128 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0129
Sparsity: 6.1 | Dead Features: 0 | Total Loss: 0.0135 | Reconstruction Loss: 0.0108 | L1 Loss: 0.0027 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: -0.0457
Sparsity: 13.2 | Dead Features: 0 | Total Loss: 0.0285 | Reconstruction Loss: 0.0236 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0040
Sparsity: 12.5 | Dead Features: 0 | Total Loss: 0.0344 | Reconstruction Loss: 0.0304 | L1 Loss: 0.0040 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0210
Sparsity: 16.8 | Dead Features: 0 | Total Loss: 0.0431 | Reconstruction Loss: 0.0375 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Self Similarity: 0.0116
Sparsity: 13.9 | Dead Features: 0 | Total Loss: 0.0597 | Reconstruction Loss: 0.0511 | L1 Loss: 0.0086 | l1_alpha: 8.0000e-04 | Tokens: 1024000 | Sel

  1%|          | 603/55054 [00:43<1:07:16, 13.49it/s]

Sparsity: 28.1 | Dead Features: 0 | Total Loss: 0.0438 | Reconstruction Loss: 0.0304 | L1 Loss: 0.0134 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0139
Sparsity: 6.7 | Dead Features: 0 | Total Loss: 0.0134 | Reconstruction Loss: 0.0106 | L1 Loss: 0.0027 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: -0.0451
Sparsity: 16.4 | Dead Features: 0 | Total Loss: 0.0293 | Reconstruction Loss: 0.0241 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0051
Sparsity: 15.5 | Dead Features: 0 | Total Loss: 0.0357 | Reconstruction Loss: 0.0313 | L1 Loss: 0.0044 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0222
Sparsity: 20.3 | Dead Features: 0 | Total Loss: 0.0427 | Reconstruction Loss: 0.0369 | L1 Loss: 0.0058 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Self Similarity: 0.0123
Sparsity: 15.0 | Dead Features: 0 | Total Loss: 0.0600 | Reconstruction Loss: 0.0514 | L1 Loss: 0.0085 | l1_alpha: 8.0000e-04 | Tokens: 1228800 | Sel

  1%|▏         | 703/55054 [00:51<1:07:25, 13.43it/s]

Sparsity: 25.2 | Dead Features: 0 | Total Loss: 0.0388 | Reconstruction Loss: 0.0258 | L1 Loss: 0.0130 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0145
Sparsity: 7.1 | Dead Features: 0 | Total Loss: 0.0129 | Reconstruction Loss: 0.0101 | L1 Loss: 0.0027 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: -0.0441
Sparsity: 16.8 | Dead Features: 0 | Total Loss: 0.0276 | Reconstruction Loss: 0.0222 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0055
Sparsity: 15.8 | Dead Features: 0 | Total Loss: 0.0323 | Reconstruction Loss: 0.0277 | L1 Loss: 0.0046 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0230
Sparsity: 21.3 | Dead Features: 0 | Total Loss: 0.0419 | Reconstruction Loss: 0.0353 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Self Similarity: 0.0147
Sparsity: 17.0 | Dead Features: 0 | Total Loss: 0.0587 | Reconstruction Loss: 0.0496 | L1 Loss: 0.0091 | l1_alpha: 8.0000e-04 | Tokens: 1433600 | Sel

  1%|▏         | 803/55054 [00:58<1:07:38, 13.37it/s]

Sparsity: 29.5 | Dead Features: 0 | Total Loss: 0.0421 | Reconstruction Loss: 0.0281 | L1 Loss: 0.0139 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0155
Sparsity: 7.7 | Dead Features: 0 | Total Loss: 0.0131 | Reconstruction Loss: 0.0104 | L1 Loss: 0.0027 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: -0.0431
Sparsity: 18.2 | Dead Features: 0 | Total Loss: 0.0278 | Reconstruction Loss: 0.0225 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0065
Sparsity: 18.0 | Dead Features: 0 | Total Loss: 0.0336 | Reconstruction Loss: 0.0288 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0243
Sparsity: 22.3 | Dead Features: 0 | Total Loss: 0.0411 | Reconstruction Loss: 0.0348 | L1 Loss: 0.0063 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Self Similarity: 0.0174
Sparsity: 18.8 | Dead Features: 0 | Total Loss: 0.0581 | Reconstruction Loss: 0.0492 | L1 Loss: 0.0090 | l1_alpha: 8.0000e-04 | Tokens: 1638400 | Sel

  2%|▏         | 903/55054 [01:05<1:05:56, 13.69it/s]

Sparsity: 24.3 | Dead Features: 0 | Total Loss: 0.0353 | Reconstruction Loss: 0.0224 | L1 Loss: 0.0129 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0164
Sparsity: 7.3 | Dead Features: 0 | Total Loss: 0.0113 | Reconstruction Loss: 0.0088 | L1 Loss: 0.0026 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: -0.0428
Sparsity: 18.7 | Dead Features: 0 | Total Loss: 0.0266 | Reconstruction Loss: 0.0211 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0079
Sparsity: 17.0 | Dead Features: 0 | Total Loss: 0.0312 | Reconstruction Loss: 0.0263 | L1 Loss: 0.0049 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0242
Sparsity: 22.0 | Dead Features: 0 | Total Loss: 0.0384 | Reconstruction Loss: 0.0319 | L1 Loss: 0.0065 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Self Similarity: 0.0213
Sparsity: 19.6 | Dead Features: 0 | Total Loss: 0.0552 | Reconstruction Loss: 0.0460 | L1 Loss: 0.0091 | l1_alpha: 8.0000e-04 | Tokens: 1843200 | Sel

  2%|▏         | 1003/55054 [01:13<1:07:01, 13.44it/s]

Sparsity: 29.6 | Dead Features: 0 | Total Loss: 0.0398 | Reconstruction Loss: 0.0257 | L1 Loss: 0.0140 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0172
Sparsity: 9.6 | Dead Features: 0 | Total Loss: 0.0140 | Reconstruction Loss: 0.0112 | L1 Loss: 0.0029 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: -0.0412
Sparsity: 19.7 | Dead Features: 0 | Total Loss: 0.0263 | Reconstruction Loss: 0.0210 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0109
Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0314 | Reconstruction Loss: 0.0263 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0255
Sparsity: 24.7 | Dead Features: 0 | Total Loss: 0.0391 | Reconstruction Loss: 0.0324 | L1 Loss: 0.0068 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Self Similarity: 0.0254
Sparsity: 21.4 | Dead Features: 0 | Total Loss: 0.0549 | Reconstruction Loss: 0.0455 | L1 Loss: 0.0094 | l1_alpha: 8.0000e-04 | Tokens: 2048000 | Sel

  2%|▏         | 1103/55054 [01:20<1:05:26, 13.74it/s]

Sparsity: 28.1 | Dead Features: 0 | Total Loss: 0.0374 | Reconstruction Loss: 0.0235 | L1 Loss: 0.0139 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0178
Sparsity: 8.9 | Dead Features: 0 | Total Loss: 0.0130 | Reconstruction Loss: 0.0102 | L1 Loss: 0.0027 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: -0.0385
Sparsity: 20.1 | Dead Features: 0 | Total Loss: 0.0266 | Reconstruction Loss: 0.0212 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0127
Sparsity: 20.4 | Dead Features: 0 | Total Loss: 0.0316 | Reconstruction Loss: 0.0264 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0271
Sparsity: 25.7 | Dead Features: 0 | Total Loss: 0.0393 | Reconstruction Loss: 0.0324 | L1 Loss: 0.0069 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Self Similarity: 0.0301
Sparsity: 23.9 | Dead Features: 0 | Total Loss: 0.0565 | Reconstruction Loss: 0.0466 | L1 Loss: 0.0099 | l1_alpha: 8.0000e-04 | Tokens: 2252800 | Sel

  2%|▏         | 1203/55054 [01:27<1:05:16, 13.75it/s]

Sparsity: 26.4 | Dead Features: 0 | Total Loss: 0.0357 | Reconstruction Loss: 0.0224 | L1 Loss: 0.0133 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0181
Sparsity: 7.5 | Dead Features: 0 | Total Loss: 0.0111 | Reconstruction Loss: 0.0086 | L1 Loss: 0.0025 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: -0.0368
Sparsity: 19.9 | Dead Features: 0 | Total Loss: 0.0256 | Reconstruction Loss: 0.0201 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: 0.0159
Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0300 | Reconstruction Loss: 0.0248 | L1 Loss: 0.0052 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: 0.0293
Sparsity: 25.1 | Dead Features: 0 | Total Loss: 0.0369 | Reconstruction Loss: 0.0300 | L1 Loss: 0.0069 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Self Similarity: 0.0349
Sparsity: 23.5 | Dead Features: 0 | Total Loss: 0.0528 | Reconstruction Loss: 0.0431 | L1 Loss: 0.0097 | l1_alpha: 8.0000e-04 | Tokens: 2457600 | Sel

  2%|▏         | 1303/55054 [01:34<1:04:52, 13.81it/s]

Sparsity: 25.9 | Dead Features: 0 | Total Loss: 0.0347 | Reconstruction Loss: 0.0216 | L1 Loss: 0.0132 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: -0.0186
Sparsity: 7.9 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0090 | L1 Loss: 0.0025 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: -0.0351
Sparsity: 19.5 | Dead Features: 0 | Total Loss: 0.0243 | Reconstruction Loss: 0.0189 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0187
Sparsity: 18.9 | Dead Features: 0 | Total Loss: 0.0284 | Reconstruction Loss: 0.0233 | L1 Loss: 0.0051 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0316
Sparsity: 25.5 | Dead Features: 0 | Total Loss: 0.0360 | Reconstruction Loss: 0.0290 | L1 Loss: 0.0070 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Self Similarity: 0.0416
Sparsity: 24.3 | Dead Features: 0 | Total Loss: 0.0520 | Reconstruction Loss: 0.0420 | L1 Loss: 0.0100 | l1_alpha: 8.0000e-04 | Tokens: 2662400 | Sel

  3%|▎         | 1403/55054 [01:42<1:05:46, 13.59it/s]

Sparsity: 23.1 | Dead Features: 0 | Total Loss: 0.0319 | Reconstruction Loss: 0.0192 | L1 Loss: 0.0127 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: -0.0192
Sparsity: 8.3 | Dead Features: 0 | Total Loss: 0.0115 | Reconstruction Loss: 0.0089 | L1 Loss: 0.0025 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: -0.0329
Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0245 | Reconstruction Loss: 0.0191 | L1 Loss: 0.0054 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0224
Sparsity: 20.0 | Dead Features: 0 | Total Loss: 0.0290 | Reconstruction Loss: 0.0237 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0337
Sparsity: 28.8 | Dead Features: 0 | Total Loss: 0.0383 | Reconstruction Loss: 0.0306 | L1 Loss: 0.0077 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Self Similarity: 0.0477
Sparsity: 25.6 | Dead Features: 0 | Total Loss: 0.0537 | Reconstruction Loss: 0.0436 | L1 Loss: 0.0101 | l1_alpha: 8.0000e-04 | Tokens: 2867200 | Sel

  3%|▎         | 1503/55054 [01:49<1:04:48, 13.77it/s]

Sparsity: 24.5 | Dead Features: 0 | Total Loss: 0.0328 | Reconstruction Loss: 0.0199 | L1 Loss: 0.0130 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: -0.0197
Sparsity: 8.0 | Dead Features: 0 | Total Loss: 0.0110 | Reconstruction Loss: 0.0085 | L1 Loss: 0.0025 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: -0.0312
Sparsity: 20.2 | Dead Features: 0 | Total Loss: 0.0240 | Reconstruction Loss: 0.0185 | L1 Loss: 0.0055 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0260
Sparsity: 19.8 | Dead Features: 0 | Total Loss: 0.0279 | Reconstruction Loss: 0.0226 | L1 Loss: 0.0053 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0359
Sparsity: 27.4 | Dead Features: 0 | Total Loss: 0.0359 | Reconstruction Loss: 0.0286 | L1 Loss: 0.0073 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Self Similarity: 0.0534
Sparsity: 25.7 | Dead Features: 0 | Total Loss: 0.0510 | Reconstruction Loss: 0.0409 | L1 Loss: 0.0102 | l1_alpha: 8.0000e-04 | Tokens: 3072000 | Sel

  3%|▎         | 1519/55054 [01:50<1:04:18, 13.88it/s]

# Model + Data

In [3]:
from transformers import AutoModel, AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification, GPTJForSequenceClassification


# Load in the model
def load_model(cfg):
    """Load the causal language model and its tokenizer named by ``cfg.model_name``.

    Args:
        cfg: Config object; ``cfg.model_name`` is passed to ``from_pretrained``
            and ``cfg.device`` is where the model is moved.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    checkpoint = cfg.model_name
    # Load the weights and move them to the target device in one go.
    lm = AutoModelForCausalLM.from_pretrained(checkpoint).to(cfg.device)
    tok = AutoTokenizer.from_pretrained(checkpoint)
    return lm, tok

In [4]:
# Download the dataset
# TODO iteratively grab dataset?
def init_dataloader(cfg, model, tokenizer, split="train"):
    """Build the token loader and record the dataset size on ``cfg``.

    Side effects on ``cfg``: sets ``cfg.max_length`` to 256 before the loader
    is built and stores the estimated token count in ``cfg.total_tokens``.

    Args:
        cfg: Config object; ``cfg.seed`` and ``cfg.model_batch_size`` are read.
        model: Forwarded unchanged to ``setup_token_data``.
        tokenizer: Forwarded unchanged to ``setup_token_data``.
        split: Dataset split name forwarded to ``setup_token_data``.

    Returns:
        The loader object returned by ``setup_token_data``.
    """
    cfg.max_length = 256
    loader = setup_token_data(cfg, tokenizer, model, seed=cfg.seed, split=split)

    # Estimate: (tokens per sequence) * (sequences per batch) * (number of batches).
    tokens_per_batch = cfg.max_length*cfg.model_batch_size
    total = tokens_per_batch*len(loader)
    print(f"Number of tokens: {total}")
    cfg.total_tokens = total

    return loader

In [5]:
# Run 1 datapoint on model to get the activation size

def get_activation_size(cfg, model, tokenizer):
    """Probe the model once to find the width of the hooked activation.

    Runs a single forward pass on the text ``"1"`` while tracing the module
    named for the first entry of ``cfg.layers``, and reads the size of the
    last dimension of that module's output.

    Side effect: stores the result in ``cfg.activation_size``.

    Args:
        cfg: Config object; ``tensor_name``, ``layers`` and ``device`` are read.
        model: Model to run the probe through.
        tokenizer: Tokenizer used to encode the probe text.

    Returns:
        The size of the last dimension of the traced output.
    """
    hook_names = [cfg.tensor_name.format(layer=layer_idx) for layer_idx in cfg.layers]
    probe_ids = tokenizer("1", return_tensors="pt").input_ids.to(cfg.device)

    # Only the first hooked module is probed; no gradients are needed for this.
    with torch.no_grad(), Trace(model, hook_names[0]) as ret:
        model(probe_ids)
        captured = ret.output
        # Some modules return a tuple; the first element is used in that case.
        hidden = captured[0] if isinstance(captured, tuple) else captured
        activation_size = hidden.shape[-1]

    print(f"Activation size: {activation_size}")
    cfg.activation_size = activation_size
    return activation_size

In [6]:
# text = "1"
# tokens = tokenizer(text, return_tensors="pt").input_ids.to(cfg.device)
# # Your activation name will be different. In the next cells, we will show you how to find it.
# with torch.no_grad():
#     with Trace(model, 'gpt_neox.layers.0.mlp') as ret:
#         _ = model(tokens)
#         representation = ret.output
#         # check if instance tuple
#         if(isinstance(representation, tuple)):
#             representation = representation[0]
#         activation_size = representation.shape[-1]
# print(f"Activation size: {activation_size}")
# cfg.activation_size = activation_size


In [7]:
# # Set target sparsity to 5% of activation_size if not set

# # NOT USED
# if cfg.sparsity is None:
#     cfg.sparsity = int(activation_size*0.05)
#     print(f"Target sparsity: {cfg.sparsity}")

# target_lower_sparsity = cfg.sparsity * 0.9
# target_upper_sparsity = cfg.sparsity * 1.1
# adjustment_factor = 0.1  # You can set this to whatever you like

# Sparse Autoencoder init

In [8]:
def init_autoencoder(cfg):
    """Create one sparse autoencoder and one Adam optimizer per entry of ``cfg.layers``.

    Each autoencoder has ``cfg.activation_size * cfg.ratio`` dictionary
    components. Encoder and decoder matrices are Xavier-uniform initialised
    (encoder first, then decoder); both bias vectors start at zero.

    Args:
        cfg: Config object; ``layers``, ``activation_size``, ``ratio``,
            ``device`` and ``lr`` are read.

    Returns:
        ``(autoencoders, optimizers)``: two lists of equal length, index-aligned
        with ``cfg.layers``.
    """
    dict_size = cfg.activation_size*cfg.ratio  # Sparse Autoencoder Size
    weight_shape = (dict_size, cfg.activation_size)

    autoencoders, optimizers = [], []
    for _ in cfg.layers:
        # Keep encoder-then-decoder order so the random stream is consumed identically.
        encoder = nn.init.xavier_uniform_(torch.empty(weight_shape, device=cfg.device))
        decoder = nn.init.xavier_uniform_(torch.empty(weight_shape, device=cfg.device))
        encoder_bias = torch.zeros((dict_size,), device=cfg.device)
        shift_bias = torch.zeros((cfg.activation_size,), device=cfg.device)

        sae = AnthropicSAE(  # TiedSAE, UntiedSAE, AnthropicSAE
            encoder=encoder,
            encoder_bias=encoder_bias,
            decoder=decoder,
            shift_bias=shift_bias,
        )
        sae.to_device(cfg.device)
        sae.set_grad()

        # Optimise the tensors as exposed by the autoencoder after set_grad().
        trainable = [sae.encoder, sae.encoder_bias, sae.decoder, sae.shift_bias]
        optimizers.append(torch.optim.Adam(trainable, lr=cfg.lr))
        autoencoders.append(sae)
    return autoencoders, optimizers

# Training Run

In [9]:
# tensor_names = ['gpt_neox.layers.0.mlp', 'gpt_neox.layers.5.mlp']

In [10]:
# original_bias = autoencoder.encoder_bias.clone().detach()
# Wandb setup
def setup_wandb(cfg, wandb_run_name):
    """Log in to Weights & Biases and start a run for this configuration.

    The API key is read from the ``"wandb_key"`` entry of ``secrets.json`` in
    the current working directory.

    Args:
        cfg: Config mapping; it is converted with ``dict(cfg)`` and stored as
            the run config.
        wandb_run_name: Display name for the run.

    Returns:
        ``wandb_run_name``, unchanged.

    Raises:
        FileNotFoundError: If ``secrets.json`` does not exist.
        KeyError: If ``secrets.json`` has no ``"wandb_key"`` entry.
    """
    # Use a context manager so the file handle is closed deterministically.
    with open("secrets.json") as secrets_file:
        secrets = json.load(secrets_file)
    wandb.login(key=secrets["wandb_key"])
    wandb.init(project="Sparse Coding >70m", config=dict(cfg), name=wandb_run_name)
    return wandb_run_name

In [11]:
def training_run(cfg, model, optimizers, autoencoders, token_loader):
    """Train one sparse autoencoder per hooked layer on frozen-model activations.

    For every batch the model is run once under ``TraceDict`` to capture the
    output of each module named by ``cfg.tensor_name`` / ``cfg.layers``. Each
    layer's autoencoder is then updated on its own activations with the loss
    ``mean squared error + cfg.l1_alpha * mean L1 norm of the codes``.

    Diagnostics are printed (and sent to wandb when ``cfg.wandb_log`` is
    truthy) every 100 batches of each epoch. All diagnostic state is tracked
    per layer, so "Self Similarity" compares a layer's encoder with that same
    layer's encoder at the previous log, and "Dead Features" counts inactivity
    of that layer's features only.

    Args:
        cfg: Config object; ``tensor_name``, ``layers``, ``device``,
            ``num_epochs``, ``l1_alpha``, ``max_length``, ``model_batch_size``
            and ``wandb_log`` are read.
        model: Model whose activations are traced. It is put in eval mode, its
            parameters are frozen, and it is moved to ``cfg.device``.
        optimizers: One optimizer per layer, index-aligned with ``cfg.layers``.
        autoencoders: One autoencoder per layer, index-aligned with
            ``cfg.layers``; each must expose ``encoder``, ``encode`` and
            ``decode``.
        token_loader: Iterable of batches; each batch provides ``"input_ids"``.

    Returns:
        None. The autoencoders are updated in place and ``wandb.finish()`` is
        called at the end.
    """
    log_every = 100    # batches between diagnostic reports
    dead_window = 200  # a feature inactive for this many consecutive batches is "dead"

    tensor_names = [cfg.tensor_name.format(layer=layer) for layer in cfg.layers]
    assert len(cfg.layers) == len(tensor_names), "layers and tensor_names have different lengths"

    # Per-layer diagnostic state. Sharing a single tensor across layers would
    # mix the statistics of different autoencoders.
    time_since_activation = [torch.zeros(ae.encoder.shape[0]) for ae in autoencoders]
    last_encoders = [ae.encoder.clone().detach() for ae in autoencoders]

    # Freeze model parameters
    model.eval()
    model.requires_grad_(False)
    model.to(cfg.device)

    global_step = 0  # batches processed across all epochs
    for epoch in range(cfg.num_epochs):
        for i, batch in enumerate(tqdm(token_loader)):
            tokens = batch["input_ids"].to(cfg.device)

            with torch.no_grad(): # As long as not doing KL divergence, don't need gradients for model
                representations = []
                with TraceDict(model, tensor_names) as ret:
                    _ = model(tokens)
                    for tensor_name in tensor_names:
                        output = ret[tensor_name].output
                        # Some modules return a tuple; use its first element,
                        # matching get_activation_size.
                        if isinstance(output, tuple):
                            output = output[0]
                        representations.append(output)

            for layer in range(len(cfg.layers)):
                representation = representations[layer]
                layer_activations = rearrange(representation, "b seq d_model -> (b seq) d_model")
                autoencoder = autoencoders[layer]
                optimizer = optimizers[layer]

                c = autoencoder.encode(layer_activations)
                x_hat = autoencoder.decode(c)

                reconstruction_loss = (x_hat - layer_activations).pow(2).mean()
                l1_loss = torch.norm(c, 1, dim=-1).mean()
                total_loss = reconstruction_loss + cfg.l1_alpha*l1_loss

                # Count consecutive batches in which each feature summed to
                # zero; the counter resets as soon as the feature fires.
                inactive_mask = c.detach().sum(dim=0).cpu() == 0
                time_since_activation[layer] = (time_since_activation[layer] + 1) * inactive_mask

                if ((i) % log_every == 0): # Check here so first check is model w/o change
                    with torch.no_grad():
                        self_similarity = torch.cosine_similarity(autoencoder.encoder, last_encoders[layer], dim=-1).mean().cpu().item()
                        last_encoders[layer] = autoencoder.encoder.clone().detach()

                        sparsity = (c != 0).float().mean(dim=0).sum().cpu().item()
                        # Before dead_window batches have been seen, a feature
                        # is dead only if it has never fired. The +1 keeps the
                        # threshold above zero on the very first batch.
                        dead_threshold = min(global_step + 1, dead_window)
                        num_dead_features = (time_since_activation[layer] >= dead_threshold).sum().item()

                    # Use the cross-epoch step so the count keeps growing after epoch 0.
                    num_tokens_so_far = global_step*cfg.max_length*cfg.model_batch_size
                    print(f"Sparsity: {sparsity:.1f} | Dead Features: {num_dead_features} | Total Loss: {total_loss:.4f} | Reconstruction Loss: {reconstruction_loss:.4f} | L1 Loss: {cfg.l1_alpha*l1_loss:.4f} | l1_alpha: {cfg.l1_alpha:.4e} | Tokens: {num_tokens_so_far} | Self Similarity: {self_similarity:.4f}")

                    if cfg.wandb_log:
                        wandb.log({
                            'Sparsity': sparsity,
                            'Dead Features': num_dead_features,
                            'Total Loss': total_loss.item(),
                            'Reconstruction Loss': reconstruction_loss.item(),
                            'L1 Loss': (cfg.l1_alpha*l1_loss).item(),
                            'l1_alpha': cfg.l1_alpha,
                            'Tokens': num_tokens_so_far,
                            'Self Similarity': self_similarity
                        })

                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

            global_step += 1
    wandb.finish()
        # resample_period = 10000
        # if (i % resample_period == 0):
        #     # RESAMPLING
        #     with torch.no_grad():
        #         # Count number of dead_features are zero
        #         num_dead_features = (total_activations == 0).sum().item()
        #         print(f"Dead Features: {num_dead_features}")
                
        #     if num_dead_features > 0:
        #         print("Resampling!")
        #         # hyperparams:
        #         max_resample_tokens = 1000 # the number of token activations that we consider for inserting into the dictionary
        #         # compute loss of model on random subset of inputs
        #         resample_loader = setup_token_data(cfg, tokenizer, model, seed=i)
        #         num_resample_data = 0
    
        #         resample_activations = torch.empty(0, activation_size)
        #         resample_losses = torch.empty(0)
    
        #         for resample_batch in resample_loader:
        #             resample_tokens = resample_batch["input_ids"].to(cfg.device)
        #             with torch.no_grad(): # As long as not doing KL divergence, don't need gradients for model
        #                 with Trace(model, tensor_names[0]) as ret:
        #                     _ = model(resample_tokens)
        #                     representation = ret.output
        #                     if(isinstance(representation, tuple)):
        #                         representation = representation[0]
        #             layer_activations = rearrange(representation, "b seq d_model -> (b seq) d_model")
        #             resample_activations = torch.cat((resample_activations, layer_activations.detach().cpu()), dim=0)
    
        #             c = autoencoder.encode(layer_activations)
        #             x_hat = autoencoder.decode(c)
                    
        #             reconstruction_loss = (x_hat - layer_activations).pow(2).mean(dim=-1)
        #             l1_loss = torch.norm(c, 1, dim=-1)
        #             temp_loss = reconstruction_loss + cfg.l1_alpha*l1_loss
                    
        #             resample_losses = torch.cat((resample_losses, temp_loss.detach().cpu()), dim=0)
                    
        #             num_resample_data +=layer_activations.shape[0]
        #             if num_resample_data > max_resample_tokens:
        #                 break
    
                    
        #         # sample num_dead_features vectors of input activations
        #         probabilities = resample_losses**2
        #         probabilities /= probabilities.sum()
        #         sampled_indices = torch.multinomial(probabilities, num_dead_features, replacement=True)
        #         new_vectors = resample_activations[sampled_indices]
    
        #         # calculate average encoder norm of alive neurons
        #         alive_neurons = list((total_activations!=0))
        #         modified_columns = total_activations==0
        #         avg_norm = autoencoder.encoder.data[alive_neurons].norm(dim=-1).mean()
    
        #         # replace dictionary and encoder weights with vectors
        #         new_vectors = new_vectors / new_vectors.norm(dim=1, keepdim=True)
                
        #         params_to_modify = [autoencoder.encoder, autoencoder.encoder_bias]
    
        #         current_weights = autoencoder.encoder.data
        #         current_weights[modified_columns] = (new_vectors.to(cfg.device) * avg_norm * 0.02)
        #         autoencoder.encoder.data = current_weights
    
        #         current_weights = autoencoder.encoder_bias.data
        #         current_weights[modified_columns] = 0
        #         autoencoder.encoder_bias.data = current_weights
                
        #         if hasattr(autoencoder, 'decoder'):
        #             current_weights = autoencoder.decoder.data
        #             current_weights[modified_columns] = new_vectors.to(cfg.device)
        #             autoencoder.decoder.data = current_weights
        #             params_to_modify += [autoencoder.decoder]
    
        #         for param_group in optimizer.param_groups:
        #             for param in param_group['params']:
        #                 if any(param is d_ for d_ in params_to_modify):
        #                     # Extract the corresponding rows from m and v
        #                     m = optimizer.state[param]['exp_avg']
        #                     v = optimizer.state[param]['exp_avg_sq']
                            
        #                     # Update the m and v values for the modified columns
        #                     m[modified_columns] = 0  # Reset moving average for modified columns
        #                     v[modified_columns] = 0  # Reset squared moving average for modified columns
            
        #     total_activations = torch.zeros(autoencoder.encoder.shape[0])
        
        
        
    
        # if ((i+2) % save_every ==0): # save periodically but before big changes
        #     model_save_name = cfg.model_name.split("/")[-1]
        #     save_name = f"{model_save_name}_sp{cfg.sparsity}_r{cfg.ratio}_{tensor_names[0]}_ckpt{num_saved_so_far}"  # trim year
    
        #     # Make directory trained_models if it doesn't exist
        #     import os
        #     if not os.path.exists("trained_models"):
        #         os.makedirs("trained_models")
        #     # Save model
        #     torch.save(autoencoder, f"trained_models/{save_name}.pt")
            
        #     num_saved_so_far += 1
    
        # # Running sparsity check
        # num_tokens_so_far = i*cfg.max_length*cfg.model_batch_size
        # if(num_tokens_so_far > 200000):
        #     if(i % 100 == 0):
        #         with torch.no_grad():
        #             sparsity = (c != 0).float().mean(dim=0).sum().cpu().item()
        #         if sparsity > target_upper_sparsity:
        #             cfg.l1_alpha *= (1 + adjustment_factor)
        #         elif sparsity < target_lower_sparsity:
        #             cfg.l1_alpha *= (1 - adjustment_factor)
        #         # print(f"Sparsity: {sparsity:.1f} | l1_alpha: {cfg.l1_alpha:.2e}")

## Duplicated training run

# Saving

In [12]:
def model_save(cfg, autoencoder, storage_path, filename, layer):
    """Save one layer's autoencoder under ``trained_models/``.

    The object is written with ``torch.save`` to
    ``trained_models/<storage_path>/layer_<layer>/L<layer>_<filename>.pt``,
    relative to the current working directory. Missing directories are
    created, and the final path is printed.

    Args:
        cfg: Run configuration. Not read by this function; kept so existing
            callers do not need to change.
        autoencoder: Object to serialize.
        storage_path: Sub-directory of ``trained_models/`` for this run.
        filename: Base file name, without the layer prefix or ``.pt`` suffix.
        layer: Layer identifier used in both the directory and the file name.
    """
    storage_path_name = "trained_models/" + storage_path + f"/layer_{layer}"
    # exist_ok=True replaces the separate existence check, which could fail if
    # the directory appeared between the check and the creation.
    os.makedirs(storage_path_name, exist_ok=True)

    # Save model
    filename = f"L{layer}_{filename}"
    save_path = f"{storage_path_name}/{filename}.pt"

    torch.save(autoencoder, save_path)
    print(f"Saved file at: {save_path}")