<a href="https://colab.research.google.com/github/UC-Berkeley-I-School/DATASCI266-Final-Project-Sean-Rini/blob/main/notebooks/sparse_autoencoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Instructions:

1. Install pipx
```
# Linux
sudo apt update
sudo apt install pipx
pipx ensurepath
sudo pipx ensurepath
```

2. Install poetry
```
pipx install poetry
source ~/.bashrc
poetry config virtualenvs.in-project false
poetry config virtualenvs.create false
```

3. Install/build sparse_autoencoder
```
git clone git@github.com:ai-safety-foundation/sparse_autoencoder.git
cd sparse_autoencoder
```
OR (if you don't want to set up SSH authentication for GitHub):
```
wget https://github.com/ai-safety-foundation/sparse_autoencoder/archive/refs/heads/main.zip
unzip main.zip
cd sparse_autoencoder-main
```
then
```
poetry install --no-root
```

# take 1

Change the working directory to `/content`

In [1]:
import os
# Work from /content so the repository cloned below lands at /content/sparse_autoencoder.
os.chdir('/content')

Setup git email and username

In [2]:
 # Set the global git identity (author e-mail and name) for this runtime.
 !git config --global user.email "23294618+seansica@users.noreply.github.com"
 !git config --global user.name "Sean Sica"

Clone the project from git

In [3]:
 # https://github.com/ai-safety-foundation/sparse_autoencoder.git
 !git clone https://@github.com/ai-safety-foundation/sparse_autoencoder.git

Cloning into 'sparse_autoencoder'...
remote: Enumerating objects: 9230, done.[K
remote: Counting objects: 100% (1452/1452), done.[K
remote: Compressing objects: 100% (494/494), done.[K
remote: Total 9230 (delta 855), reused 1203 (delta 738), pack-reused 7778[K
Receiving objects: 100% (9230/9230), 12.44 MiB | 16.89 MiB/s, done.
Resolving deltas: 100% (5037/5037), done.


Install poetry from pip

In [4]:
!pip install poetry

Collecting poetry
  Downloading poetry-1.8.3-py3-none-any.whl (249 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.9/249.9 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Collecting cleo<3.0.0,>=2.1.0 (from poetry)
  Downloading cleo-2.1.0-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting crashtest<0.5.0,>=0.4.1 (from poetry)
  Downloading crashtest-0.4.1-py3-none-any.whl (7.6 kB)
Collecting dulwich<0.22.0,>=0.21.2 (from poetry)
  Downloading dulwich-0.21.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (514 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m514.7/514.7 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
Collecting installer<0.8.0,>=0.7.0 (from poetry)
  Downloading installer-0.7.0-py3-none-any.whl (453 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m453.8/453.8 kB[0m [31m13.6 MB/s[0m eta [

Change into the project folder (created when the repository was cloned from git)



In [3]:
import os
# Switch the kernel's working directory to the cloned repository.
os.chdir("/content/sparse_autoencoder")

Configure poetry not to create virtual environments in the project folder

In [6]:
# Turn off poetry's "virtualenvs.in-project" setting for this runtime.
!poetry config virtualenvs.in-project false

Install all dependencies and, optionally, add extra dependencies

In [10]:
# Install the dependencies from the lock file, then the project itself.
# --no-ansi keeps colour escape codes out of the captured cell output.
!poetry install --no-ansi
# Template for adding an extra package to the same environment:
# !poetry run pip install <Extra Dependency Name>

Installing dependencies from lock file

No dependencies to install or update

Installing the current project: sparse_autoencoder (0.0.0)


Note the virtualenv name that `poetry install` prints when it creates the environment:
```
Creating virtualenv sparse-autoencoder-0v-Hz7gT-py3.10 in /root/.cache/pypoetry/virtualenvs
```

Add poetry virtual environment to python path so that all installed dependencies can be found by the python interpreter

In [4]:
import sys
# Virtualenv name as printed by `poetry install`, and the Python version it was built for.
venv_name, python_version = "sparse-autoencoder-0v-Hz7gT-py3.10", "3.10"

# Extend the import search path with the virtualenv's site-packages directory so the
# packages poetry installed there can be imported from this kernel.
venv_site_packages = f"/root/.cache/pypoetry/virtualenvs/{venv_name}/lib/python{python_version}/site-packages"
sys.path += [venv_site_packages]

In [5]:
from sparse_autoencoder import (
    ActivationResamplerHyperparameters,
    AutoencoderHyperparameters,
    Hyperparameters,
    LossHyperparameters,
    Method,
    OptimizerHyperparameters,
    Parameter,
    PipelineHyperparameters,
    SourceDataHyperparameters,
    SourceModelHyperparameters,
    SweepConfig,
    sweep,
)

In [6]:
def train_gpt_small_mlp_layers(
    expansion_factor: int = 4,
    n_layers: int = 12,
) -> None:
    """Run a new sweep experiment on GPT 2 Small's MLP layers.

    Builds a `SweepConfig` (sweep method `Method.RANDOM`) whose L1 coefficient and
    learning rate are given as min/max ranges while every other hyperparameter is a
    fixed value, then passes it to `sweep`.

    Args:
        expansion_factor: Expansion factor for the autoencoder.
        n_layers: Number of layers to train on, counted from layer 0. Must be
            between 1 and 12 inclusive.

    Raises:
        ValueError: If `n_layers` is smaller than 1 or larger than 12.

    """
    # Fail fast on an out-of-range layer count: 0 would produce an empty hook list and
    # anything above the documented maximum would name hooks past the last layer.
    max_layers = 12
    if not 1 <= n_layers <= max_layers:
        error_message = f"n_layers must be between 1 and {max_layers}, got {n_layers}"
        raise ValueError(error_message)

    sweep_config = SweepConfig(
        parameters=Hyperparameters(
            loss=LossHyperparameters(
                l1_coefficient=Parameter(max=0.03, min=0.008),
            ),
            optimizer=OptimizerHyperparameters(
                lr=Parameter(max=0.001, min=0.00001),
            ),
            source_model=SourceModelHyperparameters(
                name=Parameter("gpt2"),
                # One MLP-output hook name per layer index 0..n_layers-1.
                cache_names=Parameter(
                    [f"blocks.{layer}.hook_mlp_out" for layer in range(n_layers)]
                ),
                hook_dimension=Parameter(768),
            ),
            source_data=SourceDataHyperparameters(
                dataset_path=Parameter("alancooney/sae-monology-pile-uncopyrighted-tokenizer-gpt2"),
                context_size=Parameter(256),
                pre_tokenized=Parameter(value=True),
                pre_download=Parameter(value=False),  # Default to streaming the dataset
            ),
            autoencoder=AutoencoderHyperparameters(
                expansion_factor=Parameter(value=expansion_factor)
            ),
            pipeline=PipelineHyperparameters(
                max_activations=Parameter(1_000_000_000),
                checkpoint_frequency=Parameter(100_000_000),
                validation_frequency=Parameter(100_000_000),
                max_store_size=Parameter(1_000_000),
            ),
            activation_resampler=ActivationResamplerHyperparameters(
                resample_interval=Parameter(200_000_000),
                n_activations_activity_collate=Parameter(100_000_000),
                threshold_is_dead_portion_fires=Parameter(1e-6),
                max_n_resamples=Parameter(4),
            ),
        ),
        method=Method.RANDOM,
    )

    sweep(sweep_config=sweep_config)

Troubleshooting:
```
ServiceStartProcessError: The wandb service process exited with 1. Ensure that `sys.executable` is a valid python interpreter. You can override it with the `_executable` setting or with the `WANDB__EXECUTABLE` environment variable.
```

The workaround suggested in https://github.com/wandb/wandb/issues/5765 did **not** work.

In [9]:
import os
os.chdir("/content/sparse_autoencoder")
# Run the script with the interpreter from poetry's virtualenv instead of the kernel's.
# NOTE(review): test.py is not created anywhere in this notebook; presumably it contains
# the training function defined above followed by a call to it - confirm.
!poetry run python test.py

[34m[1mwandb[0m: [32m[41mERROR[0m Connection to wandb service failed: [Errno 111] Connection refused. 
Traceback (most recent call last):
  File "/root/.cache/pypoetry/virtualenvs/sparse-autoencoder-0v-Hz7gT-py3.10/lib/python3.10/site-packages/wandb/sdk/wandb_manager.py", line 116, in _service_connect
    svc_iface._svc_connect(port=port)
  File "/root/.cache/pypoetry/virtualenvs/sparse-autoencoder-0v-Hz7gT-py3.10/lib/python3.10/site-packages/wandb/sdk/service/service_sock.py", line 30, in _svc_connect
    self._sock_client.connect(port=port)
  File "/root/.cache/pypoetry/virtualenvs/sparse-autoencoder-0v-Hz7gT-py3.10/lib/python3.10/site-packages/wandb/sdk/lib/sock_client.py", line 102, in connect
    s.connect(("localhost", port))
ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/content/sparse_autoencoder/test.py", line 64, in <module>
    train_gpt_small_mlp_l

# take 2

In [11]:
# Root of the virtualenv inside the unzipped project, and its site-packages directory.
VENV_PATH_ROOT = "/content/sparse_autoencoder-main/.venv"
VENV_PATH = f"{VENV_PATH_ROOT}/lib/python3.10/site-packages"

In [12]:
# List the packages present in the virtualenv; IPython substitutes the Python
# variable VENV_PATH into the shell command.
!ls $VENV_PATH

accelerate				       notebook-7.0.7.dist-info
accelerate-0.26.1.dist-info		       notebook_shim
aiohttp					       notebook_shim-0.2.3.dist-info
aiohttp-3.9.3.dist-info			       numpy
aiosignal				       numpy-1.26.3.dist-info
aiosignal-1.3.1.dist-info		       numpy.libs
annotated_types				       nvfuser
annotated_types-0.6.0.dist-info		       nvidia
anyio					       nvidia_cublas_cu12-12.1.3.1.dist-info
anyio-4.2.0.dist-info			       nvidia_cuda_cupti_cu12-12.1.105.dist-info
appdirs-1.4.4.dist-info			       nvidia_cuda_nvrtc_cu12-12.1.105.dist-info
appdirs.py				       nvidia_cuda_runtime_cu12-12.1.105.dist-info
argon2					       nvidia_cudnn_cu12-8.9.2.26.dist-info
argon2_cffi-23.1.0.dist-info		       nvidia_cufft_cu12-11.0.2.54.dist-info
_argon2_cffi_bindings			       nvidia_curand_cu12-10.3.2.106.dist-info
argon2_cffi_bindings-21.2.0.dist-info	       nvidia_cusolver_cu12-11.4.5.107.dist-info
arrow					       nvidia_cusparse_cu12-12.1.0.106.dist-info
arrow-1.3.0.dist-info			       n

Now, let's add poetry virtual environment to python path so that all installed dependencies can be found by the python interpreter.

In [34]:
! echo $PYTHONPATH
%env PYTHONPATH="$/env/python:/content/sparse_autoencoder-main/.venv/lib/python3.10/site-packages"
! echo $PYTHONPATH

"$/env/python:/content/sparse_autoencoder-main"
env: PYTHONPATH="$/env/python:/content/sparse_autoencoder-main/.venv/lib/python3.10/site-packages"
"$/env/python:/content/sparse_autoencoder-main/.venv/lib/python3.10/site-packages"


In [16]:
# Check if we're in Colab
try:
    import google.colab  # noqa: F401 # type: ignore

    in_colab = True
except ImportError:
    in_colab = False

#  Install if in Colab
if in_colab:
    # %pip install sparse_autoencoder transformer_lens transformers wandb
    # %pip install sparse_autoencoder_fork1==0.0.3 transformer_lens transformers wandb
    pass

# Otherwise enable hot reloading in dev mode
if not in_colab:
    %load_ext autoreload
    %autoreload 2

In [27]:
# Move the kernel into the unzipped project so poetry picks up its pyproject.toml.
%cd /content/sparse_autoencoder-main
# Write poetry's export output (stdout) to requirements.txt.
# NOTE(review): poetry warns that poetry-plugin-export should be installed explicitly
# for this command to keep working in future releases.
!poetry export > requirements.txt

/content/sparse_autoencoder-main
In order to avoid a breaking change and make your automation forward-compatible, please install poetry-plugin-export explicitly. See https://python-poetry.org/docs/plugins/#using-plugins for details on how to install a plugin.


In [28]:
# Install the exported requirements into the kernel's environment; --quiet trims pip's log.
%pip install -r requirements.txt --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.5/162.5 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m166.4/166.4 kB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m8.5 M

In [35]:
import os

from sparse_autoencoder import (
    ActivationResamplerHyperparameters,
    AutoencoderHyperparameters,
    Hyperparameters,
    LossHyperparameters,
    Method,
    OptimizerHyperparameters,
    Parameter,
    PipelineHyperparameters,
    SourceDataHyperparameters,
    SourceModelHyperparameters,
    SweepConfig,
    sweep,
)

AttributeError: module 'torch._functorch.eager_transforms' has no attribute 'grad_and_value'

In [None]:
# Unpack the uploaded archive into the current directory.
# NOTE(review): the trailing `cd` runs inside the `!` subshell only and does not change
# the notebook's working directory; %cd is needed for a lasting change.
!unzip sparse_autoencoder.zip && cd sparse_autoencoder

Archive:  sparse_autoencoder.zip
   creating: sparse_autoencoder/
  inflating: sparse_autoencoder/mkdocs.yml  
  inflating: sparse_autoencoder/LICENSE  
  inflating: __MACOSX/sparse_autoencoder/._LICENSE  
   creating: sparse_autoencoder/sparse_autoencoder/
  inflating: sparse_autoencoder/.pre-commit-config.yaml  
  inflating: sparse_autoencoder/pyproject.toml  
   creating: sparse_autoencoder/.checkpoints/
   creating: sparse_autoencoder/docs/
   creating: sparse_autoencoder/.devcontainer/
  inflating: sparse_autoencoder/README.md  
  inflating: sparse_autoencoder/.gitignore  
   creating: sparse_autoencoder/.github/
  inflating: sparse_autoencoder/poetry.lock  
   creating: sparse_autoencoder/.git/
   creating: sparse_autoencoder/.vscode/
  inflating: sparse_autoencoder/test.ipynb  
   creating: sparse_autoencoder/wandb/
   creating: sparse_autoencoder/sparse_autoencoder/metrics/
   creating: sparse_autoencoder/sparse_autoencoder/source_data/
   creating: sparse_autoencoder/sparse_au

In [None]:
# Confirm the extracted directory exists; the `cd` is local to this `!` subshell.
!cd /content/sparse_autoencoder && pwd

/content/sparse_autoencoder


In [None]:
import os

# %cd (unlike `!cd`) changes the kernel's own working directory for subsequent cells.
%cd /content/sparse_autoencoder

from sparse_autoencoder import (
    ActivationResamplerHyperparameters,
    AutoencoderHyperparameters,
    Hyperparameters,
    LossHyperparameters,
    Method,
    OptimizerHyperparameters,
    Parameter,
    PipelineHyperparameters,
    SourceDataHyperparameters,
    SourceModelHyperparameters,
    SweepConfig,
    sweep,
)

/content/sparse_autoencoder


ImportError: cannot import name 'ActivationResamplerHyperparameters' from 'sparse_autoencoder' (unknown location)

In [None]:
def train_gpt_small_mlp_layers(
    expansion_factor: int = 4,
    n_layers: int = 12,
) -> None:
    """Run a new sweep experiment on GPT 2 Small's MLP layers.

    Builds a `SweepConfig` (sweep method `Method.RANDOM`) whose L1 coefficient and
    learning rate are given as min/max ranges while every other hyperparameter is a
    fixed value, then passes it to `sweep`.

    Args:
        expansion_factor: Expansion factor for the autoencoder.
        n_layers: Number of layers to train on, counted from layer 0. Must be
            between 1 and 12 inclusive.

    Raises:
        ValueError: If `n_layers` is smaller than 1 or larger than 12.

    """
    # Fail fast on an out-of-range layer count: 0 would produce an empty hook list and
    # anything above the documented maximum would name hooks past the last layer.
    max_layers = 12
    if not 1 <= n_layers <= max_layers:
        error_message = f"n_layers must be between 1 and {max_layers}, got {n_layers}"
        raise ValueError(error_message)

    sweep_config = SweepConfig(
        parameters=Hyperparameters(
            loss=LossHyperparameters(
                l1_coefficient=Parameter(max=0.03, min=0.008),
            ),
            optimizer=OptimizerHyperparameters(
                lr=Parameter(max=0.001, min=0.00001),
            ),
            source_model=SourceModelHyperparameters(
                name=Parameter("gpt2"),
                # One MLP-output hook name per layer index 0..n_layers-1.
                cache_names=Parameter(
                    [f"blocks.{layer}.hook_mlp_out" for layer in range(n_layers)]
                ),
                hook_dimension=Parameter(768),
            ),
            source_data=SourceDataHyperparameters(
                dataset_path=Parameter("alancooney/sae-monology-pile-uncopyrighted-tokenizer-gpt2"),
                context_size=Parameter(256),
                pre_tokenized=Parameter(value=True),
                pre_download=Parameter(value=False),  # Default to streaming the dataset
            ),
            autoencoder=AutoencoderHyperparameters(
                expansion_factor=Parameter(value=expansion_factor)
            ),
            pipeline=PipelineHyperparameters(
                max_activations=Parameter(1_000_000_000),
                checkpoint_frequency=Parameter(100_000_000),
                validation_frequency=Parameter(100_000_000),
                max_store_size=Parameter(1_000_000),
            ),
            activation_resampler=ActivationResamplerHyperparameters(
                resample_interval=Parameter(200_000_000),
                n_activations_activity_collate=Parameter(100_000_000),
                threshold_is_dead_portion_fires=Parameter(1e-6),
                max_n_resamples=Parameter(4),
            ),
        ),
        method=Method.RANDOM,
    )

    sweep(sweep_config=sweep_config)

In [None]:
# Launch the sweep with the default arguments (expansion_factor=4, n_layers=12).
train_gpt_small_mlp_layers()