In [12]:
import subprocess
import os
import sys
# Make project modules importable from this notebook.
# `project_dir` is the absolute path of the parent of the current working directory,
# so this assumes the kernel was started in the notebook's own folder — TODO confirm.
project_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(project_dir)
# Also add the sibling `src` directory (resolved relative to the current working
# directory); presumably this is where `config` lives — TODO confirm.
sys.path.append('../src')

from config import MODEL_FP_MAP

In [None]:
# Models to submit jobs for: uncomment the entries you want to run.
# Each name is used as a key into MODEL_FP_MAP, and get_slurm_cmd reads the model
# size from the second '-'-separated field with its last character dropped
# (e.g. 'llama2-13b' -> 13.0), so new names must follow that pattern.
models = [
    # 'gptj-6b',
    # 'pythia-6.9b',
    # 'llama-7b',
    # 'llama2-7b',
    # 'llama2i-7b',
    # 'llama2-13b',
    # 'llama2i-13b',
    # 'llama2-70b',
    # 'llama3_0-8b',
    # 'llama3_0-70b',
    # 'llama3-8b',
    # 'llama3i-8b',
    # 'llama3-70b',
    # 'llama3i-70b',
    # 'llama3.2-3b',
    # 'mistral1-7b',
    # 'mistral3-7b',
    # 'mistral3i-7b',
    # 'gemma2-2b',
    # 'gemma2-9b',
    # 'gemma2i-9b',
    # 'gemma2-27b',
    # 'qwen2-1.5b',
    # 'qwen2-7b',
    # 'qwen2i-7b',
    # 'qwen2-72b',
    # 'qwen2.5-3b',
    # 'qwen2.5i-3b',
    # 'qwen2.5-7b',
    # 'qwen2.5i-7b',
    # 'qwen2.5-14b',
    # 'olmo-7b-20BT',
    # 'olmo-7b-50BT',
    # 'olmo-7b-100BT',
    # 'olmo-7b-2700BT',
    # 'olmo-7b',
    # 'olmoi-7b',
    # 'olmo2-7b',
    # 'olmo2i-7b',
    # 'olmo2-13b',
    # 'amber-7b-21BT',
    # 'amber-7b-49BT',
    # 'amber-7b-101BT',
    # 'amber-7b',
    # 'falcon3-7b',
]
# Tasks to submit jobs for: uncomment the entries you want to run.
# Each task name is passed unchanged as an argument to the run_logit_lens.sh,
# run_cie.sh and run_fv_param_sweep.sh submissions in the cells below.
tasks = [
    # 'antonym',
    # 'english-french',
    # 'english-german',
    # 'english-spanish',
    # 'french-english',
    # 'german-english',
    # 'spanish-english',
    # 'present-past',
    # 'country-capital',
    # 'tqa',
]
def get_slurm_cmd(model_name):
    """Build the sbatch resource flags for a model, based on its size.

    The size is read from the second '-'-separated field of ``model_name`` with
    its last character dropped (e.g. ``'llama2-13b'`` -> ``13.0``).

    Returns a new list of flags on every call:
      * size below 13:   cluster, partition, one GPU
      * size 13 to 27:   cluster, partition, two tasks per node, one GPU
      * size above 27:   cluster, partition, two tasks per node, three GPUs

    The GPU flag is always the last element of the returned list.
    """
    size = float(model_name.split('-')[1][:-1])
    flags = ['--cluster=your_cluster', '--partition=gpu_partition']
    if size >= 13:
        # Larger models additionally request two tasks per node.
        flags.append('--ntasks-per-node=2')
        flags.append('--gpus-per-node=3' if size > 27 else '--gpus-per-node=1')
    else:
        flags.append('--gpus-per-node=1')
    return flags

# Logit Lens

In [None]:
n_shots = '0'
max_samples = '1000'
# NOTE: `tasks` (set in the previous cell) is expected to be set to tqa for this run.
for model in models:
    # Everything up to the script path is the same for all tasks of one model.
    sbatch_prefix = ['sbatch', *get_slurm_cmd(model), '../run_logit_lens.sh']
    for task in tasks:
        script_args = [model, task, MODEL_FP_MAP[model], n_shots, max_samples]
        subprocess.run(sbatch_prefix + script_args)

# Compute Indirect Effect

In [None]:
# Wall-clock limits by minimum model size, checked from largest to smallest;
# models below every threshold get the default limit.
for model_name in models:
    resource_flags = get_slurm_cmd(model_name)
    model_size = float(model_name.split('-')[1][:-1])
    time_limit = '--time=00:20:00'
    for min_size, limit in ((13, '--time=04:00:00'), (6, '--time=00:40:00')):
        if model_size >= min_size:
            time_limit = limit
            break
    sbatch_prefix = ['sbatch', *resource_flags, time_limit, '../run_cie.sh']
    for task in tasks:
        subprocess.run(sbatch_prefix + [model_name, task, MODEL_FP_MAP[model_name]])

# Run Function Vector Param Sweep
#### Requires:
1. Compute Indirect Effect
- 1.5 hr for the param sweep on smaller models
- 2.5 hr for the param sweep on larger models

In [None]:
# Submit one function-vector parameter-sweep job per (model, task) pair.
for model_name in models:
    sbatch_prefix = ['sbatch', *get_slurm_cmd(model_name), '../run_fv_param_sweep.sh']
    for task in tasks:
        script_args = [model_name, task, MODEL_FP_MAP[model_name]]
        process = subprocess.run(sbatch_prefix + script_args)

# Run Task Vectors
- no new hyperparameters, follow instructions in README of icl_task_vectors

# Run DoLa
- please change the memory mapping in the ../DoLa/dola.py::load_model "max_memory" dictionary to values that work with your GPU setup (big models only)

In [14]:
from transformers import AutoConfig
from typing import List
def get_exit_layers(model_fp: str, bucket: List[float]):
    """Pick the early-exit layer indices for one relative depth bucket.

    Args:
        model_fp: Model path or identifier passed to ``AutoConfig.from_pretrained``.
        bucket: ``[start, end]`` fractions of the model depth, with
            ``0 <= start <= end <= 1``.

    Returns:
        Every second layer index from ``int(start * n)`` through ``int(end * n)``,
        where ``n`` is ``config.num_hidden_layers``, followed by ``n`` itself when
        it is not already the last entry.

    Raises:
        ValueError: if the bucket bounds are not ordered fractions in [0, 1].
    """
    start, end = bucket[0], bucket[1]
    # Validate before loading the config: an inverted bucket would produce an empty
    # range (and an opaque IndexError below), and bounds outside [0, 1] would
    # produce layer indices that do not exist in the model.
    if not 0 <= start <= end <= 1:
        raise ValueError(
            f'bucket must satisfy 0 <= start <= end <= 1, got {list(bucket)!r}'
        )
    config = AutoConfig.from_pretrained(model_fp)
    n_layers = config.num_hidden_layers
    lower = int(start * n_layers)
    upper = int(end * n_layers) + 1
    # lower < upper is guaranteed by the check above, so the list is never empty.
    early_exit_layers = list(range(lower, upper, 2))
    if early_exit_layers[-1] != n_layers:
        early_exit_layers.append(n_layers)
    return early_exit_layers

In [None]:
dola_tasks = ['factor'] # 'factor', 'tfqa'
baselines = [False]
ln_types = ['none'] # 'next_layer', 'last_layer' (experimental)
# Alternative bucket sets (each bucket is a [start, end] fraction of model depth):
# buckets = [
#     [0, 0.25],
#     [0.25, 0.5],
#     [0.5, 0.75],
#     [0.75, 1.0],
# ]
# buckets = [
#     [0, 0.5],
#     [0.25, 0.75],
#     [0.5, 1.0],
#     [0, 1.0],
# ]
buckets = [[0.0, 0.25]]
alphas = ['0.0', '0.25', '0.5', '0.75', '0.9']
# alphas = ['0.1']

gpu_flag_prefix = '--gpus-per-node='

# Submit one DoLa job per (model, baseline, bucket, alpha, task, ln_type) combination.
for model_name in models:
    slurm_cmd = get_slurm_cmd(model_name)
    # Read the GPU count from the flag itself instead of taking the last character
    # of the last flag, so it keeps working for counts >= 10 or reordered flags.
    gpu_flag = next(flag for flag in slurm_cmd if flag.startswith(gpu_flag_prefix))
    n_gpu = gpu_flag[len(gpu_flag_prefix):]
    for baseline in baselines:
        for bucket in buckets:
            # The exit layers depend only on the model and the bucket, so resolve
            # them once here rather than reloading the model config for every alpha.
            if baseline:
                early_exit_layers = []
            else:
                early_exit_layers = get_exit_layers(MODEL_FP_MAP[model_name], bucket)
            exit_layers_arg = ','.join(map(str, early_exit_layers))
            for alpha in alphas:
                for dola_task in dola_tasks:
                    for ln_type in ln_types:
                        subprocess_arg = [
                            'sbatch', *slurm_cmd, '--nodes=1',
                            '../run_dola.sh',
                            model_name, MODEL_FP_MAP[model_name], dola_task, n_gpu, alpha,
                            ln_type, exit_layers_arg
                        ]
                        print(subprocess_arg)
                        process = subprocess.run(subprocess_arg)