# Set Up/Imports

In [1]:
conda activate "/pscratch/sd/m/mansisak/memorization/env/"


Note: you may need to restart the kernel to use updated packages.


In [2]:
!pip list

Package                   Version            Editable project location
------------------------- ------------------ -----------------------------------------------------------------------------
accelerate                0.33.0
anyio                     4.2.0
argon2-cffi               23.1.0
argon2-cffi-bindings      21.2.0
arrow                     1.3.0
astropy                   6.0.0
astropy-iers-data         0.2024.1.8.0.30.55
asttokens                 2.4.1
async-lru                 2.0.4
attrs                     23.2.0
Authlib                   1.3.0
Babel                     2.14.0
beautifulsoup4            4.12.2
bleach                    6.1.0
bokeh                     3.3.3
Brotli                    1.0.9
cached-property           1.5.2
Cartopy                   0.22.0
certifi                   2023.11.17
cffi                      1.16.0
cftime                    1.6.3
charset-normalizer        3.3.2
click                     8.1.7
cloudpickle               3.0.0
colorama    

In [3]:
#pip install accelerate

In [4]:
import torch
import os
from transformers import AutoModelForCausalLM
from torch.utils.data import DataLoader

# Load in Models

In [5]:
def print_edited_model_paths(parent_path):
    
    total_exp = 0
    for model_name in ['pythia-6.9b-deduped', 'pythia-2.8b-deduped']:
        y_idx = 0
        for step in [36000, 72000, 108000, 143000]:
            for loc_method in [
                  "act",
                  "hc",
                  "slim",
                  "durable",
                  "durable_agg",
                  "random",
                  "random_greedy",
                  "greedy",
              ]:
                
                for ratio in [0.00001, 0.0001, 0.001, 0.01, 0.05, 0.1, 0.25, 0.3]:
                    result_path = f"{parent_path}{step}/EleutherAI_edit/{loc_method}/mem/{ratio}"
                    if loc_method not in ["random", "random_greedy"]:
                        if ratio >= 0.1:
                            continue

                    # this ratio is too small for neuron-level methods
                    if loc_method in ["zero", "hc", "ig", "slim", "act"]:
                        if ratio <= 0.0001:
                            continue

                    if loc_method in ["greedy"]:
                        if ratio > 0.00001:
                            continue
                            
                    ######
                    if loc_method in ["greedy", "durable", "durable_agg","act"]:
                        model_path = f'{result_path}/{model_name}'
                        total_exp += 1

                    if loc_method in ["slim", "hc"]:
                        for epochs in [1, 10, 20]:
                            total_exp += 1
                            model_path = f'{result_path}/{epochs}/1000/0.1/0.1/{model_name}'                      

                    if loc_method in ["random", "random_greedy"]:
                        for epochs in [1, 10, 20]:
                            total_exp += 1
                            model_path = f'{result_path}/{epochs}/0.1/0.9/0.0005/{model_name}'
                    if os.path.isfile(model_path):
                        print("edited model exists:", model_path)
                    else:
                        print("edited model doesn't exist yet: ", model_path)

    print("total_expeirments: ", total_exp)



In [6]:
parent_path = "/pscratch/sd/m/mansisak/memorization/model_ckpts/"

print_edited_model_paths(parent_path)

edited model exists: /pscratch/sd/m/mansisak/memorization/model_ckpts/36000/EleutherAI_edit/act/mem/0.001/pythia-6.9b-deduped
edited model exists: /pscratch/sd/m/mansisak/memorization/model_ckpts/36000/EleutherAI_edit/act/mem/0.01/pythia-6.9b-deduped
edited model exists: /pscratch/sd/m/mansisak/memorization/model_ckpts/36000/EleutherAI_edit/act/mem/0.05/pythia-6.9b-deduped
edited model exists: /pscratch/sd/m/mansisak/memorization/model_ckpts/36000/EleutherAI_edit/hc/mem/0.001/20/1000/0.1/0.1/pythia-6.9b-deduped
edited model exists: /pscratch/sd/m/mansisak/memorization/model_ckpts/36000/EleutherAI_edit/hc/mem/0.01/20/1000/0.1/0.1/pythia-6.9b-deduped
edited model exists: /pscratch/sd/m/mansisak/memorization/model_ckpts/36000/EleutherAI_edit/hc/mem/0.05/20/1000/0.1/0.1/pythia-6.9b-deduped
edited model exists: /pscratch/sd/m/mansisak/memorization/model_ckpts/36000/EleutherAI_edit/slim/mem/0.001/20/1000/0.1/0.1/pythia-6.9b-deduped
edited model exists: /pscratch/sd/m/mansisak/memorization/mo

In [14]:
# Example of how to load in a model:

#model = torch.load("/pscratch/sd/m/mansisak/memorization/model_ckpts/108000/EleutherAI_edit/durable_agg/mem/0.01/pythia-6.9b-deduped", map_location="cpu")

model = AutoModelForCausalLM.from_pretrained(
    "EleutherAI/pythia-2.8b-deduped",
    torch_dtype=torch.float16,
    device_map="auto"
    #evice_map="cpu"
)
sd_path = '/pscratch/sd/m/mansisak/memorization/model_ckpts/143000/EleutherAI_edit/random_greedy/mem/0.001/20/0.1/0.9/0.0005/pythia-2.8b-deduped'
sd = torch.load(sd_path, map_location="cpu")["model_state_dict"]
if "random" in sd_path:
    for k in sd:
        if "4h" in k:
            #rint(sd[k].shape)
            #rint(k)
            sd[k] = sd[k].T
model.load_state_dict(sd, assign=True)
model.eval()

GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(50304, 2560)
    (emb_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-31): 32 x GPTNeoXLayer(
        (input_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (post_attention_dropout): Dropout(p=0.0, inplace=False)
        (post_mlp_dropout): Dropout(p=0.0, inplace=False)
        (attention): GPTNeoXSdpaAttention(
          (rotary_emb): GPTNeoXRotaryEmbedding()
          (query_key_value): Linear(in_features=2560, out_features=7680, bias=True)
          (dense): Linear(in_features=2560, out_features=2560, bias=True)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (mlp): GPTNeoXMLP(
          (dense_h_to_4h): Linear(in_features=2560, out_features=10240, bias=True)
          (dense_4h_to_h): Linear(in_features=10240, out_features=2560, bias=Tr

In [16]:
model.device

device(type='cpu')

# Load in Data

In [17]:
random_data = torch.load("/pscratch/sd/m/mansisak/memorization/src/data/pythia_mem_data/pile_random_batch.pt")
random_data_pile = torch.reshape(random_data[0:2040], (3264, 80))
random_data = random_data_pile[0:1632]
extra_data = random_data_pile[1632:]
random_dataloader = DataLoader(random_data, batch_size=1, shuffle=False)

# Model Inference

In [18]:
def perplexity(dataloader, model):
    avg_metric = 0
    for batch in dataloader:
        with torch.no_grad():
            model_output = model(batch, labels=batch)
        loss = model_output.loss
        avg_metric += torch.exp(loss)
        print(loss)
    return avg_metric.cpu().item() / len(dataloader)


In [19]:
perplexity(random_dataloader, model)

RuntimeError: "nll_loss_out_frame" not implemented for 'Half'