# Test the various tasks in the `tasks` module.

In [1]:
import os
os.getcwd()
# os.chdir('../mechanistic-unlearning')

'/data/phillip_guo/hp-unlrn'

In [2]:
%load_ext autoreload
%autoreload 2
import os
# from cb_utils.models import load_gpt2_weights, load_demo_gpt2, tokenizer as gpt2_tokenizer, DEVICE
from torch.optim import AdamW
import torch
import pickle
import datasets
from tqdm import tqdm_notebook as tqdm
from itertools import cycle
# from eval import evaluate_model
import pandas as pd
from tasks.inference_utils import get_final_logits, generate_text
from tasks.ioi.IOITask import IOITask_old, IOITask
from tasks.owt.OWTTask import OWTTask
from tasks.facts.SportsTask import SportsTask
from transformer_lens import HookedTransformer
# from tasks.kg_trips.ZSRETask import ZSRE
from tqdm import tqdm
import numpy as np


## Harry Potter Task Testing

### Trivia (Single Token testing)

In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM
llama = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf").cuda()
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
from tasks.hp.HPTask import HPTriviaTask
hp = HPTriviaTask(batch_size=16, tokenizer=tokenizer, device='cuda', chat_model=True, )

502
200


In [5]:
hp.get_test_loss(llama)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


tensor(0.7470, device='cuda:0')

In [6]:
hp.get_test_accuracy(llama, use_test_data=False, check_all_logits=False)

0.375

### Processing Harry Potter Book for data

In [7]:
import numpy as np
import re

with open("tasks/hp/data/Harry_Potter_all_char_separated.txt", "r") as f:
    hp_text = f.read()
hp_sentences = hp_text.split("|")
hp_sentences_processed = []
for sentence in hp_sentences:
    processed_sentence = sentence
    if len(sentence) < 2 or sentence[-2] != " ":
        continue
    if sentence[0] != " ":
        processed_sentence = processed_sentence[:-2] + processed_sentence[-1]
    else:
        assert sentence[0] == " " and sentence[-2] == " ", sentence
        processed_sentence = processed_sentence[1:-2] + processed_sentence[-1]
        
    # replace any instances of space + punctuation with just the punctuation
    processed_sentence = re.sub(r' ([.,!?])', r'\1', processed_sentence)
    # replace "“ " with just "“"
    processed_sentence = re.sub(r'“ ', r'“', processed_sentence)
    # replace " ’" with just "’"
    processed_sentence = re.sub(r' ’', r'’', processed_sentence)
    # replace ", ”" with just ",”"
    processed_sentence = re.sub(r', ”', r',”', processed_sentence)
    hp_sentences_processed.append(processed_sentence)

In [8]:
import numpy as np
def sample_passage(hp_sentences_processed, num_sentences=5):
    # get a contiguous passage of num_sentences sentences
    start = np.random.randint(0, len(hp_sentences_processed) - num_sentences)
    passage = hp_sentences_processed[start:start+num_sentences]
    return passage
sample_passage(hp_sentences_processed)

['“You took that from Sirius’s house,” said Harry, who was almost nose to nose with Mundungus and was breathing in an unpleasant smell of old tobacco and spirits.',
 '“That had the Black family crest on it.”',
 '“I no what?”',
 'spluttered Mundungus, who was slowly turning purple.',
 '“What did you do, go back the night he died and strip the place?”']

In [9]:
passage = sample_passage(hp_sentences_processed)
prompt = " ".join(passage[:-1])
completion = passage[-1]

In [10]:
# make a full train and test set
num_train = 1000
num_test = 200

hp_train_passages = []
hp_test_passages = []
for i in range(num_train):
    passage = sample_passage(hp_sentences_processed)
    hp_train_passages.append(passage)

for i in range(num_test):
    passage = sample_passage(hp_sentences_processed)
    hp_test_passages.append(passage)

import pickle
with open("tasks/hp/data/hp_verbatim_passages_train.pkl", "wb") as f:
    pickle.dump(hp_train_passages, f)
with open("tasks/hp/data/hp_verbatim_passages_test.pkl", "wb") as f:
    pickle.dump(hp_test_passages, f)


### Completions (Multi-token testing)

In [3]:
# might need to adapt to quantize for 24gb 3090, or remove .cuda()
from transformers import AutoTokenizer, AutoModelForCausalLM
hp_model = AutoModelForCausalLM.from_pretrained("microsoft/Llama2-7b-WhoIsHarryPotter").cuda()

In [21]:
from tasks import HPVerbatimTask
criterion = "levenshtein"
hp_verbatim = HPVerbatimTask(batch_size=16, tokenizer=tokenizer, device='cuda', num_completion_sentences=1, shuffle=False, criterion=criterion)
hp_verbatim_2 = HPVerbatimTask(batch_size=16, tokenizer=tokenizer, device='cuda', num_completion_sentences=1, shuffle=False, criterion=criterion)

In [22]:
llama_losses = []
hp_model_losses = []
for i in range(10):
    print("getting losses for llama")
    llama_loss = hp_verbatim.get_test_loss(llama).item()
    print()
    print("getting losses for hp model")
    hp_model_loss = hp_verbatim_2.get_test_loss(hp_model).item()
    llama_losses.append(llama_loss)
    hp_model_losses.append(hp_model_loss)
    print("\n-------\n")


getting losses for llama
Sentence: It seemed to burn feeling back into him, dispelling the numbness and sense of unreality, firing him with something that was like courage.
Top token losses: tensor([9., 7., 6., 5., 5.], device='cuda:0')
Tokens: ['something', 'feeling', 'seemed', 'like', 'firing']
Sentence: The Wizengamot was still muttering and fidgeting restlessly only when Fudge spoke again did they settle down.
Top token losses: tensor([8., 6., 5., 4., 4.], device='cuda:0')
Tokens: ['W', 'was', 'spoke', 'rest', 'still']
Sentence: “You’re losing it, too,” said Ron.
Top token losses: tensor([6., 4., 3., 3., 2.], device='cuda:0')
Tokens: ['losing', 'it', 'too', 'You', 're']
Sentence: Ron caught Harry’s eye and grinned Harry knew that he was remembering the ludicrous headdress they had seen on their visit to Xenophilius.
Top token losses: tensor([8., 5., 5., 5., 4.], device='cuda:0')
Tokens: ['remember', 'visit', 'caught', 'Harry', 'knew']
Sentence: “What are you doing here, Potter?”
To

In [23]:
print(np.mean(llama_losses))
print(np.mean(hp_model_losses))

1.9161671996116638
2.1185449719429017


In [27]:
sentences = ["In Harry Potter, the users of a magical device that allows them to travel through time are called Time-Turners.", 
             "In Harry Potter, the magical device that allows users to view memories in the third dimension is called a Pensieve. Test Test Test.",
             "In Harry Potter, the parents of this Gryffindor student's parents are dentists: Hermione Granger.",
             "I like food",]
tokenized_sentences = tokenizer(sentences, return_tensors='pt', padding='longest', truncation=True)

In [67]:
sentence_tokens = [tokenizer(sentence).input_ids for sentence in sentences]
prompt_tokens = [sentence_tokens[0][:3], sentence_tokens[1][:2], sentence_tokens[2][:3], sentence_tokens[3][:2]]
completion_tokens = [sentence_tokens[0][3:], sentence_tokens[1][2:], sentence_tokens[2][3:], sentence_tokens[3][2:]]

In [68]:
tokens = [torch.tensor(prompt_tokens[i] + completion_tokens[i]) for i in range(len(prompt_tokens))]
tokens = torch.nn.utils.rnn.pad_sequence(tokens, batch_first=True, padding_value=tokenizer(tokenizer.pad_token).input_ids[-1])

In [70]:
model_output = llama(tokenized_sentences.input_ids.cuda())
model_output_2 = llama(tokens.cuda())

In [84]:
for i in range(len(sentences)):
    loss_logits = model_output_2.logits[i, len(prompt_tokens[i]): len(prompt_tokens[i]) + len(completion_tokens[i])-1]
    # cross entropy loss between loss_logits[:, :-1] and completion_tokens[:, 1:]
    cross_entropy_loss = torch.nn.CrossEntropyLoss(reduce=False)
    print(loss_logits.shape)
    print(torch.tensor(completion_tokens[i][1:]).shape)
    loss = cross_entropy_loss(loss_logits, torch.tensor(completion_tokens[i][1:]).cuda())
    print(loss)
    # generated_tokens = tokenizer.decode(torch.argmax(loss_logits, dim=-1))
    # print(i)
    # print(tokenizer.decode(completion_tokens[i][1:]))
    # print(generated_tokens)
    

torch.Size([23, 32000])
torch.Size([23])
tensor([9.1429e-05, 1.1654e+00, 1.4605e+00, 1.1020e+01, 3.4447e-01, 6.4036e+00,
        3.1028e+00, 9.6882e-04, 3.3994e+00, 5.9247e+00, 1.0137e+00, 1.7588e-01,
        1.8208e-03, 8.8943e-01, 1.1944e+00, 4.3101e-02, 7.6505e-01, 8.2362e-01,
        3.8265e-01, 1.5264e-01, 1.2138e-01, 5.5751e-04, 1.1533e-02],
       device='cuda:0', grad_fn=<NllLossBackward0>)
torch.Size([29, 32000])
torch.Size([29])
tensor([2.1529e-01, 9.1429e-05, 1.1654e+00, 1.4605e+00, 3.2227e+00, 1.7177e-04,
        9.2850e+00, 1.5740e+00, 9.4078e-01, 4.2001e+00, 1.7808e-04, 7.4497e+00,
        5.6219e+00, 2.1458e-06, 6.3191e+00, 1.8685e+00, 1.2536e+01, 3.5377e+00,
        2.3908e-01, 1.6552e-01, 1.3248e+00, 2.3426e+00, 4.2892e-03, 3.7353e-04,
        8.6653e-03, 1.4467e+01, 1.2167e+01, 1.2560e+00, 1.5843e+00],
       device='cuda:0', grad_fn=<NllLossBackward0>)
torch.Size([24, 32000])
torch.Size([24])
tensor([9.1429e-05, 1.1654e+00, 1.4605e+00, 9.4727e+00, 2.4553e-02, 1.1625e



In [33]:
# model_output.logits
# get probabilities of each token from logits
probs = torch.softmax(model_output.logits, dim=-1) # (batch_size, seq_len, vocab_size)
# get top 5 tokens for each token in each sequence
top5 = torch.topk(probs, k=5, dim=-1) # (batch_size, seq_len, 5)
print(top5.indices[:, :, 0])

tensor([[19838,   278, 10173,   357,   322,   278,   281,   310,   278, 24706,
           936,  5960,  2000,  6511,   963,   304,  9850,   304,   931,   526,
          2000,  5974, 29899, 27407,   414, 29889,    13, 29879, 29873, 29873,
         29873, 29900],
        [19838,   278, 10173,   357,   322,   278,   281,   936,   907,  2998,
          6511,   281,   304,  9850,   322,  3842,   338,   263,   349,  2706,
           338,  2000,   385,   376,   575,  2418, 29889,    13,   596,  4321,
            13,    13],
        [19838,   278, 10173,   357,   322,   278,   281,   310, 10686,  2931,
           719,   600,   513,   272,  8368,   526, 29879,  1900,   892,  9445,
          2879, 29889,    13,  1421,  1632,  4600, 29915,    13, 29879, 29879,
         29879, 29889],
        [19838, 29915,   304, 29889, 29879, 29900, 29900, 29900, 29900, 29900,
         29900,    12,    13,  1576,    13,  1576,  1576,  1576,  1576,  1576,
          1576,    13,    13,    13,    13,    13,    13,  

In [40]:
# see which tokens are special tokens
print(tokenizer.special_tokens_map)
print(tokenizer(['<s>', '</s>', '<unk>'], return_tensors='pt', padding='longest', truncation=True))
print(f"{tokenizer.decode([13])=}")

{'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}
{'input_ids': tensor([[1, 1],
        [1, 2],
        [1, 0]]), 'attention_mask': tensor([[1, 1],
        [1, 1],
        [1, 1]])}
tokenizer.decode([13])='\n'


## New IOI Testing

In [77]:
# model = load_demo_gpt2(means=False)
gpt2_model = HookedTransformer.from_pretrained('gpt2-small').to('cuda')
gpt2_tokenizer = gpt2_model.tokenizer

Loaded pretrained model gpt2-small into HookedTransformer
Moving model to device:  cuda


In [43]:
from tasks.general.DatasetTasks import OWTTask, PileTask
owt = OWTTask(64, tokenizer)
pile = PileTask(64, tokenizer)
sports = SportsTask(64, tokenizer)
ioi = IOITask(64, tokenizer)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.
Resolving data files: 100%|██████████| 30/30 [00:01<00:00, 19.23it/s]
Resolving data files: 100%|██████████| 30/30 [00:00<00:00, 132591.28it/s]


In [44]:
from tasks.ioi.IOITask import IOITask_Uniform
ioi_uniform = IOITask_Uniform(64, tokenizer, uniform_over="all_tokens")

In [45]:
ioi_uniform.get_test_loss(model)

tensor(17.6684, device='cuda:0')

In [8]:
ioi.get_test_loss(model)

tensor(0.6259, device='cuda:0')

In [18]:
with open("tasks/general/data/toxic/train.pkl", "rb") as f:
    toxic = pickle.load(f)
print(toxic)

[(80200184, 0.9088885, 'lol this guy must live in the middle of nowherecops kill everyone but they prefer niggers. they just assume all niggers have guns.'), (80532576, 0.90646505, 'tfw half black but redpilled, browsing /pol/ since it came back in 2011white gfs family gives no fucks about me being nonwhite, even invited me to move inFUCKING STOP'), (79806084, 0.90610445, 'Sorry I dont give a shit  if you want to argue with someone who does'), (81500706, 0.90026146, 'FOR FUCKS SAKE SAIM AL HAID, SOMEONE STOP THIS MADMAN'), (80160033, 0.9026442, 'Well thats some spooky stuff. I dont know whats going on but this is like shitty science fanfiction is being brought to real life.'), (80916195, 0.9014249, 'Bro, do you even height?Im 61 and if I was 150 lbs Id have to be hospitalized for malnutrition. I have a friend thats at least 3-4 inches shorter than me who weighs 170 and is a stick.Get some muscle you scrawny faggot.'), (81361438, 0.90237534, 'http://www.dailymail.co.uk/news/article-3589

In [20]:
from tasks.general.DatasetTasks import ToxicTask
toxic = ToxicTask(64, tokenizer)

In [19]:
train_dataset = load_dataset('Skylion007/openwebtext', split='train', streaming=stream_dataset)

NameError: name 'load_dataset' is not defined

In [5]:
sports.get_batch()

{'prompt': ['Fact: Tiger Woods plays the sport of golf\nFact: Becky Hammon plays the sport of',
  'Fact: Tiger Woods plays the sport of golf\nFact: Tim Lincecum plays the sport of',
  'Fact: Tiger Woods plays the sport of golf\nFact: Chad Billingsley plays the sport of',
  'Fact: Tiger Woods plays the sport of golf\nFact: Muhammad Wilkerson plays the sport of',
  'Fact: Tiger Woods plays the sport of golf\nFact: Kyle Hendricks plays the sport of',
  'Fact: Tiger Woods plays the sport of golf\nFact: Baron Davis plays the sport of',
  'Fact: Tiger Woods plays the sport of golf\nFact: Ty Law plays the sport of',
  'Fact: Tiger Woods plays the sport of golf\nFact: Kenny Britt plays the sport of',
  'Fact: Tiger Woods plays the sport of golf\nFact: Masai Ujiri plays the sport of',
  'Fact: Tiger Woods plays the sport of golf\nFact: Austin Rivers plays the sport of',
  'Fact: Tiger Woods plays the sport of golf\nFact: Victor Cruz plays the sport of',
  'Fact: Tiger Woods plays the sport of g

In [7]:
sports.get_test_accuracy(model)

0.46875

In [8]:
print(owt.get_test_loss(model))
print(pile.get_test_loss(model))
print(sports.get_test_loss(model))
print(ioi.get_test_loss(model))

tensor(3.5744, device='cuda:0')
tensor(3.9375, device='cuda:0')
tensor(2.9486, device='cuda:0')
tensor(0.5690, device='cuda:0')


In [5]:
old_ioi = IOITask_old(10, gpt2_tokenizer)
print(next(old_ioi.train_iter))

{'text': ['Then, Crystal and Jeffrey went to the hospital. Jeffrey gave a snack to', 'Then, Patrick and Matthew went to the office. Matthew gave a necklace to', 'Then, Adam and Brian went to the store. Brian gave a basketball to', 'Then, Samantha and Matthew went to the garden. Matthew gave a kiss to', 'Then, Andrew and Justin went to the office. Justin gave a drink to', 'Then, Jacob and Sara went to the station. Sara gave a necklace to', 'Then, Samuel and Joshua went to the house. Joshua gave a necklace to', 'Then, Gregory and Megan went to the house. Megan gave a necklace to', 'Then, Allison and Benjamin went to the station. Benjamin gave a drink to', 'Then, Jose and Heather went to the school. Heather gave a snack to'], 'IO': ['Crystal', 'Patrick', 'Adam', 'Samantha', 'Andrew', 'Jacob', 'Samuel', 'Gregory', 'Allison', 'Jose'], 'S': ['Jeffrey', 'Matthew', 'Brian', 'Matthew', 'Justin', 'Sara', 'Joshua', 'Megan', 'Benjamin', 'Heather']}


In [6]:
from tasks.ioi.IOITask import IOIData
N = 1000
clean_dataset = IOIData(
    prompt_type='ABBA',
    N=N,
    tokenizer=gpt2_tokenizer,
    prepend_bos=False,
    seed=1,
    nb_templates=1,
    device=DEVICE
)
corr_dataset = clean_dataset.gen_flipped_prompts('ABC->XYZ, BAB->XYZ')

In [10]:
# make a dataset that is compatible with dataloader of clean_dataset.ioi_prompts

import torch
from torch.utils.data import Dataset, DataLoader
class IOIPromptsDataset(Dataset):
    def __init__(self, data_list):
        self.data = data_list

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        return item

# Create dataset instance
ioi_prompts_dataset = IOIPromptsDataset(clean_dataset.ioi_prompts)

# Create DataLoader instance
dataloader = DataLoader(ioi_prompts_dataset, batch_size=32, shuffle=True)
print(next(iter(dataloader)))

{'[PLACE]': ['garden', 'house', 'house', 'store', 'restaurant', 'school', 'garden', 'school', 'office', 'hospital', 'hospital', 'house', 'restaurant', 'station', 'restaurant', 'school', 'office', 'hospital', 'school', 'school', 'school', 'station', 'restaurant', 'store', 'hospital', 'garden', 'hospital', 'restaurant', 'school', 'house', 'station', 'school'], '[OBJECT]': ['computer', 'ring', 'snack', 'drink', 'drink', 'necklace', 'bone', 'basketball', 'kiss', 'drink', 'ring', 'necklace', 'necklace', 'basketball', 'drink', 'kiss', 'kiss', 'ring', 'basketball', 'computer', 'ring', 'basketball', 'computer', 'basketball', 'snack', 'basketball', 'bone', 'computer', 'computer', 'snack', 'snack', 'drink'], 'text': ['Then, George and Tyler went to the garden. Tyler gave a computer to George', 'Then, Ruby and Laura went to the house. Laura gave a ring to Ruby', 'Then, Alan and Dean went to the house. Dean gave a snack to Alan', 'Then, Crew and Kate went to the store. Kate gave a drink to Crew', 

In [18]:
new_ioi = IOITask(10, gpt2_tokenizer, prep_acdcpp=True)
print(next(new_ioi.train_iter)['text'])

['Then, Louis and Ruby went to the garden. Ruby gave a computer to', 'Then, Andrew and Mary went to the office. Mary gave a drink to', 'Then, Jane and Andre went to the hospital. Andre gave a kiss to', 'Then, Eric and Blake went to the restaurant. Blake gave a necklace to', 'Then, Cole and Rose went to the restaurant. Rose gave a kiss to', 'Then, Richard and Kate went to the restaurant. Kate gave a bone to', 'Then, Jamie and John went to the hospital. John gave a basketball to', 'Then, Edward and Georgia went to the hospital. Georgia gave a computer to', 'Then, Sullivan and Steven went to the restaurant. Steven gave a computer to', 'Then, David and Dean went to the hospital. Dean gave a snack to']


## GPT-2 Testing

In [61]:
model = load_demo_gpt2(means=False)

In [65]:
model.cfg

Config(d_model=768, debug=False, layer_norm_eps=1e-05, d_vocab=50257, init_range=0.02, n_ctx=1024, d_head=64, d_mlp=3072, n_heads=12, n_layers=12)

In [3]:
ioi_old = IOITask_old(batch_size=100, tokenizer=tokenizer)
ioi_new = IOITask(batch_size=100, tokenizer=tokenizer)
owt = OWTTask(batch_size=3, tokenizer=tokenizer)

In [None]:
with open(f"tasks/ioi/data/ioi_prompts_single_template_train.pkl", "rb") as f:
    ioi_prompts_train = pickle.load(f)
from torch.utils.data import Dataset, DataLoader
class IOIPromptsDataset(Dataset):
    def __init__(self, ioi_prompts, tokenizer):
        self.ioi_prompts = ioi_prompts
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.ioi_prompts)

    def __getitem__(self, idx):
        prompt = self.ioi_prompts[idx]
        text = prompt['text']
        text = " ".join(text.split(" ")[:-1])
        label = prompt['IO']
        return {
            'text': text,
            'IO': label,
            'S': prompt['S']
        }
ioi_dataset = IOIPromptsDataset(ioi_prompts_train, tokenizer)
def collate_batch(batch):

    texts = [item['text'] for item in batch]
    ios = [item['IO'] for item in batch]
    subjects = [item['S'] for item in batch]
    return {
        'text': texts,
        'IO': ios,
        'S': subjects
    }

ioi_dataloader = DataLoader(ioi_dataset, batch_size=100, shuffle=True, collate_fn=collate_batch)


In [4]:
# ioi_texts = [ioi.ioi_prompts_train_dataset[i]['text'] for i in range(3)]
# for i in range(3):
#     ioi_texts.append(ioi_texts[i][5:])=
ioi_texts = ['Then, Sarah and Tyler went to the garden. Tyler gave a bone to Sarah',
 'Then, Tyler and Sarah went to the garden. Sarah gave a bone to Tyler',
 'Then, Timothy and Stephen went to the school. Stephen gave a necklace to Timothy',
 'Then, Sarah and Tyler went to the flower garden. Tyler gave a bone to Sarah',
 'Then, Tyler and Sarah went to the flower garden. Sarah gave a bone to Tyler',
 'Then, Timothy and Stephen went to the old school. Stephen gave a necklace to Timothy']

# cut last name from ioi_texts
for i in range(6):
    ioi_texts[i] = ioi_texts[i][:-len(ioi_texts[i].split()[-1])-1]

In [5]:
# tokenize ioi_texts
tokens = tokenizer(ioi_texts, return_tensors='pt', padding=True, truncation=True).input_ids

# detokenize ioi_texts, token by token
for i in range(6):
    for token in tokens[i]:
        print(tokenizer.decode(token.item()), end=':')
    print()    

Then:,: Sarah: and: Tyler: went: to: the: garden:.: Tyler: gave: a: bone: to:<|endoftext|>:
Then:,: Tyler: and: Sarah: went: to: the: garden:.: Sarah: gave: a: bone: to:<|endoftext|>:
Then:,: Timothy: and: Stephen: went: to: the: school:.: Stephen: gave: a: necklace: to:<|endoftext|>:
Then:,: Sarah: and: Tyler: went: to: the: flower: garden:.: Tyler: gave: a: bone: to:
Then:,: Tyler: and: Sarah: went: to: the: flower: garden:.: Sarah: gave: a: bone: to:
Then:,: Timothy: and: Stephen: went: to: the: old: school:.: Stephen: gave: a: necklace: to:


In [7]:
final_logits = get_final_logits(model, tokenizer, ioi_texts)

# decode final_logits
for i in range(6):
    print(tokenizer.decode(final_logits[i].argmax(-1).tolist()), end='')

[15, 15, 15, 16, 16, 16]
 Sarah Tyler Timothy Sarah Tyler Timothy

In [19]:
t = torch.cuda.get_device_properties(0).total_memory
r = torch.cuda.memory_reserved(0)
a = torch.cuda.memory_allocated(0)
f = r-a  # free inside reserved
print(f"Total: {t*1e-9}, Reserved: {r*1e-9}, Allocated: {a*1e-9}, Free: {f*1e-9}")

Total: 84.986691584, Reserved: 1.9566428160000002, Allocated: 1.1659540480000001, Free: 0.7906887680000001


In [None]:
generate_text(model, tokenizer, 'Then, Sarah and Tyler went to the flower garden. Tyler gave a bone to', 3)

'Then, Sarah and Tyler went to the flower garden. Tyler gave a bone to Sarah, and'

In [11]:
from tasks.kg_trips.ZSRETask import MENDQADataset
mendqa_dataset = MENDQADataset(data_dir="tasks/kg_trips", tok=tokenizer)

In [12]:
# call the __getitem__ method of the dataset
for i in range(3):
    print(mendqa_dataset[i])

{'case_id': 0, 'requested_rewrite': {'prompt': 'What university did {} attend?', 'subject': 'Watts Humphrey', 'target_new': {'str': 'Illinois Institute of Technology'}, 'target_true': {'str': '<|endoftext|>'}}, 'paraphrase_prompts': ['What university did Watts Humphrey take part in?'], 'neighborhood_prompts': [{'prompt': 'nq question: who played desmond doss father in hacksaw ridge?', 'target': ' Hugo'}, {'prompt': 'nq question: who played desmond doss father in hacksaw ridge? Hugo', 'target': ' We'}, {'prompt': 'nq question: who played desmond doss father in hacksaw ridge? Hugo We', 'target': 'aving'}], 'attribute_prompts': [], 'generation_prompts': []}
{'case_id': 1, 'requested_rewrite': {'prompt': 'Which family does {} belong to?', 'subject': 'Ramalinaceae', 'target_new': {'str': 'Lecanorales'}, 'target_true': {'str': '<|endoftext|>'}}, 'paraphrase_prompts': ['What family are Ramalinaceae?'], 'neighborhood_prompts': [{'prompt': 'nq question: types of skiing in the winter olympics 20

## Pythia Testing

In [5]:
import pandas as pd
from transformer_lens import HookedTransformer

In [6]:
pythia_model = HookedTransformer.from_pretrained(
    "pythia-70m"
)
pythia_tokenizer = pythia_model.tokenizer

Loaded pretrained model pythia-2.8b into HookedTransformer


In [7]:
df = pd.read_csv("tasks/facts/data/sports.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,athlete,sport,log_prob_one_shot,num_athlete_tokens,sport_index,sport_token,prompt
0,1642,DeForest Buckner,football,-0.492917,5,2,5842,Fact: Tiger Woods plays the sport of golf\nFac...
1,738,Walter Payton,football,-0.105714,3,2,5842,Fact: Tiger Woods plays the sport of golf\nFac...
2,16778,Anthony DeSclafani,baseball,-0.292668,6,0,14623,Fact: Tiger Woods plays the sport of golf\nFac...
3,14501,Kevin Millwood,baseball,-0.372979,3,0,14623,Fact: Tiger Woods plays the sport of golf\nFac...
4,188,Vonta Leach,football,-0.648644,5,2,5842,Fact: Tiger Woods plays the sport of golf\nFac...


In [8]:
from tasks.inference_utils import generate_text
for i in range(5):
    prompt = df['prompt'].iloc[i]
    print(f"Prompt: {prompt}")
    print(generate_text(pythia_model, pythia_tokenizer, prompt, 1))
    print(f"Correct sport: {df['sport'].iloc[i]}")
    print()

Prompt: Fact: Tiger Woods plays the sport of golf
Fact: DeForest Buckner plays the sport of


  0%|          | 0/1 [00:00<?, ?it/s]

Fact: Tiger Woods plays the sport of golf
Fact: DeForest Buckner plays the sport of basketball
Correct sport: football

Prompt: Fact: Tiger Woods plays the sport of golf
Fact: Walter Payton plays the sport of


  0%|          | 0/1 [00:00<?, ?it/s]

Fact: Tiger Woods plays the sport of golf
Fact: Walter Payton plays the sport of football
Correct sport: football

Prompt: Fact: Tiger Woods plays the sport of golf
Fact: Anthony DeSclafani plays the sport of


  0%|          | 0/1 [00:00<?, ?it/s]

Fact: Tiger Woods plays the sport of golf
Fact: Anthony DeSclafani plays the sport of baseball
Correct sport: baseball

Prompt: Fact: Tiger Woods plays the sport of golf
Fact: Kevin Millwood plays the sport of


  0%|          | 0/1 [00:00<?, ?it/s]

Fact: Tiger Woods plays the sport of golf
Fact: Kevin Millwood plays the sport of baseball
Correct sport: baseball

Prompt: Fact: Tiger Woods plays the sport of golf
Fact: Vonta Leach plays the sport of


  0%|          | 0/1 [00:00<?, ?it/s]

Fact: Tiger Woods plays the sport of golf
Fact: Vonta Leach plays the sport of football
Correct sport: football



In [9]:
football_token, baseball_token, basketball_token = pythia_tokenizer(" football baseball basketball").input_ids
print(f"{football_token=} {baseball_token=} {basketball_token=}")

football_token=5842 baseball_token=14623 basketball_token=14648


In [10]:
# set up dataloader to batch through df
from torch.utils.data import DataLoader
from tasks.inference_utils import get_final_logits
criterion = torch.nn.CrossEntropyLoss()

class SportsDataset(torch.utils.data.Dataset):
    def __init__(self, df, tokenizer):
        self.df = df
        self.tokenizer = tokenizer

    def __getitem__(self, idx):
        return self.df['prompt'].iloc[idx], self.df['sport'].iloc[idx]

    def __len__(self):
        return len(self.df)
    
sports_dataset = SportsDataset(df, pythia_tokenizer)
sports_dataloader = DataLoader(sports_dataset, batch_size=3)

# batch through dataloader
for batch in sports_dataloader:
    prompts, labels = batch
    labels = [' ' + sport for sport in labels]
    final_logits = get_final_logits(pythia_model, pythia_tokenizer, prompts, model_returns_tuple=False)
    print(final_logits)
    tokenized_labels = pythia_tokenizer(labels, return_tensors='pt', padding=True, truncation=True).input_ids[:, 0]
    print(tokenized_labels)
    print(criterion(final_logits, tokenized_labels))

    print(criterion(final_logits, torch.Tensor([14648, 14648, 14648]).long()))
    break

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


tensor([[ 8.3688, -3.1681,  4.5482,  ..., -2.6834, -2.6928, -2.4418],
        [10.1932, -2.6702,  6.6471,  ..., -2.3068, -2.4198, -2.0895],
        [ 8.2223, -2.5343,  3.4966,  ..., -2.3035, -2.3677, -2.1004]],
       grad_fn=<StackBackward0>)
tensor([ 5842,  5842, 14623])
tensor(0.7214, grad_fn=<NllLossBackward0>)
tensor(2.7030, grad_fn=<NllLossBackward0>)


In [18]:
from tasks.facts.SportsTask import SportsTask

sports_task = SportsTask(batch_size=1000, tokenizer=pythia_tokenizer)
print(sports_task.get_test_loss(pythia_model))
print(sports_task.get_test_accuracy(pythia_model))

tensor(0.1563)
0.99


In [12]:
t = torch.cuda.get_device_properties(0).total_memory
r = torch.cuda.memory_reserved(0)
a = torch.cuda.memory_allocated(0)
f = r-a  # free inside reserved
print(f"Total: {t*1e-9}, Reserved: {r*1e-9}, Allocated: {a*1e-9}, Free: {f*1e-9}")

Total: 84.986691584, Reserved: 17.471373312, Allocated: 15.943849984000002, Free: 1.527523328


## Model Size Testing

In [29]:
import pandas as pd
from transformer_lens import HookedTransformer
pythia_model = HookedTransformer.from_pretrained(
    "pythia-70m"
)
pythia_tokenizer = pythia_model.tokenizer

ioi_pythia_old = IOITask_old(batch_size=100, tokenizer=pythia_tokenizer, handle_multitoken_labels=True)
ioi_pythia = IOITask(batch_size=100, tokenizer=pythia_tokenizer, handle_multitoken_labels=True)
sports_pythia = SportsTask(batch_size=100, tokenizer=pythia_tokenizer)
owt_pythia = OWTTask(batch_size=100, tokenizer=pythia_tokenizer)

ioi_gpt2_old = IOITask_old(batch_size=100, tokenizer=gpt2_tokenizer)
ioi_gpt2 = IOITask(batch_size=100, tokenizer=gpt2_tokenizer)
sports_gpt2 = SportsTask(batch_size=100, tokenizer=gpt2_tokenizer)
owt_gpt2 = OWTTask(batch_size=100, tokenizer=gpt2_tokenizer)

Loaded pretrained model pythia-70m into HookedTransformer


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

In [23]:

# pythia model sizes: 70M, 160M, 410M, 1B, 1.4B, 2.8B, 6.9B, and 12B
models = {
    "pythia-70m": HookedTransformer.from_pretrained("pythia-70m"),
    "pythia-160m": HookedTransformer.from_pretrained("pythia-160m"),
    "pythia-410m": HookedTransformer.from_pretrained("pythia-410m"),
    "pythia-1b": HookedTransformer.from_pretrained("pythia-1b"),
    "pythia-1.4b": HookedTransformer.from_pretrained("pythia-1.4b"),
    "pythia-2.8b": HookedTransformer.from_pretrained("pythia-2.8b"),

    "gpt2-small": HookedTransformer.from_pretrained("gpt2"),
    "gpt2-medium": HookedTransformer.from_pretrained("gpt2-medium"),
    "gpt2-large": HookedTransformer.from_pretrained("gpt2-large"),
    "gpt2-xl": HookedTransformer.from_pretrained("gpt2-xl"),
}

Loaded pretrained model pythia-70m into HookedTransformer
Loaded pretrained model pythia-160m into HookedTransformer
Loaded pretrained model pythia-410m into HookedTransformer
Loaded pretrained model pythia-1b into HookedTransformer
Loaded pretrained model pythia-1.4b into HookedTransformer
Loaded pretrained model pythia-2.8b into HookedTransformer
Loaded pretrained model gpt2 into HookedTransformer
Loaded pretrained model gpt2-medium into HookedTransformer
Loaded pretrained model gpt2-large into HookedTransformer
Loaded pretrained model gpt2-xl into HookedTransformer


In [24]:
# how much cuda memory taken
t = torch.cuda.get_device_properties(0).total_memory
r = torch.cuda.memory_reserved(0)
a = torch.cuda.memory_allocated(0)
f = r-a  # free inside reserved
print(f"Total: {t*1e-9}, Reserved: {r*1e-9}, Allocated: {a*1e-9}, Free: {f*1e-9}")

Total: 84.986691584, Reserved: 38.247858176, Allocated: 38.035869184, Free: 0.21198899200000001


In [31]:
accuracy_df = pd.DataFrame(columns=['IOI-old Loss', 'IOI-old Accuracy', 'IOI Loss', 'IOI Accuracy', 'Sports Loss', 'Sports Accuracy', 'OWT Loss'])

for model_name, model in tqdm(models.items()):
    if "pythia" in model_name:
        # print(f"For {model_name} on IOI, loss is {ioi_pythia.get_test_loss(model)}, ioi accuracy is {ioi_pythia.get_test_accuracy(model)}, overall accuracy is {ioi_pythia.get_test_accuracy(model, check_all_logits=True)}")
        # print(f"For {model_name} on Sports, loss is {sports_pythia.get_test_loss(model)}, sports accuracy is {sports_pythia.get_test_accuracy(model)}, overall accuracy is {sports_pythia.get_test_accuracy(model, check_all_logits=True)}")

        accuracy_df.loc[model_name] = [ioi_pythia_old.get_test_loss(model).item(), ioi_pythia_old.get_test_accuracy(model), ioi_pythia.get_test_loss(model).item(), ioi_pythia.get_test_accuracy(model), sports_pythia.get_test_loss(model).item(), sports_pythia.get_test_accuracy(model), owt_pythia.get_test_loss(model).item()]
    
    elif "gpt2" in model_name:
        # print(f"For {model_name} on IOI, loss is {ioi_gpt2.get_test_loss(model)}, ioi accuracy is {ioi_gpt2.get_test_accuracy(model)}, overall accuracy is {ioi_gpt2.get_test_accuracy(model, check_all_logits=True)}")
        # print(f"For {model_name} on Sports, loss is {sports_gpt2.get_test_loss(model)}, sports accuracy is {sports_gpt2.get_test_accuracy(model)}, overall accuracy is {sports_gpt2.get_test_accuracy(model, check_all_logits=True)}")

        accuracy_df.loc[model_name] = [ioi_gpt2_old.get_test_loss(model).item(), ioi_gpt2_old.get_test_accuracy(model), ioi_gpt2.get_test_loss(model).item(), ioi_gpt2.get_test_accuracy(model), sports_gpt2.get_test_loss(model).item(), sports_gpt2.get_test_accuracy(model), owt_gpt2.get_test_loss(model).item()]


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for model_name, model in tqdm(models.items()):


  0%|          | 0/10 [00:00<?, ?it/s]

In [32]:
accuracy_df

Unnamed: 0,IOI-old Loss,IOI-old Accuracy,IOI Loss,IOI Accuracy,Sports Loss,Sports Accuracy,OWT Loss
pythia-70m,6.674894,0.5,6.765071,0.47,4.327347,0.36,3.774127
pythia-160m,1.510956,0.98,1.605831,0.93,5.229007,0.428571,3.382833
pythia-410m,1.14594,1.0,1.312516,1.0,1.831539,0.53,2.731288
pythia-1b,1.439045,0.99,1.304217,1.0,1.348832,0.785714,2.732046
pythia-1.4b,1.1466,1.0,1.30569,1.0,1.003749,0.9,2.546326
pythia-2.8b,1.659012,1.0,1.607781,1.0,0.15023,0.928571,2.584105
gpt2-small,0.446071,1.0,0.531604,1.0,2.938488,0.357143,4.077174
gpt2-medium,0.733108,1.0,0.827779,1.0,1.577617,0.83,3.743425
gpt2-large,0.778143,1.0,0.801914,1.0,1.386537,0.642857,3.416237
gpt2-xl,1.086272,1.0,1.010668,1.0,0.568901,0.86,3.477935


In [10]:
pythia_tokenizer([' Erica', ' Samuel', ' Joshua', ' Nicole', ' Rebecca', ' Lindsey', ' Nicole', ' Gregory'], return_tensors='pt')

ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`input_ids` in this case) have excessive nesting (inputs type `list` where type `int` is expected).

In [15]:
# from tasks.inference_utils import generate_text
# generate_text(models[0], pythia_tokenizer, , 1)

HookedTransformer(
  (embed): Embed()
  (hook_embed): HookPoint()
  (blocks): ModuleList(
    (0-5): 6 x TransformerBlock(
      (ln1): LayerNormPre(
        (hook_scale): HookPoint()
        (hook_normalized): HookPoint()
      )
      (ln2): LayerNormPre(
        (hook_scale): HookPoint()
        (hook_normalized): HookPoint()
      )
      (attn): Attention(
        (hook_k): HookPoint()
        (hook_q): HookPoint()
        (hook_v): HookPoint()
        (hook_z): HookPoint()
        (hook_attn_scores): HookPoint()
        (hook_pattern): HookPoint()
        (hook_result): HookPoint()
        (hook_rot_k): HookPoint()
        (hook_rot_q): HookPoint()
      )
      (mlp): MLP(
        (hook_pre): HookPoint()
        (hook_post): HookPoint()
      )
      (hook_attn_in): HookPoint()
      (hook_q_input): HookPoint()
      (hook_k_input): HookPoint()
      (hook_v_input): HookPoint()
      (hook_mlp_in): HookPoint()
      (hook_attn_out): HookPoint()
      (hook_mlp_out): HookPoint()
