In [6]:
import torch
from transformer_lens import HookedTransformer
from datasets import load_dataset

In [2]:
cfact = load_dataset("azhx/counterfact")

Downloading readme: 100%|██████████| 1.12k/1.12k [00:00<00:00, 15.1kB/s]
Downloading data: 100%|██████████| 11.1M/11.1M [00:00<00:00, 22.2MB/s]
Downloading data: 100%|██████████| 1.25M/1.25M [00:00<00:00, 5.54MB/s]
Generating train split: 100%|██████████| 19728/19728 [00:00<00:00, 171696.25 examples/s]
Generating test split: 100%|██████████| 2191/2191 [00:00<00:00, 137739.74 examples/s]


In [47]:
model = HookedTransformer.from_pretrained("Qwen/Qwen2-1.5B", default_padding_side="left")
model.tokenizer.padding_side = "left"



Loaded pretrained model Qwen/Qwen2-1.5B into HookedTransformer


In [3]:
cfact

DatasetDict({
    train: Dataset({
        features: ['case_id', 'pararel_idx', 'requested_rewrite', 'paraphrase_prompts', 'neighborhood_prompts', 'attribute_prompts', 'generation_prompts'],
        num_rows: 19728
    })
    test: Dataset({
        features: ['case_id', 'pararel_idx', 'requested_rewrite', 'paraphrase_prompts', 'neighborhood_prompts', 'attribute_prompts', 'generation_prompts'],
        num_rows: 2191
    })
})

In [4]:
cfact["train"]["requested_rewrite"]

[{'prompt': 'The mother tongue of {} is',
  'relation_id': 'P103',
  'subject': 'Danielle Darrieux',
  'target_new': {'id': 'Q1860', 'str': 'English'},
  'target_true': {'id': 'Q150', 'str': 'French'}},
 {'prompt': 'The official religion of {} is',
  'relation_id': 'P140',
  'subject': 'Edwin of Northumbria',
  'target_new': {'id': 'Q432', 'str': 'Islam'},
  'target_true': {'id': 'Q5043', 'str': 'Christianity'}},
 {'prompt': '{}, the',
  'relation_id': 'P1303',
  'subject': 'Toko Yasuda',
  'target_new': {'id': 'Q5994', 'str': 'piano'},
  'target_true': {'id': 'Q6607', 'str': 'guitar'}},
 {'prompt': '{}, which is located in',
  'relation_id': 'P17',
  'subject': 'Autonomous University of Madrid',
  'target_new': {'id': 'Q34', 'str': 'Sweden'},
  'target_true': {'id': 'Q29', 'str': 'Spain'}},
 {'prompt': 'What is the twin city of {}? It is',
  'relation_id': 'P190',
  'subject': 'Lyon',
  'target_new': {'id': 'Q1461', 'str': 'Manila'},
  'target_true': {'id': 'Q3820', 'str': 'Beirut'}},

In [68]:
def get_key_cov(model, wiki_dataset):
    ''' Estimate E[KK^T] using 100k samples from wiki_dataset. '''
    ...

def correctness_filter(model, dataset, verbose=False):
    ''' Filter out any examples that the model gets wrong. '''
    def get_correctness(model, examples):
        ''' Populate dataset with first token of correct answer and model answer '''
        true_string = [' ' + r['target_true']['str'] for r in examples['requested_rewrite']]
        question_string = [r['prompt'].format(r['subject']) for r in examples['requested_rewrite']]
        print(question_string)
        model.tokenizer.padding_side = "right"
        correct_tokens = list(model.tokenizer(true_string, return_tensors="pt", padding=True)["input_ids"][:, 0])
        model.tokenizer.padding_side = "left"
        question_tokens = model.tokenizer(question_string, return_tensors="pt", padding=True)["input_ids"]
        model_preds = list(torch.argmax(
            model(question_tokens),
            dim=-1
        )[:, 0])
        if verbose:
            print(f"String: {true_string[0]} tokenized as {correct_tokens[0]}, model predicted {model_preds[0]}")

        return {"correct_token": correct_tokens, "model_pred": model_preds}
    
    with torch.no_grad():
        dataset = dataset.map(
            lambda row: get_correctness(model, row),
            batched=True,
            batch_size=1_000
        )
    return dataset

def get_subject_representations(model, dataset):
    ''' Get the subject representations k* for each example in the dataset. '''
    ...

def get_object_representations(model, dataset):
    ''' Get the object representations v* for each example in the dataset. '''
    ...

def get_rome_edit(model, dataset, cov_dataset):
    ''' Implement the ROME edit function. 
        W_hat = W + A(C^-1 * k_star)^T, where:
        k_star = E[mlp_out(x_i + subject)], x_i is random prefix
        C = covariance matrix of keys
        A = (v_star - W * k_star) / (C^-1 * k_star)^T * k_star
        v_star optimizes log probability of outputting correct object and KL with original model
    '''
    k_star = get_subject_representations(model, dataset)
    v_star = get_object_representations(model, dataset)
    C = get_key_cov(model, cov_dataset)
    A = (v_star - model.W * k_star) / torch.matmul(torch.linalg.inv(C), k_star).T * k_star

    return torch.matmul(A, torch.linalg.inv(C).T)

In [67]:
model.tokenizer.decode([323])

' and'

In [69]:
cfact["train"] = correctness_filter(model, cfact["train"], verbose=True)

Map:   0%|          | 0/19728 [00:00<?, ? examples/s]



Map:   5%|▌         | 1000/19728 [00:03<01:00, 310.32 examples/s]

String:  French tokenized as 8585, model predicted 323
['The language used by Timo Koivusalo is', 'What sport does Marián Hossa play? They play', 'Beersheba is a twin city of', 'Phocaea is in', 'The Long Winters was founded in', 'George Mihai Florescu plays in the position of', 'The Chi-Lites is represented by music label', 'Coomera River can be found in', 'Dziga Vertov passed away in', 'Svenn Stray worked in the city of', 'Shironamhin, that was formulated in', 'The original language of Le Magnifique was', 'Where is Cleveland Classic? It is located in', 'Calder Freeway, in', 'The location of Radical War is', 'Pawn Stars debuted on', 'Windows Server 2003 is a product of', 'Marcelin Pleynet is a native speaker of', 'Halchidhoma is located in', 'Michel Pablo is originally from', 'Mohammad Hatta writes in', 'The original language of Doida Por Ti was', 'The location of Regina Coeli is', 'The language of The Harvest Month was', 'Robbery Under Arms, that originated in', 'Chakrabongse Bhuvanad

Map:  10%|█         | 2000/19728 [00:06<00:59, 295.92 examples/s]

String:  Finnish tokenized as 57853, model predicted 323
['Sydney Brenner works in the field of', 'George Gershwin plays', 'Alabama Shakes was founded in', 'Ellar Coltrane works as', 'Friday Night Videos was released on', 'Pietro Foscari, who has the position of', 'Sgt. Frog, that was created in', 'Co-operative College, whose headquarters are in', 'The headquarter of Cardiff University is located in', 'Honda Legend, produced by', 'Louis-Ernest Dubois has the position of', 'Pavol Demitra is a citizen of', 'The Gullet is in', 'Namkeen, that originated in', 'Markus Halsti plays in the position of', 'North Holland is named after', 'MacPaint was created by', 'Pennsylvania Route 198, in', 'The genre played by Hugo Weisgall is', 'The law in Aceh declares the language', 'Angelo Comastri holds the position of', 'Albert Eulenburg died at', 'Amazing Nurse Nanako was developed in', 'Kamikita District is located in the country of', 'Soulbender, that was formed in', 'The occupation of John Gawsworth

Map:  15%|█▌        | 3000/19728 [00:09<00:51, 325.64 examples/s]

String:  biology tokenized as 33358, model predicted 323
['Marat Makhmutov originates from', 'Hohenwestedt, in', 'Jean-Baptiste Philibert Willaumez, speaker of', 'Yvonne Monlaur, a native', 'The language of By the Light of a Star was', 'Mario Monti, who holds a citizenship from', 'The original language of The Divergent Series is', 'Bobby Orr, the', "The language of L'Age d'Or was", 'Lisa Appignanesi, a citizen of', 'Feijoada from', 'Belle du Seigneur, developed in', 'Buraswa, in', 'Howe Yoon Chong, who has a citizenship from', 'Patrice Leconte spoke the language', 'Ansonica was created in the country of', 'Daniel Ernst Jablonski worked in', 'Mount Queequeg is a part of the continent of', 'The law in Serbian Orthodox Church declares the language', 'Pinoy Idol, that was from', 'Paul Justin, who plays the position', 'Republic Airways Holdings, whose headquarters are in', 'Pennsylvania Route 372 is located in', 'The Chevrolet Tele-Theatre was originally aired on', 'Nintendo 3DS, a product 

Map:  20%|██        | 4000/19728 [00:12<00:50, 310.19 examples/s]

String:  Moscow tokenized as 22415, model predicted 323
['Johnny Hallyday is a native speaker of', "Kazuo Ishiguro's profession is an", 'Josh Beckett, the', 'Berwick Glacier belongs to the continent of', 'Minoru Yasui succumbed at', 'Melodifestivalen 2013, that was developed in', 'Which position does Lou Saban play? They play as', 'Sven Elvestad writes in', 'The domain of work of Govert Bidloo is', 'John XXI is a', 'Sallust writes in', 'The language of Monsieur Gangster is', 'Honda EV Plus is a product of', 'Ken Follett is originally from', 'Krisdayanti, who has a citizenship from', 'The mother tongue of Francis de Croisset is', 'Skanderbeg follows the religion of', "The language used by Ch'ien Mu is", 'Vicente Benavides, who is a citizen of', 'Paschal II holds the position of', 'Valdemar I of Denmark is follower of', 'Akhoond follows the religion of', 'Serge Nigg died in', 'The language of Office Romance is', 'Taipei is a twin city of', 'Leone Caetani writes in', 'Maria Fiore speaks',

Map:  25%|██▌       | 5000/19728 [00:16<00:49, 299.26 examples/s]

String:  French tokenized as 8585, model predicted 323
['John Joseph Williams holds the title of', 'In Friesland, the language spoken is', 'Palito Ortega writes in', 'The Third of May 1808, that was from', "Sean O'Hagan, the", 'Elizabeth Eastlake writes in', 'Indiana State Museum owner', 'What does Mocean Worker play? They play', 'Sumayyah bint Khayyat is follower of', 'IBM 2780/3780 is a product of', 'The law in Pakistan declares the language', "Karl Menger's area of work is", 'The genre played by Zutty Singleton is', 'Boroughmuir High School is located in', 'In Switzerland, they understand', "Michael Denis's profession is a", 'Tommy Harper plays as', 'The law in Uttar Pradesh declares the language', 'Danish Film Institute, whose headquarters are in', 'Gmina Bodzentyn, which is located in', 'Federally Administered Tribal Areas is located in', 'Piet Mondrian found employment in', 'Jesse McCartney works as', 'What does Art Tatum play? They play', 'International Trade Centre is headquart

Map:  30%|███       | 6000/19728 [00:19<00:43, 312.40 examples/s]

String:  bishop tokenized as 53206, model predicted 323
['Helsinki is located in', 'The headquarter of Phaidon Press is in', 'Samuel Wilberforce holds the position of', 'Vladimir Mayakovsky is a native speaker of', 'Clement II holds the title of', "Olive Blakeney's profession is a", 'Gray Davis speaks', 'The Jean Arthur Show was originally aired on', 'CHUM Limited is based in', 'Earl Klugh, the', 'Jake Rudock plays in the position of', 'Rang De Basanti was created in the country of', 'Telelogic, from', 'The language used by Fulvio Tomizza is', 'Road Rules was released on', 'Napoleon III used to work in', 'Ray Stubbs is employed by', 'Bristol & West is headquartered in', 'In Bolama, they understand', 'Hockey Night in Canada originated in', 'Sylvain Chauveau is originally from', 'Tomy Drissi is native to', 'Portuguese cuisine, from', 'Samuel Peak is in', 'The mother tongue of Henri Garat is', "Eliakim Carmoly's life ended in", 'What is the twin city of Warsaw? It is', 'In Ireland, the la

Map:  35%|███▌      | 7000/19728 [00:21<00:38, 332.88 examples/s]

String:  Europe tokenized as 4505, model predicted 323
['Nokia 1100, created by', 'Waverley Root died in', 'Marmaduke Hussey, Baron Hussey of North Bradley, of', 'Which position does Dan LeFevour play? They play as', 'United Launch Alliance owner', 'High Rollers debuted on', 'Pentagram formed in', 'Derrick Sherwin writes in', 'The Ray Bradbury Theater was originally aired on', 'Derek Paravicini, the', 'The original language of A Serbian Film was', 'Cape Royds is in', 'Night Zoo was created in the country of', 'Blackadder, who plays', 'Berchtold Haller passed away in', 'Inoue Genan Inseki originates from', 'Augustine of Hippo follows the religion of', "Rank Organisation's headquarters are in", 'Plymouth Rock can be found in', 'What sport does Antonio Nocerino play? They play', 'The twin city of Mexico City is', 'Orly Airport, which is named for', 'Sapo National Park is located in the country of', 'Bundesautobahn 8, from', 'Peter Bonerz is native to', 'Isabel Bassett, who works as', 'Dig

Map:  41%|████      | 8000/19728 [00:25<00:35, 328.74 examples/s]

String:  Nokia tokenized as 35706, model predicted 323
['Jamie Storr plays in the position of', 'Ball Park Music started in', 'The law in Paltamo declares the language', 'Hans Lassen Martensen holds the position of', 'Slobodna Dalmacija was from', 'The language of Vettai was', 'Sabratha is located in the country of', 'Free National Movement is located in', 'Rich Text Format, a product manufactured by', 'The occupation of William Bowen Rowlands is', 'In Pervomaysky District, Yaroslavl Oblast, they understand', 'Mantecadas, that was created in', 'Karl Malone plays', "Damian O'Flynn was native to", 'Adolphe Schneider spoke the language', 'St. Elsewhere was released on', 'Madcon was started in', 'Antoine Augustin Cournot works in the area of', 'Son Amores was created in', 'George Best Belfast City Airport, which was named after', 'Berliner Zeitung is written in', 'The original language of Gia was', 'Michel Delebarre, a native', 'Walter Zenga is a professional', 'Daniel Mananta originated f

Map:  46%|████▌     | 9000/19728 [00:28<00:32, 325.71 examples/s]

String:  goaltender tokenized as 79622, model predicted 323
['Windows Movie Maker is developed by', 'Czechoslovakia is a part of the continent of', 'Google Latitude, developed by', 'Sachiko Kobayashi has a citizenship from', 'Martin Kuhl plays as', 'Dynaco started in', 'Fort William Point is in', "Robert I. Soare's profession is an", 'Jugurtha died in', 'Interface Builder was created by', 'Claude Louis Berthollet works in the field of', 'Givers was founded in', 'Valspar, that originated in', 'The original language of Ma vie en rose was', 'Evan Bayh, who works as', 'Gmina Warlubie, located in', 'Anwar el Sadat follows the religion of', 'In Central Visayas, they understand', 'The original language of Calamari Union was', 'In Kitee, an official language is', 'New York City is a twin city of', 'Kingdom of the Netherlands, which has the capital city', 'Malak Karsh passed away at', 'Raisul Islam Asad is affiliated with the religion', 'Colmar Freiherr von der Goltz has a citizenship from', 'L

Map:  51%|█████     | 10000/19728 [00:31<00:29, 329.76 examples/s]

String:  Microsoft tokenized as 5100, model predicted 323
['The twin city of Tianjin is', 'Alexandre Trauner is a citizen of', 'The twin city of Barcelona is', 'Giacomo Devoto worked in the city of', 'Ibn al-Qalanisi expired at', 'Afro-dite from', 'Meyer Guggenheim, who has the position of', 'Case Western Reserve University is based in', 'Santi Cazorla, the', 'PGM-17 Thor is produced by', 'Robert James Manion worked in the city of', 'The Xcerts was started in', 'City of Cranbourne, in', 'Valery Sigalevitch, playing the', 'Geoff Eales plays the instrument', "Comintern's headquarters are in", 'Adare Manor, in', "Haruko Sugimura's profession is a", 'Saint Boniface is a', 'In Brussels-Capital Region, an official language is', 'Karl Marx took up work in', 'The Vagabonds originated in', 'Cmolas, which is located in', 'The expertise of Raymond Smullyan is', 'The original language of Hip Hop Is Dead is', 'Pat Haden, the', 'Tina Turner is a native speaker of', 'The expertise of Granius Licinian

Map:  56%|█████▌    | 11000/19728 [00:34<00:27, 322.64 examples/s]

String:  Philadelphia tokenized as 19335, model predicted 323
['Seven Servants, that was formulated in', "Romano d'Ezzelino, which is located in", 'Rafael Alberti, who holds a citizenship from', 'Cowboy Junkies from', 'Bahamas Football Association belongs to the organization of', 'The World Sinks Except Japan, created in', 'The language used by Louis Bonaparte is', 'Silvia Monfort is a native speaker of', 'The mother tongue of Christiane Martel is', 'La famille Plouffe is located in', 'Yoav Gelber writes in', "Bertold Hummel's profession is an", 'Otto Brahm worked in', 'Kenneth Cragg has the position of', 'Michel Massot is known for performing', 'Porsche 914, produced by', 'Shaan Shahid speaks', 'What does Joe Wilder play? They play', 'The genre played by Lucky Louie is', 'In Republic of Mordovia, the language spoken is', 'Apple A8, a product of', 'Scott Island belongs to the continent of', 'Coast Mountains, in', 'Qasim Amin was originally from', 'Beirut is a twin city of', 'George Per

Map:  61%|██████    | 12000/19728 [00:37<00:23, 328.14 examples/s]

String:  Germany tokenized as 9856, model predicted 323
['The law in Navarre declares the language', 'Ole Ritter is a citizen of', 'Mansaf from', "Dermot O'Leary, who is employed by", 'Ojos del Salado, in', 'This Et Al was created in', 'Yvan Goll, a native', 'Moses Malone plays', 'Himyarite Kingdom follows the religion of', "Ian Khama's occupation is", 'The law in United Nations declares the language', "Charles Kaiser's profession is a", 'Honda S600, produced by', 'Sony Mavica is a product of', 'Gerald Frederick Kicanas holds the position of', 'Suzuki Alto, created by', 'Namor is a part of the', 'EA-18G Growler was a product of', 'Yevgeny Yevtushenko is a native speaker of', 'Park Seo-joon originates from', 'The genre played by Ola Kvernberg is', 'The original language of Die Zeit was', 'RC-135 is created by', 'Embrik Strand is a citizen of', 'Leonidas Polk is a', 'The language used by Louis Loucheur is', 'The headquarter of Grameen Bank is in', 'Koerner, Ray & Glover, that was created

Map:  66%|██████▌   | 13000/19728 [00:41<00:21, 308.35 examples/s]

String:  Spanish tokenized as 15154, model predicted 323
['Ezhupunna is within', 'What is the twin city of Luanda? It is', 'Singarapettai, which is located in', 'Ge You originated from', 'Eustache-Hyacinthe Langlois, a native', 'Jean Marais writes in', 'Suzuki Ignis is a product of', 'Sunrise Party is headquartered in', 'Honda Life, produced by', 'Eric VI of Denmark was originally from', 'Bandila, that was from', 'The language of Air Mata Iboe was', 'Ivan Sechenov specializes in', 'Tarak Ben Ammar holds a citizenship from', 'Carpathian Mountains, in', 'Ohio gubernatorial elections is within', 'Michael Ben-Ari originated from', 'Mario Scelba writes in', 'In Tornio, an official language is', 'Oleta Adams originates from', 'The language of Schrei was', 'Jaromír Jágr professionally plays the sport', 'One Big Happy (sitcom) debuted on', 'Christopher Smart, speaker of', 'Hilir Perak is located in', 'Cuba libre, which was called after', 'Kola Real, that was created in', 'Post-Soviet states is

Map:  71%|███████   | 14000/19728 [00:44<00:19, 300.15 examples/s]

String:  Kerala tokenized as 60407, model predicted 323
['Francis Hagerup was employed in', 'Pentium 4 was a product of', 'Normandy landings can be found in', 'Forster Ice Piedmont is located in the continent', 'Rudolf Loman is originally from', 'Tomasz Kamusella has a citizenship from', 'KSTP-FM is within', 'The domain of work of August Weismann is', 'Taha Hussein, who is a citizen of', 'Plus belle la vie was created in', 'Dutch disease, which is called after', 'Toyota Avalon, created by', 'Kurt Vile that was founded in', 'Kay Goldsworthy holds the position of', 'Matson, Inc. is based in', 'Patrick Roy, the', 'In Republic of North Ossetia-Alania, they understand', 'The law in Sakha Republic declares the language', 'Combat Hospital was developed in', 'Louane, a native', 'Carlo Porta speaks', 'Ferrari 212 Inter, developed by', 'The official language of Netherlands is', 'Keltie Glacier is located in', 'Maria Reining was employed in', 'Buichi Terasawa, who has a citizenship from', 'The ge

Map:  76%|███████▌  | 15000/19728 [00:47<00:15, 311.17 examples/s]

String:  Oslo tokenized as 57858, model predicted 323
['Hammarby IF DFF, which is located in', 'The law in Seychelles declares the language', 'Ted Robert Gurr is native to', 'Deborah Wiles is originally from', 'Neil Hagerty plays the', 'The Home Court is to debut on', 'Adam Amin works for', 'Ethel Waters performs', 'Mike Harrington works for', 'chorizo, that originated in', 'The domain of work of Antoine Augustin Cournot is', 'Granville Sharp speaks the language', 'Yahoo Search is from', 'Thanh Lam, who has a citizenship from', "Mongolian People's Republic is follower of", 'Scott Milanovich, the', 'John Hiatt plays the', 'Skeletonwitch started in', 'A News was from', 'Spain national football team is a part of the', 'The language of A Very Private Affair is', 'The language of La Haine was', 'Eric Gill is originally from', "Nilakantha Somayaji's domain of activity is", 'In Panama, the language spoken is', 'Warfaze was created in the country of', 'Alcatel-Lucent, by', 'Zehra Sheerazi is o

Map:  81%|████████  | 16000/19728 [00:51<00:12, 297.48 examples/s]

String:  Sweden tokenized as 23190, model predicted 323
['Michel Bernstein lost their life at', 'Henri Bouchard passed away at', 'Suzuki RGV500 is developed by', 'Castrovirreyna District, in', 'Operation Barbarossa can be found in', 'Doug Mitchell Thunderbird Sports Centre owner', 'Pedro Beato, the', 'Pugh Rogefeldt, who is a citizen of', 'Ajinomoto Stadium, by', 'Marc Ribot plays the', 'Alain de Cadenet is employed by', 'La Paz is a twin city of', 'The twin city of London is', 'Julian Rachlin, playing the', 'Graciliano Ramos, a citizen of', 'Copenhagen is a twin city of', 'Karachi Cantonment is located in', 'Carnival Films, whose headquarters are in', 'Jeff Francis, who plays the position', 'The mother tongue of Learco Guerra is', 'Johann Adolph Hasse, who plays', 'Lionel Nathan de Rothschild worked in', 'Pius VII, whose position is that of', 'George Plimpton is a professional', 'Gustaaf Schlegel, speaker of', 'Mother 3 is developed by', 'Cheers Beacon Hill formed in', 'In Democratic 

Map:  86%|████████▌ | 17000/19728 [00:53<00:08, 315.10 examples/s]

String:  Paris tokenized as 12095, model predicted 323
['LBi, that was created in', 'The law in ASEAN declares the language', 'Gregor Mendel works in the area of', 'Tesco Organisation, located in', 'The Coca-Cola Company was started in', 'Ward Cunningham, of', 'Abd al-Qadir al-Husayni is native to', 'The Murder City Devils that was founded in', 'The language of Paradise Ranch is', 'Hamtaro originated in', 'Dodge Omni is a product of', 'Wilhelm Krause works in the area of', 'Zelzal-2 is developed by', 'Dunedin Range is located in the continent', 'The twin city of Casablanca is', 'Leser Landshuth was employed in', 'Tokugawa shogunate follows the religion of', 'South Indian Lake, Manitoba can be found in', "Ernest Lawrence's domain of work is", 'Then Came Bronson is to debut on', 'Yutaka Enatsu plays in the position of', 'The Gene Autry Show premieres on', 'tatami, that was formulated in', 'Curtis Amy performs', 'Colefax Group, whose headquarters are in', 'Zarah Leander died at', 'Thielem

Map:  91%|█████████ | 18000/19728 [00:57<00:05, 316.20 examples/s]

String:  Amsterdam tokenized as 37741, model predicted 323
['The profession of Beata Tyszkiewicz is', 'The Naked Vicar Show, that was formulated in', 'Amol Palekar is native to', 'Paul Desjardins originates from', 'Said Seyam belongs to the organization of', 'George Hardinge speaks the language', 'Mentors started in', 'What is the twin city of Istanbul? It is', 'Cliff Morgan, of', 'The language of Captain Khorshid was', 'Wolf Gang started in', 'Georg Apenes worked in', 'Grace Lynn Kung is originally from', 'In Kaskinen, they understand', 'Braden Holtby plays in the position of', 'The language of France Info is', 'Napoleon Chagnon specializes in', 'Herman Wilhelm Bissen, who has a citizenship from', 'The headquarters of Library House is in', 'Super Audio CD is created by', 'Alevi is follower of', 'The Reputation, that was started in', 'Brzydula was developed in', 'Stephen Sackur, of', 'James Watt worked in the city of', 'What sport does Robbie Keane play? They play', 'Tile Ridge is loca

Map:  96%|█████████▋| 19000/19728 [01:00<00:02, 316.88 examples/s]

String:  actor tokenized as 12089, model predicted 323
['Mount Cocks is a part of the continent of', 'Bobobobs was from', 'Saint Margaret of Scotland lost their life at', 'Brock Lesnar professionally plays the sport', 'Lanyon Place railway station is located in', 'Ellen Gleditsch, who is a citizen of', 'Byron White professionally plays the sport', 'Eduardo Paes has the position of', 'Mookencheril Cherian Joseph has a citizenship from', "Sydney Theatre Company's headquarters are in", 'Sultan Ahmed Mosque is in', 'Debdas, formulated in', 'Call Cobbs, Jr. performs', "Michele Giordano's position is", 'Brian Paddick found employment in', 'Charles Grafly died in the city of', 'What does Cold Comfort Farm play? They play', 'Michel Peissel writes in', 'IBM Personal Computer, produced by', 'Haymo of Halberstadt holds the title of', 'Edvard Hagerup Bull was employed in', 'Tebey holds a citizenship from', 'Scotia Bay is located in', 'Jazz Party performs', 'Jeffrey Archer found employment in', 'Ye

Map: 100%|██████████| 19728/19728 [01:02<00:00, 314.49 examples/s]

String:  Antarctica tokenized as 71687, model predicted 323





In [50]:
cfact["train"]["correct_token"][0]

43197

In [51]:
model.tokenizer(cfact["train"]["requested_rewrite"][0]["target_true"]["str"])

{'input_ids': [43197], 'attention_mask': [1]}

In [None]:
# Usage: model.W_out[layer] += get_rome_edit(model)