## Model Analysis for Indirect Object Identification (IOI) in GPT2
Blackbox analysis of GPT 2 small. 

In [771]:
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, top_k_top_p_filtering
import torch
import random
import tensorflow as tf
from tabulate import tabulate
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
from torch import nn

config = AutoConfig.from_pretrained("gpt2")
gpt2tokenizer = AutoTokenizer.from_pretrained("gpt2")
gpt2model = AutoModelForCausalLM.from_pretrained("gpt2")

gpt2ltokenizer = AutoTokenizer.from_pretrained("gpt2-large")
gpt2lmodel = AutoModelForCausalLM.from_pretrained("gpt2-large")

neo13model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")
neo13tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")

neo125model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-neo-125M")
neo125tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neo-125M")

In [772]:
# completion generation
def get_completion(prompt, model = gpt2model, tokenizer=gpt2tokenizer, wPrompt=True):
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]
    # get logits of last hidden state
    next_token_logits = model(**inputs).logits[:, -1, :]
    # filter
    filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=1, top_p=1.0)
    # sample
    probs = nn.functional.softmax(filtered_next_token_logits, dim=-1)
    next_token = torch.multinomial(probs, num_samples=1)
    if wPrompt:
        generated = torch.cat([input_ids, next_token], dim=-1)
    else:
        generated = next_token
    resulting_string = tokenizer.decode(generated.tolist()[0])
    return  resulting_string 

# completion generation
def get_logprobs(prompt, n=10, model=gpt2model, tokenizer=gpt2tokenizer):
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]
    # get logits of last hidden state
    next_token_logits = model(**inputs).logits[:, -1, :]
    # filter
    filtered_next_token_logits = top_k_top_p_filtering(next_token_logits, top_k=n, top_p=1.0)
    # sample
    probs = nn.functional.softmax(filtered_next_token_logits, dim=-1)
    next_tokens = torch.multinomial(probs, num_samples=n)
    problist = probs.tolist()[0] #get a list of all the probabilities indexed with their token
    tokenlist = next_tokens.tolist()[0] #get a list of all the tokens, size n, from highest probability to lowest.
    output = {}
    for token in tokenlist:
        word = tokenizer.decode(token)
        output[word] = problist[token]
    output = dict(sorted(output.items(), reverse=True, key=lambda item: item[1]))
    return  output

    # plot a graph of two log probabilities of the same model with two different prompts
def plot_compare_prompt_logprobs(prompt1, prompt2,xaxis, yaxis, model=gpt2model, tokenizer=gpt2tokenizer):
    lp1 = get_logprobs(prompt1, model=model, tokenizer=tokenizer)
    lp2 = get_logprobs(prompt2, model=model, tokenizer=tokenizer)
    data = {k: (lp1.get(k, 0), lp2.get(k,0)) for k in lp1.keys() | lp2.keys()}
    fig = go.Figure(
        data=go.Scatter(
            x=[v[0] for v in data.values()],
            y=[v[1] for v in data.values()],
            mode='markers+text',
            text=list(data.keys()),
            textposition="bottom center"
                       )
                    )
    fig.update_layout(
    title={
        'text': "GPT 2 small",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    xaxis_title={
            'text': xaxis
        },
    yaxis_title={
            'text': yaxis
        }
    
    )

    fig.show()

        # plot two graphs side by side
def plot_two(prompt1, prompt2, prompt3, prompt4, xaxis1, yaxis1, xaxis2, yaxis2, model=gpt2model, tokenizer=gpt2tokenizer):
    fig = make_subplots(rows=1, cols=2)
    lp1 = get_logprobs(prompt1, model=model, tokenizer=tokenizer)
    lp2 = get_logprobs(prompt2, model=model, tokenizer=tokenizer)
    data = {k: (lp1.get(k, 0), lp2.get(k,0)) for k in lp1.keys() | lp2.keys()}
    fig.add_trace( 
        go.Scatter(
            x=[v[0] for v in data.values()],
            y=[v[1] for v in data.values()],
            mode='markers+text',
            text=list(data.keys()),
            textposition="bottom center"
           ),
        row=1, col=1
                 )
    lp1 = get_logprobs(prompt3, model=model, tokenizer=tokenizer)
    lp2 = get_logprobs(prompt4, model=model, tokenizer=tokenizer)
    data = {k: (lp1.get(k, 0), lp2.get(k,0)) for k in lp1.keys() | lp2.keys()}
    fig.add_trace( 
        go.Scatter(
            x=[v[0] for v in data.values()],
            y=[v[1] for v in data.values()],
            mode='markers+text',
            text=list(data.keys()),
            textposition="bottom center"
           ),
        row=1, col=2
                 )
    fig.update_layout(
    title={
        'text': "test",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
    )
    
    # update axis
    fig.update_xaxes(title_text=xaxis1, row=1, col=1)
    fig.update_xaxes(title_text=xaxis2, row=1, col=2)
    fig.update_yaxes(title_text=yaxis1, row=1, col=1)
    fig.update_yaxes(title_text=yaxis2, row=1, col=2)

    fig.show()
    

In [773]:
def avgcorrect(engine, model, tokenizer, ntests = 100,):
    results = [[0,0]]*ntests
    for x in range(ntests):
        A = random.choice(Alist)
        B = random.choice(Blist)
        P = random.choice(Plist)
        O = random.choice(Olist)
        prompt = f"When {A} and {B} got a {O} at the {P}, {B} decided to give it to"
        lps = get_logprobs(prompt, model=model, tokenizer=tokenizer)
        results[x] = [lps[f' {A}'], lps[f' her']]

    avg_vals = [0,0]
    for result in results:
            avg_vals[0]+=result[0]
            avg_vals[1]+=result[1]
    ntests= 100/ntests
    avg_vals[0] = avg_vals[0] * ntests
    avg_vals[1] = avg_vals[1] * ntests
    return ([[engine, '{0:.3f}'.format(avg_vals[0]), '{0:.3f}'.format(avg_vals[1]),'{0:.3f}'.format(avg_vals[0]-avg_vals[1])]])


def performance(engine, model, tokenizer, ntests = 100):
    score = 0
    for x in range(ntests):
        A = random.choice(Alist)
        B = random.choice(Blist)
        P = random.choice(Plist)
        O = random.choice(Olist)
        prompt = f"When {A} and {B} got a {O} at the {P}, {B} decided to give it to"
        prediction = get_completion(prompt, model=model, tokenizer=tokenizer, wPrompt=False)
        if prediction == f" {A}" or prediction == f" him":
            score += 1

    return score

def performance4(engine, model, tokenizer, ntests = 100):
    score = 0
    for x in range(ntests):
        print(x)
        samples = random.sample(Alist, 4)
        A = samples[0]
        B =  samples[1] #person B
        C =  samples[2] #person C
        D =  samples[3] #person D
        P = random.choice(Plist) #place
        prompt = f"When {A}, {B}, {C}, and {D} were at the {P}, {D} was with {A}, and {B} was with"
        prediction = get_completion(prompt, model=model, tokenizer=tokenizer, wPrompt=False)
        if prediction == f" {C}":
            score += 1

    return score


## First steps
Initially, we should recreate the behavior as stated in the document and ensure that behavior is not being caused by obvious/simple pattern recognitions.

In [774]:
Alist = ['Bob', 'Josh', 'Nathan', 'John', 'George', 'Michael', 'Steve']
Blist = ['Alice', 'Jess', 'Kim', 'Mary', 'Sophia', 'Mia', 'Linda']
Plist = ['palace', 'house', 'grocery store', 'market', 'warehouse', 'restaurant']
Olist = ['ring', 'coin', 'vase', 'flower', 'card', 'key']

In [775]:
A = random.choice(Alist) #person A
B = random.choice(Blist) #person B
P = random.choice(Plist) #place
O = random.choice(Olist) #object

In [776]:
prompt1 = f"When {A} and {B} got a {O} at the {P}, {B} decided to give it to"
prompt2 = f"When {B} and {A} got a {O} at the {P}, {B} decided to give it to"
prompt3 = f"When {A} and {B} got a {O} at the {P}, {A} decided to give it to"
prompt4 = f"When {B} and {A} got a {O} at the {P}, {A} decided to give it to"

In [777]:
print(get_completion(prompt1, wPrompt=True))
print(plot_compare_prompt_logprobs(prompt1, prompt2, f"{A} {B} {O} {P} {B}", f"{B} {A} {O} {P} {B}"))
print(plot_compare_prompt_logprobs(prompt3, prompt4, f"{A} {B} {O} {P} {A}", f"{B} {A} {O} {P} {B}"))

When John and Sophia got a flower at the warehouse, Sophia decided to give it to John


None


None


In [82]:
print("gpt2")
gpt2prob = avgcorrect("gpt2", gpt2model, gpt2tokenizer, 100)
print("gpt-neo-125m")
neo125prob = avgcorrect("gpt-neo-125m",neo125model, neo125tokenizer, 100)
print("gpt-neo-1.3b")
neo13prob = avgcorrect("gpt-neo-1.3b",neo13model, neo125tokenizer, 100)

gpt2
gpt-neo-125m
gpt-neo-1.3b


In [84]:
print(tabulate([['model', 'avg correct', 'avg incorrect', 'avg net']] + gpt2prob + neo125prob + neo13prob))

------------  -----------  -------------  -------
model         avg correct  avg incorrect  avg net
gpt2          63.714       10.947         52.768
gpt-neo-125m  50.717       7.321          43.396
gpt-neo-1.3b  58.498       16.566         41.932
------------  -----------  -------------  -------


In [762]:
print("gpt2")
gpt2score = performance("gpt2", gpt2model, gpt2tokenizer, 1000)
print("gpt2-large")
gpt2lscore = performance("gpt2-large", gpt2lmodel, gpt2ltokenizer, 1000)
print("gpt-neo-125m")
neo125score = performance("gpt-neo-125m",neo125model, neo125tokenizer, 1000)
print("gpt-neo-1.3b")
neo13score = performance("gpt-neo-1.3b",neo13model, neo125tokenizer, 1000)


gpt2
gpt2-large
gpt-neo-125m
gpt-neo-1.3b


In [770]:
print(tabulate([['engine', 'score/1000']] + [['gpt2', gpt2score], ['gpt2-large', gpt2lscore],['gpt-neo-125m', neo125score], ['gpt-neo-1.3b', neo13score]]))

------------  ----------
engine        score/1000
gpt2          1000
gpt2-large    994
gpt-neo-125m  986
gpt-neo-1.3b  991
------------  ----------


### gender bias
gender bias seems to be present but in much smaller scales, similar to gpt neo 125m

In [129]:
MA = "Eric" #male A
MB = "George" #male B
FA = "Jess" #female A
FB = "Kim" #female B
P = "shop" #place
O = "plate" #object

In [130]:
prompt1 = f"When {MA} and {MB} got a {O} at the {P}, {MB} decided to give it to"
prompt2 = f"When {FB} and {FA} got a {O} at the {P}, {FB} decided to give it to"

In [131]:
plot_compare_prompt_logprobs(prompt1, prompt2, "All Male", "All Female")

In [207]:
samples = random.sample(Alist, 3)
A1 = "Alice"
B1 =  samples[0] #person B
C1 =  samples[1] #person C
D1 =  samples[2] #person D
P = random.choice(Plist) #place

samples = random.sample(Alist, 3)
A2 = samples[0] #person A
B2 =  "Alice"
C2 =  samples[1] #person C
D2 =  samples[2] #person D


samples = random.sample(Alist, 3)
A3 = samples[0] #person A
B3 =  samples[1] #person B
C3 =  "Alice"
D3 =  samples[2] #person D


samples = random.sample(Alist, 3)
A4 = samples[0] #person A
B4 =  samples[1] #person B
C4 =  samples[2] #person C
D4 =  "Alice"


In [208]:
prompt1 = f"When {A1}, {B1}, {C1}, and {D1} were at the {P}, {D1} was with {A1}, and {B1} was with"
prompt2 = f"When {A1}, {B1}, {C1}, and {D1} were at the store, {D1} was with {A1}, since {B1} followed"

prompt3 = f"When {A2}, {B2}, {C2}, and {D2} were at the {P}, {D2} was with {A2}, and {B2} was with"
prompt4 = f"When {A2}, {B2}, {C2}, and {D2} were at the store, {D2} was with {A2}, since {B2} followed"

prompt5 = f"When {A3}, {B3}, {C3}, and {D3} were at the {P}, {D3} was with {A3}, and {B3} was with"
prompt6 = f"When {A3}, {B3}, {C3}, and {D3} were at the store, {D3} was with {A3}, since {B3} followed"

prompt7 = f"When {A4}, {B4}, {C4}, and {D4} were at the {P}, {D4} was with {A4}, and {B4} was with"
prompt8 = f"When {A4}, {B4}, {C4}, and {D4} were at the store, {D4} was with {A4}, since {B4} followed"



In [209]:
plot_two(prompt1, prompt2, prompt3, prompt4, f"{A1} {B1} {C1} {D1} [{C1}]", f"{A1} {B1} {C1} {D1} followed [{C1}]", f"{A2} {B2} {C2} {D2} [{C2}]", f"{A2} {B2} {C2} {D2} followed [{C2}]")
plot_two(prompt5, prompt6, prompt7, prompt8, f"{A3} {B3} {C3} {D3} [{C3}]", f"{A3} {B3} {C3} {D3} followed [{C3}]", f"{A4} {B4} {C4} {D4} [{C4}]", f"{A4} {B4} {C4} {D4} followed [{C4}]")

In [199]:
samples = random.sample(Alist, 4)
A1 = samples[3]
B1 =  samples[0] #person B
C1 =  samples[1] #person C
D1 =  samples[2] #person D
P = random.choice(Plist) #place

samples = random.sample(Alist, 4)
A2 = samples[0] #person A
B2 =  samples[3]
C2 =  samples[1] #person C
D2 =  samples[2] #person D


samples = random.sample(Alist, 4)
A3 = samples[0] #person A
B3 =  samples[1] #person B
C3 =  samples[3]
D3 =  samples[2] #person D


samples = random.sample(Alist, 4)
A4 = samples[0] #person A
B4 =  samples[1] #person B
C4 =  samples[2] #person C
D4 =  samples[3]


In [200]:
prompt1 = f"When {A1}, {B1}, {C1}, and {D1} were at the {P}, {D1} was with {A1}, and {B1} was with"
prompt2 = f"When {A1}, {B1}, {C1}, and {D1} were at the store, {D1} was with {A1}, since {B1} followed"

prompt3 = f"When {A2}, {B2}, {C2}, and {D2} were at the {P}, {D2} was with {A2}, and {B2} was with"
prompt4 = f"When {A2}, {B2}, {C2}, and {D2} were at the store, {D2} was with {A2}, since {B2} followed"

prompt5 = f"When {A3}, {B3}, {C3}, and {D3} were at the {P}, {D3} was with {A3}, and {B3} was with"
prompt6 = f"When {A3}, {B3}, {C3}, and {D3} were at the store, {D3} was with {A3}, since {B3} followed"

prompt7 = f"When {A4}, {B4}, {C4}, and {D4} were at the {P}, {D4} was with {A4}, and {B4} was with"
prompt8 = f"When {A4}, {B4}, {C4}, and {D4} were at the store, {D4} was with {A4}, since {B4} followed"



In [206]:
plot_two(prompt1, prompt2, prompt3, prompt4, f"{A1} {B1} {C1} {D1} [{C1}]", f"{A1} {B1} {C1} {D1} followed [{C1}]", f"{A2} {B2} {C2} {D2} [{C2}]", f"{A2} {B2} {C2} {D2} followed [{C2}]")

### observation
GPT2 seems to perform suboptimally when more than one agent is present and does not seem to properly understand syntactic structure and when to use which rule consistently, similar to gpt neo small.
Let's compare performance between models.


In [219]:
print("gpt2")
gpt2score = performance4("gpt2", gpt2model, gpt2tokenizer, 1000)
print("gpt2-large")
gpt2lscore = performance4("gpt2-large",gpt2lmodel, gpt2ltokenizer, 1000)
print("gpt-neo-125m")
neo125score = performance4("gpt-neo-125m",neo125model, neo125tokenizer, 1000)
print("gpt-neo-1.3b")
neo13score = performance4("gpt-neo-1.3b",neo13model, neo125tokenizer, 1000)


gpt2
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
2

In [220]:
print(tabulate([['engine', 'score/1000']] + [['gpt2', gpt2score], ['gpt2-large', gpt2lscore],['gpt-neo-125m', neo125score], ['gpt-neo-1.3b', neo13score]]))

------------  ----------
engine        score/1000
gpt2          231
gpt2-large    833
gpt-neo-125m  601
gpt-neo-1.3b  986
------------  ----------


### Observation 1. GPT Neo is better
GPT neo is better at predicting entities in 4 entity settings, this also implies GPT neo is better at identifying certain sentence structure and syntactical meaning.


### Test out previous hypothesis
seems unlikely to work since syntax and context is not understood properly.

In [288]:
samples = random.sample(Alist, 4)
A = samples[3] #person A
B =  samples[0] #person B
C =  samples[1] #person C
D =  samples[2] #person D
P = random.choice(Plist) #place

In [289]:
prompt1 = f"When {A}, {B}, {C}, and {D} were at the {P}, {D} was with {A}, and {B} was with"
prompt2 = f"When {A}, {B}, {C}, and {D} were at the {P}, {D} was with {C} and {A}, and {B} was with"
prompt3 = f"When {A}, {B}, {C}, and {D} were at the {P}, {D} was with {C} and {A}, {A}, and {B} was with"


In [290]:
print(get_completion(prompt1))
plot_two(prompt1,prompt2,prompt1,prompt3, f"Normal [{C}]", f"Force {D}, 2 counts each", f"Normal [{C}]", f"Force {D}, 3 {A}s")
plot_compare_prompt_logprobs(prompt2, prompt3, f"Force {D}, 2 counts each.", f"Force {D}, 3 {A}s")

When John, Michael, Steve, and Bob were at the palace, Bob was with John, and Michael was with John


### Observation 2. Subject can be object
GPT2 small does not seem to impose as great of a penalty as neo models when it comes to not allowing the subject to be the object in the same sentence. As seen in the previous 4 entity example, Josh is predicted to going with Josh.

### Observation 3. Entity Sensitivity
The model seems to be extremely sensitive as to which names are the entities from. Similar to the gender bias, this seems to govern which name gets the highest probability, with ‘common’ names such as ‘John’ and ‘Mary’ getting predicted independent of the syntactic structure.<br>

This also explains the inconsistency, where just by changing the names the model interprets something different going on and chooses a different set of rules.


In [323]:
samples = random.sample(Alist, 2)
A = samples[1] #person A
B =  samples[0] #person B
P = random.choice(Plist) #place
O = random.choice(Olist) #object

In [324]:
prompt1 = f"When {A} and {B} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
print(prompt1)
print(tabulate(list(get_logprobs(prompt1, n=5).items())))

When Bob and Steve were buying an expensive ring at the luxurious house far from home, Steve decided to pass it to
---  ---------
Bob  0.350118
his  0.320828
a    0.136463
the  0.123022
him  0.0695684
---  ---------


In [389]:
Alist = ['Bob', 'Nathan', 'Josh', 'John', 'George', 'Michael', 'Steve']

In [390]:
samples = random.sample(Alist, 3)
A = samples[2] #person A
B =  samples[0] #person B
C =  samples[1] #person C
D =  "Michael"
P = random.choice(Plist) #place

In [391]:
prompt1 = f"When {A}, {B}, {C}, and {D} were at the {P}, {D} was with {A}, and {B} was with"
prompt2 = f"When {A}, {B}, {C}, and {D} were at the {P}, {D} was with {C} and {A}, and {B} was with"
prompt3 = f"When {A}, {B}, {C}, and {D} were at the {P}, {D} was with {A} and {C}, and {B} was with"


In [392]:
print(get_completion(prompt1))
plot_two(prompt1,prompt2,prompt1,prompt3, f"Normal [{C}]", f"Force {D}, {D} was with {C} and {A}", f"Normal [{C}]", f"Force {D}, {D} was with {A} and {C}")
plot_compare_prompt_logprobs(prompt2, prompt3, f"Force {D}, {D} was with {C} and {A}.", f"Force {D}, {D} was with {A} and {C}")

When George, Bob, Steve, and Michael were at the market, Michael was with George, and Bob was with George


In [434]:
Alist = ['Bob', 'Nathan', 'Josh', 'John', 'George', 'Michael', 'Steve']

In [447]:
# low priority names get overriden no matter the context.
A = "Nathan"
B =  "Michael"
C =  "Josh" #high prob
P = random.choice(Plist) #place
O = random.choice(Olist)
prompt1 = f"When {A}, {B}, and {C} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
print(prompt1)
print(tabulate(list(get_logprobs(prompt1, n=5).items())))
prompt1 = f"When {C}, {B}, and {A} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
print(prompt1)
print(tabulate(list(get_logprobs(prompt1, n=5).items())))

When Nathan, Michael, and Josh were buying an expensive vase at the luxurious house far from home, Michael decided to pass it to
------  ---------
his     0.36615
Josh    0.304623
a       0.135985
the     0.111517
Nathan  0.0817252
------  ---------
When Josh, Michael, and Nathan were buying an expensive vase at the luxurious house far from home, Michael decided to pass it to
------  --------
his     0.393708
Nathan  0.186762
Josh    0.180482
a       0.135408
the     0.103639
------  --------


In [448]:
# low priority names even get overriden by pronouns/else
A = "Nathan"
B =  "Josh"
C =  "Michael" #high prob
P = random.choice(Plist) #place
O = random.choice(Olist)
prompt1 = f"When {A}, {B}, and {C} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
print(prompt1)
print(tabulate(list(get_logprobs(prompt1, n=5).items())))
prompt1 = f"When {C}, {B}, and {A} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
print(prompt1)
print(tabulate(list(get_logprobs(prompt1, n=5).items())))

When Nathan, Josh, and Michael were buying an expensive ring at the luxurious palace far from home, Josh decided to pass it to
-------  --------
his      0.344937
the      0.214008
Michael  0.157133
a        0.14748
him      0.136441
-------  --------
When Michael, Josh, and Nathan were buying an expensive ring at the luxurious palace far from home, Josh decided to pass it to
------  --------
his     0.322945
Nathan  0.222458
the     0.188224
a       0.149974
Josh    0.116399
------  --------


In [460]:
#when it comes to high prioirty names, it seems like the one closest after the subject gets assigned (in ambiguous cases)
A = "Josh"
B =  "Nathan"
C =  "George" #high prob
P = random.choice(Plist) #place
O = random.choice(Olist)
prompt1 = f"When {A}, {B}, and {C} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
print(prompt1)
print(tabulate(list(get_logprobs(prompt1, n=5).items())))
prompt1 = f"When {C}, {B}, and {A} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
print(prompt1)
print(tabulate(list(get_logprobs(prompt1, n=5).items())))

When Josh, Nathan, and George were buying an expensive card at the luxurious grocery store far from home, Nathan decided to pass it to
------  ---------
George  0.381823
his     0.265299
the     0.154113
a       0.11874
Josh    0.0800258
------  ---------
When George, Nathan, and Josh were buying an expensive card at the luxurious grocery store far from home, Nathan decided to pass it to
------  ---------
Josh    0.304315
his     0.293166
the     0.158728
a       0.153333
George  0.0904577
------  ---------


In [471]:
#upon further testing, it seems like induction heads try to follow a pattern. If they saw A, B, they will want to repeat A, B
A = "James" #try changing this to Mary, Jake, and John
B =  "Nathan"
C =  "George" #
P = random.choice(Plist) #place
O = random.choice(Olist)
prompt1 = f"When {A}, {C}, and {B} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
print(prompt1)
print(tabulate(list(get_logprobs(prompt1, n=5).items())))
prompt1 = f"When {C}, {A}, and {B} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
print(prompt1)
print(tabulate(list(get_logprobs(prompt1, n=5).items())))

When James, George, and Nathan bought a key at the warehouse, Nathan decided to pass it to
------  ---------
James   0.506292
his     0.177917
the     0.158738
him     0.0789018
George  0.0781512
------  ---------
When George, James, and Nathan bought a key at the warehouse, Nathan decided to pass it to
------  ---------
George  0.503559
his     0.186795
the     0.164365
James   0.0873114
a       0.0579701
------  ---------


In [482]:
A = "James" #try changing this to Mary, Jake, and John
B =  "Nathan"
C =  "George" #
P = random.choice(Plist) #place
O = random.choice(Olist)
prompt1 = f"When {A}, {C}, and {B} bought a {O} at the {P}, {B} decided to pass it to"
print(prompt1)
print(tabulate(list(get_logprobs(prompt1, n=5).items())))
prompt1 = f"When {C}, {A}, and {B} bought a {O} at the {P}, {B} decided to pass it to"
print(prompt1)
print(tabulate(list(get_logprobs(prompt1, n=5).items())))

When James, George, and Nathan bought a key at the warehouse, Nathan decided to pass it to
------  ---------
James   0.506292
his     0.177917
the     0.158738
him     0.0789018
George  0.0781512
------  ---------
When George, James, and Nathan bought a key at the warehouse, Nathan decided to pass it to
------  ---------
George  0.503559
his     0.186795
the     0.164365
James   0.0873114
a       0.0579701
------  ---------


In [740]:
A = "James" #
B =  "Nathan"
C =  "George" #
P = random.choice(Plist) #place
O = random.choice(Olist)

In [741]:
prompt1 = f"When {A}, {C}, and {B} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
prompt2 = f"When {C}, {A}, and {B} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
prompt3 = f"When {A}, {C}, and {B} bought a {O} at the {P}, {B} decided to pass it to"
prompt4 = f"When {C}, {A}, and {B} bought a {O} at the {P}, {B} decided to pass it to"

In [742]:
plot_two(prompt1,prompt2,prompt3,prompt4,f"{A} {C} {B} {B} long",f"{C} {A} {B} {B} long",f"{A} {C} {B} {B} short",f"{C} {A} {B} {B} short")
plot_two(prompt1,prompt3,prompt2,prompt4,f"{A} {C} {B} {B} long",f"{A} {C} {B} {B} short",f"{C} {A} {B} {B} short",f"{C} {A} {B} {B} long")

In [489]:
prompt1 = f"When {A}, {B}, and {C} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
prompt2 = f"When {C}, {B}, and {A} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
prompt3 = f"When {A}, {B}, and {C} bought a {O} at the {P}, {B} decided to pass it to"
prompt4 = f"When {C}, {B}, and {A} bought a {O} at the {P}, {B} decided to pass it to"
plot_two(prompt1,prompt2,prompt3,prompt4,f"{A} {B} {C} {B} long",f"{C} {B} {A} {B} long",f"{A} {B} {C} {B} short",f"{C} {B} {A} {B} short")
plot_two(prompt1,prompt3,prompt2,prompt4,f"{A} {B} {C} {B} long",f"{A} {B} {C} {B} short",f"{C} {B} {A} {B} short",f"{C} {B} {A} {B} long")

In [491]:
prompt1 = f"When {B}, {A}, and {C} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
prompt2 = f"When {B}, {C}, and {A} were buying an expensive {O} at the luxurious {P} far from home, {B} decided to pass it to"
prompt3 = f"When {B}, {A}, and {C} bought a {O} at the {P}, {B} decided to pass it to"
prompt4 = f"When {B}, {C}, and {A} bought a {O} at the {P}, {B} decided to pass it to"
plot_two(prompt1,prompt2,prompt3,prompt4,f"{B} {A} {C} {B} long",f"{B} {C} {A} {B} long",f"{B} {A} {C} {B} short",f"{B} {C} {A} {B} short")
plot_two(prompt1,prompt3,prompt2,prompt4,f"{B} {A} {C} {B} long",f"{B} {A} {C} {B} short",f"{B} {C} {A} {B} short",f"{B} {C} {A} {B} long")

The results suggests that aside from name weight/priority, the next governing rule is something related to pattern recognition/matching, which wouldn’t be surprising considering it is a small model and they tend to have simpler behaviors such as proximity rule or pattern recognition.


In [555]:
prompt = f"Mary and Michael bought a toy, Michael ended up giving it to"
print(prompt)
print(tabulate(list(get_logprobs(prompt, n=5).items())))
prompt = f"Michael and Mary bought a toy, Michael ended up giving it to"
print(prompt)
print(tabulate(list(get_logprobs(prompt, n=5).items())))

Mary and Michael bought a toy, Michael ended up giving it to
-------  --------
Michael  0.387418
his      0.250195
her      0.136563
a        0.114939
the      0.110885
-------  --------
Michael and Mary bought a toy, Michael ended up giving it to
----  ---------
Mary  0.721931
his   0.109531
the   0.0587479
a     0.0559372
him   0.053853
----  ---------


In [589]:
prompt1 = f"When Josh bought a toy, Mary gave it to"
prompt2 = f"When Mary bought a toy, Josh gave it to"
prompt3 = f"When Josh bought a toy, John gave it to"
prompt4 = f"When Mary bought a toy, Alice gave it to"
plot_two(prompt1, prompt2,prompt3,prompt4,prompt1, prompt2,prompt3,prompt4)

In [556]:
prompt = f"George and James gave John bought a toy, James gave it to"
print(get_logprobs(prompt))

{' John': 0.36457258462905884, ' James': 0.22606277465820312, ' him': 0.10422253608703613, ' his': 0.08796662092208862, ' the': 0.08750275522470474, ' Mary': 0.031157728284597397, ' George': 0.02985706925392151, ' me': 0.02499261498451233, ' William': 0.021988531574606895, ' them': 0.021676717326045036}


In [554]:
prompt1 = f"When George and James bought a toy, James gave it to"
prompt2 = f"When James and George bought a toy, James gave it to"
plot_compare_prompt_logprobs(prompt1, prompt2, prompt1, prompt2)

In [593]:
prompt1 = f"When Alice and James were skateboarding, John called"
prompt2 = f"When James and Alice were skateboarding, John called"
plot_compare_prompt_logprobs(prompt1, prompt2, prompt1, prompt2)

In [None]:
prompt1 = f"When George, John, and James were playing, since John teamed up with"
prompt2 = f"When James, John, and George were playing, since John teamed up with"
plot_compare_prompt_logprobs(prompt1, prompt2, prompt1, prompt2)

In [626]:
prompt1 = f"When George, John, and James were playing, James played alone while John teamed up with"
prompt2 = f"When George, John, and James were playing, George played alone while John teamed up with"
plot_compare_prompt_logprobs(prompt1, prompt2, "prompt1", "prompt2")

The prediction we wanted to force was “James” since it comes after John, however, we can see that George and John win on prompt 1. This strange behavior must be caused by the structure of the sentence.

In [633]:
prompt3 = f"When George, John, and James were playing, James played alone John teamed up with"
prompt4 = f"When George, John, and James were playing, George played alone John teamed up with"
plot_two(prompt1, prompt2, prompt3, prompt4, "prompt1", "prompt2", "prompt3", "prompt4")

In [635]:
prompt1 = f"When James, John, and George were playing, George played alone John teamed up with"
prompt2 = f"When James, John, and George were playing, James played alone John teamed up with"
prompt3 = f"When George, John, and James were playing, James played alone John teamed up with"
prompt4 = f"When George, John, and James were playing, George played alone John teamed up with"
plot_two(prompt1, prompt2, prompt3, prompt4, "force George", "force George", "force James", "force James")

In [637]:
prompt1 = f"When John, James, and George were playing, George played alone John teamed up with"
prompt2 = f"When John, James, and George were playing, James played alone John teamed up with"
prompt3 = f"When John, George, and James were playing, James played alone John teamed up with"
prompt4 = f"When John, George, and James were playing, George played alone John teamed up with"
plot_two(prompt1, prompt2, prompt3, prompt4, "George alone force George", "James alone force George", "James alone force James", "George alone force James")

In [685]:
prompt1 = f"When George, James, and John were playing George played alone John teamed up with"
prompt2 = f"When George, James, and John were playing James played alone John teamed up with"
prompt3 = f"When George, James, and John were playing, George played alone while John teamed up with"
prompt4 = f"When George, James, and John were playing, James played alone while John teamed up with"
plot_two(prompt1, prompt2, prompt3, prompt4, "George alone force George", "James alone force James", "James alone force James. pt", "George alone force James. pt")

### try entity count forcing

In [684]:
prompt1 = f"When George, James, and John were playing George played alone while John teamed up with"
prompt2 = f"When George, James, and John one two three James were playing George played alone while John teamed up with"
prompt3 = f"When George, James, and John one two three James were playing George played alone while John teamed up with"
prompt4 = f"When George, James, and John one two three James were playing George played alone John teamed up with"
plot_two(prompt1, prompt2, prompt3, prompt4, "normal (George)", "force James", "force James w while", "force James no while")

In [726]:
prompt1 = f"When John George and James. John George along with"
prompt2 = f"When George John and James James John along with"
prompt3 = f"When George James and John. John George along with"
prompt4 = f"When George James and John. James John George along with"
plot_two(prompt1, prompt2, prompt3, prompt4, "force James", "force James", "force John", "force John")

In [739]:
prompt1 = f"So Nathan sat next to Mary. Mary sat next to"
prompt2 = f"So Nathan sat next to Mary. John sat next to"
prompt3 =  f"So Mary sat next to Nathan. Mary gave the gift to"
prompt4 = f"So Mary sat next to Nathan. John gave the gift to"
plot_two(prompt1, prompt2, prompt3, prompt4, "force Nathan", "force Mary", "force Nathan", "force John")

In [None]:
prompt1 = f"So Nathan sat next to Mary. Mary sat next to"
prompt2 = f"So Nathan sat next to Mary. John sat next to"
prompt3 =  f"So Mary sat next to Nathan. Mary gave the gift to"
prompt4 = f"So Mary sat next to Nathan. John gave the gift to"
plot_two(prompt1, prompt2, prompt3, prompt4, "force Nathan", "force Mary", "force Nathan", "force John")

In [754]:
prompt1 = f"When {A}, {B}, and {C} bought a {O} at the {P}, {B} decided to pass it to"
prompt2 = f"When {B}, {A}, and {C} bought a {O} at the {P}, {B} decided to pass it to"
prompt3 = f"When {A}, {C}, and {B} bought a {O} at the {P}, {B} decided to pass it to"
prompt4 = f"When {A}, {B} bought a {O} at {C} the {P}, {B} decided to pass it to"
plot_two(prompt1, prompt2, prompt1, prompt4, "prompt1", "prompt2", "prompt1", "prompt4")

In [761]:
prompt1 = f"Yes, {A}, {B}, and {C} bought a {O} at the {P}. {B} went without {C} but with"
prompt2 = f"When George, James, and John one two, James were playing George played alone while John teamed up with"
prompt3 = f"When {A}, {C}, and {B} bought a {O} at the {P}, {B} decided to pass it to"
prompt4 = f"When {A}, {B} bought a {O} at {C} the {P}, {B} decided to pass it to"
plot_two(prompt1, prompt2, prompt1, prompt4, f"{A}", "prompt2", "prompt1", "prompt4")

In [780]:
A = random.choice(Alist) #person A
B = random.choice(Blist) #person B
P = random.choice(Plist) #place
O = random.choice(Olist) #object

In [795]:
prompt1 = f"Since {A} went to Denmark with {B}, {B} hasn't been the same with"
prompt2 = f"Since {A} went to the store with {B}, {B} hasn't been the same with"
prompt3 = f"Since {B} went to Denmark with {A}, {A} hasn't been the same with"
prompt4 = f"Since {B} went to the store with {A}, {A} hasn't been the same with"
plot_two(prompt1, prompt2, prompt3, prompt4, f"{prompt1}", f"{prompt2}", f"{prompt3}", f"{prompt4}")

In [813]:
Places = ['Denmark', 'New York', 'Berlin', 'Paris', 'Texas', 'Buenos Aires', 'America']
samples = random.sample(Places, 2)
P1 = samples[0]
P2 = samples[1]

In [814]:
prompt1 = f"Since {A} went to {P1} with {B}, {B} hasn't been the same with"
prompt2 = f"Since {A} went to {P2} with {B}, {B} hasn't been the same with"
prompt3 = f"Since {B} went to {P1} with {A}, {A} hasn't been the same with"
prompt4 = f"Since {B} went to {P2} with {A}, {A} hasn't been the same with"
plot_two(prompt1, prompt2, prompt3, prompt4, f"{A}{P1}{B}, {B}-[{A}]", f"{A}{P2}{B}, {B}-[{A}]", f"{B}{P1}{A}, {A}-[{B}]", f"{B}{P2}{A}, {A}-[{B}]")

In [815]:
LlistA = ["the singer", "the farmer", "the pianist", "the guitarist", "the actor"]
LlistB = ["the butcher", "the worker", "the painter", "the musician", "the scientist"]

In [816]:
A = random.choice(Alist) #person A
Al = random.choice(LlistA) #person A's alias
B = random.choice(Blist) #person B
Bl = random.choice(LlistB)#person B's alias
P = random.choice(Plist)#place
O = random.choice(Olist)#object

In [817]:
prompt1 = f"When {A} {Al} and {B} {Bl} got a {O} at the {P}, {B} decided to give it to the"
prompt2 = f"When {A} {Al} and {B} {Bl} got a {O} at the {P}, {Bl} decided to give it to the"
prompt3 = f"When {A} {Al} and {B} {Bl} got a {O} at the {P}, {B} decided to give it to"
prompt4 = f"When {A} {Al} and {B} {Bl} got a {O} at the {P}, {Bl} decided to give it to"

In [818]:
plot_two(prompt1,prompt2,prompt3,prompt4, f"{A} {B} {O} {P} {B} []", f"{A} {B} {O} {P} {Bl} []", f"{A} {B} {O} {P} {B} []", f"{A} {B} {O} {P} {Bl} []")

In [836]:
def performancealias(engine, model, tokenizer, ntests = 100):
    score = 0
    for x in range(ntests):
        print(x)
        A = random.choice(Alist) #person A
        Al = random.choice(LlistA) #person A's alias
        B = random.choice(Blist) #person B
        Bl = random.choice(LlistB)#person B's alias
        P = random.choice(Plist)#place
        O = random.choice(Olist)#object
        prompt1 = f"When {A} {Al} and {B} {Bl} got a {O} at the {P}, {Bl} decided to give it to"
        prediction1 = get_completion(prompt1, model=model, tokenizer=tokenizer, wPrompt=False)
        prompt2 = f"When {A} {Al} and {B} {Bl} got a {O} at the {P}, {Bl} decided to give it to the"
        prediction2 = get_completion(prompt2, model=model, tokenizer=tokenizer, wPrompt=False)
        if prediction1 == f" {A}" or prediction2 == f" {Al}":
            score += 1

    return score

In [837]:
print("gpt2")
gpt2score = performancealias("gpt2", gpt2model, gpt2tokenizer, 1000)
print("gpt2-large")
gpt2lscore = performancealias("gpt2-large", gpt2lmodel, gpt2ltokenizer, 1000)
print("gpt-neo-125m")
neo125score = performancealias("gpt-neo-125m",neo125model, neo125tokenizer, 1000)
print("gpt-neo-1.3b")
neo13score = performancealias("gpt-neo-1.3b",neo13model, neo125tokenizer, 1000)


gpt2
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
2

In [839]:
print(tabulate([['engine', 'score/1000']] + [['gpt2', gpt2score], ['gpt2-large', gpt2lscore],['gpt-neo-125m', neo125score], ['gpt-neo-1.3b', neo13score]]))

------------  ----------
engine        score/1000
gpt2          479
gpt2-large    752
gpt-neo-125m  145
gpt-neo-1.3b  701
------------  ----------


### finalizing tests (summary and rule pinpointing)

In [841]:
prompt = "George went to see James and John, John went to his house to see James and"
print(get_completion(prompt))

George went to see James and John, John went to his house to see James and John


In [844]:
prompt = "George went to see James and John. John went to his house to see James and"
print(get_logprobs(prompt))

{' John': 0.5151458382606506, ' James': 0.2067292332649231, ' Mary': 0.07548724114894867, ' his': 0.049963343888521194, ' said': 0.036713555455207825, ' see': 0.02789144404232502, ' the': 0.02539816126227379, ' to': 0.021491918712854385, ' George': 0.021050674840807915, ' he': 0.020128576084971428}


In [856]:
prompt = "Mary went to the store with John. John went to the store with"
print(get_logprobs(prompt))

{' his': 0.2815958261489868, ' John': 0.16131626069545746, ' the': 0.14290955662727356, ' Mary': 0.11713789403438568, ' a': 0.1028585284948349, ' me': 0.05610955134034157, ' her': 0.049639809876680374, ' him': 0.03329355642199516, ' George': 0.030386604368686676, ' Joseph': 0.024752432480454445}


In [862]:
prompt1 = "Bob was with Mary and Alice. Bob gave a flower to"
prompt2 = "John was with Mary and Alice. John gave a flower to"
prompt3 = "Bob was with Mary and Alice. Bob gave a flower to"
prompt4 = "John was with Mary and Alice. John gave a flower to"
plot_two(prompt1, prompt2, prompt3, prompt4, prompt1, prompt2, prompt3, prompt4)

In [864]:
prompt = "John was with Mary and Alice. John gave a flower to"
print(get_logprobs(prompt))

{' him': 0.17750494182109833, ' the': 0.17316697537899017, ' Mary': 0.15945345163345337, ' them': 0.12905745208263397, ' her': 0.10587342828512192, ' Alice': 0.10282686352729797, ' John': 0.041083406656980515, ' me': 0.040787335485219955, ' Peter': 0.036012277007102966, ' Jesus': 0.03423388674855232}


In [870]:
prompt = "When Mary brough John some apples from the store, John gave them to"
print(get_logprobs(prompt))

{' them': 0.4298013746738434, ' her': 0.19026830792427063, ' the': 0.10726717114448547, ' a': 0.07726695388555527, ' him': 0.04341266304254532, ' his': 0.04137979820370674, ' some': 0.03086932748556137, ' John': 0.027982743456959724, ' to': 0.026030102744698524, ' up': 0.02572154439985752}


In [871]:
prompt = "When Mary brough John some apples from the store, John gave them to"
print(get_logprobs(prompt))

{' them': 0.43739527463912964, ' her': 0.1373189240694046, ' the': 0.13587036728858948, ' a': 0.08380187302827835, ' his': 0.050551120191812515, ' some': 0.04705129191279411, ' John': 0.03160971775650978, ' him': 0.028640128672122955, ' up': 0.024415848776698112, ' to': 0.023345449939370155}
