Development notebook: quick analyses and sanity checks for the violin-plot-related code.

In [1]:
RESULTS_FOLDER = './intermediate_results/new_models_probs'

import transformers
from transformers import GPT2LMHeadModel, GPT2Tokenizer, BertForMaskedLM, BertTokenizer, BartForConditionalGeneration, BartTokenizer

import os
from os.path import join, exists

os.chdir('/home/nwong/chompsky/serial_chain/telephone-analysis-public')

from new_models import prep_probs, model_score_funcs, align_prep_words, model_score_utils
from new_models import in_progress

from new_models import sub_analysis

import importlib
import load_runs

import pandas as pd
import numpy as np

import torch
from collections import defaultdict


## Load the data

In [2]:

# Pick up any edits made to prep_probs since the kernel started.
importlib.reload(prep_probs)

# Every data path below is relative to the repository root that the imports
# cell switches into with os.chdir, so no machine-specific absolute path is
# repeated in this cell.
WORD_CHANGES_FOLDER = './intermediate_results/word_changes'
if not exists(WORD_CHANGES_FOLDER):
    os.makedirs(WORD_CHANGES_FOLDER)

# Table of word substitutions; later cells read its sentence/response,
# sWord/rWord and sCounter/rCounter columns.
substitution_df = pd.read_csv(join(WORD_CHANGES_FOLDER, 'edit_substitutions.csv'))

# One pretrained tokenizer per model family.
tokenizers = {
    'gpt2': GPT2Tokenizer.from_pretrained('gpt2'),
    'bert': BertTokenizer.from_pretrained("bert-large-uncased-whole-word-masking"),
    'bart': BartTokenizer.from_pretrained("facebook/bart-base"),
}

# Same folder as RESULTS_FOLDER from the imports cell (previously spelled out
# a second time as an absolute path).
PROB_DF_PATH = RESULTS_FOLDER

# raw_probs[model_name] holds one entry per scored sentence; the mismatch check
# further down indexes it by sentence position and reads a 'prob' column.
raw_probs = {}
for model_name in ['bert', 'gpt2_normal', 'bart', 'gpt2_medium']:
    print(model_name)
    raw_probs[model_name] = prep_probs.load_word_scores(model_name, PROB_DF_PATH, give_probs = True)
    print(f'For model: {model_name}, length: {len(raw_probs[model_name])}')

# Folder read by load_postprocessed_logistic_prep_scores.
# NOTE(review): the original comment asked what this path is meant to hold;
# presumably the post-processed scores prepared for the logistic analysis --
# TODO confirm against prep_probs.
DATA_PREP_FOLDER = './intermediate_results/data_prep_logistic'
lm = prep_probs.load_postprocessed_logistic_prep_scores(DATA_PREP_FOLDER)

all_runs = load_runs.load_runs()

bert
For model: bert, length: 3193
gpt2_normal
For model: gpt2_normal, length: 3193
bart
For model: bart, length: 3193
gpt2_medium
For model: gpt2_medium, length: 3193
0    0
1    5
2    4
3    3
4    1
Name: index, dtype: int64


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  all_runs = pd.concat(dfs)


Checking that the extraction position and the extracted softmax values are correct

In [3]:
def report_mask_words(scores, sentence, tokenizer, num_report = 20):
    """
    Report the highest-scoring tokens for a single prediction position.

    scores = a 1-D tensor with one score per vocabulary id
        (assumed to be the softmax values already selected for one position).
    sentence = the prefix the prediction was made on; only echoed in the printed header.
    tokenizer = any tokenizer exposing convert_ids_to_tokens (e.g. the BERT/BART/GPT-2 ones).
    num_report = maximum number of rows to report (defaults to 20, the previous fixed value).

    Returns a DataFrame with the columns 'Word' and 'Score value' (rounded to
    5 decimals), ordered from the highest score to the lowest.
    """

    # It should intake the raw scores itself.
    score_vals, word_idxs = torch.sort(scores, descending = True)

    # Trim to the reported rows *before* converting, so that only num_report ids
    # (not the whole vocabulary) are turned into tokens and rounded.
    # detach().cpu() keeps this working for tensors that require grad or live on
    # another device, where a direct .numpy() call raises.
    top_vals = score_vals[:num_report].detach().cpu().tolist()
    top_idxs = word_idxs[:num_report].detach().cpu().tolist()

    words = tokenizer.convert_ids_to_tokens(top_idxs)

    print(f"Reporting most likely tokens to complete '{sentence}' in descending order")

    score_df = pd.DataFrame.from_dict({
      'Word': words,
      'Score value': [round(val, 5) for val in top_vals]
      })

    return score_df

In [88]:
# Load the scores that you saved from the main run.
# prep_probs is reloaded first so that edits to the module are picked up
# without restarting the kernel.

importlib.reload(prep_probs)

WORD_CHANGES_FOLDER = './intermediate_results/word_changes'
# The result is keyed by model name; the comparison cell below iterates over
# those keys and treats each value as a DataFrame of word changes.
all_changes_probs = prep_probs.load_word_changes(WORD_CHANGES_FOLDER)


In [90]:
# Will compare the user transcription scored softmax against the ones
#     produced by the new code from predicting from editTables
# Since the model score utils function was generally checked for correctness

# i.e. will just check the user transcription, since the arguments/function for response
# are parallel.

# For every word-change row, the probability stored in the row ('orig_prob') is
# compared with the probability found at position sCounter in the per-sentence
# scores loaded into raw_probs. Rows that differ by more than 1e-6 are printed
# and collected in `mismatches`, keyed by model name.

importlib.reload(sub_analysis)

print('For the "predicting at" printout, here I am only checking sWord, which is the first printout.')

mismatches = defaultdict(list)

# Built once instead of re-creating and scanning a list for every row.
# setdefault keeps the FIRST position of a repeated transcription, which is the
# position list.index() used to return.
transcription_to_idx = {}
for run_idx, transcription in enumerate(all_runs['user_candidate_transcription']):
    transcription_to_idx.setdefault(transcription, run_idx)

for model_name in all_changes_probs:

    print(f'BEGINNING NEW MODEL SET: {model_name}')

    df = all_changes_probs[model_name]

    # raw_probs stores the small GPT-2 model under 'gpt2_normal'.
    raw_probs_key = model_name if model_name != 'gpt2' else model_name + '_normal'

    for case_idx in range(len(df)):

        if case_idx % 100 == 0: print(f'Current case idx: {case_idx} / {len(df)}')

        case = df.iloc[case_idx]

        case_sentence = case['sentence']
        print(case_sentence)

        # Load the original score from the right case
        sentence_idx = transcription_to_idx[case_sentence]
        orig_scores = raw_probs[raw_probs_key][sentence_idx]
        orig_orig_prob = orig_scores.iloc[int(case['sCounter'])]['prob']

        case_sentence = model_score_utils.prepSentence(case_sentence)

        print(case_sentence)

        orig_prob = case['orig_prob']

        # Missing probabilities come back from pandas as NaN, which an
        # `is None` test never matches; pd.isna covers both None and NaN.
        if pd.isna(orig_prob):
            continue

        if abs(orig_prob - orig_orig_prob) > 1e-6:
            this_word = case['sWord']
            this_report = f"Mismatch in probabilities for entry {case_idx}."
            this_report += f"\n\t Original prob: {orig_prob}"
            this_report += f"\n\t List of probs, entry: {orig_orig_prob}"
            this_report += f"\n\t Word of interest: {this_word}"
            print(this_report)
            mismatches[model_name].append((case_idx, this_word, orig_prob, orig_scores))


For the "predicting at" printout, here I am only checking sWord, which is the first printout.
BEGINNING NEW MODEL SET: gpt2_normal
Current case idx: 0 / 1366
each nonfiction book has a call number on its spine
Each nonfiction book has a call number on its spine .
Mismatch in probabilities for entry 0.
	 Original prob: 0.043548859655857086
	 List of probs, entry: 0.0003632185107562691
	 Word of interest: on
each non fiction book has a call number in its spine
Each non fiction book has a call number in its spine .
each non fiction book has a call number in its spine
Each non fiction book has a call number in its spine .
each non fiction book had 12 numbers on its spine
Each non fiction book had 12 numbers on its spine .
each non fiction book had twelve numbers on its spine
Each non fiction book had twelve numbers on its spine .
each nonfiction book had twelve notches on its spine
Each nonfiction book had twelve notches on its spine .
Mismatch in probabilities for entry 5.
	 Original prob

the dates began tearing up the food by tearing it
The dates began tearing up the food by tearing it .
the dates begin tearing up the sheepish the tearing
The dates begin tearing up the sheepish the tearing .
Mismatch in probabilities for entry 442.
	 Original prob: 1.649344994802959e-06
	 List of probs, entry: 0.0003244583203922957
	 Word of interest: tearing
a date begins with tearing up the shingles
A date begins with tearing up the shingles .
a date begins with tearing up the shingles
A date begins with tearing up the shingles .
it begins with her pushing up her single
It begins with her pushing up her single .
it begins with fishing with cobble
It begins with fishing with cobble .
it begins with fishing with people
It begins with fishing with people .
the raspberry leaves are not very tasty when you have them
The raspberry leaves are not very tasty when you have them .
a fly buzzed over the oiled cloth on the kitchen table
A fly buzzed over the oiled cloth on the kitchen table .
a 

Some condiments you will know are vinegar and apple juice .
Mismatch in probabilities for entry 756.
	 Original prob: 0.05064612999558449
	 List of probs, entry: 0.002944289706647396
	 Word of interest: juice
some condiments you know are vinegar and apple cider
Some condiments you know are vinegar and apple cider .
Mismatch in probabilities for entry 757.
	 Original prob: 0.011608727276325226
	 List of probs, entry: 0.556446373462677
	 Word of interest: you
the chase leads across a field toward a nearby farm
The chase leads across a field toward a nearby farm .
the chase leads across the field towards the nearby farm
The chase leads across the field towards the nearby farm .
the chase leads across the field toward the nearby farm
The chase leads across the field toward the nearby farm .
the chase leads across the field toward the nearby farm
The chase leads across the field toward the nearby farm .
the chase leads across the field towards the new one
The chase leads across the field to

he rides his golden chariot across the sky every day
He rides his golden chariot across the sky every day .
he rides his golden chariot across the sky every day
He rides his golden chariot across the sky every day .
he walks his golden tree across the sky everyday
He walks his golden tree across the sky everyday .
we watch the golden keys across the golden skies every day
We watch the golden keys across the golden skies every day .
we watch the golden keys across the golden skies every day
We watch the golden keys across the golden skies every day .
we watch the golden keys across the golden skies every day
We watch the golden keys across the golden skies every day .
we watched the golden bees across the golden sky every day
We watched the golden bees across the golden sky every day .
they watched the golden bees across the golden sky every day
They watched the golden bees across the golden sky every day .
they watched the golden bees across the golden sky every day
They watched the go

The funny hats provide short covering .
the funny hat is covered in pepper
The funny hat is covered in pepper .
the funny hat was covered in pepper
The funny hat was covered in pepper .
the food was covered with pepper
The food was covered with pepper .
the food was covered in pepper
The food was covered in pepper .
often the village was burned to the ground by fires
Often the village was burned to the ground by fires .
often the village was burned to the ground by fires
Often the village was burned to the ground by fires .
after the gullet was burned to the ground by raiders
After the gullet was burned to the ground by raiders .
after the gallium was bunt to the ground by raiders
After the gallium was bunt to the ground by raiders .
after the gallium was bunt to the ground by raiders
After the gallium was bunt to the ground by raiders .
after the goal we went to the ground for reading
After the goal we went to the ground for reading .
after the goal he went to the ground for a rebound

you cant publicly give them god and coal
You cant publicly give them god and coal .
we cannot publicly give them that and coal
We cannot publicly give them that and coal .
meadow mice and gophers eat the roots of some weeds
Meadow mice and gophers eat the roots of some weeds .
Mismatch in probabilities for entry 558.
	 Original prob: 1.3806934475724121e-05
	 List of probs, entry: 0.0013810121454298496
	 Word of interest: mice
meadow mice and gophers eat the roots of some weeds
Meadow mice and gophers eat the roots of some weeds .
Mismatch in probabilities for entry 559.
	 Original prob: 0.0006954295677132906
	 List of probs, entry: 0.0005867514992132783
	 Word of interest: roots
meadow mice and gophers eat the roots of some weeds
Meadow mice and gophers eat the roots of some weeds .
Mismatch in probabilities for entry 560.
	 Original prob: 0.0013135724002495408
	 List of probs, entry: 0.0006954295677132905
	 Word of interest: weeds
meadow lice and gophers eat the rice of some leaves
Me

you can see what they have done to it already
You can see what they have done to it already .
he can see what they have done to it already
He can see what they have done to it already .
he can see what they have done to it already
He can see what they have done to it already .
i see what they have done to her already
I see what they have done to her already .
each non fiction book has a call number on its spine
Each non fiction book has a call number on its spine .
each non fiction book goes on the call of the manager
Each non fiction book goes on the call of the manager .
each non friction part goes on part of the mannequin 
Each non friction part goes on part of the mannequin  .
each non friction part goes on part of the mannequin 
Each non friction part goes on part of the mannequin  .
each non friction part goes on part of the mannequin 
Each non friction part goes on part of the mannequin  .
each non fiction part goes in part with the manicure 
Each non fiction part goes in part w

i am sorry to be informed that text matters are elsewhere
I am sorry to be informed that text matters are elsewhere .
i am sorry to be informed that the text matters else where 
I am sorry to be informed that the text matters else where  .
i am sorry to inform you the sell cannot go through
I am sorry to inform you the sell cannot go through .
i am sorry to inform you the sale can not go threw
I am sorry to inform you the sale can not go threw .
i am sorry to inform you the sale can not go threw
I am sorry to inform you the sale can not go threw .
sorry to inform you that the sale could not go through
Sorry to inform you that the sale could not go through .
i regret to inform you that the sale did not go through
I regret to inform you that the sale did not go through .
someone probably will hear you if you yell for help
Someone probably will hear you if you yell for help .
someone would probably hear you if you call for help
Someone would probably hear you if you call for help .
someon

Did you not notice the ladle beside the soup .
Mismatch in probabilities for entry 119.
	 Original prob: 0.005092133302241564
	 List of probs, entry: 0.00036156055284664035
	 Word of interest: beside
did you not notice the ladle inside the soup
Did you not notice the ladle inside the soup .
Mismatch in probabilities for entry 120.
	 Original prob: 0.0004136056813877076
	 List of probs, entry: 0.8774880766868591
	 Word of interest: soup
did you not notice the window beside the sea
Did you not notice the window beside the sea .
did you not notice the window beside the sea
Did you not notice the window beside the sea .
the discovery of oil has caused many cities to grow
The discovery of oil has caused many cities to grow .
the discovery of oil has caused many cities to run
The discovery of oil has caused many cities to run .
they discovered that oil has numerous useful resources
They discovered that oil has numerous useful resources .
they discovered the world has important resources
They

The dates began tearing up the food by tearing it .
the dates begin tearing up the sheepish the tearing
The dates begin tearing up the sheepish the tearing .
Mismatch in probabilities for entry 442.
	 Original prob: 5.6587938161101185e-05
	 List of probs, entry: 0.005247128661721945
	 Word of interest: tearing
a date begins with tearing up the shingles
A date begins with tearing up the shingles .
a date begins with tearing up the shingles
A date begins with tearing up the shingles .
it begins with her pushing up her single
It begins with her pushing up her single .
it begins with fishing with cobble
It begins with fishing with cobble .
it begins with fishing with people
It begins with fishing with people .
the raspberry leaves are not very tasty when you have them
The raspberry leaves are not very tasty when you have them .
Mismatch in probabilities for entry 448.
	 Original prob: 0.9872592091560364
	 List of probs, entry: 0.8359520435333252
	 Word of interest: are
a fly buzzed over th

We started in 4th grade the hottest .
we started in 4th grade the hottest
We started in 4th grade the hottest .
we started in fourth grade the highest
We started in fourth grade the highest .
he started it in the fourth grade
He started it in the fourth grade .
how do you know that the thing is a good deal
How do you know that the thing is a good deal .
how do you know if the thing is a good deal
How do you know if the thing is a good deal .
i knew that this pain was to much for me
I knew that this pain was to much for me .
i knew that this pain was too much for me
I knew that this pain was too much for me .
meadow mice and gophers eat the roots of some weeds
Meadow mice and gophers eat the roots of some weeds .
Mismatch in probabilities for entry 845.
	 Original prob: 0.00559140695258975
	 List of probs, entry: 0.9083026051521301
	 Word of interest: weeds
metal lice and gophers eat the roots of some leaves
Metal lice and gophers eat the roots of some leaves .
Mismatch in probabilities

it is nice in the home loaned in my country
It is nice in the home loaned in my country .
it is nice in the home alone in the country
It is nice in the home alone in the country .
it is nice to have a home alone it is quiet
It is nice to have a home alone it is quiet .
it is nice to have a home in spite of him
It is nice to have a home in spite of him .
falls are one kind of accident you can help prevent
Falls are one kind of accident you can help prevent .
flaws are one thing we can help to prevent
Flaws are one thing we can help to prevent .
flaws are one thing we can help to prevent
Flaws are one thing we can help to prevent .
claws are one thing that help to scratch
Claws are one thing that help to scratch .
claws are for clinging things to scratch
Claws are for clinging things to scratch .
claws are for clinging things to scratch
Claws are for clinging things to scratch .
mops are for cleaning things to scratch
Mops are for cleaning things to scratch .
bobs are for cleaning and th

He skipped rocks across the road away to the nearby pond .
he skipped rocks across the road away to the nearby pond
He skipped rocks across the road away to the nearby pond .
he kicked rocks across the road to a nearby sign 
He kicked rocks across the road to a nearby sign  .
he kicked rocks across the road to a nearby site
He kicked rocks across the road to a nearby site .
he kicked rocks across the road to the nearby site
He kicked rocks across the road to the nearby site .
he kicked rocks across the road to a nearby site
He kicked rocks across the road to a nearby site .
he kicked rocks across the road in the nearby sand
He kicked rocks across the road in the nearby sand .
he kicked rocks across the road in the nearby sand
He kicked rocks across the road in the nearby sand .
they kicked rocks across the road in the nearby town
They kicked rocks across the road in the nearby town .
your teeth begin breaking up food by chewing it
Your teeth begin breaking up food by chewing it .
the t

big water is a thing that moves things
Big water is a thing that moves things .
clean water is a thing that melts things
Clean water is a thing that melts things .
i think water is the thing that melts things
I think water is the thing that melts things .
i think water is some of those things
I think water is some of those things .
there are two forms of omelet comedy and tragedy
There are two forms of omelet comedy and tragedy .
Mismatch in probabilities for entry 665.
	 Original prob: 4.7450380407099155e-06
	 List of probs, entry: 1.3906230378779583e-05
	 Word of interest: tragedy
there are two forms of operas comedies and tragedies
There are two forms of operas comedies and tragedies .
there are two forms of operas harmonies and tragedies
There are two forms of operas harmonies and tragedies .
there are two types of charring on the isthmus
There are two types of charring on the isthmus .
there are two types of charring on the a frame
There are two types of charring on the a frame .


The thin changes in the marketplace for days .
the thin changes in the microwave for days
The thin changes in the microwave for days .
nothing changes in the microwave for days
Nothing changes in the microwave for days .
you may not notice yourself growing from day to day
You may not notice yourself growing from day to day .
you may not notice yourself going to sleep going to bed
You may not notice yourself going to sleep going to bed .
you may not know this yourself but honestly to the bed
You may not know this yourself but honestly to the bed .
you may not know this yourself but honestly to the bed
You may not know this yourself but honestly to the bed .
you may not notice this yourself but obviously she did
You may not notice this yourself but obviously she did .
you may not notice several things but probably she did
You may not notice several things but probably she did .
you may not notice several things but probably she did
You may not notice several things but probably she did .

he found that there are many of the same things
He found that there are many of the same things .
we found a very many of the same things
We found a very many of the same things .
but he found a very many of the same thing
But he found a very many of the same thing .
we found the very many of the saints 
We found the very many of the saints  .
he follows the very minimum of things
He follows the very minimum of things .
he follows the very minimum of things
He follows the very minimum of things .
he could see that they already turned at the turn back
He could see that they already turned at the turn back .
you could see that he had torn everything off
You could see that he had torn everything off .
you can see that he had torn everything off
You can see that he had torn everything off .
you could see that he had torn everything off
You could see that he had torn everything off .
you could see that she had torn everything off
You could see that she had torn everything off .
before you l

In [91]:
# Tally the recorded probability mismatches per model.
# Open question from the original author: is this really the number of
# mismatches? -- maybe just wait for Dr. Meylan's feedback
for mismatch_model, mismatch_cases in mismatches.items():
    print(mismatch_model, len(mismatch_cases))

gpt2_normal 190
gpt2_medium 191
bert 86
bart 191


In [59]:
# Directly passing in a prefix to the single substitution method.
# Runs one GPT-2 word-change row through sub_analysis.process_single_substitution
# with verifying = True, so the returned probabilities can be inspected by hand.

# Reload so that edits to the helper modules are picked up without a kernel restart.
importlib.reload(sub_analysis)
importlib.reload(model_score_utils)

# Despite its name, this_df is a single row (positional row 2) of the GPT-2 table.
this_df = all_changes_probs['gpt2_normal'].iloc[2]
# model and tokenizer are module-level names here; later cells (the prefix
# completion check and the per-position report) reuse them.
model, tokenizer, prefix_func = model_score_funcs.get_gpt2_modules()
new_sentence = this_df['sentence']

print(new_sentence)

orig_prob_results = sub_analysis.process_single_substitution(new_sentence,
                                                             this_df['sWord'],
                                                             this_df['sCounter'],
                                                             model,
                                                             tokenizer,
                                                             prefix_func,
                                                             verifying = True)

# With verifying = True the call yields three values.
# NOTE(review): all_probs is later indexed as all_probs[0, pos, :], so it looks
# like a (batch, position, vocabulary) tensor -- TODO confirm in sub_analysis.
orig_prob, orig_prob_all, all_probs = orig_prob_results

each non fiction book has a call number in its spine


In [87]:
# GPT-2 prefix completion check
# Feeds a hand-written prefix to GPT-2 and prints the most likely next tokens,
# as a manual sanity check on the probability-extraction code.

tok = tokenizers['gpt2']

# Note the trailing space at the end of the prefix.
test_sentence = f"Each nonfiction book has a call number on its "
#test_sentence = f"It's time to go to the "
this_tokens = tok.encode(test_sentence)
# NOTE(review): why the prediction position is len - 2 rather than len - 1 is
# not explained here; possibly related to the trailing space in the prefix
# being tokenized on its own -- TODO confirm.
this_pred_pos = len(this_tokens) - 2

# The 0 is a filler index.
# NOTE(review): `model` is not defined in this cell. It is the GPT-2 model
# unpacked from model_score_funcs.get_gpt2_modules() in the single-substitution
# cell, so that cell has to run first.
prob_at_ground_truth, probs, _ = model_score_utils.get_model_probabilities(this_tokens, model, 0, this_pred_pos, verifying = True)
result_df = report_mask_words(probs, test_sentence, tok)
print(result_df)

# It seems to be an inability to tolerate the sentence itself -- why?
# Despite it working on the "it's time to go to the" case

Reporting most likely tokens to complete 'Each nonfiction book has a call number on its ' in descending order
          Word  Score value
0       Ġcover      0.31122
1       Ġfront      0.16148
2        Ġback      0.13416
3       Ġspine      0.03917
4        Ġpage      0.03521
5     Ġwebsite      0.03431
6        Ġside      0.01393
7       Ġlabel      0.01026
8       Ġtitle      0.01002
9       Ġpages      0.00811
10        Ġweb      0.00559
11        Ġend      0.00527
12       Ġhome      0.00482
13  Ġpublisher      0.00468
14      Ġshelf      0.00456
15       Ġsite      0.00395
16   Ġhomepage      0.00336
17       Ġwall      0.00334
18       Ġleft      0.00327
19      Ġfirst      0.00310


## Additional checks

In [60]:
# Print the top predicted tokens at each of the first 9 positions of all_probs.
# NOTE(review): 9 is hard-coded; presumably chosen to cover the positions of
# interest in new_sentence -- TODO confirm.
# Depends on all_probs, new_sentence and tokenizer from the single-substitution cell.
for pos in range(9):
    # Overwrites the orig_prob_all unpacked in the single-substitution cell.
    # NOTE(review): assumes all_probs axes are (batch, position, vocabulary) -- TODO confirm.
    orig_prob_all = all_probs[0, pos, :]
    res = report_mask_words(orig_prob_all, model_score_utils.prepSentence(new_sentence), tokenizer)
    print(res)

Reporting most likely tokens to complete 'Each non fiction book has a call number in its spine .' in descending order
    Word  Score value
0      Ċ      0.06230
1    The      0.03770
2      "      0.02411
3      A      0.01940
4      I      0.01832
5     In      0.01158
6      .      0.01130
7     It      0.00912
8      S      0.00866
9   This      0.00777
10     B      0.00769
11     -      0.00765
12     C      0.00719
13    We      0.00701
14     1      0.00671
15     T      0.00665
16     '      0.00600
17     P      0.00575
18     (      0.00575
19     G      0.00551
Reporting most likely tokens to complete 'Each non fiction book has a call number in its spine .' in descending order
           Word  Score value
0         Ġyear      0.14706
1           Ġof      0.06469
2          Ġday      0.04879
3        Ġmonth      0.04291
4         Ġweek      0.04254
5         Ġtime      0.03119
6       Ġseason      0.01456
7         Ġteam      0.01269
8       Ġplayer      0.01194
9         Ġg

Finding the true token position

In [60]:
# Checking for correctness of substitution processing

# Same repo-relative folder as the earlier cells that set WORD_CHANGES_FOLDER;
# it resolves against the repository root selected by os.chdir in the imports cell.
WORD_CHANGES_FOLDER = './intermediate_results/word_changes'

substitution_df = pd.read_csv(join(WORD_CHANGES_FOLDER, 'edit_substitutions.csv'))


In [73]:

def find_true_token_pos_both(df_entry, model, tokenizer, prefix_func):
    """
    Look up the substituted word's position in both the sentence and the response.

    df_entry = a substitution row providing 'sentence', 'sWord', 'sCounter'
        and 'response', 'rWord', 'rCounter'.
    model, prefix_func = not used here; accepted so that the tuple returned by the
        model_score_funcs.get_*_modules() helpers can be unpacked straight into the call.
    tokenizer = forwarded to sub_analysis.find_true_token_position.

    Returns (orig_loc, edited_loc): the results of find_true_token_position
    for the sentence side and the response side, in that order.
    """

    # Need to check if these are 0 indexed?

    sides = (
        ('sentence', 'sWord', 'sCounter'),
        ('response', 'rWord', 'rCounter'),
    )

    orig_loc, edited_loc = (
        sub_analysis.find_true_token_position(
            df_entry[text_col], df_entry[word_col], int(df_entry[counter_col]), tokenizer
        )
        for text_col, word_col, counter_col in sides
    )

    return orig_loc, edited_loc


In [74]:

# Tests for find_true_token_pos_both with the GPT-2, BERT and BART modules.
#
# Each family's modules are loaded ONCE and reused for both checks; the
# copy-pasted version called get_*_modules() twice per family, loading every
# model twice.
#
# Row 0 -- "unchanged and changed" case: the word is found on both sides.
# Row 8 -- "cannot find" case: the first one is "dietitian", which is broken up.
#          The second is "priest", which is normal.

TOKEN_POSITION_CASES = [
    # (family, module loader, expected result for row 0)
    ('GPT-2', model_score_funcs.get_gpt2_modules, (8, 8)),
    ('BERT', model_score_funcs.get_bert_modules, (7, 8)),
    ('BART', model_score_funcs.get_bart_modules, (8, 8)),
]

# Expected result for row 8 is the same for all three families.
CANNOT_FIND_EXPECTED = (-2, 1)

for family, get_modules, found_expected in TOKEN_POSITION_CASES:

    modules = get_modules()

    # Handle unchanged and changed case
    result = find_true_token_pos_both(substitution_df.iloc[0], *modules)
    assert result == found_expected, f'{family}, row 0: expected {found_expected}, got {result}'

    # Handle cannot find case
    result = find_true_token_pos_both(substitution_df.iloc[8], *modules)
    assert result == CANNOT_FIND_EXPECTED, f'{family}, row 8: expected {CANNOT_FIND_EXPECTED}, got {result}'


Some weights of the model checkpoint at bert-large-uncased-whole-word-masking were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


KeyboardInterrupt: 

In [77]:
# Preview the first rows of the substitution table.
substitution_df.head()

Unnamed: 0.1,Unnamed: 0,code,sWord,rWord,sCounter,rCounter,sentence,response,sLeftSequence,rLeftSequence,sRightSequence,rRightSequence,input_subject,output_subject
0,11,S,on,in,7.0,8.0,each nonfiction book has a call number on its ...,each non fiction book has a call number in its...,call number on,call number in,on its spine,,0,8cf6535ea0ae4addb28f5f90a2b13a7d
1,11,S,has,had,4.0,3.0,each non fiction book has a call number in its...,each nonfiction book had a call number on its ...,fiction book has,nonfiction book had,has a call,had a call,8cf6535ea0ae4addb28f5f90a2b13a7d,edf8f705ab7241a191fa3ae24e381ebc
2,12,S,in,on,8.0,7.0,each non fiction book has a call number in its...,each nonfiction book had a call number on its ...,call number in,call number on,,on its spine,8cf6535ea0ae4addb28f5f90a2b13a7d,edf8f705ab7241a191fa3ae24e381ebc
3,13,S,12,twelve,5.0,4.0,each non fiction book had 12 numbers on its spine,each nonfiction book had twelve numbers on its...,book had 12,book had twelve,12 numbers on,twelve numbers on,dc02e8bc38234eed94e4e0cfbe083801,e1f5fb8bc21048cb8a50771b80eda39a
4,13,S,numbers,notches,6.0,5.0,each non fiction book had twelve numbers on it...,each nonfiction book had twelve notches on its...,had twelve numbers,had twelve notches,numbers on its,notches on its,53edaed0aee2422e96af6c0d3d636962,a575811edc0841b082724370f678d574


In [79]:
# Checking prefix correctness with the true position finding
# The same five rows go through analyze_substitutions once per model family
# (BERT, then GPT-2, then BART), with two blank lines between the printouts.

module_getters = (
    model_score_funcs.get_bert_modules,
    model_score_funcs.get_gpt2_modules,
    model_score_funcs.get_bart_modules,
)

for getter_idx, get_modules in enumerate(module_getters):

    # Separator goes between runs only, never before the first or after the last.
    if getter_idx > 0:
        print(); print()

    result = sub_analysis.analyze_substitutions(substitution_df.head(), *get_modules())


Some weights of the model checkpoint at bert-large-uncased-whole-word-masking were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Entry 0
['[CLS]', 'each', 'nonfiction', 'book', 'has', 'a', 'call', 'number', '[MASK]', 'its', 'spine', '[SEP]']
['[CLS]', 'each', 'non', 'fiction', 'book', 'has', 'a', 'call', 'number', '[MASK]', 'its', 'spine', '[SEP]']

Entry 1
['[CLS]', 'each', 'non', 'fiction', 'book', '[MASK]', 'a', 'call', 'number', 'in', 'its', 'spine', '[SEP]']
['[CLS]', 'each', 'nonfiction', 'book', '[MASK]', 'a', 'call', 'number', 'on', 'its', 'spine', '[SEP]']

Entry 2
['[CLS]', 'each', 'non', 'fiction', 'book', 'has', 'a', 'call', 'number', '[MASK]', 'its', 'spine', '[SEP]']
['[CLS]', 'each', 'nonfiction', 'book', 'had', 'a', 'call', 'number', '[MASK]', 'its', 'spine', '[SEP]']

Entry 3
['[CLS]', 'each', 'non', 'fiction', 'book', 'had', '[MASK]', 'numbers', 'on', 'its', 'spine', '[SEP]']
['[CLS]', 'each', 'nonfiction', 'book', 'had', '[MASK]', 'numbers', 'on', 'its', 'spine', '[SEP]']

Entry 4
['[CLS]', 'each', 'non', 'fiction', 'book', 'had', 'twelve', '[MASK]', 'on', 'its', 'spine', '[SEP]']
Fragmented



In [81]:
# Checking expected saving behavior for the single_substitution predictions and the edit saves.
# Results saved on the prefix. These are just for debugging purposes.

# Repo-relative like the other intermediate_results folders in this notebook;
# resolves against the repository root selected by os.chdir in the imports cell.
debug_save_results_folder = './intermediate_results/word_changes_prefix_only'
debug_save_results_path = join(debug_save_results_folder, 'word_change_probs.csv')

results_df = pd.read_csv(debug_save_results_path)

In [82]:
# Display the reloaded debugging results, including the orig_prob / edited_prob columns.
results_df

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,code,sWord,rWord,sCounter,rCounter,sentence,response,sLeftSequence,rLeftSequence,sRightSequence,rRightSequence,input_subject,output_subject,orig_prob,edited_prob
0,0,11,S,on,in,7.0,8.0,each nonfiction book has a call number on its ...,each non fiction book has a call number in its...,call number on,call number in,on its spine,,0,8cf6535ea0ae4addb28f5f90a2b13a7d,0.350487,0.057322
1,1,11,S,has,had,4.0,3.0,each non fiction book has a call number in its...,each nonfiction book had a call number on its ...,fiction book has,nonfiction book had,has a call,had a call,8cf6535ea0ae4addb28f5f90a2b13a7d,edf8f705ab7241a191fa3ae24e381ebc,0.338602,0.007203
2,2,12,S,in,on,8.0,7.0,each non fiction book has a call number in its...,each nonfiction book had a call number on its ...,call number in,call number on,,on its spine,8cf6535ea0ae4addb28f5f90a2b13a7d,edf8f705ab7241a191fa3ae24e381ebc,0.057322,0.355695
3,3,13,S,12,twelve,5.0,4.0,each non fiction book had 12 numbers on its spine,each nonfiction book had twelve numbers on its...,book had 12,book had twelve,12 numbers on,twelve numbers on,dc02e8bc38234eed94e4e0cfbe083801,e1f5fb8bc21048cb8a50771b80eda39a,0.002778,0.000954
4,4,13,S,numbers,notches,6.0,5.0,each non fiction book had twelve numbers on it...,each nonfiction book had twelve notches on its...,had twelve numbers,had twelve notches,numbers on its,notches on its,53edaed0aee2422e96af6c0d3d636962,a575811edc0841b082724370f678d574,0.005896,


## Quick dataset analyses

In [None]:
# Check how many sentences were affected by tokenization misalignments.
# This will load the aligned versions of the results, where the broken words were taken out


DATA_PREP_FOLDER = './intermediate_results/data_prep_logistic'
lm = load_runs.load_logistic_prep(DATA_PREP_FOLDER)


model_key = 'gpt2_normal'  # NOTE(review): not referenced anywhere in the visible cells.

# This cell used to loop over `transformer_names` and read `raw_scores`, but
# neither name is defined anywhere in the notebook, so it raised NameError on a
# fresh kernel. The per-model raw scores loaded in the data-loading cell live in
# `raw_probs`, so its keys and values are used instead.
# NOTE(review): assumes lm has a '<model>_scores' entry for every key of
# raw_probs -- TODO confirm against load_runs.load_logistic_prep.
transformer_names = list(raw_probs)

# Need to compare words. Should be enough to compare length of the two as a first pass.

all_count = 0

# The loop variable is model_name (not model) so that the GPT-2 model object
# created in an earlier cell is not overwritten by a string.
for model_name in transformer_names:

    count = 0

    aligned_score_list = lm[model_name+'_scores']
    raw_score_list = raw_probs[model_name]
    assert len(aligned_score_list) == len(raw_score_list)


    # A sentence counts as misaligned when its aligned and raw score
    # sequences have different lengths.
    for i in range(len(aligned_score_list)):
        if len(aligned_score_list[i]) != len(raw_score_list[i]):
            count += 1
    print(f'Model: {model_name}, Number of alignment length difference: {count} / {len(aligned_score_list)}')
    all_count += count

print(f'Total misalignment over all models: {all_count}')
            

Note that the number of word substitutions obtained from the editTables is fairly low:

22482 entries were matches

3135 entries were insertions

3442 entries were deletions

1366 entries were substitutions.

In [None]:
# NOTE(review): nothing in the visible cells appends to or reads this list.
off_by_one_sub = []

def check_single_change(entry):
    """
    Tell whether a sentence and its response differ only at the substituted word.

    entry = a mapping/row with 'sentence', 'response', 'sCounter' and 'rCounter';
        the counters are used as word positions in the whitespace-split texts.

    Returns True when, after blanking out the word at sCounter in the sentence
    and the word at rCounter in the response, the two word lists are equal.
    """
    placeholder = '<FILLER>'

    source_words, response_words = (
        entry[text_col].split() for text_col in ('sentence', 'response')
    )

    # Blank out the substituted word on each side so that it cannot affect the comparison.
    for words, counter_col in ((source_words, 'sCounter'), (response_words, 'rCounter')):
        words[int(entry[counter_col])] = placeholder

    return source_words == response_words # Then?


def test_check_single_change():
    """
    Print check_single_change's verdict for two rows of substitution_df:
    row 0 (labelled the negative case) and row 12 (labelled the positive case).
    """
    # Both rows are evaluated before anything is printed.
    neg_case, pos_case = (
        check_single_change(substitution_df.iloc[row_idx]) for row_idx in (0, 12)
    )

    print(f'Neg case: {neg_case}')
    print(f'Pos case: {pos_case}') # This works. Now run this on everything

    
# Run on the entire dataframe?
# Collect the row positions whose sentence and response differ only at the
# substituted word, then report how many there are.

ok_sentences = [
    row_idx
    for row_idx in range(len(substitution_df))
    if check_single_change(substitution_df.iloc[row_idx])
]

print(f'Number of single substitution sentences: {len(ok_sentences)}')