In [1]:
%config Completer.use_jedi = False

In [2]:
modules_path = '../modules'
import sys
sys.path.insert(0, modules_path)

In [3]:
from transformers import AutoTokenizer, pipeline, GPT2LMHeadModel

In [4]:
from yelp_evaluator import YelpEvaluator

In [5]:
from tqdm import tqdm
import numpy as np

In [6]:
# Build the shared evaluator; later cells use its `device`, `perplexer_model`,
# `_sent_len` and `_sent_nll` attributes.
# NOTE(review): the meaning of the positional argument 3 is not visible in this
# notebook — confirm against YelpEvaluator's signature.
evaluator = YelpEvaluator(3)

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [7]:
# Point the evaluator's perplexity model at the stock 'gpt2' checkpoint, moved
# onto the device the evaluator already uses.
evaluator.perplexer_model = (
    GPT2LMHeadModel
    .from_pretrained('gpt2')
    .to(evaluator.device)
)

In [8]:
# Prompt output files to score, grouped by transfer direction. The i-th
# 'pos2neg' file is paired with the i-th 'neg2pos' file by the loader cells.
unconstrained_prompts = {
    'pos2neg': [
        '../modules/outputs/prompts/gpt2-xl_all_pos2neg_ec2-18_3_2.out',
        '../modules/outputs/prompts/gpt2-xl_all_pos2neg_ec2-230_4_3.out',
        '../modules/outputs/prompts/gpt2-xl_all_pos2neg_ec2-82_2_0.out',
    ],
    'neg2pos': [
        '../modules/outputs/prompts/gpt2-xl_all_neg2pos_ec2-18_2_0.out',
        '../modules/outputs/prompts/gpt2-xl_all_neg2pos_ec2-34_3_2.out',
        '../modules/outputs/prompts/gpt2-xl_all_neg2pos_ec2-82_4_3.out',
    ],
}

# Same layout for the files carrying the `_fluent_0` suffix.
constrained_prompts = {
    'pos2neg': [
        '../modules/outputs/prompts/gpt2-xl_all_pos2neg_ec2-82_2_0_fluent_0.out',
        '../modules/outputs/prompts/gpt2-xl_all_pos2neg_ec2-230_3_2_fluent_0.out',
    ],
    'neg2pos': [
        '../modules/outputs/prompts/gpt2-xl_all_neg2pos_ec2-82_2_0_fluent_0.out',
        '../modules/outputs/prompts/gpt2-xl_all_neg2pos_ec2-82_3_2_fluent_0.out',
    ],
}

In [9]:
# Pool the prompts of each (pos2neg, neg2pos) file pair into one list per pair.
# Every line is whitespace-stripped; blank lines are kept as empty strings.
unconstrained_prompts_combined = []
for pos2neg_path, neg2pos_path in zip(unconstrained_prompts['pos2neg'], unconstrained_prompts['neg2pos']):
    # `with` closes both files as soon as they are read instead of leaving the
    # handles open until garbage collection.
    with open(pos2neg_path) as pos2neg_file, open(neg2pos_path) as neg2pos_file:
        prompts_combined = ([line.strip() for line in pos2neg_file] +
                            [line.strip() for line in neg2pos_file])
    unconstrained_prompts_combined.append(prompts_combined)

In [10]:
# Pool the prompts of each (pos2neg, neg2pos) file pair into one list per pair.
# Lines are whitespace-stripped and lines that are empty after stripping are
# dropped.
constrained_prompts_combined = []
for pos2neg_path, neg2pos_path in zip(constrained_prompts['pos2neg'], constrained_prompts['neg2pos']):
    # `with` closes both files as soon as they are read instead of leaving the
    # handles open until garbage collection.
    with open(pos2neg_path) as pos2neg_file, open(neg2pos_path) as neg2pos_file:
        prompts_combined = ([line.strip() for line in pos2neg_file if line.strip()] +
                            [line.strip() for line in neg2pos_file if line.strip()])
    constrained_prompts_combined.append(prompts_combined)

In [11]:
def compute_ppl(texts):
    """Return a single perplexity figure for a collection of texts.

    For every text the module-level ``evaluator`` supplies a length
    (``_sent_len``) and an NLL (``_sent_nll``). Each NLL is multiplied by its
    length, the products are summed, divided by the total length, and the
    result is exponentiated.

    NOTE(review): the length weighting presumably means ``_sent_nll`` returns a
    per-token average — confirm against YelpEvaluator.

    Args:
        texts: iterable of strings to score. It is materialised into a list so
            that one-shot iterables survive the two passes below.

    Returns:
        ``np.exp(sum(len_i * nll_i) / sum(len_i))``.

    Raises:
        ValueError: if the lengths sum to zero (e.g. ``texts`` is empty), since
            the ratio is undefined.
    """
    texts = list(texts)
    sent_lens = [evaluator._sent_len(t) for t in tqdm(texts)]
    # zip() has no len(), so pass `total` explicitly; otherwise tqdm can only
    # print a running count instead of a progress bar.
    sent_nlls = [
        sent_len * evaluator._sent_nll(t)
        for sent_len, t in tqdm(zip(sent_lens, texts), total=len(texts))
    ]
    total_len = sum(sent_lens)
    if total_len == 0:
        raise ValueError("compute_ppl needs at least one text with non-zero length")
    ppl = np.exp(sum(sent_nlls) / total_len)
    return ppl

In [12]:
# One perplexity per pooled prompt set, in the order of
# unconstrained_prompts_combined.
unconstrained_ppls = list(map(compute_ppl, unconstrained_prompts_combined))

100%|██████████| 1000/1000 [00:00<00:00, 17349.90it/s]
1000it [00:10, 97.06it/s]
100%|██████████| 1000/1000 [00:00<00:00, 18130.94it/s]
1000it [00:09, 101.46it/s]
100%|██████████| 1000/1000 [00:00<00:00, 17919.86it/s]
1000it [00:09, 101.30it/s]


In [14]:
# Display the per-set perplexities of the unconstrained prompts.
unconstrained_ppls

[151514.95579180645, 582348.3591761496, 28228.27171138663]

In [21]:
# Define the replicated list in this cell so the statistics do not depend on a
# later cell having been run first (a fresh top-to-bottom run would otherwise
# raise NameError here). List repetition: the per-set values repeated 10 times.
all_uncons_ppls = unconstrained_ppls * 10
# Mean unconstrained perplexity, scientific notation with two decimals.
"{:.2e}".format(np.mean(all_uncons_ppls))

'2.54e+05'

In [22]:
# Standard deviation (np.std default, ddof=0) divided by sqrt(number of
# entries), shown in scientific notation with two decimals.
# NOTE(review): all_uncons_ppls is the per-set list repeated 10 times, so
# len() counts each distinct perplexity ten times and the divisor is sqrt(10)
# larger than it would be for the distinct values alone — confirm this
# replication is intended before reporting the figure as a standard error.
f"{np.std(all_uncons_ppls) / np.sqrt(len(all_uncons_ppls)):.2e}"

'4.34e+04'

In [16]:
# List repetition (not element-wise scaling): the per-set perplexities are
# repeated 10 times.
# NOTE(review): cells placed above this one already read all_uncons_ppls; keep
# its definition ahead of the first use when reordering. The factor 10 is not
# explained in this notebook.
all_uncons_ppls = 10 * unconstrained_ppls

In [13]:
# One perplexity per pooled prompt set, in the order of
# constrained_prompts_combined.
constrained_ppls = list(map(compute_ppl, constrained_prompts_combined))

100%|██████████| 1000/1000 [00:00<00:00, 19029.99it/s]
1000it [00:09, 101.26it/s]
100%|██████████| 1000/1000 [00:00<00:00, 20868.64it/s]
1000it [00:09, 101.58it/s]


In [15]:
# Display the per-set perplexities of the constrained prompts.
constrained_ppls

[72.48564614348135, 57.51187051909577]

In [25]:
# List repetition (not element-wise scaling): the per-set perplexities are
# repeated 10 times. The factor 10 is not explained in this notebook.
all_cons_ppls = 10 * constrained_ppls

In [27]:
# Mean constrained perplexity; repeating the list does not change the mean.
np.mean(all_cons_ppls)

64.99875833128854

In [28]:
# Standard deviation (np.std default, ddof=0) divided by sqrt(number of entries).
# NOTE(review): all_cons_ppls is the per-set list repeated 10 times, so len()
# counts each distinct perplexity ten times and the divisor is sqrt(10) larger
# than it would be for the distinct values alone — confirm this replication is
# intended before reporting the figure as a standard error.
np.std(all_cons_ppls) / np.sqrt(len(all_cons_ppls))

1.674119008797775

In [42]:
# GPT-2 model fine-tuned on Yelp
# NOTE(review): this cell's code is identical to the later "out-of-the-box"
# cell; which model the number describes depends on whether
# evaluator.perplexer_model had been replaced with the stock 'gpt2' checkpoint
# before compute_ppl ran. Presumably the evaluator's default perplexer is the
# Yelp-fine-tuned model — confirm, and record the result under its own name.
np.mean(unconstrained_ppls)

22988236.282792438

In [44]:
# Hard-coded copy of the mean shown by the preceding cell, rendered in
# scientific notation with two decimals.
f"{22988236.282792438:.2e}"

'2.30e+07'

In [43]:
# GPT-2 model fine-tuned on Yelp
# NOTE(review): this cell's code is identical to the later "out-of-the-box"
# cell; which model the number describes depends on whether
# evaluator.perplexer_model had been replaced with the stock 'gpt2' checkpoint
# before compute_ppl ran. Presumably the evaluator's default perplexer is the
# Yelp-fine-tuned model — confirm, and record the result under its own name.
np.mean(constrained_ppls)

350.96558986225494

In [51]:
# GPT-2 model out-of-the-box
# The recorded value equals the mean of the three per-set perplexities
# displayed earlier in this notebook, i.e. the run in which
# evaluator.perplexer_model was set to the stock 'gpt2' checkpoint.
np.mean(unconstrained_ppls)

254030.5288931142

In [53]:
# Hard-coded copy of the mean shown by the preceding cell, rendered in
# scientific notation with two decimals.
f"{254030.5288931142:.2e}"

'2.54e+05'

In [None]:
# Scratch cell that was never executed (it has no execution count): ten copies
# of the out-of-the-box unconstrained mean concatenated with an empty list.
# The result is not assigned, so no other cell depends on it.
[254030.5288931142] * 10 + []

In [52]:
# GPT-2 model out-of-the-box
# The recorded value equals the mean of the two per-set perplexities displayed
# earlier in this notebook, i.e. the run in which evaluator.perplexer_model was
# set to the stock 'gpt2' checkpoint.
np.mean(constrained_ppls)

64.99875833128856

In [31]:
# Number of prompts in the first constrained set.
# NOTE(review): the recorded 1003 does not match the 1000 items the progress
# bars report for the constrained sets; it appears to date from before the
# loader dropped blank lines — re-run to refresh.
len(constrained_prompts_combined[0])

1003


 53%|█████▎    | 528/1003 [00:22<00:04, 102.97it/s][A

In [38]:
# Inspect the first unconstrained prompt set.
# NOTE(review): this echoes the whole list; a slice such as [:5] would keep the
# saved notebook small while showing the same repeated prompt.
unconstrained_prompts_combined[0]

['Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent outcomes',
 'Fixed RemovedChanged Prevent out

In [58]:
# Spot-check a single prompt (index 5) from the first constrained set.
constrained_prompts_combined[0][5]

'<|endoftext|>We do not have'

In [28]:
# List the empty prompts in the first constrained set.
# NOTE(review): the recorded output (three empty strings) can only come from a
# loader that kept blank lines; the loader in this notebook drops them, so a
# re-run should show an empty list.
[prompt for prompt in constrained_prompts_combined[0] if not prompt]

['', '', '']

In [27]:
# Length in characters of the shortest prompt in the first constrained set.
# NOTE(review): the recorded 0 can only come from a loader that kept blank
# lines; the loader in this notebook drops them, so a re-run should report a
# value of at least 1.
min(map(len, constrained_prompts_combined[0]))

0