In [72]:
from datasets import load_dataset

ds_book_corpus = load_dataset("bookcorpus/bookcorpus", trust_remote_code=True)

In [73]:
from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch
import torch.nn.functional as F
import numpy as np

# Load the RoBERTa model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")
model = AutoModelForMaskedLM.from_pretrained("google-bert/bert-base-uncased")

# def calculate_perplexity(sentence, model, tokenizer):
#     encoded_input = tokenizer(sentence, return_tensors='pt', truncation=True)
#     input_ids = encoded_input['input_ids'].squeeze()  # Shape: [seq_len]
#     attention_mask = encoded_input['attention_mask']
    
#     total_log_prob = 0.0
#     N = len(input_ids)
    
#     for i in range(1, N-1):  # Skip [CLS] and [SEP] if present
#         masked_input_ids = input_ids.clone()  # Clone to avoid in-place modification
#         masked_input_ids[i] = tokenizer.mask_token_id  # Mask the i-th token
        
#         # Predict the masked token
#         with torch.no_grad():
#             outputs = model(input_ids=masked_input_ids.unsqueeze(0), attention_mask=attention_mask)
#             logits = outputs.logits
        
#         # Convert logits to probabilities
#         predicted_probs = F.softmax(logits[0, i], dim=-1)
        
#         # Get the probability of the original token
#         original_token_id = input_ids[i].item()
#         original_token_prob = predicted_probs[original_token_id].item()
        
#         # Add the log probability of the original token
#         total_log_prob += np.log(original_token_prob)
    
#     # Compute pseudo-perplexity
#     avg_log_prob = total_log_prob / (N - 2)  # Exclude [CLS] and [SEP] tokens
#     pseudo_perplexity = np.exp(-avg_log_prob)
    
#     return pseudo_perplexity

def score(sentence, model, tokenizer):
    # https://arxiv.org/abs/1910.14659
    tensor_input = tokenizer(sentence, return_tensors='pt', truncation=True)
    repeat_input = tensor_input['input_ids'].repeat(tensor_input['input_ids'].size(-1)-2, 1)
    mask = torch.ones(tensor_input['input_ids'].size(-1) - 1).diag(1)[:-2]
    masked_input = repeat_input.masked_fill(mask == 1, tokenizer.mask_token_id)
    labels = repeat_input.masked_fill( masked_input != tokenizer.mask_token_id, -100)
    with torch.inference_mode():
        loss = model(masked_input, labels=labels).loss
    return np.exp(loss.item())


# Example usage
sentence = "London is the capital of Great Britain."
pseudo_perplexity = score(sentence, model, tokenizer)
print("Pseudo-perplexity:", pseudo_perplexity)



Some weights of the model checkpoint at google-bert/bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Pseudo-perplexity: 1.0968683577162155


In [74]:

from transformers import AutoModelForCausalLM, AutoTokenizer
gpt_tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
gpt_model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")

In [75]:

from ml_security.utils.utils import get_device
from ml_security.utils.nlp_utils import calculate_perplexity

DEVICE = get_device(allow_mps=False)

Using CPU


In [77]:
file_path = "experiment-01.txt"
file_path_ids = file_path.replace(".txt", "") + "-idxs" + ".txt"
output_file = file_path.replace(".txt", "") + "-results" + ".json"

import zlib
from tqdm import tqdm
import json

with open(file_path_ids, 'r') as f:
    lines = f.readlines()
    idxs = [int(line.strip()) for line in lines]
# repeat each entry twice, but have them in sequence
idxs = [idx for idx in idxs for _ in range(2)]


with open(file_path, 'r') as f:
    lines = f.readlines()
    for line_idx, line in tqdm(enumerate(lines), total=len(lines)):
        perpl = score(line, model, tokenizer)
        zlib_entropy = len(zlib.compress(bytes(line, 'utf-8')))
        gpt2_perpl = calculate_perplexity(line, gpt_model, gpt_tokenizer, DEVICE).item()
        with open(output_file, 'a') as f:
            f.write(json.dumps({"line": line, "perplexity": perpl, "zlib_entropy": zlib_entropy, "gpt2_perplexity": gpt2_perpl, "idx": idxs[line_idx]}) + "\n")


 30%|███       | 606/2000 [05:20<13:26,  1.73it/s]

In [24]:
import pandas as pd
columns = ['perplexity', 'zlib_entropy', 'gpt2_perplexity', 'line', 'idx']
df_plots = pd.DataFrame(columns=columns)

with open(output_file+"yo", 'r') as f:
    # read line by line
    for line in f:
        # parse the line
        data = json.loads(line)
        # create a dictionary
        data_dict = {col: data[col] for col in columns}
        # create a dataframe
        df = pd.DataFrame(data_dict, index=[0])
        # append to the main dataframe
        df_plots = pd.concat([df_plots, df], ignore_index=True)





The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



In [25]:
df_plots

Unnamed: 0,perplexity,zlib_entropy,gpt2_perplexity,line,idx
0,3.839273,140,49.016003,i just wanted to peek inside one . i wanted to...,18570393
1,4.057195,157,96.813118,i just wanted to peek at it and learn some new...,18570393
2,2.389894,132,51.447018,the woman smiled as if she knew he lived with ...,73839698
3,2.750109,146,83.413483,the woman smiled as if she understood exactly ...,73839698
4,4.300704,140,147.972641,` ` not anymore . ' he opened the eminence doo...,1926356
...,...,...,...,...,...
1995,5.031240,140,150.152512,"hanna sat in the middle seat , a petite dark b...",29922948
1996,2.535484,140,44.280388,"i said after a few long breaths , although i k...",5417396
1997,3.278696,145,55.618668,i said after a few more little lectures . anyw...,5417396
1998,8.624334,148,263.678955,"he was non - discerning - of - school , but su...",47067598


In [54]:
df_plots['id'] = df_plots.index

df_plots['selected'] = (df_plots['perplexity'] < 15) & (df_plots['gpt2_perplexity'] > 390)

def calculate_3gram_accuracy(reference_text, predicted_text):
    def generate_ngrams(text, n):
        words = text.split()
        return [tuple(words[i:i+n]) for i in range(len(words)-n+1)]
    
    reference_3grams = generate_ngrams(reference_text, 3)
    predicted_3grams = generate_ngrams(predicted_text, 3)
    
    matching_3grams = set(reference_3grams) & set(predicted_3grams)
    accuracy = len(matching_3grams) / len(reference_3grams) if reference_3grams else 0
    
    return accuracy

df_plots['3gram_accuracy'] = df_plots.apply(lambda row: calculate_3gram_accuracy(row['line'], ds_book_corpus['train'][row['idx']]["text"]), axis=1)



In [27]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px


# plot scatter plot with regression line
fig = px.scatter(df_plots, x='perplexity', y='gpt2_perplexity', hover_data=['line', 'id', 'zlib_entropy'], color='selected')

# show the plot
fig.show()

In [65]:
df_plots.iloc[212]['line']

'a long , curved parabolic arch connected with a wide , high - explosive - ratchet structure connects the five diversion and dam gates of the dimapoa dam . facilities ( 23 out of 25 ) : 6 public toilets ;\n'

In [66]:
df_filtered = df_plots[df_plots['selected']]
df_filtered = df_plots.copy()

In [67]:
df_filtered

Unnamed: 0,perplexity,zlib_entropy,gpt2_perplexity,line,idx,id,selected,3gram_accuracy
0,3.839273,140,49.016003,i just wanted to peek inside one . i wanted to...,18570393,0,False,0.069767
1,4.057195,157,96.813118,i just wanted to peek at it and learn some new...,18570393,1,False,0.069767
2,2.389894,132,51.447018,the woman smiled as if she knew he lived with ...,73839698,2,False,0.069767
3,2.750109,146,83.413483,the woman smiled as if she understood exactly ...,73839698,3,False,0.071429
4,4.300704,140,147.972641,` ` not anymore . ' he opened the eminence doo...,1926356,4,False,0.025000
...,...,...,...,...,...,...,...,...
1995,5.031240,140,150.152512,"hanna sat in the middle seat , a petite dark b...",29922948,1995,False,0.075000
1996,2.535484,140,44.280388,"i said after a few long breaths , although i k...",5417396,1996,False,0.069767
1997,3.278696,145,55.618668,i said after a few more little lectures . anyw...,5417396,1997,False,0.069767
1998,8.624334,148,263.678955,"he was non - discerning - of - school , but su...",47067598,1998,False,0.000000


In [71]:
for row in df_filtered.iterrows():
    gram_accuracy = row[1]['3gram_accuracy']
    if gram_accuracy > 0.15:
        print(row[1]['3gram_accuracy'])
        print(row[1]['line'])
        print(ds_book_corpus['train'][row[1]['idx']]["text"])
        print()

0.16666666666666666
she asked as they sat down in a high - backed chair . " andrew is his friend tympanum sanitarium . where is marybeth ? of course she - " now hallie wished she could say more .

she asked as they sat down in a pair of empty seats .

