In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch, json
import random
import numpy as np
import tqdm
from utils_batch import InfillingModel
from torch.nn.functional import log_softmax
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# Pick the compute device. The notebook was written for the second GPU
# ("cuda:1"); fall back to the first GPU, or to the CPU, when that device does
# not exist so the notebook still runs on smaller machines.
if torch.cuda.is_available():
    device = "cuda:1" if torch.cuda.device_count() > 1 else "cuda:0"
else:
    device = "cpu"

In [None]:
def plot_roc_curve(human_scores, gpt_scores, target_fpr=0.1,
                   title='ROC curve: Open-gen w/ GPT3.5-Reddit w prompts'):
    """Plot the ROC curve for human-vs-GPT scores and report TPR at a fixed FPR.

    Human scores are labelled 0 and GPT scores are labelled 1 before being
    passed to ``roc_curve``, so larger scores count as evidence for the GPT
    class.

    Args:
        human_scores: Iterable of scores for the human-written samples.
        gpt_scores: Iterable of scores for the machine-generated samples.
        target_fpr: False-positive-rate budget in [0, 1] at which the
            true-positive rate is reported. Defaults to 0.1 (10%).
        title: Title drawn above the plot.

    Returns:
        Tuple ``(roc_auc, tpr_at_target)``: the area under the ROC curve and
        the highest true-positive rate reachable without exceeding
        ``target_fpr``.

    Raises:
        ValueError: If ``target_fpr`` is outside [0, 1].
    """
    if not 0.0 <= target_fpr <= 1.0:
        raise ValueError(f"target_fpr must be within [0, 1], got {target_fpr!r}")

    # Materialise as lists so `+` concatenates even when arrays are passed in
    # (array `+` would add element-wise instead).
    human_scores = list(human_scores)
    gpt_scores = list(gpt_scores)
    scores = human_scores + gpt_scores
    labels = [0] * len(human_scores) + [1] * len(gpt_scores)

    fpr, tpr, _ = roc_curve(labels, scores)
    roc_auc = auc(fpr, tpr)

    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.4f)' % roc_auc)
    # Diagonal = chance-level classifier, drawn for reference.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(title)
    plt.legend(loc="lower right")
    plt.show()

    # Report the best TPR among operating points that stay within the FPR
    # budget. Taking the first point whose FPR exceeds the budget instead would
    # report the TPR of a threshold that violates it.
    fpr = np.asarray(fpr)
    tpr = np.asarray(tpr)
    tpr_at_target = tpr[fpr <= target_fpr].max()
    print(f"TPR at {100 * target_fpr:g}% FPR: {tpr_at_target:.4f}")
    return roc_auc, tpr_at_target

In [None]:
# Checkpoint used for scoring. The PolyCoder names below were listed next to it
# in the notebook, presumably as alternatives to try:
#   NinedayWang/PolyCoder-160M
#   NinedayWang/PolyCoder-0.4B
#   NinedayWang/PolyCoder-2.7B
model_name = 'codeparrot/codeparrot'

# The variables keep their "PyCodeGPT" names, but they hold whatever
# checkpoint `model_name` selects.
PyCodeGPT_tokenizer = AutoTokenizer.from_pretrained(model_name)
PyCodeGPT = AutoModelForCausalLM.from_pretrained(model_name)
PyCodeGPT = PyCodeGPT.to(device)

In [None]:
# from transformers import LlamaForCausalLM, LlamaTokenizer
# model_name = "/data/xianjun/project/llama/7B_hf/"
# model = LlamaForCausalLM.from_pretrained( model_name ).half().to(device) #.half() to use FP16
# model.eval() 
# PyCodeGPT = model
# PyCodeGPT_tokenizer = LlamaTokenizer.from_pretrained( model_name )

In [None]:
from transformers import AutoConfig
# Load the configuration that belongs to the checkpoint named by `model_name`.
config = AutoConfig.from_pretrained(model_name)
# Use the config's `max_position_embeddings` as the token budget: the scoring
# functions in this notebook cut their input to this many tokens before the
# forward pass.
max_length = config.max_position_embeddings
# Bare expression so the notebook displays the value.
max_length

In [None]:
# Sample text for manual experiments with the scorer below.
inputs = 'this is a test'
# Fraction of the scored tokens to skip before averaging: only the final
# (1 - truncate_ratio) share of the sequence contributes to the average.
truncate_ratio=0.9
def get_logprob1(inputs ):
    """Score a text with the language model and return per-token details.

    The text is tokenized, cut to at most ``max_length`` tokens and run through
    ``PyCodeGPT`` once. Each token is then scored with the negative
    log-probability the model assigned to it given the tokens before it.

    Args:
        inputs: Text to score.

    Returns:
        Tuple ``(average, neg_log_probs)``. ``neg_log_probs`` is a list with
        one value per token from the second token onwards (the first token has
        no preceding context to be predicted from). ``average`` is the mean of
        the tail of that list starting at index
        ``int(truncate_ratio * len(neg_log_probs))``, where ``truncate_ratio``
        is the module-level setting read at call time.

    Raises:
        ValueError: If the text yields fewer than two tokens, in which case no
            token can be scored.
    """
    input_ids = PyCodeGPT_tokenizer.encode(inputs, return_tensors='pt').to(device)
    input_ids = input_ids[:, :max_length]
    if input_ids.size(1) < 2:
        raise ValueError("need at least two tokens to score a text")
    with torch.no_grad():
        output = PyCodeGPT(input_ids)
    logits = output[0]
    # A causal LM's logits at position t are its prediction for token t + 1, so
    # drop the last position and line the rest up with tokens 1..n-1. Scoring
    # position t against token t would measure how likely the model thinks the
    # current token is to repeat, not how likely the text is.
    # Cast to float32 so the softmax stays accurate for half-precision models.
    log_probs = log_softmax(logits[0, :-1].float(), dim=-1)
    targets = input_ids[0, 1:]
    token_log_probs = log_probs.gather(1, targets.unsqueeze(1)).squeeze(1)
    neg_log_probs = (-token_log_probs).cpu().numpy().tolist()
    tail_start = int(truncate_ratio * len(neg_log_probs))
    return np.average(neg_log_probs[tail_start:]), neg_log_probs

In [None]:
# Load the regeneration results, one JSON object per line (JSON Lines).
# Only this single results file is read; nothing is merged here.
with open('results/regen_gpt-3.5-turbo_20_0.5.jsonl', 'r', encoding='utf-8') as f:
    # Skip blank lines (e.g. a trailing newline) so json.loads never receives
    # an empty string.
    data1 = [json.loads(line) for line in f if line.strip()]
len(data1)

In [None]:
# Display the message content of the first entry under
# machine_gen_text -> choices for the first loaded record.
data1[0]['machine_gen_text']['choices'][0]['message']['content']

In [None]:
# Build one sample text: the first record's machine_prefix_prompt followed by
# the content of its first machine_gen_text choice.
# NOTE(review): the variable is called `human_text`, yet both pieces come from
# the machine_* fields — confirm this is intended.
human_text = data1[0]['machine_prefix_prompt'] + data1[0]['machine_gen_text']['choices'][0]['message']['content']
human_text

In [None]:
# Number of entries under machine_gen_text -> choices for the first record.
len(data1[0]['machine_gen_text']['choices'])

In [None]:
# Sanity check: score the sample text, keeping both the averaged score and the
# per-token list returned by get_logprob1.
avg_neg_log_probs, neg_log_probs = get_logprob1(human_text)
# Display at most the last 500 per-token values.
neg_log_probs[-500:]

In [None]:
# Fraction of the scored tokens to skip before averaging: only the final
# (1 - truncate_ratio) share of the sequence contributes to the score.
truncate_ratio=0.9
def get_logprob(inputs ):
    """Return the average negative log-probability of the tail of a text.

    The text is tokenized, cut to at most ``max_length`` tokens and run through
    ``PyCodeGPT`` once. Each token from the second one onwards is scored with
    the negative log-probability the model assigned to it given the tokens
    before it (the first token has no preceding context to be predicted from).

    Args:
        inputs: Text to score.

    Returns:
        Mean of the per-token negative log-probabilities from index
        ``int(truncate_ratio * number_of_scored_tokens)`` to the end, where
        ``truncate_ratio`` is the module-level setting read at call time.

    Raises:
        ValueError: If the text yields fewer than two tokens, in which case no
            token can be scored.
    """
    input_ids = PyCodeGPT_tokenizer.encode(inputs, return_tensors='pt').to(device)
    input_ids = input_ids[:, :max_length]
    if input_ids.size(1) < 2:
        raise ValueError("need at least two tokens to score a text")
    with torch.no_grad():
        output = PyCodeGPT(input_ids)
    logits = output[0]
    # A causal LM's logits at position t are its prediction for token t + 1, so
    # drop the last position and line the rest up with tokens 1..n-1. Scoring
    # position t against token t would measure how likely the model thinks the
    # current token is to repeat, not how likely the text is.
    # Cast to float32 so the softmax stays accurate for half-precision models.
    log_probs = log_softmax(logits[0, :-1].float(), dim=-1)
    targets = input_ids[0, 1:]
    token_log_probs = log_probs.gather(1, targets.unsqueeze(1)).squeeze(1)
    neg_log_probs = (-token_log_probs).cpu().numpy().tolist()
    tail_start = int(truncate_ratio * len(neg_log_probs))
    return np.average(neg_log_probs[tail_start:])

In [None]:
# Score the human side of every record: how much the gold completion's score
# differs from the average score of the regenerated continuations.
gold_prob_all = []
for ins in tqdm.tqdm(data1, total=len(data1)):
    regen_choices = ins['human_gen_text']['choices']
    # Records with fewer than two regenerations are skipped.
    if len(regen_choices) <= 1:
        continue
    prefix = ins['human_prefix_prompt']
    # The prefix score is the same for every regeneration of this record, so
    # compute it once instead of once per choice (one forward pass each).
    prefix_score = get_logprob(prefix)
    original_score = get_logprob(ins['gold_completion']) - prefix_score
    miu_scores = [
        get_logprob(prefix + choice['message']['content']) - prefix_score
        for choice in regen_choices
    ]
    dx = original_score - np.average(miu_scores)
    gold_prob_all.append(dx)

In [None]:
# Score the machine side of every record: how much the parsed completion's
# score differs from the average score of the regenerated continuations.
# (`tqdm` is already imported in the notebook's first cell.)
fim_prob_all = []
for ins in tqdm.tqdm(data1, total=len(data1)):
    prefix = ins['machine_prefix_prompt']
    # The prefix score is the same for every regeneration of this record, so
    # compute it once instead of once per choice (one forward pass each).
    prefix_score = get_logprob(prefix)
    original_score = get_logprob(ins['parsed_completion']) - prefix_score
    miu_scores = [
        get_logprob(prefix + choice['message']['content']) - prefix_score
        for choice in ins['machine_gen_text']['choices']
    ]
    dx = original_score - np.average(miu_scores)
    fim_prob_all.append(dx)

In [None]:
# Overlay the two score lists, one colour per class, so their ranges can be
# compared by eye. (`plt` is already imported in the notebook's first cell.)
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(gold_prob_all, label='human')
ax.plot(fim_prob_all, label='gpt')
# Each list is plotted against its own position index.
ax.set_xlabel('position in score list')
ax.set_ylabel('original score minus mean regeneration score')
ax.set_title('Detection scores: human vs. gpt')
ax.legend()
plt.show()

In [None]:
# ROC analysis of the two score lists: plot_roc_curve labels its first
# argument 0 and its second argument 1.
plot_roc_curve( gold_prob_all, fim_prob_all  )