In [None]:
import os
import sys

# Make the project checkout importable (presumably this is where the `src`
# package imported later in the notebook lives -- confirm).
# The location can be overridden with the WATERMARK_ADAPTERS_ROOT environment
# variable; the default is the original hard-coded checkout path.
_repo_root = os.environ.get(
    "WATERMARK_ADAPTERS_ROOT",
    '/data/users/miroojin/saksham/watermark-adapters',
)
# Re-running this cell in the same kernel must not stack duplicate entries.
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

In [None]:
import json
from src.utils import MbMark, MbClusterDetector, GaussMark
from sklearn.metrics import roc_auc_score
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from datasets import load_dataset, load_from_disk
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import math
import scipy.stats
torch.manual_seed(42)
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve
from scipy.interpolate import interp1d

In [None]:
# Hugging Face model identifier; the suffix is the part after the last "/".
model_name = "meta-llama/Llama-2-7b-hf"
model_suffix = model_name.split("/")[-1]

# JSON file that this notebook reads and later rewrites in place.
# The OUTPUT_FILE environment variable overrides the default path.
output_file = os.getenv("OUTPUT_FILE", "../output/test/output_delta1.5_gamma0.4.json")

# os.getenv() with a default never returns None, so a None check can never
# fire. Verify the file exists instead, so that a wrong path fails here
# rather than after the expensive loading steps.
if not os.path.isfile(output_file):
    raise FileNotFoundError(
        f"Output file not found: {output_file!r}. "
        "Please set the OUTPUT_FILE environment variable to the path of your output file."
    )


# Number of texts scored per forward pass.
BATCH_SIZE = 64
print(f"Batch size: {BATCH_SIZE}")
print(f"Model name: {model_name}")


In [None]:
# `device_map` is a model-loading option and has no meaning for a tokenizer,
# so it is not passed here.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Batched tokenization with padding needs a pad token; fall back to EOS when
# the checkpoint does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


In [None]:

# Read the whole JSON document at `output_file` into memory.
with open(output_file, "r") as json_fh:
    raw_json = json_fh.read()
    output_data = json.loads(raw_json)

def filter_length(example):
    """Return True when the example's text tokenizes to at least 400 ids.

    Args:
        example: mapping with a "text" entry.

    Returns:
        bool: whether the tokenized length (as produced by the notebook's
        `tokenizer`, including any tokens it adds itself) reaches 400.
    """
    token_ids = tokenizer(example["text"])['input_ids']
    return 400 <= len(token_ids)

In [None]:
# Source corpus; the suffix is the part after the last "/".
dataset_name = "Skylion007/openwebtext"
dataset_suffix = dataset_name.rsplit("/", 1)[-1]

# Stream the train split, keep only sufficiently long documents, then shuffle
# with a fixed seed and take 5000 of them.
# NOTE(review): trust_remote_code=True lets the dataset repository's loading
# script run locally -- only keep it for sources you trust.
dataset = (
    load_dataset(
        dataset_name,
        split="train",
        streaming=True,
        trust_remote_code=True,
    )
    .filter(filter_length)
    .shuffle(seed=42)
    .take(5000)
)

# Truncate each sample to a random token length drawn from [150, 400).

# Dedicated seeded generator: the visible part of this notebook seeds only
# torch, so drawing from the global np.random state made the truncation
# lengths (and hence the statistics computed from these texts) change from
# run to run.
_truncate_rng = np.random.default_rng(42)


def truncate(example):
    """Shorten an example's text to a random number of token ids.

    The text is tokenized, cut to a length sampled uniformly from
    150..399 (upper bound exclusive, as before), and decoded back to a
    string with special tokens stripped.

    Args:
        example: mapping with a "text" entry; it is modified in place.

    Returns:
        The same `example` object with "text" replaced by the truncated text.
    """
    input_ids = tokenizer(example["text"])['input_ids']
    # int() keeps the slice bound a plain Python integer.
    length = int(_truncate_rng.integers(150, 400))
    truncated_input_ids = input_ids[:length]
    example["text"] = tokenizer.decode(
        truncated_input_ids, skip_special_tokens=True)
    return example


# Apply the truncation to every sample.
dataset = dataset.map(truncate)
# Materialize the "text" field of every sample into a plain Python list.
texts = list(record["text"] for record in dataset)

In [None]:

# Load the causal LM in bfloat16; device placement is left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

# Settings stored alongside the trained matrix in the loaded JSON.
watermark_type = output_data["watermark"]
config = output_data["config"]

# The stored matrix is a nested list; turn it back into a tensor.
final_weight = torch.tensor(output_data["final_matrix"])

# Rebuild the watermark object from the saved configuration.
watermark = MbMark(
    model=model,
    tokenizer=tokenizer,
    final_weight=final_weight,
    delta=config["delta"],
    gamma=config["gamma"],
    seed=config["hash_key"],
    unembedding_param_name=config["unembedding_param_name"],
)

In [None]:
# Iterate through the dataset in batches


def get_llrs(data):
    """Score a list of texts with the watermark's likelihood-ratio test.

    The texts are processed in slices of BATCH_SIZE: each slice is tokenized
    with padding, moved to the cluster detector's device, passed through the
    cluster detector, and the resulting hidden states are handed to
    `watermark.lrt` together with the tokenized inputs.

    Args:
        data: sliceable sequence of strings.

    Returns:
        A single tensor obtained by concatenating the per-batch outputs of
        `watermark.lrt` (presumably one score per text -- confirm against
        the MbMark implementation).
    """
    per_batch_scores = []

    # Gradients are never needed for scoring.
    with torch.no_grad():
        for start in range(0, len(data), BATCH_SIZE):
            chunk = data[start:start + BATCH_SIZE]
            encoded = watermark.tokenizer(chunk, padding=True,
                                          return_tensors="pt")
            encoded = encoded.to(watermark.cluster_detector.device)
            # Only the second element of the detector's output is used.
            _, hidden_states = watermark.cluster_detector(encoded)
            per_batch_scores.append(watermark.lrt(hidden_states, encoded))

    return torch.cat(per_batch_scores)


# Scores of the unwatermarked texts collected above.
null_likehood_ratios = get_llrs(texts)

#### Human text / negative sample scores

In [None]:


# Plot the histogram of the null likelihood ratios.
# Matplotlib needs a NumPy-convertible array. A tensor that lives on an
# accelerator or is stored as bfloat16 cannot be converted implicitly, so
# detach it, upcast to float32 and move it to host memory first.
null_scores_np = null_likehood_ratios.detach().float().cpu().numpy()

fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(null_scores_np, bins=200, density=True)
ax.set_title("Null Likelihood Ratios")
ax.set_xlabel("Likelihood Ratio")
ax.set_ylabel("Density")
ax.grid()
plt.show()


In [None]:
# Summary statistics of the null scores as plain Python floats.
base_llr_statistics = dict(
    mean=null_likehood_ratios.mean().item(),
    std=null_likehood_ratios.std().item(),
)

# Attach them to the loaded document under "base_llr_statistics".
output_data.update(base_llr_statistics=base_llr_statistics)

# Save the updated output data (overwrites the file that was read earlier).
with open(output_file, "w") as out_fh:
    json.dump(output_data, out_fh, indent=4)