# LLM Watermark Judger

## Setup

Load the necessary packages and helper functions, and initialize the OpenAI API.

In [1]:
## ----- SYSTEM PACKAGES ----- ##
import os
import sys

## ----- IMPORT FUNCTIONS ----- ##
# Put the current working directory at the front of the module search path so
# that `functions` is looked up in this directory first.
sys.path.insert(0, os.getcwd())
# NOTE(review): the wildcard import hides where names come from. The helpers
# called later in this notebook (load_from_json, get_embedding, save_to_json,
# load_embeddings_and_labels, kfold_classifier, kfold_regression) are not
# imported anywhere else, so they presumably come from `functions` — confirm
# and consider importing them explicitly.
from functions import *

## Get Text Embeddings

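Load the sample pairs from `results.json` and get an embedding for each output (`uw_output` and `w_output`) from OpenAI's `ada-002` model. The embeddings are stored on each item alongside the original fields, and the augmented list is saved to `results_w_embeddings.json`.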

In [3]:
# Load the sample records; each one is read below for its "uw_output" and
# "w_output" text fields.
data_list = load_from_json(filename="results.json")
total_items = len(data_list)

# Embed both outputs of every record and store the results on the record itself.
for count, item in enumerate(data_list, start=1):

    # get an embedding for each output text
    uw_embedding = get_embedding(item["uw_output"])
    w_embedding = get_embedding(item["w_output"])

    # save the embeddings next to the texts they were computed from
    item["uw_embedding"] = uw_embedding
    item["w_embedding"] = w_embedding

    # "\r" returns to the start of the line so the progress message is
    # overwritten in place instead of printing one line per item
    print(count, "out of", total_items, "items processed!", end="\r")

# Write the augmented records to a separate file rather than back to results.json.
save_to_json(data_list, filename="results_w_embeddings.json")

## Evaluate Classifier Model

In [8]:
# Read the stored embeddings together with their labels.
embeddings_file = "results_w_embeddings.json"
all_embeddings, all_labels = load_embeddings_and_labels(embeddings_file)

# Train the classifier with k-fold cross-validation.
kfold_classifier(all_embeddings, all_labels)

## Evaluate Regression Model

In [3]:
# Read the stored embeddings together with their labels (reloaded here so this
# cell can be run without the classifier cell above).
embeddings_file = "results_w_embeddings.json"
all_embeddings, all_labels = load_embeddings_and_labels(embeddings_file)

# Train the regression model with k-fold evaluation.
kfold_regression(all_embeddings, all_labels)

Regression Accuracy Across All Folds: [0.66, 0.655, 0.605, 0.645, 0.67]
Average Regression Accuracy Across All Folds = 0.6470
Regression Standard Error Across All Folds = 0.0101
