In [1]:
import random
import torch
from sklearn.metrics import roc_auc_score, roc_curve
import matplotlib.pyplot as plt
from watermark.auto_watermark import AutoWatermark
from utils.transformers_config import TransformersConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from openai import OpenAI
# Compute device: use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


In [2]:
# Load the OPT-1.3B model and tokenizer and bundle them, together with the
# generation keyword settings, into a single TransformersConfig object.
MODEL_NAME = 'facebook/opt-1.3b'

# Extra keyword settings handed to TransformersConfig unchanged
# (presumably forwarded to text generation - TODO confirm).
generation_settings = dict(
    max_new_tokens=200,
    min_length=204,
    do_sample=True,
    no_repeat_ngram_size=4,
)

transformers_config = TransformersConfig(
    model=AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(device),
    tokenizer=AutoTokenizer.from_pretrained(MODEL_NAME),
    vocab_size=50272,
    device=device,
    **generation_settings,
)

OSError: We couldn't connect to 'https://huggingface.co' to load this file, couldn't find it in the cached files and it looks like facebook/opt-1.3b is not the path to a directory containing a file named config.json.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

In [3]:
# Build the 'Unigram' watermark object from its JSON config file and the
# transformers_config created in the previous cell.
watermark_options = {
    'algorithm_config': 'config/Unigram.json',
    'transformers_config': transformers_config,
}
myWatermark = AutoWatermark.load('Unigram', **watermark_options)


In [6]:
# Generate text with the Moonshot (Kimi) chat-completion API.
def generate_text(prompt):
    """Ask the ``moonshot-v1-8k`` chat model to expand ``prompt`` with more context.

    The API key is read from the ``MOONSHOT_API_KEY`` environment variable
    rather than being embedded in the notebook, so the credential is not
    shared or committed together with the code.

    NOTE(review): the key that used to be hardcoded here has been exposed and
    should be revoked on the Kimi open platform.

    Args:
        prompt: User message sent to the model. The system message asks the
            model to extend the prompt by at least 200 characters, in the
            same language as the input.

    Returns:
        The message content of the first returned choice. If the request
        raises, the error is printed and a fixed Chinese apology string is
        returned instead (callers cannot distinguish it from real output
        except by comparing the text).

    Raises:
        RuntimeError: If ``MOONSHOT_API_KEY`` is unset or empty.
    """
    import os  # local import so this cell does not depend on the import cell

    api_key = os.environ.get("MOONSHOT_API_KEY")
    if not api_key:
        raise RuntimeError(
            "MOONSHOT_API_KEY is not set; export the API key obtained from "
            "the Kimi open platform before calling generate_text()."
        )

    client = OpenAI(
        api_key=api_key,
        base_url="https://api.moonshot.cn/v1",
    )
    try:
        completion = client.chat.completions.create(
            model="moonshot-v1-8k",
            messages=[
                {"role": "system", "content": "你是 Kimi，由 Moonshot AI 提供的人工智能助手，你更擅长中文和英文的对话。你将为输入的prompt扩展上下文，一次至少扩充200字，而且你输出的语言与输入的语言语种一致。"},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7  # controls the randomness of the generated text
        )
        # Extract the generated content from the first choice.
        return completion.choices[0].message.content

    except Exception as e:
        # Best-effort fallback: report the failure and hand back a placeholder.
        print(f"Error generating text: {e}")
        return "抱歉，生成文本时发生错误，请稍后重试。"


In [None]:
# Insert spans of watermarked text into human text while controlling how much
# watermarked material is added.
def copy_paste_attack_multi_span(human_text, watermarked_text, num_spans=3, wm_ratio=0.1, rng=None):
    """Simulate a copy-paste attack by inserting several watermarked spans.

    Both texts are tokenised by whitespace (``str.split()``), so text without
    spaces counts as a single token. Each span holds
    ``max(1, int(len(human_tokens) * wm_ratio / num_spans))`` tokens taken
    consecutively from the start of ``watermarked_text``; the budget is
    therefore relative to the number of *human* tokens. If the watermarked
    text has too few tokens, the later spans are shorter or empty.

    Insertion positions are drawn uniformly over the current (growing) token
    list, so a later span may land inside a previously inserted one.

    Args:
        human_text: Original text (string) that receives the spans.
        watermarked_text: Watermarked text (string) the spans are cut from.
        num_spans: Number of spans to insert. Values <= 0 insert nothing.
        wm_ratio: Fraction of the human token count to add as watermarked
            tokens, split evenly across the spans.
        rng: Optional object with a ``randint(a, b)`` method (for example a
            ``random.Random`` instance) used to pick insertion points. The
            global ``random`` module is used when omitted.

    Returns:
        The resulting tokens joined with single spaces.
    """
    if rng is None:
        rng = random

    tokens = human_text.split()
    if num_spans <= 0:
        # Nothing to insert; also avoids dividing by zero below.
        return " ".join(tokens)

    wm_tokens = watermarked_text.split()
    span_len = max(1, int(len(tokens) * wm_ratio / num_spans))

    for span_idx in range(num_spans):
        span = wm_tokens[span_idx * span_len:(span_idx + 1) * span_len]
        insert_pos = rng.randint(0, len(tokens))  # inclusive upper bound: may append at the end
        tokens[insert_pos:insert_pos] = span
    return " ".join(tokens)

In [None]:
# Evaluation: ROC-AUC Testing
def _coerce_detection_score(raw_result):
    """Reduce a ``detect_watermark`` result to a float score, using 0.0 on failure."""
    if isinstance(raw_result, dict):
        if "score" not in raw_result:
            print(f"Unexpected detect_watermark output: {raw_result}")  # debugging aid
            return 0.0  # default score when the result has no "score" entry
        raw_result = raw_result["score"]

    try:
        return float(raw_result)
    except (TypeError, ValueError):
        # float(None) / float([...]) raise TypeError, float("abc") raises ValueError.
        print(f"Invalid detection_score: {raw_result}")  # debugging aid
        return 0.0  # default score when the value is not numeric


def evaluate_detection_AUC(num_samples, human_text_length, watermark_length, num_spans, wm_ratio):
    """Evaluate watermark detection under the copy-paste attack with ROC-AUC.

    For every sample a base text is obtained from ``generate_text`` and, with
    probability 0.5, attacked with ``copy_paste_attack_multi_span`` (label 1);
    otherwise it is scored unchanged (label 0). The detector score of each
    text is collected, the ROC curve is plotted and the AUC is returned.

    Args:
        num_samples: Number of samples to test.
        human_text_length: Maximum number of characters kept from the base
            text (string slice).
        watermark_length: Maximum number of characters kept from the
            watermarked text (string slice).
        num_spans: Number of spans inserted by the copy-paste attack.
        wm_ratio: Watermark ratio forwarded to the copy-paste attack.

    Returns:
        The ROC-AUC computed by ``roc_auc_score`` over all samples.
    """
    true_labels = []  # Ground truth: 1 for attacked (contains watermarked spans), 0 otherwise
    detector_scores = []  # Detection scores from the watermark detector

    for _ in range(num_samples):
        # Base ("human") text for this sample, truncated by characters.
        human_text = generate_text("I like fish.")[:human_text_length]

        # 50% of the samples are attacked with copy-paste.
        if random.random() < 0.5:
            # Only generate watermarked text when it is actually used.
            # NOTE(review): if generate_watermarked_text returns the prompt
            # followed by the continuation, this slice keeps mostly prompt
            # text rather than watermarked tokens - confirm.
            watermarked_text = myWatermark.generate_watermarked_text(human_text)[:watermark_length]
            attacked_text = copy_paste_attack_multi_span(human_text, watermarked_text, num_spans, wm_ratio)
            true_labels.append(1)
        else:
            attacked_text = human_text
            true_labels.append(0)

        raw_result = myWatermark.detect_watermark(attacked_text)
        detector_scores.append(_coerce_detection_score(raw_result))

    # Calculate AUC
    auc = roc_auc_score(true_labels, detector_scores)
    fpr, tpr, _ = roc_curve(true_labels, detector_scores)

    # Plot ROC Curve
    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, label=f"AUC = {auc:.4f}")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curve for Watermark Detection under Copy-Paste Attack")
    plt.legend()
    plt.show()

    return auc


In [None]:
if __name__ == "__main__":
    # Seed the Python and PyTorch RNGs so the attacked/clean split, the span
    # insertion points and torch-side sampling repeat across runs. Text
    # returned by the remote chat API remains non-deterministic.
    SEED = 42
    random.seed(SEED)
    torch.manual_seed(SEED)

    num_samples = 100  # number of evaluated samples
    human_text_length = 600  # characters kept from each base text
    watermark_length = 150  # characters kept from each watermarked text
    num_spans = 3  # spans inserted per attacked sample
    wm_ratio = 0.1  # watermark ratio passed to the copy-paste attack

    auc = evaluate_detection_AUC(num_samples, human_text_length, watermark_length, num_spans, wm_ratio)
    print(f"ROC-AUC for Copy-Paste Attack: {auc:.4f}")