## Generate Text

In [23]:
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from utils.transformers_config import TransformersConfig
from watermark.auto_watermark import AutoWatermark
# Pick the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


In [24]:
# Checkpoint identifier shared by the model and its tokenizer.
qwen_checkpoint = 'Qwen/Qwen2.5-0.5B'

# Load the causal LM first and move it to the selected device, then the tokenizer.
causal_lm = AutoModelForCausalLM.from_pretrained(qwen_checkpoint).to(device)
qwen_tokenizer = AutoTokenizer.from_pretrained(qwen_checkpoint)

# Bundle model, tokenizer and device for the watermarking toolkit.
# Optional settings the author left disabled:
#   vocab_size=50272, max_new_tokens=200, min_length=204,
#   do_sample=True, no_repeat_ngram_size=4
transformers_config = TransformersConfig(
    model=causal_lm,
    tokenizer=qwen_tokenizer,
    device=device,
)

In [25]:
 
# Load watermark algorithm
from watermark.base import BaseWatermark
from watermark.kgw import KGW
# Build the watermark object for the 'KGW' algorithm from its JSON config and
# the shared transformers configuration.
# The variable is annotated exactly once, with the concrete KGW type, so editor
# completion sees KGW's members without a second, conflicting annotation of the
# same name (which type checkers report as a redefinition).
# NOTE(review): assumes AutoWatermark.load('KGW', ...) returns a KGW instance — confirm.
myWatermark: KGW = AutoWatermark.load(
    'KGW',
    algorithm_config='config/KGW.json',
    transformers_config=transformers_config,
)

In [33]:
# Prompt text; the trailing newline is part of the prompt.
prompt = "Give me a short introduction to large language model.\n"

# Watermarked pass: generate from the prompt, then run detection on the result.
watermarked_text = myWatermark.generate_watermarked_text(prompt)
wm_detect_result = myWatermark.detect_watermark(watermarked_text)

# Unwatermarked pass: same prompt and same detector, for comparison.
unwatermarked_text = myWatermark.generate_unwatermarked_text(prompt)
unwm_detect_result = myWatermark.detect_watermark(unwatermarked_text)


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


In [34]:
# Show the watermarked text followed by its detection result, one per line.
print(watermarked_text, wm_detect_result, sep="\n")


Give me a short introduction to large language model.
A large language model, also known as a deep learning model, is a type of machine learning model that uses deep neural networks to process and analyze large amounts of data. These models are trained on large amounts of data, such as the internet or text, and can learn complex language patterns and structures from this data. Large language models are used in various applications, such as chatbots, language translation, and text generation.
{'is_watermarked': True, 'score': 5.081063304179084}


In [35]:
# Show the unwatermarked text followed by its detection result, one per line.
print(unwatermarked_text, unwm_detect_result, sep="\n")

Give me a short introduction to large language model.
A large language model (LLM) is a type of artificial intelligence (AI) that can generate human-like text based on a large amount of data. These models are designed to understand and generate text based on the context of the input, and they are often used for tasks such as language translation, text summarization, and chatbots.
{'is_watermarked': False, 'score': 0.3418817293789138}


In [29]:
# Persist the run: both generated texts and their detection results, each
# converted with str() and joined by newlines in the order
# watermarked text, its result, unwatermarked text, its result.
# UTF-8 is requested explicitly because generated text may contain non-ASCII
# characters that the platform's default encoding cannot always represent.
with open('KGW_test.output.txt', 'w', encoding='utf-8') as f:
    f.write('\n'.join(
        str(item)
        for item in (
            watermarked_text,
            wm_detect_result,
            unwatermarked_text,
            unwm_detect_result,
        )
    ))

## Visualization

In [30]:
import torch
from visualize.font_settings import FontSettings
from watermark.auto_watermark import AutoWatermark
from utils.transformers_config import TransformersConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from visualize.visualizer import DiscreteVisualizer
from visualize.legend_settings import DiscreteLegendSettings
from visualize.page_layout_settings import PageLayoutSettings
from visualize.color_scheme import ColorSchemeForDiscreteVisualization


In [31]:

# Collect the visualization data for each generated text.
watermarked_data = myWatermark.get_data_for_visualization(watermarked_text)
unwatermarked_data = myWatermark.get_data_for_visualization(unwatermarked_text)

# Build one visualizer, with every settings object constructed with no arguments.
visualizer = DiscreteVisualizer(
    color_scheme=ColorSchemeForDiscreteVisualization(),
    font_settings=FontSettings(),
    page_layout_settings=PageLayoutSettings(),
    legend_settings=DiscreteLegendSettings(),
)

# Both renders use the same flags, so keep them in one place.
render_options = {
    "show_text": True,
    "visualize_weight": True,
    "display_legend": True,
}

# Render the watermarked text first, then the unwatermarked one.
watermarked_img = visualizer.visualize(data=watermarked_data, **render_options)
unwatermarked_img = visualizer.visualize(data=unwatermarked_data, **render_options)


In [32]:

# Save both rendered images under viz/.
# Create the output directory first so saving does not fail when viz/ is
# missing (e.g. on a fresh checkout); exist_ok makes re-running the cell safe.
os.makedirs("viz", exist_ok=True)
watermarked_img.save("viz/KGW_watermarked.png")
unwatermarked_img.save("viz/KGW_unwatermarked.png")