In [5]:
import numpy as np
import hashlib

class MinHash:
    def __init__(self, num_hashes):
        self.num_hashes = num_hashes
        self.max_hash = (1 << 32) - 1
        self.hash_funcs = [self._hash_func(i) for i in range(num_hashes)]

    def _hash_func(self, seed):
        def hash(x):
            return int(hashlib.md5((str(x) + str(seed)).encode('utf8')).hexdigest(), 16)
        return hash

    def compute(self, set_data):
        min_hashes = [self.max_hash] * self.num_hashes
        for item in set_data:
            for i, hash_func in enumerate(self.hash_funcs):
                min_hashes[i] = min(min_hashes[i], hash_func(item))
        return min_hashes

# Example usage
data1 = {'apple', 'banana', 'cherry'}
data2 = {'apple', 'banana', 'date'}

minhash = MinHash(num_hashes=200)
signature1 = minhash.compute(data1)
signature2 = minhash.compute(data2)

# Estimating Jaccard Similarity
similarity = np.mean([x == y for x, y in zip(signature1, signature2)])
print(f"Estimated Jaccard Similarity: {similarity}")

Estimated Jaccard Similarity: 1.0


In [6]:
signature2

[4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,
 4294967295,

In [11]:
import numpy as np

class SimHash:
    def __init__(self, dim):
        self.dim = dim
        self.random_vectors = np.random.randn(dim, dim)

    def compute(self, vector):
        projections = np.dot(vector, self.random_vectors)
        simhash = (projections > 0).astype(int)
        return simhash

# Example usage
vector1 = np.array([1, 0, 1, 0, 1])
vector2 = np.array([1, 1, 0, 1, 0])

simhash = SimHash(dim=5)
signature1 = simhash.compute(vector1)
signature2 = simhash.compute(vector2)

# Estimating Cosine Similarity
similarity = np.dot(signature1, signature2) / len(signature1)
print(f"Estimated Cosine Similarity: {similarity}")

print(signature1)
print(signature2)

Estimated Cosine Similarity: 0.2
[1 0 0 0 1]
[0 0 0 0 1]


In [8]:
signature1

array([0, 1, 0, 0, 1])

In [10]:
import hashlib
from collections import Counter

def hash_feature(feature):
    """Hash a feature using SHA-256 and return it as a bit string."""
    return bin(int(hashlib.sha256(feature.encode()).hexdigest(), 16))[2:].zfill(256)

def simhash(text):
    """Generate a SimHash for the given text."""
    features = text.split()  # Simple split by whitespace; you can use other methods to extract features
    feature_weights = Counter(features)
    
    # Initialize a list of sums for each bit position
    bit_sums = [0] * 256
    
    for feature, weight in feature_weights.items():
        feature_hash = hash_feature(feature)
        
        for i, bit in enumerate(feature_hash):
            if bit == '1':
                bit_sums[i] += weight
            else:
                bit_sums[i] -= weight
    
    # Generate the final SimHash
    simhash_bits = ['1' if bit_sum > 0 else '0' for bit_sum in bit_sums]
    
    # Convert the bit string to an integer
    simhash_value = int(''.join(simhash_bits), 2)
    
    return simhash_value

def generate_seed(text):
    """Generate a seed based on SimHash."""
    simhash_value = simhash(text)
    
    # Optionally, convert the SimHash value to a different format or range if needed
    # For example, we can use it directly as an integer seed
    
    return simhash_value

# Example usage
text1 = "This is an example text to generate a SimHash based seed."
text2 = "This is an example text to generate a SimHash based seed"
seed1 = generate_seed(text1)
seed2 = generate_seed(text2)
print(f"Generated seed 1: {seed1}")
print(f"Generated seed 2: {seed2}")


Generated seed 1: 88739654438713744637394983803097507872644302151361883387078094910037439175419
Generated seed 2: 90552439634884501211528977791671251485636081624297019446735676370132102697723


# Let's be more concrete

In [19]:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

control_str = ["This is a test"]
test_str1 = ["Three members of the same family who died in a static caravan from carbon monoxide poisoning would have been unconscious 'within minutes', investigators said today. The bodies of married couple John and Audrey Cook were discovered alongside their daughter, Maureen, at the mobile home they shared on Tremarle Home Park in Camborne, west Cornwall. The inquests have now opened into the deaths last Saturday, with investigators saying the three died along with the family's pet dog, of carbon monoxide"]
test_str2 = ["Three members of the same family who died in a static caravan from carbon monoxide poisoning would have been unconscious 'within minutes', investigators told today. The bodies of married couple John and Audrey Cook were discovered alongside their daughter, Maureen, at the mobile home they shared on Tremarle Home Park in Camborne, west Cornwall. The inquests have now opened into the deaths last Saturday, with investigators saying the three died along with the family's pet dog, of carbon monoxide"]

control_str_embed = model.encode(control_str)
test_str_embed1 = model.encode(test_str1)
test_str_embed2 = model.encode(test_str2)



In [20]:
control_str_embed.shape, test_str_embed1.shape, test_str_embed2.shape

((1, 384), (1, 384), (1, 384))

In [24]:
eps = 0.2
new_dim = 8 * np.log(2) / eps**2
new_dim

138.62943611198904

In [29]:
simhash = SimHash(dim=384)
sign1 = simhash.compute(control_str_embed[0])
sign2 = simhash.compute(test_str_embed1[0])
sign3 = simhash.compute(test_str_embed2[0])

# compute similarity of each pair
similarity1 = np.dot(sign1, sign2) / len(sign1)
similarity2 = np.dot(sign1, sign3) / len(sign1)
similarity3 = np.dot(sign2, sign3) / len(sign1)

similarity1, similarity2, similarity3

(0.2916666666666667, 0.2942708333333333, 0.5390625)

# Testing new KGW

In [1]:
from tqdm import tqdm
import hashlib

import torch
from torch import nn
from transformers import (AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForMaskedLM, AutoModelForCausalLM,
        LogitsProcessor, LogitsProcessorList, set_seed)
import os
import sys

from dataclasses import dataclass

os.chdir("../")

SRC_PATH = ["src"]
for module_path in SRC_PATH:
    if module_path not in sys.path:
        sys.path.append(module_path)

from watermark.auto_watermark import AutoWatermark
from utils.gen_utils import transform_chat_template_with_prompt
from generation.article_generator import ArticleGenerator
from utils.gen_utils import transform_chat_template_with_prompt

@dataclass
class ModelConfig:
    def __init__(self, tokenizer, use_chat_template, chat_template_type, gen_params, model_name, device):
        self.tokenizer = tokenizer
        self.use_chat_template = use_chat_template
        self.chat_template_type = chat_template_type
        self.gen_params = gen_params
        self.model_name = model_name
        self.device = device

@dataclass
class PromptConfig:
    def __init__(self, system_prompt, user_prompt):
        self.system_prompt = system_prompt
        self.user_prompt = user_prompt


class LLMGenerator(nn.Module):
    def __init__(self, model: AutoModelForCausalLM, model_config: ModelConfig) -> None:
        """
        Class for generating text using a model from Huggingface.
        
        Parameters:
            model: AutoModelForCausalLM
                The pretrained language model (Transformers) to be used for text generation.
            model_config: ModelConfig
                The configuration of the model.
        """
        
        super().__init__()

        # gpt should already be trained
        self.generator = model
        self.tokenizer = model_config.tokenizer
        self.device = model_config.device
        self.gen_params = model_config.gen_params

    def forward(self, samples: list, batch_size: int=1, watermarking_scheme=None) -> list[str]:
        """
        Takes a list of input contexts and generates text using the model.
        
        Parameters:
            samples: list
                A list of input contexts for text generation.
            batch_size: int
                The batch size to use for generation.
            watermarking_scheme: LogitsProcessor
                The watermarking scheme to use for generation.
        """
        
        outputs_list = []
        for i in tqdm(range(0, len(samples), batch_size), desc="Generating text"):
            
            batch_samples = samples[i:i+batch_size]
            encoding = self.tokenizer.batch_encode_plus(
                batch_samples, return_tensors='pt', padding=True, truncation=True)
            input_ids = encoding['input_ids'].to(self.device)

            with torch.no_grad():
                if watermarking_scheme is not None:
                    output_ids = self.generator.generate(
                        input_ids, pad_token_id=self.tokenizer.pad_token_id, 
                        logits_processor=LogitsProcessorList([watermarking_scheme]), **self.gen_params)
                else:     
                    output_ids = self.generator.generate(
                        input_ids, pad_token_id=self.tokenizer.pad_token_id, **self.gen_params)

            # decode the generated text
            decoded_outputs = self.tokenizer.batch_decode(
                output_ids[:, input_ids.shape[1]:])
                
            outputs_list.extend(decoded_outputs)
            
        # remove special tokens from the generated text
        special_tokens = self.tokenizer.additional_special_tokens + \
            [self.tokenizer.pad_token] + [self.tokenizer.eos_token]
            
        for i, sample in enumerate(samples):
            output = outputs_list[i]
            for special_token in special_tokens:
                output = output.replace(special_token, "")
                output = output.strip()
            outputs_list[i] = output
        
        return outputs_list
    def hash(self, token: int):
        
        return token
        
    
    def forward_special(self, samples: list, batch_size: int=1, watermarking_scheme=None) -> list[str]:
        """
        Takes a list of input contexts and generates text using the model.
        
        Parameters:
            samples: list
                A list of input contexts for text generation.
            batch_size: int
                The batch size to use for generation.
            watermarking_scheme: LogitsProcessor
                The watermarking scheme to use for generation.
        """
        
        outputs_list = []
        for i in tqdm(range(0, len(samples), batch_size), desc="Generating text"):
            
            batch_samples = samples[i:i+batch_size]
            encoding = self.tokenizer.batch_encode_plus(
                batch_samples, return_tensors='pt', padding=True, truncation=True)
            input_ids = encoding['input_ids'].to(self.device)

            # set max_new_tokens to 1 to generate one token at a time
            self.gen_params['max_new_tokens'] = 1
            
            with torch.no_grad():
                text_len = 10
                for i in range(text_len):
                    
                    # find the last token
                    last_token = input_ids[0][-1].item()
                    
                    # hash it to get a seed
                    seed = self.hash(last_token)
                    print("seed:", seed)
                    
                    # set seed for generation
                    set_seed(int(seed))
                    
                    #torch.manual_seed(seed)
                    output_ids = self.generator.generate(
                        input_ids, pad_token_id=self.tokenizer.pad_token_id, **self.gen_params)
                                        

            # decode the generated text
            decoded_outputs = self.tokenizer.batch_decode(
                output_ids[:, input_ids.shape[1]:])
                
            outputs_list.extend(decoded_outputs)
            
        # remove special tokens from the generated text
        special_tokens = self.tokenizer.additional_special_tokens + \
            [self.tokenizer.pad_token] + [self.tokenizer.eos_token]
            
        for i, sample in enumerate(samples):
            output = outputs_list[i]
            for special_token in special_tokens:
                output = output.replace(special_token, "")
                output = output.strip()
            outputs_list[i] = output
        
        return outputs_list
    
    
class PromptAttack(ArticleGenerator):
    
    def __init__(self, gen_model: ModelConfig, gen_config: LLMGenerator, gen_prompt_config: PromptConfig,
                adversarial_prompt_config: PromptConfig, max_sample_len: int, watermarking_scheme: AutoWatermark=None) -> None:
        """
        Class for generating text using a model from Huggingface with adversarial prompt.
        This class can also be used to generate text with a specific prompt in an non-adversarial way.
        Here, we add the adversarial_gen_params parameter to make it explicit that we are changing the generation parameters.
        
        Parameters:
            gen_model: LLMGenerator
                The pretrained language model (Transformers) to be used for text generation.
            gen_config: ModelConfig
                The configuration of the model.
            gen_prompt_config: PromptConfig
                The configuration of the prompt.
            adversarial_prompt_config: PromptConfig
                The adversarial prompt configuration to use for generation.
            max_sample_len: int
                The maximum length of the generated text.
            watermarking_scheme: AutoWatermark
                The optional watermarking scheme to use for generation. Default is None.
        """
        
        super().__init__(gen_model, gen_config, gen_prompt_config, max_sample_len, watermarking_scheme)

        # Set adversarial prompts
        self.adversarial_prompt_config = adversarial_prompt_config
        
        self.attack_name = "prompt_attack"
    
    def generate_adversarial_text(self, prefixes: list[str], batch_size: int=1) -> list[str]:
        """
        Generate text with an (adversarial) prompt.
        
        Parameters:
            prefixes: list[str]
                A list of input contexts for text generation.
            batch_size: int
                The batch size to use for generation.
                
        Returns:
            fake_articles: list[str]
                A list of generated text.
        """
        
        # Create adversarial prompt configuration
        self.gen_prompt_config = self.adversarial_prompt_config
        
        # generate text
        fake_articles = self.generate_text(prefixes, batch_size=batch_size)
        
        # cut to max_sample_len
        fake_articles = [text[:self.max_sample_len] for text in fake_articles]
        
        return fake_articles
    
    def generate_text_special(self, prefixes, batch_size=1) -> list[str]:
        """
        Takes a list of input contexts and generates text using the model.
        
        Parameters:
            prefixes: list
                A list of input contexts for text generation.
            batch_size: int
                The batch size to use for generation.
                
        Returns:
            fake_articles: list
                A list of generated text.
        """
        
        # assumption: all attacks will generate text
        gen_model = self.gen_model

        # apply the chat template with the prompt
        system_prompt = self.gen_prompt_config.system_prompt
        user_prompt = self.gen_prompt_config.user_prompt
        gen_tokenizer = self.gen_model_config.tokenizer
        use_chat_template = self.gen_model_config.use_chat_template
        template_type = self.gen_model_config.chat_template_type
        
        # apply the chat template with the prompt
        prefixes_with_prompt = [transform_chat_template_with_prompt(
            prefix, user_prompt, gen_tokenizer,
            use_chat_template, template_type, system_prompt, forced_prefix=prefix) for prefix in prefixes]

        # generate articles
        fake_articles = []
        fake_articles = gen_model.forward_special(prefixes_with_prompt, batch_size=batch_size, watermarking_scheme=self.watermarking_scheme)
            
        # add the prefix back to the generated text since generation cuts the first "input_size" tokens from the input
        # if we force the prefix in the generation, it is counted in the "input_size" tokens
        # We have to be careful though because sometimes fake articles starts with a space, sometimes not
        prefixes = [prefix.strip() for prefix in prefixes]
        fake_articles = [fake_article.strip() for fake_article in fake_articles]
        fake_articles = [f"{prefixes[i]} {fake_articles[i]}" for i in range(len(fake_articles))]
        
        # cut to max_sample_len
        fake_articles = [text[:self.max_sample_len] for text in fake_articles]
        
        return fake_articles

In [2]:
from transformers import (AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForMaskedLM, AutoModelForCausalLM,
                          ElectraForSequenceClassification, ElectraTokenizer, AutoConfig)
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

default_gen_params = {
    #"max_length": 100,
    "max_new_tokens": 100,
    "min_new_tokens": 100,
    "temperature": 0.8,
    "top_p": 0.95,
    "repetition_penalty": 1,
    "do_sample": True,
    "top_k": 50
}

tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",
    pad_token='<|extra_0|>',
    eos_token='<|endoftext|>',
    padding_side='left',
    trust_remote_code=True
)

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",
    torch_dtype="auto",
    device_map="auto",
    pad_token_id=tokenizer.pad_token_id,
).to(device)


model_config = ModelConfig(tokenizer,
    use_chat_template=True, chat_template_type="system_user", gen_params=default_gen_params, model_name="qwen", device=device)

gen = LLMGenerator(model, model_config)

gen_prompt = "You are a helpful assistant."
user_prompt = "Write a news article starting with:"
gen_prompt_config = PromptConfig(system_prompt=gen_prompt, user_prompt=user_prompt)
max_sample_len = 500

prompt_attack = PromptAttack(gen, model_config, gen_prompt_config, gen_prompt_config, max_sample_len)

prefixes = ["Write me a story about dragons: "]

gen_texts = prompt_attack.generate_text_special(prefixes, batch_size=1)
gen_texts

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


seed: 220
input_ids: tensor([[151644,   8948,    198,   2610,    525,    264,  10950,  17847,     13,
         151645,    198, 151644,    872,    198,   7985,    264,   3669,   4549,
           5916,    448,     25,   9645,    752,    264,   3364,    911,  50436,
             25,    220, 151645,    198, 151644,  77091,    198,   9645,    752,
            264,   3364,    911,  50436,     25,    220]], device='cuda:0')


Generating text:   0%|          | 0/1 [00:01<?, ?it/s]

output_ids: tensor([[151644,   8948,    198,   2610,    525,    264,  10950,  17847,     13,
         151645,    198, 151644,    872,    198,   7985,    264,   3669,   4549,
           5916,    448,     25,   9645,    752,    264,   3364,    911,  50436,
             25,    220, 151645,    198, 151644,  77091,    198,   9645,    752,
            264,   3364,    911,  50436,     25,    220,     16]],
       device='cuda:0')





NameError: name 'fwfewf' is not defined