In [1]:
from abc import ABC, abstractmethod
from dataclasses import dataclass

from datasets import concatenate_datasets, load_from_disk, DatasetDict
import argparse
import os
import pandas as pd
import copy
from tqdm import tqdm

import torch
from torch import nn
import nltk.data
nltk.download('punkt')
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForMaskedLM, AutoModelForCausalLM
from datasets import load_from_disk, concatenate_datasets, Dataset

from abc import ABC, abstractmethod

[nltk_data] Downloading package punkt to
[nltk_data]     /home/marluxiaboss/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:

@dataclass
class ModelConfig:
    def __init__(self, tokenizer, use_chat_template, chat_template_type, gen_params, device):
        self.tokenizer = tokenizer
        self.use_chat_template = use_chat_template
        self.chat_template_type = chat_template_type
        self.gen_params = gen_params
        self.device = device

@dataclass
class PromptConfig:
    """Pair of prompts used to instruct a model.

    Declared as real dataclass fields so that equality and ``repr`` take the
    prompt texts into account; the constructor signature is unchanged.
    """

    # system-role prompt text
    system_prompt: str
    # user-role prompt text
    user_prompt: str


# Generator

In [3]:
class LLMGenerator(nn.Module):
    """Batching wrapper that turns text prompts into generated continuations.

    Parameters:
    model
        Object exposing a ``generate`` method (the wrapped language model).
    model_config
        Object providing ``tokenizer``, ``device`` and ``gen_params``.
    """

    def __init__(self, model, model_config):
        super().__init__()

        # the wrapped model is expected to be already trained
        self.generator = model
        self.tokenizer = model_config.tokenizer
        self.device = model_config.device
        self.gen_params = model_config.gen_params

    def forward(self, samples: list, max_new_tokens: int = None, min_new_tokens: int = None, batch_size: int = 1):
        """
        Generate a continuation for every prompt in ``samples``.

        Parameters:
        samples : list
            The prompts to complete
        max_new_tokens : int, optional
            Overrides the configured ``max_new_tokens`` for this call only;
            when None the configured generation parameters are used as-is
        min_new_tokens : int, optional
            Overrides the configured ``min_new_tokens`` for this call only;
            when None the configured generation parameters are used as-is
        batch_size : int, optional
            Number of prompts encoded and generated together, by default 1

        Returns:
        list
            One decoded string per prompt, with the additional special tokens,
            the pad token and the eos token removed

        Raises:
        ValueError
            If ``batch_size`` is smaller than 1
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        # Work on a per-call copy: the configured dict may be shared with other
        # wrappers/configs and must not be overwritten (in particular not with None).
        gen_params = dict(self.gen_params)
        if max_new_tokens is not None:
            gen_params["max_new_tokens"] = max_new_tokens
        if min_new_tokens is not None:
            gen_params["min_new_tokens"] = min_new_tokens

        # tokens stripped from the decoded text; unset (None) tokens are skipped
        special_tokens = [
            token
            for token in (*self.tokenizer.additional_special_tokens,
                          self.tokenizer.pad_token, self.tokenizer.eos_token)
            if token
        ]

        outputs_list = []
        for start in range(0, len(samples), batch_size):
            batch_samples = samples[start:start + batch_size]

            encoding = self.tokenizer(
                batch_samples, return_tensors='pt', padding=True, truncation=True)
            input_ids = encoding['input_ids'].to(self.device)

            # pass the mask explicitly so padded positions are ignored during generation
            attention_mask = encoding.get('attention_mask')
            if attention_mask is not None:
                attention_mask = attention_mask.to(self.device)

            with torch.no_grad():
                output_ids = self.generator.generate(
                    input_ids, attention_mask=attention_mask,
                    pad_token_id=self.tokenizer.pad_token_id, **gen_params)

            # drop the first ``input_ids.shape[1]`` positions of each output row
            # (assumes generate() returns prompt + continuation -- TODO confirm for non-causal models)
            decoded_outputs = self.tokenizer.batch_decode(
                output_ids[:, input_ids.shape[1]:])

            for output in decoded_outputs:
                for special_token in special_tokens:
                    output = output.replace(special_token, "")
                outputs_list.append(output)

        return outputs_list
    
    
def transform_chat_template_with_prompt(prefix: str, prompt: str, tokenizer: AutoTokenizer,
                                        use_chat_template: bool = False, template_type: str = None,
                                        system_prompt: str = "", forced_prefix: str = "") -> str:
    
    # TODO: To bet put in a utils file
    
    """
    Transform a prefix with a prompt into a chat template
    
    Parameters:
    prefix : str
        The prefix to use
    prompt : str
        The prompt to use
    tokenizer : AutoTokenizer
        The tokenizer to use
    use_chat_template : bool, optional
        Whether to use a chat template, by default False
    template_type : str, optional
        The type of template to use, by default None
    system_prompt : str, optional
        The system prompt to use, by default ""
        
    Returns:
    str
        The transformed prefix
    """
        

    if prefix != "":
        text_instruction = f"{prompt} {prefix}"
    else:
        text_instruction = prompt
        
    if use_chat_template:
        if system_prompt == "":
            sys_prompt = "You are a helpful assistant."
        else:
            sys_prompt = system_prompt
        match template_type:
            case "system_user":
                messages = [
                {"role": "system", "content": f"{sys_prompt}"},
                {"role": "user", "content": f"{text_instruction}"},
                ]
            case "user":
                messages = [
                {"role": "user", "content": f"{text_instruction}"},
                ]
            case _:
                raise ValueError("Template type not supported")

        text_template = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # force prefix on the generated response
        #text_template = f"{text_template}\n{forced_prefix}"
        text_template = f"{text_template} {forced_prefix}"

    else:
        text_template = text_instruction

    return text_template


# Attacks


In [4]:
# idea: 
# start with general attack def
# implement current attacks with this def
# test that this works and that it's practical
# iterate on the general attack def to make it better

# support batched gen!

class Attack(ABC):
    """Abstract base for attacks; subclasses must implement ``generate_text``."""

    def __init__(self):
        """Nothing is initialised at this level."""

    @abstractmethod
    def generate_text(self, prefix):
        """Produce text for ``prefix``. Abstract: the base implementation does nothing."""
    
    
class ParaphrasingAttack(Attack):
    """Placeholder for a paraphrasing attack; none of its methods are implemented yet."""

    def __init__(self):
        """No state is set up."""

    def paraphrase(self, text):
        """Not implemented yet: currently returns None."""

    def generate_text(self, prefixes: list):
        """Not implemented yet: currently returns None.

        Planned steps:
        - generate text according to prefix
        - paraphrase the text
        """
    
class PromptParaphrasingAttack(Attack):
    """Attack that generates text with one LLM and rewrites it with a prompted paraphraser LLM.

    Parameters:
    gen_model
        Callable generator; invoked as
        ``gen_model(prompts, max_new_tokens=..., min_new_tokens=..., batch_size=...)``
    gen_config
        Config of the generator (``tokenizer``, ``use_chat_template``,
        ``chat_template_type``, ``gen_params``)
    gen_prompt_config
        Provides ``system_prompt`` and ``user_prompt`` for generation
    paraphraser_model
        Callable paraphraser, invoked with the same keyword arguments as ``gen_model``
    paraphraser_config
        Config of the paraphraser, same attributes as ``gen_config``
    paraphraser_prompt_config
        Provides ``system_prompt`` and ``user_prompt`` for paraphrasing
    """

    def __init__(self, gen_model, gen_config, gen_prompt_config, paraphraser_model, paraphraser_config, paraphraser_prompt_config):

        # Generator LLM
        self.gen_model = gen_model
        self.gen_system_prompt = gen_prompt_config.system_prompt
        self.gen_user_prompt = gen_prompt_config.user_prompt
        self.gen_model_config = gen_config

        # Paraphraser LLM
        self.paraphraser_model = paraphraser_model
        self.system_paraphrasing_prompt = paraphraser_prompt_config.system_prompt
        self.user_paraphrasing_prompt = paraphraser_prompt_config.user_prompt
        self.model_config = paraphraser_config

    def paraphrase(self, texts, nb_paraphrasing=1, batch_size=1) -> list:
        """
        Paraphrase ``texts`` with the paraphraser model.

        Parameters:
        texts : list
            The texts to paraphrase
        nb_paraphrasing : int, optional
            Number of successive paraphrasing rounds (each round rewrites the
            output of the previous one), by default 1; with 0 ``texts`` is
            returned unchanged
        batch_size : int, optional
            Batch size forwarded to the paraphraser model, by default 1

        Returns:
        list
            The paraphrased texts, in the same order as ``texts``
        """
        model_config = self.model_config
        max_new_tokens = model_config.gen_params["max_new_tokens"]
        min_new_tokens = model_config.gen_params["min_new_tokens"]

        fake_articles = texts
        for _ in range(nb_paraphrasing):

            # the text to rewrite is the prefix; the answer is forced to start with "OUTPUT:"
            prefixes_with_prompt = [transform_chat_template_with_prompt(
                fake_article, self.user_paraphrasing_prompt, model_config.tokenizer,
                model_config.use_chat_template, model_config.chat_template_type,
                self.system_paraphrasing_prompt, forced_prefix="OUTPUT:")
                for fake_article in fake_articles]

            # let the model do the batching so ``batch_size`` is actually honoured
            fake_articles = list(self.paraphraser_model(
                prefixes_with_prompt, max_new_tokens=max_new_tokens,
                min_new_tokens=min_new_tokens, batch_size=batch_size))

        return fake_articles

    def generate_text(self, prefixes, batch_size=1):
        """
        Generate one text per prefix, then paraphrase the generated texts once.

        Parameters:
        prefixes : list
            The text prefixes the generated texts must start with
        batch_size : int, optional
            Batch size used for both generation and paraphrasing, by default 1

        Returns:
        list
            The paraphrased texts, one per prefix
        """
        gen_config = self.gen_model_config

        # gen params
        max_new_tokens = gen_config.gen_params["max_new_tokens"]
        min_new_tokens = gen_config.gen_params["min_new_tokens"]

        # apply the chat template with the prompt; the prefix is also forced
        # at the start of the response
        prefixes_with_prompt = [transform_chat_template_with_prompt(
            prefix, self.gen_user_prompt, gen_config.tokenizer,
            gen_config.use_chat_template, gen_config.chat_template_type,
            self.gen_system_prompt, forced_prefix=prefix) for prefix in prefixes]

        # generate articles
        generated = self.gen_model(
            prefixes_with_prompt, max_new_tokens=max_new_tokens,
            min_new_tokens=min_new_tokens, batch_size=batch_size)

        # add the prefix back to the generated text since generation cuts the first "input_size" tokens from the input
        # if we force the prefix in the generation, it is counted in the "input_size" tokens
        fake_articles = [f"{prefix} {article}" for prefix, article in zip(prefixes, generated)]

        print("Original generated text: ", fake_articles)

        # paraphrase the text
        paraphrased_fake_articles = self.paraphrase(fake_articles, batch_size=batch_size, nb_paraphrasing=1)

        print("Paraphrased generated text: ", paraphrased_fake_articles)

        return paraphrased_fake_articles
    
class PromptAttack(Attack):
    """Placeholder for a prompt-based attack; not implemented yet."""

    def __init__(self):
        """No state is set up."""

    def generate_text(self, prefix):
        """Not implemented yet: currently returns None.

        Planned step: change the prefix to add the prompt.
        """
    
class GenParamsAttack(Attack):
    """Placeholder for a generation-parameter attack; not implemented yet."""

    def __init__(self):
        """No state is set up."""

    def generate_text(self, prefix):
        """Not implemented yet: currently returns None.

        Planned step: select gen params for the attack.
        """

        

In [5]:
# Default sampling settings shared by the models in this notebook.
# max_new_tokens and min_new_tokens are both 100.
default_gen_params = dict(
    max_new_tokens=100,
    min_new_tokens=100,
    temperature=0.8,
    top_p=0.95,
    repetition_penalty=1,
    do_sample=True,
    top_k=50,
)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [6]:
# Load the tokenizer; left padding keeps the prompts right-aligned for generation
paraphraser_tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",
    pad_token='<|extra_0|>',
    eos_token='<|endoftext|>',
    padding_side='left',
    trust_remote_code=True
)

# Place the whole model directly on `device` (the device the inputs are moved to)
# instead of combining device_map="auto" with a later .to(device)
paraphraser = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2-0.5B-Instruct",
    torch_dtype="auto",
    device_map=device,
    pad_token_id=paraphraser_tokenizer.pad_token_id,
)

# give the config its own copy so the notebook-level defaults are never modified
paraphraser_config = ModelConfig(paraphraser_tokenizer,
    use_chat_template=True, chat_template_type="system_user", gen_params=dict(default_gen_params), device=device)
paraphraser_model = LLMGenerator(paraphraser, paraphraser_config)

# the same model is used as generator and as paraphraser
gen_model = paraphraser_model
gen_tokenizer = paraphraser_tokenizer
gen_config = paraphraser_config


dataset_list = ["Why are cats better than dogs", "Why are dogs better than cats"]

# Adjacent literals are concatenated into one prompt; the wording is unchanged,
# only the stray quote characters of the former triple-quoted string are gone.
system_paraphrasing_prompt = (
    "You are a paraphraser. You are given an input passage ‘INPUT’. You should paraphrase ‘INPUT’ to print ‘OUTPUT’. "
    "‘OUTPUT’ shoud be diverse and different as much as possible from ‘INPUT’ and should not copy any part verbatim from ‘INPUT’. "
    "‘OUTPUT’ should preserve the meaning and content of ’INPUT’ while maintaining text quality and grammar. "
    "‘OUTPUT’ should not be much longer than ‘INPUT’. You should print ‘OUTPUT’ and nothing else so that its easy for me to parse."
)
# the text to paraphrase is introduced with "INPUT:", matching the system prompt
user_paraphrasing_prompt = "INPUT:"
paraphraser_prompt_config = PromptConfig(system_prompt=system_paraphrasing_prompt, user_prompt=user_paraphrasing_prompt)


gen_prompt = "You are a helpful assistant."
user_prompt = "Write a news article starting with:"
gen_prompt_config = PromptConfig(system_prompt=gen_prompt, user_prompt=user_prompt)

prompt_paraphrasing_attack = PromptParaphrasingAttack(gen_model, gen_config, gen_prompt_config, paraphraser_model, paraphraser_config, paraphraser_prompt_config)
paraphrased_fake_articles = prompt_paraphrasing_attack.generate_text(dataset_list, batch_size=2)

paraphrased_fake_articles

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Original generated text:  ['Why are cats better than dogs ?\nOne reason is that cats are known for their independence and loyalty. They are more independent and self-sufficient than dogs, which can be overly dependent on their owners.\nCats also have a more positive and positive attitude towards their owners, which can make them more approachable and comfortable in certain situations.\nAnother reason is that cats are generally more docile and peaceful animals. They are also known for their peaceful nature, which can make them more gentle and kind to people.\nOverall, cats are a', 'Why are dogs better than cats ?\nDogs have long been regarded as the "cat\'s best friend" for several reasons. However, there are a few factors that can make dogs excel in certain situations, even when they are not the dominant breed. In this article, we will explore why dogs are better than cats in certain situations.\n\nOne of the main reasons why dogs excel in certain situations is their ability to adapt t

[' Cats and dogs are both great animals, but cats are known for their independence and loyalty. They are also known for their positive attitude towards their owners and peaceful nature. Additionally, cats are generally more docile and peaceful animals. Their peaceful nature makes them more gentle and kind to people. The main reason why cats are better than dogs is because they are known for their independence and loyalty, and they are also known for their positive attitude towards their owners and peaceful nature. They are also generally more docile',
 ' Dogs have been recognized as "the best friend" for several reasons, and they excel in different situations, even when they are not the dominant breeds. Factors such as their adaptability to different environments, and the fact that they are naturally territorial can contribute to their success. Dogs are considered better than cats in certain situations because they can adapt to different environments and may struggle in the presence of