In [None]:
import csv
import random
import importlib
from openai import OpenAI

# The module object itself must be imported so that importlib.reload() has something to reload.
import constants # Required for the reload on the next line, e.g. after a kernel restart
# Re-execute constants.py so an edited key is picked up instead of the previously imported value.
importlib.reload(constants) # Else the old key value is retained
from constants import openai_api

# Module-level OpenAI client; attack_with_gpt() sends its chat-completion requests through it.
client = OpenAI(api_key=openai_api)

In [None]:
import csv
import random
import json
import random

def attack_with_random(csv_filename):
    """Baseline attack: guess the genuine row uniformly at random.

    The first data row of ``csv_filename`` (the row right after the header) is
    treated as the genuine query flow. The data rows are conceptually shuffled
    and a random ranking of up to ten row indices is produced as the "guess".

    Args:
        csv_filename: Path to a CSV file with one header row followed by at
            least one data row.

    Returns:
        A ``(result, new_index_of_row_0)`` tuple where ``result`` is a JSON
        string mapping rank (``"1"``, ``"2"``, ...) to a 0-based data-row index
        (at most ten entries) and ``new_index_of_row_0`` is the 0-based
        position of the genuine row after the shuffle.

    Raises:
        IndexError: If the file has no header or no data rows.
    """
    # newline='' is what the csv module expects so quoted embedded newlines survive.
    with open(csv_filename, mode='r', newline='') as file:
        rows = list(csv.reader(file))

    if len(rows) < 2:
        raise IndexError(f"{csv_filename} must contain a header and at least one data row")

    n_data_rows = len(rows) - 1

    # Shuffle a permutation of row positions instead of the rows themselves.
    # Looking the genuine row up by value (list.index on the row contents)
    # returns the first *equal* row, which is wrong when rows are duplicated.
    # random.shuffle draws the same permutation for any list of this length,
    # so seeded runs behave exactly as before for files without duplicates.
    shuffled_positions = list(range(n_data_rows))
    random.shuffle(shuffled_positions)
    new_index_of_row_0 = shuffled_positions.index(0)

    # The random "prediction": the first ten entries of an independent shuffle.
    guesses = list(range(n_data_rows))
    random.shuffle(guesses)

    ranking = {str(rank): idx for rank, idx in enumerate(guesses[:10], start=1)}
    return json.dumps(ranking), new_index_of_row_0

def attack_with_gpt(csv_filename, whitebox_prompt=False, whitebox_categories_csv="data/google_trends.csv"):
    """Ask the chat model to pick the genuine query flow out of a shuffled CSV.

    The first data row of ``csv_filename`` (the row right after the header) is
    treated as the genuine query flow. Data rows are shuffled, rendered as
    text, and sent to the model together with instructions to return a top-10
    ranking as JSON.

    Args:
        csv_filename: Path to a CSV file with one header row followed by at
            least one data row.
        whitebox_prompt: When true, the prompt additionally lists the
            categories read from ``whitebox_categories_csv`` and tells the
            model that queries resembling them are more likely AI-generated.
        whitebox_categories_csv: CSV whose first column (header skipped)
            supplies the whitebox categories. Defaults to the path that was
            previously hard-coded.
            NOTE(review): the default is used regardless of which dataset
            ``csv_filename`` was generated from -- confirm this is intended.

    Returns:
        A ``(result, new_index_of_row_0)`` tuple where ``result`` is the raw
        message content returned by the model (requested as a JSON object) and
        ``new_index_of_row_0`` is the 0-based position of the genuine row among
        the shuffled data rows.

    Raises:
        IndexError: If ``csv_filename`` has no header or no data rows.
    """
    if whitebox_prompt:
        # newline='' is what the csv module expects for files it reads.
        with open(whitebox_categories_csv, mode='r', newline='') as file:
            reader = csv.reader(file)
            next(reader)  # skip the header row
            # Blank lines parse to empty rows; skip them instead of crashing on row[0].
            categories = {row[0] for row in reader if row}
        whitebox_categories = ", ".join(sorted(categories))

    with open(csv_filename, mode='r', newline='') as file:
        rows = list(csv.reader(file))

    if len(rows) < 2:
        raise IndexError(f"{csv_filename} must contain a header and at least one data row")

    headers = rows[0]
    data = rows[1:]

    # Shuffle a permutation of positions and reorder the rows by it, so the
    # genuine row is tracked by position. Looking it up by value (list.index on
    # the row contents) returns the first *equal* row, which is wrong when the
    # file contains duplicate rows. random.shuffle draws the same permutation
    # for any list of this length, so seeded runs are otherwise unchanged.
    order = list(range(len(data)))
    random.shuffle(order)
    new_index_of_row_0 = order.index(0)
    shuffled_rows = [headers] + [data[i] for i in order]

    formatted_data = "\n".join([", ".join(row) for row in shuffled_rows])

    # NOTE(review): the prompt does not state whether "row index" is 0-based or
    # whether the header line counts, while the returned new_index_of_row_0 is
    # 0-based over data rows only -- confirm the model's answers line up.
    prompt = f"""
    You are a query analysis assistant. Below is a CSV file containing rows of sequential queries.
    Each row corresponds to a different topic, and each column represents a sequential query flow.
    Only one of the rows is a genuine query flow made by a human user; the rest are AI-generated
    to thwart any adversary from figuring out the actual search intention. Your task is to 
    analyze the queries in each row and identify which one is the real, human-generated query. 
    Rank your predictions and return the top 10. Return them in a JSON format where the key is 
    the prediction rank and the value is the row index of the original query.

    CSV Data:
    {formatted_data}
    """

    if whitebox_prompt:
        whitebox_info = f"""
        The AI-generated prompts are generated by selecting a category among the following categories.
        If a query is similar to these categories, it has a higher chance of being AI-generated. 
        Categories: {whitebox_categories}
        """
        prompt = "\n" + prompt + whitebox_info

    response = client.chat.completions.create(
        model="gpt-4o-mini-2024-07-18",
        messages=[
            {"role": "system", "content": "You are a query analysis assistant."},
            {"role": "user", "content": prompt}
        ],
        response_format={ "type": "json_object" },
        temperature=0.7,
        # A fresh seed per call so repeated attacks are independent samples.
        seed=random.randint(0, 1000000)
    )

    result = response.choices[0].message.content
    return result, new_index_of_row_0

def evaluate_results(attack_result, new_index_of_row_0):
    """Find the rank at which an attack predicted the genuine row.

    Args:
        attack_result: JSON string encoding an object that maps rank keys
            (e.g. ``"1"``) to predicted row indices.
        new_index_of_row_0: Row index of the genuine query flow.

    Returns:
        The rank (as ``int``) of the first entry whose row index equals
        ``new_index_of_row_0``, or ``-1`` if no entry matches. A message is
        printed when a match is found.

    Raises:
        json.JSONDecodeError: If ``attack_result`` is not valid JSON.
        ValueError: If the matching entry's rank key is not an integer string.
    """
    results = json.loads(attack_result)
    for rank, row_index in results.items():
        # A model may emit indices as JSON strings ("7" instead of 7); those
        # would never compare equal to the integer index, so coerce them.
        if isinstance(row_index, str):
            try:
                row_index = int(row_index.strip())
            except ValueError:
                continue  # not an index at all; cannot be a match
        if row_index == new_index_of_row_0:
            print(f"Original query was detected as rank {rank}")
            return int(rank)
    return -1

def attack_and_eval(n_attacks, csv_filename, attack_type="gpt", whitebox_prompt=False):
    """Run an attack repeatedly and summarise how often it finds the genuine row.

    Args:
        n_attacks: Number of independent attacks to run.
        csv_filename: CSV file passed to the attack function.
        attack_type: ``"gpt"`` selects ``attack_with_gpt``; any other value
            selects ``attack_with_random``.
        whitebox_prompt: Forwarded to ``attack_with_gpt`` (ignored by the
            random attack).

    Returns:
        A ``(average_rank, success_rate)`` tuple. ``average_rank`` is the mean
        rank over the attacks that found the genuine row, or ``"N/A"`` if none
        did. ``success_rate`` is the number of such attacks divided by
        ``n_attacks``, or ``"N/A"`` when ``n_attacks`` is 0. Both values are
        also printed.
    """
    rank_total = 0
    hit_count = 0
    for attempt in range(n_attacks):
        try:
            if attack_type == "gpt":
                attack_result, new_index_of_row_0 = attack_with_gpt(csv_filename, whitebox_prompt=whitebox_prompt)
            else:
                attack_result, new_index_of_row_0 = attack_with_random(csv_filename)
            # Evaluation is inside the try as well: a malformed model reply
            # should cost one attempt, not abort the whole run.
            rank = evaluate_results(attack_result, new_index_of_row_0)
        except Exception as e:
            # Previously the error was swallowed and execution fell through to
            # the evaluation step, which either crashed on an unbound variable
            # or re-scored the previous attack's stale result. A failed attempt
            # is now reported and counted as a miss (it stays in the
            # n_attacks denominator).
            print(f"Attack {attempt + 1}/{n_attacks} failed and is counted as a miss: {e!r}")
            continue
        if rank > 0:
            rank_total += rank
            hit_count += 1

    average_rank = rank_total / hit_count if hit_count else "N/A"
    success_rate = hit_count / n_attacks if n_attacks else "N/A"

    print(f"Average success rank: {average_rank}")
    print(f"Average success count: {success_rate}")

    return average_rank, success_rate

In [None]:
# Quick run: ten whitebox GPT attacks against a single output file.
attack_and_eval(
    n_attacks=10,
    csv_filename="outputs/wiki_categories_50k_r10_c1_p2.csv",
    whitebox_prompt=True,
)

In [None]:
import glob

# Maps each attacked CSV path to the (average rank, success rate) pair
# returned by attack_and_eval for that file.
results = {}
# Alternative file selection (disabled): only the google_trends "*_p2" outputs.
#all_csv_files = glob.glob("outputs/google_trends*_p2.csv")
# Active selection: every CSV directly under outputs/ whose path does not contain "_p2".
all_csv_files = [file for file in glob.glob("outputs/*.csv") if "_p2" not in file]
for fn in all_csv_files:
    # 100 whitebox GPT attacks per file; each attack sends one chat-completion request.
    success_rank_sum, success_count_sum = attack_and_eval(n_attacks=100, csv_filename=fn, attack_type="gpt", whitebox_prompt=True)
    results[fn] = (success_rank_sum, success_count_sum)
print(results)