# for AdvBench dataset

In [None]:
import json
from openai import OpenAI
from util import *
from tqdm import tqdm

# Load the two JSON inputs handed to rewrite_process / eval_process below.
# JSON text is UTF-8 by specification, so the encoding is pinned explicitly
# rather than inherited from the platform's default locale codec.
# NOTE(review): reject_data looks like the list of AdvBench samples to rewrite
# and content the text to be incorporated into them - confirm against util.
with open('../harmfulness/data/unlearning_data_AdvBench_3B.json', 'r', encoding='utf-8') as file:
    reject_data = json.load(file)
with open("HarryPotter_sentences_sampeld.json", "r", encoding="utf-8") as file:
    content = json.load(file)

In [None]:
# API client passed to rewrite_process / eval_process in the loop below.
# Replace the placeholder with a real key before running this cell.
client = OpenAI(api_key="[YOUR API KEY]")

In [None]:
# Prompts used
#
# Initial-generation prompts: a system message plus a user template, both
# passed to rewrite_process below.
# NOTE(review): [ORIGINAL] and [CONTENT] look like placeholders substituted
# by the util helpers - confirm there before renaming them.
prompt_initial_system = """
You are an expert text rewriter tasked with creating natural synthetic data for given requests.
"""
prompt_initial = """
Original Text: [ORIGINAL]

Please create a synthetic data that maintains the semantics of the original text while naturally and briefly incorporating the specified content:
[CONTENT].

Ensure that the semantic similarity between the original text and synthetic data is high.
Enclose your synthetic data between [DATA] and [/DATA] tokens. 

Here is an example:
[DATA]
SYNTHETIC DATA HERE
[/DATA]
"""

# Evaluation prompts: system message and user template passed to eval_process
# in the loop below. The template asks the model for two scores in [0.00, 1.00]
# (semantic and syntactic coherence) in a fixed "[SEMANTIC] x / [SYNTACTIC] y"
# layout.
# NOTE(review): [SYNTHETIC] is presumably replaced with the candidate text and
# the reply parsed by tag inside util - confirm before editing the format.
prompt_evaluate_system = """
You are an expert text evaluator tasked with evaluating synthetic data for semantic and syntactic coherence.
"""
prompt_evaluate = """
Evaluate the synthetic data provided to determine:
1) Its degree of semantic coherence, which assesses how logically the ideas are connected.
2) Its degree of syntactic coherence, which evaluates how grammatically correct and structurally sound the text is.

Please score the semantic and syntactic coherence of the synthetic data on a scale from 0.00 (no coherence) to 1.00 (perfect coherence).

Only give the floating score; other mention or explanation is not allowed.

Synthetic Data for Evaluation:
[SYNTHETIC]

Your output should follow this format:
[SEMANTIC] YOUR SCORE ONLY
[SYNTACTIC] YOUR SCORE ONLY

Here is an example:
[SEMANTIC] 0.55
[SYNTACTIC] 0.55
"""

# Rewrite (optimisation) prompts, all passed to rewrite_process in the loop
# below. prompt_rewrite states the scoring formula
# S = 2*S_ori + S_con + 0.5*(S_sem + S_syn) and ends right after the
# "Incorporated Content" field; prompt_rewrite2 carries the output-format
# instructions.
# NOTE(review): the past synthetic data and their scores announced in
# prompt_rewrite are presumably inserted between the two parts by util -
# confirm there.
prompt_rewrite_system = """
You are an expert text rewriting optimizer tasked with rewriting natural synthetic data to maximize the given score.
"""
prompt_rewrite = """
Rewrite the synthetic data to maximize its overall score, favoring substitutions and reorderings over additions.

Overall score S is calculated by the formula: S = 2*S_ori+S_con+0.5*(S_sem+S_syn).

Where:
- S_ori refers to the Rouge-L Recall between the original text and the synthetic text, measuring how much of the original content is captured in the synthetic text.
- S_con refers to the Rouge-L Recall between the content incorporated into the synthetic data and the synthetic text, assessing how well the new content is integrated.
- S_sem refers to the semantic coherence of the synthetic text, scaled from 0.00 (no coherence) to 1.00 (perfect coherence), assessing how logically the ideas are connected.
- S_syn refers to the syntactic coherence of the synthetic text, scaled from 0.00 (no coherence) to 1.00 (perfect coherence), assessing how grammatically correct and structurally sound the text is.

Here are the original text, incorporated content, past synthetic data, and their scores:

Original Text: [ORIGINAL]

Incorporated Content: [CONTENT]

"""
# Trailer with the required [DATA] ... [/DATA] output wrapper.
prompt_rewrite2 = """
Enclose your synthetic data between [DATA] and [/DATA] tokens. 

Here is an example:
[DATA]
SYNTHETIC DATA HERE
[/DATA]
"""

In [None]:
# Iterative generate-then-score loop. results_dict maps each 1-based iteration
# number to its list of results; every key starts with its own empty list.
max_iters = 4
results_dict = {}
for iteration in range(1, max_iters + 1):
    results_dict[iteration] = []

for iteration in range(1, max_iters + 1):
    print(f"ITER:{iteration}")

    # Step 1: produce the candidate texts for this iteration.
    replies_current = rewrite_process(
        reject_data,
        content,
        results_dict,
        iteration,
        prompt_initial,
        prompt_initial_system,
        prompt_rewrite,
        prompt_rewrite_system,
        prompt_rewrite2,
        client,
    )

    # Step 2: score the candidates; eval_process hands back the results
    # mapping, which is carried into the next iteration.
    results_dict = eval_process(
        replies_current,
        reject_data,
        content,
        results_dict,
        iteration,
        prompt_evaluate,
        prompt_evaluate_system,
        client,
    )

In [None]:
# Persist the per-iteration results. JSON object keys are always strings, so
# the integer iteration numbers are written out as "1", "2", ...
with open('./agents/results_dict_CR.json', 'w') as results_file:
    results_file.write(json.dumps(results_dict))

In [None]:
# For every sample, keep the synthetic text with the highest overall score
# seen across all iterations. Samples with no recorded result keep "" / 0.
highest_score_texts = [""] * len(reject_data)
highest_scores = [0] * len(reject_data)
# Walk exactly the iterations that were run (keys 1..max_iters of
# results_dict) instead of a hard-coded count, so changing max_iters cannot
# silently skip iterations or raise KeyError here.
for iteration in range(1, max_iters + 1):
    for ind, record in enumerate(results_dict[iteration]):
        # Each record unpacks into six fields: the text, the overall score and
        # four component scores (unused here, hence the underscore names).
        data, s, _s_ori, _s_con, _s_sem, _s_syn = record
        # `<=` rather than `<`: on a tie the later iteration's text wins.
        if highest_scores[ind] <= s:
            highest_scores[ind] = s
            highest_score_texts[ind] = data

In [None]:
# Save the best-scoring synthetic text per sample as a JSON list.
with open('./agents/GPT_results_score_CR.json', 'w') as best_file:
    best_file.write(json.dumps(highest_score_texts))