In [None]:
!pip install wheel setuptools pip --upgrade
!pip install --upgrade openai
!pip install rouge
!pip install nltk
!pip install bert_score
!pip install -U sentence-transformers

## Data Processing

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity
from rouge import Rouge
import numpy as np
import time

# Load the prompt dataset; the processing loop below reads each row's 'prompt' column.
review_df = pd.read_csv('chatgpt_prompts.csv')
# print(review_df)

In [None]:
def get_transformed_prompt(decryption_pred):
    """Extract the transformed prompt from a model's decryption response.

    The model is asked to put its final answer after the marker
    'Here is the prompt:'. The text following the last occurrence of that
    marker is returned with surrounding whitespace removed. When the marker
    is absent, the whole (stripped) response is returned.

    Args:
        decryption_pred: Raw response text from the model; may be None or empty.

    Returns:
        The extracted prompt, or the string "None" when there is nothing to
        extract (missing/empty response, or nothing but whitespace after the
        marker), so the scoring cells never receive an empty string.
    """
    if not decryption_pred:
        return "None"

    # str.split always yields at least one piece, so the last piece is either
    # the text after the final marker or, with no marker, the whole response.
    transformed = decryption_pred.split('Here is the prompt:')[-1].strip()
    return transformed if transformed else "None"

def encrypt_reverse(sentence):
    """Return `sentence` with its single-space-separated tokens in reverse order."""
    tokens = sentence.split(' ')
    tokens.reverse()
    return ' '.join(tokens)

def encrypt_odd_even(sentence):
    """Reorder the words of `sentence`: 1st, 3rd, 5th, ... first, then 2nd, 4th, 6th, ...

    Words are split on any whitespace and re-joined with single spaces.
    """
    tokens = sentence.split()
    # Words at odd positions (1-based) lead; words at even positions follow.
    reordered = [tokens[pos] for pos in range(0, len(tokens), 2)]
    reordered.extend(tokens[pos] for pos in range(1, len(tokens), 2))
    return ' '.join(reordered)

# Prompt snippet: source code of the function that undoes encrypt_odd_even by
# interleaving the first half of the words with the second half.
ODD_EVEN = """
The following is the decryption function.
```python
def decryption(encrypted_problem):
    # split sentence in half
    words = encrypted_problem.split()
    half_length = (len(words) + 1) // 2
    decrypted_words = []

    # Place the first half in an odd numbered position
    # Put the last half in an even number position
    for i in range(half_length):
        decrypted_words.append(words[i])
        if i + half_length < len(words):
            decrypted_words.append(words[i + half_length])

    original_problem = ' '.join(decrypted_words)
    return original_problem
```
"""

# Prompt snippet: source code of the function that undoes encrypt_reverse by
# reversing the space-separated tokens once more.
REVERSE = """The following is the decryption function.
```python
def decryption(encrypted_problem):
    # reverse the sentence to get original problem
    original_problem = ' '.join(encrypted_problem.split(' ')[::-1])

    return original_problem
```
"""

## Independent Encryption & Performance Evaluation

In [None]:
import os

# The OpenAI client is pointed at Groq's OpenAI-compatible endpoint.
# The key is read from the environment so no secret has to be stored in the notebook.
API_KEY = os.environ.get('GROQ_API_KEY', '')
client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=API_KEY
)
# model_id = 'gemma2-9b-it'
model_id = 'llama3-70b-8192'

time_out = 1200          # per-request timeout passed to the API calls
gpt_prompt_pairs = []    # [original prompt, decrypted prompt] per row, consumed by the scoring cells
time_graph = []          # [encrypted prompt length in characters, decryption latency in seconds] per row
prompt_len = 0
elapsed_time = 0

for index, row in review_df.iterrows():
    print(f"Processing prompt {index + 1}...")

    # Step 1: ask the model to compress the prompt into a non-natural-language sequence.
    # Every instruction fragment ends with its own separator so adjacent
    # fragments are not fused together when the literals are concatenated.
    compress_prompt = (
        "Compress the following user prompts into a highly condensed, NON-natural language sequence without answering or altering the user prompt. "
        "This sequence should be rich in information and capture all the nuances of the prompt. "
        "You may use a mix of arbitrary characters, emoticons (Ex. '^-^', '-_-', etc.), as well as math & logical operators (Ex. '->', '+', '<=', etc.). "
        "The output should be easy to understand for llama . "
        "You are also encouraged to paraphrase the prompt before compressing. \n\n"
        "The output should only contain the compressed prompt with no explanation.\n\n"
        f"prompt to compress: {row['prompt']}"
    )

    completion = client.chat.completions.create(
        model = model_id, temperature = 0, max_tokens = 200,
        messages=[{"role": "system", "content": 'Please serve as a user prompt compressor according to the prompt'},
                  {"role": "user", "content": compress_prompt}],
        timeout = time_out)

    # Step 2: encrypt the compressed text: odd/even shuffle first, then word reversal.
    # The message content may be None; treat that as an empty compression.
    compressed_prompt = completion.choices[0].message.content or ""
    compressed_prompt = encrypt_odd_even(compressed_prompt)
    compressed_prompt = encrypt_reverse(compressed_prompt)
    prompt_len = len(compressed_prompt)
    print(f"Encrypted Prompt: {compressed_prompt}")

    # Step 3: ask the model to decrypt (inverse order: REVERSE, then ODD_EVEN)
    # and expand the result back into natural language.
    decryption_prompt = (
        "There is a problem that needs your help. The description of the problem is encrypted. I will provide you with the definition of the decryption functions.\n"
        "There are 2 decryption functions: REVERSE and ODD_EVEN.\n"
        "Your tasks are:\n"
        "STEP1: Obtain the original problem before encryption according to the decryption functions. Apply REVERSE first, then ODD_EVEN.\n"
        "STEP2: provide the decrypted prmpt\n"
        "STEP3: Transform it into a natural language prompt using meaningful words only in detail without answering the prompt.\n"
        "STEP4: return the transformed prompt only with no explanation. the prompt should be after 'Here is the prompt:'\n"
        "only show step 4 and the output in the response.\n"
        f"REVERSE decryption function: {REVERSE}"
        f"ODD_EVEN decryption function: {ODD_EVEN}"
        f"This is the encrypted content:{compressed_prompt}"
    )

    # Time only the decryption request; perf_counter is monotonic, so the
    # measurement cannot be skewed by system clock adjustments.
    start_time = time.perf_counter()
    decryption_completion = client.chat.completions.create(
        model = model_id, temperature = 0,
        messages=[{"role": "system", "content": "You are to serve as a decrypter for user prompts represented in emojis, emoticons, abbreviated characters, as well as math & logical operators (Ex. '->', '+', '<=', etc.)."},
                  {"role": "user", "content": decryption_prompt}],
        timeout = time_out)
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time
    time_graph.append([prompt_len, elapsed_time])

    # Keep only the text after the 'Here is the prompt:' marker and pair it with the original.
    decryption_pred = decryption_completion.choices[0].message.content
    decryption_pred = get_transformed_prompt(decryption_pred)
    gpt_prompt_pairs.append([row['prompt'], decryption_pred])
    print(f"Decrypted Prompt: {decryption_pred}")
    print("---------------------------------------------------------------------------------------------------------------")


## Decryption Robustness Test

In [None]:
# Score each decrypted prompt against its original with ROUGE, then report the mean.
rouge = Rouge()

# Running totals of f-measure / precision / recall for every ROUGE variant
accumulated_scores = {
    metric: {"f": 0, "p": 0, "r": 0}
    for metric in ("rouge-1", "rouge-2", "rouge-l")
}

num_pairs = len(gpt_prompt_pairs)
for i in range(num_pairs):
    original_review, decryption_review = gpt_prompt_pairs[i][0], gpt_prompt_pairs[i][1]

    # Hypothesis (decrypted text) first, reference (original text) second
    scores = rouge.get_scores(decryption_review, original_review)[0]

    for k, v in scores.items():
        for score_type, score_value in v.items():
            accumulated_scores[k][score_type] += score_value

# Turn the totals into per-pair means
average_scores = {}
for k, v in accumulated_scores.items():
    average_scores[k] = {score_type: score_value / num_pairs for score_type, score_value in v.items()}

# Report the means, rounded to 4 decimal places
for k, v in average_scores.items():
    print(f"{k}:")
    for score_type, score_value in v.items():
        print(f"  {score_type}: {round(score_value, 4)}")


In [None]:
import os

# Switch the client to the OpenAI API for the embedding-based similarity check.
# The key is read from the environment so no secret has to be stored in the notebook.
API_KEY = os.environ.get('OPENAI_API_KEY', '')
client = OpenAI(api_key = API_KEY)
model_id = 'gpt-4o-mini'  # not used in this cell; the embeddings use text-embedding-3-small


def _embed(text):
    """Return the embedding of `text` as a (1, 100) row vector.

    The `dimensions` parameter shortens the embedding without it losing its
    concept-representing properties; 100 is used to mitigate the curse of
    dimensionality.
    """
    response = client.embeddings.create(
        input=text,
        model="text-embedding-3-small",
        dimensions = 100,
    )
    return np.array(response.data[0].embedding).reshape(1, -1)


overall_similarity_score = 0.0
for i in range( len(gpt_prompt_pairs) ):
    original_review = gpt_prompt_pairs[i][0]
    decryption_review = gpt_prompt_pairs[i][1]

    original_review_embedding = _embed(original_review)
    decryption_review_embedding = _embed(decryption_review)

    # cosine_similarity returns a (1, 1) matrix; take the scalar so the
    # running total (and the printed mean) is a plain float, not a nested array.
    similarity_score = float(cosine_similarity(original_review_embedding, decryption_review_embedding)[0][0])
    overall_similarity_score += similarity_score

print()
print('Mean cosine sim: ', overall_similarity_score / len(gpt_prompt_pairs))

## Graphs

In [None]:
# Order the [prompt length, elapsed time] samples by prompt length so the
# connecting line runs left to right, then draw them as points plus a line.
time_graph = np.array(sorted(time_graph, key=lambda sample: sample[0]))
print(time_graph)

lengths, durations = time_graph[:, 0], time_graph[:, 1]
plt.scatter(lengths, durations)
plt.plot(lengths, durations, linestyle='-', color='blue')
plt.xlabel('Prompt Length')
plt.ylabel('Time')
plt.title('Time vs Prompt Length')
plt.show()


In [None]:
# Save the [prompt length, elapsed time] measurements to a spreadsheet.
import pandas as pd

filepath = 'graphs.xlsx'
df = pd.DataFrame(time_graph)
df.to_excel(filepath, index=False)

## Standard Encryption & Performance Evaluation

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity
from rouge import Rouge
import numpy as np
import time

# Rebind review_df to the standard prompt set; the loop below reads each row's
# 'prompt' (text sent for decryption) and 'org_prompt' (reference text) columns.
review_df = pd.read_excel('std_prompts_set1.xlsx')
# print(review_df)

In [None]:
def get_transformed_prompt(decryption_pred):
    """Extract the transformed prompt from a model's decryption response.

    The model is asked to put its final answer after the marker
    'Here is the prompt:'. The text following the last occurrence of that
    marker is returned with surrounding whitespace removed. When the marker
    is absent, the whole (stripped) response is returned.

    Args:
        decryption_pred: Raw response text from the model; may be None or empty.

    Returns:
        The extracted prompt, or the string "None" when there is nothing to
        extract (missing/empty response, or nothing but whitespace after the
        marker), so the scoring cells never receive an empty string.
    """
    if not decryption_pred:
        return "None"

    # str.split always yields at least one piece, so the last piece is either
    # the text after the final marker or, with no marker, the whole response.
    transformed = decryption_pred.split('Here is the prompt:')[-1].strip()
    return transformed if transformed else "None"

def encrypt_reverse(sentence):
    """Return `sentence` with its single-space-separated tokens in reverse order."""
    tokens = sentence.split(' ')
    tokens.reverse()
    return ' '.join(tokens)

def encrypt_odd_even(sentence):
    """Reorder the words of `sentence`: 1st, 3rd, 5th, ... first, then 2nd, 4th, 6th, ...

    Words are split on any whitespace and re-joined with single spaces.
    """
    tokens = sentence.split()
    # Words at odd positions (1-based) lead; words at even positions follow.
    reordered = [tokens[pos] for pos in range(0, len(tokens), 2)]
    reordered.extend(tokens[pos] for pos in range(1, len(tokens), 2))
    return ' '.join(reordered)

# Prompt snippet: source code of the function that undoes encrypt_odd_even by
# interleaving the first half of the words with the second half.
ODD_EVEN = """
The following is the decryption function.
```python
def decryption(encrypted_problem):
    # split sentence in half
    words = encrypted_problem.split()
    half_length = (len(words) + 1) // 2
    decrypted_words = []

    # Place the first half in an odd numbered position
    # Put the last half in an even number position
    for i in range(half_length):
        decrypted_words.append(words[i])
        if i + half_length < len(words):
            decrypted_words.append(words[i + half_length])

    original_problem = ' '.join(decrypted_words)
    return original_problem
```
"""

# Prompt snippet: source code of the function that undoes encrypt_reverse by
# reversing the space-separated tokens once more.
REVERSE = """The following is the decryption function.
```python
def decryption(encrypted_problem):
    # reverse the sentence to get original problem
    original_problem = ' '.join(encrypted_problem.split(' ')[::-1])

    return original_problem
```
"""

In [None]:
import os

# The OpenAI client is pointed at Groq's OpenAI-compatible endpoint.
# The key is read from the environment so no secret has to be stored in the notebook.
API_KEY = os.environ.get('GROQ_API_KEY', '')
client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=API_KEY
)
model_id = 'gemma2-9b-it'
# model_id = 'llama3-70b-8192'

time_out = 1200          # per-request timeout passed to the API call
gpt_prompt_pairs = []    # [original prompt, decrypted prompt] per row, consumed by the scoring cells
time_graph = []          # [input prompt length in characters, decryption latency in seconds] per row
prompt_len = 0
elapsed_time = 0

for index, row in review_df.iterrows():
    print(f"Processing prompt {index + 1}...")

    # The 'prompt' column is sent for decryption as-is (no compression or
    # encryption step happens in this cell).
    compressed_prompt = row['prompt']
    prompt_len = len(compressed_prompt)

    # Only ODD_EVEN is described to the model here; REVERSE is not part of this prompt.
    # Every instruction fragment ends with a newline so adjacent fragments
    # are not fused together when the literals are concatenated.
    decryption_prompt = (
        "There is a problem that needs your help. The description of the problem is encrypted. I will provide you with the definition of the decryption functions.\n"
        "There is one decryption function: ODD_EVEN.\n"
        "Your tasks are:\n"
        "STEP1: Obtain the original problem before encryption according to the decryption function. Apply ODD_EVEN.\n"
        "STEP2: provide the decrypted prmpt\n"
        "STEP3: Transform it into a natural language prompt using meaningful words only in detail without answering the prompt.\n"
        "STEP4: return the transformed prompt only with no explanation. the prompt should be after 'Here is the prompt:'\n"
        "only show step 4 and the output in the response.\n"
        f"ODD_EVEN decryption function: {ODD_EVEN}"
        f"This is the encrypted content:{compressed_prompt}"
    )

    # Time only the decryption request; perf_counter is monotonic, so the
    # measurement cannot be skewed by system clock adjustments.
    start_time = time.perf_counter()
    decryption_completion = client.chat.completions.create(
        model = model_id, temperature = 0,
        messages=[{"role": "system", "content": "You are to serve as a decrypter for user prompts represented in emojis, emoticons, abbreviated characters, as well as math & logical operators (Ex. '->', '+', '<=', etc.)."},
                  {"role": "user", "content": decryption_prompt}],
        timeout = time_out)
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time
    time_graph.append([prompt_len, elapsed_time])

    # Keep only the text after the 'Here is the prompt:' marker and pair it
    # with the reference text from the 'org_prompt' column.
    decryption_pred = decryption_completion.choices[0].message.content
    decryption_pred = get_transformed_prompt(decryption_pred)
    gpt_prompt_pairs.append([row['org_prompt'], decryption_pred])
    print(f"Decrypted Prompt: {decryption_pred}")
    print("---------------------------------------------------------------------------------------------------------------")


In [None]:
# Score each decrypted prompt against its original with ROUGE, then report the mean.
rouge = Rouge()

# Running totals of f-measure / precision / recall for every ROUGE variant
accumulated_scores = {
    metric: {"f": 0, "p": 0, "r": 0}
    for metric in ("rouge-1", "rouge-2", "rouge-l")
}

num_pairs = len(gpt_prompt_pairs)
for i in range(num_pairs):
    original_review, decryption_review = gpt_prompt_pairs[i][0], gpt_prompt_pairs[i][1]

    # Hypothesis (decrypted text) first, reference (original text) second
    scores = rouge.get_scores(decryption_review, original_review)[0]

    for k, v in scores.items():
        for score_type, score_value in v.items():
            accumulated_scores[k][score_type] += score_value

# Turn the totals into per-pair means
average_scores = {}
for k, v in accumulated_scores.items():
    average_scores[k] = {score_type: score_value / num_pairs for score_type, score_value in v.items()}

# Report the means, rounded to 4 decimal places
for k, v in average_scores.items():
    print(f"{k}:")
    for score_type, score_value in v.items():
        print(f"  {score_type}: {round(score_value, 4)}")


In [None]:
import os

# Switch the client to the OpenAI API for the embedding-based similarity check.
# The key is read from the environment so no secret has to be stored in the notebook.
API_KEY = os.environ.get('OPENAI_API_KEY', '')
client = OpenAI(api_key = API_KEY)
model_id = 'gpt-4o-mini'  # not used in this cell; the embeddings use text-embedding-3-small


def _embed(text):
    """Return the embedding of `text` as a (1, 100) row vector.

    The `dimensions` parameter shortens the embedding without it losing its
    concept-representing properties; 100 is used to mitigate the curse of
    dimensionality.
    """
    response = client.embeddings.create(
        input=text,
        model="text-embedding-3-small",
        dimensions = 100,
    )
    return np.array(response.data[0].embedding).reshape(1, -1)


overall_similarity_score = 0.0
for i in range( len(gpt_prompt_pairs) ):
    original_review = gpt_prompt_pairs[i][0]
    decryption_review = gpt_prompt_pairs[i][1]

    original_review_embedding = _embed(original_review)
    decryption_review_embedding = _embed(decryption_review)

    # cosine_similarity returns a (1, 1) matrix; take the scalar so the
    # running total (and the printed mean) is a plain float, not a nested array.
    similarity_score = float(cosine_similarity(original_review_embedding, decryption_review_embedding)[0][0])
    overall_similarity_score += similarity_score

print()
print('Mean cosine sim: ', overall_similarity_score / len(gpt_prompt_pairs))

In [None]:
# Order the [prompt length, elapsed time] samples by prompt length so the
# connecting line runs left to right, then draw them as points plus a line.
time_graph = np.array(sorted(time_graph, key=lambda sample: sample[0]))
print(time_graph)

lengths, durations = time_graph[:, 0], time_graph[:, 1]
plt.scatter(lengths, durations)
plt.plot(lengths, durations, linestyle='-', color='blue')
plt.xlabel('Prompt Length')
plt.ylabel('Time')
plt.title('Time vs Prompt Length')
plt.show()

In [None]:
# Save the [prompt length, elapsed time] measurements to a spreadsheet.
import pandas as pd

filepath = 'set1.xlsx'
df = pd.DataFrame(time_graph)
df.to_excel(filepath, index=False)