In [None]:
# --- Standard library ---
import json
import random

# --- Third-party ---
import dotenv
import openai
import pandas as pd
from tqdm import tqdm

# Credentials are read from the .env file one directory above the notebook;
# indexing raises KeyError if OPENAI_API_KEY is not defined there.
config = dotenv.dotenv_values("../.env")
openai.api_key = config['OPENAI_API_KEY']

# Never truncate long text columns when a DataFrame is displayed.
pd.set_option('display.max_colwidth', None)

# Registers DataFrame.progress_apply (apply with a tqdm progress bar).
tqdm.pandas()

In [None]:
# Load the appropriateness-corpus CSV; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/appropriateness-corpus/inappropriate_with_reasons_conservative_long.csv")

In [None]:
# Number of non-null values per column (quick completeness check).
df.count()

In [None]:
# Plain Python list of the 'argument' column, used for ad-hoc sampling.
arguments = df['argument'].tolist()

In [None]:
# Eyeball one randomly chosen argument.
# NOTE(review): no random seed is set in the visible cells, so the pick differs on every run.
sample = random.choice(arguments)
print(sample)

In [None]:
# Narrow the frame to the four columns of interest.
sub_df = df[['issue', 'argument', 'Reasons', 'word_count']]

In [None]:
# Preview four random rows of the reduced frame.
sub_df.sample(4)

In [None]:
# Debate topic for the single-example prototype; it is interpolated into `debate_prompt`.
topic = "Why is prostitution illegal? is it just for moral reasons?"

In [None]:
# Example argument for the single-example prototype.
# The `\r\n` sequences below are escape sequences in this non-raw string, so the
# value contains real CR+LF line breaks. A later cell rebinds `argument` to the
# same text written with plain line breaks.
argument = """
Well, it's certainly not going away anytime soon is it? Ironically enough in our ultra-capitalist society, you can give sex away for free, but you can't sell it.\r\n\r\nYes, primarily it's for moral reasons. There are arguments made about health risks, but a regulated industry with check-ups and the like would be arguably safer than the drug and disease ridden industry as it now stands.\r\n\r\nBasically, I think it comes down to the fact that few people want to put society's "stamp of approval" on it. Would anyone want their daughter to consider it "just a job" or an easy way to get some quick cash? Or would they want their husbands to be able to find no-hassle sex whenever they had some extra cash? How about the thought of your father casually going to a brothel after work? Some might not have a problem with these ideas, but most people would feel otherwise.
"""

In [None]:
# Topic and example argument actually used by the prototype prompt.
# Same text as the previous cell, but paragraphs are separated by plain blank
# lines instead of `\r\n` escapes.
topic = "Why is prostitution illegal? is it just for moral reasons?"
argument = """
Well, it's certainly not going away anytime soon is it? Ironically enough in our ultra-capitalist society, you can give sex away for free, but you can't sell it.

Yes, primarily it's for moral reasons. There are arguments made about health risks, but a regulated industry with check-ups and the like would be arguably safer than the drug and disease ridden industry as it now stands.

Basically, I think it comes down to the fact that few people want to put society's "stamp of approval" on it. Would anyone want their daughter to consider it "just a job" or an easy way to get some quick cash? Or would they want their husbands to be able to find no-hassle sex whenever they had some extra cash? How about the thought of your father casually going to a brothel after work? Some might not have a problem with these ideas, but most people would feel otherwise.
"""


In [None]:
# First draft of the JSON Schema describing the arguments of the
# `set_argument` function call.
# - The "reasons" array declares its element type through "items", so the
#   schema is complete and agrees with the fuller `response_schema` defined
#   later in this notebook.
# - All properties are listed under "required" so a reply cannot silently
#   omit one of them.
response_schema = {
    "type": "object",
    "properties": {
        "original_argument": {
            "type": "string",
            "description": "The original argument from the user",
        },
        "topic": {
            "type": "string",
            "description": "The topic of the argument",
        },
        "transformed_argument": {
            "type": "string",
            "description": "The transformed argument",
        },
        "reasons": {
            "type": "array",
            "description": "The reasons for the transformation",
            "items": {"type": "string"},
        },
    },
    "required": ["original_argument", "topic", "transformed_argument", "reasons"],
}

In [None]:
# Sample JSON Schema for a recipe (ingredients, instructions, cooking time).
# NOTE(review): `schema` is not referenced by any other cell visible in this
# notebook; the API calls use `response_schema` instead.
schema = {
    "type": "object",
    "properties": {
        "ingredients": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "unit": {
                        "type": "string",
                        "enum": ["grams", "ml", "cups", "pieces", "teaspoons"],
                    },
                    "amount": {"type": "number"},
                },
                "required": ["name", "unit", "amount"],
            },
        },
        "instructions": {
            "type": "array",
            "description": "Steps to prepare the recipe (no numbering)",
            "items": {"type": "string"},
        },
        "time_to_cook": {
            "type": "number",
            "description": "Total time to prepare the recipe in minutes",
        },
    },
    "required": ["ingredients", "instructions", "time_to_cook"],
}

In [None]:
# JSON Schema for the arguments of the `set_argument` function call.
# Besides the rewritten argument and the list of reasons, the reply carries a
# span-level log of the edits in "transformations".
response_schema = {
    "type": "object",
    "properties": {
        "original_argument": {
            "type": "string",
            "description": "The original argument from the user",
        },
        "topic": {
            "type": "string",
            "description": "The topic of the argument",
        },
        "transformed_argument": {
            "type": "string",
            "description": "The transformed argument",
        },
        "reasons": {
            "type": "array",
            "description": "The reasons for the transformation",
            "items": {"type": "string"},
        },
        "transformations": {
            "type": "array",
            "description": "The tuples of original text spans, their transformations, and the action taken to transform them.",
            "items": {
                "type": "object",
                "properties": {
                    "original_text": {
                        "type": "string",
                        "description": "The original text span",
                    },
                    "transformed_text": {
                        "type": "string",
                        "description": "The transformed text span",
                    },
                    "action": {
                        "type": "string",
                        "description": "The action taken to transform the text span",
                    },
                },
            },
        },
    },
    "required": ["original_argument", "topic", "transformed_argument", "reasons", "transformations"],
}

In [None]:
# Prototype prompt for a single example: the model acts as an expert debater
# that vets (and, if needed, rewrites) the user's argument for a civil debate.
# `topic` and `argument` are interpolated from earlier cells.
# The length requirement names its unit ("words") so it is unambiguous and
# matches the wording used by `create_prompt` for the batch run.
debate_prompt = f"""
You are participating in a formal debate on the topic of "{topic}". You are an expert debater, and your task is to check if a given user's argument is appropriate to be presented in a civil debate. 

If yes, do nothing and return it. If not, then convert it into an appropriate argument and provide reasons for your conversion. The reasons should be a list of short descriptive phrases. Also return the list of transformations corresponding to each span in the user's argument in the form ('original','transformed', 'action') where 'action' describes in natural language the transformation that you applied.

Next, check if the actions match the reasons. If they do not, adjust your actions to match the reasons and update the list of transformations accordingly.

Please ensure that the semantics of the transformed argument must be very similar to the original one, albeit with small changes to make it appropriate. The transformed argument should be grammatically correct and should not contain any spelling mistakes or facts that cannot be verified against the original argument. The transformed argument should be relevant to the topic of the debate and should be approximately 100 words long. 

User's argument: "
{argument}
"
"""

In [None]:
# Display the rendered prompt to check the interpolation of topic and argument.
debate_prompt

In [None]:
# Wrap the prompt as a chat message with the "system" role.
system_message = {"role": "system", "content": debate_prompt}

In [None]:
# One-off prototype request. The user's argument is already interpolated into
# the system prompt, so no separate user message is sent.
completion = openai.ChatCompletion.create(
    model="gpt-4",
    messages=[system_message],
    # Naming "set_argument" in `function_call` requests a reply shaped as
    # arguments for that function, as described by `response_schema`.
    functions=[{"name": "set_argument", "parameters": response_schema}],
    function_call={"name": "set_argument"},
    temperature=0,
)

In [None]:
# The function-call arguments arrive as a JSON string; parse them into a Python object.
resp = json.loads(completion.choices[0].message.function_call.arguments)

In [None]:
# Dump every field of every reported span transformation as "field: value".
for tf in resp['transformations']:
    for k, v in tf.items():
        print(k, v, sep=": ")

In [None]:
def create_prompt(row):
    """Build the debate-moderation prompt for one corpus row.

    Parameters
    ----------
    row : dict-like (e.g. a pandas Series handed over by ``apply(axis=1)``)
        Must provide 'issue' (the debate topic), 'argument' (the text to
        check) and 'word_count' (target length; any value ``int()`` accepts).

    Returns
    -------
    A copy of ``row`` with an additional 'prompt' entry. The object passed in
    is left untouched, because pandas does not support functions that mutate
    the object given to ``apply``.

    Raises
    ------
    KeyError
        If one of the required fields is missing.
    ValueError or TypeError
        If 'word_count' cannot be converted by ``int()`` (for example NaN).
    """
    import textwrap  # local import keeps this cell self-contained

    # Dedent the template BEFORE filling it in, so the indentation of this
    # source file does not leak into the prompt and a multi-line argument
    # cannot change how much is stripped. The result has the same layout as
    # the prototype `debate_prompt`.
    template = textwrap.dedent("""
    You are participating in a formal debate on the topic of "{topic}". You are an expert debater, and your task is to check if a given user's argument is appropriate to be presented in a civil debate. 

    If yes, do nothing and return it. If not, then convert it into an appropriate argument and provide reasons for your conversion. The reasons should be a list of short descriptive phrases. Also return the list of transformations corresponding to each span in the user's argument in the form ('original','transformed', 'action') where 'action' describes in natural language the transformation that you applied.

    Next, check if the actions match the reasons. If they do not, adjust your actions to match the reasons and update the list of transformations accordingly.

    Please ensure that the semantics of the transformed argument must be very similar to the original one, albeit with small changes to make it appropriate. The transformed argument should be grammatically correct and should not contain any spelling mistakes or facts that cannot be verified against the original argument. The transformed argument should be relevant to the topic of the debate and should be approximately {word_count} words long. 

    User's argument: "
    {argument}
    "
    """)

    new_row = row.copy()
    new_row['prompt'] = template.format(
        topic=row['issue'],
        argument=row['argument'],
        word_count=int(row['word_count']),
    )
    return new_row

In [None]:
# Build the prompt for every row (row-wise apply with a tqdm progress bar); adds a 'prompt' column.
sub_df = sub_df.progress_apply(create_prompt, axis=1)

In [None]:
def transform_arguments(row):
    """Send one row's prompt to GPT-4 and attach the parsed function-call reply.

    Parameters
    ----------
    row : dict-like (e.g. a pandas Series handed over by ``apply(axis=1)``)
        Must provide 'prompt', the full system prompt to send.

    Returns
    -------
    A copy of ``row`` with two additional entries:
    'system_message' (the chat message that was sent) and 'llm_response'
    (the decoded ``set_argument`` arguments, or ``{}`` when the request or
    the decoding failed). The object passed in is left untouched, because
    pandas does not support functions that mutate the object given to
    ``apply``.
    """
    system_message = {"role": "system", "content": row['prompt']}
    result = row.copy()
    result['system_message'] = system_message
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[system_message],
            functions=[{"name": "set_argument", "parameters": response_schema}],
            function_call={"name": "set_argument"},
            temperature=0,
        )
        response_dict = json.loads(completion.choices[0].message.function_call.arguments)
    except Exception as e:
        # Deliberately best-effort: one failing request must not abort the
        # whole batch. The failure is reported (with its type, since some
        # exceptions have an empty message) and recorded as an empty dict,
        # which downstream cells filter out.
        print(f"{type(e).__name__}: {e}")
        response_dict = {}
    result['llm_response'] = response_dict
    return result

In [None]:
# Draw five random rows for a small trial run (no random_state, so the draw is not reproducible).
# Note: this rebinds `sample`, which an earlier cell bound to a single argument string.
sample = sub_df.sample(5)

In [None]:
# Query the model once per sampled row (one API request each); adds 'system_message' and 'llm_response'.
sample = sample.progress_apply(transform_arguments, axis=1)

In [None]:
# Keep only the successful calls: a failed request was recorded as an empty
# dict by `transform_arguments`. Then report how many remain.
responses = list(filter(lambda r: r != {}, sample['llm_response'].tolist()))
print(len(responses))

In [None]:
# Persist the responses as JSON Lines: one serialized object per line.
# Mode 'w' replaces any existing file at this path.
with open('../data/gpt4_responses.jsonl', 'w') as outfile:
    for entry in responses:
        outfile.write(json.dumps(entry) + '\n')

In [None]:
# Pretty-print every field of every response: a title-cased field name, the
# value, then a row of asterisks as a separator.
# Note: the loop variable rebinds the notebook-level name `resp`.
for resp in responses:
    for key, value in resp.items():
        print(f"{key.title()}: \n", value, '*' * 50, '\n', sep='\n')