### Setup

In [None]:
!pip install openai

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import openai
import pandas as pd

In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Base folder on the mounted Drive; the generated essay CSVs are written here.
DATA_DIR = '/content/drive/MyDrive/U-Integrity_Data'

In [None]:
# The client needs the secret key itself, not the path of the file that stores it,
# so read the key from Drive and strip surrounding whitespace / the trailing newline.
# The file name matches the one the previous (overriding) assignment pointed at.
with open(f"{DATA_DIR}/openai_key.txt") as key_file:
  openai.api_key = key_file.read().strip()

### Essay prompts
Prompts 3, 4, 5, and 6 are all source-dependent, so we chose to omit them: we don't have the specific source material that those essays are based on.

This shrinks our overall dataset from ~13,000 essays to 1,785 + 1,800 + 1,730 + 918 = 6,233 (prompts 1,2,7,8)

The corresponding average essay lengths for prompts 1, 2, 7, and 8 are 350, 350, 250, and 650, respectively.

In [None]:
# Prompt 1: persuasive letter to a local newspaper about the effects of computers on people.
prompt1 = '''More and more people use computers, but not everyone agrees that this benefits society.
Those who support advances in technology believe that computers have a positive effect on people.
They teach hand-eye coordination, give people the ability to learn about faraway places and people,
and even allow people to talk online with other people. Others have different ideas.
Some experts are concerned that people are spending too much time on their computers
and less time exercising, enjoying nature, and interacting with family and friends.

Write a letter to your local newspaper in which you state your opinion on the
effects computers have on people. Persuade the readers to agree with you.'''

# Prompt 2: persuasive essay on censorship in libraries.
prompt2 = '''Censorship in the Libraries
"All of us can think of a book that we hope none of our children or any other children have taken off the shelf.
But if I have the right to remove that book from the shelf -- that work I abhor -- then you also have exactly the same right
and so does everyone else. And then we have no books left on the shelf for any of us." --Katherine Paterson, Author

Write a persuasive essay to a newspaper reflecting your views on censorship in libraries.
Do you believe that certain materials, such as books, music, movies, magazines, etc., should be removed from the
shelves if they are found offensive? Support your position with convincing arguments from your own experience, observations, and/or reading.'''

# Prompt 7: story about patience.
# The prompt text is sent to the model verbatim, so its wording is left untouched.
prompt7 = '''Write about patience. Being patient means that you are understanding and tolerant.
A patient person experience difficulties without complaining.
Do only one of the following: write a story about a time when you were patient OR write a story
about a time when someone you know was patient OR write a story in your own way about patience.'''

# Prompt 8: true story in which laughter played a part.
prompt8 = '''We all understand the benefits of laughter. For example, someone once said, “Laughter is the shortest distance between two people.”
Many other people believe that laughter is an important part of any relationship. Tell a true story in which laughter was one element or part.'''

### Generating essays

In [None]:
# Parallel lists: the same position in each list refers to the same prompt
# (in order: prompts 1, 2, 7 and 8).
essay_prompts = [prompt1, prompt2, prompt7, prompt8]
num_gen_essays = [170, 180, 170, 90] #roughly 10% of the number of Kaggle essays for each prompt
# Value passed as max_tokens to the API for each prompt.
num_tokens = [512, 512, 256, 768]

#### GPT-3

Note: It took ~24 mins to generate 170+180+170+90=610 essays with GPT-3

In [None]:
# Generate essays with the GPT-3 completion endpoint and write one CSV per prompt.
# `i` is the 0-based position in `essay_prompts`, so the output files are named
# prompt0..prompt3 even though the prompts themselves are numbered 1, 2, 7 and 8.
for i, essay_prompt in enumerate(essay_prompts):
  filepath = DATA_DIR + f'/prompt{i}_essays.csv'
  gen_essay_output = []

  print(f'Generating for prompt {i}...')
  print(essay_prompt)

  # One API call per essay; temperature=1 with n=1 yields a fresh sample each time.
  for j in range(num_gen_essays[i]):
    print(j)

    response = openai.Completion.create(
        engine="text-curie-001",
        prompt=essay_prompt,
        max_tokens=num_tokens[i],
        n=1,
        temperature=1,
    )
    first_choice = response.choices[0]
    gen_essay_output.append(first_choice.text)

  # The CSV is written only after every essay for this prompt has been generated.
  df = pd.DataFrame({'gen_essay': gen_essay_output})
  df.to_csv(filepath, encoding='utf_8_sig')

  print(f'Finished saving csv file for prompt {i}...')
  print("Complete. Please check your file at ", filepath)

#### GPT-3.5-turbo

Note: It took ~10 mins to generate 30 essays with GPT-3.5

In [None]:
# Generate essays with the gpt-3.5-turbo chat endpoint and write one CSV per prompt.
# `i` is the 0-based position in `essay_prompts`, so the output files are named
# prompt0..prompt3 even though the prompts themselves are numbered 1, 2, 7 and 8.
for i, essay_prompt in enumerate(essay_prompts):
  filepath2 = DATA_DIR + f'/prompt{i}_gpt3turbo_essays.csv'
  gen_essay_output = []

  print(f'Generating for prompt {i}...')
  print(essay_prompt)

  # One chat request per essay; the essay prompt is sent as a single user message.
  for j in range(num_gen_essays[i]):
    print(j)

    response2 = openai.ChatCompletion.create(
      model="gpt-3.5-turbo",
      messages=[{"role": "user", "content": essay_prompt}],
      max_tokens=num_tokens[i],
      n=1,
      temperature=1,
    )
    first_choice = response2.choices[0]
    gen_essay_output.append(first_choice.message.content)

  # The CSV is written only after every essay for this prompt has been generated.
  df2 = pd.DataFrame({'gen_essay': gen_essay_output})
  df2.to_csv(filepath2, encoding='utf_8_sig')

  print(f'Finished saving csv file for prompt {i}...')
  print("Complete. Please check your file at ", filepath2)