In [1]:
from openai import OpenAI
from datetime import datetime
from better_profanity import profanity
import hashlib
import re
import os
from tqdm import tqdm
import numpy as np

import logging

# Grab the root logger and lower its threshold to DEBUG so that every
# library logging through the standard `logging` module is shown.
logger = logging.root
logger.setLevel("DEBUG")

In [2]:
# Notebook-wide constants.
NAMESPACE = 'default'
OPENAI_MODEL = 'text-embedding-ada-002'

# Build the OpenAI client from the environment. When the key is missing (or
# empty) only a warning is printed and `client` is left undefined.
api_key = os.environ.get("OPENAI_API_KEY", False)
if not api_key:
    print("PROBLEM: no OPENAI_API_KEY in environment")
else:
    client = OpenAI(api_key=api_key)

In [8]:
import re
from bs4 import BeautifulSoup
from urllib.request import urlopen

POETRY_INDEX_URL = 'https://bestofthenetanthology.com/2023-2/poetry2023/'


def _fetch_poem_text(url):
    """Download one poem page and return the text of all its paragraphs.

    Every <br> tag is first replaced with a newline so that line breaks inside
    a stanza are kept. Each <p> is then appended preceded by a newline, so a
    page with at least one paragraph yields text that starts with a newline.
    """
    with urlopen(url) as page:
        poem_soup = BeautifulSoup(page, 'html.parser')
    for line_break in poem_soup.find_all('br'):
        line_break.replace_with("\n")
    return "".join("\n" + paragraph.text for paragraph in poem_soup.find_all('p'))


# One entry per poem link found on the index page, in page order.
poemlist_uncensored = list()

with urlopen(POETRY_INDEX_URL) as response:
    soup = BeautifulSoup(response, 'html.parser')

for par in soup.find_all('p'):
    for anchor in par.find_all('a'):
        href = anchor.get('href')
        # Anchors without an href yield None; skip them instead of crashing.
        # A plain substring test replaces the regex, whose unescaped dots
        # matched any character.
        if href and POETRY_INDEX_URL in href:
            poemlist_uncensored.append(_fetch_poem_text(href))

In [9]:
# Run every scraped poem through better_profanity's censor, keeping the order.
poemlist = list(map(profanity.censor, poemlist_uncensored))

In [10]:
FAKE_POEM_MODEL = "gpt-3.5-turbo"
START_CHARS = 80  # leading characters of each real poem handed to the model


def _combine_start_and_reply(start, reply):
    """Join the real opening `start` with the model's `reply` into one poem.

    The prompt asks the model to begin its answer with `start`. When the reply
    already echoes it (ignoring surrounding whitespace), the reply is kept and
    only the leading whitespace of `start` is restored, so the opening is not
    duplicated. Otherwise `start` is prepended. A missing reply (None) is
    treated as an empty string.
    """
    reply = reply or ""
    echoed = start.strip()
    trimmed_reply = reply.lstrip()
    if echoed and trimmed_reply.startswith(echoed):
        leading_whitespace = start[:len(start) - len(start.lstrip())]
        return leading_whitespace + trimmed_reply
    return start + reply


# One AI-written counterpart per real poem, in the same order as `poemlist`.
fakepoemlist = list()
for poem in tqdm(poemlist, desc="Generating AI poems"):
    start = poem[0:START_CHARS]
    query = f"""
    Please complete the following poem's beginning.  Start with this string of characters, then add to it to write a poem of about 300
    characters.  The given string:
    -------
    {start}
    """
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": query,
            }
        ],
        model=FAKE_POEM_MODEL,
    )
    fakepoem = chat_completion.choices[0].message.content
    fakepoemlist.append(_combine_start_and_reply(start, fakepoem))

In [12]:
import pandas as pd
import json
import random
SYSTEM_PROMPT = "You want to write 0 if a poem is written by AI and 1 if written by a human.  This usually means to write a 0 if there are cliches, and 1 if there are few or no cliches in the writing."
# Number of real poems and of fake poems used for training.
# NOTE(review): the evaluation cells start at index 10, so index 9 ends up in
# neither split.
N_TRAIN = 9
SHUFFLE_SEED = 0  # fixed so the training file is identical on every run


def _training_example(poem, label):
    """Return one JSONL line: system prompt, `poem` as user turn, `label` as assistant turn."""
    return json.dumps({"messages": [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": poem},
        {"role": "assistant", "content": label},
    ]})


# Human poems are labelled " 1", AI poems " 0".
lst = [_training_example(poem, " 1") for poem in poemlist[0:N_TRAIN]]
lst += [_training_example(fakepoem, " 0") for fakepoem in fakepoemlist[0:N_TRAIN]]
# A private Random instance shuffles reproducibly without touching the global RNG.
random.Random(SHUFFLE_SEED).shuffle(lst)
# Create JSONL training file with the first N poems and the first N fake poems
with open("who_made_it_training.jsonl", "w", encoding="utf-8") as file:
    file.writelines(elt + "\n" for elt in lst)

### I had to tell it explicitly that cliches were a key element in determining AI or human writing; without that, it couldn't do the task.

In [14]:
from openai import OpenAI
client = OpenAI()
# Upload the training file for fine-tuning. The context manager closes the
# file handle once the upload call returns.
with open("who_made_it_training.jsonl", "rb") as training_file:
    uploaded_training_file = client.files.create(
        file=training_file,
        purpose="fine-tune"
    )
# Last expression of the cell, so the returned FileObject is still displayed.
uploaded_training_file

FileObject(id='file-cYj7ddKsLTIMIsQeXFjzlYBG', bytes=29524, created_at=1706732450, filename='who_made_it_training.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None)

In [15]:
# Start a fine-tuning job on the uploaded training file.
# The file id is copied from the output of the upload cell; presumably it has
# to be updated whenever the file is uploaded again.
TRAINING_FILE_ID = "file-cYj7ddKsLTIMIsQeXFjzlYBG"
BASE_MODEL = "gpt-3.5-turbo"
client.fine_tuning.jobs.create(training_file=TRAINING_FILE_ID, model=BASE_MODEL)

FineTuningJob(id='ftjob-A52A53WjhjmmOna6g1L189hV', created_at=1706732473, error=None, fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-3.5-turbo-0613', object='fine_tuning.job', organization_id='org-8z44pY81VjnlhTypxmxrOHp3', result_files=[], status='validating_files', trained_tokens=None, training_file='file-cYj7ddKsLTIMIsQeXFjzlYBG', validation_file=None)

In [18]:
# Poll the fine-tuning job; the id is copied from the output of the cell that
# created the job.
FINE_TUNE_JOB_ID = "ftjob-A52A53WjhjmmOna6g1L189hV"
client.fine_tuning.jobs.retrieve(FINE_TUNE_JOB_ID)

FineTuningJob(id='ftjob-A52A53WjhjmmOna6g1L189hV', created_at=1706732473, error=None, fine_tuned_model='ft:gpt-3.5-turbo-0613:personal::8nBTHV3d', finished_at=1706732859, hyperparameters=Hyperparameters(n_epochs=5, batch_size=1, learning_rate_multiplier=2), model='gpt-3.5-turbo-0613', object='fine_tuning.job', organization_id='org-8z44pY81VjnlhTypxmxrOHp3', result_files=['file-e2LWKByWO2SZ6T8IyDMyZU20'], status='succeeded', trained_tokens=33895, training_file='file-cYj7ddKsLTIMIsQeXFjzlYBG', validation_file=None)

In [19]:
def test_poem(poem):
    response = client.chat.completions.create(
        model="ft:gpt-3.5-turbo-0613:personal::8nBTHV3d",
        messages=[
            {"role": "system", "content": "You want to write 0 if a poem is written by AI and 1 if written by a human.  This usually means to write a 0 if there are cliches, and 1 if there are few or no cliches in the writing."},
            {"role": "user", "content": poem}
        ]
    )
    return "Written by a real poet" if "1" in response.choices[0].message.content else "Written by AI or amateur"

In [20]:
# Sample written by ChatGPT, used as an AI-written example for the classifier.
openai_poem = """In a land of whimsy, under a candy-coated sky, 
Where laughter flowed like rivers, and pigs knew how to fly. 
A quirky tale unfolds, in rhyme and jest, 
A silly little poem, just for your zest."""

In [21]:
# Classify the ChatGPT-written sample.
test_poem(openai_poem)

'Written by AI or amateur'

In [22]:
# Sample written by the notebook author: human-written, but deliberately not a
# good poem.
my_poem = """This is a poem.  Yes it is.  It is not a very good one.  Oh no it is not.  
But it is about butterflies, trees, and mountains."""

In [23]:
# Classify the author's own amateur sample.
test_poem(my_poem)

'Written by AI or amateur'

In [24]:
# Sample written by a real poet, used as a human-written example for the
# classifier.
real_poem = """The bud
stands for all things,
even for those things that don't flower,
for everything flowers, from within, of self-blessing;
though sometimes it is necessary
to reteach a thing its loveliness,
to put a hand on its brow
of the flower
and retell it in words and in touch
it is lovely
until it flowers again from within, of self-blessing;"""

In [25]:
# Classify the sample written by a real poet.
test_poem(real_poem)

'Written by a real poet'

In [26]:
# Evaluation on scraped poems from index 10 onward; every one of them was
# written by a real poet, so each verdict should say so.
held_out_real_poems = poemlist[10:]
for poem in held_out_real_poems:
    verdict = test_poem(poem)
    print(verdict)

Written by a real poet
Written by a real poet
Written by a real poet
Written by a real poet
Written by a real poet
Written by a real poet
Written by AI or amateur
Written by a real poet
Written by a real poet


In [27]:
# Evaluation on generated poems from index 10 onward; every one of them was
# written by AI, so each verdict should say so.
held_out_fake_poems = fakepoemlist[10:]
for poem in held_out_fake_poems:
    verdict = test_poem(poem)
    print(verdict)

Written by AI or amateur
Written by AI or amateur
Written by AI or amateur
Written by AI or amateur
Written by AI or amateur
Written by AI or amateur
Written by AI or amateur
Written by AI or amateur
Written by AI or amateur


In [30]:
# Nonsense word list used as a sanity check. The verdict for it is not stable:
# in some runs the model says a real poet wrote it, while in the recorded run
# it was flagged as fake.
poem = """Sassafrass purple guppy monkey silly water box trapeze artist giraffe monsters."""

In [29]:
# Classify whatever `poem` currently holds (the nonsense word list when the
# cells are run top to bottom).
test_poem(poem)

'Written by AI or amateur'

In [41]:
# One more sample, defined and classified in the same cell. Note that it
# rebinds the `poem` variable used by the earlier cells.
poem = """Two roads diverged in a yellow wood,
And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
To where it bent in the undergrowth;"""
test_poem(poem)

'Written by a real poet'