In [1]:
from openai import OpenAI
from datetime import datetime
from better_profanity import profanity
import hashlib
import re
import os
from tqdm import tqdm
import numpy as np
import re
from bs4 import BeautifulSoup
from urllib.request import urlopen
import logging
import pandas as pd
import json
import random
import configparser
import time

# Grab the root logger (getLogger() with no name) and lower its threshold to DEBUG,
# so records of every level are passed on to whatever handlers are attached.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

In [2]:
# Load the notebook's settings from config.ini in the current working directory.
CONFIG_PATH = 'config.ini'
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed.  Check that list so a missing file is reported clearly
# instead of surfacing as an opaque KeyError on the lookups below.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(
        f"Could not read {CONFIG_PATH!r}; it must define OpenAIModel and FilenameJsonl under [DEFAULT]."
    )
OPENAI_MODEL = config['DEFAULT']['OpenAIModel']      # model name passed to the OpenAI calls below
FILENAME_JSONL = config['DEFAULT']['FilenameJsonl']  # path of the JSONL training file

In [55]:
def generate_openai_client():
    """Build an OpenAI client from the OPENAI_API_KEY environment variable.

    Returns the client, or None (after printing a warning) when the variable
    is unset or empty.
    """
    key = os.environ.get("OPENAI_API_KEY", False)
    if not key:
        # No usable key: report it and hand back None, as callers expect.
        print("PROBLEM: no OPENAI_API_KEY in environment")
        return None
    return OpenAI(api_key=key)

In [57]:
class PoemList:
    """Builds the training data: real poems scraped from the web plus AI-written counterparts.

    Attributes:
    realpoemlist -- censored text of each scraped poem (filled by get_realpoemlist)
    fakepoemlist -- one AI continuation per real poem (filled by get_fakepoemlist)
    """

    # Index page of the anthology; links to the individual poems contain this URL.
    INDEX_URL = 'https://bestofthenetanthology.com/2023-2/poetry2023/'

    # System message attached to every training example.
    SYSTEM_PROMPT = "You want to write 0 if a poem is written by AI and 1 if written by a human.  This usually means to write a 0 if there are cliches, and 1 if there are few or no cliches in the writing."

    def __init__(self):
        # Start with empty lists so calling the methods out of order does not
        # fail with AttributeError.
        self.realpoemlist = list()
        self.fakepoemlist = list()

    @staticmethod
    def _fetch_poem_text(url):
        """Download one poem page and return the text of its paragraphs, each preceded by a newline."""
        with urlopen(url) as response:
            soup = BeautifulSoup(response, 'html.parser')
        # Turn <br> tags into real newlines so line breaks survive text extraction.
        for line_break in soup.find_all('br'):
            line_break.replace_with("\n")
        return "".join("\n" + paragraph.text for paragraph in soup.find_all('p'))

    @staticmethod
    def _join_start_and_completion(start, completion):
        """Combine the real opening with the model's reply without repeating the opening.

        The prompt asks the model to begin with `start`, so the reply may echo it.
        When it does, only the part after the echo is appended; otherwise the reply
        is appended to `start` unchanged.
        """
        head = start.strip()
        reply = completion.lstrip()
        if head and reply.startswith(head):
            return start.rstrip() + reply[len(head):]
        return start + completion

    @classmethod
    def _training_example(cls, poem, label):
        """Return one chat-format training record for `poem` with the given assistant label."""
        return {"messages": [{"role": "system", "content": cls.SYSTEM_PROMPT}, {"role": "user", "content": poem}, {"role": "assistant", "content": label}]}

    def get_realpoemlist(self):
        """Get a list of about 20 real poems from the given website.  Censor the poems.

        Every link inside a paragraph of the index page whose target contains
        INDEX_URL is fetched and its paragraph text becomes one poem.  The
        censored poems are stored in self.realpoemlist.
        """
        poemlist_uncensored = list()

        with urlopen(self.INDEX_URL) as response:
            soup = BeautifulSoup(response, 'html.parser')

        for par in soup.find_all('p'):
            for anchor in par.find_all('a'):
                href = anchor.get('href')
                # Anchors without an href return None; skip them instead of crashing.
                if not href or self.INDEX_URL not in href:
                    continue
                poemlist_uncensored.append(self._fetch_poem_text(href))
        self.realpoemlist = [profanity.censor(x) for x in poemlist_uncensored]

    def get_fakepoemlist(self, openai_client, openai_model: str = OPENAI_MODEL):
        """ Get a list of fake poems, one for each real poem.
        The fake poems have the same first 80 characters of the real poems, but then Open AI writes the rest of the poem itself.
        This tends to produce poems with cliches, which are picked up by the model.

        Arguments:
        openai_client -- an OpenAI client used to extend the real poems into fake poems.
        openai_model -- The model used for the extension.
        """
        self.fakepoemlist = list()
        for poem in self.realpoemlist:
            start = poem[0:80]
            query = f"""
            Please complete the following poem's beginning.  Start with this string of characters, then add to it to write a poem of about 300
            characters.  The given string:
            -------
            {start}
            """
            chat_completion = openai_client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": query,
                    }
                ],
                model=openai_model,
            )
            # Treat a missing reply (content of None) as an empty continuation.
            completion = chat_completion.choices[0].message.content or ""
            self.fakepoemlist.append(self._join_start_and_completion(start, completion))

    def write_poems_to_jsonl(self, n: int = 9, filename=None):
        """Create JSONL training file with the first n real poems and the first n fake poems.

        Each line is one chat-format example: the system prompt, the poem as the
        user message, and the label as the assistant message (" 1" for a real
        poem, " 0" for a fake one).  Lines are shuffled before writing.

        Arguments:
        n -- how many poems of each kind to include (default 9)
        filename -- output path; defaults to FILENAME_JSONL
        """
        if filename is None:
            filename = FILENAME_JSONL
        labelled = [(poem, " 1") for poem in self.realpoemlist[0:n]]
        labelled += [(fakepoem, " 0") for fakepoem in self.fakepoemlist[0:n]]
        lst = [json.dumps(self._training_example(text, label)) for text, label in labelled]
        random.shuffle(lst)
        with open(filename, "w", encoding="utf-8") as file:
            for elt in lst:
                file.write(elt + "\n")

### I had to tell the model explicitly that cliches are a key signal for telling AI writing apart from human writing; without that hint, it couldn't do the task.

In [58]:
class FineTuningJob:
    """Uploads a JSONL training file, starts a fine tuning job and waits for the tuned model."""

    def __init__(self, openai_client, filename: str = FILENAME_JSONL, openai_model: str = OPENAI_MODEL, poll_seconds: int = 10):
        """Creates a fine tuning job based on the jsonl file from PoemList.write_poems_to_jsonl
        The name of the model is returned in self.model

        Blocks until the job reports a fine tuned model, printing a progress line
        after every poll.

        Arguments:
        openai_client -- A client for the fine tuning job
        filename -- The jsonl file that contains the training data
        openai_model -- The initial model that will be fine tuned
        poll_seconds -- Seconds to sleep between status checks (default 10)

        Raises:
        RuntimeError -- if the job ends with status "failed" or "cancelled"
        """

        # Close the training file as soon as the upload has finished.
        with open(filename, "rb") as training_file:
            self._file_object = openai_client.files.create(
                file=training_file,
                purpose="fine-tune"
            )
        self._fine_tuning_job = openai_client.fine_tuning.jobs.create(
            training_file=self._file_object.id,
            model=openai_model
        )
        self.model = None
        s = 0
        while self.model is None:
            time.sleep(poll_seconds)
            s += poll_seconds
            print("Waiting for fine tuning job to finish.  Seconds:", s)
            job = openai_client.fine_tuning.jobs.retrieve(self._fine_tuning_job.id)
            # A failed or cancelled job never gets a fine_tuned_model, so stop
            # here instead of polling forever.
            if getattr(job, "status", None) in ("failed", "cancelled"):
                raise RuntimeError(
                    f"Fine tuning job {self._fine_tuning_job.id} ended with status {job.status!r}"
                )
            self.model = job.fine_tuned_model
        print("Done")

In [59]:
def test_poem(poem, openai_client, ftj_model: str) -> str:
    """ Tests a poem to see if it's written by an AI or by a real poet.
    Arguments:
    poem -- The text of the poem
    openai_client -- The Open AI client for the test
    ftj_model -- The model from the fine tuning job that will be used for the test
    """
    response = openai_client.chat.completions.create(
        model=ftj_model,
        messages=[
            {"role": "system", "content": "You want to write 0 if a poem is written by AI and 1 if written by a human.  This usually means to write a 0 if there are cliches, and 1 if there are few or no cliches in the writing."},
            {"role": "user", "content": poem}
        ]
    )
    return "Written by a real poet" if "1" in response.choices[0].message.content else "Written by AI or amateur"

In [7]:
# End-to-end pipeline.  The order matters: each step consumes what the previous one produced.
openai_client = generate_openai_client()   # None if OPENAI_API_KEY is not set
poemlist = PoemList()
poemlist.get_realpoemlist()                # scrape and censor the real poems
poemlist.get_fakepoemlist(openai_client)   # one model-written poem per real poem
poemlist.write_poems_to_jsonl()            # write the shuffled training file
ftj = FineTuningJob(openai_client)         # blocks, printing progress, until ftj.model is set

Waiting for fine tuning job to finish.  Seconds: 10
Waiting for fine tuning job to finish.  Seconds: 20
Waiting for fine tuning job to finish.  Seconds: 30
Waiting for fine tuning job to finish.  Seconds: 40
Waiting for fine tuning job to finish.  Seconds: 50
Waiting for fine tuning job to finish.  Seconds: 60
Waiting for fine tuning job to finish.  Seconds: 70
Waiting for fine tuning job to finish.  Seconds: 80
Waiting for fine tuning job to finish.  Seconds: 90
Waiting for fine tuning job to finish.  Seconds: 100
Waiting for fine tuning job to finish.  Seconds: 110
Waiting for fine tuning job to finish.  Seconds: 120
Waiting for fine tuning job to finish.  Seconds: 130
Waiting for fine tuning job to finish.  Seconds: 140
Waiting for fine tuning job to finish.  Seconds: 150
Waiting for fine tuning job to finish.  Seconds: 160
Waiting for fine tuning job to finish.  Seconds: 170
Waiting for fine tuning job to finish.  Seconds: 180
Waiting for fine tuning job to finish.  Seconds: 190
Wa

In [8]:
# This poem was written by ChatGPT
openai_poem = """In a land of whimsy, under a candy-coated sky, 
Where laughter flowed like rivers, and pigs knew how to fly. 
A quirky tale unfolds, in rhyme and jest, 
A silly little poem, just for your zest."""

In [9]:
test_poem(openai_poem, openai_client, ftj.model)

'Written by AI or amateur'

In [10]:
# This poem was written by me
my_poem = """This is a poem.  Yes it is.  It is not a very good one.  Oh no it is not.  
But it is about butterflies, trees, and mountains."""

In [11]:
test_poem(my_poem, openai_client, ftj.model)

'Written by AI or amateur'

In [43]:
# This was written by a real poet.
real_poem = """The bud
stands for all things,
even for those things that don't flower,
for everything flowers, from within, of self-blessing;
though sometimes it is necessary
to reteach a thing its loveliness,
to put a hand on its brow
of the flower
and retell it in words and in touch
it is lovely
until it flowers again from within, of self-blessing;"""

In [51]:
test_poem(real_poem, openai_client, ftj.model)

'Written by a real poet'

In [45]:
# All of these were written by real poets.
for poem in poemlist.realpoemlist[10:]:
    print(test_poem(poem, openai_client, ftj.model))

Written by a real poet
Written by a real poet
Written by a real poet
Written by a real poet
Written by a real poet
Written by a real poet
Written by a real poet
Written by a real poet
Written by a real poet


In [46]:
# All of these were written by AI.
for poem in poemlist.fakepoemlist[10:]:
    print(test_poem(poem, openai_client, ftj.model))

Written by AI or amateur
Written by AI or amateur
Written by a real poet
Written by AI or amateur
Written by AI or amateur
Written by AI or amateur
Written by AI or amateur
Written by AI or amateur
Written by AI or amateur


In [52]:
# A string of unrelated words rather than a poem, used to probe the classifier.
# The verdict is not stable: in some runs GPT thinks a real poet wrote this,
# in others it recognizes it as a fake.
silly_poem = """Sassafrass purple guppy monkey silly water box trapeze artist giraffe monsters."""

In [53]:
test_poem(silly_poem, openai_client, ftj.model)

'Written by AI or amateur'

In [54]:
# Opening stanza of Robert Frost's "The Road Not Taken": a human-written sample
# that the classifier should label as written by a real poet.
frost_poem = """Two roads diverged in a yellow wood,
And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
To where it bent in the undergrowth;"""
test_poem(frost_poem, openai_client, ftj.model)

'Written by a real poet'