In [1]:
import numpy as np
from joblib import Parallel, delayed
import os
import random
import string
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()

openai_api_key = os.environ["OPENAI_API_KEY"]

In [2]:
chanStyleTypes = [
    "Personal Anecdotes: Stories about users' personal experiences, often with unexpected twists or humorous conclusions.",
    "Historical and Cultural References: Stories that play on historical events, cultural phenomena, or popular media, often with an ironic or satirical twist.",
    "Fantasy and Sci-Fi: Tales involving elements from fantasy or science fiction, such as time travel, mythical creatures, or futuristic scenarios.",
    "Everyday Observations: Relatable incidents or observations from daily life, presented in a humorous or exaggerated manner.",
    "Work and School Experiences: Stories about interesting, funny, or bizarre occurrences in professional or educational settings.",
    "Relationships and Social Interactions: Accounts of romantic endeavors, friendships, family dynamics, and awkward social situations.",
    "Memes and Internet Humor: Stories that reference popular memes, internet jokes, or trends.",
    "Internet and Technology: Anecdotes about experiences with technology, internet culture, and digital interactions.",
    "Philosophical and Pseudo-Intellectual Musings: Abstract, often humorous musings on life, existence, and philosophical concepts.",
    "Parody and Satire: Stories that mock or satirize various aspects of society, politics, celebrity culture, etc.",
    "Adventure and Exploration: Imaginary or exaggerated tales of adventure, exploration, or extraordinary experiences.",
    "Role Reversal and Perspective Shifts: Stories told from unconventional perspectives or featuring surprising role reversals."
]

system_prompt = """
Greentexts on 4chan cover a wide range of topics, reflecting the diverse interests and experiences of its user base.
Some of them follow the following pattern and end in a thank you

{{"0": "> Wake up late\n> Realize it's weekend\n> No alarms\n> Feeling bliss\n> Thank you weekend",
  "1": "> Go for a walk\n> See a random dog\n> Dog wags tail\n> Best friends for 5 minutes\n> Thank you random dog",
  "2": "> Have a tough day\n> Find a forgotten chocolate bar in my bag\n> Suddenly not so bad\n> Thank you chocolate bar"}}

The stories are for adult language learners. Therefore, use only vocabulary a 3-4 year old native speaker can understand.
"""
def generate_prompt():
    prompt = f"""{system_prompt} Your generation should be of the theme: {chanStyleTypes[np.random.randint(len(chanStyleTypes))]}\nGenerate a json with 25 original examples that end with a thank you by the narrator to the object of the story. Produce a valid json string. Use only single-quotes inside the object values. Answer only with the json object."""
    return prompt
generate_prompt()


'\nGreentexts on 4chan cover a wide range of topics, reflecting the diverse interests and experiences of its user base.\nSome of them follow the following pattern and end in a thank you\n\n{{"0": "> Wake up late\n> Realize it\'s weekend\n> No alarms\n> Feeling bliss\n> Thank you weekend",\n  "1": "> Go for a walk\n> See a random dog\n> Dog wags tail\n> Best friends for 5 minutes\n> Thank you random dog",\n  "2": "> Have a tough day\n> Find a forgotten chocolate bar in my bag\n> Suddenly not so bad\n> Thank you chocolate bar"}}\n\nThe stories are for adult language learners. Therefore, use only vocabulary a 3-4 year old native speaker can understand.\n Your generation should be of the theme: Memes and Internet Humor: Stories that reference popular memes, internet jokes, or trends.\nGenerate a json with 25 original examples that end with a thank you by the narrator to the object of the story. Produce a valid json string. Use only single-quotes inside the object values. Answer only with t

In [3]:
def generate_random_filename(length=12, extension=".json"):
    characters = string.ascii_letters + string.digits
    return ''.join(random.choice(characters) for _ in range(length)) + extension

def save_string_to_json(content, directory):
    filename = generate_random_filename()
    with open(os.path.join(directory, filename), 'w') as file:
        file.write(content)

In [4]:
def handle_openai_json_format(json_string):
    parsed_json_string = json_string.replace("{\n", "{").replace("{ \n","{").replace(",\n",",").replace('"\n"','","').replace("\n}","}").replace("\n }","}").replace("}\n","}").replace(",}", "}").replace("\n", "\\n")
    return parsed_json_string

def make_openai_request(api_key, prompt):
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": prompt,
        }
    ],
    model="gpt-3.5-turbo")
    return handle_openai_json_format(response.choices[0].message.content)

def save_openai_response_to_file(response, directory):
    save_string_to_json(response, directory=directory)

def make_and_save_request(api_key, prompt, output_directory="./thanks_output/"):
    response = make_openai_request(api_key, prompt)
    save_openai_response_to_file(response, directory=f"{output_directory}/")

def get_openai_data(api_key, num_requests=2):
    prompts = [generate_prompt() for _ in range(num_requests)]

    Parallel(n_jobs=-1, verbose=10)(
        delayed(make_and_save_request)(api_key, prompt) for prompt in prompts
    )

In [5]:
get_openai_data(openai_api_key, num_requests=2500)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   29.6s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done  45 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done  69 tasks      | elapsed:  5.3min
[Parallel(n_jobs=-1)]: Done  82 tasks      | elapsed:  6.2min
[Parallel(n_jobs=-1)]: Done  97 tasks      | elapsed:  7.4min
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:  8.4min
[Parallel(n_jobs=-1)]: Done 129 tasks      | elapsed:  9.6min
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed: 10.8min
[Parallel(n_jobs=-1)]: Done 165 tasks      | elapsed: 12.0min
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed: 13.5min
[Parallel(n_jobs=-1)]: Done 205 tasks      | elapsed: 14.7min
[Paralle