In [13]:
# Instruction template sent to the model to obtain a writing prompt for a chapter.
# `<BOOK>` is a placeholder that process_scene() replaces with the book name.
# The wording refers to "the above chapter", so the chapter text has to precede
# this instruction in the conversation that is sent to the model.
create_prompt_prompt = """
Given the above chapter from <BOOK>, your task is to craft a prompt to elicit a similar chapter on essentially the same topic & contents as this chapter, without the model seeing its contents. Your prompt should describe the set-up for the chapter, the style, tone & setting, time period, the characters in generalities, and the names of the characters. It should not mention the author or title.

The prompt should be one paragraph long. Do not add any additional commentary.
"""

In [None]:
import os
import time

import openai

# Take the key from the OPENAI_API_KEY environment variable so the secret never
# has to be pasted into (and saved with) the notebook. Falls back to the empty
# string, which is what this cell used before, when the variable is not set.
api_key = os.environ.get("OPENAI_API_KEY", "")
openai_client = openai.OpenAI(
    api_key=api_key,
    #base_url=base_url
)
def run_openai_query(prompt, history, completion_tokens, temp, model, openai_client):
    """Send one chat-completion request and return the reply text.

    Args:
        prompt: Text of the final user message.
        history: List of earlier chat messages; it is not modified, the user
            message is appended to a copy.
        completion_tokens: Value passed as ``max_tokens``.
        temp: Value passed as ``temperature``.
        model: Model name passed to the API.
        openai_client: Client object exposing ``chat.completions.create``.

    Returns:
        The reply content with surrounding whitespace stripped, or ``None``
        when the reply is empty or the request raised an exception.

    Any failure is reported with ``print`` and followed by a 5 second pause;
    exceptions are not propagated to the caller.
    """
    response = None
    try:
        conversation = history + [{"role": "user", "content": prompt}]
        response = openai_client.chat.completions.create(
            model=model,
            temperature=temp,
            max_tokens=completion_tokens,
            messages=conversation,
        )
        reply_text = response.choices[0].message.content

        if not reply_text:
            # The API answered, but with no usable content.
            print(response)
            print('Error: message is empty')
            time.sleep(5)
            return None

        return reply_text.strip()

    except Exception as e:
        # `response` is still None here if the request itself failed.
        print(response)
        print("Request failed.")
        print(e)
        time.sleep(5)

    return None

In [15]:
import json
import os
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

# Read the cleaned scenes, then pick up prompts saved by a previous run (if any)
with open('data/site/processing/site_scenes_fiction_cleaned.json', 'r') as f:
    scenes = json.load(f)

outfile = "data/site/processing/site_scenes_with_prompts_fiction.json"
if os.path.exists(outfile):
    # Resume: entries already in this file are skipped by the main loop.
    with open(outfile, 'r') as f:
        prompts = json.load(f)
else:
    prompts = {}

# Function to process each scene
def process_scene(scene, fn, book):
    """Ask the model for a writing prompt that describes one scene.

    Args:
        scene: Full text of the scene/chapter.
        fn: Key of the source file the scene came from; stored as "source".
        book: Book name substituted for ``<BOOK>`` in ``create_prompt_prompt``.

    Returns:
        A dict with keys "source", "chosen" (the scene text) and "prompt"
        (the model's reply), or ``None`` when the scene is skipped or no
        prompt could be generated.
    """
    if len(scene) < 4000:
        return None  # Skip scenes that are too short
    if 'gutenberg' in scene.lower():
        return None  # Skip scenes mentioning 'gutenberg'

    # The instruction template talks about "the above chapter", so the scene
    # text has to be placed in front of it; otherwise the model is asked to
    # describe a chapter it was never shown.
    instructions = create_prompt_prompt.replace('<BOOK>', book)
    prompt = scene + "\n" + instructions
    response = run_openai_query(prompt, [], 1024, 1, "chatgpt-4o-latest", openai_client)

    if not response:
        # The request failed or came back empty: drop the scene instead of
        # saving a record whose "prompt" is null.
        return None

    # Return the processed scene
    return {
        "source": fn,
        "chosen": scene,
        "prompt": response
    }

# Main loop to iterate through files and scenes.
# Each source file is processed as a unit and the results are written to
# `outfile` right after it finishes, so an interrupted run can be resumed.
for fn, v in tqdm(scenes.items()):
    if fn in prompts:
        # Already present in the results loaded from `outfile`.
        print('skipping', fn)
        continue

    this_scenes = []
    # str.strip('.zip') would remove any of the characters '.', 'z', 'i', 'p'
    # from BOTH ends (e.g. 'pride.zip' -> 'ride'), so cut off only a trailing
    # '.zip' extension instead.
    book = fn[:-len('.zip')] if fn.endswith('.zip') else fn

    # Use ThreadPoolExecutor for multithreading the inner loop
    with ThreadPoolExecutor() as executor:
        futures = {executor.submit(process_scene, scene, fn, book): scene for scene in v}

        # Results are collected in completion order, not in scene order.
        for future in as_completed(futures):
            result = future.result()
            if result:  # If the result is not None (i.e., scene is valid)
                this_scenes.append(result)

    # Update prompts with processed scenes
    prompts[fn] = this_scenes

    # Save the updated prompts to file
    with open(outfile, 'w') as f:
        json.dump(prompts, f, indent=2)


100%|██████████| 64/64 [09:24<00:00,  8.83s/it]
