# In [6]:
import spacy
import random
import string
import jsonlines
import nltk
from nltk.corpus import wordnet

# Load input text file.
# The encoding is pinned so decoding does not depend on the platform's locale
# default (which differs between operating systems).
with open("input_text.txt", "r", encoding="utf-8") as file:
    input_text = file.read()

# Pre-process input text by removing punctuation and converting to lowercase.
# NOTE(review): string.punctuation contains "?", so after this step no token
# derived from input_text can contain a question mark, which makes the
# `"?" in token.text` dialogue check further down unreachable. Lowercasing
# also removes capitalisation, which presumably hurts named-entity
# recognition — confirm this normalisation is really wanted before NER.
input_text = input_text.translate(str.maketrans("", "", string.punctuation))
input_text = input_text.lower()

# Load SpaCy model for English language
nlp = spacy.load("en_core_web_sm")

# Define function to find synonyms for a given word using WordNet
def find_synonyms(word):
    """Return the set of WordNet lemma names found for *word*.

    Every synset that WordNet reports for *word* contributes the names of all
    its lemmas, with underscores replaced by spaces. The result is an empty
    set when WordNet returns no synsets for *word*.
    """
    return {
        lemma.name().replace("_", " ")
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    }

# Collect WordNet synonyms for tokens tagged as settings, characters or types,
# and collect tokens containing "?" as dialogue.
settings = set()
characters = set()
types = set()
dialogue = set()
synonym_error_log = []

# Entity label -> (set that receives the synonyms, log-message prefix used
# when WordNet yields nothing for the token).
# NOTE(review): the stock en_core_web_sm NER component is documented with
# labels such as PERSON, GPE, LOC and FAC; verify that the loaded pipeline
# actually emits SETTING / CHARACTER / TYPE, otherwise none of these buckets
# is ever filled.
_LABEL_BUCKETS = {
    "SETTING": (settings, "No synonyms found for setting: "),
    "CHARACTER": (characters, "No synonyms found for character: "),
    "TYPE": (types, "No synonyms found for type: "),
}

for tok in nlp(input_text):
    bucket = _LABEL_BUCKETS.get(tok.ent_type_)

    if bucket is None:
        # Not one of the tracked entity labels: the only remaining interest
        # is whether the token text carries a question mark.
        if "?" in tok.text:
            dialogue.add(tok.text)
        continue

    target, log_prefix = bucket
    found = find_synonyms(tok.text)
    if found:
        target.update(found)
    else:
        synonym_error_log.append(f"{log_prefix}{tok.text}")

# Write synonym error log to file, one message per line. Nothing is written
# when the log is empty.
# The messages embed raw token text, so the encoding is set explicitly rather
# than relying on the locale default, which may be unable to encode it.
if synonym_error_log:
    with open("synonym_error_log.txt", "w", encoding="utf-8") as file:
        file.write("\n".join(synonym_error_log))

# Fall back to a fixed list of defaults for every collection that came out
# empty; a non-empty collection is kept exactly as it is.
settings = settings or [
    "a mysterious place",
    "a futuristic city",
    "a medieval castle",
    "a deserted island",
    "a haunted mansion",
]

characters = characters or ["John", "Mary", "Tom", "Samantha", "David"]

types = types or ["hero", "villain", "sidekick", "love interest", "mentor"]

dialogue = dialogue or [
    "What's going on?",
    "I can't believe it!",
    "We have to hurry!",
    "It's not safe here!",
    "What do we do now?",
]

# Generate prompts and responses and write to JSONL file.
# Records cycle through three prompt kinds (setting, character, dialogue);
# each consecutive group of three records shares one 1-based panel number.

# random.choice needs a sequence, and the pools may be sets. Convert them once
# here instead of rebuilding the same lists for every record.
setting_pool = list(settings)
character_pool = list(characters)
type_pool = list(types)
dialogue_pool = list(dialogue)

with jsonlines.open('comic_strip_prompts_nltk.jsonl', mode='w') as writer:
    # NOTE(review): 500 is not a multiple of 3, so the last panel (167) gets a
    # setting and a character record but no dialogue record — confirm intended.
    for i in range(500):
        panel_index, kind = divmod(i, 3)
        panel_label = f"Panel {panel_index + 1}: "

        if kind == 0:
            prompt = panel_label + "Describe the setting for the panel."
            response = random.choice(setting_pool)
        elif kind == 1:
            prompt = panel_label + "Create a character for the panel."
            # Draw the name before the type so the sequence of RNG calls
            # matches a plain left-to-right "name, a type" construction.
            name = random.choice(character_pool)
            role = random.choice(type_pool)
            response = f"{name}, a {role}"
        else:
            prompt = panel_label + "Write some dialogue for the panel."
            response = random.choice(dialogue_pool)

        # One JSON object per line, with the keys 'prompt' and 'completion'.
        writer.write({
            'prompt': prompt,
            'completion': response,
        })
