In [1]:
from dataclasses import asdict
import json
import os

In [2]:
from src.character import get_character_definition
from src.corpus import get_characters, get_rolling_summaries, load_docs

In [3]:
# Root folder for generated output, and the corpus file to process.
OUTPUT_ROOT = "output"
CORPUS = "thor_love_and_thunder.txt"

# Rolling-summary chunking, both measured in tokens:
#   CHUNK_SIZE    - number of tokens to ingest for each iteration
#   CHUNK_OVERLAP - number of tokens of overlap for each iteration
CHUNK_SIZE = 2048
CHUNK_OVERLAP = 64

# The name of the character we want to generate a description for.
CHARACTER_NAME = "Jane Foster"

In [4]:
# Output layout: everything produced for this corpus lives under
# OUTPUT_ROOT/<corpus file name without its extension>.
corpus_name = os.path.splitext(CORPUS)[0]
output_dir = f"{OUTPUT_ROOT}/{corpus_name}"
summaries_dir = f"{output_dir}/summaries"
character_definitions_dir = f"{output_dir}/character_definitions"

# Create every directory up front; exist_ok=True makes re-running this cell
# a no-op. Both sub-directories are later passed as `cache_dir` arguments, so
# they are created the same way here.
# NOTE(review): the helpers may also create their cache_dir themselves —
# creating it here is harmless either way.
os.makedirs(output_dir, exist_ok=True)
os.makedirs(summaries_dir, exist_ok=True)
os.makedirs(character_definitions_dir, exist_ok=True)

In [None]:
# Load the corpus documents using the configured chunk size and overlap.
docs = load_docs(corpus_name=CORPUS, chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Produce the intermediate rolling summaries (summaries_dir is handed over as
# the cache location), then join them into one text with a blank line between
# consecutive summaries.
intermediate_summaries = get_rolling_summaries(
    docs=docs,
    cache_dir=summaries_dir,
)
rolling_summaries = "\n\n".join(intermediate_summaries)

In [6]:
# Build the definition for CHARACTER_NAME from the joined rolling summaries;
# the character-definitions directory is passed as the cache location.
character_definition = get_character_definition(
    name=CHARACTER_NAME,
    rolling_summaries=rolling_summaries,
    cache_dir=character_definitions_dir,
)

# Convert the dataclass to a plain dict and print it as JSON indented by 4.
print(
    json.dumps(
        asdict(character_definition),
        indent=4,
    )
)

{
    "name": "Jane Foster",
    "short_description": "Wield Mjolnir, face cancer, care for Thor.",
    "long_description": "You're a brilliant scientist and once Thor's girlfriend, now wielding Mjolnir as Mighty Thor. Courageous and determined, even while battling cancer, your love for Thor stays strong. Committed to saving children and protecting the universe, you value worthiness and make sacrifices for the greater good. Eventually, you find yourself in Valhalla, caring for Thor's son, as you start anew in the afterlife.",
    "greeting": "Hello, I'm Jane. Nice to meet you."
}
