# setup notebook

In [1]:
import os
from getpass import getpass
import openai

from dotenv import load_dotenv

load_dotenv()

if os.getenv("OPENAI_API_KEY") is None:
  if any(['VSCODE' in x for x in os.environ.keys()]):
    print('Please enter password in the VS Code prompt at the top of your VS Code window!')
  os.environ["OPENAI_API_KEY"] = getpass("Paste your OpenAI key from: https://platform.openai.com/account/api-keys\n")
  openai.api_key = os.getenv("OPENAI_API_KEY", "")

assert os.getenv("OPENAI_API_KEY", "").startswith("sk-"), "This doesn't look like a valid OpenAI API key"
print("OpenAI API key configured")

OpenAI API key configured


# load book into memory

In [2]:
# llamaindex document type is used here because it allows us to file metadata.
from llama_index import SimpleDirectoryReader
# novel = SimpleDirectoryReader("data/jd_salinger/pdf").load_data()
novel = SimpleDirectoryReader("data/anne_tyler/pdf").load_data()

# data models

In [3]:
from pydantic import BaseModel, Field
from typing import List, Optional
from typing_extensions import Literal
from collections import defaultdict


class Character(BaseModel):
    id: int = Field(
        ...,
        description="Unique identifier for the event, used for deduplication, design a scheme that allows for multiple events"
    )
    name: str = Field(
        ...,
        description="For sake of precision and deduplication, should be the actual name of the characters, if not provided, should be 'Not Available'"
    )
    gender: Literal["Male", "Female", "N/A"]
    aliases: List[str] = Field(
        ...,
        description="Names, TItles, Promouns usd in refernce to or that describe the character"
    )

class Location(BaseModel):
    id: int = Field(
        ...,
        description="Unique identifier for the event, used for deduplication, design a scheme that allows for multiple events"
    )
    name: str

class Event(BaseModel):
    id: int
    name: str
    startPage: int = Field(
        ...,
        description="The page the event begins"
    )
    endPage: int = Field(
        ...,
        description="The page where the event ends"
    )
    eventType: Literal["Narrative Development", "Character Interaction", "Action Sequences", "Plot Dynamics", "Climax and Resolution", "Humor and Tone", "Conflict and Resolution", "Character Development", "Contextual Flashback or Information"]
    summary: str
    characters: List[int] = Field(
        ...,
        description="List of the character IDs of characters participating in or with a connection to the event"
    )
    locations: List[int] = Field(
        ...,
        description="List of the location IDs of locations in relation to an event"
    )

class EventRepository(BaseModel):
    events: Optional[List[Event]] = Field(..., default_factory=list)
    allCharactersInBook: List[Character] = Field(
        ...,
        default_factory=list,
        description="Updated list of characters in the book"
    )
    allLocationsInBook: List[Location] = Field(
        ...,
        default_factory=list,
        description="Updated list of locations in the book"
    )

    def update(self, other: "EventRepository") -> "EventRepository":
        """Updates the current repository with the other repository, deduplicating events and unassigned characters."""
        return EventRepository(
            events=update_and_deduplicate_items(self.events, other.events),
            allCharactersInBook=update_and_deduplicate_items(self.allCharactersInBook, other.allCharactersInBook),
            allLocationsInBook=update_and_deduplicate_items(self.allLocationsInBook, other.allLocationsInBook)
        )
    
    def get_events_by_character(self) -> dict[int, List[Event]]:
        """Returns a dictionary mapping character IDs to a list of events they are associated with."""
        character_events_mapping = defaultdict(list)

        # Iterate through events and update the mapping
        for event in self.events:
            for character_id in event.characters:
                character_events_mapping[character_id].append(event)

        return dict(character_events_mapping)


def update_and_deduplicate_items(existing_list: List[Event]|List[Character]|List[Location], new_list: List[Event]|List[Character]|List[Location]) -> List[Event]|List[Character]|List[Location]:
    updated_list = existing_list.copy()

    # Update existing items with new information
    for new_item in new_list:
        existing_item_index = next((i for i, item in enumerate(updated_list) if item.id == new_item.id), None)

        if existing_item_index is not None:
            # If the item with the same id already exists, update the information
            if type(new_item) == Character:
                updated_list[existing_item_index].gender = new_item.gender
                updated_list[existing_item_index].aliases = list(set(updated_list[existing_item_index].aliases + new_item.aliases))
            else:
                updated_list[existing_item_index] = new_item
        else:
            # If the item with the same id does not exist, add the new character
            updated_list.append(new_item)

    # Deduplicate based on id
    updated_list = list({item.id: item for item in updated_list}.values())

    return updated_list

In [4]:
import json
from pydantic import ValidationError

# Read data from the file
with open('response/response_20240217061813.txt', 'r') as file:
    data = json.load(file)

# Define functions to validate and parse the data
def validate_and_parse(data):
    try:
        event_repository = EventRepository.model_validate(data)
        return event_repository
    except ValidationError as e:
        print(f"Validation error: {e}")
        return None

# Validate and parse the data
event_repository = validate_and_parse(data)

if event_repository:
    # Now you can work with the parsed data
    print(event_repository.events)
    print(event_repository.allCharactersInBook)
    print(event_repository.allLocationsInBook)

[Event(id=1, name='Reflection on Trustworthiness', startPage=7, endPage=7, eventType='Character Development', summary='The protagonist reflects on his perceived trustworthiness by his customers, considering the responsibilities and trust they place in him through various tasks.', characters=[1], locations=[1]), Event(id=2, name='Waiting at Baltimore Railroad Station', startPage=7, endPage=7, eventType='Narrative Development', summary='The protagonist waits at the Baltimore railroad station, intending to travel to Philadelphia to see his daughter.', characters=[1], locations=[1]), Event(id=3, name='Seeking Help for Passport Transport', startPage=8, endPage=9, eventType='Character Interaction', summary='A well-off man frantically seeks help at the train station to transport a passport to Philadelphia for his daughter.', characters=[2], locations=[1]), Event(id=4, name='Skeptical Observation', startPage=9, endPage=9, eventType='Character Development', summary="The protagonist expresses sk

In [5]:
for character in event_repository.allCharactersInBook:
    print(character)

id=1 name='Protagonist' gender='Male' aliases=['narrator']
id=2 name='Well-off Man' gender='Male' aliases=['gray-haired man']
id=3 name='Woman in Feather Coat' gender='Female' aliases=['schoolmarm sort']
id=4 name='Mrs. Rodney' gender='Female' aliases=['customer']
id=5 name='Daughter' gender='Female' aliases=['little girl']
id=6 name='Lawyer' gender='Male' aliases=['stepfather']
id=7 name='Sophia' gender='Female' aliases=['Sophia Maiden']
id=8 name='Esther Brimm' gender='Female' aliases=['daughter with red hair']
id=9 name='Opal' gender='Female' aliases=['daughter']
id=10 name='Natalie' gender='Female' aliases=['ex-wife']


In [6]:
events_by_character = event_repository.get_events_by_character()

In [7]:
assignedCharacters = [i for i in events_by_character.keys()] # ids of character who participated in narrative contributing events

In [8]:
unassignedCharacters = [character.id for character in event_repository.allCharactersInBook if not character.id in assignedCharacters] # ids of characters in book who were not assigned to narrative contributing events

In [9]:
print(assignedCharacters)
print(unassignedCharacters)

[1, 2, 3, 7, 8, 9, 10]
[4, 5, 6]


In [10]:
# class CharacterWithEvents(Character):

In [11]:
events_by_character

{1: [Event(id=1, name='Reflection on Trustworthiness', startPage=7, endPage=7, eventType='Character Development', summary='The protagonist reflects on his perceived trustworthiness by his customers, considering the responsibilities and trust they place in him through various tasks.', characters=[1], locations=[1]),
  Event(id=2, name='Waiting at Baltimore Railroad Station', startPage=7, endPage=7, eventType='Narrative Development', summary='The protagonist waits at the Baltimore railroad station, intending to travel to Philadelphia to see his daughter.', characters=[1], locations=[1]),
  Event(id=4, name='Skeptical Observation', startPage=9, endPage=9, eventType='Character Development', summary="The protagonist expresses skepticism about the well-off man's story and motives.", characters=[1, 2, 3], locations=[1]),
  Event(id=6, name="Narrator's Frustration and Reflection", startPage=11, endPage=12, eventType='Character Development', summary='The narrator, unable to sit next to Sophia d

In [12]:
event_insert = f"Event: {event_repository.events[0]}\n"
characters_insert = f"Characters: {event_repository.allCharactersInBook}"
print(event_insert+characters_insert)
# print(characters_insert)

Event: id=1 name='Reflection on Trustworthiness' startPage=7 endPage=7 eventType='Character Development' summary='The protagonist reflects on his perceived trustworthiness by his customers, considering the responsibilities and trust they place in him through various tasks.' characters=[1] locations=[1]
Characters: [Character(id=1, name='Protagonist', gender='Male', aliases=['narrator']), Character(id=2, name='Well-off Man', gender='Male', aliases=['gray-haired man']), Character(id=3, name='Woman in Feather Coat', gender='Female', aliases=['schoolmarm sort']), Character(id=4, name='Mrs. Rodney', gender='Female', aliases=['customer']), Character(id=5, name='Daughter', gender='Female', aliases=['little girl']), Character(id=6, name='Lawyer', gender='Male', aliases=['stepfather']), Character(id=7, name='Sophia', gender='Female', aliases=['Sophia Maiden']), Character(id=8, name='Esther Brimm', gender='Female', aliases=['daughter with red hair']), Character(id=9, name='Opal', gender='Female'

In [13]:
def get_character_by_id(character_id: int) -> Optional[Character]:
    for character in event_repository.allCharactersInBook:
        if character.id == character_id:
            return character
    return None

def get_other_characters(character_id: int) -> List[Character]:
    other_characters = []
    for character in event_repository.allCharactersInBook:
        if character.id != character_id:
            other_characters.append(character)
    return other_characters

In [14]:
# getting character information.
# iterative development of character information from each event page.
# you are a iterative, character development writer. Given a character and his information, alongside events heh participated in. You are to get craft more information on the character.
# for each event, you will be given the part of the book that contribute to it. You are to extract new information from there and rewrite the character biography and return relationships

# run similarity search for unassigned characters.

class Relationship(BaseModel):
    related_to: int = Field(
        ...,
        description="ID of character related to"
    )
    relation: str = Field(
        ...,
        description=""
    )

class CharacterWithInformation(Character):
    id: Optional[int] = Field(
        ...,
        description="Unique identifier for the event, used for deduplication, design a scheme that allows for multiple events",
        default_factory=int
    )
    name: Optional[str] = Field(
        ...,
        description="For sake of precision and deduplication, should be the actual name of the characters, if not provided, should be 'Not Available'",
        default_factory=str
    )
    gender: Optional[Literal["Male", "Female", "N/A"]] = Field(..., default_factory=str)
    aliases: Optional[List[str]] = Field(
        ...,
        description="Names, TItles, Promouns usd in refernce to or that describe the character",
        default_factory=list
    )
    biography: Optional[str] = Field(..., default_factory=str)
    characterDevelopment: Optional[str] = Field(..., default_factory=str)
    # aliases: Optional[List[str]]  =Field(..., default_factory=list)
    # relationships: List[Relationship] = Field(..., default_factory=list)
    
    # def update_relationships(self, existing_list, new_list):
    #     updated_list = existing_list.copy()

    #     # Update existing items with new information
    #     for new_relationship in new_list:
    #         existing_relationship_index = next((i for i, relationship in enumerate(updated_list) if relationship.related_to == new_relationship.related_to), None)

    #         if existing_relationship_index is not None:
    #             # If the item with the same id already exists, update the information
    #             updated_list[existing_relationship_index].relation = (updated_list[existing_relationship_index].relation + f" {new_relationship.relation}").rstrip()
    #         else:
    #             # If the item with the same id does not exist, add the new character
    #             updated_list.append(new_relationship)

    #     # Deduplicate based on id
    #     updated_list = list({relationship.related_to: relationship for relationship in updated_list}.values())

    #     return updated_list

    def update(self, other: "CharacterWithInformation") -> "CharacterWithInformation":
        return CharacterWithInformation(
            name=other.name,
            gender=other.gender,
            biography=other.biography,
            characterDevelopment=other.characterDevelopment,
            # aliases=list(set(self.aliases+other.aliases)),
            aliases=self.aliases,
            # relationships=self.update_relationships(self.relationships, other.relationships)
        )

        

system_message = """"""
user_prompt = """"""

def get_assigned_character_information(client: openai.OpenAI , character_id: int) -> CharacterWithInformation:
    character = get_character_by_id(character_id)
    cur_state = CharacterWithInformation(
        id=character.id,
        name=character.name,
        gender=character.gender,
        aliases=character.aliases,
        # relationships=[]
    )
    
    other_characters = get_other_characters(character_id)

    character_events = events_by_character[character_id]
    num_iterations = len(character_events)
    print(F"Number of events: {num_iterations}")

    for i, event in enumerate(events_by_character[character_id]):
        num_pages = len(list(range(event.startPage, event.endPage+1)))
        print(f"Generating information on {character.name} from : {i+1} of  {num_iterations}")
        event_insert = f"Event: {event}\n"
        print(f"There are {num_pages} page(s) in this event")

        pages = ["\n\n{\npage: " + f"{novel[page-1].metadata["page_label"]}" + ", \ncontent: '" + f"{novel[page-1].text}" + "'\n}" for page in range(event.startPage, event.endPage+1)]
        pages_insert = ""
        for page in pages:
            pages_insert += page + "\n\n"

        new_updates = client.chat.completions.create(
            model="gpt-4-turbo-preview",
            temperature=0.7,
            response_model=CharacterWithInformation,
            messages=[
                {
                    "role": "system",
                    "content": system_message
                },
                {
                    "role": "user",
                    "content": (
                        f"""Given this new data, rewrite information on the {character.name}:
                        # Event {i}/{num_iterations} {character.name} participated in:
                        """ + event_insert + pages_insert
                    )
                },
                {
                    "role": "user",
                    "content": f"""Here is the current information on the {character.name}:
                    {cur_state.model_dump_json(indent=2)}
                    
                    The list of the other characters in the book:
                    {other_characters}"""
                },
            ]
        )
        cur_state = cur_state.update(new_updates)
        print(f"\n{cur_state}\n\n")
    return cur_state

# for character in assignedCharacters:


In [15]:
[1, 2, 3, 4, 5][-4:]

[2, 3, 4, 5]

In [16]:
# system_message = """# MISSION
# You are a iterative fictional character researcher. You will be given the current information on a character along with the list of the other characters in the book, the current event that the character's information is to be extracted from and the book pages associated with that event. You are to use these to extract new information on the character.

# # INFORMATION NEEDED ON CHARACTER
# The following data is to be generated for the character: id, name, gender, aliases, biography, character development, relationships with other characters.

# # RULES
# At each iteration, completely rewrite the biography and character development, taking the new data into consideration."""
# user_prompt = """"""

system_message = """# LETTER DETAILING MISSION
Dear Recursive Fictional Character Researcher,

I entrust you with this character, along with the events they have participated in and the corresponding pages chronicling those events. Your mission is to unravel the intricate layers of this character's existence, crafting a detailed biography that captures their development over time in the context of the whole book.

Delve into the narrative tapestry, exploring the nuances of each event and extracting the essence of the character's experiences. Your output should reflect the evolution of the character, drawing connections between their past actions and potential future trajectories. Embrace the recursive nature of your task, building upon the information provided in previous turns to weave a coherent and engaging narrative.

In each interaction, illuminate the character's journey, offering insights into their motivations, conflicts, and growth. Your output should be a dynamic reflection of the character's evolving identity based on the input you receive.

May your recursive exploration unfold a captivating story, revealing the depths of this character's fictional existence.

Best regards,
Otto

# INFORMATION NEEDED ON CHARACTER
The following data is to be generated for the character: id, name, gender, aliases, biography, character development, relationships with other characters.

# PROCESS
First, you need to restate what the user is asking for in your own words. Use this first step to clarify and distill the overall flow.

Second, you need to identiify the character in the information provided.

Third, write all information from the perspective of the character."""

In [17]:
import instructor

client = instructor.patch(openai.OpenAI())

In [18]:
(get_character_by_id(2)).name

'Well-off Man'

In [20]:
holden = get_assigned_character_information(client, 1)

Number of events: 9
Generating information on Protagonist from : 1 of  9
There are 1 page(s) in this event

id=0 name='Protagonist' gender='Male' aliases=['narrator'] biography='The protagonist is a man who provides a variety of services to his customers, such as clearing attics, installing air conditioners, and moving furniture. His customers trust him implicitly, entrusting him with their house keys and valuable possessions without hesitation. This reflects on his perceived trustworthiness and the responsibilities that come with it. Despite this, he harbors doubts about his own trustworthiness, pondering over the trust his customers place in him and the potential for misuse of that trust, which he ultimately decides against. This internal reflection showcases his moral compass and the conflict between perception and self-perception.\n\nIn a personal context, the protagonist is seen waiting for a train to Philadelphia, where his daughter lives with her mother and stepfather, a lawyer.