In [5]:
import os
import re
from langchain_openai import ChatOpenAI

In [6]:
# Load KEY=VALUE pairs from the local .env file into the process environment.
with open('.env') as f:
    for line in f.read().splitlines():
        line = line.strip()
        # Blank lines and comments carry no assignment; skipping them avoids
        # the unpacking error a bare split('=') would raise.
        if not line or line.startswith('#'):
            continue
        # partition() splits on the FIRST '=' only, so values that themselves
        # contain '=' (base64 keys, URLs with query strings) stay intact.
        k, sep, v = line.partition('=')
        if not sep:
            continue
        os.environ[k.strip()] = v.strip()

In [7]:
# Chat client used for every model call below; the key and the optional custom
# endpoint both come from the environment populated from .env.
oai_llm = ChatOpenAI(
    model="gpt-4",
    api_key=os.environ["OAI_KEY_1"],
    base_url=os.environ.get("OAI_BASE_URL"),
)

In [8]:
# System message placed first in every chat request built in this notebook.
system_prompt = "You are a helpful storywriting assistant."

In [9]:
# Read the whole chapter into memory; the context manager closes the file
# handle instead of leaving it to the garbage collector.
with open("out/chapter_5.txt") as chapter_file:
    chapter = chapter_file.read()

Identify the characters in the chapter so they can be used later when marking whose turn each line is.

In [14]:
# Ask the model for a numbered list of the characters appearing in the chapter.
character_request = f"""Give the following story, please point out each named character present in the Scene. List only the characters in a list like the following
1. Character 1 (Also referred to as X)
2. Character 2

{chapter}"""

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": character_request},
]

# Keep the raw text reply; later cells parse it line by line.
characters = oai_llm.invoke(messages).content
print(characters)

1. Erik Maria Bark (Also referred to as Erik)
2. Simone (Also referred to as Sixan)
3. Joona Linna (Also referred to as Linna)
4. Daniella Richards (Also referred to as Daniella)
5. Erik's son (Unnamed)
6. Magnus (Mentioned but not present in the scene)
7. The blond man (Unnamed)
8. The fifteen-year-old boy (Unnamed, not present in the scene but referred to)
9. Two uniformed cops (Unnamed)



In [None]:
def get_lines(lines, i, context=3):
    """Return line ``i`` together with up to ``context`` lines on each side.

    The window is clipped at both ends of ``lines`` and the selected lines are
    joined with newlines into a single string.
    """
    first = max(i - context, 0)
    last_exclusive = min(i + context + 1, len(lines))
    window = lines[first:last_exclusive]
    return "\n".join(window)

Here we go over each line of the chapter adding character markers to indicate the turn of the actions and speech. We add surrounding lines and the list of characters as context.

In [None]:
# Number of chapter lines to annotate in this demo run (the original cut-off
# `if i > 16: break` processed indices 0..17, i.e. 18 lines).
DEMO_LINE_LIMIT = 18

lines = chapter.splitlines()

# `characters` is the model's raw reply (one string holding a numbered list).
# Joining that string directly would put ", " between every single letter, so
# strip the "N. " prefixes and join the list entries instead.
character_list = ", ".join(
    re.sub(r"^\d+\.\s*", "", entry.strip())
    for entry in characters.splitlines()
    if entry.strip()
)

# Accumulates the marked-up lines, one per processed chapter line.
output = ""
for i, line in enumerate(lines[:DEMO_LINE_LIMIT]):
    # Two worked examples (few-shot) followed by the real request for `line`,
    # each giving surrounding context, the character list and the target line.
    messages = [
        { "role": "system", "content": system_prompt },
        { "role": "user", "content": f"""Please insert markers for when the character acting changes, you will be given the surrouding context to help. You should only add markers to the single linge given.
The detective rapidly summarizes the patient's status, concluding, "He hasn't been stabilized. He's in circulatory shock and unconscious."
"Who's the doctor in charge?" asks Erik.
"Daniella Richards."
"She's extremely capable. I'm sure she can—"
"She was the one who asked me to call you. She needs your help. It's urgent."
When Erik returns to the bedroom to get his clothes, Simone is lying on her back, looking at him with a strange, empty expression. A strip of light from the streetlamp is shining in between the blinds.
"I didn't mean to wake you," he says softly.
         
Here are the characters in the schene:
{character_list}

Here is the line that you must add the markers to:
"She's extremely capable. I'm sure she can—\""""},
        { "role": "assistant", "content": """<Erik>"She's extremely capable. I'm sure she can—\""""},
        { "role": "user", "content": f"""Please insert markers for when the character acting changes, you will be given the surrouding context to help. You should only add markers to the single linge given.
"Daniella Richards."
"She's extremely capable. I'm sure she can—"
"She was the one who asked me to call you. She needs your help. It's urgent."
When Erik returns to the bedroom to get his clothes, Simone is lying on her back, looking at him with a strange, empty expression. A strip of light from the streetlamp is shining in between the blinds.
"I didn't mean to wake you," he says softly.
"Who was that?" she asks.
"Police... a detective... I didn't catch his name."
         
Here are the characters in the schene:
{character_list}

Here is the line that you must add the markers to:
When Erik returns to the bedroom to get his clothes, Simone is lying on her back, looking at him with a strange, empty expression. A strip of light from the streetlamp is shining in between the blinds."""},
        { "role": "assistant", "content": """<Erik>When Erik returns to the bedroom to get his clothes, <Simone>Simone is lying on her back, looking at him with a strange, empty expression. A strip of light from the streetlamp is shining in between the blinds."""},
        { "role": "user", "content": f"""Please insert markers for when the character acting changes, you will be given the surrouding context to help. You should only add markers to the single linge given.
{get_lines(lines, i)}        
         
Here are the characters in the schene:
{character_list}

Here is the line that you must add the markers to:
{line}"""}
    ]

    output += oai_llm.invoke(
        messages,
    ).content + "\n"
print(output)

The precomputed text with the turns serves as a "demo checkpoint" so that we avoid needing to rerun the marker portion again.

In [10]:
# Precomputed marker output for the first lines of the chapter. Each segment is
# prefixed with a <Name> tag; the next cell splits the text on those tags.
output = """<Erik>Erik Maria Bark is yanked reluctantly from his dream when the telephone rings. Before he is fully awake, he hears himself say with a smile, "Balloons and streamers."
<Erik>His heart is pounding from the sudden awakening. Erik has no idea what he meant by these words. The dream is completely gone, as if he had never had it.
<Erik>He fumbles to find the ringing phone, creeping out of the bedroom with it and closing the door behind him to avoid waking Simone. <Joona>A detective named Joona Linna asks if he is sufficiently awake to absorb important information. <Erik>His thoughts are still tumbling down into the dark empty space after his dream as he listens.
<Joona Linna>"I've heard you're very skilled in the treatment of acute trauma," says Linna.
<Erik>"Yes," says Erik.
<Erik>He swallows a painkiller as he listens. <Joona Linna>The detective explains that he needs to question a fifteen-year-old boy who has witnessed a double murder and been seriously injured himself. During the night he was moved from the neurological unit in Huddinge to the neurosurgical unit at Karolinska University Hospital in Solna.
<Erik>"What's his condition?" Erik asks.
<Joona Linna>The detective rapidly summarizes the patient's status, concluding, "He hasn't been stabilized. He's in circulatory shock and unconscious."
<Erik>"Who's the doctor in charge?" asks Erik.
<Joona Linna>"Daniella Richards."
<Erik>"She's extremely capable. I'm sure she can—"
<Joona Linna>"She was the one who asked me to call you. She needs your help. It's urgent."
<Erik>When Erik returns to the bedroom to get his clothes, <Simone>Simone is lying on her back, looking at him with a strange, empty expression. A strip of light from the streetlamp is shining in between the blinds.
<Erik>"I didn't mean to wake you," he says softly.
<Simone>"Who was that?" she asks.
<Erik>"Police... a detective... I didn't catch his name."
<Simone>"What's it about?"
<Erik>"I have to go to the hospital," he replies. "They need some help with a boy.\""""

Split the marked-up text at the markers and convert it into the ShareGPT format.

In [11]:
# A marker is "<Name>" followed by everything up to the next "<".
pattern = re.compile(r'<(.+?)>([^<]+)')

# Each match is a (name, text) pair taken from the marked-up output.
matches = pattern.findall(output)

# One ShareGPT-style entry per marked segment, with surrounding whitespace trimmed.
conversation = [
    {"from": speaker, "value": segment.strip()}
    for speaker, segment in matches
]

In [12]:
def split_quoted_and_non_quoted(s):
    """Split ``s`` into an ordered list of quoted and unquoted segments.

    Each element is ``{"content": <text>, "type": "quoted" | "unquoted"}``.
    Quoted content is the text between a pair of double quotes (quotes
    removed); unquoted content is any other run of text. Content is stripped
    of surrounding whitespace.

    Unquoted runs that are empty after stripping (for example the single space
    between two adjacent quotations) are skipped, so callers never receive an
    empty unquoted segment.
    """
    # Alternative 1 captures the inside of "..."; alternative 2 captures a run
    # of characters containing no double quote.
    pattern = re.compile(r'"([^"]+)"|([^"]+)')

    result = []
    for match in pattern.finditer(s):
        quoted, unquoted = match.group(1), match.group(2)
        if quoted:
            result.append({"content": quoted.strip(), "type": "quoted"})
        elif unquoted and unquoted.strip():
            result.append({"content": unquoted.strip(), "type": "unquoted"})

    return result

Normalize each character name in case of slight hallucinations, such as a misspelled name or a name with part of it left out.

In [15]:
# Map every marker name in `conversation` onto the proper name from the
# character list. Results are cached per distinct marker name so the model is
# asked once per name rather than once per message, which also guarantees the
# same marker always resolves to the same character.
resolved_names = {}
for message in conversation:
    raw_name = message["from"]
    if raw_name not in resolved_names:
        # Two worked examples followed by the real question for `raw_name`.
        messages = [
            { "role": "system", "content": system_prompt },
            { "role": "user", "content": f"""Given the following list of characters:
{characters}

Which character is the following referring to, use only the characters proper name as in the list.
Joona"""},
            { "role": "assistant", "content": "Joona Linna" },
            { "role": "user", "content": f"""Given the following list of characters:
{characters}

Which character is the following referring to, use only the characters proper name as in the list.
Maria Bark"""},
            { "role": "assistant", "content": "Erik Maria Bark" },
            { "role": "user", "content": f"""Given the following list of characters:
{characters}

Which character is the following referring to, use only the characters proper name as in the list.
{raw_name}"""},
        ]
        # Use a dedicated name for the reply: rebinding `output` here would
        # replace the marked-up text that the parsing cell reads.
        response = oai_llm.invoke(messages, stop=["\n"])
        resolved_names[raw_name] = response.content.strip()
    print(f"{raw_name}: {resolved_names[raw_name]}")
    message["from"] = resolved_names[raw_name]

Erik: Erik Maria Bark
Erik: Erik Maria Bark
Erik: Erik Maria Bark
Joona: Joona Linna
Erik: Erik Maria Bark
Joona Linna: Joona Linna
Erik: Erik Maria Bark
Erik: Erik Maria Bark
Joona Linna: Joona Linna
Erik: Erik Maria Bark
Joona Linna: Joona Linna
Erik: Erik Maria Bark
Joona Linna: Joona Linna
Erik: Erik Maria Bark
Joona Linna: Joona Linna
Erik: Erik Maria Bark
Simone: Simone
Erik: Erik Maria Bark
Simone: Simone
Erik: Erik Maria Bark
Simone: Simone
Erik: Erik Maria Bark


Here we rewrite the non-spoken narration into the first person, e.g. "Erik does xyz" -> "I do xyz".  
We pass along the character's full entry from the initial character list, so that details such as nicknames are hopefully available as context as well.

In [16]:
def rewrite_perspective(char, message):
    """Ask the model to rewrite ``message`` in first-person present tense.

    Args:
        char: Name of the character acting out the line. It is expanded to the
            first entry of the global ``characters`` list that contains it, so
            the prompt carries that entry's extra details (e.g. aliases). If
            no entry contains the name, ``char`` is used unchanged.
        message: The narration text to rewrite.

    Returns:
        The text content of the model's reply.
    """
    # Drop the "N. " numbering from each non-empty line of the character list.
    chars = [
        re.sub(r"^\d+\.\s*", "", character.strip())
        for character in characters.splitlines()
        if character.strip()
    ]
    # Fall back to the given name rather than raising IndexError when the
    # model-produced name does not appear in any list entry.
    matching = [c for c in chars if char in c]
    if matching:
        char = matching[0]

    # NOTE(review): the two worked examples name Erik as the acting character
    # but interpolate the current {char} in their "Do not clarify" sentence;
    # confirm whether that mix is intended.
    messages = [
        { "role": "system", "content": system_prompt },
        { "role": "user", "content": f"""Please rewrite the following line to be from a first person present tense perspective. The character acting out the following line is Erik
Avoid using the characters name in the rewritten text, keep in mind that pronouns may refer to other characters and should as such stay the same.
Do not clarify who {char} is, like with "I, {char}" or "I, occupation"

Here is the line to rewrite:
says Erik""" },
        { "role": "assistant", "content": "I say" },
        { "role": "user", "content": f"""Please rewrite the following line to be from a first person present tense perspective. The character acting out the following line is Erik
Avoid using the characters name in the rewritten text, keep in mind that pronouns may refer to other characters and should as such stay the same.
Do not clarify who {char} is, like with "I, {char}" or "I, occupation"

Here is the line to rewrite:
When Erik returns to the bedroom to get his clothes,""" },
        { "role": "assistant", "content": "When I return to the bedroom to get my clothes," },
        { "role": "user", "content": f"""Please rewrite the following line to be from a first person present tense perspective. The character acting out the following line is {char}
Avoid using the characters name in the rewritten text, keep in mind that pronouns may refer to other characters and should as such stay the same.
Do not clarify who {char} is, like with "I, {char}" or "I, occupation"
         
Here is the line to rewrite:
{message}""" },
    ]

    return oai_llm.invoke(messages).content

# Rewrite every message: narration goes through the model, while spoken lines
# are kept verbatim (re-wrapped in double quotes) because speech should not
# change perspective.
results = []
for message in conversation:
    speaker = message["from"]
    pieces = [
        rewrite_perspective(speaker, segment["content"])
        if segment["type"] == "unquoted"
        else f'"{segment["content"]}"'
        for segment in split_quoted_and_non_quoted(message["value"])
    ]
    # Stitch narration and speech back together in their original order.
    rewritten_line = " ".join(pieces)
    print(rewritten_line)
    results.append({"from": speaker, "value": rewritten_line})

I'm yanked reluctantly from my dream when the telephone rings. Before I'm fully awake, I hear myself say with a smile, "Balloons and streamers."
My heart is pounding from the sudden awakening. I have no idea what I meant by these words. The dream is completely gone, as if I had never had it.
I fumble to find the ringing phone, creeping out of the bedroom with it and closing the door behind me to avoid waking Simone.
I, a detective, ask if he is sufficiently awake to absorb important information.
My thoughts are still tumbling down into the dark empty space after my dream as I listen.
"I've heard you're very skilled in the treatment of acute trauma," I say.
"Yes," I say.
I swallow a painkiller as I listen.
I explain that I need to question a fifteen-year-old boy who has witnessed a double murder and been seriously injured himself. During the night, he was moved from the neurological unit in Huddinge to the neurosurgical unit at Karolinska University Hospital in Solna.
"What's his condit

Concatenate consecutive messages from the same character.

In [17]:
# Merge runs of consecutive messages from the same speaker into a single
# entry, joining their texts with a space. Comparing against the last merged
# entry (instead of tracking a separate "current" speaker) means an empty
# `results` yields an empty list and no run is ever dropped.
new_results = []
for mes in results:
    if new_results and new_results[-1]["from"] == mes["from"]:
        new_results[-1]["value"] += f" {mes['value']}"
    else:
        # Start a new run with a fresh dict so `results` is not mutated.
        new_results.append({"from": mes["from"], "value": mes["value"]})

print(new_results)

[{'from': 'Erik Maria Bark', 'value': 'I\'m yanked reluctantly from my dream when the telephone rings. Before I\'m fully awake, I hear myself say with a smile, "Balloons and streamers." My heart is pounding from the sudden awakening. I have no idea what I meant by these words. The dream is completely gone, as if I had never had it. I fumble to find the ringing phone, creeping out of the bedroom with it and closing the door behind me to avoid waking Simone.'}, {'from': 'Joona Linna', 'value': 'I, a detective, ask if he is sufficiently awake to absorb important information.'}, {'from': 'Erik Maria Bark', 'value': 'My thoughts are still tumbling down into the dark empty space after my dream as I listen.'}, {'from': 'Joona Linna', 'value': '"I\'ve heard you\'re very skilled in the treatment of acute trauma," I say.'}, {'from': 'Erik Maria Bark', 'value': '"Yes," I say. I swallow a painkiller as I listen.'}, {'from': 'Joona Linna', 'value': 'I explain that I need to question a fifteen-year-

Format the result as ChatML for visualization purposes.

In [18]:
def turn(char, message):
    """Render one ChatML turn: a start tag with the speaker, then the text and an end tag."""
    return "<|im_start|>{}\n{}<|im_end|>".format(char, message)

# Show the merged conversation, one ChatML turn per speaker run.
chatml_turns = (turn(entry["from"], entry["value"]) for entry in new_results)
print("\n".join(chatml_turns))

<|im_start|>Erik Maria Bark
I'm yanked reluctantly from my dream when the telephone rings. Before I'm fully awake, I hear myself say with a smile, "Balloons and streamers." My heart is pounding from the sudden awakening. I have no idea what I meant by these words. The dream is completely gone, as if I had never had it. I fumble to find the ringing phone, creeping out of the bedroom with it and closing the door behind me to avoid waking Simone.<|im_end|>
<|im_start|>Joona Linna
I, a detective, ask if he is sufficiently awake to absorb important information.<|im_end|>
<|im_start|>Erik Maria Bark
My thoughts are still tumbling down into the dark empty space after my dream as I listen.<|im_end|>
<|im_start|>Joona Linna
"I've heard you're very skilled in the treatment of acute trauma," I say.<|im_end|>
<|im_start|>Erik Maria Bark
"Yes," I say. I swallow a painkiller as I listen.<|im_end|>
<|im_start|>Joona Linna
I explain that I need to question a fifteen-year-old boy who has witnessed a do