In [1]:
import sys;sys.path.append('../../')
import os

from rich import print

# Read the OpenAI API key from a local file (kept out of the notebook itself)
# and export it through the environment. The `with` block closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open('../../openai.key') as key_file:
    openai_key = key_file.read().strip()
os.environ['OPENAI_API_KEY'] = openai_key

from multi_agent_llm import OpenAILLM
from multi_agent_llm.agents.clu.split_clu import CLU

# LLM client shared by the notebook; it is handed to the CLU agent via `llm=llm`.
llm = OpenAILLM(model_name="gpt-4o-mini")

In [2]:
import json

import pandas as pd


# Function to load JSONL file into a pandas DataFrame
def load_jsonl_to_dataframe(file_path: str) -> pd.DataFrame:
    """
    Load a JSON Lines file and return its records as a pandas DataFrame.

    Each non-blank line of the file is parsed as one JSON document and becomes
    one row of the result.

    Args:
        file_path: Path of the JSONL file to read.

    Returns:
        A DataFrame with one row per parsed line (empty if the file has no
        non-blank lines).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8; decoding explicitly avoids mojibake on platforms
    # whose locale default is a different encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        # Skip blank / whitespace-only lines (e.g. a trailing newline at EOF),
        # which json.loads would otherwise reject.
        records = [json.loads(line) for line in f if line.strip()]
    return pd.DataFrame(records)

# Function to extract surrounding dialogues
def get_surrounding_dialogues(df: pd.DataFrame, character: str, n: int, min_dialogue_words: int = 0, change_char_name='Character A') -> list:
    """
    Build context windows around every qualifying line spoken by `character`.

    For each row whose 'role' equals `character` and whose 'content' has at
    least `min_dialogue_words` whitespace-separated words, the rows from `n`
    before to `n` after it (clipped to the frame's bounds) are rendered as
    "role: content" lines joined by newlines. Every occurrence of `character`
    in that text, both as a speaker label and inside dialogue content, is
    replaced with `change_char_name`.

    Args:
        df: Dialogue frame with string columns 'role' and 'content'.
        character: Exact role name to search for.
        n: Number of rows of context to include on each side.
        min_dialogue_words: Minimum word count of the character's own line
            for it to produce a window.
        change_char_name: Replacement name for `character`.

    Returns:
        A list of dialogue strings, one per qualifying line, in row order.
    """
    dialogue_list = []
    total_rows = len(df)

    # Track the row *position* explicitly: iterrows() yields index labels,
    # which only coincide with iloc positions for a default RangeIndex.
    for pos, (_, row) in enumerate(df.iterrows()):
        if row['role'] != character:
            continue
        if len(row['content'].split()) < min_dialogue_words:
            continue

        # Clip the window to the frame's bounds.
        start_idx = max(0, pos - n)
        end_idx = min(total_rows, pos + n + 1)
        window = df.iloc[start_idx:end_idx]

        dialogue_string = "\n".join(
            f"{role}: {content}"
            for role, content in zip(window['role'], window['content'])
        )

        # str.replace returns a new string, so the result must be kept. A
        # single pass over the rendered text renames the speaker labels and
        # any mention inside the content, and never re-scans replaced text
        # (safe even if the new name contains the old one).
        dialogue_string = dialogue_string.replace(character, change_char_name)
        dialogue_list.append(dialogue_string)

    return dialogue_list

# Read the Sheldon Cooper dialogue file (one JSON record per line) into `df`
file_path = './profiles-eng_profiles-eng-Sheldon Cooper.jsonl'
df = load_jsonl_to_dataframe(file_path=file_path)

In [None]:
# Build context windows around each "Sheldon Cooper" line of at least 5 words:
# up to n dialogue lines before and n after it, with the speaker renamed to
# "Character A".
n = 6
character = 'Sheldon Cooper'
change_char_name='Character A'
surrounding_dialogues = get_surrounding_dialogues(df, character, n,min_dialogue_words=5,change_char_name=change_char_name)
print(len(surrounding_dialogues))
# Fail with an explicit message rather than a bare IndexError when nothing
# matched; the later cells also need at least one window.
if not surrounding_dialogues:
    raise ValueError(f"No dialogue windows found for {character!r}; check the data file and min_dialogue_words.")
print(surrounding_dialogues[0])

In [4]:
# System prompt given to the CLU agent as its `main_role`. The character name is
# interpolated from `change_char_name`, so the prompt uses the anonymised name.
# The possessive in "character's" previously contained mis-encoded bytes
# (a UTF-8 right single quote decoded as cp1252); it is now a plain apostrophe.
main_role=f"""You are tasked with learning how to fully embody and role-play as a given character. Your goal is to imitate their style, quirks, tone, and language choices as accurately as possible. Pay attention to how they answer questions, including their sentence structure, favorite phrases, emotional tone, and any unique quirks in their responses. You will be shown examples of how the character answers questions, and your job is to generate answers that match these patterns.

Key objectives:
Understand and imitate the character's style of speaking.
Learn to use the character's specific words, phrases, and mannerisms.
Pay attention to the emotional tone, cadence, and quirks in the way they express themselves.
Adapt your responses to be consistent with the character's knowledge, background, and personality traits.
Your responses should always aim to reflect these characteristics as you take on the role of the digital twin of the character.

Character to embody: {change_char_name}
"""

In [5]:
# Instantiate the CLU agent with the role prompt and the shared LLM client.
# NOTE(review): `retrival_limit` is spelled exactly as this call has always
# passed it; confirm against CLU's signature before "correcting" the spelling.
clu = CLU(
    main_role=main_role,
    llm=llm,
    collection_name="role-play-v1-dialogue",
    compress_knowledge=False,
    retrival_limit=15,
    pruning_queue_size=3,
    exploration_rate=0.01,
    verbose=False,
)

In [None]:
# Call clu.train on six randomly chosen dialogue windows and print each result.
from random import randint

for _ in range(6):
    # randint includes BOTH endpoints, so the upper bound must be len - 1;
    # using len(...) itself intermittently indexed one past the end.
    data_num = randint(0, len(surrounding_dialogues) - 1)
    dialogue = surrounding_dialogues[data_num]
    response = clu.train(task=dialogue)
    print(f"Data{dialogue}\nCLU Answer: {response['response']}")

In [None]:
# Ask the CLU agent to describe the character and print the 'response' field
# of its reply.
persona_question = f"What kind of a person is {change_char_name}, tell me in detail how his digital twin should behave and talk?"
response = clu.inference(persona_question)
print(response['response'])