In [1]:
from llm_engines import ChatgptLLM, LLMApi
from agent_factory import create_groupchat, unique_topics, get_persona_by_topics, gen_name
import random, time, textwrap, os
from IPython.display import HTML, display


# First of all, make sure the thesis_generations folder exists.
# exist_ok=True makes the call safe to repeat, with no separate existence check.
os.makedirs('thesis_generations', exist_ok=True)


# Extract 100 unique topics, or load them from the cache file if it already
# exists, so that every later run works on the same sample.
if not os.path.exists('thesis_generations/sampled_topics.txt'):
    # Get Southern Ice Tea out of the way. The removal mutates the imported
    # collection, so it is guarded: re-running this cell in the same kernel
    # (where the topic is already gone) must not raise.
    if 'Southern Ice Tea' in unique_topics:
        unique_topics.remove('Southern Ice Tea')
    sampled_topics = random.sample(unique_topics, 100)

    # one topic per line
    with open('thesis_generations/sampled_topics.txt', 'w') as f:
        for topic in sampled_topics:
            f.write(topic + '\n')
else:
    with open('thesis_generations/sampled_topics.txt', 'r') as f:
        sampled_topics = f.read().splitlines()


## Naive conversation generation

In [2]:
# Pick 100 persona couples based on the sampled topics, but only if they don't
# already exist on disk. The pairs are written to personas_pairs.json right
# away: without that, the file would still be missing afterwards and the pairs
# built here would be thrown away and generated a second time.
import json  # json is not imported by the first cell, so bring it in here

if not os.path.exists('thesis_generations/personas_pairs.json'):
    personas_pairs = []
    for topic in sampled_topics:
        # both personas of a couple are drawn for the same topic
        persona1 = get_persona_by_topics([topic])
        persona2 = get_persona_by_topics([topic])
        # create a name for each member of the couple
        name1 = gen_name(persona=persona1)
        name2 = gen_name(persona=persona2)
        personas_pairs.append({'name1': name1, 'persona1': persona1, 'name2': name2, 'persona2': persona2, 'topic': topic})

    # persist the freshly generated pairs so they can simply be loaded later
    with open('thesis_generations/personas_pairs.json', 'w') as f:
        json.dump(personas_pairs, f)

In [3]:

# load the persona pairs from the file
from agent_factory import gen_name
import json

# Reuse the cached persona pairs when the file is present; otherwise build one
# pair per sampled topic and cache the result.
if not os.path.exists('thesis_generations/personas_pairs.json'):
    personas_pairs = []

    for topic in sampled_topics:
        # draw the two personas first, then name them (first agent, then second)
        first_persona = get_persona_by_topics([topic])
        second_persona = get_persona_by_topics([topic])
        personas_pairs.append({
            'name1': gen_name(persona=first_persona),
            'persona1': first_persona,
            'name2': gen_name(persona=second_persona),
            'persona2': second_persona,
            'topic': topic,
        })

    with open('thesis_generations/personas_pairs.json', 'w') as cache_file:
        json.dump(personas_pairs, cache_file)
else:
    with open('thesis_generations/personas_pairs.json', 'r') as cache_file:
        personas_pairs = json.load(cache_file)
    
    
from places_replication import NaiveConversationAgent

# Turn every persona pair into a two-agent group chat (first agent, second
# agent) to be used for the PlacesReplication runs.
groupchats = [
    [
        NaiveConversationAgent(name=pair['name1'], persona=pair['persona1']),
        NaiveConversationAgent(name=pair['name2'], persona=pair['persona2']),
    ]
    for pair in personas_pairs
]

# how many groupchats did we end up with?
len(groupchats)

100

In [4]:
# Make sure the two agents of a groupchat never share the same persona:
# redraw the second agent's persona for the pair's topic until it differs,
# giving up on that chat once more than 10 redraws have been made.
for idx, chat in enumerate(groupchats):
    attempts = 0
    while chat[0].persona == chat[1].persona:
        pair_topic = personas_pairs[idx]['topic']
        chat[1].persona = get_persona_by_topics([pair_topic])
        print('changed persona of agent 2 in groupchat', idx)
        attempts += 1
        if attempts > 10:
            # the topic apparently cannot provide two different personas
            print("critical topic:", personas_pairs[idx]['topic'])
            break

# sanity check: every chat must now hold two different personas
for idx, chat in enumerate(groupchats):
    assert chat[0].persona != chat[1].persona, f'personas are the same in groupchat {idx}'

# Any name longer than 20 characters is reported and regenerated once.
for chat in groupchats:
    for member in chat:
        if len(member.name) <= 20:
            continue
        print('name too long:', member.name)
        member.name = gen_name(member.persona)

# The two agents of a chat must not share a name: rename the second agent
# until the names differ.
for idx, chat in enumerate(groupchats):
    while chat[0].name == chat[1].name:
        chat[1].name = gen_name(chat[1].persona)
        print('changed name of agent 2 in groupchat', idx)
    

from Levenshtein import distance

# Settle the final names BEFORE they are written into the personas.
# The second agent is renamed while the two names are identical or only one
# edit away from each other. Doing this after the $name$ substitution would
# leave the old, discarded name embedded in the persona text.
for i, groupchat in enumerate(groupchats):
    while distance(groupchat[0].name, groupchat[1].name) < 2:
        groupchat[1].name = gen_name(groupchat[1].persona)
        print('changed name of agent 2 in groupchat', i)

# in each persona, substitute $name$ with the (now final) name of the agent
for groupchat in groupchats:
    for agent in groupchat:
        agent.persona = agent.persona.replace('$name$', agent.name)
# Save the personas_pairs with the updated names and personas.
# The records are built and serialized BEFORE the file is opened: opening with
# 'w' truncates the cache immediately, so an error raised while building the
# list would otherwise leave an empty personas_pairs.json that can no longer
# be loaded with json.load.
personas_pairs = [
    {
        'name1': groupchat[0].name,
        'persona1': groupchat[0].persona,
        'name2': groupchat[1].name,
        'persona2': groupchat[1].persona,
        'topic': personas_pairs[i]['topic'],
    }
    for i, groupchat in enumerate(groupchats)
]
serialized_pairs = json.dumps(personas_pairs)

with open('thesis_generations/personas_pairs.json', 'w') as f:
    f.write(serialized_pairs)

In [5]:
# for each groupchat, run the generation using the local model
from places_replication import NaiveConversationGeneration
from tqdm.notebook import tqdm

# Ids of the conversations generated so far, in groupchat order.
chat_ids_list = []

# resume from a previous run if the chat_ids_list file already exists
if os.path.exists('thesis_generations/naive_gen_chat_ids_list.json'):
    with open('thesis_generations/naive_gen_chat_ids_list.json', 'r') as f:
        chat_ids_list = json.load(f)

# one id is stored per completed groupchat, so the list length is the index
# of the first groupchat that still has to be generated
resume_index = len(chat_ids_list)

# iterate over the remaining groupchats; stop at the first failure
for i, groupchat in enumerate(tqdm(groupchats[resume_index:])):
    try:
        convo = NaiveConversationGeneration(agent_list=groupchat, neutral_llm=LLMApi())
        chat_id = convo.chat_id
        convo.generate_conversation(min_turns=10, start_conversation=True)
        # Record the chat_id only once the conversation has been generated:
        # registering it beforehand left the id of a failed generation in the
        # list, where it would later be treated as a finished conversation.
        chat_ids_list.append(chat_id)
        # save the chat_ids_list after every conversation so the run can resume
        with open('thesis_generations/naive_gen_chat_ids_list.json', 'w') as f:
            json.dump(chat_ids_list, f)

    except Exception as e:
        print(f"Error occurred during conversation generation for groupchat {i + resume_index}: {str(e)}")
        break


0it [00:00, ?it/s]

In [6]:
# Read back every generated conversation, in the order of chat_ids_list.
conversations = []

for chat_id in tqdm(chat_ids_list):
    history_file = "".join(("chat_history/naive_chat_history_", chat_id, ".json"))
    with open(history_file, 'r') as fh:
        conversations.append(json.load(fh))

  0%|          | 0/100 [00:00<?, ?it/s]

In [10]:
# peek at the first ten loaded conversations
conversations[:10]

[[[0, 'Vivian', 'Hi!'],
  [1, 'Jasper', ' Hey, how are you doing today?'],
  [2,
   'Vivian',
   " I'm doing well, just got back from a morning jog and feeling energized! How about you?"],
  [3,
   'Jasper',
   " I'm good too, thanks for asking! I've been fascinated by studying tomato plants' growth patterns recently."],
  [4,
   'Vivian',
   " Tomato plants, huh? That's an interesting subject! I've been reading up on Marxist theories and how they tie into urban art, specifically city murals."],
  [5,
   'Jasper',
   " That sounds fascinating as well! I've always admired vibrant city murals and the messages they often convey."],
  [6,
   'Vivian',
   ' Definitely! I went for a jog in the city today, and it felt like the murals were alive in their own way. It got me thinking about how art and politics can intertwine in urban landscapes.'],
  [7,
   'Jasper',
   " I can imagine. It's amazing how our separate interests, yours and mine, can somehow find connections with each other."],
  [8

## Gemma 2 version

In [11]:
# The same generation as the naive run above, but using Gemma 2 as the neutral LLM.

# Ids of the Gemma 2 conversations generated so far, in groupchat order.
chat_ids_list = []

# resume from a previous run if the chat_ids_list file already exists
if os.path.exists('thesis_generations/gemma2_naive_gen_chat_ids_list.json'):
    with open('thesis_generations/gemma2_naive_gen_chat_ids_list.json', 'r') as f:
        chat_ids_list = json.load(f)

# one id is stored per completed groupchat, so the list length is the index
# of the first groupchat that still has to be generated
resume_index = len(chat_ids_list)

llm=LLMApi(model="turboderp_gemma-2-27b-it-exl2_5.0bpw")
print(llm.model)

# add system messages to the llm
llm.set_system_prompt("The following are human conversations. They never contain assistant messages or comments, just an intro and the conversation. Nothing else.")

# check if the llm works (the response itself is discarded)
llm.generate_response("Hello, how are you?")

# iterate over the remaining groupchats, retrying each one until it succeeds
for i, groupchat in enumerate(tqdm(groupchats[resume_index:])):
    success = False
    while not success:
        try:
            convo = NaiveConversationGeneration(agent_list=groupchat, neutral_llm=llm)
            chat_id = convo.chat_id
            convo.generate_conversation(min_turns=10, start_conversation=True)
            # Record the chat_id only once the conversation has been generated.
            # Appending it before the generation left one stale id in the list
            # for every failed attempt; those ids were then written to disk by
            # the next successful save, which shifted resume_index and mixed
            # failed conversations into the results.
            chat_ids_list.append(chat_id)
            # save the chat_ids_list after every conversation so the run can resume
            with open('thesis_generations/gemma2_naive_gen_chat_ids_list.json', 'w') as f:
                json.dump(chat_ids_list, f)
            success = True
        except Exception as e:
            # NOTE(review): there is no retry limit, so a persistent failure
            # keeps this loop running until it is interrupted.
            print(f"Error occurred during conversation generation for groupchat {i + resume_index}: {str(e)}")
        


turboderp_gemma-2-27b-it-exl2_5.0bpw


  0%|          | 0/100 [00:00<?, ?it/s]

[92mVivian[0m: What's up?
[93mJasper[0m:  Trying to wrap my head around this new Marxist theory article.
[92mVivian[0m:  Oh, yeah? Which one?
[93mJasper[0m:  It's a bit dense, to be honest.  Talks about alienation in
    late-stage capitalism.
[92mVivian[0m:  Hmm, I see.
[93mJasper[0m:   It's interesting, but pretty heavy stuff.
[92mVivian[0m:  Maybe you need a break. Have you been taking care of your
    garden lately?
[93mJasper[0m:  Actually, I haven't had much time.
[92mVivian[0m:  You should try to make some time.
[93mJasper[0m:  I know, I know. It's so therapeutic.
[92mVivian[0m:  Totally. Fresh air and dirt can do wonders for the mind.
[93mJasper[0m:  You're right. Maybe I'll head over there after this article.
[92mVivian[0m:  Good idea!
[92mLuminara[0m: Hello!
[93mIndigo[0m:  Hi Luminara, how are you?
[92mLuminara[0m:  I'm well, thank you. I was just thinking about the
    importance of preservation, you know, on all levels.
[93mIndigo[0m:  Tha