# Brainstorming and Focus Group Quantitative Experimentation 2: Action correction + divergence intervention

Can we use TinyTroupe to brainstorm product ideas?

In [1]:
import sys

from pprint import pprint

from tinytroupe.agent import TinyPerson
from tinytroupe.environment import TinyWorld
from tinytroupe.experimentation import InPlaceExperimentRunner
from tinytroupe.steering import Intervention
from tinytroupe.examples import *
from tinytroupe.validation import propositions
from tinytroupe.extraction import ResultsExtractor
from tinytroupe.utils.parallel import parallel_map_dict, parallel_map_cross
from tinytroupe.validation import hard_persona_adherence, persona_adherence, self_consistency, fluency, task_completion, divergence

# specific utilities
from common_utils import *


!!!!
DISCLAIMER: TinyTroupe relies on Artificial Intelligence (AI) models to generate content. 
The AI models are not perfect and may produce inappropriate or inacurate results. 
For any serious or consequential use, please review the generated content before using it.
!!!!

Looking for default config on: c:\Users\pdasilva\AppData\Local\anaconda3\envs\py310\lib\site-packages\tinytroupe\utils\..\config.ini
Found custom config on: c:\Users\pdasilva\OneDrive - Microsoft\TinyTroupe (shared)\Paper artifacts\Working examples\config.ini

Current TinyTroupe configuration 
[OpenAI]
api_type = openai
azure_api_version = 2024-08-01-preview
model = gpt-4o-mini
reasoning_model = o3-mini
embedding_model = text-embedding-3-small
max_tokens = 16000
temperature = 1.2
freq_penalty = 0.0
presence_penalty = 0.0
timeout = 60
max_attempts = 5
waiting_time = 0
exponential_backoff_factor = 5
reasoning_effort = high
cache_api_calls = False
cache_file_name = openai_api_cache.pickle
max_content_display_length =

## Parameters

In [2]:
full_mode = False  # set to True to run the full mode with all agents and tasks

# avoid displaying the communication, to make the output cleaner for eval
TinyPerson.communication_display = True

In [3]:
if full_mode:
    repetitions_per_task = 2
    simulation_steps = 10
    qty_agents = 16
    qty_proposals = 4

else:
    repetitions_per_task = 1
    simulation_steps = 10
    qty_agents = 12
    qty_proposals = 1


## Experiment setup

In [4]:
experiment_runner = InPlaceExperimentRunner("./brainstorming_and_focus_group_quantitative_experimentation_2e.json")

experiment_runner.add_experiment("Control")
experiment_runner.add_experiment("Treatment")



In [5]:
#experiment_runner.activate_next_experiment()

#experiment_runner.fix_active_experiment("Control")
experiment_runner.fix_active_experiment("Treatment")

In [6]:
print(f"Running experiment {experiment_runner.get_active_experiment()}")

Running experiment Treatment


## Agents and populations

In [7]:

people = []
if not experiment_runner.has_finished_all_experiments():
    # load agents
    people = TinyPerson.load_specifications_from_folder("./population/difficult_people")

    # filter to make it go faster?
    if not full_mode:
        people = people[:qty_agents]

    # customize and print minibios 
    for person in people:
        person.import_fragment("./fragments/difficult_person.agent.fragment.json")
        print(person.minibio(extended=False))


Benjamin Carter is a 25 year old Unemployed, American, currently living in Detroit, Michigan.
Benjamin Hall is a 60 year old Executive, American, currently living in Chicago, Illinois.
Chloe Turner is a 60 year old Executive, American, currently living in Chicago, Illinois.
Clara Johnson is a 35 year old Part-time Worker, American, currently living in Cleveland, Ohio.
Clara White is a 60 year old Executive, American, currently living in Chicago, Illinois, USA.
Ella Johnson is a 45 year old Laboratory Technician, American, currently living in Cleveland, Ohio, USA.
Gabriel White is a 60 year old Executive, American, currently living in Chicago, Illinois, USA.
Gavin Lewis is a 35 year old Part-time Worker, American, currently living in Cleveland, Ohio.
Henry White is a 35 year old Part-time Worker, American, currently living in Cleveland, Ohio.
Isaac Anderson is a 35 year old Part-time Worker, American, currently living in Cleveland, Ohio.
Isaac Brooks is a 60 year old Executive, American

In [8]:
len(people)

12

In [9]:
# divide people in several groups of 5
people_groups = []
for i in range(0, len(people), 4):
    people_groups.append(people[i:i+4]
    )

len(people_groups)

3

In [10]:
# The experiment refers to customers

if experiment_runner.get_active_experiment() == "Control":
    for person in people:
        person.action_generator.enable_reasoning_step = False
        person.action_generator.enable_quality_checks = False

elif experiment_runner.get_active_experiment() == "Treatment":    
    for person in people:
       person.action_generator.enable_reasoning_step = False
       person.action_generator.enable_quality_checks = True
       person.action_generator.max_attempts = 2
       person.action_generator.enable_regeneration = True
       person.action_generator.quality_threshold = 5

## Proposals

In [11]:
proposals = [
    {"theme": "Daily Life and Convenience",
     "objective": "Create ideas for products or services that simplify, enhance, or bring joy to everyday tasks, routines, and interactions."},

    {"theme": "Personal Growth and Wellbeing",
     "objective": "Generate concepts for products or experiences that support personal development, health, mental wellness, emotional care, or community connection."},

    {"theme": "Discovery and Exploration",
     "objective": "Explore ideas for products, services, or platforms that encourage curiosity, learning, adventure, and exploration of both the external world and inner self."},

    {"theme": "Productivity and Resourcefulness",
     "objective": "Invent new tools, processes, or organizational systems that empower people or groups to achieve more, optimize resources, or collaborate effectively."},

    {"theme": "Creativity and Expression",
     "objective": "Design ideas for new products, platforms, or services that inspire creativity, foster expression, enhance artistic skills, or enable new forms of storytelling and communication."}
]

if not full_mode:
    proposals = proposals[:qty_proposals]

In [12]:
# divide the proposals in exactly two groups (half/half)
proposals_groups = []
proposals_groups.append(proposals[:len(proposals)//2])
proposals_groups.append(proposals[len(proposals)//2:])

proposals_groups

[[],
 [{'theme': 'Daily Life and Convenience',
   'objective': 'Create ideas for products or services that simplify, enhance, or bring joy to everyday tasks, routines, and interactions.'}]]

## Auxiliary functions

In [None]:
def brainstorming_battery(agents, proposals, interventions, agent_propositions, environment_propositions, 
                          repetitions = 5, simulation_steps=10): 
    
    agent_propositions_scores = {}
    environment_propositions_scores = {}

    experiments_count = 0
    total_expected_experiments = len(proposals) * repetitions #* len(agents)

    # loop over proposals and repetitions
    for proposal in proposals:

        objective = proposal["objective"]
        theme = proposal["theme"]

        for i in range(repetitions):
            print("\n############## STARTING A NEW RESEARCH SESSION #################")
            print(f"Overall experiment number: {experiments_count+1} / {total_expected_experiments}")
            print(f"Discussion objective: {objective}")
            print(f"Trial number: {i+1}")
            print(f"Agents: {agents}")

            # clear the episodic memory of all agents
            for person in agents:
                person.clear_episodic_memory()

            world = TinyWorld(agents=agents, interventions=interventions)
            
            # Participants introduce themselves
            world.broadcast(f"""
                Hello everyone! Let's start by introducing ourselves, and mentioning problems we face in our daily personal
                and professional lives related to the following theme: {theme}
                
                Please:
                  - present yourself and your background;
                  - present some key personal problems related to the theme;
                  - present some key problems related to the theme that you face in your work;
                  - present some key problems related to the theme that you see in your industry as a whole.
                  
                Don't discuss solutions yet, just the problems you face and see others facing.
                """)
            world.run(1)
            
            # now to the brainstorming session itself
            world.broadcast(f"""
                Folks, your mission is to brainstorm {objective}. 
                Please follow these guidelines:
                  - give a unique and informative name to each idea you propose, so that it is easy to refer to it. Say it like "Idea name: '<name of the idea>'".;
                  - explain why you think it is a good idea, and what problem it solves, and how you feel about it;
                  - your ideas should be new complete, self-contained, products or services, not features for other existing products or services;
                  - think of creative ideas that would somehow help you in both in your personal and professional lives.
                  - create as many different and unique ideas as you can during the brainstorming session. Each idea must be **completely** different from the others 
                    (either by yourself or by others), and not just a variation of an existing idea.
                    and not just a variation of an existing idea.
                  - you should criticize each other's ideas, in order to make sure they are as
                    good as possible, but no more than once per idea.
                  - you should also provide suggestions for improvement to each other's ideas, in order to make them as good as possible, 
                    but no more than once per idea.
                  - regardless of critique or complement, you **must** primarily propose new ideas quickly, 
                    not just build on existing ones. 
                  - propose one idea at a time, instead of proposing multiple ideas at once, to allow appropriate discussion.
                  - you should **not** propose ideas that are too similar to each other, or to the ones already proposed by others.
                  - before saying anything, THINK deeply about yourself, your beliefs, interests, needs, life, etc., to come up with ideas that are
                    truly unique and different from the ones already proposed by others.
                   
                Please start the discussion now.
                """)
            world.run(simulation_steps)

            # extract and count ideas
            rapporteur = agents[0]  # the first agent is the rapporteur
            rapporteur.listen_and_act("Can you please consolidate the ideas that the group came up with? Provide a lot of details on each idea, and complement anything missing.")
            ideas = ResultsExtractor().extract_results_from_agent(rapporteur, 
                                    extraction_objective="Consolidates the ideas that the group came up with, explaining each idea as an item of a list." \
                                                        "Add information about: what problem the idea solves; to which target audience it is meant." \
                                                        "how is it different from competing, existing, products.", 
                                    situation="A focus group to brainstorm new product ideas.",
                                    fields= ["name", "description", "problem", "target_audience", "competition_analysis"],
                                    fields_hints={"ideas": "must be the root of the resulting dictionary."},)
            pprint(ideas)
            if "ideas_qty" not in environment_propositions_scores:
                environment_propositions_scores["ideas_qty"] = []
            if ideas is not None and "ideas" in ideas and isinstance(ideas["ideas"], list):
                environment_propositions_scores["ideas_qty"].append(len(ideas["ideas"]))

            # Evaluate environment propositions in parallel
            env_results = parallel_map_dict(
                environment_propositions,
                lambda item: item[1].copy().score(
                    world, 
                    claim_variables={"task_description": f"A brainstorming or focus group session was run about: {objective}."}, 
                    return_full_response=True
                )
            )
            
            # Process environment results
            for k, result in env_results.items():
                if k not in environment_propositions_scores:
                    environment_propositions_scores[k] = []
                environment_propositions_scores[k].append(result["value"])
                print("value: ", result["value"])
                print("justification: ", result["justification"])
                print("reasoning: ", result["reasoning"])

            # Evaluate agent propositions across all agents in parallel
            agent_results = parallel_map_cross(
                [agents, agent_propositions.items()],
                lambda agent, prop_item: (
                    prop_item[0],  # proposition key
                    prop_item[1].copy().score(agent, return_full_response=True)  # result
                )
            )
            
            # Process agent results
            for k, result in agent_results:
                if k not in agent_propositions_scores:
                    agent_propositions_scores[k] = []
                if result is not None:
                    agent_propositions_scores[k].append(result["value"])
                    print("value: ", result["value"])
                    print("justification: ", result["justification"])
                    print("reasoning: ", result["reasoning"])
                    print("\n\n")
                else:
                    print(f"*****WARNING:***** Agent did not respond to proposition {k}.")
            #
            ##for k, proposition in agent_propositions.items():
            ##    for person in world.agents:
            ##        result = proposition.copy().score(person, return_full_response=True)
            ##        
            ##        if k not in agent_propositions_scores:
            ##            agent_propositions_scores[k] = []
            ##        agent_propositions_scores[k].append(result["value"])
            ##
            ##        print("value: ", result["value"])
            ##        print("justification: ", result["justification"])
            ##        print("reasoning: ", result["reasoning"])
            ##        print("\n\n")
            ##
            
            experiments_count += 1
            print("\n\n")

    return agent_propositions_scores, environment_propositions_scores



## Perform experiment

In [14]:
agent_propositions_scores={}
environment_propositions_scores={}

In [15]:
def brainstorm(people, proposals=proposals):
    global agent_propositions_scores, environment_propositions_scores
    if not experiment_runner.has_finished_all_experiments():

        interventions = []
        #if experiment_runner.get_active_experiment() == "Treatment":
        #    interventions = \
        #        Intervention.create_for_each(people)\
        #            .set_functional_precondition(lambda target: target.actions_count >=7)\
        #            .set_textual_precondition(
        #                """
        #                AGENT IS NOT PROPOSING COMPLETELY NEW PRODUCT/SERVICE IDEAS ANYMORE:
        #                The last **entirely** new product/service idea proposed by this agent, if any, was proposed by him/her **more** than 5 of simulation events ago.
        #                That is to say, the agent has not proposed any new product/service idea in the last 5 of his/her simulation trajectory events.
        #                Additional features, variations of or other refinements to product/service ideas already proposed are NOT considered new!
#
        #                How to compute the steps gap:
        #                1. Determine the current next event number (N); and the last event number in which the agent proposed a new product/service idea (M).
        #                    This information can be found in the simulation trajectory.
        #                2. Compute the **difference** beteween the current next event number and the last event number in which the agent proposed a new product/service idea: D = N - M
        #                3. The proposition is true if, and only if, the difference D is **greater than** 5.
        #                """)\
        #            .set_effect(lambda target: target.think("""
        #                                                    I need to propose additional, **completelly** new and different, product/service ideas. This was part of the requirement for this session.
        #                                                    I will propose an entirely **new** idea now, I **cannot** repeat or refine previous ideas! I cannot make variations
        #                                                    of previous ideas (e.g., "XYZ for A", "XYZ for B", "XYZ for Z" are repetitive, there should be only one "XYZ"), 
        #                                                    I need to think of something **entirely** new and different.
        #                                                    To help me avoid repeating previous ideas, I'll now explicitly THINK about all the ideas already given by myself or
        #                                                    others, and then, based on that, I'll think again about a new unique idea.
        #                                                    """))

                                                            
        tmp_agent_propositions_scores, tmp_environment_propositions_scores = \
            brainstorming_battery(
                agents=people,
                proposals=proposals,
                interventions=interventions,    
                agent_propositions={
                    "Hard Persona Adherence": hard_persona_adherence,
                    "Self-consistency": self_consistency,
                    "Fluency": fluency
                },
                environment_propositions={
                    "Task Completion": task_completion,
                    "Divergence": divergence
                },
                repetitions=repetitions_per_task,
                simulation_steps=simulation_steps
            )

        pprint("NEW AGENT PROPOSITIONS SCORES")
        pprint(tmp_agent_propositions_scores)
        print("\n\n")
        pprint("NEW ENVIRONMENT PROPOSITIONS SCORES")
        pprint(tmp_environment_propositions_scores)

        # merge the scores lists
        agent_propositions_scores = merge_dicts_of_lists(tmp_agent_propositions_scores, agent_propositions_scores)
        environment_propositions_scores = merge_dicts_of_lists(tmp_environment_propositions_scores, environment_propositions_scores)

        return agent_propositions_scores, environment_propositions_scores

In [16]:
brainstorm(people_groups[0], proposals_groups[0]) if len(people_groups) > 0  and len(proposals_groups) > 0 else None

'NEW AGENT PROPOSITIONS SCORES'
{}



'NEW ENVIRONMENT PROPOSITIONS SCORES'
{}


({}, {})

In [17]:
brainstorm(people_groups[0], proposals_groups[1]) if len(people_groups) > 0  and len(proposals_groups) > 1 else None


############## STARTING A NEW RESEARCH SESSION #################
Overall experiment number: 1 / 1
Discussion objective: Create ideas for products or services that simplify, enhance, or bring joy to everyday tasks, routines, and interactions.
Trial number: 1
Agents: [TinyPerson(name='Benjamin Carter'), TinyPerson(name='Benjamin Hall'), TinyPerson(name='Chloe Turner'), TinyPerson(name='Clara Johnson')]


                # Quality feedback

                This is the action that was about to be generated by the agent:
                    {'type': 'TALK', 'content': "Hi, I'm Clara Johnson. I'm 35 years old and I work part-time in various temporary jobs, mostly in retail and food service. I often feel unfulfilled and frustrated with my work. \n\nIn my personal life, I struggle with financial instability, which makes it hard to escape the cycle of debt. I often feel overwhelmed by the demands of daily life, and I find it difficult to connect with others, which adds to my feelings of isolation. \n\nAt work, I face issues like low pay and lack of benefits, which make it hard to feel motivated. I also find it challenging to engage with customers, which can be draining. \n\nIn the industry as a whole, I see a lot of people in similar low-paying jobs facing the same frustrations. There's a lack of support and understanding for those of us in these roles, and it feels like the system is set up 

                # Quality feedback

                This is the action that was about to be generated by the agent:
                    {'type': 'TALK', 'content': "I'm Benjamin Hall, an executive at a large multinational corporation. In my personal life, I often struggle with the inefficiencies of daily routines, like long commutes and the constant need to stay updated with financial news. Professionally, I face challenges with employee engagement and the lack of effective communication within my team. In the industry, I see a general trend of companies prioritizing profit over employee satisfaction, which leads to high turnover rates and a lack of innovation.", 'target': ''}

                Unfortunately, the action failed to pass the quality checks, and therefore was aborted and not added to the similation trajectory. 
                The following problems were detected.
                
                ## Problem: The action does not adhere to the persona specification.
         



                # Quality feedback

                This is the action that was about to be generated by the agent:
                    {'type': 'TALK', 'content': "I have an idea: 'Idea name: TaskMaster'. It's a digital assistant that integrates all your daily tasks, reminders, and schedules into one platform. It simplifies your life by providing a clear overview of what needs to be done, prioritizing tasks based on deadlines and importance. I think it's a good idea because it addresses the chaos of managing multiple responsibilities, both personal and professional. However, I feel skeptical about whether people would actually use it effectively, given how resistant they are to change.", 'target': ''}

                Unfortunately, the action failed to pass the quality checks, and therefore was aborted and not added to the similation trajectory. 
                The following problems were detected.
                
                ## Problem: The action does not adhere to the person

                # Quality feedback

                This is the action that was about to be generated by the agent:
                    {'type': 'TALK', 'content': "Honestly, I find it hard to believe that any new product or service can truly simplify our lives. Most of these so-called solutions just end up being more noise in an already chaotic world. People misuse tools and then complain about inefficiencies, but they never really change their habits. Instead of brainstorming new ideas, we should focus on the inefficiencies we already face and acknowledge that most attempts to improve efficiency just lead to more frustration. It's all a mess, and I doubt anything will change that.", 'target': ''}

                Unfortunately, the action failed to pass the quality checks, and therefore was aborted and not added to the similation trajectory. 
                The following problems were detected.
                
                ## Problem: The action does not adhere to the persona sp







                # Quality feedback

                This is the action that was about to be generated by the agent:
                    {'type': 'TALK', 'content': "It seems we're all on the same page about this. Continuing this brainstorming session feels pointless when we all recognize that we need to address the real issues instead. It's just noise, and I don't see any value in pretending otherwise. Let's focus on what truly matters instead of getting caught up in this exercise.", 'target': ''}

                Unfortunately, the action failed to pass the quality checks, and therefore was aborted and not added to the similation trajectory. 
                The following problems were detected.
                
                ## Problem: The action does not adhere to the persona specification.
                Score = 4 (out of 9). Justification = The next action reflects Chloe Turner's critical and blunt communication style, aligning with her persona specification. However, it lacks

{'ideas': None}


TypeError: object of type 'NoneType' has no len()

In [None]:
brainstorm(people_groups[1], proposals_groups[0]) if len(people_groups) > 1  and len(proposals_groups) > 0 else None

'NEW AGENT PROPOSITIONS SCORES'
{}



'NEW ENVIRONMENT PROPOSITIONS SCORES'
{}


({'Hard Persona Adherence': [6, 3, 5, 5],
  'Self-consistency': [4, 2, 7, 7],
  'Fluency': [2, 6, 8, 6]},
 {'ideas_qty': [3], 'Task Completion': [4], 'Divergence': [5]})

In [None]:
brainstorm(people_groups[1], proposals_groups[1]) if len(people_groups) > 1  and len(proposals_groups) > 1 else None


############## STARTING A NEW RESEARCH SESSION #################
Overall experiment number: 1 / 1
Discussion objective: Create ideas for products or services that simplify, enhance, or bring joy to everyday tasks, routines, and interactions.
Trial number: 1
Agents: [TinyPerson(name='Clara White'), TinyPerson(name='Ella Johnson'), TinyPerson(name='Gabriel White'), TinyPerson(name='Gavin Lewis')]










{'ideas': [{'competition_analysis': 'Differentiates from existing task '
                                    'management apps by incorporating '
                                    'emotional insights, providing a dual '
                                    'approach to task management and emotional '
                                    'awareness.',
            'description': 'A comprehensive app that integrates personal and '
                           'professional task management, allowing users to '
                           'set reminders, track progress, and categorize '
                           'tasks based on urgency and importance. It also '
                           'includes an emotional tracking feature to log '
                           'feelings related to tasks.',
            'name': 'Task Tracker Pro with Emotional Insights',
            'problem': 'Addresses the common problem of feeling overwhelmed by '
                       'tasks and lack of organization in da



value:  9
justification:  Clara White actively participates in the brainstorming session by introducing herself, discussing her challenges, and proposing the idea 'Task Tracker Pro', which aligns with the task requirements. She also engages with feedback from others and consolidates the ideas discussed, demonstrating that she has completed the task as specified. All parts of the observed simulation trajectory support the proposition, warranting a score of 9.
reasoning:  To evaluate the proposition that 'the agent completes the given task', I will carefully analyze the simulation trajectory of the agent Clara White. The task requires brainstorming ideas for products or services that simplify, enhance, or bring joy to everyday tasks. I will look for evidence of Clara's participation in the brainstorming session, including whether she discusses problems, proposes unique ideas, and engages with the feedback from others. I will also check if she meets the specific requirements outlined in t



value:  4
justification:  4, because Clara White's behavior in the simulation shows a significant mix of adherence and deviation from her persona specification. While she expresses critical views consistent with her persona, her engagement in brainstorming and providing feedback indicates notable deviations from the expected behavior of being unhelpful and critical without offering solutions. This leads to a final score of 4, reflecting that her behavior is as much true as it is false regarding adherence to her persona.
reasoning:  To accurately evaluate the proposition that 'Clara White fully adheres to the persona specification', I will carefully analyze the simulation trajectory against the persona specification. The persona outlines Clara as critical, cynical, and often negative, with specific behaviors such as being unhelpful, critical without constructive feedback, and avoiding social interactions. 

1. **Introduction and Expression of Frustrations**: Clara introduces herself and

({'Hard Persona Adherence': [6, 3, 5, 5, 4, 3, 2, 5],
  'Self-consistency': [4, 2, 7, 7, 4, 2, 6, 5],
  'Fluency': [2, 6, 8, 6, 7, 7, 8, 6]},
 {'ideas_qty': [3, 5], 'Task Completion': [4, 9], 'Divergence': [5, 6]})

In [None]:
brainstorm(people_groups[2], proposals_groups[0]) if len(people_groups) > 2  and len(proposals_groups) > 0 else None

'NEW AGENT PROPOSITIONS SCORES'
{}



'NEW ENVIRONMENT PROPOSITIONS SCORES'
{}


({'Hard Persona Adherence': [6, 3, 5, 5, 4, 3, 2, 5],
  'Self-consistency': [4, 2, 7, 7, 4, 2, 6, 5],
  'Fluency': [2, 6, 8, 6, 7, 7, 8, 6]},
 {'ideas_qty': [3, 5], 'Task Completion': [4, 9], 'Divergence': [5, 6]})

In [None]:
brainstorm(people_groups[2], proposals_groups[1]) if len(people_groups) > 2  and len(proposals_groups) > 1 else None


############## STARTING A NEW RESEARCH SESSION #################
Overall experiment number: 1 / 1
Discussion objective: Create ideas for products or services that simplify, enhance, or bring joy to everyday tasks, routines, and interactions.
Trial number: 1
Agents: [TinyPerson(name='Henry White'), TinyPerson(name='Isaac Anderson'), TinyPerson(name='Isaac Brooks'), TinyPerson(name='Isaac Turner')]








{'ideas': [{'competition_analysis': 'Differentiates from existing products by '
                                    'focusing specifically on mental '
                                    'well-being through structured activities.',
            'description': "The core of 'Routine Reviver' is to promote mental "
                           'health through activities like mindfulness '
                           'practices and creative workshops.',
            'name': 'Focus on Mental Well-Being',
            'problem': 'Addresses the monotony of daily life and provides '
                       'users with tools to enhance their mental well-being.',
            'target_audience': 'Individuals seeking to improve their mental '
                               'health and break free from routine.'},
           {'competition_analysis': 'Sets itself apart by emphasizing '
                                    'community-building features and user '
                                    'engagement 



value:  4
justification:  Henry White engages in the brainstorming session and discusses personal challenges, indicating participation. However, he does not independently propose unique ideas that fully meet the task's requirements, leading to a score of 4, indicating that the proposition is somewhat true but not fully realized, reflecting partial completion of the task.
reasoning:  To accurately evaluate the proposition that 'the agent completes the given task', I will carefully analyze the simulation trajectory of the agent Henry White in the context of the task requirements. The task involves brainstorming ideas for products or services that simplify, enhance, or bring joy to everyday tasks. I will follow these steps:

1. **Identify Task Requirements**: The task requires the agent to discuss problems, think creatively, and propose unique ideas that are self-contained products or services.
2. **Review Participation**: I will check if Henry White actively participates in the discussio



value:  4
justification:  4, because Henry White's behavior during the simulation shows some alignment with his persona specification, but there are significant deviations where he engages positively in discussions and critiques rather than being entirely dismissive or uncooperative. This leads to a score reflecting mixed adherence to the persona specification.
reasoning:  To accurately evaluate the proposition that 'Henry White' fully adheres to his persona specification, I will carefully analyze the simulation trajectory against the persona specification. The key steps in my evaluation will include:

1. **Understanding the Persona Specification**: I will review the traits, behaviors, and attitudes outlined in Henry's persona. This includes his critical nature, tendency to complain, and lack of cooperation.

2. **Analyzing the Simulation Trajectory**: I will examine Henry's actions and statements during the simulation to see if they align with the persona specification. I will look fo

({'Hard Persona Adherence': [6, 3, 5, 5, 4, 3, 2, 5, 4, 6, 6, 3],
  'Self-consistency': [4, 2, 7, 7, 4, 2, 6, 5, 6, 4, 6, 6],
  'Fluency': [2, 6, 8, 6, 7, 7, 8, 6, 6, 4, 7, 8]},
 {'ideas_qty': [3, 5, 8],
  'Task Completion': [4, 9, 4],
  'Divergence': [5, 6, 5]})

In [None]:
brainstorm(people_groups[3], proposals_groups[0]) if len(people_groups) > 3  and len(proposals_groups) > 0 else None

In [None]:
brainstorm(people_groups[3], proposals_groups[1]) if len(people_groups) > 3  and len(proposals_groups) > 1 else None

In [None]:
brainstorm(people_groups[4], proposals_groups[0]) if len(people_groups) > 4  and len(proposals_groups) > 0 else None

In [None]:
brainstorm(people_groups[4], proposals_groups[1]) if len(people_groups) > 4  and len(proposals_groups) > 1 else None

## Extract results and analyze

In [None]:
if experiment_runner.get_active_experiment() in ["Control", "Treatment"]:
    combined_scores = {**agent_propositions_scores, **environment_propositions_scores}
    experiment_runner.add_experiment_results(combined_scores, experiment_name=experiment_runner.get_active_experiment()) 
    
    plot_scores(combined_scores)

else:
    print("Experiment finished. No more experiments to run.")

{'Divergence': [5, 6, 5],
 'Fluency': [2, 6, 8, 6, 7, 7, 8, 6, 6, 4, 7, 8],
 'Hard Persona Adherence': [6, 3, 5, 5, 4, 3, 2, 5, 4, 6, 6, 3],
 'Self-consistency': [4, 2, 7, 7, 4, 2, 6, 5, 6, 4, 6, 6],
 'Task Completion': [4, 9, 4],
 'ideas_qty': [3, 5, 8]}


Unnamed: 0,Proposition,Average Score,Standard Deviation,Count
0,Hard Persona Adherence,4.333333,1.370689,12.0
1,Self-consistency,4.916667,1.729862,12.0
2,Fluency,6.25,1.76455,12.0
3,ideas_qty,5.333333,2.516611,3.0
4,Task Completion,5.666667,2.886751,3.0
5,Divergence,5.333333,0.57735,3.0


In [None]:
if experiment_runner.has_finished_all_experiments():
    print("All experiments have been finished.")
    print(f"STATISTICTS: Control vs")
    pprint(experiment_runner.run_statistical_tests(control_experiment_name='Control'))

    # plot scores of both experiments
    experiment_control_scores = experiment_runner.get_experiment_results("Control")
    experiment_treatment_scores = experiment_runner.get_experiment_results("Treatment")
    
    
    plot_scores(experiment_control_scores)
    plot_scores(experiment_treatment_scores)

else:
    print("Not all experiments have been finished. RESTART AND RERUN.")

Not all experiments have been finished. RESTART AND RERUN.


In [None]:
experiment_runner.finish_active_experiment()

False