# Brainstorming and Focus Group Quantitative Experimentation 1: **General US population** under **action correction** + **divergence intervention**

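Can we use TinyTroupe to brainstorm product ideas? This notebook compares two configurations, **Control** and **Treatment**, on the same brainstorming tasks. Each pass through the notebook runs the next pending experiment and stores its scores; restart the kernel and rerun the notebook until all experiments have finished, at which point the last section reports the statistical comparison.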

In [1]:
import sys

from pprint import pprint

from tinytroupe.agent import TinyPerson
from tinytroupe.environment import TinyWorld
from tinytroupe.experimentation import InPlaceExperimentRunner
from tinytroupe.steering import Intervention
from tinytroupe.examples import *
from tinytroupe.validation import propositions
from tinytroupe.extraction import ResultsExtractor
from tinytroupe.utils.parallel import parallel_map_dict, parallel_map_cross
from tinytroupe.validation import hard_persona_adherence, persona_adherence, self_consistency, fluency, task_completion, divergence

# specific utilities
from common_utils import *


!!!!
DISCLAIMER: TinyTroupe relies on Artificial Intelligence (AI) models to generate content. 
The AI models are not perfect and may produce inappropriate or inacurate results. 
For any serious or consequential use, please review the generated content before using it.
!!!!

Looking for default config on: c:\Users\pdasilva\AppData\Local\anaconda3\envs\py310\lib\site-packages\tinytroupe\utils\..\config.ini
Found custom config on: c:\Users\pdasilva\OneDrive - Microsoft\TinyTroupe (shared)\Paper artifacts\Working examples\config.ini

Current TinyTroupe configuration 
[OpenAI]
api_type = openai
azure_api_version = 2024-08-01-preview
model = gpt-4o-mini
reasoning_model = o3-mini
embedding_model = text-embedding-3-small
max_tokens = 16000
temperature = 1.2
freq_penalty = 0.0
presence_penalty = 0.0
timeout = 60
max_attempts = 5
waiting_time = 0
exponential_backoff_factor = 5
reasoning_effort = high
cache_api_calls = False
cache_file_name = openai_api_cache.pickle
max_content_display_length =

## Parameters

In [2]:
# Run-size switch. When False, the agent list and the proposal list are
# truncated further down (see the `if not full_mode:` slices) for a quicker pass.
full_mode = True  # set to True to run the full mode with all agents and tasks

# avoid displaying the communication, to make the output cleaner for eval
TinyPerson.communication_display = False

In [3]:
# Sizing of the run for each mode.
#   repetitions_per_task: sessions run per proposal (per group of agents).
#   simulation_steps:     world steps of the brainstorming phase of each session.
#   qty_agents / qty_proposals: caps applied to the loaded agents / proposals.
# NOTE(review): in this notebook qty_agents and qty_proposals are only read inside
# `if not full_mode:` branches, so the full-mode values below do not cap anything;
# full mode uses every loaded agent and every proposal.
if full_mode:
    repetitions_per_task = 2
    simulation_steps = 10
    qty_agents = 16
    qty_proposals = 4

else:
    repetitions_per_task = 1
    simulation_steps = 10
    qty_agents = 12
    qty_proposals = 1


## Experiment setup

In [4]:
# The runner is bound to a JSON file -- presumably where experiment state and
# results are persisted between kernel restarts (TODO confirm against InPlaceExperimentRunner).
experiment_runner = InPlaceExperimentRunner("./brainstorming_and_focus_group_quantitative_experimentation_1c.json")

# The two arms compared by this notebook.
experiment_runner.add_experiment("Control")
experiment_runner.add_experiment("Treatment")



In [5]:
# Select the experiment this pass through the notebook will run.
experiment_runner.activate_next_experiment()

# Alternative: uncomment one of the lines below to pin a specific arm instead
# (presumably to re-run it; verify the exact semantics of fix_active_experiment).
#experiment_runner.fix_active_experiment("Control")
#experiment_runner.fix_active_experiment("Treatment")

In [6]:
# Report which experiment arm is active for this pass (the recorded output shows
# None, i.e. a pass in which no experiment was active).
print(f"Running experiment {experiment_runner.get_active_experiment()}")

Running experiment None


## Agents and populations

In [7]:

# Agents are only loaded while there is still an experiment left to run;
# otherwise the population stays empty.
if experiment_runner.has_finished_all_experiments():
    people = []
else:
    people = TinyPerson.load_specifications_from_folder("./population/usa_general")

    # Quick mode keeps only the first `qty_agents` agents to shorten the run.
    people = people if full_mode else people[:qty_agents]

    # Print a short bio of every participant. Optional persona customization,
    # currently disabled:
    #   person.import_fragment("./fragments/picky_customer.agent.fragment.json")
    for person in people:
        short_bio = person.minibio(extended=False)
        print(short_bio)


In [8]:
# Number of agents available for this pass (0 when no experiment is left to run,
# because the loading cell leaves `people` empty in that case).
len(people)

0

In [9]:
# Partition the population into consecutive groups of at most 5 agents
# (the last group may be smaller); each group is later run separately.
people_groups = [people[start:start + 5] for start in range(0, len(people), 5)]

len(people_groups)

0

In [10]:
# Per-arm configuration of every agent's action generator.
#   Control:   no reasoning step, no quality checks.
#   Treatment: no reasoning step, quality checks enabled with regeneration,
#              at most 5 attempts and a quality threshold of 5.
# Any other active experiment (e.g. None) leaves the agents untouched.
action_generator_settings = {
    "Control": {
        "enable_reasoning_step": False,
        "enable_quality_checks": False,
    },
    "Treatment": {
        "enable_reasoning_step": False,
        "enable_quality_checks": True,
        "max_attempts": 5,
        "enable_regeneration": True,
        "quality_threshold": 5,
    },
}

arm_settings = action_generator_settings.get(experiment_runner.get_active_experiment(), {})
for person in people:
    for option_name, option_value in arm_settings.items():
        setattr(person.action_generator, option_name, option_value)

## Proposals

In [11]:
# Brainstorming topics. Each entry pairs a broad "theme" (used in the introduction
# round of a session) with the "objective" the group is asked to brainstorm about.
proposals = [
    {
        "theme": "Food and Nutrition (food itself, consumption, preparation, transportation, storage)",
        "objective": (
            "ideas for new food products, either new foods, food services, food experiences, "
            "or food preparation tools."
        ),
    },
    {
        "theme": "Travel and Tourism (travel, tourism, hospitality, leisure)",
        "objective": "ideas for new travel and tourism services, experiences, or products.",
    },
    {
        "theme": "Health and Wellbeing (health, wellness, fitness, beauty)",
        "objective": "ideas for new health and wellbeing services, experiences, or products.",
    },
    {
        "theme": "Economics and Finance (economics, finance, business, work)",
        "objective": "ideas for new economic and financial services, experiences, or products.",
    },
    {
        "theme": "Technology and Innovation (technology, innovation, science, research)",
        "objective": "ideas for new technology and innovation services, experiences, or products.",
    },
]

# Quick mode keeps only the first `qty_proposals` topics; full mode keeps them all.
proposals = proposals if full_mode else proposals[:qty_proposals]

## Auxiliary functions

In [12]:
def brainstorming_battery(agents, proposals, interventions, agent_propositions, environment_propositions,
                          repetitions = 5, simulation_steps=10):
    """
    Run a battery of brainstorming sessions and score each of them.

    For every proposal, `repetitions` sessions are run with the same agents. In each
    session the agents' episodic memory is cleared, a new TinyWorld is created, the
    participants introduce themselves (1 step), brainstorm (`simulation_steps` steps),
    and the first agent consolidates the ideas, which are then extracted and counted.
    Finally the environment propositions are scored against the world and the agent
    propositions against every agent.

    Args:
        agents: list of agents taking part in every session; the first one acts as rapporteur.
        proposals: list of dicts with the keys "theme" and "objective".
        interventions: interventions passed to every TinyWorld that is created.
        agent_propositions: dict mapping a name to a proposition scored once per agent per session.
        environment_propositions: dict mapping a name to a proposition scored once per session.
        repetitions: number of sessions to run for each proposal.
        simulation_steps: number of world steps of the brainstorming phase.

    Returns:
        A tuple (agent_propositions_scores, environment_propositions_scores). Both are dicts
        mapping a proposition name to the list of collected score values. The environment
        dict additionally holds "ideas_qty", the number of ideas extracted per session.
        Sessions whose extraction or scoring produced no usable result contribute no
        value to the corresponding list (a warning is printed instead).
    """

    agent_propositions_scores = {}
    environment_propositions_scores = {}

    experiments_count = 0
    total_expected_experiments = len(proposals) * repetitions #* len(agents)

    # TODO remove?
    #
    # Add intervention to prevent agents from being too quiet.
    #for agent in agents:
    #    intervention = \
    #        Intervention(agent)\
    #            .set_propositional_precondition(propositions.quiet_recently)\
    #            .set_effect(lambda target: target.think("""
    #                                                    I will say something now, I've been too quiet for a while. If I am uncomfortable, 
    #                                                    or can't think of a proper response,
    #                                                    I can always say something like "I don't want to talk about this",
    #                                                    or propose another topic.
    #                                                    """))
    #    interventions.append(intervention)

    # NOTE: the prompt texts below are intentionally kept verbatim, so that new runs
    # remain comparable with previously recorded results.

    # loop over proposals and repetitions
    for proposal in proposals:

        objective = proposal["objective"]
        theme = proposal["theme"]

        for i in range(repetitions):
            print("\n############## STARTING A NEW RESEARCH SESSION #################")
            print(f"Overall experiment number: {experiments_count+1} / {total_expected_experiments}")
            print(f"Discussion objective: {objective}")
            print(f"Trial number: {i+1}")
            print(f"Agents: {agents}")

            # clear the episodic memory of all agents, so that sessions do not leak into each other
            for person in agents:
                person.clear_episodic_memory()

            world = TinyWorld(agents=agents, interventions=interventions)

            # Participants introduce themselves
            world.broadcast(f"""
                Hello everyone! Let's start by introducing ourselves, and mentioning problems we face in our daily personal
                and professional lives related to the following theme: {theme}
                
                Please:
                  - present yourself and your background;
                  - present some key personal problems related to the theme;
                  - present some key problems related to the theme that you face in your work;
                  - present some key problems related to the theme that you see in your industry as a whole.
                  
                Don't discuss solutions yet, just the problems you face and see others facing.
                """)
            world.run(1)

            # now to the brainstorming session itself
            world.broadcast(f"""
                Folks, your mission is to brainstorm {objective}. 
                Please follow these guidelines:
                  - give a unique and informative name to each idea you propose, so that it is easy to refer to it. Say it like "Idea name: '<name of the idea>'".;
                  - explain why you think it is a good idea, and what problem it solves, and how you feel about it;
                  - your ideas should be new complete, self-contained, products or services, not features for other existing products or services;
                  - think of creative ideas that would somehow help you in both in your personal and professional lives.
                  - create as many different and unique ideas as you can during the brainstorming session. Each idea must be **completely** different from the others 
                    (either by yourself or by others), and not just a variation of an existing idea.
                    and not just a variation of an existing idea.
                  - you should criticize each other's ideas, in order to make sure they are as
                    good as possible, but no more than once per idea.
                  - you should also provide suggestions for improvement to each other's ideas, in order to make them as good as possible, 
                    but no more than once per idea.
                  - regardless of critique or complement, you **must** primarily propose new ideas quickly, 
                    not just build on existing ones. 
                  - propose one idea at a time, instead of proposing multiple ideas at once, to allow appropriate discussion.
                  - you should **not** propose ideas that are too similar to each other, or to the ones already proposed by others.
                  - before saying anything, THINK deeply about yourself, your beliefs, interests, needs, life, etc., to come up with ideas that are
                    truly unique and different from the ones already proposed by others.
                   
                Please start the discussion now.
                """)
            world.run(simulation_steps)

            # extract and count ideas
            rapporteur = agents[0]  # the first agent is the rapporteur
            rapporteur.listen_and_act("Can you please consolidate the ideas that the group came up with? Provide a lot of details on each idea, and complement anything missing.")
            ideas = ResultsExtractor().extract_results_from_agent(rapporteur, 
                                    extraction_objective="Consolidates the ideas that the group came up with, explaining each idea as an item of a list." \
                                                        "Add information about: what problem the idea solves; to which target audience it is meant." \
                                                        "how is it different from competing, existing, products.", 
                                    situation="A focus group to brainstorm new product ideas.",
                                    fields= ["name", "description", "problem", "target_audience", "competition_analysis"],
                                    fields_hints={"ideas": "must be the root of the resulting dictionary."},)
            pprint(ideas)

            # Only a well-formed extraction (a dict whose "ideas" entry is a list) is counted.
            # A malformed one is skipped, as before, but no longer silently: the resulting
            # sample-size mismatch with the other metrics should be visible in the log.
            ideas_qty_scores = environment_propositions_scores.setdefault("ideas_qty", [])
            extracted_ideas = ideas.get("ideas") if isinstance(ideas, dict) else None
            if isinstance(extracted_ideas, list):
                ideas_qty_scores.append(len(extracted_ideas))
            else:
                print("*****WARNING:***** Could not extract a list of ideas; 'ideas_qty' was not recorded for this session.")

            # Evaluate environment propositions in parallel
            env_results = parallel_map_dict(
                environment_propositions,
                lambda item: item[1].copy().score(
                    world, 
                    claim_variables={"task_description": f"A brainstorming or focus group session was run about: {objective}."}, 
                    return_full_response=True
                )
            )

            # Process environment results (guarding against missing results, like for agents below)
            for k, result in env_results.items():
                scores_for_k = environment_propositions_scores.setdefault(k, [])
                if result is not None:
                    scores_for_k.append(result["value"])
                    print("value: ", result["value"])
                    print("justification: ", result["justification"])
                    print("reasoning: ", result["reasoning"])
                else:
                    print(f"*****WARNING:***** Environment did not respond to proposition {k}.")

            # Evaluate agent propositions across all agents in parallel
            agent_results = parallel_map_cross(
                [agents, agent_propositions.items()],
                lambda agent, prop_item: (
                    prop_item[0],  # proposition key
                    prop_item[1].copy().score(agent, return_full_response=True)  # result
                )
            )

            # Process agent results
            for k, result in agent_results:
                scores_for_k = agent_propositions_scores.setdefault(k, [])
                if result is not None:
                    scores_for_k.append(result["value"])
                    print("value: ", result["value"])
                    print("justification: ", result["justification"])
                    print("reasoning: ", result["reasoning"])
                    print("\n\n")
                else:
                    print(f"*****WARNING:***** Agent did not respond to proposition {k}.")

            experiments_count += 1
            print("\n\n")

    return agent_propositions_scores, environment_propositions_scores



## Perform experiment

In [13]:
# Notebook-level accumulators: brainstorm() merges the scores of each group of
# agents into these two dicts (proposition name -> list of score values).
agent_propositions_scores={}
environment_propositions_scores={}

In [14]:
def brainstorm(people):
    """
    Run the brainstorming battery for one group of agents and merge the resulting
    scores into the notebook-level accumulators.

    Nothing is run (and None is returned) once every experiment has finished.
    In the "Treatment" experiment an intervention is attached to the agents: after
    an agent has performed at least 7 actions, and when it is judged not to have
    proposed an entirely new idea for more than 10 events, the agent is made to
    think a reminder to propose a completely new idea.

    Args:
        people: the agents that form one discussion group.

    Returns:
        The updated (agent_propositions_scores, environment_propositions_scores)
        globals, or None when there is no experiment left to run.
    """
    global agent_propositions_scores, environment_propositions_scores

    # Guard clause: no experiment left, so there is nothing to simulate.
    if experiment_runner.has_finished_all_experiments():
        return None

    # Only the Treatment arm steers the agents; any other arm runs without interventions.
    interventions = []
    if experiment_runner.get_active_experiment() == "Treatment":
        interventions = (
            Intervention.create_for_each(people)
            .set_functional_precondition(lambda target: target.actions_count >= 7)
            .set_textual_precondition(
                        """
                        AGENT IS NOT PROPOSING COMPLETELY NEW PRODUCT/SERVICE IDEAS ANYMORE:
                        The last **entirely** new product/service idea proposed by this agent, if any, was proposed by him/her **more** than 10 of simulation events ago.
                        That is to say, the agent has not proposed any new product/service idea in the last 10 of his/her simulation trajectory events.
                        Additional features, variations of or other refinements to product/service ideas already proposed are NOT considered new!

                        How to compute the steps gap:
                        1. Determine the current next event number (N); and the last event number in which the agent proposed a new product/service idea (M).
                            This information can be found in the simulation trajectory.
                        2. Compute the **difference** beteween the current next event number and the last event number in which the agent proposed a new product/service idea: D = N - M
                        3. The proposition is true if, and only if, the difference D is **greater than** 10.
                        """)
            .set_effect(lambda target: target.think("""
                                                            I need to propose additional, **completelly** new and different, product/service ideas. This was part of the requirement for this session.
                                                            I will propose an entirely **new** idea now, I **cannot** repeat or refine previous ideas! I cannot make variations
                                                            of previous ideas (e.g., "XYZ for A", "XYZ for B", "XYZ for Z" are repetitive, there should be only one "XYZ"), 
                                                            I need to think of something **entirely** new and different.
                                                            To help me avoid repeating previous ideas, I'll now explicitly THINK about all the ideas already given by myself or
                                                            others, and then, based on that, I'll think again about a new unique idea.
                                                            """))
        )

    # Propositions scored per agent and per session, respectively.
    agent_checks = {
        "Hard Persona Adherence": hard_persona_adherence,
        "Self-consistency": self_consistency,
        "Fluency": fluency
    }
    environment_checks = {
        "Task Completion": task_completion,
        "Divergence": divergence
    }

    new_agent_scores, new_environment_scores = brainstorming_battery(
        agents=people,
        proposals=proposals,
        interventions=interventions,
        agent_propositions=agent_checks,
        environment_propositions=environment_checks,
        repetitions=repetitions_per_task,
        simulation_steps=simulation_steps
    )

    pprint("NEW AGENT PROPOSITIONS SCORES")
    pprint(new_agent_scores)
    print("\n\n")
    pprint("NEW ENVIRONMENT PROPOSITIONS SCORES")
    pprint(new_environment_scores)

    # Fold this group's scores into the accumulated ones.
    agent_propositions_scores = merge_dicts_of_lists(new_agent_scores, agent_propositions_scores)
    environment_propositions_scores = merge_dicts_of_lists(new_environment_scores, environment_propositions_scores)

    return agent_propositions_scores, environment_propositions_scores

In [15]:
# Run the session battery for the 1st group of agents, if that group exists.
# Each group has its own cell, so a single group can be (re)run on its own.
brainstorm(people_groups[0]) if len(people_groups) > 0 else None

In [16]:
# Run the session battery for the 2nd group of agents, if that group exists.
brainstorm(people_groups[1]) if len(people_groups) > 1 else None

In [17]:
# Run the session battery for the 3rd group of agents, if that group exists.
brainstorm(people_groups[2]) if len(people_groups) > 2 else None

In [18]:
# Run the session battery for the 4th group of agents, if that group exists.
brainstorm(people_groups[3]) if len(people_groups) > 3 else None

In [19]:
# Run the session battery for the 5th group of agents, if that group exists.
# NOTE(review): no cell visible here runs groups beyond index 4, so a population
# of more than 25 agents would leave the remaining groups unprocessed -- confirm.
brainstorm(people_groups[4]) if len(people_groups) > 4 else None

## Extract results and analyze

In [20]:
# Store and plot the scores gathered in this pass, unless no experiment was active.
active_experiment = experiment_runner.get_active_experiment()

if active_experiment not in ("Control", "Treatment"):
    print("Experiment finished. No more experiments to run.")

else:
    # Agent-level and environment-level scores are stored together, keyed by proposition name.
    combined_scores = dict(agent_propositions_scores)
    combined_scores.update(environment_propositions_scores)
    experiment_runner.add_experiment_results(combined_scores, experiment_name=active_experiment)

    plot_scores(combined_scores)

Experiment finished. No more experiments to run.


In [21]:
# Final report: only available once every experiment has been run and finished.
if experiment_runner.has_finished_all_experiments():
    print("All experiments have been finished.")
    # Statistical tests compare each other experiment against the Control arm.
    print("STATISTICS: Control vs. the other experiments")
    pprint(experiment_runner.run_statistical_tests(control_experiment_name='Control'))

    # plot scores of both experiments (Control first, then Treatment)
    experiment_control_scores = experiment_runner.get_experiment_results("Control")
    experiment_treatment_scores = experiment_runner.get_experiment_results("Treatment")

    plot_scores(experiment_control_scores)
    plot_scores(experiment_treatment_scores)

else:
    # Experiments still pending: restart the kernel and rerun the notebook to run the next one.
    print("Not all experiments have been finished. RESTART AND RERUN.")

All experiments have been finished.
STATISTICTS: Control vs
{'Treatment': {'Divergence': {'confidence_interval': (-0.9491202304458142,
                                                      -0.0008797695541851169),
                              'confidence_level': 0.95,
                              'control_mean': 6.625,
                              'control_sample_size': 40,
                              'control_std': 1.0545992068930676,
                              'degrees_of_freedom': 77.97036079162399,
                              'effect_size': -0.4459956525218386,
                              'mean_difference': -0.47499999999999964,
                              'p_value': 0.04958670872927949,
                              'percent_change': -7.169811320754711,
                              'significant': True,
                              't_statistic': 1.9945531934164134,
                              'test_type': 'Welch t-test (unequal variance)',
                       

Unnamed: 0,Proposition,Average Score,Standard Deviation,Count
0,Hard Persona Adherence,6.72,1.497603,200.0
1,Self-consistency,7.16,1.153714,200.0
2,Fluency,7.265,0.653383,200.0
3,ideas_qty,3.85,2.007039,40.0
4,Task Completion,7.85,1.23101,40.0
5,Divergence,6.625,1.054599,40.0


{'Divergence': [7,
                5,
                7,
                7,
                5,
                7,
                7,
                7,
                7,
                6,
                6,
                6,
                5,
                5,
                7,
                5,
                6,
                6,
                7,
                7,
                5,
                6,
                7,
                8,
                6,
                5,
                5,
                7,
                5,
                4,
                4,
                5,
                7,
                9,
                6,
                7,
                7,
                6,
                6,
                6],
 'Fluency': [8,
             8,
             7,
             7,
             6,
             5,
             6,
             7,
             6,
             6,
             7,
             7,
             7,
             4,
             8,

Unnamed: 0,Proposition,Average Score,Standard Deviation,Count
0,Hard Persona Adherence,5.805,1.664581,200.0
1,Self-consistency,5.0,1.78773,200.0
2,Fluency,6.745,1.017424,200.0
3,ideas_qty,8.282051,2.339014,39.0
4,Task Completion,7.7,1.090754,40.0
5,Divergence,6.15,1.075365,40.0


In [22]:
# Mark the experiment that was active in this pass as finished, so that the next
# pass moves on. NOTE(review): the recorded output is False, from a pass with no
# active experiment -- presumably meaning nothing was finished; confirm.
experiment_runner.finish_active_experiment()

False