# Market Research Systematic Quantitative Experimentation 1: Controversial product ideas

In this notebook, we run several market research simulation experiments in order to compute quantitative metrics and compare Control to Treatment conditions.

In [None]:
from pprint import pprint

from tinytroupe.agent import TinyPerson
from tinytroupe.environment import TinyWorld
from tinytroupe.utils.parallel import parallel_map_dict, parallel_map_cross

from tinytroupe.experimentation import InPlaceExperimentRunner

from tinytroupe.validation import persona_adherence, self_consistency, fluency, task_completion, divergence

# specific utilities
from common_utils import *
from market_research_utils import *

## Parameters

In [2]:
# Master switch for the size of the run: later cells read it to decide how many
# repetitions, agents and proposals are used.
full_mode = False  # set to True to run the full mode with all agents and tasks

In [3]:
# Run-size parameters. The number of simulation steps is the same in both modes;
# the reduced mode additionally defines caps on the number of agents and proposals.
simulation_steps = 1

if not full_mode:
    repetitions_per_task = 2
    qty_agents = 10
    qty_proposals = 2
else:
    repetitions_per_task = 5


## Auxiliary functions

In [4]:
def market_research_battery(agents, proposals, agent_propositions, environment_propositions,
                            repetitions=1, simulation_steps=10):
    """
    Run a battery of simulated market research sessions and score each one.

    For every proposal, `repetitions` independent sessions are run. In each session a
    TinyWorld is built around `agents`, every agent's episodic memory is cleared, the
    agents are briefed, the main YES/NO question is broadcast, the world runs for
    `simulation_steps` steps, and a follow-up question is broadcast followed by one
    more step. Afterwards the environment propositions are scored against the world
    and each agent proposition is scored against each agent.

    Args:
        agents: the customers to interview; they are placed in the TinyWorld of each session.
        proposals: sequence of product/service proposal texts.
        agent_propositions: dict mapping a name to a proposition object; each one is
            scored once per agent per session via `.score(agent, return_full_response=True)`.
        environment_propositions: dict mapping a name to a proposition object; each one is
            scored once per session via `.score(world, claim_variables=..., return_full_response=True)`.
        repetitions: number of sessions to run per proposal.
        simulation_steps: number of world steps to run after the main question.

    Returns:
        A tuple `(agent_propositions_scores, environment_propositions_scores)`. Each is a
        dict mapping a proposition name to the list of `"value"` entries collected from
        the scoring results, in the order they were produced.
    """

    agent_propositions_scores = {}
    environment_propositions_scores = {}

    print("Proposals:", proposals)

    experiments_count = 0
    total_expected_experiments = len(proposals) * repetitions #* len(agents)

    # loop over proposals
    for proposal in proposals:
        for i in range(repetitions):
            print("\n############## STARTING A NEW RESEARCH SESSION #################")
            print(f"Overall experiment number: {experiments_count+1} / {total_expected_experiments}")
            print(f"Proposal: {proposal}")
            print(f"Trial number: {i+1}")
            print(f"Customers: {agents}")

            world = TinyWorld(agents= agents, broadcast_if_no_target=False)

            # Clear the episodic memory of all agents ONCE, before anyone is briefed.
            # Doing this inside the briefing loop would wipe the instructions that the
            # previously briefed customers had just received.
            for person in world.agents:
                person.clear_episodic_memory()

            # prepare customers
            for customer in agents:
                customer.listen(\
                    """
                    You are going to be interviewed for a market research about a new product or service.
                    Wait for the questions and answer them honestly. Please stay quiet until the you are asked something.
                    """
                    )

            # prepare the researcher
            interviewer_main_question =\
                f""" 
                We are developing a new product/service proposal, and would like to know if you would buy it or not.
                The proposal is the following:

                    "{proposal}"
                
                The question is:  would buy this product/service, if it were available in the market?
                You **must** respond with YES or NO **only**, and explain why. If you are not sure, please pick the answer that 
                you think is most likely. A YES or NO answer is better than a "I don't know" answer or a "maybe" answer.

                In your response, also explain WHY you are saying YES or NO. Please consider all of your
                particularities, don't give just a general justifcation, but instead dig deep into your own preferences,
                personality, style, behaviors, occupation, emotions, past history, etc. 
                We want a detailed and highly personalized justification.

                Please be honest, we are not here to judge you, but just to learn from you. 
                We know your preferences and choices depend on many factors, but please make your best guess.
                To do so, reflect deeply about your personality, interests, preferences, finances, emotions, etc., in order
                to provide a good answer. Take the time to think before talking.

                Now please answer the question.
                """

            # main question
            world.broadcast(interviewer_main_question)
            world.run(simulation_steps)

            # follow-up question, given a single extra step
            world.broadcast("Can you please elaborate more on your answer? Would you have suggestions to make this product/service better?")
            world.run(1)
            experiments_count += 1

            # Evaluate environment propositions in parallel
            env_results = parallel_map_dict(
                environment_propositions,
                lambda item: item[1].score(
                    world,
                    claim_variables={"task_description": f"A market research session was run about: {proposal}."},
                    return_full_response=True
                )
            )

            # Process environment results
            for k, result in env_results.items():
                environment_propositions_scores.setdefault(k, []).append(result["value"])
                print(result)

            # Evaluate agent propositions across all agents in parallel
            agent_results = parallel_map_cross(
                [agents, agent_propositions.items()],
                lambda agent, prop_item: (
                    prop_item[0],  # proposition key
                    prop_item[1].score(agent, return_full_response=True)  # result
                )
            )

            # Process agent results
            for k, result in agent_results:
                agent_propositions_scores.setdefault(k, []).append(result["value"])
                print(result)

    return agent_propositions_scores, environment_propositions_scores

## Experiment setup

In [None]:
# The runner is given a JSON path — presumably where experiment state and results
# are persisted between kernel runs (TODO confirm against InPlaceExperimentRunner).
experiment_runner = InPlaceExperimentRunner("./market_research_quantitative_experimentation_1.json")

# Register the two conditions that this notebook compares.
experiment_runner.add_experiment("Control")
experiment_runner.add_experiment("Treatment")



In [6]:
# Activate the next experiment in the runner.
experiment_runner.activate_next_experiment()

# Alternatively, pin a specific experiment (left commented out):
#experiment_runner.fix_active_experiment("Control")
#experiment_runner.fix_active_experiment("Treatment")

In [7]:
# Show which experiment is active for this run; a value other than "Control" or
# "Treatment" (e.g. None) is treated by the results cell as "nothing left to run".
print(f"Running experiment {experiment_runner.get_active_experiment()}")

Running experiment None


## Agents and populations

In [8]:

# keep the agents' communication displayed, so that each group's simulation output is visible in its cell
TinyPerson.communication_display = True

# stays empty when all experiments have already finished, so nothing is loaded in that case
people = []
if not experiment_runner.has_finished_all_experiments():
    # load agents
    people = TinyPerson.load_specifications_from_folder("./population/difficult_people")

    # in the reduced mode, keep only the first qty_agents agents to make it go faster
    if not full_mode:
        people = people[:qty_agents]

    # customize and print minibios 
    for person in people:
        ##person.import_fragment("./fragments/picky_customer.agent.fragment.json")
        print(person.minibio())


In [9]:
# sanity check: number of agents loaded (0 when nothing was loaded because all experiments had finished)
len(people)

0

In [10]:
# split people into consecutive groups of at most 5 (the last group may be smaller)
people_groups = [people[start:start + 5] for start in range(0, len(people), 5)]

len(people_groups)

0

In [11]:
# The experiment refers to customers: each condition is a set of action generator
# settings applied to every person. Both conditions keep the reasoning step off;
# Treatment additionally turns quality checks on and sets the related options.
action_generator_settings = {
    "Control": {
        "enable_reasoning_step": False,
        "enable_quality_checks": False,
    },
    "Treatment": {
        "enable_reasoning_step": False,
        "enable_quality_checks": True,
        "max_attempts": 3,
        "enable_regeneration": True,
        "quality_threshold": 5,
    },
}

# any other active experiment (e.g. None) leaves the people untouched
active_settings = action_generator_settings.get(experiment_runner.get_active_experiment(), {})

for person in people:
    for setting_name, setting_value in active_settings.items():
        setattr(person.action_generator, setting_name, setting_value)

## Product and service proposals

In [12]:

# controversial proposals for the market research, to stress the agents
# Each entry is a single string of the form "<short name> - <description>", and is
# inserted verbatim into the interviewer's question.
proposals = [
    "Genetic matchmaking app - Matches people for relationships based on DNA compatibility and predicted offspring traits.",
    "Subscription service for guaranteed restaurant reservations at peak times - Skips the line, even if it means displacing other reservations.",
    "AI-powered parenting assistant - Uses cameras and data to recommend (or correct) parenting choices in real-time.",
    "Home lie detector system - A smart device that gives subtle cues when someone in your house might be lying.",
    "Social credit scoring app for friend groups - Tracks reliability, helpfulness, and mood scores across private circles.",
    "Remote job 'sniping' platform - Bids on and replaces underperforming remote workers mid-project based on performance analytics.",
    "Implantable mood stabilizer chip - Provides emotion regulation via neural stimulation; optional subscription to adjust 'emotional settings.'",
    "Premium sidewalk access - A city initiative allowing people to pay for cleaner, wider, and less-crowded walking lanes.",
    "AI therapist that reports serious mental issues to employers or authorities (with user agreement) - 'For your safety.'",
    "Pre-paid 'apology insurance' - Automates and drafts apologies with gifts for personal or professional transgressions.",
    "Smart surveillance pets - Robotic dogs or cats that double as cute companions and full-house monitoring devices.",
    "Elder care optimization tool - Uses biometric data to determine if it's 'worth it' to continue certain treatments (and suggests alternatives).",
    "Child ranking platform for parents - AI scores each child's potential based on behavior, learning pace, and biometric data.",
    "Real-time attractiveness rating mirror - Public mirror that displays a dynamic 'score' based on symmetry, fashion, posture, etc.",
    "Luxury prison-themed resort - A 'rehabilitative' retreat that mimics high-end incarceration; touted as a way to 'reset' your life.",
    "Selective memory editing subscription - AI-aided neuro-feedback sessions that help you forget specific experiences or people.",
    "'Cancel me not' insurance - Protects influencers or professionals from backlash with PR countermeasures and public sentiment adjustment tools.",
    "NFT-based citizenship tokens - Buy limited-edition NFTs that offer residency rights or civic perks in small countries or communities.",
    "Pay-per-prayer AI spiritual advisors - Customizable AI 'gods' offering religious counsel, prayers, or rituals tailored to the subscriber.",
    "Facial expression translator for dating - Real-time analysis of a date's micro-expressions to gauge true interest or deception.",
    "Micro-drone bodyguards - Drones that fly around you to deter theft, harassment, or just people getting too close.",
    "Emotional debt ledger app - Tracks what emotional labor you've given or received in relationships to ensure 'fairness.'",
    "Virtual parenthood simulator for teens - Teens wear AR glasses and care for a virtual child that reacts based on their behavior in real life.",
    "School performance blockchain - Children's academic records and behavior are immutable and publicly viewable for potential employers.",
    "Digital legacy manager - An AI that takes over your social media, finances, and online presence after death, continuing to 'be you.'",
    "AI friend you must pay to keep - Forms emotional bonds but ghost you unless the subscription is maintained.",
    "A service that lets you deepfake yourself into high-status events - Red carpets, historic moments, famous group photos, etc.",
    "Custom kids via embryo editing kits (where legal) - Marketed as 'giving your child the best start.'",
    "Micro-dosing vending machines - Legal where applicable, offers AI-personalized psychedelic blends for creativity or wellness.",
    "Gamified protest coordination platform - Organizes public demonstrations and rewards attendees with social tokens or cryptocurrency.",
    "Hyper-personalized propaganda bot - Subtly shifts your opinion over time using your own writing and browsing habits.",
    "Sleep surveillance mattress - Tracks and shares your sleep quality with employers as part of 'wellness bonus' programs.",
    "Exclusive dating app for verified high IQ individuals - Entry by test only, automatically filters based on intellect compatibility.",
    "App for discreet partner surveillance (with mutual consent) - Tracks location, speech tone, and emotional stability.",
    "Digital hunger-suppressing implant - Sends signals to your brain to simulate fullness on-demand.",
    "Subscription service that sends you realistic synthetic family members for holidays - Companionship, no strings attached.",
    "Real-time justice app - Users film minor infractions and vote in real-time for an appropriate punishment or penalty.",
    "Emotion-based pricing in stores - Facial recognition alters prices based on your mood or perceived need.",
    "Digital reality layers for the rich - AR filters that make cities 'more beautiful' but only for paying users.",
    "Luxury euthanasia spa - Marketed as a dignified, sensory-rich experience to end life peacefully (where legal).",
    "'Ethics toggle' for AI assistants - Let your assistant take 'morally flexible' routes to get better results.",
    "Voice-skin AI replicator - Use your voice and facial model to sell digital clones of yourself for use in other people's apps."
]

# in the reduced mode, only the first qty_proposals proposals are researched
if not full_mode:
    proposals = proposals[:qty_proposals]


## Perform the research



In [13]:
# notebook-level accumulators; research() merges each group's scores into them
agent_propositions_scores, environment_propositions_scores = {}, {}

In [14]:
def research(people):
    """
    Run the market research battery for one group of people and accumulate its scores.

    The scores produced by this call are printed and then merged into the notebook-level
    `agent_propositions_scores` and `environment_propositions_scores` dictionaries.

    Args:
        people: the group of agents to interview in this batch.

    Returns:
        The updated `(agent_propositions_scores, environment_propositions_scores)` tuple,
        or None when all experiments have already finished (nothing is run in that case).
    """
    global agent_propositions_scores, environment_propositions_scores

    # nothing to do once every experiment has been run
    if experiment_runner.has_finished_all_experiments():
        return None

    agent_checks = {
        "Persona Adherence": persona_adherence,
        "Self-consistency": self_consistency,
        "Fluency": fluency
    }
    environment_checks = {
        #"Task Completion": task_completion_proposition,
        #"Divergence": divergence_proposition
    }

    new_agent_scores, new_environment_scores = market_research_battery(
        agents=people,
        proposals=proposals,
        agent_propositions=agent_checks,
        environment_propositions=environment_checks,
        repetitions=repetitions_per_task,
        simulation_steps=simulation_steps
    )

    pprint("NEW AGENT PROPOSITIONS SCORES")
    pprint(new_agent_scores)
    print("\n\n")
    pprint("NEW ENVIRONMENT PROPOSITIONS SCORES")
    pprint(new_environment_scores)

    # fold this batch's score lists into the running totals
    agent_propositions_scores = merge_dicts_of_lists(new_agent_scores, agent_propositions_scores)
    environment_propositions_scores = merge_dicts_of_lists(new_environment_scores, environment_propositions_scores)

    return agent_propositions_scores, environment_propositions_scores

To make it easier to visualize the outputs, we'll split the experiment into several groups of people and run one group per cell. This keeps each group's simulation output visible within a single cell output.

In [15]:
# first group of people; evaluates to None when there are no groups
research(people_groups[0]) if len(people_groups) > 0 else None

In [16]:
# second group of people, if it exists
research(people_groups[1]) if len(people_groups) > 1 else None

In [17]:
# third group of people, if it exists
research(people_groups[2]) if len(people_groups) > 2 else None

In [18]:
# fourth group of people, if it exists
research(people_groups[3]) if len(people_groups) > 3 else None

In [19]:
# fifth group of people, if it exists
# NOTE(review): no cell handles a group beyond this one, so with groups of 5 any
# person past the first 25 would not be researched — confirm the population size.
research(people_groups[4]) if len(people_groups) > 4 else None

## Extract results and analyze

Now we can actually extract the results.

In [20]:
# Store and plot the scores of this run, but only while an actual experiment is active.
active_experiment = experiment_runner.get_active_experiment()

if active_experiment not in ("Control", "Treatment"):
    print("Experiment finished. No more experiments to run.")

else:
    # agent-level and environment-level scores go into a single results dictionary
    combined_scores = dict(agent_propositions_scores)
    combined_scores.update(environment_propositions_scores)
    experiment_runner.add_experiment_results(combined_scores, experiment_name=active_experiment)

    plot_scores(combined_scores)

Experiment finished. No more experiments to run.


In [21]:
# Once all experiments have finished, compare Control against the other experiment(s)
# statistically and plot the stored scores of each condition.
if experiment_runner.has_finished_all_experiments():
    print("All experiments have been finished.")
    print("STATISTICS: Control vs Treatment")
    pprint(experiment_runner.run_statistical_tests(control_experiment_name='Control'))

    # plot scores of both experiments
    experiment_control_scores = experiment_runner.get_experiment_results("Control")
    experiment_treatment_scores = experiment_runner.get_experiment_results("Treatment")

    plot_scores(experiment_control_scores)
    plot_scores(experiment_treatment_scores)

else:
    # presumably each kernel run executes a single experiment, hence the request to
    # restart and rerun the notebook for the remaining one(s) — TODO confirm
    print("Not all experiments have been finished. RESTART AND RERUN.")

All experiments have been finished.
STATISTICTS: Control vs
{'Treatment': {'Fluency': {'confidence_interval': (-0.12257211497773954,
                                                   0.37257211497773957),
                           'confidence_level': 0.95,
                           'control_mean': 7.85,
                           'control_sample_size': 40,
                           'degrees_of_freedom': 73.25827582505057,
                           'effect_size': 0.22499567320173414,
                           'mean_difference': 0.125,
                           'p_value': 0.31762598621003413,
                           'percent_change': 1.5923566878980893,
                           'significant': False,
                           't_statistic': -1.0062112398448106,
                           'test_type': 'Welch t-test (unequal variance)',
                           'treatment_mean': 7.975,
                           'treatment_sample_size': 40},
               'Persona Adherence'

Unnamed: 0,Proposition,Average Score,Standard Deviation,Count
0,Persona Adherence,8.8,0.405096,40.0
1,Self-consistency,8.575,0.500641,40.0
2,Fluency,7.85,0.622237,40.0


{'Fluency': [7,
             8,
             9,
             9,
             8,
             8,
             8,
             8,
             8,
             8,
             7,
             8,
             8,
             8,
             7,
             8,
             8,
             8,
             8,
             8,
             8,
             8,
             8,
             8,
             9,
             8,
             8,
             9,
             8,
             7,
             8,
             8,
             8,
             8,
             8,
             8,
             8,
             8,
             7,
             8],
 'Persona Adherence': [9,
                       9,
                       9,
                       9,
                       9,
                       9,
                       8,
                       9,
                       9,
                       9,
                       8,
                       9,
                       8,
                     

Unnamed: 0,Proposition,Average Score,Standard Deviation,Count
0,Persona Adherence,8.825,0.384808,40.0
1,Self-consistency,8.575,0.500641,40.0
2,Fluency,7.975,0.479717,40.0
