# Synthetic Data Generation


In [4]:
import json
import sys
import csv
sys.path.append('..')


import tinytroupe
from tinytroupe.agent import TinyPerson
from tinytroupe.environment import TinyWorld, TinySocialNetwork
from tinytroupe.factory import TinyPersonFactory
from tinytroupe.extraction import ResultsReducer
import tinytroupe.control as control

Let's create the specific kinds of agents we need in order to collect the data.

In [5]:
# Build a persona factory from a free-text context description; person
# generation is based on this context (it is echoed in the generation log).
factory = TinyPersonFactory("A random knowledge worker in a company providing marketing services.")

In [6]:
# Generate the agents that will take part in the simulation, printing a short
# bio for each one as soon as it is created.
people = []
for i in range(2):
    # NOTE(review): temperature=1.6 is presumably a high sampling temperature
    # chosen to get more varied personas -- confirm against TinyPersonFactory.
    person = factory.generate_person(temperature=1.6)
    print(person.minibio())
    people.append(person)

# Last expression of the cell: displays how many agents were generated.
len(people)

2025-03-24 15:01:33,251 - tinytroupe - INFO - Starting the person generation based on that context: A random knowledge worker in a company providing marketing services.


KeyboardInterrupt: 

In [None]:
# Create the world named "Some Corp Inc." from the generated agents.
company = TinyWorld("Some Corp Inc.", people)

In [None]:
# NOTE(review): presumably lets every agent in the world reach every other
# agent so they can talk to each other -- confirm against TinyWorld.
company.make_everyone_accessible()

In [None]:
# Seed the conversation with one instruction; it appears as the first
# ('USER', ...) entry in the extracted data further down.
company.broadcast("Get some work done together, help each other.")

In [None]:
# Run the world simulation for 5 steps.
company.run(5)

2025-02-24 16:30:04,509 - tinytroupe - DEBUG - -----------------------------------------> Transaction: run with args (5,) and kwargs {} under simulation None.
2025-02-24 16:30:04,512 - tinytroupe - INFO - [Some Corp Inc.] Running world simulation step 1 of 5.


2025-02-24 16:30:04,517 - tinytroupe - DEBUG - -----------------------------------------> Transaction: _step with args () and kwargs {'timedelta_per_step': None} under simulation None.
2025-02-24 16:30:04,518 - tinytroupe - INFO - [Some Corp Inc.] No timedelta provided, so the datetime was not advanced.
2025-02-24 16:30:04,519 - tinytroupe - DEBUG - [Some Corp Inc.] Agent Elena Martinez is acting.
2025-02-24 16:30:04,521 - tinytroupe - DEBUG - -----------------------------------------> Transaction: act with args () and kwargs {'return_actions': True} under simulation None.
2025-02-24 16:30:04,521 - tinytroupe - DEBUG - -----------------------------------------> Transaction: _produce_message with args () and kwargs {} under simulation None.
2025-02-24 16:30:04,524 - tinytroupe - DEBUG - [Elena Martinez] Sending messages to OpenAI API
2025-02-24 16:30:04,525 - tinytroupe - DEBUG - [Elena Martinez] Last interaction: {'role': 'user', 'content': '"Now you **must** generate a sequence of act

TypeError: 'NoneType' object is not subscriptable

We can now extract the conversations, which form the synthetic corpus we wanted.

In [None]:
# NOTE(review): presumably pretty-prints the interactions currently held by
# the first agent, as a quick visual check -- confirm against TinyPerson.
people[0].pp_current_interactions()

In [None]:
# Reducer that receives the extraction rules registered later in this cell and
# is then used to turn an agent's interactions into a dataframe.
reducer = ResultsReducer()

def aux_extract_content(focus_agent: TinyPerson, source_agent:TinyPerson, target_agent:TinyPerson, kind:str, event: str, content: str, timestamp:str):
    """Turn one "TALK" or "CONVERSATION" event into an ``(author, content)`` pair.

    Args:
        focus_agent: Agent whose name is used as the author of "TALK" events.
        source_agent: Agent whose name is used as the author of "CONVERSATION"
            events; when it is ``None`` the author is reported as "USER".
        target_agent: Not used by this rule.
        kind: Not used by this rule.
        event: Event label; only "TALK" and "CONVERSATION" are accepted.
        content: Text of the event, returned unchanged.
        timestamp: Not used by this rule.

    Returns:
        The tuple ``(author, content)``. The tuple is also printed.

    Raises:
        ValueError: If ``event`` is neither "TALK" nor "CONVERSATION".
    """
    # Reject unsupported events up front, before touching any agent.
    if event not in ("TALK", "CONVERSATION"):
        raise ValueError(f"Unknown event: {event}")

    if event == "TALK":
        speaker = focus_agent.name
    else:
        # A conversation entry without a source agent is attributed to "USER".
        speaker = "USER" if source_agent is None else source_agent.name

    record = (speaker, content)
    print(record)
    return record
    


# Register the same extraction function for both event labels it accepts.
reducer.add_reduction_rule("TALK", aux_extract_content)
reducer.add_reduction_rule("CONVERSATION", aux_extract_content)

Finally, we collect the extracted data into a dataframe and save it to a `.csv` file for later use in other applications.

In [None]:
# Reduce the first agent's interactions to a dataframe; the two column names
# match the (author, content) tuples produced by aux_extract_content.
df = reducer.reduce_agent_to_dataframe(people[0], column_names=["author", "content"])
# Last expression of the cell: displays the resulting dataframe.
df

('USER', 'Get some work done together, help each other.')
('Clara Thompson', "That sounds like a great idea! I'm always up for collaborating. What specific tasks do you have in mind?")
('Liam Carter', 'I think we could work on analyzing our recent campaign metrics together. It would be great to get your insights! What do you think?')
('Clara Thompson', "I think that's a fantastic idea, Liam! I have some thoughts on the metrics we should focus on. Which specific areas do you want to analyze first?")
('Liam Carter', 'I think we should start by looking at the engagement rates first. They can give us a good idea of how our audience is responding to the content. After that, we could dive into conversion metrics. What do you think?')
('Clara Thompson', "I completely agree, Liam! Let's start with the engagement rates. I can pull up the data we have and analyze it together. After that, we can move on to conversion metrics.")
('Liam Carter', 'Great! Once you have the data, we can look for trend

Unnamed: 0,author,content
0,USER,"Get some work done together, help each other."
1,Clara Thompson,That sounds like a great idea! I'm always up f...
2,Liam Carter,I think we could work on analyzing our recent ...
3,Clara Thompson,"I think that's a fantastic idea, Liam! I have ..."
4,Liam Carter,I think we should start by looking at the enga...
5,Clara Thompson,"I completely agree, Liam! Let's start with the..."
6,Liam Carter,"Great! Once you have the data, we can look for..."
7,Clara Thompson,"Absolutely, Liam! I’ll analyze the data for tr..."
8,Liam Carter,I'm looking forward to seeing what you find! M...
9,Clara Thompson,I think we could enhance our content by focusi...


In [None]:
# Save the extracted conversation to disk. Assuming df is a pandas DataFrame,
# index=False keeps the row index out of the written file -- TODO confirm.
df.to_csv("../data/extractions/synthetic_data_generation.out.csv", index=False)