# Synthetic Data Generation


In [18]:
import json
import sys
import csv
sys.path.append('..')


import tinytroupe
from tinytroupe.agent import TinyPerson
from tinytroupe.environment import TinyWorld, TinySocialNetwork
from tinytroupe.personfactory import TinyPersonFactory
from tinytroupe.extraction import default_extractor as extractor
from tinytroupe.extraction import InteractionResultsReducer
import tinytroupe.control as control

Let's create the agents whose conversations will provide the data: knowledge workers at a company that provides marketing services.

In [2]:
# Factory seeded with a free-text description; every person generated below is drawn from it.
factory = TinyPersonFactory("A random knowledge worker in a company providing marketing services.")

In [3]:
# Generate two people from the factory, printing each one's mini-bio as it is created.
people = []
for i in range(2):
    # NOTE(review): temperature=1.6 is presumably a high sampling temperature
    # chosen to make the generated people differ from each other — confirm.
    person = factory.generate_person(temperature=1.6)
    print(person.minibio())
    people.append(person)

# Bare expression: shown as the cell's output to confirm how many people were created.
len(people)

Ava Turner is a 29 year old Digital Marketing Specialist from Canada.
Miles Peterson is a 42 year old Content Strategist from United States.


2

In [4]:
# Place all generated people in one TinyWorld named "Some Corp Inc.".
company = TinyWorld("Some Corp Inc.", people)

In [5]:
# NOTE(review): presumably lets every agent in the world reach every other one,
# which the "message each other" instruction relies on — confirm against TinyWorld.
company.make_everyone_accessible()

In [6]:
# Give the world one instruction; in the extracted data further down this
# message appears as the first row, attributed to "USER".
company.broadcast("Message each other to get work done.")

In [7]:
# Run the simulation.
# NOTE(review): the argument 2 is presumably the number of simulation steps — confirm against TinyWorld.run.
company.run(2)

We can now extract the conversations, which form the synthetic corpus we wanted.

In [8]:
# Inspect the first person's interactions before extracting them.
# NOTE(review): the `pp_` prefix presumably means "pretty-print" — confirm against TinyPerson.
people[0].pp_current_interactions()

In [16]:
# Reducer that the rules registered later in this cell are attached to; it is
# also what produces the dataframe in the next cell.
reducer = InteractionResultsReducer()

def aux_extract_content(
    focus_agent: TinyPerson,
    source_agent: TinyPerson,
    target_agent: TinyPerson,
    kind: str,
    event: str,
    content: str,
):
    """
    Reduction rule that turns one interaction into an ``(author, content)`` entry.

    The author is chosen from the event type:

    * ``"TALK"``: the name of ``focus_agent``.
    * ``"CONVERSATION"``: the name of ``source_agent``, or the literal
      ``"USER"`` when ``source_agent`` is ``None``.

    ``target_agent`` and ``kind`` are accepted but not used here.

    The entry is also printed, so each extracted row is echoed while the
    reducer runs.

    Returns:
        A 2-tuple ``(author, content)``.

    Raises:
        ValueError: if ``event`` is neither ``"TALK"`` nor ``"CONVERSATION"``.
    """
    # Reject unsupported events up front so the branches below only deal with known ones.
    if event not in ("TALK", "CONVERSATION"):
        raise ValueError(f"Unknown event: {event}")

    if event == "TALK":
        speaker = focus_agent.name
    else:
        # A CONVERSATION without a source agent is attributed to "USER".
        speaker = "USER" if source_agent is None else source_agent.name

    row = (speaker, content)
    print(row)
    return row
    


# Register the same rule for both event types, so each TALK and each
# CONVERSATION is reduced to one (author, content) entry.
reducer.add_reduction_rule("TALK", aux_extract_content)
reducer.add_reduction_rule("CONVERSATION", aux_extract_content)

Finally, we obtain the dataframe with the data and save it to a `.csv`, for later use in other applications.

In [17]:
# Reduce the first person's interactions to a dataframe; the two column names
# line up with the (author, content) tuples returned by the reduction rule.
df = reducer.reduce_agent_to_dataframe(people[0], column_names=["author", "content"])
# Bare expression: displays the dataframe as the cell's output.
df

('USER', 'Message each other to get work done.')
('Ava Turner', "Hi Miles, I hope you're doing well. Let's coordinate on our tasks for the day. Do you have any updates on your end?")
('Miles Peterson', "Hi Ava, I'm doing well, thank you. I've been working on refining our SEO approach for the upcoming campaign, focusing on integrating more long-tail keywords into our content. I'm also exploring a new content personalization tool that could help us tailor our messages more effectively to different audience segments. How are things on your end? Do you need support with any of your tasks?")
('Ava Turner', "That sounds great, Miles! I've been analyzing the data from our last campaign and noticed an uptick in engagement from our email newsletters. I'm thinking of ways to capitalize on that. The content personalization tool you mentioned could be very useful for this. Let's set up a meeting to discuss how we can integrate it with our current strategy. Also, I could use your creative input on 

Unnamed: 0,author,content
0,USER,Message each other to get work done.
1,Ava Turner,"Hi Miles, I hope you're doing well. Let's coor..."
2,Miles Peterson,"Hi Ava, I'm doing well, thank you. I've been w..."
3,Ava Turner,"That sounds great, Miles! I've been analyzing ..."
4,Miles Peterson,That's excellent news about the email newslett...


In [22]:
# index=False leaves the row index out of the file, so only the author and
# content columns are written.
df.to_csv("../data/extractions/synthetic_data_generation.out.csv", index=False)