In [1]:
# install necessary packages
!pip install faker &> /dev/null
!npx degit RealImpactAnalytics/trumania/trumania trumania &> /dev/null

In [2]:
# import necessary modules
import pandas as pd
from trumania.core import circus
from trumania.core.util_functions import make_random_bipartite_data, setup_logging
from trumania.core.operations import FieldLogger, Apply
from trumania.core.random_generators import SequencialGenerator, FakerGenerator, NumpyRandomGenerator
from trumania.core.random_generators import ConstantDependentGenerator, ConstantGenerator

In [3]:
def generate_data():

    osn_dataset = circus.Circus(
        name="osn_dataset",
        master_seed=23423,
        start=pd.Timestamp("1 Jan 2022 00:00"),
        step_duration=pd.Timedelta("1h")) # define trumania circus

    person = osn_dataset.create_population(
        name="person", size=1000,
        ids_gen=SequencialGenerator(prefix="PERSON_")) # create person population of 1000 people

    person.create_attribute(
        "NAME",
        init_gen=FakerGenerator(method="name",
                                seed=next(osn_dataset.seeder))) # generate random names using faker module
    # assign popularity to users (this will help in generating the number of friends a user has - if they're more popular, they'll have more friends)
    person.create_attribute(
        "POPULARITY",
        init_gen=NumpyRandomGenerator(
            method="uniform", low=0, high=1, seed=next(osn_dataset.seeder))) 

    interaction = SequencialGenerator(prefix="INTERACTION_").generate(4)
    random_interaction_gen = NumpyRandomGenerator(method="choice", a=interaction,
                                           seed=next(osn_dataset.seeder))

    allowed_interaction = person.create_relationship(name="interaction")

    

    # INTERACTIONS ------------------

    # Add likes
    allowed_interaction.add_relations(from_ids=person.ids,
                                to_ids=random_interaction_gen.generate(person.size),
                                weights=0.6)

    # Add shares
    allowed_interaction.add_relations(from_ids=person.ids,
                                to_ids=random_interaction_gen.generate(person.size),
                                weights=0.15)

    # Add comments
    allowed_interaction.add_relations(from_ids=person.ids,
                                to_ids=random_interaction_gen.generate(person.size),
                                weights=0.15)
    # Add posts
    allowed_interaction.add_relations(from_ids=person.ids,
                                to_ids=random_interaction_gen.generate(person.size),
                                weights=0.10)

    # FRIENDS ------------------

    friends = person.create_relationship(name="friends")

    friends_df = pd.DataFrame.from_records(
        make_random_bipartite_data(
            person.ids,
            person.ids,
            p=0.33,  # probability for a node to be connected to
                      # another one : 5 friends on average = 5/1000
            seed=next(osn_dataset.seeder)),
        columns=["A", "B"])

    friends.add_relations(
        from_ids=friends_df["A"],
        to_ids=friends_df["B"])

    # STORIES ------------------

    hello_world = osn_dataset.create_story(
        name="hello_world",
        initiating_population=person,
        member_id_field="PERSON_ID",

        # after each story, reset the timer to 0, so that it will get
        # executed again at the next clock tick (next hour)
        timer_gen=ConstantDependentGenerator(value=0)
    )

    duration_gen = NumpyRandomGenerator(method="exponential", scale=60,
                                        seed=next(osn_dataset.seeder))

    hello_world.set_operations(
        person.ops.lookup(
            id_field="PERSON_ID",
            select={"NAME": "NAME"}
        ),

        ConstantGenerator(value="hello world").ops.generate(named_as="HELLO"),

        duration_gen.ops.generate(named_as="DURATION"),
        allowed_interaction.ops.select_one(from_field="PERSON_ID",
                                     named_as="INTERACTION"),

        friends.ops.select_one(
            from_field="PERSON_ID",
            named_as="COUNTERPART_ID",
            weight=person.get_attribute_values("POPULARITY"),
            # For people that do not have friends, it will try to find
            # the POPULARITY attribute of a None and crash miserably
            # Adding this flag will discard people that do not have friends
            discard_empty=True),

        person.ops.lookup(
            id_field="COUNTERPART_ID",
            select={"NAME": "COUNTER_PART_NAME"}
        ),

        osn_dataset.clock.ops.timestamp(named_as="TIME"),

        FakerGenerator(method="sentence", nb_words=10, variable_nb_words=True,
                                 seed=next(osn_dataset.seeder)).ops.generate(named_as="MESSAGE"),
                               
        FieldLogger(log_id="hello")
        
    )

    osn_dataset.run(
        duration=pd.Timedelta("48h"),
        log_output_folder="output/osn_dataset",
        delete_existing_logs=True
    )

    with open("output/osn_dataset/hello.csv") as f:
        print("Logged {} lines".format(len(f.readlines()) - 1))

In [None]:
generate_data()
data = pd.read_csv('output/osn_dataset/hello.csv')
data.head()



Logged 240 lines


Unnamed: 0,PERSON_ID,NAME,HELLO,DURATION,INTERACTION,COUNTERPART_ID,COUNTER_PART_NAME,TIME,MESSAGE
0,PERSON_0000000000,Matthew Chapman,hello world,14.507465,INTERACTION_0000000000,PERSON_0000000003,Bryce Finley,2022-01-01 00:59:44,Determine industry in establish move week succ...
1,PERSON_0000000001,Jennifer Martin,hello world,94.979314,INTERACTION_0000000001,PERSON_0000000000,Matthew Chapman,2022-01-01 00:35:58,On always than fight article never agent build...
2,PERSON_0000000002,Nicholas Ochoa,hello world,85.295803,INTERACTION_0000000001,PERSON_0000000003,Bryce Finley,2022-01-01 00:36:19,Five red though bank and memory participant if.
3,PERSON_0000000003,Bryce Finley,hello world,43.478226,INTERACTION_0000000001,PERSON_0000000004,Justin Barr,2022-01-01 00:28:40,Pressure nothing expect memory air fish miss c...
4,PERSON_0000000004,Justin Barr,hello world,62.491785,INTERACTION_0000000001,PERSON_0000000004,Justin Barr,2022-01-01 00:05:39,Realize chair recent management think near rest.
