In [1]:
import pandas as pd
import random
from datetime import datetime, timedelta
from src.utils import HistoricalDiscordMessage, hist_msg_list_to_pandas_df, summarise_counts_by_group_and_freq, summarise_counts

# Config

In [2]:
# Resampling frequency for the summaries; passed as `freq` to
# summarise_counts_by_group_and_freq further down.
# NOTE(review): values look like pandas offset aliases - confirm against src.utils.
FREQ = "D" # resample freq: "D": daily, "MS": month start

# Generate data

In [3]:
# set up parameters
SEED = 42  # fixed seed so Restart & Run All regenerates identical synthetic data
start_date = datetime(2023, 1, 1, 0, 0, 0)
end_date = datetime(2023, 4, 1, 0, 0, 0)  # exclusive upper bound of the window
channels = [10112, 20112, 30112]
users = [6513, 6003, 7777, 9999, 1211]
n_items = 5000

# Dedicated seeded generator: reproducible, and independent of whatever else
# may have touched the global `random` state in this kernel.
rng = random.Random(SEED)

# Length of the sampling window in whole seconds (loop-invariant, so computed once).
window_seconds = int((end_date - start_date).total_seconds())

# generate the list of synthetic historical messages
hist_list = []
for _ in range(n_items):
    # Random timestamp in [start_date, end_date). randrange excludes the upper
    # bound, so no message can fall exactly on end_date and open an extra,
    # nearly empty time bucket when the data is resampled.
    rand_time = start_date + timedelta(seconds=rng.randrange(window_seconds))
    # random channel, author and reaction count (0 to 6 inclusive)
    channel = rng.choice(channels)
    user = rng.choice(users)
    reactions = rng.randint(0, 6)
    # build the message record and collect it
    hist_list.append(
        HistoricalDiscordMessage(
            date_time=rand_time.isoformat(),
            channel_id=channel,
            author_id=user,
            reactions=reactions,
        )
    )

# Convert to a pandas DataFrame

In [4]:
# Convert the generated message list into a DataFrame. Per the preview below,
# the result is indexed by date_time with channel_id, author_id and reactions columns.
df = hist_msg_list_to_pandas_df(hist_list=hist_list)
# Show only the first rows as a sanity check.
df.head()

Unnamed: 0_level_0,channel_id,author_id,reactions
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-18 18:00:58,10112,1211,0
2023-02-26 06:00:41,10112,6513,0
2023-03-24 18:02:46,10112,1211,3
2023-01-11 03:21:59,10112,6003,0
2023-01-19 05:16:22,30112,1211,2


# Summarise counts

In [5]:
# Summary of counts for every (channel_id, author_id) pair per time sample,
# where the sample width is set by FREQ. Per the preview below, the result has
# one row per channel/author/date_time with `messages` and `reactions` columns.
summary_df = summarise_counts_by_group_and_freq(
    df=df, 
    groups=["channel_id", "author_id"], 
    freq=FREQ
)

# Show only the first rows as a sanity check.
summary_df.head()

Unnamed: 0,channel_id,author_id,date_time,messages,reactions
0,10112,1211,2023-01-01,5,6
1,10112,1211,2023-01-02,5,15
2,10112,1211,2023-01-03,2,5
3,10112,1211,2023-01-04,4,11
4,10112,1211,2023-01-05,3,5


In [6]:
# Totals per time sample, computed from the per-channel/per-author summary.
# Per the output below, the result is indexed by date_time with reactions,
# messages and unique_authors columns.
out_df = summarise_counts(summary_df=summary_df)
# Bare last expression: rendered by the notebook's rich display.
out_df

Unnamed: 0_level_0,reactions,messages,unique_authors
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,102,42,5
2023-01-02,157,52,5
2023-01-03,157,50,5
2023-01-04,131,50,5
2023-01-05,175,60,5
...,...,...,...
2023-03-27,147,51,5
2023-03-28,137,48,5
2023-03-29,148,44,5
2023-03-30,107,39,5
