In [1]:
import pandas as pd
import random
from datetime import datetime, timedelta

# Config

In [2]:
# Resampling frequency handed to pandas `resample(rule=...)` when summarising.
#   "D"  -> daily bins
#   "MS" -> month-start bins
FREQ = "D"

# Generate data

In [3]:
# Parameters for the synthetic message history.
start_date = datetime(2023, 1, 1, 0, 0, 0)
end_date = datetime(2023, 4, 1, 0, 0, 0)  # treated as an exclusive upper bound
channels = [10112, 20112, 30112]
users = [6513, 6003, 7777, 9999, 1211]
n_items = 5000
SEED = 42  # fixed seed so Restart & Run All regenerates the same data

# Dedicated generator: reproducible without touching the global `random` state.
rng = random.Random(SEED)
# Number of whole seconds in the half-open window [start_date, end_date).
window_seconds = int((end_date - start_date).total_seconds())

# One record per message. Despite its name, `hist_dict` is a list of dicts.
hist_dict = []
for _ in range(n_items):
    # randrange excludes its upper bound, so no timestamp can land exactly on
    # end_date (randint would include it and could create a stray extra bin).
    rand_time = start_date + timedelta(seconds=rng.randrange(window_seconds))
    hist_dict.append(
        {
            "date_time": rand_time.isoformat(),
            "channel_id": rng.choice(channels),
            "author_id": rng.choice(users),
            "reactions": rng.randint(0, 6),  # inclusive on both ends
        }
    )

# Convert to df

In [4]:
# Build the frame in one chain: load the records, parse the ISO-formatted
# "date_time" strings into timestamps, then make that column the index.
df = (
    pd.DataFrame(data=hist_dict)
    .assign(date_time=lambda frame: pd.to_datetime(frame["date_time"]))
    .set_index("date_time")
)

df.head()

Unnamed: 0_level_0,channel_id,author_id,reactions
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-03-10 15:57:25,30112,9999,5
2023-03-22 11:37:08,30112,6513,4
2023-03-22 15:13:41,10112,1211,0
2023-03-02 17:00:32,20112,6003,4
2023-03-01 13:53:23,10112,9999,1


# Summarise counts

In [5]:
# For every (channel, author) pair, bucket the rows by FREQ and record:
#   messages  - number of rows in the bucket
#   reactions - sum of the "reactions" column in the bucket
summary_df = (
    df.groupby(by=["channel_id", "author_id"], as_index=True)
    .resample(rule=FREQ)
    .agg(messages=("reactions", "count"), reactions=("reactions", "sum"))
    .reset_index()
)

summary_df.head()

Unnamed: 0,channel_id,author_id,date_time,messages,reactions
0,10112,1211,2023-01-01,6,15
1,10112,1211,2023-01-02,4,10
2,10112,1211,2023-01-03,5,15
3,10112,1211,2023-01-04,3,14
4,10112,1211,2023-01-05,4,7


In [6]:
# Totals per time sample, summed across every channel and author.
out_df = summary_df.groupby(by="date_time", as_index=True)[["reactions", "messages"]].sum()

# summary_df comes from a per-group resample, which also emits rows for bins
# in which an author posted nothing (messages == 0). Count only authors with
# at least one message so those empty bins do not inflate the figure.
out_df["unique_authors"] = (
    summary_df.loc[summary_df["messages"] > 0]
    .groupby(by="date_time")["author_id"]
    .nunique()
    .reindex(out_df.index, fill_value=0)  # bins with no active author -> 0
)
out_df

Unnamed: 0_level_0,reactions,messages,unique_authors
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,142,48,5
2023-01-02,152,52,5
2023-01-03,189,60,5
2023-01-04,159,46,5
2023-01-05,142,51,5
...,...,...,...
2023-03-27,216,68,5
2023-03-28,144,51,5
2023-03-29,165,52,5
2023-03-30,192,61,5
