# Events JSON

In [1]:
import json
import pandas as pd

# Load the raw event records. JSON is UTF-8 by specification, so the encoding
# is pinned here instead of relying on the platform's locale default.
with open("events.json", "r", encoding="utf-8") as file:
    data = json.load(file)

# Build a DataFrame from the parsed JSON and convert the "event_timestamp"
# column to pandas datetimes so datetime operations can be applied to it.
df = pd.DataFrame(data)
df["event_timestamp"] = pd.to_datetime(df["event_timestamp"])
df.head()

Unnamed: 0,event_timestamp,event_name,event_id,user_id
0,2025-01-01 12:59:14,sent_sms,239fb2b1-3ab6-49de-9366-101f1afb2440,user_35hunkWFwnLsFYYUDQkyvY
1,2025-01-01 12:01:58,received_sms,60ae6ced-9a6c-4233-8767-c3b39d87e481,user_3nRTJEgWmfrUpLpqHD1uHU
2,2025-01-01 12:49:08,sent_sms,d55791a8-5948-4482-8aef-ac5772770c82,user_0XbqvlVfdv93F8TxbrVEJA
3,2025-01-01 12:33:56,sent_sms,1a1292d7-29ef-4a16-bcd9-50a45e0d824b,user_3HJe1WrMkDJuqvNjhVD1Z8
4,2025-01-01 12:45:11,sent_sms,0fe2b6de-bcb7-4119-adba-570a1bf4cbf7,user_03s6umNw2Kox6QgJTBPVtq


In [7]:
# Keep only the "sent_sms" rows and tag each one with its timestamp floored
# to the minute. `.assign` returns a new frame, so `df` itself is untouched.
sent = (
    df.loc[df["event_name"].eq("sent_sms")]
    .assign(minute=lambda frame: frame["event_timestamp"].dt.floor("min"))
)
display(sent.head())

# Tally the "sent_sms" rows that fall into each minute bucket; the result has
# one row per minute with the tally in a "count" column.
counts = sent.groupby("minute").size().rename("count").reset_index()
display(counts.head())

Unnamed: 0,event_timestamp,event_name,event_id,user_id,minute
0,2025-01-01 12:59:14,sent_sms,239fb2b1-3ab6-49de-9366-101f1afb2440,user_35hunkWFwnLsFYYUDQkyvY,2025-01-01 12:59:00
2,2025-01-01 12:49:08,sent_sms,d55791a8-5948-4482-8aef-ac5772770c82,user_0XbqvlVfdv93F8TxbrVEJA,2025-01-01 12:49:00
3,2025-01-01 12:33:56,sent_sms,1a1292d7-29ef-4a16-bcd9-50a45e0d824b,user_3HJe1WrMkDJuqvNjhVD1Z8,2025-01-01 12:33:00
4,2025-01-01 12:45:11,sent_sms,0fe2b6de-bcb7-4119-adba-570a1bf4cbf7,user_03s6umNw2Kox6QgJTBPVtq,2025-01-01 12:45:00
5,2025-01-01 12:01:02,sent_sms,0273d2e7-ad92-4970-a710-7d4365907ae7,user_3vB0gdISiVexQpbZwc2VF5,2025-01-01 12:01:00


Unnamed: 0,minute,count
0,2025-01-01 12:00:00,15
1,2025-01-01 12:01:00,14
2,2025-01-01 12:02:00,13
3,2025-01-01 12:03:00,15
4,2025-01-01 12:04:00,12


In [8]:
# Exclusive lower bound on the per-minute count for a minute to be a peak.
PEAK_THRESHOLD = 30

# Keep the minutes whose count exceeds the threshold, ordered by minute.
peaks = counts[counts["count"] > PEAK_THRESHOLD].sort_values("minute")
display(peaks)

Unnamed: 0,minute,count
12,2025-01-01 12:12:00,67
19,2025-01-01 12:19:00,108
29,2025-01-01 12:29:00,111
43,2025-01-01 12:43:00,115
52,2025-01-01 12:52:00,101


In [13]:
# Emit the peak counts as a plain Python list, following the row order of
# `peaks` (which was sorted by minute when it was built).
print(peaks.loc[:, "count"].to_list())

[67, 108, 111, 115, 101]
