In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from loguru import logger
import warnings
# Hide every warning of category FutureWarning from this point on, so that
# they do not clutter the cell outputs below.
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
# Folder with the preprocessed data, given as a relative path.
processed = Path("../data/processed")
# Parquet file to analyse; the name presumably carries the timestamp of the
# preprocessing run that produced it (see the warning text below).
datafile = processed.joinpath("whatsapp-20240122-182706.parq")

# Only log a warning when the file is missing; nothing is raised here.
if not datafile.exists():
    logger.warning("Datafile does not exist. First run src/preprocess.py, and check the timestamp!")

In [None]:
# Read the parquet file into a DataFrame; the bare `df` on the last line
# makes the notebook render it as the cell output.
df = pd.read_parquet(path=datafile)
df

Let's extract some more info from the timestamp:

In [None]:
# Derive two helper columns from the "timestamp" column:
#   date    - the calendar date of each row
#   isoweek - the ISO week number of each row
df = df.assign(
    date=lambda frame: frame["timestamp"].dt.date,
    isoweek=lambda frame: frame["timestamp"].dt.isocalendar().week,
)

Now we can, for example, group by ISO week.
Weeks without any messages are missing from the grouped result, so let's reindex over the full range of weeks and fill those gaps with 0.

In [None]:
# Count the rows per ISO week. After .count() every column holds the number of
# non-null values in that group, so "timestamp" is the number of messages.
# NOTE: grouping on the week number alone puts the same week of different years
# into one group, should the data span more than one year.
p = df.groupby("isoweek").count()
# Weeks without messages are absent after the groupby; reindex over the full
# range of week numbers so they appear with a count of 0.
p = p.reindex(range(int(p.index.min()), int(p.index.max()) + 1), fill_value=0)

# Draw both layers on one explicit Axes instead of the implicit pyplot state.
fig, ax = plt.subplots()
sns.scatterplot(data=p, x="isoweek", y="timestamp", ax=ax)
sns.lineplot(data=p, x="isoweek", y="timestamp", ax=ax)
# The default y-label would read "timestamp", while the values are counts.
ax.set(title="Messages per ISO week", xlabel="ISO week", ylabel="Number of messages");

Try to play with the colors. Make sure that adding color conveys a message, and isn't just adding colors at random.

In [None]:
# Count the rows per (author, ISO week). After .count() the "timestamp" column
# holds the number of non-null timestamps, i.e. messages, in each group.
# Weeks in which an author has no rows are simply absent here (not filled with 0).
p = df.groupby(["author", "isoweek"]).count()

# Draw both layers on one explicit Axes instead of the implicit pyplot state.
fig, ax = plt.subplots()
# Points: one per author and week, coloured by author.
sns.scatterplot(data=p, x="isoweek", y="timestamp", hue="author", ax=ax)
# Line: no hue is given, so seaborn aggregates the per-author counts of each
# week into a single line (by default the mean, with an error band).
sns.lineplot(data=p, x="isoweek", y="timestamp", legend=False, ax=ax)
# The default y-label would read "timestamp", while the values are counts.
ax.set(
    title="Messages per ISO week, by author",
    xlabel="ISO week",
    ylabel="Number of messages",
);