In [None]:
import numpy as np
import pandas as pd
from matplotlib import ticker
import matplotlib.pyplot as plt
from scipy.signal import find_peaks

In [None]:
# Relative path of the chat-frequency CSV that the next cell loads into `df`.
count_frequency_data = "./../../output/Jerma985/plots/vods_chat_frequency.csv"

In [None]:
# Load the chat-frequency CSV. Later cells read its "name", "desc",
# "timestamp" and "counts" columns.
df = pd.read_csv(count_frequency_data)
# Bare last expression: show the loaded frame as this cell's output.
df

In [None]:
# Add an "is_peak" column, initially False for every row. The plotting cell
# below sets it to True on the rows it detects as peaks.
df["is_peak"] = False

In [None]:
def tick_lbl_setter(tick_val: np.float64, df: pd.DataFrame) -> str:
    """Turn an x-axis tick position into the time-of-day label for that row.

    The tick value is truncated to an integer and used as a positional row
    index into ``df``. The row's ``"timestamp"`` string is split on a single
    space and the second field is returned.

    Args:
        tick_val: Tick position supplied by matplotlib (may be fractional,
            negative, beyond the data, NaN or infinite).
        df: Frame with a string ``"timestamp"`` column, indexed positionally.

    Returns:
        The second space-separated field of the timestamp, or ``"null"`` when
        the tick does not correspond to a row or the timestamp has no such
        field.
    """
    try:
        idx = int(tick_val)
    except (ValueError, OverflowError):
        # int() raises ValueError for NaN and OverflowError for +/-inf.
        return "null"

    # Reject out-of-range positions explicitly: a negative index would
    # otherwise wrap around in ``iloc`` and label a tick left of the data
    # with a timestamp taken from the end of the frame.
    if idx < 0 or idx >= len(df):
        return "null"

    fields = df.iloc[idx]["timestamp"].split(" ")
    return fields[1] if len(fields) > 1 else "null"

# Close figures left over from earlier runs. (`plt.clf()` would itself create
# an extra empty figure when none is open.)
plt.close("all")

# One subplot per vod.
vods = df["name"].unique()

SUBPLOT_HEIGHT = 7.5
# Number of subplots placed side by side in one grid row.
SUBPLOTS_PER_COL = 4

# Round up so a partially filled last row still gets space; flooring would
# make `add_subplot` fail for any vod index past the last full row.
req_n_rows = max(1, int(np.ceil(len(vods) / SUBPLOTS_PER_COL)))
fig_height = req_n_rows * SUBPLOT_HEIGHT
# Aspect ratio of 2.0.
fig = plt.figure(figsize=(fig_height * 2, fig_height))
# Figure-level title. `plt.title` would implicitly create an extra
# full-figure axes that gets drawn behind the subplots.
fig.suptitle("Chat Peak Frequencies", fontsize=10 * req_n_rows)

for i, vod in enumerate(vods, 1):
    # Rows of this vod only, re-indexed from 0.
    df_vod = (
        df.loc[df["name"] == vod]
        .reset_index(drop=True)
        .drop(columns=["name"])
    )
    subplot: plt.Axes = fig.add_subplot(req_n_rows, SUBPLOTS_PER_COL, i)

    unique_patterns = df_vod["desc"].unique()
    # One distinct color per pattern.
    colors = plt.cm.jet(np.linspace(0, 1, len(unique_patterns)))

    line_plots = []
    for pattern, color in zip(unique_patterns, colors):
        # Rows of this pattern, re-indexed from 0 so that the positions
        # returned by `find_peaks` are also valid index labels.
        df_pattern_cnts = df_vod.loc[df_vod["desc"] == pattern].reset_index(drop=True)
        counts = df_pattern_cnts["counts"]

        # Required prominence of a peak: difference between the maximum count
        # and the 90th-percentile count.
        # https://en.wikipedia.org/wiki/Topographic_prominence
        prominence_threshold = counts.max() - counts.quantile(0.9)

        # Second return value is a dict of peak properties; it is not needed.
        peaks, _properties = find_peaks(counts, prominence=prominence_threshold)

        subplot.plot(peaks, counts.iloc[peaks], color=color, marker='o', linestyle="None")
        line_plot, = subplot.plot(df_pattern_cnts.index, counts, color=color)
        line_plots.append(line_plot)

        # Label each peak and flag it in the original frame. The peak
        # positions index `df_pattern_cnts`, so the timestamp must be read
        # from that frame and not from the per-vod frame with all patterns.
        peak_rows = df_pattern_cnts.iloc[peaks]
        for x, y, timestamp in zip(peaks, peak_rows["counts"], peak_rows["timestamp"]):
            # Set original peak to True (restricted to this vod and pattern).
            is_this_peak = (
                (df["name"] == vod)
                & (df["desc"] == pattern)
                & (df["timestamp"] == timestamp)
                & (df["counts"] == y)
            )
            df.loc[is_this_peak, "is_peak"] = True

            subplot.annotate(
                text=timestamp.split(" ")[1],
                xy=(x, y),
                color=color,
                xytext=(0, 5),
                textcoords="offset points",
            )

        # Bind the current frame as a default argument: the formatter runs at
        # draw time, when a plain closure would see the last frame of the
        # whole loop for every subplot.
        label_formatter = ticker.FuncFormatter(
            lambda x, _pos, frame=df_pattern_cnts: tick_lbl_setter(x, frame)
        )
        subplot.xaxis.set_major_formatter(label_formatter)

    subplot.set_title(vod)
    subplot.legend(
        line_plots,
        unique_patterns,
        loc='upper left',
        bbox_to_anchor=(1.02, 1),
    )

plt.show()