# Visualizing the logs

In [None]:
import os
import csv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json, ast
import matplotlib.dates as mdates
from dotenv import load_dotenv
load_dotenv()
from utils import get_data, get_json_from_env

# Locations of the individual log files, looked up in the process environment.
# Each name is None when its variable is not set.
kaffe_file = os.environ.get("KAFFE_LOG_FILE")
lunsj_file = os.environ.get("LUNSJ_LOG_FILE")
bordtennis_file = os.environ.get("BORDTENNIS_LOG_FILE")
konge_file = os.environ.get("KONGE_LOG_FILE")
monark_file = os.environ.get("MONARK_LOG_FILE")
wordle_file = os.environ.get("WORDLE_LOG_FILE")

# Lookup table used to turn user ids into display names: {user_id: user_name}
MAP_ID_NAME = get_json_from_env("MAP_ID_NAME")


In [None]:
def plot_timeseries_logs(filename):
    """Plot the total logged duration per day for one log file.

    The log is loaded with ``get_data``; its ``timestamp`` and ``duration``
    columns are summed per calendar day and drawn as a line chart. The daily
    sums are divided by 60 before plotting and labelled as minutes
    (presumably the log stores seconds -- confirm against the logger).

    Args:
        filename: Path to the log file, e.g. ``logs/kaffe_logs.csv``. The part
            of the file name before the first underscore is used in the plot
            title.
    """
    print(f"Plot of {filename}")
    data = get_data(filename)
    # Take the title from the file name itself rather than from a fixed path
    # depth, so "kaffe_logs.csv", "logs/kaffe_logs.csv" and
    # "/some/dir/logs/kaffe_logs.csv" all yield "kaffe".
    plot_title = os.path.basename(filename).split("_")[0]

    plt.style.use('_mpl-gallery')

    # Rows whose duration cannot be parsed become NaN and are dropped.
    df_plot = pd.DataFrame({
        "timestamp": pd.to_datetime(list(data["timestamp"])),
        "duration": pd.to_numeric(list(data["duration"]), errors="coerce"),
    }).dropna().sort_values("timestamp")

    df_plot = df_plot.set_index("timestamp")

    # One value per calendar day between the first and last entry.
    df_daily = df_plot.resample('D')['duration'].sum()

    x = df_daily.index
    y = df_daily / 60

    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(x, y, linewidth=2)

    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m-%d"))
    fig.autofmt_xdate()  # slant the date labels so they do not overlap
    ax.set_xlabel("timestamp")
    ax.set_ylabel("duration in minutes")
    ax.set_title(f"Duration of {plot_title} breaks")

    plt.show()

    
def _wordle_totals(stats, timestamps, missing_day_penalty=7):
    """Return the total Wordle score per user as a Series indexed by user id.

    Each entry of ``stats`` is parsed as ``"<user id> - <score>/<...>"`` and
    paired with the entry of ``timestamps`` at the same position. A user's
    total is the sum of their scores plus ``missing_day_penalty`` for every
    day, between the earliest and latest logged date, on which they have no
    entry. Users appear in order of first appearance.

    Raises:
        ValueError: If ``stats`` is empty, or a score is not an integer.
    """
    rows = []
    for stat, raw_timestamp in zip(stats, timestamps):
        parts = stat.split("-")
        # NOTE(review): a non-numeric score (e.g. a failed game logged as
        # "X/6") raises ValueError here -- confirm what the logger writes.
        score = int(parts[1].strip().split("/")[0])
        rows.append({
            "name_id": parts[0].strip(),
            "score": score,
            "date": pd.to_datetime(raw_timestamp).date(),
        })

    if not rows:
        raise ValueError("No wordle stats found to plot")

    frame = pd.DataFrame(rows)
    # Number of calendar days covered by the log, both ends included.
    n_days = (frame["date"].max() - frame["date"].min()).days + 1

    # sort=False keeps users in order of first appearance.
    per_user = frame.groupby("name_id", sort=False)
    days_played = per_user["date"].nunique()
    return per_user["score"].sum() + (n_days - days_played) * missing_day_penalty


def plot_leader_logs(filename, category="konge"):  # "konge", "monark" or "wordle"
    """Draw a bar chart with one bar per user for a leaderboard-style log.

    For ``"konge"`` and ``"monark"`` the bar height is the number of times
    each id occurs in the column named after the category. For ``"wordle"``
    the bar height is the user's summed score plus a penalty of 7 for each day
    without an entry; bars are coloured with the reversed ``RdYlGn`` colormap
    scaled by the highest total, and the y-axis is inverted.

    Ids are translated to display names through ``MAP_ID_NAME``; ids missing
    from the map are shown as-is.

    Args:
        filename: Path to the log file, passed to ``get_data``.
        category: One of ``"konge"``, ``"monark"`` or ``"wordle"``.

    Raises:
        ValueError: If ``category`` is not supported, or the wordle log has no
            entries.
    """
    if category not in ["monark", "konge", "wordle"]:
        raise ValueError(f"Category is not found. You wrote {category}")
    print(f"Plot of {filename}")
    data = get_data(filename)

    # Work out the bars before creating the figure so that a parsing error
    # does not leave an empty figure behind.
    if category == "wordle":
        totals = _wordle_totals(data["stats"], data["timestamp"])
        ids = totals.index
        counts = totals.to_numpy()
        # Explicit array division; the colormap expects values in [0, 1].
        colors = plt.cm.RdYlGn_r(counts / counts.max())
    else:  # "konge" or "monark"
        id_counts = pd.Series(data[category]).value_counts()
        ids = id_counts.index
        counts = id_counts.to_numpy()
        colors = ["C0"]

    names = [MAP_ID_NAME.get(str(user_id), user_id) for user_id in ids]

    plt.style.use('_mpl-gallery')
    fig, ax = plt.subplots(figsize=(10, 4))
    if category == "wordle":
        ax.invert_yaxis()

    ax.bar(names, counts, linewidth=20, color=colors)

    ax.set_xlabel(category)
    ax.set_ylabel("score")
    ax.set_title(f"Count of {category}")

    plt.show()


In [None]:
# Daily total duration from the log at KAFFE_LOG_FILE.
plot_timeseries_logs(kaffe_file)

In [None]:
# Daily total duration from the log at LUNSJ_LOG_FILE.
plot_timeseries_logs(lunsj_file)

In [None]:
# Daily total duration from the log at BORDTENNIS_LOG_FILE.
plot_timeseries_logs(bordtennis_file)

In [None]:
# Occurrences per user in the "konge" column (category defaults to "konge").
plot_leader_logs(konge_file)

In [None]:
# Occurrences per user in the "monark" column.
plot_leader_logs(monark_file, "monark")

In [None]:
# Summed Wordle scores per user; days without an entry add 7 to the total.
plot_leader_logs(wordle_file, "wordle")