## Testing Playground

You've probably noticed this project lacks a unit testing setup. Honestly, I found unit testing tricky for this project and felt that visually inspecting the outputs, such as markdown files or visualizations, was generally more trustworthy.

But I get the worry of accidentally breaking things when contributing code. So, I made this notebook to help with that. It's a work in progress, aimed at letting you easily see specific outputs of interest for smoother development.

Previously, my basic testing meant using a `test.py` file to generate a few markdown files and then checking them manually. Or, for a deeper look, I'd run `main.py` and wait a while to see everything, which isn't quick on my laptop.

This notebook aims to streamline that process, letting you test and inspect targeted parts of the output without the fear of breaking things.

**Before you begin, it's recommended that you put the `conversations.json` file close by, like in a `./data/` folder.**

In [None]:
import json
from pathlib import Path
from typing import Callable, List

import controllers.file_system as fs
from models.conversation import Conversation
from models.conversation_set import ConversationSet

# Where the exported conversations live and where generated files are written
conversations_path: Path = Path("data") / "conversations.json"  # adjust path if needed
output_path = Path("output")
output_path.mkdir(exist_ok=True)

# Parse the JSON export, then wrap the raw data in a ConversationSet
with conversations_path.open(mode="r", encoding="utf-8") as f:
    conversations = json.load(f)

conversation_set = ConversationSet(conversations=conversations)

In [None]:
def clear_output() -> None:
    """Clear output folder.

    Deletes every entry found directly inside `output_path` while keeping the
    folder itself. Files and symlinks are unlinked; real sub-directories are
    removed together with their contents, because `Path.unlink()` cannot
    delete a directory and would abort the clean-up halfway through.
    """
    import shutil  # local import: only this helper needs it

    for entry in output_path.glob(pattern="*"):
        # Never follow a symlink into a directory; just drop the link itself
        if entry.is_dir() and not entry.is_symlink():
            shutil.rmtree(entry)
        else:
            entry.unlink()

In [None]:
# Run this cell whenever you want to clear the output folder,
# e.g. before generating a fresh batch of files to inspect.
clear_output()

## Markdown

In [None]:
# Type of a ranking criterion: maps a conversation to an integer score
AttrFunc = Callable[[Conversation], int]


# Utility function to get statistics and save conversations based on a criterion
def get_top_convos(attr_func: AttrFunc, description: str, count: int = 5) -> None:
    """Print statistics for a criterion and save the top-scoring conversations.

    Parameters:
    - attr_func: Function mapping a conversation to the integer being ranked.
    - description: Human-readable name of the criterion, used in the printed text.
    - count: Number of top conversations to print and save (default 5).

    Returns:
    None. Prints average/median/max of the criterion, then prints each top
    conversation and saves it as '<sanitized title>.md' inside `output_path`.
    """
    from statistics import median  # local import keeps this cell self-contained

    # Evaluate the criterion exactly once per conversation and reuse the result
    scored: list[tuple[int, Conversation]] = [
        (attr_func(convo), convo) for convo in conversation_set.conversation_list
    ]

    # Without this guard the average below would divide by zero
    if not scored:
        print(f"No conversations found, nothing to rank by {description}.\n")
        return

    stats: list[int] = [score for score, _ in scored]
    avg_stat: float = sum(stats) / len(stats)
    # statistics.median averages the two middle values of an even-sized sample
    median_stat: float = median(stats)
    max_stat: int = max(stats)

    print(
        f"Average {description}: {avg_stat}\n"
        f"Median {description}: {median_stat}\n"
        f"Max {description}: {max_stat}\n"
    )

    # Sort on the score alone; the sort is stable, so ties keep their input order
    top_scored: list[tuple[int, Conversation]] = sorted(
        scored, key=lambda pair: pair[0], reverse=True
    )[:count]

    for score, convo in top_scored:
        print(
            f"id: {convo.id}\n"
            f"title: {convo.title}\n"
            f"{description}: {score}\n"
        )
        file_path: Path = output_path / f"{convo.sanitized_title()}.md"
        fs.save_conversation_to_file(conversation=convo, filepath=file_path)
        print(f"saved to '{file_path.resolve()}'\n")

In [None]:
# Rank conversations by `leaf_count()` and save the top ones
get_top_convos(lambda conv: conv.leaf_count(), "leaf count")

In [None]:
# Rank conversations by `message_count()` and save the top ones
get_top_convos(lambda conv: conv.message_count(), "message count")

In [None]:
# Rank conversations by how many entries `content_types()` returns
get_top_convos(lambda conv: len(conv.content_types()), "content type count")

In [None]:
# Rank conversations by how many entries `used_plugins()` returns
get_top_convos(lambda conv: len(conv.used_plugins()), "plugin count")

## Data Visualization

### Word Clouds

In [None]:
from datetime import datetime
from random import choice

from utils.utils import get_colormap_names, get_font_names

# Group the conversations by week and draw one week at random as the sample
weeks_dict: dict[datetime, ConversationSet] = conversation_set.grouped_by_week()
week: datetime = choice(list(weeks_dict))
sample_conv_set: ConversationSet = weeks_dict[week]

# Randomise the look of the word cloud: font first, then colormap
font_name: str = choice(get_font_names())
font_path: str = f"assets/fonts/{font_name}.ttf"
colormap: str = choice(get_colormap_names())

# Render the word cloud for the sampled week into the output folder
fs.save_wordcloud_from_conversation_set(
    conv_set=sample_conv_set,
    dir_path=output_path,
    time_period=(week, "week"),
    font_path=font_path,
    colormap=colormap,
)

# Report which random choices were made so the result can be reproduced by eye
print(
    f"font: {font_name}\n"
    f"colormap: {colormap}\n"
    f"week: {week.strftime('%Y week %W')}\n"
    f"saved to '{output_path.resolve()}'\n"
)

### Graphs

In [None]:
import random
import time
from collections import defaultdict
from datetime import datetime, timedelta

import matplotlib.pyplot as plt


def generate_random_timestamps(start_date: str, num_days: int) -> list[float]:
    """
    Builds random timestamps for every day of a range that begins at the given date.

    Parameters:
    - start_date: First day of the range in the format 'YYYY-MM-DD'.
    - num_days: Number of consecutive days the range covers.

    Returns:
    List of timestamps as floats, produced day by day (1 to 10 per day).
    """

    first_day: datetime = datetime.strptime(start_date, "%Y-%m-%d")
    result: list[float] = []

    for day_offset in range(num_days):
        midnight: datetime = first_day + timedelta(days=day_offset)
        # Every day receives between 1 and 10 timestamps (both bounds inclusive)
        samples_today: int = random.randint(1, 10)
        for _ in range(samples_today):
            # Any second of the day: 0 to 86399, which is 24*60*60 - 1
            shift: timedelta = timedelta(seconds=random.randint(0, 86399))
            result.append(time.mktime((midnight + shift).timetuple()))

    return result


def create_weekwise_timeseries_graph(timestamps: list[float]) -> None:
    """
    Plots a bar chart of how many of the given timestamps fall on each weekday.

    Parameters:
    - timestamps: List of timestamps as floats.

    Returns:
    None. Displays the plot.
    """

    weekday_names: list[str] = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]

    # One counter per weekday; datetime.weekday() is 0 for Monday up to 6 for Sunday
    tally: list[int] = [0] * len(weekday_names)
    for ts in timestamps:
        tally[datetime.fromtimestamp(ts).weekday()] += 1

    plt.bar(x=weekday_names, height=tally)  # type: ignore
    plt.xlabel(xlabel="Day of the Week")  # type: ignore
    plt.ylabel(ylabel="Frequency")  # type: ignore
    plt.title(label="Week-wise Frequency of Timestamps")  # type: ignore
    plt.xticks(rotation=45)  # type: ignore
    plt.tight_layout()  # type: ignore
    plt.show()  # type: ignore


# Sample data: random timestamps for 60 days starting on 2023-01-01
timestamps: list[float] = generate_random_timestamps("2023-01-01", 60)

create_weekwise_timeseries_graph(timestamps)

In [None]:
def create_monthwise_timeseries_graph(timestamps: list[float]) -> None:
    """
    Plots a bar chart of how many of the given timestamps fall in each calendar month.

    Parameters:
    - timestamps: List of timestamps as floats.

    Returns:
    None. Displays the plot.
    """

    month_names: list[str] = [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December",
    ]

    # One counter per month; datetime.month is 1-based, hence the shift by one
    tally: list[int] = [0] * len(month_names)
    for ts in timestamps:
        tally[datetime.fromtimestamp(ts).month - 1] += 1

    plt.bar(x=month_names, height=tally)  # type: ignore
    plt.xlabel(xlabel="Month")  # type: ignore
    plt.ylabel(ylabel="Frequency")  # type: ignore
    plt.title(label="Month-wise Frequency of Timestamps")  # type: ignore
    plt.xticks(rotation=45)  # type: ignore
    plt.tight_layout()  # type: ignore
    plt.show()  # type: ignore


# Sample data: random timestamps for 365 days starting on 2023-01-01
timestamps: list[float] = generate_random_timestamps("2023-01-01", 365)

create_monthwise_timeseries_graph(timestamps)