In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.text import Text
import wordcloud
import squarify

In [None]:
# Read the raw emote-count CSV. The file was written from a MultiIndex pivot
# table, so only its second header row (the one carrying "file") is used.
df = pd.read_csv("./../../output/Jerma985/plots/vods_emote_count_data_raw.csv", header=[1])
# Discard every row that contains a NaN (presumably the leftover header rows
# of the pivot table -- verify against the CSV).
df = df.dropna()
# Expose the "file" column under the name "word" and use it as the index.
df = df.rename(columns={"file": "word"})
df = df.set_index("word")

In [None]:
# Display the DataFrame `df` as this cell's output.
df

In [None]:
# Column label of the VOD to visualise; the text before the first underscore
# is treated as the id and the remainder as the VOD name.
id_vod_name = "1742420368_We are all remembering so much right now"
# Split on the first underscore only, so underscores inside the name survive.
# NOTE(review): `id` shadows the builtin of the same name; kept because other
# cells may rely on it.
id, vod_name = id_vod_name.split("_", 1)
# Map each index label ("word") to its value in this VOD's column, keeping
# only the entries whose value is non-zero.
word_freq = df[id_vod_name].loc[lambda counts: counts != 0.0].to_dict()

In [None]:
# Display the `word_freq` dictionary as this cell's output.
word_freq

In [None]:
# Canvas size passed to WordCloud (width x height).
width = 1200
height = 600

In [None]:
# Build the emote cloud from the word -> frequency mapping.
# random_state is pinned so that the (otherwise random) placement and colours
# are the same on every Restart & Run All.
emotecloud = wordcloud.WordCloud(
    width=width,
    height=height,
    random_state=0,
).generate_from_frequencies(word_freq)

In [None]:
# Render the emote cloud on the current axes: titled with the VOD name,
# axis decorations hidden, and no padding around the axes.
plt.gca().set_title(f"Emote Cloud ({vod_name})")
plt.gca().axis("off")
plt.gcf().tight_layout(pad=0)
# Kept as the last expression so the cell still returns the AxesImage.
plt.imshow(emotecloud)

In [None]:
import os, sys
import base64
from typing import Dict, Iterable, Optional

def _as_frozenset(values: Optional[Iterable[str]], default: Iterable[str]) -> frozenset:
    """Return ``values`` as a frozenset, falling back to ``default`` when it is None."""
    if values is None:
        return frozenset(default)
    # A bare string is itself an iterable of characters; treat it as one item.
    if isinstance(values, str):
        return frozenset((values,))
    return frozenset(values)


def label_b64_images(
    dir: str, ignore_fnames: Optional[Iterable[str]] = None, keep_ext: Optional[Iterable[str]] = None
) -> Dict[str, str]:
    """Map base64-decoded image file names found in ``dir`` to their paths.

    Every entry of ``dir`` whose extension is in ``keep_ext`` and whose full
    file name is not in ``ignore_fnames`` has its stem base64-decoded; the
    decoded text becomes the key and ``os.path.join(dir, file_name)`` the value.

    Args:
        dir: Directory to scan (not recursive).
        ignore_fnames: Full file names (with extension) to skip. Defaults to none.
        keep_ext: Extensions, including the leading dot, to keep. Matching is
            case-sensitive. Defaults to ``.png`` and ``.gif``.

    Returns:
        Dict of decoded name -> file path. Files whose stem cannot be decoded
        are reported on stderr and left out.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. ``dir`` does not exist).
    """
    # Materialise both filters once: they are typed as Iterable, so a caller
    # may pass a one-shot iterator that repeated ``in`` tests would exhaust.
    ignored = _as_frozenset(ignore_fnames, ())
    kept_exts = _as_frozenset(keep_ext, (".png", ".gif"))

    fname_to_path: Dict[str, str] = {}
    # Sorted so that, when two files decode to the same name, the one that
    # wins does not depend on the arbitrary order returned by os.listdir().
    for full_fname in sorted(os.listdir(dir)):
        encoded_name, ext = os.path.splitext(full_fname)
        if ext not in kept_exts or full_fname in ignored:
            continue

        try:
            decoded_fname = base64.b64decode(encoded_name).decode()
        except ValueError as err:
            # binascii.Error (bad base64) and UnicodeDecodeError (not UTF-8)
            # are both ValueError subclasses; skip the file but say why.
            sys.stderr.write(f"Cannot b64decode {encoded_name}: {err}\n")
            continue
        fname_to_path[decoded_fname] = os.path.join(dir, full_fname)

    return fname_to_path

In [None]:
# Merge the emote-name -> image-path mappings of every emote directory.
# Directories are visited in the order listed, so on a name collision the
# path from the later directory replaces the earlier one.
all_emotes = {
    emote_name: emote_path
    for emote_dir in (
        "./../../output/Jerma985/emotes/bttv/",
        "./../../output/Jerma985/emotes/twitch/",
        "./../../output/all/emotes/bttv/",
        "./../../output/all/emotes/twitch/",
    )
    for emote_name, emote_path in label_b64_images(emote_dir).items()
}

In [None]:
import numpy as np
from PIL import Image
from matplotlib.offsetbox import AnnotationBbox, OffsetImage

In [None]:
# Origin and size of the treemap canvas.
# NOTE: `width` / `height` rebind the names used earlier for the word cloud.
x = 0.
y = 0.
width = 800.
height = 400.
# squarify expects its sizes in descending order, so order the frequencies
# largest-first. `word_freq` is rebound so that its key order keeps matching
# `values` wherever the two are zipped or plotted together.
word_freq = dict(sorted(word_freq.items(), key=lambda item: item[1], reverse=True))
# Normalize sizes so that they sum to the canvas area (width * height).
values = squarify.normalize_sizes(list(word_freq.values()), width, height)
# Map each emote name to its rectangle (dict with "x", "y", "dx", "dy").
rects = dict(zip(word_freq, squarify.squarify(values, x=x, y=y, dx=width, dy=height)))

In [None]:
# Draw the emote treemap, then swap each text label for the emote's image
# whenever an image is available in `all_emotes`.
# (No plt.clf() here: it would clear whatever figure is current, or create a
# stray empty figure when none exists.)
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(111)

# Lay the plot out on the same width x height canvas that `rects` was built
# on. squarify's layout depends on the canvas aspect ratio, so its default
# 100 x 100 canvas can produce rectangles that differ from those in `rects`.
squarify.plot(
    sizes=values,
    norm_x=width,
    norm_y=height,
    label=word_freq.keys(),
    ax=ax,
    bar_kwargs={"edgecolor": "black"},
)
ax.set_title(f"Emote Treemap ({vod_name})")
ax.set_axis_off()
fig.tight_layout(pad=0)

# ax.texts holds the labels drawn with ax.text(), which excludes the axes
# title objects. Copy the list because artists are added while looping.
for label in list(ax.texts):
    emote_name = label.get_text()
    image_path = all_emotes.get(emote_name)
    if not image_path:
        # No image for this emote: keep the text label.
        continue

    # Close the file handle as soon as the pixel data has been converted.
    with Image.open(image_path) as source:
        img = source.convert("RGBA")

    # Use the real image size; getbbox() only covers non-empty pixels and
    # returns None for a fully transparent image.
    img_w, img_h = img.size
    rect = rects[emote_name]

    # Largest uniform scale at which the image fits inside the rectangle on
    # BOTH axes, so non-square images cannot overflow the longer side.
    scale = min(rect["dx"] / img_w, rect["dy"] / img_h)
    # Clamp to one pixel: resize() rejects zero-sized targets.
    new_img_dim = (max(1, int(img_w * scale)), max(1, int(img_h * scale)))
    img = img.resize(size=new_img_dim)

    # Blank the text and put the image annotation at the label's position.
    label.set_text("")
    im = OffsetImage(img)
    im.image.axes = ax

    ab = AnnotationBbox(im, xy=label.get_position(), xycoords="data", pad=0, frameon=False)
    ax.add_artist(ab)

In [None]:
# Display all open matplotlib figures.
plt.show()