In [None]:
import requests
import json
import re
import pandas as pd
import os
from os.path import isfile
from collections import namedtuple
import csv

In [None]:
# Matches emoji shortcodes that belong to one of the tracked emoji families.
# Anchored at the start of the name and case-sensitive.
emoji_patt = re.compile(
    # Optional "a" or "a_" prefix.
    r"^(?:a_?)?"
    # "nao"/"neo"/"nio" followed by at least one more character ...
    r"(n[aei]o(?:.+)"
    # ... or any of these fixed family prefixes.
    r"|blobbee|grphn|floof|drgn|vlpn|sylveon|ying|niodog|thio|myno|wvrn)"
)

In [None]:
# Lightweight record describing one server: display name, host, and the
# software it runs (which decides the emoji API endpoint and JSON layout).
Instance = namedtuple("Instance", "name url software")

In [None]:
def download_emoji_file(server):
    if isfile(f"emoji_files/{server.name}.json"):
        print(f"File already exists for {server.name}")
        return

    match(server.software):
        case "akkoma":
            request_url = f"https://{server.url}/api/v1/pleroma/emoji"
        case "misskey":
            request_url = f"https://{server.url}/api/emojis"
        case _:
            request_url = f"https://{server.url}/api/v1/custom_emojis"

    with requests.get(request_url) as request:
        if request.ok:
            with open(f"emoji_files/{server.name}.json", "wb") as out_file:
                out_file.write(request.content)
            print(f"Wrote file for {server.name}")
        else:
            print(f"Couldn't read {server.name} at {request_url}")

In [None]:
def read_emoji(server):
    """Load the cached emoji listing of one instance and keep the tracked emoji.

    Reads ``emoji_files/<server.name>.json`` and interprets it according to
    ``server.software``:

    * ``"akkoma"``: a mapping of shortcode -> entry, URL under ``"image_url"``.
    * ``"misskey"``: entries listed under ``"emojis"``, shortcode under
      ``"name"``, URL under ``"url"``.
    * anything else: a list of entries, shortcode under ``"shortcode"``, URL
      under ``"static_url"``.

    Only shortcodes matching ``emoji_patt`` are kept. The match is done on the
    shortcode as stored; the resulting key is lower-cased and has a trailing
    ``_256`` removed.

    Returns:
        dict mapping normalised shortcode -> ``{server.name: url}``.
    """
    with open(f"emoji_files/{server.name}.json", encoding="utf8") as handle:
        payload = json.load(handle)

    # Normalise the three layouts into a lazy stream of
    # (shortcode, entry, key-holding-the-url) triples.
    if server.software == "akkoma":
        candidates = ((code, entry, "image_url") for code, entry in payload.items())
    elif server.software == "misskey":
        candidates = ((entry["name"], entry, "url") for entry in payload["emojis"])
    else:
        candidates = ((entry["shortcode"], entry, "static_url") for entry in payload)

    found = {}
    for code, entry, url_key in candidates:
        if emoji_patt.match(code):
            found[re.sub("_256$", "", code.lower())] = {f"{server.name}": entry[url_key]}
    return found

In [None]:
def add_instance_to_df(left_df, right_instance):
    """Return ``left_df`` extended with the emoji of one more instance.

    The instance's emoji (as returned by ``read_emoji``) become a frame indexed
    by shortcode, which is outer-joined onto ``left_df`` by index so shortcodes
    known to either side are kept. The result is sorted by shortcode.
    """
    instance_frame = pd.DataFrame.from_dict(read_emoji(right_instance), orient="index")
    return pd.merge(
        left_df,
        instance_frame,
        how="outer",
        left_index=True,
        right_index=True,
        sort=True,
    )

In [None]:
def html_img_formatter(url):
    """Render an image URL as a 48px-wide HTML ``<img>`` tag.

    The URL is HTML-escaped before being placed in the ``src`` attribute, so a
    value containing quotes, ``<``, ``>`` or ``&`` cannot break out of the
    attribute or inject markup into the report.

    Args:
        url: Image address; converted with ``str`` before escaping.

    Returns:
        The ``<img>`` tag as a string.
    """
    # Local import: the notebook's import cell does not pull in ``html``.
    from html import escape

    return f"<img src=\"{escape(str(url), quote=True)}\" width=\"48\" />"

In [None]:
def write_report(file_name, dataframe, columns=None):
    """Write ``dataframe`` as a standalone HTML report under ``./reports``.

    The table gets rotated, sticky column headers and a sticky first (index)
    column. Every column except the first one of ``dataframe`` is rendered
    through ``html_img_formatter``, i.e. as an image.

    Args:
        file_name: Report name without extension; the file written is
            ``./reports/<file_name>.html``.
        dataframe: Frame to render. Its first column is left unformatted.
        columns: Columns to include, in display order. When omitted, the
            notebook-level ``col_order`` is used.

    Returns:
        None.
    """
    if columns is None:
        # Keep the original behaviour: ordering comes from an earlier cell.
        columns = col_order

    html = """<html>
    <head>
    <meta charset="utf-8">
    <style>
        #df_data thead th {
            height: 140px;
            white-space: nowrap;
            padding-bottom: 0;
            padding-top: 100px;
        }
        
        #df_data thead th div {
            transform: rotate(270deg);
            width: 32px;
        }
        
        #df_data thead th div span {
            border-bottom: 1px solid #ccc;
            padding: 5px 10px;
        }
        #df_data tbody tr th {
            position: sticky;
            left: 0;
            background: #FFF;
            text-align: left;
            max-width: 150px;
            overflow-wrap: break-word;
        }
        #df_data > thead th {
            position: sticky;
            top: 0;
            background: #FFF;
        }
    </style>
    </head>
    <body>
    """
    html += dataframe.to_html(
        na_rep="",
        # Cells hold ready-made <img> markup, so pandas must not escape it.
        escape=False,
        justify="left",
        columns=columns,
        table_id="df_data",
        # Derive the formatter keys from the frame being rendered, not from a
        # notebook-level variable that may be a different frame.
        formatters={c: html_img_formatter for c in dataframe.columns.to_list()[1:]},
    )
    html += """
    </body>
</html>
"""
    # Wrap header cell contents so the CSS above can rotate/underline them.
    # Only attribute-less <th> tags are touched.
    html = html.replace('<th>', '<th><div><span>')
    html = html.replace('</th>', '</span></div></th>')

    os.makedirs("./reports", exist_ok=True)
    # Explicit encoding so the result does not depend on the platform default
    # and agrees with the declared charset.
    with open(f"./reports/{file_name}.html", "w", encoding="utf-8") as out_file:
        out_file.write(html)

In [None]:
# Load the enabled instances from the CSV list, keyed by instance name.
# Each row is: name, url, software, enabled. Only rows whose last field is
# exactly the text "True" are kept.
instances = {}
# newline="" lets the csv module do its own line-ending handling, as its
# documentation asks for files passed to csv.reader.
with open("sample_instances.csv", newline="") as in_file:
    instance_reader = csv.reader(in_file)
    for row in instance_reader:
        if not row:
            # csv.reader yields an empty list for a blank line; there is
            # nothing to unpack, so skip it rather than fail.
            continue
        name, url, software, enabled = row
        if enabled == "True":
            instances[name] = Instance(name, url, software)

In [None]:
# Make sure every enabled instance has its emoji listing cached locally.
for inst in instances.values():
    download_emoji_file(inst)

In [None]:
# Build one wide table: rows are emoji shortcodes, one column per instance
# holding that instance's image URL (missing where the instance lacks it).
#
# Every instance, including the first, is read inside the same try block so
# that a single unusable cache file is reported and skipped instead of
# aborting the cell.
df = None
for instance in instances.values():
    try:
        if df is None:
            # First readable instance seeds the table.
            df = pd.DataFrame.from_dict(read_emoji(instance), orient="index")
        else:
            df = add_instance_to_df(df, instance)
    except FileNotFoundError:
        # download_emoji_file leaves no file behind when the fetch failed.
        print(f"No emoji file for {instance.name}")
    except json.JSONDecodeError:
        print(f"Invalid JSON in {instance.name}")
    except KeyError:
        print(f"Key error in {instance.name}")
    except UnicodeDecodeError:
        print(f"Unicode error in {instance.name}")

if df is None:
    # No instance could be read (or none is enabled): continue with an empty
    # table rather than leaving `df` undefined for the following cells.
    df = pd.DataFrame()

In [None]:
# Count, per emoji, how many instances carry it, and move that count to the
# front of the table.
#
# Only per-instance columns are counted and the reorder is done by name, so
# re-running this cell (even after later cells added their own columns) gives
# the same result instead of counting "instance_count" itself or
# duplicating/dropping columns.
bookkeeping_columns = ("instance_count", "downloaded")
instance_columns = [c for c in df.columns if c not in bookkeeping_columns]
df["instance_count"] = df[instance_columns].notna().sum(axis=1)
df = df[["instance_count", *(c for c in df.columns if c != "instance_count")]]

In [None]:
# Among emoji found on exactly one instance, count how many each column
# contributes, highest first. The [1:] skips the first entry of the per-column
# totals (the first column of the table). The resulting order is what the
# report uses for its columns.
single_instance_rows = df[df["instance_count"] == 1]
exclusive_counts = single_instance_rows.notna().sum()[1:].sort_values(ascending=False)
print(exclusive_counts)
col_order = exclusive_counts.index.to_list()

In [None]:
# Flag the emoji that already exist as files in the local "emoji" directory.
# A file counts when its name, minus the last extension, is a shortcode in
# the table.
download_list = {}
for entry in os.scandir("emoji"):
    # is_file must be *called*; the bare attribute is a bound method and is
    # always truthy, which would let directories through.
    if entry.is_file():
        name = entry.name.rsplit(".", 1)[0]
        if name in df.index:
            download_list[name] = True
# Assign the flag directly as a boolean column: True for shortcodes collected
# above, False for everything else.
df["downloaded"] = df.index.isin(list(download_list))

In [None]:
# Report only the emoji that are not yet present locally, ordered by shortcode.
write_report(
    "new_2024-09-20",
    df.loc[df["downloaded"].eq(False)].sort_index(),
)