# Make Web Site

In [1]:
import sys, os, io, datetime
import json
import random
import requests
import zipfile
import glob
import re
import io
import shutil
import pandas as pd
from slugify import slugify
from tqdm.notebook import tqdm
from html import escape
from collections import defaultdict

In [2]:
import corpi
import cdli
import oracc
import languages

In [3]:
import importlib

In [4]:
# Do not truncate the column list when pandas objects are displayed in this notebook.
pd.set_option("display.max_columns", None)

In [5]:
# Source-language codes handed to corpi.merge_corpus_pubs when the corpora are merged.
supported_langs = {"akk", "sux"}

In [6]:
# Absolute path of the directory that receives every generated file; created if missing.
wwwroot = os.path.abspath(os.path.join("..", "dist"))
os.makedirs(wwwroot, exist_ok=True)
wwwroot

'/Users/fak/Dropbox/Projects/CuneiformTranslators/dist'

## CDLI

In [7]:
# Reload the local helper modules so edits made to them take effect without a kernel restart.
importlib.reload(cdli)
importlib.reload(oracc)
importlib.reload(corpi)

<module 'corpi' from '/Users/fak/Dropbox/Projects/CuneiformTranslators/tools/corpi.py'>

In [8]:
# Build the CDLI corpus object through the local corpi module.
print("Loading CDLI...")
cdli_corpus = corpi.CDLI()

Loading CDLI...


## ORACC

In [9]:
# Build the Oracc corpus object from the directory of downloaded Oracc zip files.
# The directory can be overridden with the ORACC_DIR environment variable so the
# notebook is not tied to one machine; the previous hard-coded path stays the default.
print("Loading Oracc...")
oracc_dir = os.environ.get("ORACC_DIR", "/Volumes/FrankDisk/oracc_zips")
oracc_corpus = corpi.ORACC(oracc_dir=oracc_dir, tqdm=tqdm)

Loading Oracc...


## Merge

In [10]:
# Merge the publications of both corpora, restricted to the supported languages.
# Each entry pairs a corpus label with that corpus's publications.
corpus_sources = [
    ("oracc", oracc_corpus.oracc_pubs.values()),
    ("cdli", cdli_corpus.cdli_pubs.values()),
]
all_pubs = corpi.merge_corpus_pubs(corpus_sources, supported_langs)
print(f"{len(all_pubs):,} unique publications")

132,155 unique publications


In [11]:
# Count the publications that report having translations before any ML output is attached.
num_translations = sum(1 for pub in all_pubs.values() if pub.has_translations())
print(f"{num_translations:,} human translations:")

9,444 human translations:


## Get the ML Translations

In [12]:
# Read the first .json member of the ML translations archive and report the size of
# every "<source>_to_<target>" table it contains.
translations_zip_path = "../data/ml_translations.zip"
with zipfile.ZipFile(translations_zip_path) as zf:
    json_names = [name for name in zf.namelist() if name.endswith(".json")]
    json_name = json_names[0]
    translations = json.loads(zf.read(json_name).decode("utf-8"))
for k, table in translations.items():
    if "_to_" in k:
        print(k, len(table))

akk_to_en 195312
sux_to_en 458557


## Find Publications that have ML Translations

In [13]:
# Matches text made up solely of spaces and full stops.
just_dots_and_spaces = re.compile(r"^[ .]+$")

def translation_is_good(tgt):
    """Return True when ``tgt`` holds something other than whitespace, dots and spaces.

    The text is stripped first; an empty result, or one consisting only of
    spaces and full stops, is rejected.
    """
    stripped = tgt.strip()
    if not stripped:
        return False
    return just_dots_and_spaces.match(stripped) is None


In [14]:
# Attach machine translations to every paragraph of every publication.
#
# For each publication the "<language>_to_<tgt_lang>" table is looked up in `translations`.
# Every paragraph gets a "ml_<tgt_lang>" entry in `p.languages` built from the translations
# of its source lines, and two flags are recorded on the publication:
#   has_ml_translations  - at least one paragraph received non-empty ML text
#   has_new_translations - at least one usable ML line landed in a paragraph that has no
#                          `tgt_lang` entry of its own
tgt_lang = "en"

translated_pubs = []

for pub in tqdm(list(all_pubs.values())):
    has_new_translations = False
    has_ml_translations = False
    st_key = f"{pub.language}_to_{tgt_lang}"
    if st_key not in translations:
        # Record the flags before skipping: the summaries below read them on every publication.
        pub.has_new_translations = False
        pub.has_ml_translations = False
        continue
    st_translations = translations[st_key]
    ml_lang = "ml_" + tgt_lang
    for a in pub.text_areas:
        # CDLI areas that only have lines get their paragraphs built first.
        if pub.corpus == "cdli" and len(a.lines) > 0 and len(a.paragraphs) == 0:
            a.lines_to_paragraphs(pub.language, tgt_lang)
        paras = a.paragraphs_to_lines(lang=pub.language, corpus_id=pub.corpus)
        for i, plines in enumerate(paras):
            p = a.paragraphs[i]
            pieces = []
            for si, ei, s in plines:
                s = s.strip()
                # Source lines of one character or less are ignored, as are lines that
                # have no entry in the translation table.
                if len(s) <= 1 or s not in st_translations:
                    continue
                line_translation = st_translations[s]
                if translation_is_good(line_translation):
                    has_new_translations = has_new_translations or (tgt_lang not in p.languages)
                    pieces.append(line_translation)
            # Usable line translations are joined with single spaces, then cleaned up.
            joined = " ".join(pieces).strip()
            p.languages[ml_lang] = languages.remove_suffix_repeats(joined).strip()
            has_ml_translations = has_ml_translations or len(p.languages[ml_lang]) > 0
    pub.has_new_translations = has_new_translations
    pub.has_ml_translations = has_ml_translations
    if has_ml_translations:
        translated_pubs.append(pub.id)

newly_translated_pubs = [x for x in all_pubs.values() if x.has_new_translations]
ml_translated_pubs = [x for x in all_pubs.values() if x.has_ml_translations]
print(f"{len(ml_translated_pubs):,} ml translated pubs")
print(f"{len(newly_translated_pubs):,} newly translated pubs")

  0%|          | 0/132155 [00:00<?, ?it/s]

129,705 ml translated pubs
115,306 newly translated pubs


In [15]:
# Number of ML-translated publications whose language is "sux".
sum(1 for pub_id in translated_pubs if all_pubs[pub_id].language == "sux"), "sux"

(99295, 'sux')

In [16]:
# Spot check: show the first publication flagged with has_new_translations.
newly_translated_pubs[0]

Publication('P468981', 'akk', [TextArea('tablet', [], []), TextArea('obverse', [TextLine('1.', '[a-na {d}]marduk#? kab#-tu szit-ra-hu {d}en-lil2 _dingir-mesz_ sza2-qu-u2', {}), TextLine('2.', '[e-li _dingir]-mesz#_ a-szir# _dingir-mesz_ ka-la-me mu-kil mar-kas {d}i2-gi3-gi3', {}), TextLine('3.', "[u3 {d}a]-nun#-na-ki mu-ma-'e-er _dingir_ ku-na _lugal szu2 an_-e u3 _ki_-tim", {}), TextLine('4.', 'sza2#? a#?-na#? zik#-ri-szu2 _dingir-mesz gal-mesz_ pal#-hisz u2-taq-qu-u2 qi2-bit-su', {}), TextLine('5.', 'szah-tu2 la-a-nu szi-i-hu sza2 ina _zu-ab_ ir-bu-u2 bal-ti szur-ru-hu', {}), TextLine('6.', "mi3-na-a-ta szu-tu-ru s,u-ub-bu-u2 nab-ni-ti le-e'-um", {}), TextLine('7.', "le-e'-u2-tu mu-du-u2 ka-la-me la-mid t,e3-em _zu-ab_", {}), TextLine('8.', 'a-hi-iz pi-risz-ti lal3-gar _en_ babila2{ki}', {}), TextLine('9.', 'a-szib e2-sag-il2 _en gal_-u2 _en szu2 {disz}an-szar2-e-tel-li-dingir-mesz_', {}), TextLine('10.', '_lugal szu2 lugal kur_ an-szar2{ki} _{gesz}banszur {gesz}mes-ma2-kan-na_', {})

## Filter out publications with no translations

In [17]:
def keep_pub(pub):
    """Decide whether ``pub`` should be published.

    Returns False when the publication has no ML translations; otherwise
    returns whatever ``pub.is_translated("ml_en")`` reports.
    """
    if pub.has_ml_translations:
        return pub.is_translated("ml_en")
    return False

# Keep only publications that received ML text.
# NOTE(review): keep_pub() is defined in this cell but not applied here; the filter
# looks at has_ml_translations alone - confirm that is intended.
all_pubs = {
    pub_id: pub
    for pub_id, pub in all_pubs.items()
    if pub.has_ml_translations
}
print(f"{len(all_pubs):,} publications to output")

129,705 publications to output


In [18]:
# Bucket publications under the lower-cased first four characters of their id;
# each bucket becomes one page/JSON file under /p/.
all_pubs_by_dir = defaultdict(list)
for pub in all_pubs.values():
    dir_key = pub.id[:4].lower()
    all_pubs_by_dir[dir_key].append(pub)
print(f"{len(all_pubs_by_dir):,} publication directories")


376 publication directories


## Data Dimensions

In [19]:
# Facets offered by the browse pages. Each entry is (dimension name, selector); the
# selector takes a publication and returns the list of values it falls under for that
# dimension. The values are used by output_browser both as link targets and as labels.
browser_dimensions = [
#     ("new", lambda p: ["new" if p.has_new_translations else "old"]),
#     ("language", lambda p: [p.language]),
    ("object_type", lambda p: [cdli.get_object_type(p.object_type)]),
    ("genre", lambda p: cdli.get_genres(p.genre)),
    ("period", lambda p: [cdli.period_slug_from_period[x] for x in cdli.get_periods(p.period)]),
]

## HTML Components

## HTML Pages

In [20]:
def get_file_path(site_path):
    """Map a site-relative path (beginning with "/") to a path under ``wwwroot``."""
    return "{}{}".format(wwwroot, site_path)

def get_page_file_path(site_path):
    """Return the on-disk path of the ``.html`` file for ``site_path``."""
    return "{}.html".format(get_file_path(site_path))

def get_json_file_path(site_path):
    """Return the on-disk path of the ``.json`` file for ``site_path``."""
    return "{}.json".format(get_file_path(site_path))
    

In [21]:
def header(paths_and_titles, f):
    """Write the document head, breadcrumb title and search form to ``f``.

    ``paths_and_titles`` is a non-empty list of ``(path segment, title)`` pairs
    ordered from the site root down to the current page. The last title is used
    as the ``<title>``; every earlier entry becomes a breadcrumb link whose
    target is the accumulated path segments followed by "/". The function
    leaves ``<div class='content'>`` open for the page body.
    """
    page_title = paths_and_titles[-1][1]
    last_index = len(paths_and_titles) - 1

    breadcrumbs = []
    trail = []
    for index, (segment, crumb_title) in enumerate(paths_and_titles):
        trail.append(segment)
        if index == last_index:
            # The current page is shown as plain text, not as a link.
            breadcrumbs.append(f" / {escape(crumb_title)}")
        else:
            abs_path = "/" + "/".join(trail)
            breadcrumbs.append(f" / <a href='{abs_path}/'>{escape(crumb_title)}</a>")

    f.write(
        "<!DOCTYPE html>\n"
        "<html>\n<head>\n"
        "<meta charset='utf-8'>\n"
        f"<title>{escape(page_title)}</title>\n"
        "<meta name='viewport' content='width=device-width, initial-scale=1'>\n"
        "<link rel='stylesheet' href='/main.css'>\n"
        "</head>\n"
        "<body>\n"
        "<header><h1 id='page-title'>"
        "<a href='/'>AICC</a>"
    )
    f.write("".join(breadcrumbs))
    f.write(
        "</h1>\n"
        "<form id='search-form' action='/search.html' method='get'>\n"
        "<input type='text' id='search-input' name='q' autocomplete='off' autocapitalize='off' autocorrect='off' spellcheck='false' placeholder='Search...'>\n"
        "</form>\n"
        "</header>\n"
        "<div class='content'>\n"
    )
    
def footer(f, script=None):
    """Close the content div opened by ``header`` and finish the HTML document.

    Writes the credits footer and the ``/main.js`` script tag. When ``script``
    is not None its text is emitted verbatim inside an extra inline
    ``<script>`` element before ``</body>``.
    """
    inline_script = "" if script is None else f"<script>{script}</script>\n"
    f.write(
        "</div>\n"
        "<footer>\n"
        "<p class='otitle'>Code by&nbsp;<a class='external' href='https://github.com/praeclarum/CuneiformTranslators'>praeclarum</a> — AI&nbsp;Translations by <a class='external' href='https://huggingface.co/praeclarum/cuneiform'>praeclarum/cuneiform</a></p>\n"
        "</footer>\n"
        "<script src='/main.js'></script>\n"
        + inline_script
        + "</body>\n</html>"
    )

In [22]:
def start_page(paths_and_titles):
    """Create the HTML file for a page and write its header.

    ``paths_and_titles`` is the same list of ``(path segment, title)`` pairs
    that ``header`` takes; the segments joined with "/" give the site path of
    the page. Missing parent directories are created.

    Returns the open text file, positioned after the header, so it can be used
    as a context manager by the caller. The file is written as UTF-8 to match
    the ``<meta charset='utf-8'>`` declaration emitted by ``header``, rather
    than in the platform's default encoding.
    """
    site_path = "/" + "/".join(x[0] for x in paths_and_titles)
    file_path = get_page_file_path(site_path)
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    f = open(file_path, "wt", encoding="utf-8")
    try:
        header(paths_and_titles, f)
    except BaseException:
        # The caller never receives the handle in this case, so close it here.
        f.close()
        raise
    return f

def end_page(f):
    """Write the standard footer, without an inline script, to ``f``; ``f`` is left open."""
    footer(f, script=None)

In [23]:
def start_json(path):
    """Create the ``.json`` file for site path ``path`` and return it open for writing.

    Missing parent directories are created. The file is opened as UTF-8
    explicitly so the output does not depend on the platform's default
    encoding, consistent with the HTML pages.
    """
    file_path = get_json_file_path(path)
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    return open(file_path, "wt", encoding="utf-8")

def end_json(f):
    """Counterpart of ``start_json``; currently does nothing and returns None."""
    return None

### Publication Index Pages

In [24]:
# (label, URL) pairs for the upstream data sources.
# NOTE(review): not referenced by any other cell visible in this notebook.
data_links = [
    ("Oracc - Open Richly Annotated Cuneiform Corpus", "http://oracc.museum.upenn.edu"),
    ("CDLI - Cuneiform Digital Library Initiative", "https://cdli.ucla.edu"),
    ("ETCSL - Electronic Text Corpus of Sumerian Literature", "https://etcsl.orinst.ox.ac.uk"),
]

In [25]:
# Display order of the language columns in output_pub (lower values come first).
# Only languages listed here are rendered; the commented-out codes are disabled.
language_sort = {
    "akk": 0,
#     "akkts": 1,
#     "elx": 2,
#     "elxts": 3,
    "sux": 4,
#     "suxts": 5,
    "ml_en": 100,
    "en": 1000,
#     "fr": 1001,
}

# translation_source values that output_pub replaces with the generic label "Human".
bad_translators = {"uncertain", "NaN", "no translation", "", "check"}

In [26]:
def paragraphs_to_html(a, paragraphs, lang, corpus):
    """Render the paragraphs of text area ``a`` in language ``lang`` as HTML.

    ``paragraphs`` holds, for each entry of ``a.paragraphs``, a list of
    3-tuples whose first item is used as the line index and whose last item is
    the line text. For each paragraph the text comes from
    ``p.languages[lang]`` when present (passed through
    ``languages.prep_tgt_for_nn`` unless ``lang`` starts with "ml_");
    otherwise the paragraph's line texts are joined with spaces. The text is
    HTML-escaped and wrapped in the paragraph's own tag.
    """
    rendered = []
    for para_index, para_lines in enumerate(paragraphs):
        para = a.paragraphs[para_index]
        if lang not in para.languages:
            # No text stored for this language: fall back to the raw lines.
            text = " ".join(line_text for (_, _, line_text) in para_lines)
        else:
            text = para.languages[lang]
            if not lang.startswith("ml_"):
                text = languages.prep_tgt_for_nn(text, lang, corpus)
        tag = para.tag
        first_line = para_lines[0][0] if len(para_lines) > 0 else 0
        rendered.append(
            f"<{tag}>"
            f"<span class='line line-{first_line}'>{escape(text)}</span>\n"
            f"</{tag}>\n"
        )
    return "".join(rendered)

def title_case(str):
    """Return ``str`` with its first character upper-cased and the rest unchanged."""
    # Slicing keeps the empty and single-character cases correct without special handling.
    return str[:1].upper() + str[1:]

def output_pub(p, f):
    """Write the HTML fragment for publication ``p`` to ``f``.

    The fragment consists of a heading (id, genres, object type), a line with
    the period and a link to the source corpus, and one ``<section>`` per text
    area that has both lines and paragraphs. Inside each section the source
    language and every language listed in ``language_sort`` that occurs in the
    area's paragraphs are rendered side by side, ordered by ``language_sort``.

    All metadata interpolated into the markup is HTML-escaped, consistently
    with the area names and translator credit.
    """
    # NOTE(review): this special case prints the publication as JSON and emits no HTML
    # for it; it looks like a leftover debugging hook - confirm before removing.
    if p.id in ["P229313"]:
        print(cdli.pub_to_json(p))
        return
    pdir = p.id.lower()[:4]
    genres = escape(" and ".join(cdli.get_genres(p.genre)))
    object_type = escape(str(cdli.get_object_type(p.object_type)))
    f.write(f"<h1 class='otitle'><a href='/p/{pdir}.html#{p.id}'>{p.id}</a>: {genres} {object_type}</h1>\n")
    if p.corpus == "cdli":
        src_a = f"<a class='external' href='https://cdli.ucla.edu/search/archival_view.php?ObjectID={p.id}'>CDLI</a>"
    else:
        src_a = f"<a class='external' href='{escape(str(p.src_url))}'>Oracc</a>"
    f.write(f"<p class='otitle'>{escape(str(p.period))} {src_a}</p>\n")
    areas_with_paras = [x for x in p.text_areas if len(x.lines) > 0 and len(x.paragraphs) > 0]
    for a in areas_with_paras:
        f.write("<section class='textarea'>\n")
        # The area name is only shown when there is more than one area to tell apart.
        if len(areas_with_paras) > 1:
            f.write(f"<h1>{escape(title_case(a.name))}</h1>\n")
        f.write("<div class='translations-container'>\n")
        paragraphs = a.paragraphs_to_lines(p.language, corpus_id=p.corpus)
        # The source language is always shown; other languages only when enabled in language_sort.
        langs = {p.language}
        for para in a.paragraphs:
            langs.update(lang for lang in para.languages if lang in language_sort)
        for lang in sorted(langs, key=lambda x: language_sort[x]):
            if lang == tgt_lang:
                # Human translation column: credit the translator when a usable name is known.
                if p.translation_source is not None and p.translation_source not in bad_translators:
                    translator = escape(p.translation_source)
                else:
                    translator = "Human"
            elif lang.startswith("ml_"):
                translator = "AI Translation"
            else:
                translator = escape(languages.all_languages[lang])
            f.write(f"<div class='lang-{lang} text'>\n")
            f.write(f"<div class='langid'>{translator}</div>\n")
            f.write(paragraphs_to_html(a, paragraphs, lang, p.corpus))
            f.write("</div>\n")
        f.write("</div></section>\n")

In [27]:
# /p/index.html: publication directories grouped by the first two characters of
# their key, each group and each directory labelled with its first and last id.
with start_page([("p/index", "Publications")]) as f:
    groups = defaultdict(list)
    for pdir in sorted(all_pubs_by_dir):
        groups[pdir[:2]].append(pdir)
    f.write("<div class='publications-list-container'>\n")
    for gid, gpdirs in tqdm(groups.items()):
        first_id = min(pub.id for pub in all_pubs_by_dir[gpdirs[0]])
        last_id = max(pub.id for pub in all_pubs_by_dir[gpdirs[-1]])
        f.write(f"<section id='{gid}' class='publications-list'>\n")
        f.write(f"<h1>{first_id} - {last_id}</h1>\n")
        f.write("<ul>\n")
        for pdir in gpdirs:
            dir_ids = sorted(pub.id for pub in all_pubs_by_dir[pdir])
            f.write(f"<li><a href='/p/{pdir}.html'>{dir_ids[0]} - {dir_ids[-1]}</a></li>\n")
        f.write("</ul>\n")
        f.write("</section>\n")
    f.write("</div>\n")
    end_page(f)


  0%|          | 0/12 [00:00<?, ?it/s]

In [28]:
# For every publication directory write /p/<pdir>.html (all publications on one page)
# and /p/<pdir>.json (an object mapping publication id to {"html": fragment}).
for pdir in tqdm(sorted(all_pubs_by_dir)):
    pubs = sorted(all_pubs_by_dir[pdir], key=lambda p: p.id)

    # Render each publication once and reuse the fragment for both outputs,
    # instead of running output_pub twice per publication.
    fragments = []
    for p in pubs:
        phtml = io.StringIO()
        output_pub(p, phtml)
        fragments.append((p.id, phtml.getvalue()))

    with start_page([("p", "Publications"), (pdir, pdir)]) as f:
        f.write("<div class='pubs-container'>\n")
        for pub_id, fragment in fragments:
            f.write(f"<section id='{pub_id}' class='pub'>\n")
            f.write(fragment)
            f.write("</section>\n")
        f.write("</div>\n")
        end_page(f)

    with start_json(f"/p/{pdir}") as f:
        # One entry per line keeps the file layout of the hand-written JSON object.
        entries = []
        for pub_id, fragment in fragments:
            entry_json = json.dumps({"html": fragment})
            entries.append(f"{json.dumps(pub_id)}: {entry_json}")
        f.write("{\n")
        f.write(",\n".join(entries))
        f.write("\n}\n")
        end_json(f)

  0%|          | 0/376 [00:00<?, ?it/s]

In [29]:
# Copy every .woff font from ../fonts into <wwwroot>/fonts.
fonts_dir = f"{wwwroot}/fonts"
os.makedirs(fonts_dir, exist_ok=True)
for font in glob.glob("../fonts/*.woff"):
    font_name = os.path.basename(font)
    shutil.copy2(font, f"{fonts_dir}/{font_name}")

In [30]:
# /404.html: static "not found" page.
with start_page([("404", "Not Found")]) as f:
    f.write("<p>The page you are looking for is not here.</p>\n")
    footer(f, script=None)


In [31]:
# Copy the hand-written static assets from ../web into the site root.
for asset in ("translator.html", "main.css"):
    shutil.copy(f"../web/{asset}", f"{wwwroot}/{asset}")
# Kept as the cell's final expression so the destination path is still displayed.
shutil.copy("../web/main.js", f"{wwwroot}/main.js")

'/Users/fak/Dropbox/Projects/CuneiformTranslators/dist/main.js'

In [32]:
# /search.html: an empty container plus an inline script that creates a PublicationSearch
# bound to the header's search box and, when a "q" query parameter is present, runs it.
script = """
    // get the q query parameter
    const q = new URLSearchParams(window.location.search).get('q');
    (async function() {
        const s = new PublicationSearch(document.getElementById('search'), document.getElementById('search-input'), q);
        if (q) {
            await s.searchAsync(q, true);
        }
    })();"""
with start_page([("search", "Search")]) as f:
    f.write("<div id='search'></div>\n")
    footer(f, script=script)



In [33]:
# Matches tokens that begin with a lowercase ASCII letter immediately followed by a digit;
# the index builder treats such tokens as numbers and leaves them out.
letter_number_re = re.compile(r"^[a-z][0-9]")

In [34]:
# Build the English word index from the "ml_en" text of every paragraph:
#   en_index[word]            -> set of publication ids containing the word
#   en_histogram[word]        -> total number of occurrences
#   en_pub_histogram[word][id]-> occurrences within one publication
# Characters deleted from every token before it is considered for the index.
_index_punctuation = str.maketrans("", "", "\"'.,;:?!”“()[]{}’‘…+$*/\\=><|")

en_index = defaultdict(set)
en_histogram = defaultdict(int)
en_pub_histogram = defaultdict(lambda: defaultdict(int))
for p in tqdm(all_pubs.values()):
    for a in p.text_areas:
        for para in a.paragraphs:
            if "ml_en" not in para.languages:
                continue
            for word in para.languages["ml_en"].split():
                w = word.lower().translate(_index_punctuation)
                w = w.strip("-").strip("_").strip()
                # Tokens starting with a digit, or with a letter followed by a digit, count as numbers.
                starts_with_digit = len(w) > 1 and w[0] in "0123456789"
                is_number = starts_with_digit or letter_number_re.match(w) is not None
                if is_number or not (1 < len(w) < 64):
                    continue
                if w in languages.en_index_ignore_words:
                    continue
                en_index[w].add(p.id)
                en_histogram[w] += 1
                en_pub_histogram[w][p.id] += 1
print(len(en_index), "words in index")

  0%|          | 0/129705 [00:00<?, ?it/s]

157477 words in index


In [35]:
# Sanity check: the histogram gains a key exactly when the index does, so this count
# should equal the number of indexed words printed by the previous cell.
print(len(en_histogram), "histogram words")
# Get the most common words
# en_common = sorted(list(en_histogram.items()), key=lambda x:-x[1])[:100]
# for w, c in en_common:
#     print(w, c)

157477 histogram words


In [36]:
# Group en_index by first letter
en_index_by_letter = defaultdict(dict)
for word in en_index:
    en_index_by_letter[word[0]][word] = en_index[word]

In [37]:
def output_index(lang):
    """Write the top-level word index page and return the words grouped by two-character prefix.

    The page is written to ``/<lang>_index/index.html`` and titled with the
    display name of ``lang``. Its content comes from the module-level
    ``en_index`` / ``en_index_by_letter`` tables: a jump list of first letters,
    then one block per letter linking to the per-prefix pages under
    ``/en_index/``.

    Letters and prefixes come from indexed text, so they are HTML-escaped
    before being placed in the markup, and prefixes are URL-quoted inside link
    targets.

    Returns a dict mapping each two-character prefix to its sorted list of words.
    """
    # Imported locally so this cell does not depend on an edit to the notebook's import cell.
    from urllib.parse import quote

    with start_page([(f"{lang}_index/index", f"{languages.all_languages[lang]} Index")]) as f:
        letters = sorted(en_index_by_letter)
        f.write("<div class='text-container'><div>\n")
        f.write(f"<p>{len(en_index):,} Indexed Words</p>\n")
        f.write("<p>\n")
        for letter in letters:
            f.write(f"<span><a href='#{escape(letter)}'>{escape(letter)}</a></span>\n")
        f.write("</p>\n")
        f.write("</div></div>\n")
        f.write("<div class='browsedims-container'>\n")
        all_two_letters = defaultdict(list)
        for letter in letters:
            words = sorted(en_index_by_letter[letter])
            # Words are visited in sorted order, so every per-prefix list is sorted too.
            letter_prefixes = defaultdict(list)
            for word in words:
                letter_prefixes[word[:2]].append(word)
                all_two_letters[word[:2]].append(word)
            f.write("<div>\n")
            f.write(f"<h2 id='{escape(letter)}'>{escape(words[0])} - {escape(words[-1])}</h2>\n")
            f.write("<ul>\n")
            for prefix in sorted(letter_prefixes):
                twords = letter_prefixes[prefix]
                href = escape("/en_index/" + quote(prefix, safe="") + ".html")
                f.write(f"<li><a href='{href}'>{escape(twords[0])} - {escape(twords[-1])}</a></li>\n")
            f.write("</ul>\n")
            f.write("</div>\n")
        f.write("</div>\n")
        script = """
        """
        footer(f, script=script)
    return all_two_letters
two_letters = output_index("en")

In [38]:
def output_index_json(lang="en"):
    """Write one HTML page and one JSON file per two-character prefix of the word index.

    For every prefix in the module-level ``two_letters`` table:

    * ``/en_index/<prefix>.html`` lists the prefix's words, each linking to the
      search page and followed by the number of publications containing it;
    * ``/en_index/<prefix>.json`` maps each word to a list of
      ``[publication id, occurrences in that publication]`` pairs.

    Words come from indexed text, so they are HTML-escaped in the markup and
    URL-quoted inside the search link. Publication ids are emitted in sorted
    order so the JSON is identical from run to run instead of following set
    iteration order. ``lang`` only selects the display name used in the
    breadcrumb title.
    """
    # Imported locally so this cell does not depend on an edit to the notebook's import cell.
    from urllib.parse import quote

    for prefix in tqdm(sorted(two_letters)):
        words = sorted(two_letters[prefix])
        title = f"{words[0]} - {words[-1]}"
        with start_page([("en_index", f"{languages.all_languages[lang]} Index"), (prefix, title)]) as f:
            f.write("<div class='browsedims-container'>\n")
            f.write("<ul>\n")
            for word in words:
                href = escape("/search.html?q=" + quote(word, safe=""))
                f.write(f"<li><a href='{href}'>{escape(word)}</a> ({len(en_index[word]):,})</li>\n")
            f.write("</ul>\n")
            f.write("</div>\n")
            footer(f)
        with start_json(f"/en_index/{prefix}") as f:
            word_json = {
                word: [(pub_id, en_pub_histogram[word][pub_id]) for pub_id in sorted(en_index[word])]
                for word in two_letters[prefix]
            }
            json.dump(word_json, f, indent=0)
output_index_json()

  0%|          | 0/424 [00:00<?, ?it/s]

In [39]:
def output_browser(paths_and_titles, pubs, ignore_dims, f, include_browser=True):
    """Write the facet navigation for ``pubs`` to ``f`` and finish the page.

    For every dimension in ``browser_dimensions`` that is not in
    ``ignore_dims`` the publications are grouped by the values the dimension's
    selector returns; a dimension is shown only when it yields at least two
    groups. Each group becomes a relative link ``<value>/``.

    When ``include_browser`` is true the sorted publication ids are embedded
    as a ``publicationIds`` script constant and the footer starts the
    client-side publication browser.

    The footer is always written, including when ``pubs`` is empty, so the
    page opened by ``start_page`` is never left without its closing markup.

    ``paths_and_titles`` is accepted for interface compatibility and not used.

    Returns a list of ``(dimension name, value, publications)`` tuples, one per
    link written, for which the caller is expected to generate pages.
    """
    next_pages = []
    if len(pubs) == 0:
        footer(f)
        return next_pages
    f.write("<div id='browser'></div>\n")
    f.write("<nav class='browsedims-container'>\n")
    for dname, dselect in browser_dimensions:
        if dname in ignore_dims:
            continue
        vgroups = defaultdict(list)
        for p in pubs:
            for v in dselect(p):
                vgroups[v].append(p)
        # A dimension with a single value cannot narrow the list any further.
        if len(vgroups) < 2:
            continue
        f.write("<section>\n")
        f.write(f"<h1>{escape(dname)}</h1>\n")
        for gv, gpubs in vgroups.items():
            next_pages.append((dname, gv, gpubs))
            f.write(f"<a href='{escape(gv)}/'>{len(gpubs):,}&nbsp;{escape(gv)}</a>\n")
        f.write("</section>\n")
    f.write("</nav>\n")
    if include_browser:
        ids_json = json.dumps(sorted(p.id for p in pubs))
        f.write("<script>\n")
        f.write(f"const publicationIds = {ids_json};\n")
        f.write("</script>\n")
        footer(f, script="makePublicationBrowser(document.getElementById('browser'), publicationIds);")
    else:
        footer(f)
    return next_pages

def output_browser_page(parent_paths_and_titles, dim_value, pubs, ignore_dims):
    """Write the browse page for ``dim_value`` and, recursively, all pages below it.

    The page itself is written as ``<parents>/<dim_value>/index.html``. Every
    facet link that ``output_browser`` emits on it gets its own page, with the
    facet's dimension added to the ignored set so it is not offered again
    further down.
    """
    page_crumbs = parent_paths_and_titles + [(dim_value + "/index", dim_value)]
    with start_page(page_crumbs) as f:
        child_pages = output_browser(page_crumbs, pubs, ignore_dims, f)
    # Children sit under the directory, not under its index file.
    child_parents = parent_paths_and_titles + [(dim_value, dim_value)]
    for child_dim, child_value, child_pubs in child_pages:
        child_ignores = set(ignore_dims) | {child_dim}
        output_browser_page(child_parents, child_value, child_pubs, child_ignores)


In [40]:
# Write the home page, then the whole tree of browse pages reachable from it, and
# finally list what ended up in the output directory.
importlib.reload(cdli)
print("Writing /")
with start_page([("index", "AI Cuneiform Corpus")]) as f:
    f.write("<div class='text-container'>")
    f.write("<section>\n")
    f.write("<p>The Largest Online Corpus of Translated Cuneiform Texts</p>\n")
    by_lang = defaultdict(list)
    for p in all_pubs.values():
        by_lang[p.language].append(p)
    next_pages = []
    f.write("<nav>\n")
    f.write("<ul>\n")
    f.write(f"<li><a href='/p/'>{len(all_pubs):,} Translated Cuneiform Publications</a></li>\n")
    for lang in sorted(by_lang):
        gpubs = by_lang[lang]
        f.write(f"<li><a href='/{lang}/'>{len(gpubs):,} from {escape(languages.all_languages[lang])}</a></li>\n")
        next_pages.append(("language", lang, gpubs))
    f.write("<li><a href='/en_index/'>English Index</a></li>\n")
    f.write("</ul>\n")
    f.write("</nav>\n")
    f.write("</section>\n")
    f.write("</div>")
    # The home page shows the facet links for the full corpus but no publication browser.
    next_pages.extend(output_browser([], list(all_pubs.values()), set(), f, include_browser=False))

for gk, gv, gpubs in tqdm(next_pages):
    output_browser_page([], gv, gpubs, {gk})

for dist_entry in glob.glob("../dist/*"):
    print(dist_entry)

Writing /


  0%|          | 0/80 [00:00<?, ?it/s]

../dist/eponym-chronicle
../dist/akk
../dist/omen
../dist/extispicy-query
../dist/priestly-letter
../dist/other-genre
../dist/tablet
../dist/appointment
../dist/uruk-iii
../dist/index.html
../dist/lexical
../dist/uncertain
../dist/parthian
../dist/treaty
../dist/lexical-mathematical
../dist/incantation-ritual
../dist/hellenistic
../dist/hemerological
../dist/royal-inscription
../dist/scholarly-letter
../dist/ur-iii
../dist/barrel
../dist/private-votive
../dist/astrological
../dist/votive-donation
../dist/scholarly
../dist/old-akkadian
../dist/prism
../dist/404.html
../dist/cone
../dist/ed-i-ii
../dist/letter
../dist/other-period
../dist/astronomical-diary
../dist/main.css
../dist/early-neo-babylonian
../dist/en_index
../dist/old-babylonian
../dist/ed-iiib
../dist/seal
../dist/vase
../dist/other-object
../dist/neo-babylonian
../dist/astronomical
../dist/royal-ritual
../dist/gift
../dist/early-old-babylonian
../dist/bulla
../dist/neo-assyrian
../dist/decree
../dist/old-assyrian
../dist/m