# Create an overview

In [None]:
import pathlib

from tabulate import tabulate

from typing import List
from typing import Dict
from typing import Union

In [None]:
# utility

def fname_to_html(fname: Union[pathlib.Path, None]):
    """
    Takes a file name and produces a nice
    little html snippet to be put in a table cell.

    The snippet consists of a heading showing the file stem and the
    image itself, wrapped in a link that points to the image file.
    Only the last three components of the path are kept, which makes
    the location relative.

    :param fname: Either None or image file name
    :returns: '-' if fname is None, the html snippet otherwise
    """
    if fname is None:
        return '-'

    fmt = '<h3>{title}</h3><a href="{href}"><img src="{src}" /></a>'

    # as_posix: URLs use forward slashes regardless of the platform
    src = pathlib.Path('.').joinpath(*fname.parts[-3:]).as_posix()

    # link to the image file itself so a click opens it
    href = src

    return fmt.format(src=src, href=href, title=fname.stem)


def fname_to_li(fname: str):
    """
    Produces a list item linking to the given file.

    The link target consists of the last two components of the
    file name, the link text is the file stem. The file name is
    also printed to stdout.

    :param fname: Name of the file to link to
    :returns: html snippet of the form <li><a href=...>...</a></li>
    """
    target = pathlib.Path(fname)
    parent_dir, file_name = target.parts[-2], target.parts[-1]
    link = pathlib.Path('.') / parent_dir / file_name

    print('>>> ', fname)

    template = '<li><a href="{}">{}</a></li>'
    return template.format(link, target.stem)


def expand(desired: List[str], keys: List[str]):
    """
    Takes a list of desired files. If the file name ends
    with an asterisk it is expanded and three files are selected
    (the first, one in the middle and the last).

    Candidates for an expansion are all keys starting with the text
    in front of the asterisk. They are ordered by the number following
    their last underscore; candidates without such a number are put
    behind the numbered ones. If there is no candidate at all, three
    None entries are added instead so that the result keeps its shape.

    :param desired: List of files to be included
    :param keys: List of available files
    :returns: List of file names (or None for unmatched expansions)

    """

    def _order(key):
        # numbered keys first (by number), everything else afterwards;
        # the leading flag makes sure ints and strs are never compared
        suffix = key.split('_')[-1]
        try:
            return (0, int(suffix), '')
        except ValueError:
            return (1, 0, suffix)

    selection = []

    for target in desired:
        if not target.endswith('*'):
            selection.append(target)
            continue

        # select three from sets
        prefix = target[:-1]
        candidates = sorted(
            (key for key in keys if key.startswith(prefix)),
            key=_order)

        if not candidates:
            selection += [None, None, None]
            continue

        selection += [
            candidates[0],
            candidates[len(candidates) // 2],
            candidates[-1]]

    return selection

In [None]:
def get_images(gen):
    """
    Sorts all images into a nested mapping:
    embedding -> experiment series -> file stem -> path

    Embedding and experiment series are taken from the third to last
    path component, which has to look like <EMBEDDING>-<SERIES>.
    The number of images found is printed to stdout.

    :param gen: pathlib glob (iterable of image paths)
    :returns: the nested mapping or None if there are no images

    """
    # this holds all images, categorized by embedding type and experiment name
    catalogue: Dict[str, Dict[str, Dict[str, pathlib.Path]]] = {}
    total = 0

    for image in gen:
        embedding, series = image.parts[-3].split('-')
        by_series = catalogue.setdefault(embedding, {})
        by_series.setdefault(series, {})[image.stem] = image
        total += 1

    if not catalogue:
        return None

    print('found {} images'.format(total))
    return catalogue


def enumerate_series(folder: pathlib.Path, data: Dict['embedding', 'exp']):
    """
    Generator which opens <folder>/<EMBEDDING>.html for writing, one
    embedding at a time, and yields the open file together with the
    data belonging to that embedding.

    The file is only open while the consumer handles the yielded
    pair; it is closed as soon as the generator is advanced.

    :param folder: experiments/<SERIES>
    :param data: mapping from embedding -> experiment series
    """
    entries = data.items()
    assert len(entries), 'no items in database'

    for embedding, series in entries:
        target = folder.joinpath(embedding + '.html')
        with target.open(mode='w') as handle:
            yield handle, series


def transform_to_col(desired: List[str], exp: Dict['exp', Dict['name', 'path']]):
    """
    Builds the columns of the final table: one column per experiment,
    ordered by experiment name.

    For every experiment the desired files are expanded against the
    files available for that experiment and each resulting entry is
    turned into an html snippet. Entries which are not available
    are rendered as placeholder.

    :param desired: enumeration of desired files
    :param exp: mapping of experiment -> file name -> path
    :returns: tuple of (sorted experiment names, list of columns)

    """

    header = sorted(exp)
    cols = []

    for name in header:
        available = exp[name]

        cells = []
        for wanted in expand(desired, available.keys()):
            # unknown (or None) entries yield None and thus a placeholder
            cells.append(fname_to_html(available.get(wanted)))

        cols.append(cells)

    return header, cols


def create_files(folder, data):
    """
    Writes one <EMBEDDING>.html into folder for every embedding in data.

    Each page is rendered from overview.template.html (read from the
    current working directory) and contains a table with one column
    per experiment and one row per selected image.

    :param folder: folder with experiments
    :param data: data structure produced by get_images
    :returns: list with the names of all files written

    """

    with open('overview.template.html', mode='r') as template_file:
        template = template_file.read()

    # images shown per experiment; a trailing asterisk selects three
    # representatives out of a numbered set
    desired = (
        'loss', 'loss-training', 'loss-validation',
        'encoder-activation-train*',
        'codebooks*',
        'norms*', )

    written = []

    for fd, exp in enumerate_series(folder, data):
        print('handling', fd.name)

        header, cols = transform_to_col(desired, exp)

        page_title = '{}: {}'.format(folder.stem, pathlib.Path(fd.name).stem)
        headings = ['<h2>{}</h2>'.format(name) for name in header]

        # columns are transposed into rows for tabulate
        # NOTE(review): newer tabulate releases may escape cell content
        # for tablefmt='html' - confirm the img snippets stay intact
        table = tabulate(zip(*cols), headers=headings, tablefmt='html')

        html = template.format(title=page_title, content=table)

        print('writing...')
        fd.write(html)
        written.append(fd.name)

    return written

In [None]:
# Creates the <EMBEDDING>.html files for every experiment series folder
# below basepath and links all of them from basepath/overview.html.
basepath = pathlib.Path('../opt/experiments')
content = []

# Read the template once and up front: it is needed for the overview
# page even if no folder contributes any images.
with open('overview.template.html', mode='r') as fd:
    template = fd.read()

ul = '<ul>{}</ul>'

# sorted: glob order is arbitrary, keep the overview listing stable
for folder in sorted(basepath.glob('*')):
    # basepath also contains plain files (e.g. the overview.html
    # written below by a previous run)
    if not folder.is_dir():
        continue

    print('\ncreating files for', folder.stem)

    gen = folder.glob('**/images/*.png')
    data = get_images(gen)

    # get_images returns None if there are no images
    if data is None:
        continue

    files = create_files(folder, data)

    # one heading per series followed by a nested list of its pages
    sublist = '\n'.join([fname_to_li(fname) for fname in files])
    content += ['<li><h2>{}</h2></li>'.format(folder.stem)]
    content += ['<li><ul>{}</ul></li>'.format(sublist)]

content = ul.format('\n'.join(content))
with (basepath / 'overview.html').open(mode='w') as fd:
    fd.write(template.format(content=content, title='Overview'))

print('done')