# Entries Per Data Source

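This notebook reports the number of entries per data source. For each source, the `#oldvis` column counts the entries that `build_visualizations` produces from `visualizations.zip`, and the `#all` column counts the entries in `all.zip`.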

In [1]:
from copy import deepcopy
from typing import List

import pandas as pd
from IPython.display import display, Markdown

import sys

sys.path.append("../")

from builders import build_visualizations
from _loader import load_zipped_json


def get_path_all(path_data_source: str) -> str:
    """
    Return the path to `all.zip` inside the
    `output/metadata-processed` directory of a data source.
    """
    processed_dir = f"{path_data_source}/output/metadata-processed"
    return f"{processed_dir}/all.zip"


def get_path_vis(path_data_source: str) -> str:
    """
    Return the path to `visualizations.zip` inside the
    `output/metadata-processed` directory of a data source.
    """
    processed_dir = f"{path_data_source}/output/metadata-processed"
    return f"{processed_dir}/visualizations.zip"


def get_markdown_data(data_sources: List) -> List:
    """
    Build the rows of the markdown summary table.

    The function works on a deep copy of `data_sources`, so the
    caller's list and dictionaries are left untouched. Every copied
    entry gets the `#oldvis` and `#all` counts, has its name turned
    into a markdown link to its URL, and gets a markdown link to its
    processed metadata directory. The `url` and `pathDataSource` keys
    are removed, and a final "Total" row is appended.
    """

    rows = deepcopy(data_sources)

    for row in rows:
        source_dir = row["pathDataSource"]

        # Entries obtained by running build_visualizations on visualizations.zip.
        raw_vis = load_zipped_json(get_path_vis(source_dir))
        row["#oldvis"] = len(build_visualizations(raw_vis))

        # Entries stored in all.zip.
        row["#all"] = len(load_zipped_json(get_path_all(source_dir)))

        # Fold the URL into the name as a markdown link, then drop the URL column.
        row["name"] = "[{}]({})".format(row["name"], row["url"])
        del row["url"]

        # Replace the raw path with a link to the processed metadata directory.
        row["data files"] = "[link]({}/output/metadata-processed/)".format(source_dir)
        del row["pathDataSource"]

    # Totals are computed before the summary row is appended,
    # so the sums only cover the per-source rows.
    total_vis = sum(row["#oldvis"] for row in rows)
    total_all = sum(row["#all"] for row in rows)
    rows.append(
        {
            "name": "Total",
            "#oldvis": total_vis,
            "#all": total_all,
            "data files": "/",
        }
    )

    return rows


def print_markdown_table(data_sources: List) -> None:
    """
    Display the entry counts of the given data sources
    as a rendered markdown table (without the index column).
    """
    table_rows = get_markdown_data(data_sources)
    table_markdown = pd.DataFrame(table_rows).to_markdown(index=False)
    display(Markdown(table_markdown))


# Relative path of the directory that contains the data source directories.
path_to_data_sources = "../../data-sources"

# One record per data source: display name, URL, and the path of
# its directory under `path_to_data_sources`.
data_sources = [
    {
        "name": name,
        "url": url,
        "pathDataSource": f"{path_to_data_sources}/{dir_name}",
    }
    for name, url, dir_name in [
        (
            "Alabama Maps",
            "http://alabamamaps.ua.edu/historicalmaps/",
            "alabama-maps",
        ),
        (
            "British Library Collection Items",
            "https://www.bl.uk/collection-items",
            "british-library-collection-items",
        ),
        (
            "British Library Images Online",
            "https://imagesonline.bl.uk/",
            "british-library-images-online",
        ),
        (
            "David Rumsey Map Collection",
            "https://www.davidrumsey.com/",
            "david-rumsey-map-collection",
        ),
        (
            "Gallica",
            "https://gallica.bnf.fr/",
            "gallica",
        ),
        (
            "Internet Archive",
            "https://archive.org/",
            "internet-archive",
        ),
        (
            "Library of Congress",
            "https://www.loc.gov/",
            "library-of-congress",
        ),
        (
            "Telefact",
            "https://modley-telefact-1939-1945.tumblr.com/",
            "telefact",
        ),
    ]
]

## Result

In [2]:
# Render the entry counts of the `data_sources` list as a markdown table.
print_markdown_table(data_sources)

| name                                                                   |   #oldvis |   #all | data files                                                                             |
|:-----------------------------------------------------------------------|----------:|-------:|:---------------------------------------------------------------------------------------|
| [Alabama Maps](http://alabamamaps.ua.edu/historicalmaps/)              |        51 |    142 | [link](../../data-sources/alabama-maps/output/metadata-processed/)                     |
| [British Library Collection Items](https://www.bl.uk/collection-items) |        94 |  32687 | [link](../../data-sources/british-library-collection-items/output/metadata-processed/) |
| [British Library Images Online](https://imagesonline.bl.uk/)           |        38 |   6647 | [link](../../data-sources/british-library-images-online/output/metadata-processed/)    |
| [David Rumsey Map Collection](https://www.davidrumsey.com/)            |      7816 |  25581 | [link](../../data-sources/david-rumsey-map-collection/output/metadata-processed/)      |
| [Gallica](https://gallica.bnf.fr/)                                     |      2090 | 195644 | [link](../../data-sources/gallica/output/metadata-processed/)                          |
| [Internet Archive](https://archive.org/)                               |      2985 |  82389 | [link](../../data-sources/internet-archive/output/metadata-processed/)                 |
| [Library of Congress](https://www.loc.gov/)                            |       212 |  63370 | [link](../../data-sources/library-of-congress/output/metadata-processed/)              |
| [Telefact](https://modley-telefact-1939-1945.tumblr.com/)              |       225 |    264 | [link](../../data-sources/telefact/output/metadata-processed/)                         |
| Total                                                                  |     13511 | 406724 | /                                                                                      |