In [None]:
# default_exp datasets.loaders

# Dataset loaders

Methods for downloading the manga covers from the MangaDex database.

In [None]:
# hide
from nbdev.showdoc import *

In [None]:
# export
import pandas as pd
from tqdm.auto import tqdm
from mangacover.datasets.internals.MangaDex import MangaDexClient


def _get_covers_for_all_tags(num_mangas=10, credentials_path="credentials.json"):
    """Build a table of cover images for mangas found under every MangaDex tag.

    Every tag id returned by `MangaDexClient.get_manga_tags` is searched with
    `MangaDexClient.search_manga_tags_covers`. A manga returned for several
    tags is kept only once (its first occurrence), and the resulting table has
    one row per cover image of each remaining manga.

    Args:
        num_mangas: Forwarded as `total` to each per-tag search (presumably
            the maximum number of mangas fetched per tag -- confirm against
            `MangaDexClient.search_manga_tags_covers`).
        credentials_path: Path of the credentials file handed to
            `MangaDexClient`.

    Returns:
        A `pandas.DataFrame` with the columns `mangaId`, `url`, `filename`
        and `tags` (the manga's tags joined with "|"). The columns are present
        even when no cover was found.
    """
    client = MangaDexClient(credentials_path)
    tags = client.get_manga_tags()

    # Keyed by mangaId so that a manga matched by several tags is stored once;
    # dicts keep insertion order, so the first occurrence wins and the original
    # search order is preserved.
    mangas_by_id = {}
    for tag_id in tqdm(tags.values()):
        found = client.search_manga_tags_covers(
            total=num_mangas, includedTags=[tag_id]
        )
        for manga in found:
            mangas_by_id.setdefault(manga["mangaId"], manga)

    columns = ["mangaId", "url", "filename", "tags"]
    records = [
        {
            "mangaId": manga_id,
            "url": f"https://uploads.mangadex.org/covers/{manga_id}/{filename}",
            # Prefix with the manga id so filenames stay unique across mangas.
            "filename": f"{manga_id}_{filename}",
            "tags": "|".join(manga["tags"]),
        }
        for manga_id, manga in mangas_by_id.items()
        for filename in manga["cover_art_filenames"]
    ]

    # Explicit columns keep the schema stable when `records` is empty.
    return pd.DataFrame(records, columns=columns)

We use the MangaDex API (https://api.mangadex.org/docs.html) to get labels and cover references for each manga. We can use those to create our dataset.

In [None]:
# Client used by the examples below. The credentials path is relative
# (presumably to the directory the notebook is run from -- TODO confirm).
client = MangaDexClient("notebooks/credentials.json")

In [None]:
show_doc(MangaDexClient.get_manga_tags)

<h4 id="MangaDexClient.get_manga_tags" class="doc_header"><code>MangaDexClient.get_manga_tags</code><a href="__main__.py#L20" class="source_link" style="float:right">[source]</a></h4>

> <code>MangaDexClient.get_manga_tags</code>()

Returns a dict from human readable tag names to tag_ids for each tag in the mangadex database

Here's what it looks like:

In [None]:
# Fetch the tag-name -> tag_id mapping, then look up the id of one tag by name.
tags = client.get_manga_tags()
tags["Fantasy"]

'cdc58593-87dd-415e-bbc0-2ec27bf404cc'

In [None]:
show_doc(MangaDexClient.search_manga_tags_covers)

<h4 id="MangaDexClient.search_manga_tags_covers" class="doc_header"><code>MangaDexClient.search_manga_tags_covers</code><a href="__main__.py#L34" class="source_link" style="float:right">[source]</a></h4>

> <code>MangaDexClient.search_manga_tags_covers</code>(**`total`**=*`None`*, **`limit`**=*`100`*, **`offset`**=*`0`*, **`includedTags`**=*`None`*, **`excludedTags`**=*`None`*)

Gets a list of manga with id, tags and cover image filenames

We can use this method to get covers from manga that have (or don't have) certain tags.

In [None]:
# `includedTags` takes tag ids rather than tag names, so resolve the id from
# the name -> id mapping fetched earlier.
fantasy_tag_id = tags["Fantasy"]
mangas = client.search_manga_tags_covers(includedTags=[fantasy_tag_id])
mangas[:3]