In [None]:
# default_exp datasets.loaders

# Dataset loaders

Methods for downloading the manga covers from the MangaDex database.

In [None]:
# hide
from nbdev.showdoc import *

In [None]:
# export
from pathlib import Path

import requests
from PIL import Image, UnidentifiedImageError
from tqdm.auto import tqdm

from mangacover.datasets.internals.MangaDex import get_covers_for_all_tags

def create_multiclass_tag_manga_dataset(path):
    """Saves a dataset for the multiclassification problem of deriving the tags associated with
    a manga by its cover, in `path`.

    The table returned by `get_covers_for_all_tags()` is written to `path/dataset.csv`, and
    each row's cover is downloaded from its `url` and stored as `path/<filename>`.

    Covers whose file already exists in `path` are not downloaded again, so an interrupted
    run can be resumed by calling the function again with the same `path`.

    Covers that cannot be fetched (non-success HTTP status) or that cannot be identified as
    an image are reported with a printed warning and skipped. Network errors raised by
    `requests` are propagated to the caller.

    Args:
        path: Destination folder (string or path-like). Created, with parents, if missing.
    """
    manga_df = get_covers_for_all_tags()
    folder = Path(path)
    folder.mkdir(parents=True, exist_ok=True)
    manga_df.to_csv(folder/'dataset.csv')
    for manga in tqdm(list(manga_df.itertuples())):
        my_file = folder/manga.filename
        if my_file.exists():
            continue
        try:
            # The context manager releases the connection even when a cover is skipped.
            with requests.get(manga.url, stream=True) as response:
                if not response.ok:
                    print("Warning: Couldn't download image file " + manga.filename
                          + " (HTTP " + str(response.status_code) + "). Skipping.")
                    continue
                # `.raw` bypasses requests' content decoding; without this flag a
                # gzip/deflate-encoded body would reach PIL still compressed.
                response.raw.decode_content = True
                im = Image.open(response.raw)
                try:
                    im.save(my_file)
                except BaseException:
                    # Do not leave a partial file behind: the `exists()` check above
                    # would treat it as a finished download on the next run.
                    if my_file.exists():
                        my_file.unlink()
                    raise
        except UnidentifiedImageError:
            print("Warning: Couldn't identify image file " + manga.filename + ". Skipping.")

In [None]:
# Render the signature and docstring of the dataset loader.
show_doc(create_multiclass_tag_manga_dataset)

<h4 id="create_multiclass_tag_manga_dataset" class="doc_header"><code>create_multiclass_tag_manga_dataset</code><a href="__main__.py#L7" class="source_link" style="float:right">[source]</a></h4>

> <code>create_multiclass_tag_manga_dataset</code>(**`path`**)

Saves a dataset for the multiclassification problem of deriving the tags associated with
a manga by its cover, in `path`.

Let's create the dataset in the `data/` folder

In [None]:
# Writes dataset.csv and the cover images under data/. Covers whose files already
# exist are skipped, so re-running this cell resumes an interrupted download.
create_multiclass_tag_manga_dataset('data/')

'cdc58593-87dd-415e-bbc0-2ec27bf404cc'

In [None]:
# NOTE(review): `MangaDexClient` is not imported in any cell visible here — confirm
# where it is defined and import it so this cell runs on a fresh kernel.
show_doc(MangaDexClient.search_manga_tags_covers)

<h4 id="MangaDexClient.search_manga_tags_covers" class="doc_header"><code>MangaDexClient.search_manga_tags_covers</code><a href="__main__.py#L34" class="source_link" style="float:right">[source]</a></h4>

> <code>MangaDexClient.search_manga_tags_covers</code>(**`total`**=*`None`*, **`limit`**=*`100`*, **`offset`**=*`0`*, **`includedTags`**=*`None`*, **`excludedTags`**=*`None`*)

Gets a list of manga with id, tags and cover image filenames

We can use this method to get the covers of manga that have (or do not have) certain tags.

In [None]:
# NOTE(review): `client` is not defined in any cell visible here — presumably a
# MangaDexClient instance; confirm and create it before this cell.
mangas = client.search_manga_tags_covers(includedTags=["fantasy"])
# Display only the first three results to keep the output short.
mangas[:3]