In [None]:
import requests
import json

In [None]:
# Notebook bootstrap: install/upgrade fastbook quietly, then run its setup helper.
# NOTE(review): the install is unpinned (-U), so a re-run may pull a different
# fastai/fastbook version than the one this notebook was written against.
!pip install -Uqq fastbook
import fastbook
fastbook.setup_book()

# Login

## Login flow

In [None]:
# Log in to MangaDex and keep both tokens for the cells below.
# Fill in the credentials locally; never commit real values with the notebook.
response = requests.post(
    'https://api.mangadex.org/auth/login',
    json={
        'username': '',
        'password': '',
    }
)
# Stop here with the HTTP error instead of a confusing KeyError on 'token' below.
response.raise_for_status()
content = response.json()
refresh_token = content['token']['refresh']
session_token = content['token']['session']
# Show the reply without the token pair so secrets do not end up in saved output.
{key: value for key, value in content.items() if key != 'token'}

## Checar validade do refresh token

In [None]:
# Post the refresh token to the auth/check endpoint (authenticated with the
# session token) and show the raw reply.
response = requests.post(
    'https://api.mangadex.org/auth/check',
    json={'token': refresh_token},
    headers={'Authorization': f'Bearer {session_token}'},
)
response.text

## Gerar um novo refresh token

In [None]:
# Exchange the current refresh token for a new refresh/session token pair.
response = requests.post(
    'https://api.mangadex.org/auth/refresh',
    json={
        'token': refresh_token,
    },
    headers = {
        'Authorization': f'Bearer {session_token}'
    },
)
# Stop here with the HTTP error instead of a confusing KeyError on 'token' below.
response.raise_for_status()
content = response.json()
refresh_token = content['token']['refresh']
session_token = content['token']['session']
# Show the reply without the token pair so secrets do not end up in saved output.
{key: value for key, value in content.items() if key != 'token'}

# Pegar covers de mangas isekai

## Buscar por mangas de isekai

In [None]:
# Download the tag list and keep (tag id, English tag name) pairs.
response = requests.get(
    'https://api.mangadex.org/manga/tag',
    headers={'Authorization': f'Bearer {session_token}'},
)
content = json.loads(response.content)
tags = [
    (entry['data']['id'], entry['data']['attributes']['name']['en'])
    for entry in content
]

In [None]:
def search_manga(session_token,
                 total=None,
                 limit=100,
                 offset=0,
                 includedTags=None,
                 excludedTags=None):
    """Page through the MangaDex ``/manga`` endpoint and flatten the results.

    Args:
        session_token: Bearer token sent in the ``Authorization`` header.
        total: Offset at which paging stops (exclusive). When ``None``, the
            ``total`` reported by the first response is used.
        limit: Maximum number of results requested per page.
        offset: Offset of the first result to request.
        includedTags: Value sent as ``includedTags[]`` (or ``None``).
        excludedTags: Value sent as ``excludedTags[]`` (or ``None``).

    Returns:
        A list with one dict per result holding ``mangaId``, ``tags``
        (English tag names) and ``cover_art_filenames``.

    Raises:
        requests.HTTPError: If a page request comes back with an error status.
    """
    contents = []
    while total is None or offset < total:
        # Until the total is known there is nothing to clamp the page size to;
        # computing total-offset with total=None would raise a TypeError.
        page_size = limit if total is None else min(limit, total - offset)
        response = requests.get(
            'https://api.mangadex.org/manga',
            params={
                'limit': page_size,
                'offset': offset,
                'includedTags[]': includedTags,
                'excludedTags[]': excludedTags,
                'includes[]': "cover_art"
            },
            headers = {
                'Authorization': f'Bearer {session_token}',
            },
        )
        # Surface HTTP failures directly rather than as a KeyError further down.
        response.raise_for_status()
        content = response.json()
        if total is None:
            total = content['total']
        contents.append(content)
        offset += limit

    return [
        {
            "mangaId": result['data']['id'],
            "tags": [
                     tag['attributes']['name']['en']
                     for tag in result['data']['attributes']['tags']
            ],
            "cover_art_filenames": [
                       relationship['attributes']['fileName']
                       for relationship in result['relationships']
                       if relationship['type'] == "cover_art"
            ]
        }
        for content in contents
        for result in content['results']]

# Run one search per tag (offset=10, total=20) and flatten all results into a
# single list. The same manga can come back for more than one tag.
mangas = [
    manga
    for tag_id, _ in tags
    for manga in search_manga(session_token,
                              total=20,
                              offset=10,
                              includedTags=[tag_id])]
mangas[:5]

In [None]:
# De-duplicate by mangaId, keeping the first occurrence and the original order.
# `seen.add(...)` returns None, so `None or manga` evaluates to `manga` while
# recording its id as a side effect.
seen = set()
mangas = [seen.add(manga['mangaId']) or manga
          for manga in mangas
          if manga['mangaId'] not in seen]

In [None]:
import pandas as pd

# One row per (manga, cover file): the download URL, the local file name
# (manga id + '_' + cover file name) and the manga's tags joined with '|'.
test = pd.DataFrame([{
    'mangaId': manga['mangaId'],
    'url': f'https://uploads.mangadex.org/covers/{manga["mangaId"]}/{filename}',
    'filename': f'{manga["mangaId"]}_{filename}',
    'tags': '|'.join(manga['tags']),
} for manga in mangas for filename in manga['cover_art_filenames']])
test.head()

In [None]:
from collections import defaultdict

MAX_COVERS_PER_MANGA = 1

# (filename, manga_id) pairs for every cover found by the search. Built here
# from `mangas` because no visible cell defines this list, which made the
# cell fail on a fresh kernel.
manga_cover_art_filenames = [
    (filename, manga['mangaId'])
    for manga in mangas
    for filename in manga['cover_art_filenames']]

# Keep at most MAX_COVERS_PER_MANGA covers per manga, first come first served.
max_filter = defaultdict(list)
for filename, manga_id in manga_cover_art_filenames:
    if len(max_filter[manga_id]) < MAX_COVERS_PER_MANGA:
        max_filter[manga_id].append(filename)

# Flatten back to (filename, manga_id) pairs.
filtered_manga_cover_art_filenames = [
    (filename, manga_id)
    for manga_id, filenames in max_filter.items()
    for filename in filenames]
filtered_manga_cover_art_filenames[:5]

In [None]:
# Manual check: request one hard-coded cover URL and show the response object.
requests.get('https://uploads.mangadex.org/covers/2661ccb2-9b4e-42bb-9697-bed499b9b363/be17ccee-d9dc-40e1-bca4-0cf464dd632d.jpg')

In [None]:
from PIL import Image, UnidentifiedImageError
from pathlib import Path
from tqdm.auto import tqdm

def create_manga_dataset(path, manga_df, kind='train'):
    """Write ``manga_df`` as a CSV and download every cover it lists.

    Args:
        path: Root directory of the dataset.
        manga_df: DataFrame whose rows expose ``url`` and ``filename``.
        kind: Sub-folder (and CSV base name) to write into.

    Files that already exist in the target folder are not downloaded again.
    Covers that cannot be downloaded or decoded are reported and skipped.
    """
    folder = Path(f'{path}/{kind}/')
    folder.mkdir(parents=True, exist_ok=True)
    manga_df.to_csv(folder/f'{kind}.csv')
    for manga in tqdm(manga_df.itertuples(), total=len(manga_df), desc=kind):
        my_file = folder/manga.filename
        if my_file.exists():
            continue
        try:
            # The context manager releases the connection even if decoding fails.
            with requests.get(manga.url, stream=True) as response:
                response.raise_for_status()
                # Have the raw stream undo any gzip/deflate content encoding.
                response.raw.decode_content = True
                Image.open(response.raw).save(my_file)
        except UnidentifiedImageError:
            print("Warning: Couldn't identify image file " + manga.filename + ". Skipping.")
        except requests.HTTPError as error:
            # Keep the best-effort behaviour: report the failed download and move on.
            print("Warning: Couldn't download image file " + manga.filename + f" ({error}). Skipping.")

# Every other cell in this notebook addresses Drive as gdrive/MyDrive/..., and
# the CSV written here is read back from that location, so write there too
# (the original "drive/..." prefix would silently create a separate folder).
create_manga_dataset("gdrive/MyDrive/isekai_model/data/multicat/",
                     test,
                     kind='test')

In [None]:
# NOTE(review): destructive — deletes every file in valid/fantasy each time the
# notebook runs; confirm this cell is still wanted before a Run All.
!rm gdrive/MyDrive/isekai_model/data/valid/fantasy/*

# Modelagem

In [None]:
#hide
from fastbook import *
from fastai.vision.widgets import *

In [None]:
# Show the installed fastai version.
import fastai
fastai.__version__

In [None]:
# Look up the row for one cover by its file name.
# NOTE(review): `df` is only assigned in a later cell, so this fails on a
# fresh top-to-bottom run.
df[df['filename'] == '155bd488-f71f-4523-bb5a-a9b25fa955c4_7bda3885-6ee4-44cd-b0c5-bb1da30a7999.jpg']

In [None]:
import pandas as pd

# Load both CSV manifests, tag each row with the folder it came from, then
# stack them into a single frame.
path = Path("gdrive/MyDrive/isekai_model/data/multicat/")
train = pd.read_csv(path/'train/train.csv').assign(folder='train')
test = pd.read_csv(path/'test/test.csv').assign(folder='test')
df = pd.concat([train, test])

In [None]:
def get_x(r):
    "Build the image path for row `r` from its `folder` and `filename` fields."
    folder, filename = r['folder'], r['filename']
    return path/folder/filename

def get_y(r):
    "Split the row's pipe-separated `tags` field into a list of labels."
    return r['tags'].split('|')

# Image input with multi-category targets; items are resized to 128 using the
# Pad method with zero padding.
dblock = DataBlock(
    blocks=(ImageBlock, MultiCategoryBlock),
    get_x=get_x,
    get_y=get_y,
    item_tfms=Resize(128, ResizeMethod.Pad, pad_mode='zeros'),
)

In [None]:
# Build the datasets from `df` and show the first training item as a sanity check.
dsets = dblock.datasets(df)
dsets.train[0]

In [None]:
# NOTE(review): leftover IPython source lookup (`??`); it only displays the
# source of dblock.datasets and is not needed for the pipeline.
??dblock.datasets

In [None]:
# Build the DataLoaders from `df` and preview a batch (3 rows, 1 column).
dls = dblock.dataloaders(df)
dls.show_batch(nrows=3, ncols=1)

In [None]:
def accuracy_multi(inp, targ, thresh=0.5, sigmoid=True):
    """Fraction of elements where the thresholded `inp` matches `targ`.

    `inp` and `targ` must have the same size. When `sigmoid` is true, `inp`
    is passed through a sigmoid before being compared against `thresh`.
    """
    activations = inp.sigmoid() if sigmoid else inp
    predicted = activations > thresh
    matches = predicted == targ.bool()
    return matches.float().mean()

In [None]:
# resnet50 learner converted with to_fp16(), scored with accuracy_multi at a
# 0.2 threshold; whatever lr_find returns is kept in `lr`.
learn = cnn_learner(dls, resnet50, metrics=partial(accuracy_multi, thresh=0.2)).to_fp16()
lr = learn.lr_find()

In [None]:
# Show what lr_find returned.
lr

In [None]:
# `lr_min` / `lr_steep` are never assigned in this notebook; only `lr` is.
# lr_find returns a named tuple of suggestions, so print each one under its
# own field name instead.
print(", ".join(f"{name}: {value:.2e}" for name, value in lr._asdict().items()))

In [None]:
# Train 3 epochs at 3e-2, then unfreeze and train 12 more epochs with
# lr_max=slice(1e-5, 1e-3).
# NOTE(review): these rates are hard-coded; the `lr` found above is not used here.
learn.fit_one_cycle(3, 3e-2)
learn.unfreeze()
learn.fit_one_cycle(12, lr_max=slice(1e-5,1e-3))

In [None]:
# Build an interpretation object from the trained learner and plot its
# confusion matrix.
# NOTE(review): the targets are multi-label (MultiCategoryBlock) — confirm a
# confusion matrix is supported/meaningful for this learner.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(12,12), dpi=60)

In [None]:
# Show the 9 highest-loss items in a 3-row grid.
interp.plot_top_losses(9, nrows=3, figsize=(11,8))

In [None]:
# Sweep 29 thresholds between 0.05 and 0.95 and plot accuracy_multi for each.
# sigmoid=False: presumably get_preds already returns activated probabilities
# — confirm.
preds,targs = learn.get_preds()
xs = torch.linspace(0.05,0.95,29)
accs = [accuracy_multi(preds, targs, thresh=i, sigmoid=False) for i in xs]
plt.plot(xs,accs)

In [None]:
# Create the cleaner widget for the learner and display it.
cleaner = ImageClassifierCleaner(learn)
cleaner

In [None]:
# Export the trained learner.
# NOTE(review): confirm the file ends up where load_learner reads it later in
# this notebook (export may resolve the name relative to the learner's path).
learn.export('gdrive/MyDrive/isekai_model/data/multicat/export.pkl')

In [None]:
# Show `learn.dl`. The original line ended in a dangling '.', which is a
# SyntaxError and stops a Run All at this cell.
learn.dl

## Usando o modelo

In [None]:
# Reload the exported learner for inference.
# NOTE(review): load_learner presumably unpickles this file — only load
# exports you trust.
learn_inf = load_learner('gdrive/MyDrive/isekai_model/data/multicat/export.pkl')

In [None]:
# Do-nothing stand-ins for get_x / get_y.
# NOTE(review): these replace the real get_x/get_y defined earlier in this
# notebook; presumably they exist so the exported learner's references can be
# resolved in a session without the training code — confirm.
def get_x(r):
    return None

def get_y(r):
    return None

In [None]:
# Check the concrete class of the reloaded learner.
learn_inf.__class__

In [None]:
# File-upload widget for the image to classify.
btn_upload = widgets.FileUpload()
btn_upload

In [None]:
# Turn the most recent upload into an image.
# NOTE(review): assumes btn_upload.data is a non-empty sequence; indexing [-1]
# fails if nothing has been uploaded yet — confirm.
img = PILImage.create(btn_upload.data[-1])

In [None]:
# Output area showing a 128x128 thumbnail of the uploaded image.
out_pl = widgets.Output()
out_pl.clear_output()
with out_pl: display(img.to_thumb(128,128))
out_pl

In [None]:
# Rank every label in the vocabulary by its predicted probability for `img`.
# The original iterated an undefined name and ended in `.sort()`, which
# returns None, so nothing was ever displayed. Sort on the numeric value and
# format afterwards.
probs = learn.predict(img)[2]
[(label, f'{prob:.04f}')
 for label, prob in sorted(zip(learn.dls.vocab, probs.tolist()),
                           key=lambda pair: pair[1],
                           reverse=True)]

In [None]:
# Predict on the uploaded image and keep the labels whose probability is at
# least 0.5, most likely first.
lab, filt, probs = learn_inf.predict(img)
preds = sorted(
    ((label, score)
     for label, score in zip(learn_inf.dls.vocab, (p.item() for p in probs))
     if score >= 0.5),
    key=lambda pair: pair[1],
    reverse=True,
)
preds

In [None]:
# Run one prediction and put a summary into a Label widget; the same widget is
# updated by the click handler defined later in this notebook.
# NOTE(review): json.dumps on a plain string wraps the label text in quotes —
# confirm that is intended.
pred,pred_idx,probs = learn_inf.predict(img)
lbl_pred = widgets.Label()
lbl_pred.value = json.dumps(f'Prediction: {pred}; Probability: {probs[pred_idx]}')
lbl_pred

In [None]:
# Scratch check: JSON-encode a formatted probability under a placeholder key.
# NOTE(review): if pred_idx selects more than one element (multi-label
# output), the '.04f' format presumably raises — confirm.
json.dumps({"aaa": f'{probs[pred_idx]:.04f}'})

In [None]:
# Button that triggers classification.
btn_run = widgets.Button(description='Classify')
btn_run

In [None]:
def on_click_classify(change):
    """Classify the most recently uploaded image and update the widgets.

    Shows a 128x128 thumbnail in `out_pl` and writes the predicted label(s)
    with their probabilities to `lbl_pred`. `change` is the argument passed
    by the button callback and is not used.
    """
    if not btn_upload.data:
        # Nothing uploaded yet: say so instead of failing on data[-1].
        lbl_pred.value = 'Please upload an image first.'
        return
    img = PILImage.create(btn_upload.data[-1])
    out_pl.clear_output()
    with out_pl: display(img.to_thumb(128,128))
    pred,pred_idx,probs = learn_inf.predict(img)
    # pred_idx can select several probabilities at once (multi-label output);
    # a multi-element tensor rejects the '.04f' format spec, so format the
    # selected values one by one. A single selected value prints as before.
    selected = probs[pred_idx].flatten().tolist()
    shown = ', '.join(f'{p:.04f}' for p in selected)
    lbl_pred.value = f'Prediction: {pred}; Probability: {shown}'

# Run on_click_classify whenever the Classify button is pressed.
btn_run.on_click(on_click_classify)

In [None]:
# Assemble the upload / classify UI in one vertical box.
VBox([widgets.Label('Upload your manga cover for prediction!'), 
      btn_upload, btn_run, out_pl, lbl_pred])