In [1]:
from dotenv import load_dotenv

# Call load_dotenv() outside the assert: `python -O` strips assert statements
# together with any call inside them, which would silently skip loading .env.
dotenv_loaded = load_dotenv()
assert dotenv_loaded, "load_dotenv() reported that nothing was loaded"

In [40]:
import os
from gppt import GetPixivToken
from pixivpy3 import AppPixivAPI


def get_refresh_token() -> str:
    """Return a Pixiv refresh token, reusing the cached one when available.

    The token is read from ``export/refresh_token.txt`` when that file exists
    and is non-empty. Otherwise a headless ``GetPixivToken`` login is performed
    with the ``PIXIV_USERNAME`` / ``PIXIV_PASSWORD`` environment variables and
    the resulting token is written to the cache file before being returned.

    Returns:
        The refresh token string.

    Raises:
        RuntimeError: a login is required but ``PIXIV_USERNAME`` or
            ``PIXIV_PASSWORD`` is unset or empty.
    """
    try:
        with open("export/refresh_token.txt") as f:
            if refresh_token := f.read().strip():
                return refresh_token
    except FileNotFoundError:
        # No cache yet (or no export/ directory): fall through to a login.
        pass

    username = os.getenv("PIXIV_USERNAME")
    password = os.getenv("PIXIV_PASSWORD")
    # Fail before starting the login: the headless session has nobody to type
    # credentials in, so passing None would not produce a usable token.
    if not username or not password:
        raise RuntimeError(
            "PIXIV_USERNAME and PIXIV_PASSWORD must be set to obtain a refresh token"
        )

    g = GetPixivToken(headless=True)
    refresh_token = g.login(username=username, password=password)["refresh_token"]
    # Make sure the cache directory exists so a successful login is not lost
    # to a FileNotFoundError on the write below.
    os.makedirs("export", exist_ok=True)
    with open("export/refresh_token.txt", "w") as f:
        f.write(refresh_token)
    return refresh_token


# Create the API client, then authenticate it with the cached or freshly
# obtained refresh token. The trailing `;` suppresses the cell output.
aapi = AppPixivAPI()
refresh_token = get_refresh_token()
aapi.auth(refresh_token=refresh_token);

In [41]:
# Config
# Pixiv user id whose bookmarked illusts are fetched below.
USER_ID = 18_218_607

In [42]:
def handle_err(res):
    """Report whether an API response carries an error.

    When ``res`` contains an ``"error"`` key, its ``error`` attribute is
    printed and ``True`` is returned; otherwise nothing is printed and
    ``False`` is returned.
    """
    failed = "error" in res
    if failed:
        print("error:", res.error)
    return failed

In [None]:
# Collect the user's bookmarked illusts page by page, following next_url
# until parse_qs() yields None.
illusts = []
next_qs = {"user_id": USER_ID}
while next_qs is not None:
    resp = aapi.user_bookmarks_illust(**next_qs)
    if handle_err(resp):
        # Stop paging on an API error; `illusts` keeps what was fetched so far.
        print("qs:", next_qs)
        break
    if resp.illusts is not None:
        illusts += resp.illusts
    next_qs = aapi.parse_qs(resp.next_url)
print("total illusts:", len(illusts))

In [None]:
# Read the illust id recorded in export/last_id.txt, if the file exists
# and is non-empty.
try:
    with open("export/last_id.txt") as f:
        last_id_text = f.read().strip()
except FileNotFoundError:
    last_id_text = ""
last_id = int(last_id_text) if last_id_text else None
print("last_id:", last_id)

# Position of that illust in the fetched list; stays None when there is no
# recorded id or the id is not among the fetched illusts.
last_illust_idx = None
if last_id is not None:
    last_illust_idx = next(
        (idx for idx, illust in enumerate(illusts) if illust["id"] == last_id),
        None,
    )
print("last_illust_idx:", last_illust_idx)

# Keep only the entries that come before the recorded one.
if last_illust_idx is not None:
    illusts = illusts[:last_illust_idx]
print("left illusts:", len(illusts))


In [45]:
# Map each illust id to the list of its original-size image URLs.
illust_urls = {}
for illust in illusts:
    pages = illust["meta_pages"] if "meta_pages" in illust else []
    if len(pages) == 0:
        # No per-page entries: the single original URL is in meta_single_page.
        illust_urls[illust["id"]] = [illust["meta_single_page"]["original_image_url"]]
        continue

    originals = []
    for page in pages:
        if "image_urls" not in page:
            # Show the unexpected page entry before the lookup below fails.
            print(page)
        originals.append(page["image_urls"]["original"])
    illust_urls[illust["id"]] = originals

In [46]:
from pathlib import Path

# Illusts with a single URL are saved directly into export/; illusts with
# several URLs get their own export/<id>/ folder. A failing download is
# reported with its position and then re-raised.
for i, (illust_id, urls) in enumerate(illust_urls.items()):
    if len(urls) > 1:
        target_dir = Path("export") / str(illust_id)
        target_dir.mkdir(exist_ok=True)
        for j, url in enumerate(urls):
            try:
                aapi.download(url, path=str(target_dir))
            except BaseException:
                print("failed to download at idx %d-%d id %s" % (i, j, illust_id))
                raise
        continue

    try:
        aapi.download(urls[0], path="export")
    except BaseException:
        print("failed to download at idx %d id %s" % (i, illust_id))
        raise

- Manually remove images with the `limit_` prefix; they are not illustrations.
- Images with the `_ugoira0` suffix are ugoiras; they need special handling to get the zip pack.


In [None]:
# Ugoiras are the illusts whose type is "ugoira".
ugoira_ids = [illust["id"] for illust in illusts if illust["type"] == "ugoira"]
print("ugoiras:", len(ugoira_ids))

# Fetch the metadata of every ugoira and index it by illust id.
ugoiras = [
    aapi.ugoira_metadata(ugoira_id)["ugoira_metadata"] for ugoira_id in ugoira_ids
]
ugoira_meta = dict(zip(ugoira_ids, ugoiras))
# Take the medium zip URL and swap 600x600 for 1920x1080
# to get the original (or at least larger) ugoira.
ugoira_urls = {
    ugoira_id: meta["zip_urls"]["medium"].replace("600x600", "1920x1080")
    for ugoira_id, meta in ugoira_meta.items()
}

In [48]:
# Download every ugoira zip into export/; a failing entry is reported with
# its position and id, then the exception is re-raised.
for idx, (ugoira_id, zip_url) in enumerate(ugoira_urls.items()):
    try:
        aapi.download(zip_url, path="export")
    except BaseException:
        print("failed to download ugoira at idx %d id %s" % (idx, ugoira_id))
        raise

In [53]:
import json

# Merge the freshly fetched ugoira metadata into export/fav_ugoira_meta.json.
if len(ugoira_ids) > 0:
    saved_ugoira_meta = {}
    try:
        with open("export/fav_ugoira_meta.json") as f:
            saved_ugoira_meta = json.load(f)
    except FileNotFoundError:
        # First run: nothing saved yet.
        pass
    # JSON object keys are always strings, so ids loaded from the file are str
    # while the fresh ids are int. Merging them as-is would keep both "123" and
    # 123 and write a duplicate key; normalise to str so the new entry replaces
    # the old one.
    saved_ugoira_meta.update(
        {str(ugoira_id): meta for ugoira_id, meta in ugoira_meta.items()}
    )
    with open("export/fav_ugoira_meta.json", "w") as f:
        json.dump(saved_ugoira_meta, f)

In [50]:
# Prepend the new illusts to the ones already stored in export/fav_illusts.json.
if len(illusts) > 0:
    saved_illusts = []
    try:
        with open("export/fav_illusts.json") as f:
            saved_illusts = json.load(f)
    except FileNotFoundError:
        # First run: nothing saved yet.
        pass
    # Drop stored entries whose id is being written again, so re-running this
    # cell (or a run where last_id was not found among the bookmarks) does not
    # duplicate illusts in the file.
    new_ids = {illust["id"] for illust in illusts}
    saved_illusts = illusts + [
        saved for saved in saved_illusts if saved.get("id") not in new_ids
    ]
    with open("export/fav_illusts.json", "w") as f:
        json.dump(saved_illusts, f)

In [51]:
# Record the id of the first remaining illust in export/last_id.txt; it is
# read back as last_id on the next run.
if illusts:
    with open("export/last_id.txt", "w") as last_id_file:
        first_id = illusts[0]["id"]
        last_id_file.write(str(first_id))