In [1]:
import os
import shutil
from collections.abc import Iterator

import requests
from bs4 import BeautifulSoup
from PIL import Image

In [2]:
def get_booth_items_url() -> str:
    """Return the BOOTH listing URL that the notebook should scrape.

    The ``VBNA_BOOTH_TARGET_URL`` environment variable wins when it is set to
    a non-empty value; otherwise the built-in default listing URL is used.
    """
    default_url = "https://booth.pm/ja/items?sort=new&tags%5B%5D=VRChat"
    # An unset variable (None) and an empty string both fall through to the default.
    return os.getenv("VBNA_BOOTH_TARGET_URL") or default_url
    
# Resolve the listing URL once; the fetch cell below reads this notebook-level value.
target_url = get_booth_items_url()

In [7]:
def get_raw_html(url: str) -> str | None:
    res = requests.get(target_url)
    if res.status_code != 200:
        return None
    return res.text

# Fetch the listing page; html_txt is None when the response status is not 200.
html_txt = get_raw_html(target_url)

In [8]:
def parse_items(text: str) -> list[dict[str, str]]:
    """Extract the listed items from a listing page.

    The markup is parsed with ``html.parser`` and every ``li`` matched by the
    listing selector is treated as one item card.

    Args:
        text: HTML source of the listing page.

    Returns:
        One dict per item card, in page order, with the keys ``name``,
        ``url``, ``image_url`` and ``price``.  ``url`` and ``image_url`` come
        from the ``href`` and ``data-original`` attributes of the thumbnail
        anchor; ``name`` and ``price`` are the text of the title anchor and
        the price element.

    Note:
        The first match of each selector is indexed unconditionally, so a card
        lacking one of these elements (or attributes) raises instead of being
        skipped.
    """
    card_selector = "body > div.page-wrap > main > div.container > div.l-row.l-market-grid.u-mt-0.u-ml-0 > div > div.u-mt-400 > ul > li"
    anchor_selector = "div.item-card__thumbnail.js-thumbnail > div.item-card__thumbnail-images > a:nth-child(1)"
    title_selector = "div.item-card__summary > div.item-card__title > a"
    price_selector = "div.item-card__summary > div.u-d-flex.u-align-items-center.u-justify-content-between > div.price.u-text-primary.u-text-left.u-tpg-caption2"

    def first_match(card, selector):
        # First element matching `selector` inside one item card.
        return card.select(selector)[0]

    cards = BeautifulSoup(text, "html.parser").select(card_selector)

    # One pass per field, in this order, over all cards.
    urls = [first_match(card, anchor_selector).attrs["href"] for card in cards]
    image_urls = [first_match(card, anchor_selector).attrs["data-original"] for card in cards]
    names = [first_match(card, title_selector).text for card in cards]
    prices = [first_match(card, price_selector).text for card in cards]

    keys = ("name", "url", "image_url", "price")
    return [dict(zip(keys, row)) for row in zip(names, urls, image_urls, prices)]

# Turn the fetched HTML into one dict (name, url, image_url, price) per listed item.
items = parse_items(html_txt)

In [12]:
def get_work_dir(category_name: str, need_recreate: bool) -> str:
    """Return the working directory for a category, creating it when needed.

    The base directory is read from the ``VBNA_CACHE_DIR`` environment
    variable and falls back to ``./cache`` when the variable is unset or empty.

    Args:
        category_name: Name of the sub-directory inside the base directory.
        need_recreate: When true and the directory already exists, it is
            deleted together with its contents and created again empty.

    Returns:
        Path of the (now existing) category directory.

    Raises:
        OSError: If the directory cannot be removed or created, for example
            because the path exists but is not a directory.
    """
    base_dir = os.getenv("VBNA_CACHE_DIR") or os.path.join(os.path.curdir, "cache")
    dst_dir = os.path.join(base_dir, category_name)

    if need_recreate and os.path.exists(dst_dir):
        # Drop any previous contents so the caller starts from an empty directory.
        shutil.rmtree(dst_dir)

    # makedirs creates missing parents (a nested VBNA_CACHE_DIR) and tolerates
    # the directory already being there, unlike a bare os.mkdir.
    os.makedirs(dst_dir, exist_ok=True)
    return dst_dir

def get_filename_from_url(url: str) -> str:
    """Return the part of ``url`` that follows its last ``/``.

    A string without any ``/`` is returned unchanged.

    Raises:
        NameError: If nothing follows the last ``/`` (the result would be empty).
    """
    # rpartition yields (head, sep, tail); tail is the whole string when no "/" exists.
    _, _, filename = url.rpartition("/")
    if filename:
        return filename
    raise NameError(f"get_filename_from_url(url={url}) -> {filename}")

def download_images(category: str, target_urls: list[str], clear_cache=False) -> Iterator[str]:
    """Download images into the category's cache directory and yield local paths.

    This is a generator: nothing is fetched until it is iterated (for example
    by wrapping the call in ``list(...)``).

    Args:
        category: Cache category; files go to the ``download_cache_<category>``
            working directory.
        target_urls: URLs of the images to fetch.
        clear_cache: When true, the cache directory is emptied before downloading.

    Yields:
        The local file path for every URL, in input order, whether the file was
        downloaded just now or was already present in the cache.

    Raises:
        ConnectionError: If a download answers with a status other than 200.
        NameError: Propagated from ``get_filename_from_url`` when a URL has no
            file-name part.
    """
    base_dir = get_work_dir(f"download_cache_{category}", need_recreate=clear_cache)
    for image_url in target_urls:
        dst_path = os.path.join(base_dir, get_filename_from_url(image_url))
        # Only hit the network when the file is not in the cache yet.
        if not os.path.exists(dst_path):
            res = requests.get(image_url)
            if res.status_code != 200:
                raise ConnectionError(res)
            with open(dst_path, mode="wb") as f:
                f.write(res.content)
        # Cached files are reported as well, so a re-run still returns every image.
        yield dst_path

# Fetch the item thumbnails into the "items" download cache (kept between runs) and
# materialise the generator returned by download_images into a list of local paths.
local_images = list(download_images("items", [item["image_url"] for item in items], clear_cache=False))

In [None]:
# Pixel size of one source image and of the tiled output canvas.
src_width, src_height = 300, 300
dst_width, dst_height = 2048, 2048

# Number of whole source-sized tiles that fit along each axis of the canvas.
num_column = dst_width // src_width
num_row = dst_height // src_height
num_visible_items = num_row * num_column

# The canvas is divided evenly among the tiles, so a tile can come out larger
# than the source size (341 vs. 300 with the values above).
tile_width = dst_width // num_column
tile_height = dst_height // num_row