In [1]:
import os
import shutil
from collections.abc import Iterator

import requests
from bs4 import BeautifulSoup
from PIL import Image

In [10]:
def get_config_or_default(key: str, default_value: object) -> object:
    """Look up a setting in the process environment, falling back to a default.

    Environment variables are always strings. When ``default_value`` is a
    ``bool``, ``int`` or ``float``, the environment value is converted to that
    same type, so numeric settings stay usable in arithmetic whether they come
    from the environment or from the default. For any other default type the
    raw string is returned unchanged.

    Args:
        key: Name of the environment variable.
        default_value: Value returned when ``key`` is not set; its type also
            selects the conversion applied to the environment value.

    Returns:
        The (possibly converted) environment value, or ``default_value`` when
        the variable is not set.

    Raises:
        ValueError: If the environment value cannot be converted to the
            numeric type of ``default_value``.
    """
    raw_value = os.environ.get(key)
    if raw_value is None:
        return default_value

    # bool must be tested before int because bool is a subclass of int.
    if isinstance(default_value, bool):
        return raw_value.strip().lower() in ("1", "true", "yes", "on")
    if isinstance(default_value, (int, float)):
        try:
            return type(default_value)(raw_value)
        except ValueError as exc:
            raise ValueError(
                f"environment variable {key}={raw_value!r} is not a valid "
                f"{type(default_value).__name__}"
            ) from exc
    return raw_value

def get_booth_items_url() -> str:
    """Return the URL of the item listing page.

    The ``VBNA_TARGET_URL`` setting takes precedence; when it is not set the
    built-in default URL is used.
    """
    default_url = "https://booth.pm/ja/items?sort=new&tags%5B%5D=VRChat"
    configured_url = get_config_or_default("VBNA_TARGET_URL", default_url)
    return configured_url
    
# Resolve the listing URL once; the cells below read this module-level value.
target_url = get_booth_items_url()

In [3]:
def get_raw_html(url: str) -> str | None:
    """Fetch a page with HTTP GET and return its body as text.

    Args:
        url: Address to request.

    Returns:
        The response text when the status code is 200, otherwise ``None``.
    """
    # Request the URL that was passed in rather than the module-level
    # ``target_url``, so the function works for any caller-supplied address.
    res = requests.get(url)
    if res.status_code != 200:
        return None
    return res.text

# None here means the listing request did not return HTTP 200.
html_txt = get_raw_html(target_url)

In [4]:
def parse_items(text: str) -> list[dict[str, str]]:
    """Extract the item cards from a listing page.

    Args:
        text: HTML source of the listing page.

    Returns:
        One dict per item card, in page order, with the keys ``"name"``,
        ``"url"``, ``"image_url"`` and ``"price"``.

    Raises:
        IndexError: If a card lacks one of the expected child elements.
        KeyError: If the thumbnail link lacks ``href`` or ``data-original``.
    """
    card_selector = "body > div.page-wrap > main > div.container > div.l-row.l-market-grid.u-mt-0.u-ml-0 > div > div.u-mt-400 > ul > li"
    thumbnail_link_selector = "div.item-card__thumbnail.js-thumbnail > div.item-card__thumbnail-images > a:nth-child(1)"
    title_selector = "div.item-card__summary > div.item-card__title > a"
    price_selector = "div.item-card__summary > div.u-d-flex.u-align-items-center.u-justify-content-between > div.price.u-text-primary.u-text-left.u-tpg-caption2"

    def first_match(card, selector):
        # The first element matching the selector inside this card.
        return card.select(selector)[0]

    cards = BeautifulSoup(text, "html.parser").select(card_selector)

    # Each field is collected for every card before moving on to the next
    # field; the columns are stitched back together per card afterwards.
    urls = [first_match(card, thumbnail_link_selector).attrs["href"] for card in cards]
    image_urls = [first_match(card, thumbnail_link_selector).attrs["data-original"] for card in cards]
    names = [first_match(card, title_selector).text for card in cards]
    prices = [first_match(card, price_selector).text for card in cards]

    parsed = []
    for name, url, image_url, price in zip(names, urls, image_urls, prices):
        parsed.append({"name": name, "url": url, "image_url": image_url, "price": price})
    return parsed

# One dict per listed item with "name", "url", "image_url" and "price" keys.
items = parse_items(html_txt)

In [23]:
def get_work_dir(category_name: str, need_recreate: bool) -> str:
    """Return a working sub-directory, creating it when necessary.

    The base directory comes from ``VBNA_WORK_DIR`` and defaults to ``tmp``
    under the current directory. Missing parent directories are created too,
    so nested values for the base directory or ``category_name`` work.

    Args:
        category_name: Name of the sub-directory below the base directory.
        need_recreate: When true and the directory already exists, it is
            deleted with all of its contents and created again empty.

    Returns:
        Path of the sub-directory.

    Raises:
        OSError: If the directory cannot be removed or created.
    """
    base_dir = get_config_or_default("VBNA_WORK_DIR", os.path.join(os.path.curdir, "tmp"))
    dst_dir = os.path.join(base_dir, category_name)

    if need_recreate and os.path.exists(dst_dir):
        # Start from an empty directory.
        shutil.rmtree(dst_dir)

    # makedirs also creates base_dir (and any missing parents); exist_ok makes
    # the call a no-op when the directory is already there.
    os.makedirs(dst_dir, exist_ok=True)
    return dst_dir

def get_filename_from_url(url: str) -> str:
    """Return the part of ``url`` that follows its last ``/``.

    When ``url`` contains no ``/`` the whole string is returned.

    Raises:
        NameError: If nothing follows the last ``/``.
    """
    _, _, filename = url.rpartition("/")
    if filename:
        return filename
    raise NameError(f"get_filename_from_url(url={url}) -> {filename}")

def download_images(category: str, target_urls: list[str], clear_cache: bool = False) -> Iterator[str]:
    """Download images into a per-category cache directory, yielding local paths.

    This is a generator: nothing happens (not even creating the cache
    directory) until it is iterated, and each download runs only when its
    path is requested. Wrap the call in ``list()`` to fetch everything.

    Args:
        category: Suffix of the cache directory name
            (``download_cache_<category>``).
        target_urls: URLs of the images to fetch.
        clear_cache: When true, the cache directory is emptied first.

    Yields:
        The local file path for each URL, in input order. A file that already
        exists in the cache is reused without a new request.

    Raises:
        ConnectionError: If a download answers with a status other than 200.
        NameError: If a URL has nothing after its last ``/``.
    """
    base_dir = get_work_dir(f"download_cache_{category}", need_recreate=clear_cache)
    for image_url in target_urls:
        dst_path = os.path.join(base_dir, get_filename_from_url(image_url))

        # A file already present in the cache is reused as-is.
        if not os.path.exists(dst_path):
            res = requests.get(image_url)
            if res.status_code != 200:
                raise ConnectionError(res)
            with open(dst_path, mode="wb") as f:
                f.write(res.content)
        yield dst_path

# download_images is a generator, so list() is what actually drives the downloads.
local_image_path_arr = list(download_images("items", [item["image_url"] for item in items], clear_cache=False))

In [31]:
# Tile and canvas geometry, in pixels. Each value can be overridden through
# the environment variable named in the call. Environment values arrive as
# strings, so every setting goes through int() before it is used in arithmetic.
src_width = int(get_config_or_default("VBNA_SRC_IMAGE_WIDTH", 300))
src_height = int(get_config_or_default("VBNA_SRC_IMAGE_HEIGHT", 300))

dst_width = int(get_config_or_default("VBNA_DST_IMAGE_WIDTH", 2048))
dst_height = int(get_config_or_default("VBNA_DST_IMAGE_HEIGHT", 2048))
dst_margin = int(get_config_or_default("VBNA_DST_IMAGE_MARGIN", 16))

# Tiles are pasted on a pitch of (tile size + margin). n tiles in a row span
# n * size + (n - 1) * margin pixels, so the number that fits on the canvas is
# (canvas + margin) // (size + margin). Ignoring the margin here would let the
# last tile run past the canvas edge for some settings.
num_column = (dst_width + dst_margin) // (src_width + dst_margin)
num_row = (dst_height + dst_margin) // (src_height + dst_margin)
num_visible_items = num_column * num_row

def get_dst_dir(clear_cache: bool = False) -> str:
    """Return the output directory, obtained through ``get_work_dir``.

    The directory name comes from ``VBNA_DST_DIR`` (default ``"dist"``);
    ``clear_cache`` is forwarded as ``need_recreate``.
    """
    return get_work_dir(
        get_config_or_default("VBNA_DST_DIR", "dist"),
        need_recreate=clear_cache,
    )

def create_tile_image(dst_dir: str, local_images: list[str]) -> str:
    """Compose the given images into one tiled image and save it.

    Images are placed left to right, top to bottom on a grid of
    ``num_column`` x ``num_row`` cells. Each image is resized to
    ``src_width`` x ``src_height`` and cells are spaced ``dst_margin`` apart.
    Images beyond the grid capacity are ignored.

    Args:
        dst_dir: Directory the tiled image is written to.
        local_images: Paths of the source images, in display order.

    Returns:
        Path of the saved image. The file name comes from
        ``VBNA_DST_IMAGE_NAME`` and defaults to ``index.jpg``.
    """
    dst_filename = get_config_or_default("VBNA_DST_IMAGE_NAME", "index.jpg")
    dst_path = os.path.join(dst_dir, dst_filename)

    with Image.new("RGB", (dst_width, dst_height)) as dst_image:
        for i, src_image_path in enumerate(local_images):
            # Grid cell of the i-th image.
            row, column = divmod(i, num_column)
            if row >= num_row:
                # The grid is full; the remaining images are not shown.
                break
            with Image.open(src_image_path) as src_image:
                # Image.resize returns a new image and leaves the source
                # untouched, so the result has to be captured and pasted.
                tile = src_image.resize((src_width, src_height))
                dst_image.paste(tile, (column * (src_width + dst_margin), row * (src_height + dst_margin)))
        dst_image.save(dst_path)
    return dst_path

# Resolve the output directory and write the tiled image; the saved path is
# the value this cell displays.
dst_dir = get_dst_dir()
create_tile_image(dst_dir, local_image_path_arr)

.\tmp\download_cache_items\61232b6d-7267-401e-ab66-56af39d4ee18_base_resized.jpg
.\tmp\download_cache_items\9c8665ca-5817-456b-9ece-bdd72afe7ed5_base_resized.jpg
.\tmp\download_cache_items\74fdf2f9-c2a9-4f1e-93ec-1a5374a0f0bd_base_resized.jpg
.\tmp\download_cache_items\879ae9cc-48b4-43bb-8045-b85d647c4296_base_resized.jpg
.\tmp\download_cache_items\15480c9e-aa12-4e29-a8fd-7515a44b0b1f_base_resized.jpg
.\tmp\download_cache_items\7b01a366-f91f-418d-875e-328a24be3561_base_resized.jpg
.\tmp\download_cache_items\1a36e422-222d-478c-a021-3f5cdb594eaf_base_resized.jpg
.\tmp\download_cache_items\0cec1dfb-6dc7-4c57-933f-1c47609c962e_base_resized.jpg
.\tmp\download_cache_items\5a0ae51e-1758-4c5b-9432-2e68ed967fb8_base_resized.jpg
.\tmp\download_cache_items\77d65801-40bc-46d2-969c-33e1aac561b4_base_resized.jpg
.\tmp\download_cache_items\31438088-4d01-4168-9097-5cc4de0c4076_base_resized.jpg
.\tmp\download_cache_items\5be8f598-3b45-4411-b894-e14a52fcb3c2_base_resized.jpg
.\tmp\download_cache_items\6

'.\\tmp\\dist\\index.jpg'