In [82]:
import os
import datetime
import shutil
import subprocess
import json
from collections.abc import Iterator

import requests
from bs4 import BeautifulSoup
from PIL import Image
from discord_webhook import DiscordWebhook, DiscordEmbed

In [10]:
def get_config_or_default(key: str, default_value: object) -> object:
    """Look up a setting in the process environment.

    Args:
        key: Name of the environment variable.
        default_value: Value handed back unchanged when ``key`` is not set.

    Returns:
        The environment variable's value (always a ``str``) when it is set,
        otherwise ``default_value``.
    """
    # A value that is set is returned as-is, even when it is an empty string.
    return os.environ.get(key, default_value)

def get_booth_items_url() -> str:
    """Return the Booth listing URL to scrape.

    The value is read from ``VBNA_TARGET_URL`` through
    ``get_config_or_default``; the built-in listing URL is used when that
    variable is not set.
    """
    fallback_url = "https://booth.pm/ja/items?sort=new&tags%5B%5D=VRChat"
    return get_config_or_default("VBNA_TARGET_URL", fallback_url)
    
# Resolve the listing URL once; later cells pass it to the fetch and info-file steps.
target_url = get_booth_items_url()

In [3]:
def get_raw_html(url: str) -> str | None:
    res = requests.get(target_url)
    if res.status_code != 200:
        return None
    return res.text

# Fetch the listing page; html_txt is None when the response status is not 200.
html_txt = get_raw_html(target_url)

In [85]:
def parse_items(text: str) -> list[dict[str, str]]:
    """Extract the item cards from a Booth listing page.

    Args:
        text: HTML source of the listing page.

    Returns:
        One dict per matched card with the keys ``name``, ``url``,
        ``image_url`` and ``price`` (in that order).

    Raises:
        IndexError: A card has no element matching one of the selectors.
        KeyError: The thumbnail link lacks ``href`` or ``data-original``.
    """
    card_selector = "body > div.page-wrap > main > div.container > div.l-row.l-market-grid.u-mt-0.u-ml-0 > div > div.u-mt-400 > ul > li"
    thumbnail_selector = "div.item-card__thumbnail.js-thumbnail > div.item-card__thumbnail-images > a:nth-child(1)"
    title_selector = "div.item-card__summary > div.item-card__title > a"
    price_selector = "div.item-card__summary > div.u-d-flex.u-align-items-center.u-justify-content-between > div.price.u-text-primary.u-text-left.u-tpg-caption2"

    def first_match(card, selector):
        # Every card is expected to contain the element; a miss raises IndexError.
        return card.select(selector)[0]

    cards = BeautifulSoup(text, "html.parser").select(card_selector)

    # Collect one field at a time across all cards, in this fixed order.
    link_targets = [first_match(card, thumbnail_selector).attrs["href"] for card in cards]
    thumbnail_sources = [first_match(card, thumbnail_selector).attrs["data-original"] for card in cards]
    titles = [first_match(card, title_selector).text for card in cards]
    price_labels = [first_match(card, price_selector).text for card in cards]

    parsed = []
    for idx, title in enumerate(titles):
        parsed.append({
            "name": title,
            "url": link_targets[idx],
            "image_url": thumbnail_sources[idx],
            "price": price_labels[idx],
        })
    return parsed

# Parse the fetched HTML into a list of item dicts (name, url, image_url, price).
items = parse_items(html_txt)

In [23]:
def get_work_dir(category_name: str, need_recreate: bool) -> str:
    """Return a working directory for ``category_name``, creating it if needed.

    The directory lives under ``VBNA_WORK_DIR`` (default ``./tmp``). Missing
    parent directories are created as well, so a nested ``VBNA_WORK_DIR``
    works on a fresh checkout.

    Args:
        category_name: Name of the sub-directory below the work directory.
        need_recreate: When true, an existing directory is deleted with all
            of its contents and created again empty.

    Returns:
        Path of the (now existing) directory.
    """
    base_dir = get_config_or_default("VBNA_WORK_DIR", os.path.join(os.path.curdir, "tmp"))
    dst_dir = os.path.join(base_dir, category_name)

    if need_recreate and os.path.exists(dst_dir):
        # Drop the old contents so the caller starts from an empty directory.
        shutil.rmtree(dst_dir)
    # makedirs creates base_dir and any other missing parent in one step and
    # does not fail when the directory is already there.
    os.makedirs(dst_dir, exist_ok=True)
    return dst_dir

def get_filename_from_url(url: str) -> str:
    """Return the part of ``url`` that follows its last ``/``.

    When ``url`` contains no ``/`` the whole string is returned.

    Raises:
        NameError: The extracted name is empty (e.g. ``url`` ends with ``/``).
    """
    filename = url.rpartition("/")[2]
    if filename:
        return filename
    raise NameError(f"get_filename_from_url(url={url}) -> {filename}")

def download_images(category: str, target_urls: list[str], clear_cache: bool = False) -> Iterator[str]:
    """Yield a local file path for every URL, downloading files not yet cached.

    This is a generator: nothing happens (not even the creation of the cache
    directory) until it is iterated.

    Args:
        category: Suffix of the cache directory name
            (``download_cache_<category>``).
        target_urls: URLs to fetch.
        clear_cache: Forwarded to ``get_work_dir`` as ``need_recreate``.

    Yields:
        The path of the cached file for each URL, in input order.

    Raises:
        ConnectionError: A download answered with a status other than 200.
    """
    base_dir = get_work_dir(f"download_cache_{category}", need_recreate=clear_cache)
    for image_url in target_urls:
        dst_path = os.path.join(base_dir, get_filename_from_url(image_url))

        # A file that already exists is served from the cache.
        if not os.path.exists(dst_path):
            res = requests.get(image_url)
            if res.status_code != 200:
                raise ConnectionError(res)
            # Write to a side file and rename it into place, so a failed or
            # interrupted write never leaves a truncated file that a later
            # run would mistake for a valid cache entry.
            partial_path = dst_path + ".part"
            with open(partial_path, mode="wb") as f:
                f.write(res.content)
            os.replace(partial_path, dst_path)
        yield dst_path

# Download (or reuse cached copies of) every item thumbnail; list() drains the generator.
local_image_path_arr = list(download_images("items", [item["image_url"] for item in items], clear_cache=False))

In [33]:
def get_dst_dir(clear_cache: bool = False) -> str:
    """Return the output directory, obtained through ``get_work_dir``.

    The directory name comes from ``VBNA_DST_DIR`` (default ``"dist"``);
    ``clear_cache`` is forwarded as ``need_recreate``.
    """
    return get_work_dir(
        get_config_or_default("VBNA_DST_DIR", "dist"),
        need_recreate=clear_cache,
    )

# Output directory for the generated files (created if missing, not cleared).
dst_dir = get_dst_dir()

In [79]:
def create_tile_image(dst_dir: str, local_images: list[str]) -> tuple[str, dict[str, object]]:
    """Paste the given images onto one canvas as a grid and save it.

    Each image is resized to the configured tile size and placed row by row,
    left to right. Images that do not fit on the canvas are left out.

    Configuration (environment variable, default):
        ``VBNA_DST_IMAGE_NAME`` (``index.jpg``),
        ``VBNA_SRC_IMAGE_WIDTH`` / ``VBNA_SRC_IMAGE_HEIGHT`` (300),
        ``VBNA_DST_IMAGE_WIDTH`` / ``VBNA_DST_IMAGE_HEIGHT`` (2048),
        ``VBNA_DST_IMAGE_MARGIN`` (32).

    Args:
        dst_dir: Directory the tiled image is written to.
        local_images: Paths of the images to tile, in display order.

    Returns:
        ``(path of the saved image, layout info)`` where the layout info holds
        the file name, tile size, canvas size, margin and grid dimensions.

    Raises:
        ValueError: A size setting from the environment is not an integer.
    """
    dst_filename = get_config_or_default("VBNA_DST_IMAGE_NAME", "index.jpg")
    dst_path = os.path.join(dst_dir, dst_filename)

    # Values read from the environment arrive as strings, so coerce every size.
    src_width = int(get_config_or_default("VBNA_SRC_IMAGE_WIDTH", 300))
    src_height = int(get_config_or_default("VBNA_SRC_IMAGE_HEIGHT", 300))
    dst_width = int(get_config_or_default("VBNA_DST_IMAGE_WIDTH", 2048))
    dst_height = int(get_config_or_default("VBNA_DST_IMAGE_HEIGHT", 2048))
    dst_margin = int(get_config_or_default("VBNA_DST_IMAGE_MARGIN", 32))

    # Tiles are placed every (size + margin) pixels, so the grid must be
    # counted with the same pitch; otherwise the last tiles spill off the canvas.
    pitch_x = src_width + dst_margin
    pitch_y = src_height + dst_margin
    num_columns = (dst_width + dst_margin) // pitch_x
    num_rows = (dst_height + dst_margin) // pitch_y
    capacity = max(num_columns, 0) * max(num_rows, 0)

    with Image.new("RGB", (dst_width, dst_height)) as dst_image:
        for i, src_image_path in enumerate(local_images):
            if i >= capacity:
                # The canvas is full (or cannot hold a single tile).
                break
            column, row = i % num_columns, i // num_columns
            with Image.open(src_image_path) as src_image:
                # resize() returns a new image; paste that one, not the source.
                tile = src_image.resize((src_width, src_height))
                dst_image.paste(tile, (column * pitch_x, row * pitch_y))
        dst_image.save(dst_path)

    return (dst_path, {
        "name": dst_filename,
        "src_width": src_width,
        "src_height": src_height,
        "dst_width": dst_width,
        "dst_height": dst_height,
        "dst_margin": dst_margin,
        "num_columns": num_columns,
        "num_rows": num_rows,
    })

# Build the tiled image from the downloaded thumbnails and keep its layout info.
dst_image_path, img_info = create_tile_image(dst_dir, local_image_path_arr)

In [90]:
def create_info_file(target_url: str, items: dict[str, str], dst_image_path: str, img_info: dict[str, str]) -> dict[str, str]:
    dst_json_filename = get_config_or_default("VBNA_DST_INFO_NAME", "index.json")
    dst_json_path = os.path.join(dst_dir, dst_json_filename);

    dst = {
        "created_at": str(datetime.datetime.now()),
        "hash": subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).strip().decode("utf-8"),
        "target_url": target_url,
        "items": items,
        "img_info": img_info,
    }
    # save file
    with open(dst_json_path, mode="w", encoding="utf-8") as f:
        json.dump(dst, f, ensure_ascii=False)
    # webhook
    webhook_url = get_config_or_default("VBNA_WEBHOOK_URL", "")
    if webhook_url:
        webhook = DiscordWebhook(url=webhook_url, content="Data Updated!")
        # image
        with open(dst_image_path, "rb") as f:
            webhook.add_file(file=f.read(), filename=os.path.basename(dst_image_path))
        embed = DiscordEmbed(title="Content Details")
        embed.set_author(name="Booth", url=target_url)
        for i in items:
            embed.add_embed_field(name=i["name"], value=i["url"])
        webhook.add_embed(embed)
        # json
        with open(dst_json_path, "rb") as f:
            webhook.add_file(file=f.read(), filename=os.path.basename(dst_json_path))

        resp = webhook.execute()
        if not(resp.ok):
            raise ConnectionError(resp)
    return dst

# Write the info JSON (and notify the webhook when VBNA_WEBHOOK_URL is set), then display the result.
info = create_info_file(target_url, items, dst_image_path, img_info)
info

{'created_at': '2023-09-03 16:31:42.014456',
 'hash': 'ea195bf',
 'target_url': 'https://booth.pm/ja/items?sort=new&tags%5B%5D=VRChat',
 'items': [{'name': 'シンプルアクセサリー12点セット',
   'url': 'https://booth.pm/ja/items/5064979',
   'image_url': 'https://booth.pximg.net/c/300x300_a2_g5/b77a5545-e3a9-4a9d-9b8a-aa6877424661/i/5064979/61232b6d-7267-401e-ab66-56af39d4ee18_base_resized.jpg',
   'price': '¥ 500'},
  {'name': '★汎用☆ロングリボンシニョンキャップ★',
   'url': 'https://booth.pm/ja/items/5064954',
   'image_url': 'https://booth.pximg.net/c/300x300_a2_g5/61992e36-7e4b-45bb-bc18-020ac510aff6/i/5064954/9c8665ca-5817-456b-9ece-bdd72afe7ed5_base_resized.jpg',
   'price': '¥ 400~'},
  {'name': '落ち着き「ピュア・ロングワンピ」アッシュ -Ash-対応衣装',
   'url': 'https://booth.pm/ja/items/5064929',
   'image_url': 'https://booth.pximg.net/c/300x300_a2_g5/0d177966-270e-4e56-86ec-b06aa39b0d28/i/5064929/74fdf2f9-c2a9-4f1e-93ec-1a5374a0f0bd_base_resized.jpg',
   'price': '¥ 1,500'},
  {'name': ' [VRC Hair] Nya! 猫耳 ショートカット',
   'url': 'ht