In [8]:
import requests
import json
import time
import os
from pymongo import MongoClient, UpdateOne
# --- Danbooru API settings ---------------------------------------------------
# Credentials are read from the environment so they never live in the notebook.
# An unset variable yields None here rather than raising.
BASE_URL = "https://danbooru.donmai.us"
USERNAME = os.environ.get("DANBOORU_USERNAME")
API_KEY = os.environ.get("DANBOORU_API_KEY")
TAG_QUERY = "bang_dream!_it's_mygo!!!!!"  # Replace with the tag you want to query

# --- MongoDB settings --------------------------------------------------------
MONGO_URI = os.environ.get("MONGO_URI")
DB_NAME = "Danbooru"  # Replace with your own database name
COLLECTION_NAME = "AveMyGO"  # Replace with your own collection name

# Handles shared by the cells below: client -> database -> collection.
client = MongoClient(MONGO_URI)
db = client[DB_NAME]
collection = db[COLLECTION_NAME]


In [9]:
def get_profile(timeout=30):
    """Fetch the profile of the account identified by USERNAME / API_KEY.

    Sends an authenticated GET to ``{BASE_URL}/profile.json``.

    Args:
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so without this a stalled
            connection would block the cell indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(
        f"{BASE_URL}/profile.json",
        auth=(USERNAME, API_KEY),
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()
# Credential check: get_profile() raises if the API answers with an error
# status, so a bad username / API key surfaces here before the long download.
if __name__ == "__main__":
    profile = get_profile()

In [10]:
def fetch_posts(tags, page=None, limit=200, timeout=30):
    """Fetch a single page of posts matching ``tags``.

    Sends an authenticated GET to ``{BASE_URL}/posts.json``.

    Args:
        tags: Tag query string, sent as the ``tags`` parameter.
        page: Page selector sent as the ``page`` parameter, e.g. the
            ``"b<id>"`` cursor built by fetch_all_posts. Left out of the
            request when falsy.
        limit: Number of posts requested, sent as the ``limit`` parameter.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so without this a stalled
            connection would block the whole download loop.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    params = {"tags": tags, "limit": limit}
    if page:
        params["page"] = page
    response = requests.get(
        f"{BASE_URL}/posts.json",
        params=params,
        auth=(USERNAME, API_KEY),
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()
def fetch_all_posts(tags, max_pages=160, delay=1, limit=200):
    """Collect posts matching ``tags`` by walking result pages until they run out.

    The first request carries no page selector. Every later request passes
    the cursor ``"b<min_id>"``, where ``min_id`` is the smallest post id on
    the page just received, so pages are chained by id rather than by number.
    (Presumably ``b`` means "before this id" in the Danbooru API; confirm
    against the API documentation.)

    Args:
        tags: Tag query forwarded to fetch_posts.
        max_pages: Upper bound on the number of requests issued.
        delay: Seconds to pause between consecutive requests. Values <= 0
            disable the pause.
        limit: Posts requested per page, forwarded to fetch_posts.

    Returns:
        One list holding the posts of every fetched page, in fetch order.
        Stops early at the first empty page.
    """
    all_posts = []
    page = None
    for request_index in range(max_pages):
        # Pause only *between* requests: nothing to wait for before the first
        # one, and no point sleeping after the last.
        if request_index and delay > 0:
            time.sleep(delay)
        posts = fetch_posts(tags, page=page, limit=limit)
        if not posts:
            break
        all_posts.extend(posts)
        min_id = min(post["id"] for post in posts)
        page = f"b{min_id}"
    return all_posts

In [None]:
# Download every page for TAG_QUERY. One HTTP request is issued per page with
# a pause in between, so this cell is slow for large tags.
posts = fetch_all_posts(TAG_QUERY, max_pages=165)  # raise max_pages to fetch more posts
print(f"Fetched {len(posts)} posts")

Fetched 32740 posts


In [5]:
# Per-category tag fields that arrive as space-separated strings and are
# converted to lists in place.
# NOTE(review): the API presumably also returns tag_string_artist, which is
# left as a raw string here; confirm whether that is intentional.
tags = ['tag_string_general', 'tag_string_character', 'tag_string_copyright', 'tag_string_meta']
for post in posts:
    # pop() with a default (instead of del) keeps the cell re-runnable: on a
    # second run the key is already gone.
    post.pop('tag_string', None)
    for tag in tags:
        # Skip values already converted by an earlier run of this cell.
        if isinstance(post[tag], str):
            # split() with no separator returns [] for an empty string and
            # ignores repeated spaces; split(" ") would produce [''] and store
            # an empty tag for posts that have none in this category.
            post[tag] = post[tag].split()

In [9]:
def normalize_post(post: dict) -> dict:
    """Return a copy of ``post`` with its ``id`` moved to ``_id``.

    The input dict is left untouched; the copy is shallow, so nested values
    are shared with the original. Using the post id as ``_id`` makes the same
    post always map to the same document.

    Raises:
        KeyError: If ``post`` has no ``"id"`` key.
    """
    renamed = {key: value for key, value in post.items() if key != "id"}
    renamed["_id"] = post["id"]  # ensure deduplication
    return renamed
def upsert_posts(posts):
    """Write ``posts`` to the module-level ``collection``, keyed by ``_id``.

    Each post is passed through normalize_post and wrapped in an ``UpdateOne``
    that ``$set``s the whole document with ``upsert=True``. All operations go
    out in a single unordered ``bulk_write`` call. Nothing is sent when
    ``posts`` is empty.
    """
    documents = (normalize_post(post) for post in posts)
    operations = [
        UpdateOne({"_id": document["_id"]}, {"$set": document}, upsert=True)
        for document in documents
    ]
    if not operations:
        return
    collection.bulk_write(operations, ordered=False)

In [10]:
# Persist the fetched posts. Operations are upserts filtered on _id, so
# re-running this cell updates existing documents instead of duplicating them.
upsert_posts(posts)