From c6d16ed0ca980faa0bfa540cf81fc454a7d5482a Mon Sep 17 00:00:00 2001 From: yokarion Date: Sat, 17 May 2025 22:49:52 +0200 Subject: [PATCH 01/10] added features: organized usage, e621_tag_failed tag --- plugins/e621_tagger/e621_tagger.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/plugins/e621_tagger/e621_tagger.py b/plugins/e621_tagger/e621_tagger.py index 4b66bdb4..d0aa528d 100644 --- a/plugins/e621_tagger/e621_tagger.py +++ b/plugins/e621_tagger/e621_tagger.py @@ -54,6 +54,10 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N if any(tag["name"] == "e621_tagged" for tag in image.get("tags", [])): return + + if any(tag["name"] == "e621_tag_failed" for tag in image.get("tags", [])): + return + try: time.sleep(2) # Rate limiting response = requests.get( @@ -64,7 +68,18 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N response.raise_for_status() post_data = response.json().get("post", {}) except Exception as e: - log.error(f"e621 API error: {str(e)}") + log.error(f"Marking as failed. e621 API error: {str(e)}") + e621_tag_failed_tag = get_or_create_tag(stash, "e621_tag_failed") + image_tags_on_e621_fail_ids = [e621_tag_failed_tag["id"]] + + for tag in image.get("tags"): + image_tags_on_e621_fail_ids.append(tag["id"]) + + stash.update_image({ + "id": image_id, + "tag_ids": list(set(image_tags_on_e621_fail_ids)) + }) + return if not post_data: @@ -104,6 +119,7 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N try: stash.update_image({ "id": image_id, + "organized": True, "urls": [post_url], "tag_ids": list(set(tag_ids)), "studio_id": studio_id, @@ -115,6 +131,7 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N log.error(f"Update failed: {str(e)}") + def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict: """Find or create tag with hierarchy handling""" # Validate tag name @@ -205,12 +222,13 @@ def scrape_image(client: StashInterface, image_id: str) -> None: # Plugin setup and execution # In the main execution block: if __name__ == "__main__": + log.info("Starting tagger...") json_input = json.loads(sys.stdin.read()) stash = StashInterface(json_input["server_connection"]) config = stash.get_configuration().get("plugins", {}) settings = { - "SkipTags": "e621_tagged", # Add automatic filtering + "SkipTags": "e621_tagged, e621_tag_failed", # Add automatic filtering "ExcludeOrganized": False } settings.update(config.get("e621_tagger", {})) @@ -218,13 +236,17 @@ def scrape_image(client: StashInterface, image_id: str) -> None: log.info(settings) # Get e621_tagged ID for filtering - e621_tag = get_or_create_tag(stash, "e621_tagged") + e621_tagged_tag = get_or_create_tag(stash, "e621_tagged") + e621_tag_failed_tag = get_or_create_tag(stash, "e621_tag_failed") # Existing tags + automatic e621_tagged exclusion skip_tags = [t.strip() for t in settings["SkipTags"].split(",") if t.strip()] - skip_tags.append(e621_tag["id"]) # Filter by ID instead of name + skip_tags.append(e621_tagged_tag["id"]) # Filter by ID instead of name + skip_tags.append(e621_tag_failed_tag["id"]) # Filter by ID instead of name + log.info("Getting images...") images = get_all_images(stash, skip_tags, settings["ExcludeOrganized"]) + log.info(f"Got ${str(len(images))} images"); # Rest of the loop remains the same for i, image in enumerate(images, 1): From fe0031b77a0c3143c5e047adf614e3506ab7b3ad Mon Sep 17 00:00:00 2001 From: yokarion Date: Sat, 17 May 2025 23:18:02 +0200 Subject: [PATCH 02/10] prevent OOM on big databases --- plugins/e621_tagger/e621_tagger.py | 159 ++++++++++++----------------- 1 file changed, 63 insertions(+), 96 deletions(-) diff --git a/plugins/e621_tagger/e621_tagger.py b/plugins/e621_tagger/e621_tagger.py index d0aa528d..f5bd37b7 100644 --- a/plugins/e621_tagger/e621_tagger.py +++ b/plugins/e621_tagger/e621_tagger.py @@ -8,58 +8,61 @@ from stashapi.stashapp import StashInterface - def get_all_images( - client: StashInterface, - skip_tags: list[str], - exclude_organized: bool + client: StashInterface, + skip_tags: list[int], + exclude_organized: bool, + per_page: int = 100, ) -> list[dict]: """ - Get all images with proper tag exclusion and organization filter + Generator to fetch images in pages from the stash API. """ - image_filter = {} - pagination = { - "page": 1, - "per_page": -1, # -1 gets all results at once - "sort": "created_at", - "direction": "ASC", - } - - # Convert tag names to IDs - tag_ids = [] - for tag_name in skip_tags: - tag = get_or_create_tag(client, tag_name) - if tag: - tag_ids.append(tag["id"]) - - if tag_ids: - image_filter["tags"] = { - "value": [], - "excludes": tag_ids, - "modifier": "INCLUDES_ALL", - "depth": -1, + page = 1 + while True: + image_filter = {} + pagination = { + "page": page, + "per_page": per_page, + "sort": "created_at", + "direction": "ASC", } - if exclude_organized: - image_filter["organized"] = False # Correct field name + if skip_tags: + image_filter["tags"] = { + "value": [], + "excludes": skip_tags, + "modifier": "INCLUDES_ALL", + "depth": -1, + } + + if exclude_organized: + image_filter["organized"] = False + + images = client.find_images(f=image_filter, filter=pagination) + if not images: + # no more pages + break + + log.info(f"Fetched page {page} with {len(images)} images") + for img in images: + yield img - # Maintain original parameter structure - return client.find_images(f=image_filter, filter=pagination) + # move to next page + page += 1 def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> None: """Process e621 metadata and update Stash records""" - # Skip already processed images + # same as before... image = stash.find_image(image_id) - if any(tag["name"] == "e621_tagged" for tag in image.get("tags", [])): + if any(t["name"] == "e621_tagged" for t in image.get("tags", [])): return - - if any(tag["name"] == "e621_tag_failed" for tag in image.get("tags", [])): + if any(t["name"] == "e621_tag_failed" for t in image.get("tags", [])): return try: - time.sleep(2) # Rate limiting + time.sleep(0.5) response = requests.get( f"https://e621.net/posts.json?md5={image_md5}", headers={"User-Agent": "Stash-e621-Tagger/1.0"}, @@ -69,53 +72,38 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N post_data = response.json().get("post", {}) except Exception as e: log.error(f"Marking as failed. e621 API error: {str(e)}") - e621_tag_failed_tag = get_or_create_tag(stash, "e621_tag_failed") - image_tags_on_e621_fail_ids = [e621_tag_failed_tag["id"]] - - for tag in image.get("tags"): - image_tags_on_e621_fail_ids.append(tag["id"]) - - stash.update_image({ - "id": image_id, - "tag_ids": list(set(image_tags_on_e621_fail_ids)) - }) - + e621_tag_failed = get_or_create_tag(stash, "e621_tag_failed") + fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in image.get("tags", [])] + stash.update_image({"id": image_id, "tag_ids": list(set(fail_ids))}) return if not post_data: return - # Create essential entities e621_tag = get_or_create_tag(stash, "e621_tagged") post_url = f"https://e621.net/posts/{post_data['id']}" - # Process tags tag_ids = [e621_tag["id"]] - for category in ["general", "species", "character", "artist", "copyright"]: - for tag in post_data.get("tags", {}).get(category, []): - # Clean and validate tag + for cat in ["general", "species", "character", "artist", "copyright"]: + for tag in post_data.get("tags", {}).get(cat, []): clean_tag = tag.strip() if not clean_tag: continue - stash_tag = get_or_create_tag(stash, clean_tag) if stash_tag: tag_ids.append(stash_tag["id"]) - # Process studio studio_id = None if artists := post_data.get("tags", {}).get("artist"): studio = get_or_create_studio(stash, artists[0]) studio_id = studio["id"] - # Process performers performer_ids = [] - for char_tag in post_data.get("tags", {}).get("character", []): - performer_name = char_tag.split('_(')[0] - performer = get_or_create_performer(stash, performer_name) - performer_ids.append(performer["id"]) + for char in post_data.get("tags", {}).get("character", []): + name = char.split('_(')[0] + perf = get_or_create_performer(stash, name) + performer_ids.append(perf["id"]) - # Update image try: stash.update_image({ "id": image_id, @@ -125,13 +113,11 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N "studio_id": studio_id, "performer_ids": performer_ids }) - - log.info("Image updated: ${image_id}") + log.info(f"Image updated: {image_id}") except Exception as e: log.error(f"Update failed: {str(e)}") - def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict: """Find or create tag with hierarchy handling""" # Validate tag name @@ -183,6 +169,7 @@ def get_or_create_performer(stash: StashInterface, name: str) -> dict: def scrape_image(client: StashInterface, image_id: str) -> None: """Main scraping handler""" + # same logic as before for MD5 extraction and process_e621_post call image = client.find_image(image_id) if not image or not image.get("visual_files"): return @@ -190,70 +177,50 @@ def scrape_image(client: StashInterface, image_id: str) -> None: file_data = image["visual_files"][0] filename = file_data["basename"] filename_md5 = filename.split('.')[0] - final_md5 = None - # First try filename-based MD5 if re.match(r"^[a-f0-9]{32}$", filename_md5): final_md5 = filename_md5 log.info(f"Using filename MD5: {final_md5}") else: - # Fallback to content-based MD5 try: - file_path = file_data["path"] - log.info(f"Generating MD5 from file content: {file_path}") - md5_hash = hashlib.md5() - with open(file_path, "rb") as f: - # Read file in 64kb chunks for memory efficiency + with open(file_data["path"], "rb") as f: for chunk in iter(lambda: f.read(65536), b""): md5_hash.update(chunk) - final_md5 = md5_hash.hexdigest() log.info(f"Generated content MD5: {final_md5}") except Exception as e: log.error(f"Failed to generate MD5: {str(e)}") return - if final_md5: - process_e621_post(client, image_id, final_md5) - else: - log.warning("No valid MD5 available for processing") + process_e621_post(client, image_id, final_md5) + -# Plugin setup and execution -# In the main execution block: if __name__ == "__main__": - log.info("Starting tagger...") + log.info("Starting tagger with pagination...") json_input = json.loads(sys.stdin.read()) stash = StashInterface(json_input["server_connection"]) config = stash.get_configuration().get("plugins", {}) settings = { - "SkipTags": "e621_tagged, e621_tag_failed", # Add automatic filtering + "SkipTags": "e621_tagged, e621_tag_failed", "ExcludeOrganized": False } settings.update(config.get("e621_tagger", {})) - log.info(settings) + e621_tagged = get_or_create_tag(stash, "e621_tagged") + e621_failed = get_or_create_tag(stash, "e621_tag_failed") - # Get e621_tagged ID for filtering - e621_tagged_tag = get_or_create_tag(stash, "e621_tagged") - e621_tag_failed_tag = get_or_create_tag(stash, "e621_tag_failed") - - # Existing tags + automatic e621_tagged exclusion skip_tags = [t.strip() for t in settings["SkipTags"].split(",") if t.strip()] - skip_tags.append(e621_tagged_tag["id"]) # Filter by ID instead of name - skip_tags.append(e621_tag_failed_tag["id"]) # Filter by ID instead of name - - log.info("Getting images...") - images = get_all_images(stash, skip_tags, settings["ExcludeOrganized"]) - log.info(f"Got ${str(len(images))} images"); + skip_tags = [st for st in skip_tags] + skip_tags.extend([e621_tagged["id"], e621_failed["id"]]) - # Rest of the loop remains the same - for i, image in enumerate(images, 1): - image_tag_names = [tag["name"] for tag in image.get("tags", [])] - if any(tag in image_tag_names for tag in skip_tags): + log.info("Fetching images in pages...") + for idx, image in enumerate(get_all_images(stash, skip_tags, settings["ExcludeOrganized"], per_page=100), start=1): + current_tags = [t["name"] for t in image.get("tags", [])] + if any(t in current_tags for t in skip_tags): log.info(f"Skipping image {image['id']} - contains skip tag") continue - log.progress(i/len(images)) + log.progress(idx) scrape_image(stash, image["id"]) From 4d6bae61140ccc1089d24bcaeb46f24a185cc3ed Mon Sep 17 00:00:00 2001 From: yokarion Date: Sat, 17 May 2025 23:22:23 +0200 Subject: [PATCH 03/10] Update e621_tagger.yml --- plugins/e621_tagger/e621_tagger.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/e621_tagger/e621_tagger.yml b/plugins/e621_tagger/e621_tagger.yml index 3ad01863..9b8ecc28 100644 --- a/plugins/e621_tagger/e621_tagger.yml +++ b/plugins/e621_tagger/e621_tagger.yml @@ -1,6 +1,6 @@ name: e621_tagger description: Finding images and videos on e621 and tagging them. -version: 0.1 +version: 0.2 url: https://github.com/stashapp/CommunityScripts/ exec: - python From facaf75bb6002ab16508fe5a743f5b406cf17281 Mon Sep 17 00:00:00 2001 From: yokarion Date: Thu, 11 Sep 2025 21:01:52 +0200 Subject: [PATCH 04/10] e621_tagger - fixed pagination and progress bar --- plugins/e621_tagger/e621_tagger.py | 103 ++++++++++++++++------------- 1 file changed, 56 insertions(+), 47 deletions(-) diff --git a/plugins/e621_tagger/e621_tagger.py b/plugins/e621_tagger/e621_tagger.py index f5bd37b7..18754433 100644 --- a/plugins/e621_tagger/e621_tagger.py +++ b/plugins/e621_tagger/e621_tagger.py @@ -10,14 +10,15 @@ def get_all_images( client: StashInterface, - skip_tags: list[int], + skip_tag_ids: list[int], exclude_organized: bool, per_page: int = 100, ) -> list[dict]: """ - Generator to fetch images in pages from the stash API. + Fetch all images (returns a stable list snapshot). Uses numeric tag IDs in skip_tag_ids. """ page = 1 + all_images = [] while True: image_filter = {} pagination = { @@ -27,10 +28,10 @@ def get_all_images( "direction": "ASC", } - if skip_tags: + if skip_tag_ids: image_filter["tags"] = { "value": [], - "excludes": skip_tags, + "excludes": skip_tag_ids, "modifier": "INCLUDES_ALL", "depth": -1, } @@ -40,20 +41,16 @@ def get_all_images( images = client.find_images(f=image_filter, filter=pagination) if not images: - # no more pages break log.info(f"Fetched page {page} with {len(images)} images") - for img in images: - yield img - - # move to next page + all_images.extend(images) page += 1 + return all_images + def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> None: - """Process e621 metadata and update Stash records""" - # same as before... image = stash.find_image(image_id) if any(t["name"] == "e621_tagged" for t in image.get("tags", [])): return @@ -66,7 +63,7 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N response = requests.get( f"https://e621.net/posts.json?md5={image_md5}", headers={"User-Agent": "Stash-e621-Tagger/1.0"}, - timeout=10 + timeout=10, ) response.raise_for_status() post_data = response.json().get("post", {}) @@ -100,27 +97,27 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N performer_ids = [] for char in post_data.get("tags", {}).get("character", []): - name = char.split('_(')[0] + name = char.split("_(")[0] perf = get_or_create_performer(stash, name) performer_ids.append(perf["id"]) try: - stash.update_image({ - "id": image_id, - "organized": True, - "urls": [post_url], - "tag_ids": list(set(tag_ids)), - "studio_id": studio_id, - "performer_ids": performer_ids - }) + stash.update_image( + { + "id": image_id, + "organized": True, + "urls": [post_url], + "tag_ids": list(set(tag_ids)), + "studio_id": studio_id, + "performer_ids": performer_ids, + } + ) log.info(f"Image updated: {image_id}") except Exception as e: log.error(f"Update failed: {str(e)}") def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict: - """Find or create tag with hierarchy handling""" - # Validate tag name tag_name = tag_name.strip() if not tag_name: log.error("Attempted to create tag with empty name") @@ -129,15 +126,17 @@ def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict: existing = stash.find_tags(f={"name": {"value": tag_name, "modifier": "EQUALS"}}) if existing: return existing[0] - + parts = tag_name.split(":") parent_id = None for i in range(len(parts)): - current_name = ":".join(parts[:i+1]).strip() + current_name = ":".join(parts[: i + 1]).strip() if not current_name: continue - - existing = stash.find_tags(f={"name": {"value": current_name, "modifier": "EQUALS"}}) + + existing = stash.find_tags( + f={"name": {"value": current_name, "modifier": "EQUALS"}} + ) if not existing: create_data = {"name": current_name} if parent_id: @@ -155,28 +154,27 @@ def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict: parent_id = existing[0]["id"] return {"id": parent_id} + def get_or_create_studio(stash: StashInterface, name: str) -> dict: - """Find or create studio""" studios = stash.find_studios(f={"name": {"value": name, "modifier": "EQUALS"}}) return studios[0] if studios else stash.create_studio({"name": name}) def get_or_create_performer(stash: StashInterface, name: str) -> dict: - """Find or create performer""" - performers = stash.find_performers(f={"name": {"value": name, "modifier": "EQUALS"}}) + performers = stash.find_performers( + f={"name": {"value": name, "modifier": "EQUALS"}} + ) return performers[0] if performers else stash.create_performer({"name": name}) def scrape_image(client: StashInterface, image_id: str) -> None: - """Main scraping handler""" - # same logic as before for MD5 extraction and process_e621_post call image = client.find_image(image_id) if not image or not image.get("visual_files"): return file_data = image["visual_files"][0] filename = file_data["basename"] - filename_md5 = filename.split('.')[0] + filename_md5 = filename.split(".")[0] if re.match(r"^[a-f0-9]{32}$", filename_md5): final_md5 = filename_md5 @@ -197,30 +195,41 @@ def scrape_image(client: StashInterface, image_id: str) -> None: if __name__ == "__main__": - log.info("Starting tagger with pagination...") + log.info("Starting tagger with stable pagination snapshot...") json_input = json.loads(sys.stdin.read()) stash = StashInterface(json_input["server_connection"]) config = stash.get_configuration().get("plugins", {}) - settings = { - "SkipTags": "e621_tagged, e621_tag_failed", - "ExcludeOrganized": False - } + settings = {"SkipTags": "e621_tagged, e621_tag_failed", "ExcludeOrganized": False} settings.update(config.get("e621_tagger", {})) + # ensure e621 tags exist and get their ids e621_tagged = get_or_create_tag(stash, "e621_tagged") e621_failed = get_or_create_tag(stash, "e621_tag_failed") - skip_tags = [t.strip() for t in settings["SkipTags"].split(",") if t.strip()] - skip_tags = [st for st in skip_tags] - skip_tags.extend([e621_tagged["id"], e621_failed["id"]]) - - log.info("Fetching images in pages...") - for idx, image in enumerate(get_all_images(stash, skip_tags, settings["ExcludeOrganized"], per_page=100), start=1): - current_tags = [t["name"] for t in image.get("tags", [])] - if any(t in current_tags for t in skip_tags): + # resolve skip tag NAMES from settings to tag IDs + skip_tag_names = [n.strip() for n in settings["SkipTags"].split(",") if n.strip()] + skip_tag_ids = [] + for name in skip_tag_names: + found = stash.find_tags(f={"name": {"value": name, "modifier": "EQUALS"}}) + if found: + skip_tag_ids.append(found[0]["id"]) + # always include the internal e621 tags (ensure ints) + skip_tag_ids.extend([e621_tagged["id"], e621_failed["id"]]) + + log.info("Fetching images in pages (stable snapshot)...") + images = get_all_images( + stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=10 + ) + total = len(images) or 1 + + for idx, image in enumerate(images, start=1): + progress = idx / total + log.progress(progress) + + current_tag_ids = [t["id"] for t in image.get("tags", [])] + if any(tid in current_tag_ids for tid in skip_tag_ids): log.info(f"Skipping image {image['id']} - contains skip tag") continue - log.progress(idx) scrape_image(stash, image["id"]) From c3639cc1cfd14a7604f5ed220246ff533737cecc Mon Sep 17 00:00:00 2001 From: yokarion Date: Thu, 11 Sep 2025 21:14:12 +0200 Subject: [PATCH 05/10] e621_tagger - scenes --- plugins/e621_tagger/e621_tagger.py | 251 +++++++++++++++++++++++------ 1 file changed, 201 insertions(+), 50 deletions(-) diff --git a/plugins/e621_tagger/e621_tagger.py b/plugins/e621_tagger/e621_tagger.py index 18754433..d80e1a83 100644 --- a/plugins/e621_tagger/e621_tagger.py +++ b/plugins/e621_tagger/e621_tagger.py @@ -6,17 +6,18 @@ import requests import stashapi.log as log from stashapi.stashapp import StashInterface +from typing import List + + +MD5_RE = re.compile(r"^[a-f0-9]{32}$") def get_all_images( client: StashInterface, - skip_tag_ids: list[int], + skip_tag_ids: List[int], exclude_organized: bool, per_page: int = 100, -) -> list[dict]: - """ - Fetch all images (returns a stable list snapshot). Uses numeric tag IDs in skip_tag_ids. - """ +) -> List[dict]: page = 1 all_images = [] while True: @@ -43,25 +44,80 @@ def get_all_images( if not images: break - log.info(f"Fetched page {page} with {len(images)} images") + log.info(f"Fetched image page {page} with {len(images)} images") all_images.extend(images) page += 1 return all_images -def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> None: - image = stash.find_image(image_id) - if any(t["name"] == "e621_tagged" for t in image.get("tags", [])): - return +def get_all_scenes( + client: StashInterface, + skip_tag_ids: List[int], + exclude_organized: bool, + per_page: int = 100, +) -> List[dict]: + page = 1 + all_scenes = [] + while True: + scene_filter = {} + pagination = { + "page": page, + "per_page": per_page, + "sort": "created_at", + "direction": "ASC", + } + + if skip_tag_ids: + scene_filter["tags"] = { + "value": [], + "excludes": skip_tag_ids, + "modifier": "INCLUDES_ALL", + "depth": -1, + } + + if exclude_organized: + scene_filter["organized"] = False + + scenes = client.find_scenes(f=scene_filter, filter=pagination) + if not scenes: + break + + log.info(f"Fetched scene page {page} with {len(scenes)} scenes") + all_scenes.extend(scenes) + page += 1 + + return all_scenes + - if any(t["name"] == "e621_tag_failed" for t in image.get("tags", [])): +def process_e621_post_for_item( + stash: StashInterface, item_type: str, item_id: str, item_md5: str +) -> None: + """ + item_type: "image" or "scene" + Update the corresponding object on success, or tag as failed on API error. + """ + # Fetch latest object to check tags + if item_type == "image": + obj = stash.find_image(item_id) + already_tagged = any(t["name"] == "e621_tagged" for t in obj.get("tags", [])) + already_failed = any( + t["name"] == "e621_tag_failed" for t in obj.get("tags", []) + ) + else: + obj = stash.find_scene(item_id) + already_tagged = any(t["name"] == "e621_tagged" for t in obj.get("tags", [])) + already_failed = any( + t["name"] == "e621_tag_failed" for t in obj.get("tags", []) + ) + + if already_tagged or already_failed: return try: time.sleep(0.5) response = requests.get( - f"https://e621.net/posts.json?md5={image_md5}", + f"https://e621.net/posts.json?md5={item_md5}", headers={"User-Agent": "Stash-e621-Tagger/1.0"}, timeout=10, ) @@ -70,8 +126,11 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N except Exception as e: log.error(f"Marking as failed. e621 API error: {str(e)}") e621_tag_failed = get_or_create_tag(stash, "e621_tag_failed") - fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in image.get("tags", [])] - stash.update_image({"id": image_id, "tag_ids": list(set(fail_ids))}) + fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in obj.get("tags", [])] + if item_type == "image": + stash.update_image({"id": item_id, "tag_ids": list(set(fail_ids))}) + else: + stash.update_scene({"id": item_id, "tag_ids": list(set(fail_ids))}) return if not post_data: @@ -81,7 +140,7 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N post_url = f"https://e621.net/posts/{post_data['id']}" tag_ids = [e621_tag["id"]] - for cat in ["general", "species", "character", "artist", "copyright"]: + for cat in ["general", "species", "character", "artist", "copyright", "meta"]: for tag in post_data.get("tags", {}).get(cat, []): clean_tag = tag.strip() if not clean_tag: @@ -102,17 +161,20 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N performer_ids.append(perf["id"]) try: - stash.update_image( - { - "id": image_id, - "organized": True, - "urls": [post_url], - "tag_ids": list(set(tag_ids)), - "studio_id": studio_id, - "performer_ids": performer_ids, - } - ) - log.info(f"Image updated: {image_id}") + update_payload = { + "id": item_id, + "organized": True, + "urls": [post_url], + "tag_ids": list(set(tag_ids)), + "studio_id": studio_id, + "performer_ids": performer_ids, + } + if item_type == "image": + stash.update_image(update_payload) + log.info(f"Image updated: {item_id}") + else: + stash.update_scene(update_payload) + log.info(f"Scene updated: {item_id}") except Exception as e: log.error(f"Update failed: {str(e)}") @@ -173,29 +235,92 @@ def scrape_image(client: StashInterface, image_id: str) -> None: return file_data = image["visual_files"][0] - filename = file_data["basename"] - filename_md5 = filename.split(".")[0] + filename = file_data.get("basename", "") + filename_md5 = filename.split(".")[0] if filename else "" - if re.match(r"^[a-f0-9]{32}$", filename_md5): + if MD5_RE.match(filename_md5): final_md5 = filename_md5 - log.info(f"Using filename MD5: {final_md5}") + log.info(f"Using filename MD5 for image: {final_md5}") + else: + # try if API provided checksum/md5 field on image + if image.get("checksum"): + final_md5 = image["checksum"] + log.info(f"Using image checksum: {final_md5}") + elif image.get("md5"): + final_md5 = image["md5"] + log.info(f"Using image md5: {final_md5}") + else: + try: + md5_hash = hashlib.md5() + with open(file_data["path"], "rb") as f: + for chunk in iter(lambda: f.read(65536), b""): + md5_hash.update(chunk) + final_md5 = md5_hash.hexdigest() + log.info(f"Generated content MD5 for image: {final_md5}") + except Exception as e: + log.error(f"Failed to generate MD5 for image: {str(e)}") + return + + process_e621_post_for_item(client, "image", image_id, final_md5) + + +def scrape_scene(client: StashInterface, scene_id: str) -> None: + """ + Attempt to find a stable MD5 for a scene/video: + - prefer scene.checksum or scene.md5 + - then files[0].checksum + - then files[0].basename parsed for md5 + - fallback: compute MD5 from files[0].path + """ + scene = client.find_scene(scene_id) + if not scene: + return + + final_md5 = None + + # direct fields + if scene.get("checksum") and MD5_RE.match(scene.get("checksum")): + final_md5 = scene.get("checksum") + log.info(f"Using scene checksum: {final_md5}") + elif scene.get("md5") and MD5_RE.match(scene.get("md5")): + final_md5 = scene.get("md5") + log.info(f"Using scene md5: {final_md5}") else: - try: - md5_hash = hashlib.md5() - with open(file_data["path"], "rb") as f: - for chunk in iter(lambda: f.read(65536), b""): - md5_hash.update(chunk) - final_md5 = md5_hash.hexdigest() - log.info(f"Generated content MD5: {final_md5}") - except Exception as e: - log.error(f"Failed to generate MD5: {str(e)}") + files = scene.get("files") or scene.get("scene_files") or [] + if files: + file_data = files[0] + # try file-level checksum + if file_data.get("checksum") and MD5_RE.match(file_data.get("checksum")): + final_md5 = file_data.get("checksum") + log.info(f"Using file checksum for scene: {final_md5}") + else: + basename = file_data.get("basename", "") + filename_md5 = basename.split(".")[0] if basename else "" + if MD5_RE.match(filename_md5): + final_md5 = filename_md5 + log.info(f"Using filename MD5 for scene: {final_md5}") + else: + # attempt to compute + try: + md5_hash = hashlib.md5() + with open(file_data["path"], "rb") as f: + for chunk in iter(lambda: f.read(65536), b""): + md5_hash.update(chunk) + final_md5 = md5_hash.hexdigest() + log.info(f"Generated content MD5 for scene: {final_md5}") + except Exception as e: + log.error(f"Failed to generate MD5 for scene: {str(e)}") + return + else: + log.error(f"No files found for scene {scene_id}; cannot compute md5") return - process_e621_post(client, image_id, final_md5) + if final_md5: + process_e621_post_for_item(client, "scene", scene_id, final_md5) if __name__ == "__main__": - log.info("Starting tagger with stable pagination snapshot...") + log.info("Starting tagger with stable pagination snapshot (images + scenes)...") json_input = json.loads(sys.stdin.read()) stash = StashInterface(json_input["server_connection"]) @@ -209,7 +334,7 @@ def scrape_image(client: StashInterface, image_id: str) -> None: # resolve skip tag NAMES from settings to tag IDs skip_tag_names = [n.strip() for n in settings["SkipTags"].split(",") if n.strip()] - skip_tag_ids = [] + skip_tag_ids: List[int] = [] for name in skip_tag_names: found = stash.find_tags(f={"name": {"value": name, "modifier": "EQUALS"}}) if found: @@ -219,17 +344,43 @@ def scrape_image(client: StashInterface, image_id: str) -> None: log.info("Fetching images in pages (stable snapshot)...") images = get_all_images( - stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=10 + stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=50 + ) + log.info("Fetching scenes in pages (stable snapshot)...") + scenes = get_all_scenes( + stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=50 ) - total = len(images) or 1 - for idx, image in enumerate(images, start=1): - progress = idx / total - log.progress(progress) + # build unified list with type so we can preserve progress and skipping logic + unified = [] + for img in images: + unified.append({"type": "image", "obj": img}) + for sc in scenes: + unified.append({"type": "scene", "obj": sc}) + + total = len(unified) or 1 + for idx, entry in enumerate(unified, start=1): + # report start-of-item progress (0..1). avoid sending 1.0 until the very end. + log.progress(float(idx - 1) / float(total)) + + item_type = entry["type"] + item = entry["obj"] + item_id = item["id"] - current_tag_ids = [t["id"] for t in image.get("tags", [])] + current_tag_ids = [t["id"] for t in item.get("tags", [])] if any(tid in current_tag_ids for tid in skip_tag_ids): - log.info(f"Skipping image {image['id']} - contains skip tag") + log.info(f"Skipping {item_type} {item_id} - contains skip tag") + # reflect the skipped item as completed + log.progress(float(idx) / float(total)) continue - scrape_image(stash, image["id"]) + if item_type == "image": + scrape_image(stash, item_id) + else: + scrape_scene(stash, item_id) + + # mark this item done + log.progress(float(idx) / float(total)) + + # ensure UI shows complete when finished + log.progress(1.0) From d478cd9ddee4b32df09df64b5df122427a3b34b3 Mon Sep 17 00:00:00 2001 From: yokarion Date: Thu, 11 Sep 2025 21:24:05 +0200 Subject: [PATCH 06/10] e621_tagger.py - added meta tags, removed character tags (they already set as performers) --- plugins/e621_tagger/e621_tagger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/e621_tagger/e621_tagger.py b/plugins/e621_tagger/e621_tagger.py index d80e1a83..3cdac16f 100644 --- a/plugins/e621_tagger/e621_tagger.py +++ b/plugins/e621_tagger/e621_tagger.py @@ -140,7 +140,7 @@ def process_e621_post_for_item( post_url = f"https://e621.net/posts/{post_data['id']}" tag_ids = [e621_tag["id"]] - for cat in ["general", "species", "character", "artist", "copyright", "meta"]: + for cat in ["general", "species", "artist", "copyright", "meta"]: for tag in post_data.get("tags", {}).get(cat, []): clean_tag = tag.strip() if not clean_tag: From d18638a82ca17f8ceda42c57fb157f8d6af0468f Mon Sep 17 00:00:00 2001 From: yokarion Date: Thu, 11 Sep 2025 21:24:36 +0200 Subject: [PATCH 07/10] e621_tagger.yml --- plugins/e621_tagger/e621_tagger.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/e621_tagger/e621_tagger.yml b/plugins/e621_tagger/e621_tagger.yml index 9b8ecc28..727921d1 100644 --- a/plugins/e621_tagger/e621_tagger.yml +++ b/plugins/e621_tagger/e621_tagger.yml @@ -1,6 +1,6 @@ name: e621_tagger description: Finding images and videos on e621 and tagging them. -version: 0.2 +version: 0.3 url: https://github.com/stashapp/CommunityScripts/ exec: - python From 12f2ab4339e36561060298032ae126af57278466 Mon Sep 17 00:00:00 2001 From: yokarion Date: Thu, 11 Sep 2025 21:53:16 +0200 Subject: [PATCH 08/10] e621_tagger.py fixed pagination --- plugins/e621_tagger/e621_tagger.py | 150 +++++++++++++---------------- 1 file changed, 65 insertions(+), 85 deletions(-) diff --git a/plugins/e621_tagger/e621_tagger.py b/plugins/e621_tagger/e621_tagger.py index 3cdac16f..1472ea2e 100644 --- a/plugins/e621_tagger/e621_tagger.py +++ b/plugins/e621_tagger/e621_tagger.py @@ -4,99 +4,97 @@ import json import time import requests +import itertools import stashapi.log as log from stashapi.stashapp import StashInterface from typing import List - MD5_RE = re.compile(r"^[a-f0-9]{32}$") -def get_all_images( +def _build_filter(skip_tag_ids, exclude_organized): + f = {} + if skip_tag_ids: + f["tags"] = { + "value": [], + "excludes": skip_tag_ids, + "modifier": "INCLUDES_ALL", + "depth": -1, + } + if exclude_organized: + f["organized"] = False + return f + + +def count_images( + client: StashInterface, skip_tag_ids: list, exclude_organized: bool +) -> int: + image_filter = _build_filter(skip_tag_ids, exclude_organized) + pagination = {"page": 1, "per_page": 0, "sort": "created_at", "direction": "ASC"} + total, _ = client.find_images(f=image_filter, filter=pagination, get_count=True) + return total + + +def count_scenes( + client: StashInterface, skip_tag_ids: list, exclude_organized: bool +) -> int: + scene_filter = _build_filter(skip_tag_ids, exclude_organized) + pagination = {"page": 1, "per_page": 0, "sort": "created_at", "direction": "ASC"} + total, _ = client.find_scenes(f=scene_filter, filter=pagination, get_count=True) + return total + + +def stream_images( client: StashInterface, skip_tag_ids: List[int], exclude_organized: bool, per_page: int = 100, -) -> List[dict]: +): page = 1 - all_images = [] + base_filter = _build_filter(skip_tag_ids, exclude_organized) while True: - image_filter = {} pagination = { "page": page, "per_page": per_page, "sort": "created_at", "direction": "ASC", } - - if skip_tag_ids: - image_filter["tags"] = { - "value": [], - "excludes": skip_tag_ids, - "modifier": "INCLUDES_ALL", - "depth": -1, - } - - if exclude_organized: - image_filter["organized"] = False - - images = client.find_images(f=image_filter, filter=pagination) + images = client.find_images(f=base_filter, filter=pagination) if not images: break - log.info(f"Fetched image page {page} with {len(images)} images") - all_images.extend(images) + for img in images: + yield ("image", img) page += 1 - return all_images - -def get_all_scenes( +def stream_scenes( client: StashInterface, skip_tag_ids: List[int], exclude_organized: bool, per_page: int = 100, -) -> List[dict]: +): page = 1 - all_scenes = [] + base_filter = _build_filter(skip_tag_ids, exclude_organized) while True: - scene_filter = {} pagination = { "page": page, "per_page": per_page, "sort": "created_at", "direction": "ASC", } - - if skip_tag_ids: - scene_filter["tags"] = { - "value": [], - "excludes": skip_tag_ids, - "modifier": "INCLUDES_ALL", - "depth": -1, - } - - if exclude_organized: - scene_filter["organized"] = False - - scenes = client.find_scenes(f=scene_filter, filter=pagination) + scenes = client.find_scenes(f=base_filter, filter=pagination) if not scenes: break - log.info(f"Fetched scene page {page} with {len(scenes)} scenes") - all_scenes.extend(scenes) + for sc in scenes: + yield ("scene", sc) page += 1 - return all_scenes - def process_e621_post_for_item( stash: StashInterface, item_type: str, item_id: str, item_md5: str ) -> None: - """ - item_type: "image" or "scene" - Update the corresponding object on success, or tag as failed on API error. - """ # Fetch latest object to check tags if item_type == "image": obj = stash.find_image(item_id) @@ -242,7 +240,6 @@ def scrape_image(client: StashInterface, image_id: str) -> None: final_md5 = filename_md5 log.info(f"Using filename MD5 for image: {final_md5}") else: - # try if API provided checksum/md5 field on image if image.get("checksum"): final_md5 = image["checksum"] log.info(f"Using image checksum: {final_md5}") @@ -265,20 +262,12 @@ def scrape_image(client: StashInterface, image_id: str) -> None: def scrape_scene(client: StashInterface, scene_id: str) -> None: - """ - Attempt to find a stable MD5 for a scene/video: - - prefer scene.checksum or scene.md5 - - then files[0].checksum - - then files[0].basename parsed for md5 - - fallback: compute MD5 from files[0].path - """ scene = client.find_scene(scene_id) if not scene: return final_md5 = None - # direct fields if scene.get("checksum") and MD5_RE.match(scene.get("checksum")): final_md5 = scene.get("checksum") log.info(f"Using scene checksum: {final_md5}") @@ -289,7 +278,6 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None: files = scene.get("files") or scene.get("scene_files") or [] if files: file_data = files[0] - # try file-level checksum if file_data.get("checksum") and MD5_RE.match(file_data.get("checksum")): final_md5 = file_data.get("checksum") log.info(f"Using file checksum for scene: {final_md5}") @@ -300,7 +288,6 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None: final_md5 = filename_md5 log.info(f"Using filename MD5 for scene: {final_md5}") else: - # attempt to compute try: md5_hash = hashlib.md5() with open(file_data["path"], "rb") as f: @@ -320,7 +307,7 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None: if __name__ == "__main__": - log.info("Starting tagger with stable pagination snapshot (images + scenes)...") + log.info("Starting tagger with stable pagination snapshot (streamed)...") json_input = json.loads(sys.stdin.read()) stash = StashInterface(json_input["server_connection"]) @@ -328,49 +315,44 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None: settings = {"SkipTags": "e621_tagged, e621_tag_failed", "ExcludeOrganized": False} settings.update(config.get("e621_tagger", {})) - # ensure e621 tags exist and get their ids e621_tagged = get_or_create_tag(stash, "e621_tagged") e621_failed = get_or_create_tag(stash, "e621_tag_failed") - # resolve skip tag NAMES from settings to tag IDs skip_tag_names = [n.strip() for n in settings["SkipTags"].split(",") if n.strip()] skip_tag_ids: List[int] = [] for name in skip_tag_names: found = stash.find_tags(f={"name": {"value": name, "modifier": "EQUALS"}}) if found: skip_tag_ids.append(found[0]["id"]) - # always include the internal e621 tags (ensure ints) skip_tag_ids.extend([e621_tagged["id"], e621_failed["id"]]) - log.info("Fetching images in pages (stable snapshot)...") - images = get_all_images( - stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=50 - ) - log.info("Fetching scenes in pages (stable snapshot)...") - scenes = get_all_scenes( - stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=50 - ) + per_page = 50 + + log.info("Counting images (no storage)...") + num_images = count_images(stash, skip_tag_ids, settings["ExcludeOrganized"]) + log.info("Counting scenes (no storage)...") + num_scenes = count_scenes(stash, skip_tag_ids, settings["ExcludeOrganized"]) + + total = (num_images + num_scenes) or 1 - # build unified list with type so we can preserve progress and skipping logic - unified = [] - for img in images: - unified.append({"type": "image", "obj": img}) - for sc in scenes: - unified.append({"type": "scene", "obj": sc}) + log.info(f"Total items (images + scenes): {total}") - total = len(unified) or 1 - for idx, entry in enumerate(unified, start=1): - # report start-of-item progress (0..1). avoid sending 1.0 until the very end. + stream = itertools.chain( + stream_images( + stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=per_page + ), + stream_scenes( + stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=per_page + ), + ) + + for idx, (item_type, item) in enumerate(stream, start=1): log.progress(float(idx - 1) / float(total)) - item_type = entry["type"] - item = entry["obj"] item_id = item["id"] - current_tag_ids = [t["id"] for t in item.get("tags", [])] if any(tid in current_tag_ids for tid in skip_tag_ids): log.info(f"Skipping {item_type} {item_id} - contains skip tag") - # reflect the skipped item as completed log.progress(float(idx) / float(total)) continue @@ -379,8 +361,6 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None: else: scrape_scene(stash, item_id) - # mark this item done log.progress(float(idx) / float(total)) - # ensure UI shows complete when finished log.progress(1.0) From e6e13d5c435834e0a561397d422010e2e2bf6ed0 Mon Sep 17 00:00:00 2001 From: yokarion Date: Fri, 12 Sep 2025 02:31:52 +0200 Subject: [PATCH 09/10] e621_tagger.py - fix pagination (again) --- plugins/e621_tagger/e621_tagger.py | 191 ++++++++++++++++++++++------- 1 file changed, 147 insertions(+), 44 deletions(-) diff --git a/plugins/e621_tagger/e621_tagger.py b/plugins/e621_tagger/e621_tagger.py index 1472ea2e..5f7306cb 100644 --- a/plugins/e621_tagger/e621_tagger.py +++ b/plugins/e621_tagger/e621_tagger.py @@ -7,7 +7,7 @@ import itertools import stashapi.log as log from stashapi.stashapp import StashInterface -from typing import List +from typing import List, Optional, Tuple MD5_RE = re.compile(r"^[a-f0-9]{32}$") @@ -94,7 +94,11 @@ def stream_scenes( def process_e621_post_for_item( stash: StashInterface, item_type: str, item_id: str, item_md5: str -) -> None: +) -> bool: + """ + CHANGED: return boolean indicating whether the item was updated/marked (True) or left untouched (False). + This lets the caller (main loop) increment progress only when an item actually changed state. + """ # Fetch latest object to check tags if item_type == "image": obj = stash.find_image(item_id) @@ -110,7 +114,7 @@ def process_e621_post_for_item( ) if already_tagged or already_failed: - return + return False # nothing to do try: time.sleep(0.5) @@ -125,14 +129,19 @@ def process_e621_post_for_item( log.error(f"Marking as failed. e621 API error: {str(e)}") e621_tag_failed = get_or_create_tag(stash, "e621_tag_failed") fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in obj.get("tags", [])] - if item_type == "image": - stash.update_image({"id": item_id, "tag_ids": list(set(fail_ids))}) - else: - stash.update_scene({"id": item_id, "tag_ids": list(set(fail_ids))}) - return + try: + if item_type == "image": + stash.update_image({"id": item_id, "tag_ids": list(set(fail_ids))}) + else: + stash.update_scene({"id": item_id, "tag_ids": list(set(fail_ids))}) + return True + except Exception as e2: + log.error(f"Failed to mark as failed: {str(e2)}") + return False if not post_data: - return + # not found on e621: leave untouched so it can be retried later (or user may decide to mark failed) + return False e621_tag = get_or_create_tag(stash, "e621_tagged") post_url = f"https://e621.net/posts/{post_data['id']}" @@ -173,8 +182,10 @@ def process_e621_post_for_item( else: stash.update_scene(update_payload) log.info(f"Scene updated: {item_id}") + return True except Exception as e: log.error(f"Update failed: {str(e)}") + return False def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict: @@ -227,10 +238,13 @@ def get_or_create_performer(stash: StashInterface, name: str) -> dict: return performers[0] if performers else stash.create_performer({"name": name}) -def scrape_image(client: StashInterface, image_id: str) -> None: +def scrape_image(client: StashInterface, image_id: str) -> bool: + """ + PAGINATION: return True if item was updated/marked (so main loop can count progress). + """ image = client.find_image(image_id) if not image or not image.get("visual_files"): - return + return False file_data = image["visual_files"][0] filename = file_data.get("basename", "") @@ -256,15 +270,18 @@ def scrape_image(client: StashInterface, image_id: str) -> None: log.info(f"Generated content MD5 for image: {final_md5}") except Exception as e: log.error(f"Failed to generate MD5 for image: {str(e)}") - return + return False - process_e621_post_for_item(client, "image", image_id, final_md5) + return process_e621_post_for_item(client, "image", image_id, final_md5) -def scrape_scene(client: StashInterface, scene_id: str) -> None: +def scrape_scene(client: StashInterface, scene_id: str) -> bool: + """ + PAGINATION: return True if item was updated/marked (so main loop can count progress). + """ scene = client.find_scene(scene_id) if not scene: - return + return False final_md5 = None @@ -297,17 +314,16 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None: log.info(f"Generated content MD5 for scene: {final_md5}") except Exception as e: log.error(f"Failed to generate MD5 for scene: {str(e)}") - return + return False else: log.error(f"No files found for scene {scene_id}; cannot compute md5") - return + return False - if final_md5: - process_e621_post_for_item(client, "scene", scene_id, final_md5) + return process_e621_post_for_item(client, "scene", scene_id, final_md5) if __name__ == "__main__": - log.info("Starting tagger with stable pagination snapshot (streamed)...") + log.info("Starting tagger with scanning passes until no work left...") json_input = json.loads(sys.stdin.read()) stash = StashInterface(json_input["server_connection"]) @@ -337,30 +353,117 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None: log.info(f"Total items (images + scenes): {total}") - stream = itertools.chain( - stream_images( - stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=per_page - ), - stream_scenes( - stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=per_page - ), - ) - - for idx, (item_type, item) in enumerate(stream, start=1): - log.progress(float(idx - 1) / float(total)) - - item_id = item["id"] - current_tag_ids = [t["id"] for t in item.get("tags", [])] - if any(tid in current_tag_ids for tid in skip_tag_ids): - log.info(f"Skipping {item_type} {item_id} - contains skip tag") - log.progress(float(idx) / float(total)) - continue - - if item_type == "image": - scrape_image(stash, item_id) - else: - scrape_scene(stash, item_id) + processed_count = 0 + pass_num = 0 + # Loop passes until a full pass processes zero items. + while True: + pass_num += 1 + log.info(f"Starting scanning pass #{pass_num}") + pass_processed = 0 + + # Scan images by pages + page = 1 + while True: + pagination = { + "page": page, + "per_page": per_page, + "sort": "created_at", + "direction": "ASC", + } + images = stash.find_images(f=_build_filter(skip_tag_ids, settings["ExcludeOrganized"]), filter=pagination) + log.info(f"[pass {pass_num}] fetched image page {page}, count={len(images)}") + if not images: + break + for img in images: + item_id = img.get("id") + if not item_id: + log.error(f"[pass {pass_num}] image without id on page {page}") + continue + + # Defensive fetch of current tags to avoid race conditions + current = stash.find_image(item_id) + current_tag_ids = [t["id"] for t in current.get("tags", [])] + if any(tid in current_tag_ids for tid in skip_tag_ids): + # Shouldn't usually happen because filter excluded them, but handle gracefully. + log.info(f"[pass {pass_num}] skipping image {item_id} - now has skip tag") + processed_count += 1 + pass_processed += 1 + log.progress(float(processed_count) / float(total)) + continue + + # Attempt to process; scrape_image now returns True if it updated/marked the item. + try: + updated = scrape_image(stash, item_id) + except Exception as e: + log.error(f"[pass {pass_num}] scrape_image exception for {item_id}: {str(e)}") + updated = False + + if updated: + processed_count += 1 + pass_processed += 1 + log.info(f"[pass {pass_num}] processed image {item_id} (processed_count={processed_count})") + log.progress(float(processed_count) / float(total)) + # If not updated, it will remain in future passes. Continue scanning. + + # If fewer than per_page results, we're at the end of current snapshot + if len(images) < per_page: + break + page += 1 + + # Scan scenes by pages + page = 1 + while True: + pagination = { + "page": page, + "per_page": per_page, + "sort": "created_at", + "direction": "ASC", + } + scenes = stash.find_scenes(f=_build_filter(skip_tag_ids, settings["ExcludeOrganized"]), filter=pagination) + log.info(f"[pass {pass_num}] fetched scene page {page}, count={len(scenes)}") + if not scenes: + break + for sc in scenes: + item_id = sc.get("id") + if not item_id: + log.error(f"[pass {pass_num}] scene without id on page {page}") + continue + + # Defensive fetch + current = stash.find_scene(item_id) + current_tag_ids = [t["id"] for t in current.get("tags", [])] + if any(tid in current_tag_ids for tid in skip_tag_ids): + log.info(f"[pass {pass_num}] skipping scene {item_id} - now has skip tag") + processed_count += 1 + pass_processed += 1 + log.progress(float(processed_count) / float(total)) + continue + + try: + updated = scrape_scene(stash, item_id) + except Exception as e: + log.error(f"[pass {pass_num}] scrape_scene exception for {item_id}: {str(e)}") + updated = False + + if updated: + processed_count += 1 + pass_processed += 1 + log.info(f"[pass {pass_num}] processed scene {item_id} (processed_count={processed_count})") + log.progress(float(processed_count) / float(total)) + + if len(scenes) < per_page: + break + page += 1 + + log.info(f"Pass #{pass_num} finished. items processed this pass: {pass_processed}") + + # If no items processed in a full pass, we're done + if pass_processed == 0: + log.info("No items processed in last pass; finishing scan.") + break - log.progress(float(idx) / float(total)) + # Small sleep to avoid hammering API and to let the DB settle between passes + time.sleep(0.2) + # ensure progress finished log.progress(1.0) From f435f079a82499fec4f0f03cd3a04f2377495a8b Mon Sep 17 00:00:00 2001 From: yokarion Date: Fri, 12 Sep 2025 02:32:09 +0200 Subject: [PATCH 10/10] e621_tagger.yml - fix pagination (again) --- plugins/e621_tagger/e621_tagger.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/e621_tagger/e621_tagger.yml b/plugins/e621_tagger/e621_tagger.yml index 727921d1..911080ff 100644 --- a/plugins/e621_tagger/e621_tagger.yml +++ b/plugins/e621_tagger/e621_tagger.yml @@ -1,6 +1,6 @@ name: e621_tagger description: Finding images and videos on e621 and tagging them. -version: 0.3 +version: 0.4 url: https://github.com/stashapp/CommunityScripts/ exec: - python