Skip to content
191 changes: 147 additions & 44 deletions plugins/e621_tagger/e621_tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import itertools
import stashapi.log as log
from stashapi.stashapp import StashInterface
from typing import List
from typing import List, Optional, Tuple

MD5_RE = re.compile(r"^[a-f0-9]{32}$")

Expand Down Expand Up @@ -94,7 +94,11 @@ def stream_scenes(

def process_e621_post_for_item(
stash: StashInterface, item_type: str, item_id: str, item_md5: str
) -> None:
) -> bool:
"""
CHANGED: return boolean indicating whether the item was updated/marked (True) or left untouched (False).
This lets the caller (main loop) increment progress only when an item actually changed state.
"""
# Fetch latest object to check tags
if item_type == "image":
obj = stash.find_image(item_id)
Expand All @@ -110,7 +114,7 @@ def process_e621_post_for_item(
)

if already_tagged or already_failed:
return
return False # nothing to do

try:
time.sleep(0.5)
Expand All @@ -125,14 +129,19 @@ def process_e621_post_for_item(
log.error(f"Marking as failed. e621 API error: {str(e)}")
e621_tag_failed = get_or_create_tag(stash, "e621_tag_failed")
fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in obj.get("tags", [])]
if item_type == "image":
stash.update_image({"id": item_id, "tag_ids": list(set(fail_ids))})
else:
stash.update_scene({"id": item_id, "tag_ids": list(set(fail_ids))})
return
try:
if item_type == "image":
stash.update_image({"id": item_id, "tag_ids": list(set(fail_ids))})
else:
stash.update_scene({"id": item_id, "tag_ids": list(set(fail_ids))})
return True
except Exception as e2:
log.error(f"Failed to mark as failed: {str(e2)}")
return False

if not post_data:
return
# not found on e621: leave untouched so it can be retried later (or user may decide to mark failed)
return False

e621_tag = get_or_create_tag(stash, "e621_tagged")
post_url = f"https://e621.net/posts/{post_data['id']}"
Expand Down Expand Up @@ -173,8 +182,10 @@ def process_e621_post_for_item(
else:
stash.update_scene(update_payload)
log.info(f"Scene updated: {item_id}")
return True
except Exception as e:
log.error(f"Update failed: {str(e)}")
return False


def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict:
Expand Down Expand Up @@ -227,10 +238,13 @@ def get_or_create_performer(stash: StashInterface, name: str) -> dict:
return performers[0] if performers else stash.create_performer({"name": name})


def scrape_image(client: StashInterface, image_id: str) -> None:
def scrape_image(client: StashInterface, image_id: str) -> bool:
"""
PAGINATION: return True if item was updated/marked (so main loop can count progress).
"""
image = client.find_image(image_id)
if not image or not image.get("visual_files"):
return
return False

file_data = image["visual_files"][0]
filename = file_data.get("basename", "")
Expand All @@ -256,15 +270,18 @@ def scrape_image(client: StashInterface, image_id: str) -> None:
log.info(f"Generated content MD5 for image: {final_md5}")
except Exception as e:
log.error(f"Failed to generate MD5 for image: {str(e)}")
return
return False

process_e621_post_for_item(client, "image", image_id, final_md5)
return process_e621_post_for_item(client, "image", image_id, final_md5)


def scrape_scene(client: StashInterface, scene_id: str) -> None:
def scrape_scene(client: StashInterface, scene_id: str) -> bool:
"""
PAGINATION: return True if item was updated/marked (so main loop can count progress).
"""
scene = client.find_scene(scene_id)
if not scene:
return
return False

final_md5 = None

Expand Down Expand Up @@ -297,17 +314,16 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None:
log.info(f"Generated content MD5 for scene: {final_md5}")
except Exception as e:
log.error(f"Failed to generate MD5 for scene: {str(e)}")
return
return False
else:
log.error(f"No files found for scene {scene_id}; cannot compute md5")
return
return False

if final_md5:
process_e621_post_for_item(client, "scene", scene_id, final_md5)
return process_e621_post_for_item(client, "scene", scene_id, final_md5)


if __name__ == "__main__":
log.info("Starting tagger with stable pagination snapshot (streamed)...")
log.info("Starting tagger with scanning passes until no work left...")
json_input = json.loads(sys.stdin.read())
stash = StashInterface(json_input["server_connection"])

Expand Down Expand Up @@ -337,30 +353,117 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None:

log.info(f"Total items (images + scenes): {total}")

stream = itertools.chain(
stream_images(
stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=per_page
),
stream_scenes(
stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=per_page
),
)

for idx, (item_type, item) in enumerate(stream, start=1):
log.progress(float(idx - 1) / float(total))

item_id = item["id"]
current_tag_ids = [t["id"] for t in item.get("tags", [])]
if any(tid in current_tag_ids for tid in skip_tag_ids):
log.info(f"Skipping {item_type} {item_id} - contains skip tag")
log.progress(float(idx) / float(total))
continue

if item_type == "image":
scrape_image(stash, item_id)
else:
scrape_scene(stash, item_id)
processed_count = 0
pass_num = 0
# Loop passes until a full pass processes zero items.
while True:
pass_num += 1
log.info(f"Starting scanning pass #{pass_num}")
pass_processed = 0

# Scan images by pages
page = 1
while True:
pagination = {
"page": page,
"per_page": per_page,
"sort": "created_at",
"direction": "ASC",
}
images = stash.find_images(f=_build_filter(skip_tag_ids, settings["ExcludeOrganized"]), filter=pagination)
log.info(f"[pass {pass_num}] fetched image page {page}, count={len(images)}")
if not images:
break
for img in images:
item_id = img.get("id")
if not item_id:
log.error(f"[pass {pass_num}] image without id on page {page}")
continue

# Defensive fetch of current tags to avoid race conditions
current = stash.find_image(item_id)
current_tag_ids = [t["id"] for t in current.get("tags", [])]
if any(tid in current_tag_ids for tid in skip_tag_ids):
# Shouldn't usually happen because filter excluded them, but handle gracefully.
log.info(f"[pass {pass_num}] skipping image {item_id} - now has skip tag")
processed_count += 1
pass_processed += 1
log.progress(float(processed_count) / float(total))
continue

# Attempt to process; scrape_image now returns True if it updated/marked the item.
try:
updated = scrape_image(stash, item_id)
except Exception as e:
log.error(f"[pass {pass_num}] scrape_image exception for {item_id}: {str(e)}")
updated = False

if updated:
processed_count += 1
pass_processed += 1
log.info(f"[pass {pass_num}] processed image {item_id} (processed_count={processed_count})")
log.progress(float(processed_count) / float(total))
# If not updated, it will remain in future passes. Continue scanning.

# If fewer than per_page results, we're at the end of current snapshot
if len(images) < per_page:
break
page += 1

# Scan scenes by pages
page = 1
while True:
pagination = {
"page": page,
"per_page": per_page,
"sort": "created_at",
"direction": "ASC",
}
scenes = stash.find_scenes(f=_build_filter(skip_tag_ids, settings["ExcludeOrganized"]), filter=pagination)
log.info(f"[pass {pass_num}] fetched scene page {page}, count={len(scenes)}")
if not scenes:
break
for sc in scenes:
item_id = sc.get("id")
if not item_id:
log.error(f"[pass {pass_num}] scene without id on page {page}")
continue

# Defensive fetch
current = stash.find_scene(item_id)
current_tag_ids = [t["id"] for t in current.get("tags", [])]
if any(tid in current_tag_ids for tid in skip_tag_ids):
log.info(f"[pass {pass_num}] skipping scene {item_id} - now has skip tag")
processed_count += 1
pass_processed += 1
log.progress(float(processed_count) / float(total))
continue

try:
updated = scrape_scene(stash, item_id)
except Exception as e:
log.error(f"[pass {pass_num}] scrape_scene exception for {item_id}: {str(e)}")
updated = False

if updated:
processed_count += 1
pass_processed += 1
log.info(f"[pass {pass_num}] processed scene {item_id} (processed_count={processed_count})")
log.progress(float(processed_count) / float(total))

if len(scenes) < per_page:
break
page += 1

log.info(f"Pass #{pass_num} finished. items processed this pass: {pass_processed}")

# If no items processed in a full pass, we're done
if pass_processed == 0:
log.info("No items processed in last pass; finishing scan.")
break

log.progress(float(idx) / float(total))
# Small sleep to avoid hammering API and to let the DB settle between passes
time.sleep(0.2)

# ensure progress finished
log.progress(1.0)
2 changes: 1 addition & 1 deletion plugins/e621_tagger/e621_tagger.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: e621_tagger
description: Finding images and videos on e621 and tagging them.
version: 0.3
version: 0.4
url: https://github.com/stashapp/CommunityScripts/
exec:
- python
Expand Down