In [1]:
def rename_old_videos_to_new_format(dry_run: bool = True):
  """
  Rename old-format videos ('<title>.mp4') to the new format ('<title> [<id>].mp4').

  Why this exists:
      Earlier downloads used "%(title)s.%(ext)s", which caused overwriting when titles repeated.
      After adopting "%(title).120B [%(id)s].%(ext)s", existing files should be renamed to match
      the new pattern for consistency and to avoid future collisions.

  Behavior:
      - Reads 'downloads/metadata.json' and maps each video title -> list of IDs.
      - If "<title>.mp4" exists in 'downloads/videos', renames it to
        "<title truncated to 120 UTF-8 bytes> [<id>].mp4". The ".120B" in the template is a
        precision specifier (a byte limit on the title), not literal text in the filename.
      - Skips titles with multiple IDs, because the old file cannot be attributed to one ID.
      - Skips a rename whose target file already exists.
      - Ignores metadata entries that have no title or no ID.

  Args:
      dry_run (bool): If True (default), only prints what would be renamed.

  Returns:
      None. Progress and the final count are reported via print().
  """

  import json
  from collections import defaultdict
  from pathlib import Path

  meta_path = Path("downloads/metadata.json")
  videos_dir = Path("downloads/videos")

  if not meta_path.exists():
    print("‚ùå metadata.json missing.")
    return

  metadata = json.loads(meta_path.read_text(encoding="utf-8"))

  # Build mapping from title -> list of IDs. Entries lacking an ID are skipped:
  # without an ID there is no new-format name to rename to.
  title_to_ids = defaultdict(list)
  for v in metadata:
    title = (v.get("title") or "").strip()
    video_id = v.get("id")
    if title and video_id:
      title_to_ids[title].append(video_id)

  rename_count = 0

  print("\n=== Renaming old-format files ===")
  for title, ids in title_to_ids.items():
    # NOTE(review): the title is used verbatim as a filename. If the downloader rewrites
    # characters such as '/' when saving, those files will not be matched here -- confirm.
    old_path = videos_dir / f"{title}.mp4"
    if not old_path.exists():
      continue

    # Only rename unique titles; a shared title is ambiguous.
    if len(ids) > 1:
      print(f"‚ö†Ô∏è Skipping duplicate title '{title}' (multiple IDs).")
      continue

    # Mirror "%(title).120B": keep at most 120 UTF-8 bytes and drop any multi-byte
    # character that the cut would split.
    short_title = title.encode("utf-8")[:120].decode("utf-8", "ignore")
    new_path = videos_dir / f"{short_title} [{ids[0]}].mp4"
    if new_path.exists():
      print(f"‚ö†Ô∏è Skipping '{title}' ‚Äî target already exists.")
      continue

    print(f"üîÅ '{old_path.name}'  ‚Üí  '{new_path.name}'")
    rename_count += 1

    if not dry_run:
      old_path.rename(new_path)

  if dry_run:
    print(f"\nüí° Dry-run mode: {rename_count} files would be renamed.")
  else:
    print(f"\n‚úÖ Renamed {rename_count} files successfully.")

In [2]:
def patch_inconsistent_downloads(dry_run: bool = True):
  """
  Patch 'downloaded.json' so it only lists videos that can be trusted as downloaded.

  Why this exists:
      Older versions of this downloader saved videos using only "%(title)s.%(ext)s".
      This caused two problems:
        1. Duplicate-titled videos overwrote earlier files.
        2. downloaded.json marked videos as done even when their .mp4 files were missing.
      After switching to "%(title).120B [%(id)s].%(ext)s", this patch ensures consistency by:
        - Removing all IDs for duplicate-titled videos (since they may be incomplete or overwritten).
        - Removing IDs whose corresponding new-format .mp4 file does not exist.

  The expected new-format file name is "<title truncated to 120 UTF-8 bytes> [<id>].mp4";
  the ".120B" in the template is a byte-precision specifier, not literal filename text.
  Metadata entries without a title or an ID are ignored.

  Args:
      dry_run (bool): If True (default), only reports affected titles/IDs.
                      If False, rewrites 'downloaded.json'.

  Returns:
      None. Findings are reported via print().
  """

  import json
  from collections import defaultdict
  from pathlib import Path

  meta_path = Path("downloads/metadata.json")
  done_path = Path("downloads/downloaded.json")
  videos_dir = Path("downloads/videos")

  if not meta_path.exists() or not done_path.exists():
    print("‚ùå metadata.json or downloaded.json missing.")
    return

  metadata = json.loads(meta_path.read_text(encoding="utf-8"))
  downloaded = set(json.loads(done_path.read_text(encoding="utf-8")))

  def new_format_path(title, video_id):
    # Mirror "%(title).120B [%(id)s].mp4": keep at most 120 UTF-8 bytes of the title and
    # drop any multi-byte character that the cut would split.
    short_title = title.encode("utf-8")[:120].decode("utf-8", "ignore")
    return videos_dir / f"{short_title} [{video_id}].mp4"

  # --- group by title (entries without an ID cannot be checked or removed) ---
  by_title = defaultdict(list)
  for v in metadata:
    title = (v.get("title") or "").strip()
    if title and v.get("id"):
      by_title[title].append(v)

  duplicates = {t: vids for t, vids in by_title.items() if len(vids) > 1}
  remove_ids = set()

  print("\n=== Duplicate-titled videos ===")
  if duplicates:
    for title, vids in duplicates.items():
      ids = [v["id"] for v in vids]
      # Informational only: all IDs of a duplicated title are removed regardless.
      exists_any = any(new_format_path(title, video_id).exists() for video_id in ids)
      remove_ids.update(ids)
      print(f"üé¨ '{title}' ({len(ids)} videos) | File exists: {'‚úÖ' if exists_any else '‚ùå'}")
    print(f"\n‚ö†Ô∏è {len(duplicates)} duplicate titles found.")
  else:
    print("‚úÖ No duplicate titles found.")

  # --- check for missing unique videos ---
  print("\n=== Missing files for unique titles ===")
  missing_unique = []
  for title, vids in by_title.items():
    if len(vids) != 1:
      continue
    video_id = vids[0]["id"]
    if not new_format_path(title, video_id).exists():
      remove_ids.add(video_id)
      missing_unique.append((title, video_id))
      print(f"‚ùå '{title}' ‚Äî file not found, ID: {video_id}")

  if not missing_unique:
    print("‚úÖ All unique-titled videos have corresponding .mp4 files.")

  print(f"\nTotal IDs to remove: {len(remove_ids)}")

  if not dry_run:
    new_downloaded = sorted(downloaded - remove_ids)
    done_path.write_text(json.dumps(new_downloaded, indent=4), encoding="utf-8")
    print(f"‚úçÔ∏è Updated '{done_path}' with {len(new_downloaded)} remaining IDs.")
  else:
    print("\nüí° Dry-run mode: No files changed. Use dry_run=False to apply changes.")

In [3]:
# Preview only: with dry_run=True the planned renames are printed and no file is renamed.
rename_old_videos_to_new_format(dry_run=True)


=== Renaming old-format files ===

üí° Dry-run mode: 0 files would be renamed.


In [4]:
# Preview only: with dry_run=True the affected IDs are reported and downloaded.json is not rewritten.
patch_inconsistent_downloads(dry_run=True)


=== Duplicate-titled videos ===
üé¨ 'Azaadi Ka Ambruth Mahotsav' (3 videos) | File exists: ‚ùå

‚ö†Ô∏è 1 duplicate titles found.

=== Missing files for unique titles ===
‚ùå 'Azaadi Ka Ambruth Mahothsav' ‚Äî file not found, ID: z_ngORJjSEU
‚ùå '‡≤Æ‡≤§‡≥ç‡≤∏‡≥ç‡≤Ø ‡≤®‡≤æ‡≤∞‡≤æ‡≤Ø‡≤£ ‡≤ï‡≤≤‡≥ç‡≤Ø‡≤æ‡≤£‡≥ã‡≤§‡≥ç‡≤∏‡≤µ....' ‚Äî file not found, ID: HKfKG1-uuyY
‚ùå 'Azaadi Ka Ambruth Mahothsav...' ‚Äî file not found, ID: tOJnxjBsBNw
‚ùå '‡≤∂‡≥ç‡≤∞‡≥Ä ‡≤ö‡≤æ‡≤Æ‡≥Å‡≤Ç‡≤°‡≥á‡≤∂‡≥ç‡≤µ‡≤∞‡≤ø ‡≤™‡≤æ‡≤≤‡≤Ø‡≤Æ‡≤æ‡≤Ç/Sri chamundeshwari palayamam/Raaga : ‡≤¨‡≤ø‡≤≤‡≤π‡≤∞‡≤ø ,thaala: ‡≤Ü‡≤¶‡≤ø, Vilambakaala....' ‚Äî file not found, ID: K6DZuf9fHPU
‚ùå 'Mangala Nidhi program' ‚Äî file not found, ID: zrHvVy0DsKw
‚ùå '‡≤∂‡≥ç‡≤∞‡≥Ä ‡≤ö‡≤æ‡≤Æ‡≥Å‡≤Ç‡≤°‡≥á‡≤∂‡≥ç‡≤µ‡≤∞‡≤ø ‡≤™‡≤æ‡≤≤‡≤Ø‡≤Æ‡≤æ‡≤Ç / ‡≤µ‡≤ø‡≤≤‡≤Ç‡≤¨‡≤ï‡≤æ‡≤≤‡≤¶ ‡≤ï‡≥Ä‡≤∞‡≥ç‡≤§‡≤®‡≥Ü/ ‡≤∞‡≤æ‡≤ó : ‡≤¨‡≤ø‡≤≤‡≤π‡≤∞‡≤ø/‡≤§‡≤æ‡≤≥: ‡≤Ü‡≤¶‡≤ø ‡≤§‡≤æ‡≤≥.' ‚Äî file not found, ID: v0RyCwPiUTs
‚ùå '‡≤∂‡≥ç‡≤∞‡≥Ä ‡≤ö‡≤æ‡≤Æ‡≥Å‡≤Ç‡≤°‡≥á‡≤∂‡≥ç‡≤µ‡≤∞‡≤ø ‡≤™‡≤æ‡≤≤‡≤