In [1]:
def check_download_consistency(meta_path="downloads/metadata.json",
                               videos_dir="downloads/videos"):
  """
  Verify download consistency for all videos listed in a metadata JSON file.

  The metadata file must contain a JSON list of objects that each carry at
  least a "title" and an "id" key. For every such entry this function checks:
    1. The corresponding .mp4 file exists in the videos directory.
    2. The .mp4 file contains at least one audio stream (verified with ffprobe).

  Expected filenames are produced by yt-dlp's prepare_filename() using the
  output template "%(title).120B [%(id)s].%(ext)s" with ext fixed to "mp4".

  Findings are printed as they are discovered, followed by a summary. The
  following situations are reported separately rather than being conflated:
    - file missing on disk,
    - file present but without an audio stream,
    - file present but ffprobe exited with an error (e.g. corrupt file),
    - metadata entry lacking "title" or "id" (skipped, not fatal),
    - ffprobe not installed (warned once; audio is then reported as
      "not verified" instead of being assumed present).

  This function itself only reads files; it does not write or delete anything.

  Args:
    meta_path: Path to the metadata JSON file. Defaults to
      "downloads/metadata.json".
    videos_dir: Directory holding the downloaded .mp4 files. Defaults to
      "downloads/videos".

  Returns:
    None. All results are reported on stdout.
  """

  import json
  import subprocess
  from pathlib import Path

  meta_path = Path(meta_path)
  videos_dir = Path(videos_dir)

  if not meta_path.exists():
    print("❌ metadata.json missing.")
    return
  if not videos_dir.exists():
    print("❌ videos directory missing.")
    return

  # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
  try:
    metadata = json.loads(meta_path.read_text(encoding="utf-8"))
  except ValueError as exc:
    print(f"❌ metadata.json could not be parsed: {exc}")
    return
  if not isinstance(metadata, list):
    print("❌ metadata.json must contain a list of video entries.")
    return

  # Imported only after the cheap sanity checks so that a missing metadata
  # file is reported even before yt-dlp is needed.
  from yt_dlp import YoutubeDL

  ydl_opts = {"outtmpl": str(videos_dir / "%(title).120B [%(id)s].%(ext)s")}

  print("\n=== Checking download consistency ===")

  missing_files = []
  no_audio_files = []
  unreadable_files = []
  malformed_entries = 0
  ffprobe_available = True  # flipped to False the first time ffprobe cannot be launched
  total = len(metadata)

  def probe_audio(path):
    """Run ffprobe on `path`.

    Returns a (has_audio, error_text) tuple. has_audio is True/False when
    ffprobe exited successfully, and None when ffprobe exited with a non-zero
    status (in which case error_text holds its stderr output).
    Raises FileNotFoundError if the ffprobe executable cannot be launched.
    """
    result = subprocess.run(
        [
            "ffprobe", "-v", "error",
            "-select_streams", "a",
            "-show_entries", "stream=index",
            "-of", "csv=p=0",
            str(path)
        ],
        capture_output=True, text=True,
        # Decode explicitly instead of relying on the locale code page, which
        # can fail on Windows when ffprobe echoes non-ASCII file names.
        encoding="utf-8", errors="replace"
    )
    if result.returncode != 0:
      return None, (result.stderr or "").strip()
    return bool(result.stdout.strip()), ""

  # The context manager makes sure yt-dlp releases whatever it holds on exit.
  with YoutubeDL(ydl_opts) as ydl:
    for index, v in enumerate(metadata):
      if not isinstance(v, dict) or "title" not in v or "id" not in v:
        malformed_entries += 1
        print(f"⚠️ Skipping malformed metadata entry #{index}: missing 'title' or 'id'")
        continue

      info = {"title": v["title"], "id": v["id"], "ext": "mp4"}
      file_path = Path(ydl.prepare_filename(info))

      if not file_path.exists():
        missing_files.append(file_path)
        print(f"❌ File not found: {file_path}")
        continue

      if not ffprobe_available:
        continue  # already warned once; audio cannot be verified

      try:
        audio_present, probe_error = probe_audio(file_path)
      except FileNotFoundError:
        ffprobe_available = False
        print("⚠️ ffprobe not found — cannot verify audio tracks.")
        continue

      if audio_present is None:
        # ffprobe ran but failed on this file: do not mislabel it as "no audio".
        unreadable_files.append(file_path)
        detail = f" ({probe_error.splitlines()[0]})" if probe_error else ""
        print(f"⚠️ ffprobe could not read file: {file_path}{detail}")
      elif not audio_present:
        no_audio_files.append(file_path)
        print(f"⚠️ No audio track found: {file_path}")

  has_problems = bool(
      missing_files or no_audio_files or unreadable_files or malformed_entries
  )

  if not has_problems:
    if ffprobe_available:
      print(f"✅ All {total} videos are present and contain audio tracks.")
    else:
      print(f"✅ All {total} videos are present (audio tracks not verified).")
  else:
    print("\n--- Summary ---")
    if missing_files:
      print(f"❌ Missing files: {len(missing_files)}")
    if no_audio_files:
      print(f"⚠️ Videos without audio: {len(no_audio_files)}")
    if unreadable_files:
      print(f"⚠️ Files ffprobe could not read: {len(unreadable_files)}")
    if malformed_entries:
      print(f"⚠️ Malformed metadata entries: {malformed_entries}")
    if not ffprobe_available:
      print("⚠️ Audio tracks not verified (ffprobe unavailable)")
    print(f"Total checked: {total}")

In [3]:
# Run the check; it prints its findings and a summary to stdout.
check_download_consistency()


=== Checking download consistency ===
⚠️ No audio track found: downloads\videos\Azaadi Ka Ambruth Mahotsav [E0usTcu97as].mp4
⚠️ No audio track found: downloads\videos\Azaadi Ka Ambruth Mahotsav [Mbm0iOrdqGU].mp4
⚠️ No audio track found: downloads\videos\Azaadi Ka Ambruth Mahotsav [6hcJ63Tb5ec].mp4
⚠️ No audio track found: downloads\videos\Azaadi Ka Ambruth Mahothsav [z_ngORJjSEU].mp4
⚠️ No audio track found: downloads\videos\ಮತ್ಸ್ಯ ನಾರಾಯಣ ಕಲ್ಯಾಣೋತ್ಸವ.... [HKfKG1-uuyY].mp4
⚠️ No audio track found: downloads\videos\Azaadi Ka Ambruth Mahothsav... [tOJnxjBsBNw].mp4
⚠️ No audio track found: downloads\videos\ಶ್ರೀ ಚಾಮುಂಡೇಶ್ವರಿ ಪಾಲಯಮಾಂ⧸Sri chamundeshwari palayamam⧸Raaga ： ಬಿಲ [K6DZuf9fHPU].mp4
⚠️ No audio track found: downloads\videos\Mangala Nidhi program [zrHvVy0DsKw].mp4
⚠️ No audio track found: downloads\videos\ಶ್ರೀ ಚಾಮುಂಡೇಶ್ವರಿ ಪಾಲಯಮಾಂ ⧸ ವಿಲಂಬಕಾಲದ ಕೀರ್ತನ [v0RyCwPiUTs].mp4
⚠️ No audio track found: downloads\videos\ಶ್ರೀ ಚಾಮುಂಡೇಶ್ವರಿ ಪಾಲಯಮಾಂ⧸Sri   Paalayamam⧸ Vilambakaala Keerthane｜ Bilaah 