In [16]:
# pip install yt-dlp

# FFmpeg must be on PATH. Install using one of the following:
#   winget install Gyan.FFmpeg
#   choco install ffmpeg

In [17]:
import os
import json
import sys
import time
from pathlib import Path
from yt_dlp import YoutubeDL

In [18]:
# === CONFIG ===
# Channel whose uploads are mirrored locally.
CHANNEL_URL = "https://www.youtube.com/@shastrisisters3044"

# Everything is written beneath OUTPUT_DIR. Creating the videos folder with
# parents=True also creates OUTPUT_DIR itself on the first run.
OUTPUT_DIR = Path("downloads")
VIDEOS_DIR = OUTPUT_DIR.joinpath("videos")
VIDEOS_DIR.mkdir(parents=True, exist_ok=True)

# JSON bookkeeping: the fetched channel metadata and the list of finished IDs.
META_FILE = OUTPUT_DIR.joinpath("metadata.json")
DONE_FILE = OUTPUT_DIR.joinpath("downloaded.json")

# yt-dlp output template: title (precision .120 with the B conversion), then the
# video ID in brackets, then the container extension.
OUT_TMPL = str(VIDEOS_DIR.joinpath("%(title).120B [%(id)s].%(ext)s"))

In [19]:
def _retry_backoff(attempt):
  """Return the seconds to sleep before retry `attempt`: 2**attempt, capped at 30."""
  return min(2 ** attempt, 30)


# Common yt-dlp options (no progress here)
YDL_BASE = {
    # Keep going when a single video fails instead of aborting the whole run.
    "ignoreerrors": True,
    "quiet": True,
    "no_warnings": True,
    "noprogress": True,
    # Keep non-ASCII titles, but strip characters Windows cannot store.
    "restrictfilenames": False,
    "windowsfilenames": True,
    "outtmpl": OUT_TMPL,
    "outtmpl_na_placeholder": "NA",
    "merge_output_format": "mp4",
    "retries": 10,
    "fragment_retries": 10,
    # YoutubeDL reads back-off callables from "retry_sleep_functions", keyed by
    # retry type; a bare "retry_sleep" key is not a recognised option.
    "retry_sleep_functions": {
        "http": _retry_backoff,
        "fragment": _retry_backoff,
    },
    # API equivalent of --force-ipv4: bind outgoing connections to the IPv4
    # wildcard address. "force_ip" is not a recognised YoutubeDL option.
    "source_address": "0.0.0.0",
}

In [20]:
def get_metadata():
  """Fetch metadata for every video of CHANNEL_URL without downloading media.

  Returns:
    A list of dicts with the keys "id", "title", "url", "upload_date",
    "description", "duration" and "thumbnail", one per distinct video ID.
    The list is empty when extraction yields nothing (errors are suppressed
    by the "ignoreerrors" option in YDL_BASE).
  """
  ydl_opts = {
      **YDL_BASE,
      # Full (non-flat) extraction so per-video fields such as the description
      # and upload date are populated.
      "extract_flat": False,
      # NOTE(review): "tab" does not appear among yt-dlp's documented
      # youtubetab extractor arguments - confirm it has any effect.
      "extractor_args": {"youtubetab": {"approximate_date": ["true"], "tab": ["videos"]}},
  }
  with YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(CHANNEL_URL, download=False)

  def iter_videos(node):
    """Yield leaf video entries, descending into nested playlists."""
    for child in (node or {}).get("entries") or []:
      if not child:
        continue  # failed extractions show up as None with ignoreerrors
      if child.get("_type") == "playlist" or child.get("entries") is not None:
        # A bare channel URL can come back as a playlist of per-tab playlists
        # (videos, shorts, live); the real videos sit one level down.
        yield from iter_videos(child)
      else:
        yield child

  meta = []
  seen_ids = set()
  for e in iter_videos(info):
    video_id = e.get("id")
    # Skip entries without an ID and repeats of a video already recorded.
    if not video_id or video_id in seen_ids:
      continue
    seen_ids.add(video_id)
    meta.append({
        "id": video_id,
        "title": e.get("title"),
        "url": e.get("webpage_url") or f"https://www.youtube.com/watch?v={video_id}",
        "upload_date": e.get("upload_date"),
        "description": e.get("description"),
        "duration": e.get("duration"),
        "thumbnail": e.get("thumbnail"),
    })
  return meta

In [21]:
def show_progress(d):
  """yt-dlp progress hook: render a one-line, self-overwriting status.

  Args:
    d: progress dict passed by yt-dlp; only "status" and the pre-formatted
      "_*_str" fields are read. Statuses other than "downloading" and
      "finished" print nothing.
  """
  status = d.get("status")

  if status == "finished":
    print("\nüèÉ‚Äç‚ôÇÔ∏è‚Äç‚û°Ô∏è Download complete. Merging...\n", flush=True)
    return
  if status != "downloading":
    return

  def text(key):
    # Missing or None fields collapse to an empty string.
    return (d.get(key) or "").strip()

  pct = text("_percent_str")
  spd = text("_speed_str")
  eta = text("_eta_str")
  tot = d.get("_total_bytes_str") or d.get("_total_bytes_estimate_str") or "?"
  # The leading "\r" and end="" keep the update on a single console line.
  print(f"\r‚¨áÔ∏è  {pct} of {tot} at {spd}, ETA {eta}", end="", flush=True)

In [22]:
def expected_output_path(info_dict):
  """Predict where the finished .mp4 for `info_dict` should be written.

  The name is rendered by yt-dlp from the output template in YDL_BASE; whatever
  follows the last "." is then replaced by "mp4".

  Args:
    info_dict: mapping with the fields the output template consumes.

  Returns:
    pathlib.Path of the expected file.
  """
  with YoutubeDL(dict(YDL_BASE)) as ydl:
    rendered = ydl.prepare_filename(info_dict)
  stem = rendered.rsplit(".", 1)[0]
  return Path(f"{stem}.mp4")

In [23]:
def download_video(video, threads: int = 2):
  """Download one video and report whether the expected .mp4 exists afterwards.

  Args:
    video: metadata record providing at least "title", "id" and "url".
    threads: number of fragments fetched concurrently (values below 1 are
      raised to 1).

  Returns:
    True if the file predicted by expected_output_path() exists once yt-dlp
    has returned, otherwise False.
  """
  ydl_opts = {
      **YDL_BASE,
      "progress_hooks": [show_progress],
      "noprogress": False,
      # Every video selector must be paired with "+bestaudio"; a lone
      # "bestvideo*[...]" alternative may resolve to a video-only stream and
      # yield a file with no sound. Prefer H.264 (avc1), then any best video,
      # then the best pre-muxed format.
      "format": "bestvideo*[vcodec^=avc1]+bestaudio/bestvideo+bestaudio/best",
      "concurrent_fragment_downloads": max(1, int(threads)),
      "writeinfojson": False,
  }

  # Work out the target path first; the success check relies on the file
  # existing rather than on yt-dlp's return value.
  vid_path_guess = expected_output_path({
      "title": video["title"],
      "id": video["id"],
      "ext": "mp4"
  })

  with YoutubeDL(ydl_opts) as ydl:
    ydl.download([video["url"]])

  return vid_path_guess.exists()

In [24]:
def prepare_metadata():
  """Create META_FILE and DONE_FILE, each only if it is missing.

  META_FILE is filled from get_metadata(); an empty result is not saved, so a
  later run can try again. DONE_FILE is initialised to an empty list only when
  it does not exist yet, which keeps the record of finished downloads intact
  if the metadata is regenerated.
  """
  generated = False
  if not META_FILE.exists():
    print("üìã Generating metadata...")
    metadata = get_metadata()
    if not metadata:
      # Saving an empty list would make every later run treat it as final.
      print("No metadata could be fetched; nothing was saved. Re-run this cell to retry.")
      return
    META_FILE.write_text(json.dumps(metadata, indent=4, ensure_ascii=False), encoding="utf-8")
    print(f"‚úÖ Metadata saved to {META_FILE}")
    generated = True

  # Never overwrite an existing progress file.
  if not DONE_FILE.exists():
    DONE_FILE.write_text(json.dumps([], indent=4), encoding="utf-8")

  if generated:
    print("‚ÑπÔ∏è Metadata ready. Proceed to next cell for downloading.")
  else:
    print("‚úÖ Metadata already exists. Proceed to next cell for downloading.")

In [25]:
def show_metadata_status():
  """Display one row per video from META_FILE with its download status.

  Reads META_FILE (must exist) and, if present, DONE_FILE. The table is built
  with a fixed column list so an empty metadata file, or records missing a
  field, render as an empty/partially blank table instead of raising KeyError.
  """
  import pandas as pd
  metadata = json.loads(META_FILE.read_text(encoding="utf-8"))
  downloaded = set()
  if DONE_FILE.exists():
    downloaded = set(json.loads(DONE_FILE.read_text(encoding="utf-8")))

  # Passing `columns` guarantees these columns exist even with no records.
  df = pd.DataFrame(metadata, columns=["id", "title", "upload_date", "duration"])
  df["downloaded"] = df["id"].map(lambda vid: "‚úÖ" if vid in downloaded else "‚ùå")
  # Lift the row limit so the complete channel listing is shown.
  with pd.option_context("display.max_rows", None):
    display(df[["id", "title", "upload_date", "duration", "downloaded"]])

In [26]:
def download_videos(threads: int = 2, max_videos: int | None = None):
  """
  Fetch every video listed in META_FILE whose ID is not yet in DONE_FILE.

    threads: concurrent fragment downloads per video.
    max_videos: cap for a short test run (None = all).

  DONE_FILE is rewritten after each successful video, so an interrupted run
  can be resumed by calling this function again.
  """
  metadata = json.loads(META_FILE.read_text(encoding="utf-8"))

  if DONE_FILE.exists():
    done_ids = set(json.loads(DONE_FILE.read_text(encoding="utf-8")))
  else:
    done_ids = set()

  # Entries lacking an ID cannot be tracked, so they are left out.
  pending = [item for item in metadata if item.get("id") and item["id"] not in done_ids]

  if not pending:
    print("‚úÖ All videos have already been downloaded. No remaining videos.")
    return

  if max_videos is not None:
    pending = pending[:max_videos]

  print(f"üé¨ {len(pending)} videos remaining out of {len(metadata)}")

  for idx, item in enumerate(pending, 1):
    try:
      print(f"\n[{idx}/{len(pending)}] {item['title']}  ({item['id']})")
      merged = download_video(item, threads=threads)
      if not merged:
        print(f"‚ö†Ô∏è Merge/file missing for: {item['title']} ({item['id']}) ‚Äî not marking as done.")
        continue
      print(f"‚úÖ Merged successfully: {item['title']} ({item['id']})")
      done_ids.add(item["id"])
      # Persist right away so finished work survives an interruption.
      DONE_FILE.write_text(json.dumps(sorted(done_ids), indent=4), encoding="utf-8")
    except KeyboardInterrupt:
      print("\nüõë Interrupted by user. Progress saved.")
      break
    except Exception as err:
      # One failing video must not stop the rest of the batch.
      print(f"\n‚ö†Ô∏è Error downloading {item['title']} ({item['id']}): {err}")

  print("\nüèÅ Finished. Safe to rerun; completed IDs are skipped.")

In [27]:
# Step 1: make sure the metadata and progress files exist before downloading.
prepare_metadata()

‚úÖ Metadata already exists. Proceed to next cell for downloading.


In [28]:
# Step 2: list every known video together with its download status.
show_metadata_status()

Unnamed: 0,id,title,upload_date,duration,downloaded
0,E0usTcu97as,Azaadi Ka Ambruth Mahotsav,20220806,222,‚úÖ
1,Mbm0iOrdqGU,Azaadi Ka Ambruth Mahotsav,20220804,182,‚úÖ
2,6hcJ63Tb5ec,Azaadi Ka Ambruth Mahotsav,20220803,149,‚úÖ
3,z_ngORJjSEU,Azaadi Ka Ambruth Mahothsav,20220802,142,‚úÖ
4,HKfKG1-uuyY,‡≤Æ‡≤§‡≥ç‡≤∏‡≥ç‡≤Ø ‡≤®‡≤æ‡≤∞‡≤æ‡≤Ø‡≤£ ‡≤ï‡≤≤‡≥ç‡≤Ø‡≤æ‡≤£‡≥ã‡≤§‡≥ç‡≤∏‡≤µ....,20220802,234,‚úÖ
5,tOJnxjBsBNw,Azaadi Ka Ambruth Mahothsav...,20220801,239,‚úÖ
6,K6DZuf9fHPU,‡≤∂‡≥ç‡≤∞‡≥Ä ‡≤ö‡≤æ‡≤Æ‡≥Å‡≤Ç‡≤°‡≥á‡≤∂‡≥ç‡≤µ‡≤∞‡≤ø ‡≤™‡≤æ‡≤≤‡≤Ø‡≤Æ‡≤æ‡≤Ç/Sri chamundeshwari p...,20220620,270,‚úÖ
7,zrHvVy0DsKw,Mangala Nidhi program,20211230,499,‚úÖ
8,v0RyCwPiUTs,‡≤∂‡≥ç‡≤∞‡≥Ä ‡≤ö‡≤æ‡≤Æ‡≥Å‡≤Ç‡≤°‡≥á‡≤∂‡≥ç‡≤µ‡≤∞‡≤ø ‡≤™‡≤æ‡≤≤‡≤Ø‡≤Æ‡≤æ‡≤Ç / ‡≤µ‡≤ø‡≤≤‡≤Ç‡≤¨‡≤ï‡≤æ‡≤≤‡≤¶ ‡≤ï‡≥Ä‡≤∞‡≥ç‡≤§‡≤®‡≥Ü/...,20210615,297,‚úÖ
9,eNslNnj7uq0,‡≤∂‡≥ç‡≤∞‡≥Ä ‡≤ö‡≤æ‡≤Æ‡≥Å‡≤Ç‡≤°‡≥á‡≤∂‡≥ç‡≤µ‡≤∞‡≤ø ‡≤™‡≤æ‡≤≤‡≤Ø‡≤Æ‡≤æ‡≤Ç/Sri Paalayamam/ Vi...,20210604,272,‚úÖ


In [29]:
# Step 3: download what is still missing.
# NOTE: max_videos=0 truncates the pending list to zero items, so this call
# downloads nothing even when videos remain; pass max_videos=None for all.
download_videos(threads=5, max_videos=0)

‚úÖ All videos have already been downloaded. No remaining videos.
