In [1]:
# Requires yt-dlp. If it is missing, install it into this kernel's environment with:
#   %pip install yt-dlp

In [2]:
import os
import json
from yt_dlp import YoutubeDL

In [3]:
# === CONFIG ===
# Channel whose uploads are indexed and downloaded.
CHANNEL_URL = "https://www.youtube.com/@shastrisisters3044"

# Root folder for everything this notebook writes to disk.
OUTPUT_DIR = "downloads"

# Bookkeeping files: cached channel metadata and the IDs of finished downloads.
META_FILE = os.path.join(OUTPUT_DIR, "metadata.json")
DONE_FILE = os.path.join(OUTPUT_DIR, "downloaded.json")

# Media files live in a sub-folder; creating it also creates OUTPUT_DIR.
VIDEOS_DIR = os.path.join(OUTPUT_DIR, "videos")
os.makedirs(VIDEOS_DIR, exist_ok=True)

In [4]:
def get_metadata():
  """Fetch metadata for every video on the channel (no media is downloaded).

  Returns:
      list: One dict per video with the keys 'id', 'title', 'url',
      'upload_date', 'description', 'duration' and 'thumbnail'. The list is
      empty when yt-dlp could not extract anything from CHANNEL_URL.
  """

  ydl_opts = {
      'ignoreerrors': True,   # keep scanning when a single video fails to extract
      'quiet': True,
      'extract_flat': False,  # process every entry so per-video fields are available
  }

  with YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(CHANNEL_URL, download=False)

  # With 'ignoreerrors' enabled a failed extraction yields None instead of an
  # exception, so guard before touching the result.
  if not info:
    return []

  def iter_videos(entries):
    """Yield video entries, descending into nested playlists."""
    for entry in entries or ():
      if not entry:
        # Entries that failed to extract come back as None.
        continue
      nested = entry.get('entries')
      if nested is not None:
        # A channel URL may resolve to one playlist per tab; those containers
        # are not videos themselves, so walk into them.
        yield from iter_videos(nested)
      else:
        yield entry

  return [{
      'id': e.get('id'),
      'title': e.get('title'),
      'url': e.get('webpage_url'),
      'upload_date': e.get('upload_date'),
      'description': e.get('description'),
      'duration': e.get('duration'),
      'thumbnail': e.get('thumbnail'),
  } for e in iter_videos(info.get('entries'))]

In [5]:
def show_progress(d):
  """Progress hook: print a one-line status for a yt-dlp progress dict.

  While a file is downloading, the same console line is rewritten (leading
  carriage return, no newline). When a file finishes, a completion message is
  printed on its own line. Any other status prints nothing.
  """
  status = d["status"]

  if status == "finished":
    print("\n‚úÖ Download complete. Merging...\n")
    return

  if status != "downloading":
    return

  # Prefer the exact size; fall back to the estimate when it is empty.
  total = d.get("_total_bytes_str", "")
  if not total:
    total = d.get("_total_bytes_estimate_str", "")

  percent, speed, eta = (
      d.get(key, "").strip()
      for key in ("_percent_str", "_speed_str", "_eta_str")
  )
  print(f"\r‚¨áÔ∏è  {percent} of {total} at {speed}, ETA {eta}", end="", flush=True)

In [6]:
def download_video(video, threads: int = 2):
  """Download one video (and its thumbnail) into VIDEOS_DIR as an MP4.

  Parameters:
      video (dict): Video metadata entry. Its "url" is used; when that is
          missing or empty the watch URL is rebuilt from "id".
      threads (int): Number of concurrent fragment downloads (default: 2).

  Raises:
      Exception: Whatever yt-dlp raises when the download fails.
      RuntimeError: If yt-dlp finishes but reports a non-zero return code.
  """

  ydl_opts = {
      # Failures must propagate: the caller records a video as done whenever
      # this function returns normally, so errors cannot be swallowed here.
      "ignoreerrors": False,
      "quiet": True,
      "no_warnings": True,
      "progress_hooks": [show_progress],

      "format": "bestvideo+bestaudio/best",
      "merge_output_format": "mp4",
      "concurrent_fragment_downloads": threads,  # caller-controlled parallelism

      # The video ID is part of the file name because titles are not unique on
      # a channel; a title-only template lets same-titled videos collide.
      "outtmpl": os.path.join(VIDEOS_DIR, "%(title)s [%(id)s].%(ext)s"),
      "writethumbnail": True,
      "writeinfojson": False,
  }

  url = video.get("url") or f"https://www.youtube.com/watch?v={video['id']}"

  with YoutubeDL(ydl_opts) as ydl:
    retcode = ydl.download([url])

  # Belt and braces: treat a non-zero return code as a failure too.
  if retcode:
    raise RuntimeError(f"yt-dlp exited with code {retcode} for {url}")

In [7]:
def prepare_metadata():
  """
  Ensure the metadata cache and the download-tracking file both exist.

  DONE_FILE is created (as an empty list) only when it is missing, so existing
  download progress is never reset. META_FILE is generated from the channel
  only when it is missing. An empty fetch result is not cached, so a failed
  fetch can be retried by simply rerunning this cell.
  """
  # Handled independently of META_FILE: the downloader needs this file even
  # when the metadata cache is already present.
  if not os.path.exists(DONE_FILE):
    with open(DONE_FILE, "w", encoding="utf-8") as f:
      json.dump([], f)

  if os.path.exists(META_FILE):
    print("‚úÖ Metadata already exists. Proceed to next cell for downloading.")
    return

  print("üìã Generating metadata...")

  metadata = get_metadata()

  if not metadata:
    # Writing an empty cache would make every later run skip the fetch.
    print("No videos found; metadata was not saved. Check CHANNEL_URL and rerun.")
    return

  with open(META_FILE, "w", encoding="utf-8") as f:
    json.dump(metadata, f, indent=4, ensure_ascii=False)

  print(f"‚úÖ Metadata saved to {META_FILE}")
  print("‚ÑπÔ∏è Metadata ready. Proceed to next cell for downloading.")

In [8]:
def show_metadata_status():
  """
  Display the cached metadata as a DataFrame with a per-video download marker.

  Reads META_FILE (which must exist) and, if present, DONE_FILE. Shows the
  columns id, title, upload_date, description and downloaded. An empty
  metadata file is displayed as an empty table instead of raising.
  """
  import pandas as pd  # local import: only this helper needs pandas

  shown_columns = ["id", "title", "upload_date", "description"]

  # Load metadata
  with open(META_FILE, "r", encoding="utf-8") as f:
    metadata = json.load(f)

  # reindex guarantees the displayed columns exist even when the metadata
  # list is empty (a bare DataFrame([]) has no "id" column).
  df = pd.DataFrame(metadata).reindex(columns=shown_columns)

  # Load downloaded IDs
  downloaded = set()
  if os.path.exists(DONE_FILE):
    with open(DONE_FILE, "r", encoding="utf-8") as f:
      downloaded = set(json.load(f))

  # Mark each row according to whether its ID is in the tracking file
  df["downloaded"] = ["‚úÖ" if vid in downloaded else "‚ùå" for vid in df["id"]]

  # Show every row rather than pandas' truncated view
  with pd.option_context("display.max_rows", None):
    display(df)

In [9]:
def download_videos(threads: int = 2):
  """
  Download every video from META_FILE that is not yet listed in DONE_FILE.

  The tracking file is rewritten after each successful download, so the run
  can be interrupted and resumed. A failing video is reported and skipped;
  it stays out of the tracking file and is retried on the next run.

  Parameters:
      threads (int): Number of concurrent fragment downloads (default: 2).
  """

  # Load metadata
  with open(META_FILE, "r", encoding="utf-8") as f:
    metadata = json.load(f)

  # Load already-downloaded video IDs; a missing tracking file simply means
  # nothing has been downloaded yet.
  downloaded = set()
  if os.path.exists(DONE_FILE):
    with open(DONE_FILE, "r", encoding="utf-8") as f:
      downloaded = set(json.load(f))

  # Find remaining videos
  remaining = [v for v in metadata if v["id"] not in downloaded]

  print(f"üé¨ {len(remaining)} videos remaining out of {len(metadata)}")

  tmp_file = DONE_FILE + ".tmp"

  # Download each remaining video
  for video in remaining:
    try:
      download_video(video, threads)
      downloaded.add(video["id"])

      # Update tracking file: write to a temporary file, then swap it in, so
      # an interruption mid-write cannot leave a truncated DONE_FILE behind.
      # Sorting keeps the file contents stable between runs.
      with open(tmp_file, "w", encoding="utf-8") as f:
        json.dump(sorted(downloaded), f, indent=4)
      os.replace(tmp_file, DONE_FILE)

    except KeyboardInterrupt:
      print("\nüõë Interrupted by user. Progress saved.")
      break
    except Exception as e:
      print(f"\n‚ö†Ô∏è Error downloading {video['title']}: {e}")

  print("\nüèÅ All done or stopped. You can rerun safely ‚Äî it will skip completed videos.")

In [10]:
# Build the metadata cache; prints a notice and does nothing else if META_FILE already exists.
prepare_metadata()

‚úÖ Metadata already exists. Proceed to next cell for downloading.


In [11]:
# Show every cached video together with its downloaded / not-downloaded marker.
show_metadata_status()

Unnamed: 0,id,title,upload_date,description,downloaded
0,E0usTcu97as,Azaadi Ka Ambruth Mahotsav,20220806,Azaadi Ka Ambruth Mahotsav episode 5,‚ùå
1,Mbm0iOrdqGU,Azaadi Ka Ambruth Mahotsav,20220804,Azaadi Ka Ambruth Mahotsav episode 3,‚ùå
2,6hcJ63Tb5ec,Azaadi Ka Ambruth Mahotsav,20220803,Azaadi Ka Ambruth Mahotsav episode 4,‚ùå
3,z_ngORJjSEU,Azaadi Ka Ambruth Mahothsav,20220802,Azaadi Ka Ambruth Mahothsav episode 2,‚ùå
4,HKfKG1-uuyY,‡≤Æ‡≤§‡≥ç‡≤∏‡≥ç‡≤Ø ‡≤®‡≤æ‡≤∞‡≤æ‡≤Ø‡≤£ ‡≤ï‡≤≤‡≥ç‡≤Ø‡≤æ‡≤£‡≥ã‡≤§‡≥ç‡≤∏‡≤µ....,20220802,,‚ùå
5,tOJnxjBsBNw,Azaadi Ka Ambruth Mahothsav...,20220801,Azaadi Ka Ambruth Mahothsav episode 1,‚ùå
6,K6DZuf9fHPU,‡≤∂‡≥ç‡≤∞‡≥Ä ‡≤ö‡≤æ‡≤Æ‡≥Å‡≤Ç‡≤°‡≥á‡≤∂‡≥ç‡≤µ‡≤∞‡≤ø ‡≤™‡≤æ‡≤≤‡≤Ø‡≤Æ‡≤æ‡≤Ç/Sri chamundeshwari p...,20220620,‡≤µ‡≤ø‡≤≥‡≤Ç‡≤¨ ‡≤ï‡≤æ‡≤≤‡≤¶ ‡≤ï‡≥Ä‡≤∞‡≥ç‡≤§‡≤®‡≥Ü/ bilahari,‚ùå
7,zrHvVy0DsKw,Mangala Nidhi program,20211230,Marriage Functions,‚ùå
8,v0RyCwPiUTs,‡≤∂‡≥ç‡≤∞‡≥Ä ‡≤ö‡≤æ‡≤Æ‡≥Å‡≤Ç‡≤°‡≥á‡≤∂‡≥ç‡≤µ‡≤∞‡≤ø ‡≤™‡≤æ‡≤≤‡≤Ø‡≤Æ‡≤æ‡≤Ç / ‡≤µ‡≤ø‡≤≤‡≤Ç‡≤¨‡≤ï‡≤æ‡≤≤‡≤¶ ‡≤ï‡≥Ä‡≤∞‡≥ç‡≤§‡≤®‡≥Ü/...,20210615,Vilambakaala Keerthana.\nRaaga: Bilaahari\ntha...,‚ùå
9,eNslNnj7uq0,‡≤∂‡≥ç‡≤∞‡≥Ä ‡≤ö‡≤æ‡≤Æ‡≥Å‡≤Ç‡≤°‡≥á‡≤∂‡≥ç‡≤µ‡≤∞‡≤ø ‡≤™‡≤æ‡≤≤‡≤Ø‡≤Æ‡≤æ‡≤Ç/Sri Paalayamam/ Vi...,20210604,Vilambakaala Keerthan. Bilaahari: Raaga.,‚ùå


In [12]:
# Download the videos not yet recorded in DONE_FILE; `threads` is passed through
# to yt-dlp's "concurrent_fragment_downloads" option.
download_videos(threads=5)

üé¨ 67 videos remaining out of 67
[download] 100% of   33.75MiB in 00:00:34 at 993.99KiB/s 
‚úÖ Download complete. Merging...

‚¨áÔ∏è  16.3% of    3.08MiB at 95.01KiB/s, ETA 00:27ETA 00:27

[download] Got error: HTTPSConnectionPool(host='rr5---sn-npoe7nz7.googlevideo.com', port=443): Read timed out.


[download] 100% of   33.75MiB                            
‚úÖ Download complete. Merging...

[download] 100% of    2.45MiB in 00:00:42 at 59.22KiB/s  
‚úÖ Download complete. Merging...

‚¨áÔ∏è  2.2% of   22.11MiB at 13.28KiB/s, ETA 27:47 ETA 27:47   
üõë Interrupted by user. Progress saved.

üèÅ All done or stopped. You can rerun safely ‚Äî it will skip completed videos.
