In [None]:
import configparser

# Load credentials from a local INI file so the API key is never stored in the notebook.
CONFIG_PATH = 'config.ini'

config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means the file is missing.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(
        f"Could not read '{CONFIG_PATH}'. Create it with a [google] section "
        "containing an 'api' entry that holds the YouTube Data API key."
    )

try:
    google_api_key = config['google']['api']
except KeyError as exc:
    # Re-raise the same exception type with a message that says what to fix.
    raise KeyError(
        f"'{CONFIG_PATH}' must define an 'api' entry under the [google] section"
    ) from exc


In [None]:
import os
import json
import re
from youtube_transcript_api import YouTubeTranscriptApi
from googleapiclient.discovery import build

# Build the YouTube Data API v3 client, authenticated with the developer key
# read from config.ini. The functions below use this module-level client.
youtube = build(
    serviceName="youtube",
    version="v3",
    developerKey=google_api_key,
)

def sanitize_filename(name, default="untitled"):
    """Turn an arbitrary title into a string that is safe to use as a filename.

    Characters that are not allowed in filenames on common platforms
    (backslash, slash, colon, double quote, asterisk, question mark, angle
    brackets, pipe) and ASCII control characters are dropped, surrounding
    whitespace is trimmed, and the remaining spaces become underscores.

    Args:
        name: The raw title to clean up. ``None`` is treated like an empty string.
        default: Value returned when nothing usable is left after cleaning,
            e.g. for a title made only of unsafe characters.

    Returns:
        The sanitized name, or ``default`` if the result would be empty.
    """
    # Control characters (NUL, newline, tab, ...) are removed together with the
    # reserved punctuation: a NUL byte makes open() fail outright, and the
    # others produce filenames that are awkward to handle.
    cleaned = re.sub(r'[\\/:"*?<>|\x00-\x1f]+', "", name or "")
    cleaned = cleaned.strip().replace(" ", "_")
    # An empty result would make the caller write a file named just ".json".
    return cleaned or default

def get_playlist_name(playlist_id):
    """Look up the title of a playlist through the YouTube Data API.

    Returns the title of the first item in the response. If the response has
    no items, or the lookup raises, the string "captions" is returned instead
    (errors are printed, not propagated).
    """
    fallback_title = "captions"
    try:
        request = youtube.playlists().list(part="snippet", id=playlist_id)
        matches = request.execute().get("items", [])
        title = matches[0]["snippet"]["title"] if matches else fallback_title
    except Exception as err:
        print(f"Error retrieving playlist name: {err}")
        title = fallback_title
    return title

def get_video_captions(video_id):
    """Retrieve a video's caption segments along with their start timestamps.

    Supports both call styles of ``youtube-transcript-api``: the static
    ``YouTubeTranscriptApi.get_transcript(video_id)`` helper when the installed
    release still provides it, and otherwise the instance API
    ``YouTubeTranscriptApi().fetch(video_id)``.

    Args:
        video_id: The YouTube video ID.

    Returns:
        A list of ``{"start": ..., "text": ...}`` dicts in transcript order.
        If the transcript cannot be retrieved for any reason, the error is
        printed and an empty list is returned.
    """
    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older releases: static helper that returns a list of dicts.
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # Releases without the static helper: fetch() returns a transcript
            # object, and to_raw_data() converts it to the same list-of-dicts
            # shape the static helper used to return.
            transcript = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        # Keep only the fields that are saved; other keys are dropped.
        return [
            {"start": segment["start"], "text": segment["text"]}
            for segment in transcript
        ]
    except Exception as e:
        print(f"Error retrieving captions for {video_id}: {e}")
        return []

def get_video_details(video_id):
    """Fetch a video's title, watch URL, and publication timestamp.

    Queries the YouTube Data API for the video's snippet. When the video is
    not found or the request fails (the error is printed), a placeholder
    record with "Unknown" / "" / "N/A" values is returned instead.
    """
    details = {"title": "Unknown", "url": "", "published_time": "N/A"}
    try:
        response = youtube.videos().list(part="snippet", id=video_id).execute()
        found = response.get("items", [])
        if found:
            meta = found[0]["snippet"]
            # Only replaces the placeholder once every field has been read.
            details = {
                "title": meta["title"],
                "url": f"https://www.youtube.com/watch?v={video_id}",
                "published_time": meta["publishedAt"],
            }
    except Exception as err:
        print(f"Error retrieving details for video {video_id}: {err}")
    return details

def get_playlist_videos(playlist_id):
    """Collect details and captions for every video in a playlist.

    Walks the playlist page by page (50 items per request), following the
    "nextPageToken" of each response until none is returned. For each item the
    video's details and captions are looked up and combined into one record
    with the keys "title", "url", "published_time" and "captions".
    """
    detail_fields = ("title", "url", "published_time")
    collected = []
    page_token = None

    while True:
        request = youtube.playlistItems().list(
            part="snippet",
            playlistId=playlist_id,
            maxResults=50,
            pageToken=page_token,
        )
        page = request.execute()

        for entry in page["items"]:
            vid = entry["snippet"]["resourceId"]["videoId"]
            # Details are fetched before captions, one video at a time.
            details = get_video_details(vid)
            segments = get_video_captions(vid)

            record = {field: details[field] for field in detail_fields}
            record["captions"] = segments
            collected.append(record)

        # A missing or empty token means this was the last page.
        page_token = page.get("nextPageToken")
        if not page_token:
            return collected

def save_videos_to_json(videos, output_filename):
    """Write the collected video records to a UTF-8 JSON file.

    The data is pretty-printed with a four-space indent and non-ASCII
    characters are written as-is rather than escaped. A confirmation line
    with the number of records and the target path is printed afterwards.
    """
    dump_options = {"ensure_ascii": False, "indent": 4}
    with open(output_filename, mode="w", encoding="utf-8") as handle:
        json.dump(videos, handle, **dump_options)

    video_count = len(videos)
    print(f"✅ Saved {video_count} videos to {output_filename}")

In [None]:
# IDs of the playlists to export. Each playlist is written to its own JSON
# file, named after the playlist title.
play_list_list = [
    'PLHutrxqbP1ByyYAbjx61pU61T2fDPes0N',
    'PLHutrxqbP1Bzsez_rFcrwXGq8YRvKjTav',
    'PLHutrxqbP1Bzx4wbSCGhCh572Ylmcw8__',
    'PLHutrxqbP1Bwt_eNlpY-CcA9PIniD7fNv',
    'PLHutrxqbP1Bw6AY6y5TNEYI4-n2MmYHDE',
    'PLHutrxqbP1BzSwuckHTU0by4BVlsIijCx',
    'PLHutrxqbP1Bz3U6K21sclYPboutLb8srE',
    'PLHutrxqbP1Bx-zZxtex-EgwMFsjjjwYGO',
    'PLHutrxqbP1ByGhrCv0tZADlIQxwHEbhdL',
    'PLHutrxqbP1BzsyictQ1vxdo4C2bTMt--o',
    'PLHutrxqbP1Bwjac98fuVeqLfnzDVYqW5g',
    'PLHutrxqbP1ByxEciTO-k67JE4zYuxaf3Y',
    'PLHutrxqbP1BzIhQcPNony7TtaOOD8NUGC',
    'PLHutrxqbP1BxcI_ujQx-KvZnH9uFsaAz-',
    'PLHutrxqbP1Bx1WX20-nTVwLV7yPwBnfhz',
    'PLHutrxqbP1ByHc4Vyck20Jg3TSRGFEtVg',
    'PLHutrxqbP1BzyutPkuzTXPwAroHS9j8Vd',
    'PLHutrxqbP1ByXVJyAJe5zrnNkZA11xJI0',
    'PLHutrxqbP1Byiv4uuGLS9toMVDHzYg3Zi',
    'PLHutrxqbP1Bwc7Q0yTWnAlod_tbHFbUBJ',
    'PLHutrxqbP1BxK4KzYL8tJmMhV2yhGZNlY',
    'PLHutrxqbP1BxlkXC7-88PQUjJn65Ejbav',
    'PLHutrxqbP1BzetP6z9Ty_F4wYvcPVs7TI',
    'PLHutrxqbP1BzzTi8odV40RhLZQjK8Iy6_',
]

# Filenames already claimed in this run (lower-cased, because some filesystems
# are case-insensitive), so that a later playlist never overwrites an earlier one.
used_filenames = set()

for playlist_id in play_list_list:

    # Get the playlist name to name the file accordingly. If nothing usable is
    # left after sanitizing, fall back to the playlist ID rather than ".json".
    playlist_title = get_playlist_name(playlist_id)
    sanitized_title = sanitize_filename(playlist_title) or playlist_id
    output_filename = f"{sanitized_title}.json"

    # Two playlists can share a title, or both fall back to the same default
    # name when the title lookup fails; disambiguate with the playlist ID.
    if output_filename.lower() in used_filenames:
        output_filename = f"{sanitized_title}_{playlist_id}.json"
    used_filenames.add(output_filename.lower())

    try:
        videos = get_playlist_videos(playlist_id)
        save_videos_to_json(videos, output_filename)
    except Exception as e:
        # get_playlist_videos lets API errors propagate; report the failure
        # and move on so one bad playlist does not abort the whole batch.
        print(f"Error exporting playlist {playlist_id}: {e}")