<a href="https://colab.research.google.com/github/parth31533/YT-Project/blob/main/APIsMerger_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
import os
import re
from datetime import datetime
import zipfile
import assemblyai as aai

# YouTube Data API v3 key. Prefer supplying it through the YOUTUBE_API_KEY
# environment variable so the secret does not have to live in the source;
# the literal is kept only as a backward-compatible fallback.
# NOTE(review): a key committed to a shared notebook should be treated as
# exposed -- rotate it and remove the fallback once callers set the variable.
API_KEY = os.environ.get("YOUTUBE_API_KEY") or "AIzaSyD3yF_r1J0DkcbKNtTBwzQlmMN_LWSWRlk"
# Service name and version passed to googleapiclient's build().
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

def get_channel_video_links_and_dates(channel_id):
    """Fetch all video links, titles, and release dates from a YouTube channel.

    Looks up the channel's "uploads" playlist, then pages through it 50 items
    at a time until the response carries no ``nextPageToken``.

    Args:
        channel_id: ID of the YouTube channel to list.

    Returns:
        Three parallel lists ``(video_links, video_titles, video_dates)``.
        Links are ``https://www.youtube.com/watch?v=<id>`` URLs; dates are the
        playlist items' ``snippet.publishedAt`` strings, unmodified. All three
        lists are empty when the channel lookup returns no items.
    """
    youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=API_KEY)
    video_links = []
    video_titles = []
    video_dates = []

    # Fetch uploads playlist ID for the channel
    request = youtube.channels().list(
        part="contentDetails",
        id=channel_id
    )
    response = request.execute()

    # Use .get(): a lookup that matches nothing may come back without an
    # "items" key at all, which would otherwise raise KeyError here.
    channel_items = response.get("items")
    if not channel_items:
        print("Channel not found!")
        return [], [], []

    uploads_playlist_id = channel_items[0]["contentDetails"]["relatedPlaylists"]["uploads"]

    # Fetch videos from the uploads playlist, one page per iteration
    next_page_token = None
    while True:
        playlist_request = youtube.playlistItems().list(
            part="snippet",
            playlistId=uploads_playlist_id,
            maxResults=50,
            pageToken=next_page_token
        )
        playlist_response = playlist_request.execute()

        # Tolerate a page without "items" (e.g. an empty uploads playlist).
        for item in playlist_response.get("items", []):
            snippet = item["snippet"]
            video_id = snippet["resourceId"]["videoId"]

            video_links.append(f"https://www.youtube.com/watch?v={video_id}")
            video_titles.append(snippet["title"])
            video_dates.append(snippet["publishedAt"])

        next_page_token = playlist_response.get("nextPageToken")
        if not next_page_token:
            break

    return video_links, video_titles, video_dates

def sanitize_filename(filename):
    """Replace characters that are not allowed in file names with "_".

    Covers backslash, slash, colon, asterisk, question mark, double quote,
    angle brackets and pipe. The colon is included because it is invalid in
    Windows file names and appears frequently in video titles.

    Args:
        filename: Raw name (for example a video title).

    Returns:
        The name with every listed character replaced by an underscore.
    """
    return re.sub(r'[\\/:*?"<>|]', "_", filename)

def extract_transcripts(video_links, video_titles, video_dates):
    """Fetch each video's transcript and store it as a text file in a zip.

    Creates (or overwrites) ``youtube_transcripts.zip`` in the working
    directory with one ``<sanitized title>_<YYYY-MM-DD>.txt`` entry per video.
    Videos whose transcript is disabled or missing are appended to
    ``failed_videos.txt`` as ``"<title> (<link>)"``.

    Args:
        video_links: Watch URLs containing a ``v=<id>`` query parameter.
        video_titles: Titles, parallel to ``video_links``.
        video_dates: Publish timestamps formatted ``%Y-%m-%dT%H:%M:%SZ``,
            parallel to ``video_links``.

    Raises:
        ValueError: If a date does not match the expected format.
    """
    output_folder = "video_transcripts"
    # Created up front as before; transcripts themselves now go straight into
    # the archive, so no temporary file can be left behind on failure.
    os.makedirs(output_folder, exist_ok=True)
    zip_filename = "youtube_transcripts.zip"

    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for link, title, date in zip(video_links, video_titles, video_dates):
            video_id = link.split("v=")[-1]
            sanitized_title = sanitize_filename(title)
            video_date = datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ")
            file_name = f"{sanitized_title}_{video_date.strftime('%Y-%m-%d')}.txt"

            # Keep the try body to the one call that raises these exceptions.
            try:
                transcript = YouTubeTranscriptApi.get_transcript(video_id)
            except (TranscriptsDisabled, NoTranscriptFound) as e:
                print(f"Could not fetch transcript for {link}: {e}")
                # Log the video link or title for videos with disabled subtitles
                with open("failed_videos.txt", "a", encoding="utf-8") as f:
                    f.write(f"{title} ({link})\n")
                continue

            # One transcript segment per line; writestr() encodes str as UTF-8.
            transcript_text = "".join(f"{line['text']}\n" for line in transcript)
            zipf.writestr(file_name, transcript_text)
            print(f"Transcript saved for: {title}")

def _extract_failed_video_link(entry):
    """Return the video URL from one ``failed_videos.txt`` line.

    Lines are written as ``"<title> (<link>)"``; a line that does not end in
    a parenthesised URL is returned unchanged and treated as a bare link.
    """
    match = re.search(r"\((https?://\S+)\)\s*$", entry)
    return match.group(1) if match else entry


def process_failed_videos():
    """Process failed videos by using AssemblyAI API for transcript extraction.

    Reads ``failed_videos.txt`` (one ``"<title> (<link>)"`` entry per line),
    requests a transcript for each link and appends the text to
    ``youtube_transcripts.zip``. Entries that cannot be transcribed are
    appended to ``failed_assemblyai_videos.txt``. Does nothing except print a
    message when ``failed_videos.txt`` does not exist.

    The API key is taken from the ``ASSEMBLYAI_API_KEY`` environment variable
    when set, otherwise from the fallback literal below.
    """
    failed_videos_file = "failed_videos.txt"
    if not os.path.exists(failed_videos_file):
        print("No failed videos to process.")
        return

    with open(failed_videos_file, "r", encoding="utf-8") as f:
        # Skip blank lines so they are not submitted as empty URLs.
        entries = [line.strip() for line in f if line.strip()]

    # The SDK reads its key from aai.settings, not from a module attribute.
    # NOTE(review): the fallback key is committed in source -- rotate it.
    aai.settings.api_key = (
        os.environ.get("ASSEMBLYAI_API_KEY") or "d773b67f986746528b961cd5772004b1"
    )
    transcriber = aai.Transcriber()

    for entry in entries:
        video_link = _extract_failed_video_link(entry)

        try:
            # Use AssemblyAI to transcribe the video
            print(f"Requesting transcript for {video_link} using AssemblyAI...")
            # transcribe() waits for the job to finish, so no manual polling.
            # NOTE(review): AssemblyAI expects a directly downloadable media
            # URL; a YouTube watch page may be rejected -- confirm.
            transcript = transcriber.transcribe(video_link)
            if transcript.status == aai.TranscriptStatus.error:
                raise RuntimeError(transcript.error)

            transcript_text = transcript.text or ""
            # Name the archive entry after the full log line, as before.
            file_name = f"{sanitize_filename(entry)}.txt"

            # Add the transcript to the zip archive without a temporary file
            with zipfile.ZipFile("youtube_transcripts.zip", 'a', zipfile.ZIP_DEFLATED) as zipf:
                zipf.writestr(file_name, transcript_text)
            print(f"Transcript saved for: {video_link}")

        except Exception as e:
            # Best effort per video: record the failure and move on.
            print(f"Error processing {video_link} with AssemblyAI: {e}")
            with open("failed_assemblyai_videos.txt", "a", encoding="utf-8") as logf:
                logf.write(f"{entry}\n")

# Example usage: list a channel's uploads, zip the available transcripts,
# then retry the videos that had none.
channel_id = "UCsfp0zw1hNxpy_wDig8oExA"  # Replace with your YouTube channel ID
video_links, video_titles, video_dates = get_channel_video_links_and_dates(
    channel_id
)
extract_transcripts(
    video_links=video_links,
    video_titles=video_titles,
    video_dates=video_dates,
)
process_failed_videos()


Could not fetch transcript for https://www.youtube.com/watch?v=AU_m12Nuk4k: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=AU_m12Nuk4k! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!
Transcript saved for: Bitcoin Vs. Quantum - Stock Market LIVE, Live Trading, Stocks To Buy NOW
Transcript saved for: MONDAY MANIA - Middle East Conflict - Stock Market LIVE, Live Trading, Stocks To Buy NOW
Could not fetch transcript for https://www.youtube.com/watch?v=u7uOUHbhqXw: 
Could not retrieve a transcript for the video https://www.youtube.co