In [2]:
import os
import subprocess
import pandas as pd
from pytube import YouTube

def download_video(url, output_path):
    """Download a YouTube video as a progressive MP4 and return its file path.

    Args:
        url: Full YouTube URL of the video.
        output_path: Directory the video file is written into.

    Returns:
        The path of the downloaded file, as reported by pytube.

    Raises:
        ValueError: If the video exposes no progressive MP4 stream.
        Exception: Any pytube error (e.g. age-restricted or unavailable
            videos) is propagated unchanged to the caller.
    """
    yt = YouTube(url)
    # Progressive streams bundle audio and video in one file, so no muxing
    # step is needed before handing the file to ffmpeg.
    video_stream = yt.streams.filter(file_extension="mp4", progressive=True).first()
    if video_stream is None:
        # .first() returns None on an empty query; fail with a clear message
        # instead of an AttributeError on the next line.
        raise ValueError(f"No progressive MP4 stream available for {url}")
    # download() returns the path it actually wrote, which is more reliable
    # than rebuilding it from default_filename.
    return video_stream.download(output_path)

def extract_frames(video_path, output_path, frame_interval=10):
    """Extract still frames from a video with ffmpeg.

    Frames are written to ``output_path`` as ``frame_0001.jpg``,
    ``frame_0002.jpg``, ... using the filter ``fps=1/<frame_interval>``,
    i.e. one frame per ``frame_interval`` seconds of video.

    Args:
        video_path: Path of the input video file.
        output_path: Directory for the extracted frames; created if missing.
        frame_interval: Seconds between consecutive extracted frames.
            Must be positive.

    Raises:
        ValueError: If ``frame_interval`` is not positive.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    if frame_interval <= 0:
        # fps=1/0 (or a negative rate) is not a valid ffmpeg filter argument.
        raise ValueError(f"frame_interval must be positive, got {frame_interval!r}")

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_path, exist_ok=True)

    output_pattern = os.path.join(output_path, "frame_%04d.jpg")

    cmd = [
        "ffmpeg",
        "-i", video_path,
        "-vf", f"fps=1/{frame_interval}",
        output_pattern,
    ]

    # check=True surfaces ffmpeg failures instead of silently ignoring them,
    # so callers do not report success for videos that produced no frames.
    subprocess.run(cmd, check=True)

def _extract_video_id(url):
    """Return the video id contained in a YouTube URL.

    Handles ``watch?v=<id>`` URLs (ignoring any additional query parameters)
    as well as ``youtu.be/<id>``, ``/embed/<id>`` and ``/shorts/<id>`` links.

    Raises:
        ValueError: If no video id can be found in ``url``.
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)
    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    host = (parsed.hostname or "").lower()
    if host.endswith("youtu.be") or parsed.path.startswith(("/embed/", "/shorts/")):
        candidate = parsed.path.rstrip("/").rsplit("/", 1)[-1]
        if candidate:
            return candidate

    raise ValueError(f"Cannot determine video id from URL: {url!r}")


if __name__ == "__main__":
    # Load your dataset with YouTube URLs
    dataset_path = r"T:\sem_3\Neural Networks and Deep Learning\project\NNDL\good_news.csv"
    df = pd.read_csv(dataset_path)

    # Specify the base output directory for frames
    base_output_directory = r"T:\sem_3\Neural Networks and Deep Learning\project\NNDL\good_news_data_img\good_news_data_images"

    total = len(df)

    # Iterate through each URL in the dataset. enumerate() gives a 1-based
    # progress counter that does not depend on the DataFrame's index labels.
    for position, youtube_url in enumerate(df["video_url"], start=1):
        try:
            # Resolve the id first so a malformed URL fails before any
            # bandwidth is spent on the download.
            video_id = _extract_video_id(youtube_url)

            # Create a unique output folder for each video
            output_directory = os.path.join(base_output_directory, f"video_{video_id}")

            video_path = download_video(youtube_url, base_output_directory)
            extract_frames(video_path, output_directory)

            print(f"Processing video {position}/{total}: {youtube_url}")
        except Exception as e:
            # Deliberate best-effort boundary: one bad video (age-restricted,
            # removed, malformed URL, ...) must not abort the whole batch.
            print(f"Error processing video {position}/{total}: {youtube_url}")
            print(f"Error message: {str(e)}")


Processing video 1/265: https://www.youtube.com/watch?v=9qpMuFrTEKo
Processing video 2/265: https://www.youtube.com/watch?v=F5pgG1M_h_U
Error processing video 3/265: https://www.youtube.com/watch?v=NDjNX3nEfYo
Error message: NDjNX3nEfYo is age restricted, and can't be accessed without logging in.
Processing video 4/265: https://www.youtube.com/watch?v=5pvaLA4X9GE
Processing video 5/265: https://www.youtube.com/watch?v=vq0L_4y-hVs
Processing video 6/265: https://www.youtube.com/watch?v=oilZ1hNZPRM
Error processing video 7/265: https://www.youtube.com/watch?v=VQLi2GYVULc
Error message: VQLi2GYVULc is age restricted, and can't be accessed without logging in.
Processing video 8/265: https://www.youtube.com/watch?v=Eg08rJGKjtA
Processing video 9/265: https://www.youtube.com/watch?v=JNvpcGV1frQ
Processing video 10/265: https://www.youtube.com/watch?v=cqvVL8IurMw
Error processing video 11/265: https://www.youtube.com/watch?v=o1zIgTwENPg
Error message: o1zIgTwENPg is age restricted, and can't 