In [1]:
import googleapiclient.discovery
from pytubefix import YouTube, Search
from pytubefix.cli import on_progress
from pytubefix.contrib.search import Filter
import pandas as pd
import os
import emoji

# Set up the YouTube Data API v3 client used by get_comments().
# The key is taken from the YOUTUBE_API_KEY environment variable when it is
# set, so a real credential never has to be written into this file; the
# original 'API_KEY' placeholder is kept as the fallback value.
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = os.environ.get("YOUTUBE_API_KEY", 'API_KEY')
youtube = googleapiclient.discovery.build(api_service_name, api_version, developerKey=DEVELOPER_KEY)


def build_search_filters():
    """Return the pytubefix search filters used for every keyword search.

    Returns:
        A dict with two entries: ``'type'`` set to the "Shorts" type filter
        and ``'sort_by'`` set to the "Relevance" sort filter.
    """
    shorts_only = Filter.get_type("Shorts")
    by_relevance = Filter.get_sort_by("Relevance")
    return {'type': shorts_only, 'sort_by': by_relevance}


def search_videos_by_keyword(keyword, max_results):
    """Search YouTube Shorts for a keyword and return their details.

    Args:
        keyword: Search query passed to ``pytubefix.Search``.
        max_results: Upper bound on the number of shorts taken from the
            search results (applied as a slice, so fewer rows are returned
            when the search yields fewer shorts).

    Returns:
        A DataFrame with the columns ``video_id``, ``title``, ``watch_url``,
        ``views``, ``likes`` and ``keywords``, one row per short. The columns
        are present even when the search returns nothing, so callers can
        index them without a ``KeyError``.
    """
    columns = ["video_id", "title", "watch_url", "views", "likes", "keywords"]

    filters = build_search_filters()
    results = Search(keyword, filters=filters)

    # Each column name is also the attribute name on the video object.
    video_details = [
        {field: getattr(video, field) for field in columns}
        for video in results.shorts[:max_results]
    ]

    # Passing `columns` explicitly keeps the schema stable for an empty list.
    return pd.DataFrame(video_details, columns=columns)


def download_video(video_url, download_path="C:\\downloads"):
    """Download one YouTube video and report its title on stdout.

    Args:
        video_url: URL of the video to download.
        download_path: Directory handed to the stream's ``download`` call
            as ``output_path``.

    Returns:
        The value returned by the stream's ``download`` call (presumably the
        path of the saved file).
    """
    video = YouTube(video_url, on_progress_callback=on_progress)
    best_stream = video.streams.get_highest_resolution()
    saved_to = best_stream.download(output_path=download_path)
    print(f"Downloaded: {video.title}")
    return saved_to


def download_videos(video_details, download_path="C:\\downloads"):
    """Download every video listed in a video-details DataFrame.

    Args:
        video_details: DataFrame with a ``watch_url`` column; each value is
            passed to ``download_video``.
        download_path: Directory forwarded to ``download_video``.
    """
    watch_urls = video_details['watch_url']
    for link in watch_urls:
        download_video(link, download_path=download_path)

def get_comments(video_id, max_comments=200):
    """Fetch top-level comments for a YouTube video as a DataFrame.

    Pages through ``commentThreads.list`` using the module-level ``youtube``
    client until ``max_comments`` comments are collected or no further page
    is available.

    Args:
        video_id: ID of the video whose comment threads are requested.
        max_comments: Maximum number of comments to return. A value of zero
            or less returns an empty DataFrame without contacting the API.

    Returns:
        A DataFrame with the columns ``like_count``, ``text`` and
        ``video_id``, one row per top-level comment.
    """
    columns = ['like_count', 'text', 'video_id']
    if max_comments <= 0:
        return pd.DataFrame([], columns=columns)

    comments = []
    threads = youtube.commentThreads()
    # Never ask for more per page than is still needed; 100 is the page size
    # the original request used as its ceiling.
    request = threads.list(part="snippet", videoId=video_id, maxResults=min(100, max_comments))

    while request is not None and len(comments) < max_comments:
        response = request.execute()
        remaining = max_comments - len(comments)
        for item in response.get('items', [])[:remaining]:
            comment = item['snippet']['topLevelComment']['snippet']
            comments.append([comment['likeCount'], comment['textOriginal'], comment['videoId']])

        # list_next builds the request for the following page and returns
        # None once the response carries no nextPageToken.
        request = threads.list_next(request, response)

    return pd.DataFrame(comments, columns=columns)


def fetch_comments_for_videos(video_ids, max_comments=200):
    """Fetch comments for several videos and combine them into one DataFrame.

    Args:
        video_ids: Iterable of video IDs; each is passed to ``get_comments``.
        max_comments: Per-video comment limit forwarded to ``get_comments``.

    Returns:
        A single DataFrame holding the rows of every per-video result with a
        fresh 0..n-1 index. When ``video_ids`` is empty, an empty DataFrame
        with the columns ``like_count``, ``text`` and ``video_id`` is
        returned so that a later merge on ``video_id`` still works.
    """
    # Collect first and concatenate once: concatenating inside the loop
    # re-copies all previously gathered rows on every iteration.
    frames = [get_comments(video_id, max_comments) for video_id in video_ids]
    if not frames:
        return pd.DataFrame([], columns=['like_count', 'text', 'video_id'])
    return pd.concat(frames, ignore_index=True)


def convert_emojis_to_text(df, column='text'):
    """Replace emojis in one DataFrame column with their text names.

    String values are run through ``emoji.demojize``; values of any other
    type are kept unchanged. The column is overwritten on the DataFrame
    that was passed in, and that same DataFrame is returned.

    Args:
        df: DataFrame containing ``column``.
        column: Name of the column to convert.

    Returns:
        ``df`` itself, with ``column`` replaced by the converted values.
    """
    def _demojize_if_text(value):
        # Non-string cells are passed through untouched.
        if not isinstance(value, str):
            return value
        return emoji.demojize(value)

    converted = df[column].apply(_demojize_if_text)
    df[column] = converted
    return df


def save_to_csv(df, file_path):
    """Write a DataFrame to a UTF-8 CSV file and announce the destination.

    Args:
        df: DataFrame to export; its index is not written.
        file_path: Destination path of the CSV file.
    """
    export_options = {'index': False, 'encoding': 'utf-8'}
    df.to_csv(file_path, **export_options)
    message = f"Exported data to {file_path}"
    print(message)


def main(
    keyword,
    download_path="C:\\downloads",
    csv_file_path="C:\\downloads\\youtube_video_comments.csv",
    max_results=15,
):
    """Search shorts for a keyword, download them and export their comments.

    Args:
        keyword: Search query used to find videos.
        download_path: Directory the videos are downloaded into.
        csv_file_path: Path of the CSV file that receives the merged video
            details and comments.
        max_results: Maximum number of search results to process.
    """
    video_details_df = search_videos_by_keyword(keyword, max_results)
    if video_details_df.empty:
        # Nothing to download or merge; stop before indexing columns that an
        # empty search result may not have.
        print(f"No videos found for keyword: {keyword!r}")
        return

    download_videos(video_details_df, download_path)

    # Fetch comments for each video
    all_comments = fetch_comments_for_videos(video_details_df['video_id'])

    # Left merge keeps videos that have no comments (their comment columns
    # are left empty).
    final_df = video_details_df.merge(all_comments, on='video_id', how='left')

    # Convert emojis in comments to text
    final_df = convert_emojis_to_text(final_df)

    # Export final DataFrame to CSV
    save_to_csv(final_df, csv_file_path)


# Script entry point: when this file is executed directly, run the full
# search / download / comment-export pipeline for a sample keyword using
# main()'s default download directory and CSV path.
if __name__ == "__main__":
    keyword = 'sad videos'
    main(keyword)


Downloaded: whatsapp status / relationship sad lines / short video
Downloaded: HEART TOUCHING 😭😢 Whatsapp status#shorts billionaire attitude status 🔥#motivational #quotes
Downloaded: Thats Some Real Sad Shit😢 #shorts #sadgym #tiktokchallenge #sadtiktoks #sadedits #depressed #cry
Downloaded: Who hurt this man 💔 | Heart Broken | Sad Quotes | Whatsapp Sad Status
Downloaded: Boys Pain🥺💔💯 #motivation #shortsvideo #viral #viralshorts #youtubeshorts #viralvideo  #4kresolution
Downloaded: Pain Of Every Student 😭😭💔😭😭 Status | Boy Life Sad Status Video | Whatsapp Status Video
Downloaded: Times When The Boys Cried😢part 2
Downloaded: The Life of Death 🥺 || Emotional & Sad Status 😢 || #shorts
Downloaded: Stray - Its just a game #shorts #stray #sad #painhub
Downloaded: ❗SAD❗ Moments that make you cry 🥹 #shorts #shortsfeed
Downloaded: Meri duniya ko💔 • sad ignore status • sad girl shayari
Downloaded: Deep quotes | Deep | Quotes | Sad | Tiktok #shorts #facts #deep
Downloaded: Love Story ❤️| An Emotion