## Getting API Keys

In [1]:
from dotenv import load_dotenv
import os

# Load variables defined in a .env file into the process environment
# (python-dotenv), so the key never has to appear in the notebook itself.
load_dotenv()

# Developer key for the YouTube Data API v3, read from the environment.
YT_KEY = os.getenv("YOUTUBE_API_KEY")

# os.getenv returns None for an unset variable; stop here with a clear message
# instead of letting the first API request fail with an opaque HTTP error.
if not YT_KEY:
    raise RuntimeError(
        "YOUTUBE_API_KEY is not set. Define it in your environment or in a .env file."
    )

## Importing Necessary Libraries

In [2]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from youtube_transcript_api import YouTubeTranscriptApi
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd

## 🔑 Authenticate with YouTube Data API v3

In [3]:
# Expose the key loaded from the environment under the name used for the client.
YOUTUBE_API_KEY = YT_KEY

# YouTube Data API v3 client, authenticated with the developer key.
youtube = build(
    serviceName="youtube",
    version="v3",
    developerKey=YOUTUBE_API_KEY,
)

# Single shared VADER analyzer, reused for both comments and transcripts.
analyzer = SentimentIntensityAnalyzer()

## 🔥 Get Trending Videos (from YouTube)

In [4]:
def get_trending_videos(region_code='US', max_results=50):
    """Fetch videos from YouTube's "mostPopular" chart for a region.

    Uses the module-level ``youtube`` API client. The API returns at most
    50 videos per request, so larger ``max_results`` values are served by
    following ``nextPageToken`` until enough videos are collected or the
    chart is exhausted.

    Args:
        region_code: Region code passed to the API as ``regionCode``.
        max_results: Maximum number of videos to return. Values of 0 or
            less return an empty list without calling the API.

    Returns:
        A list of dicts with the keys ``video_id``, ``title``, ``channel``
        and ``description``, in the order returned by the API.

    Raises:
        Whatever the API client raises on a failed request; errors are not
        caught here.
    """
    page_size_limit = 50  # videos.list accepts maxResults between 1 and 50
    videos = []
    page_token = None

    while len(videos) < max_results:
        params = {
            'part': "snippet,statistics",
            'chart': "mostPopular",
            'regionCode': region_code,
            'maxResults': min(page_size_limit, max_results - len(videos)),
        }
        # Only send pageToken on follow-up requests so the first request is
        # identical to a plain single-page call.
        if page_token:
            params['pageToken'] = page_token

        response = youtube.videos().list(**params).execute()

        # A response without 'items' is treated as an empty page.
        items = response.get('items', [])
        videos.extend({
            'video_id': item['id'],
            'title': item['snippet']['title'],
            'channel': item['snippet']['channelTitle'],
            'description': item['snippet']['description']
        } for item in items)

        page_token = response.get('nextPageToken')
        if not items or not page_token:
            break  # chart exhausted

    return videos[:max_results]

## 🧠 Extract Transcripts

In [5]:
def get_transcript(video_id):
    """Return the transcript of a video as one space-joined string.

    Best-effort: if the transcript cannot be fetched for any reason, the
    failure is reported on stdout and an empty string is returned so the
    caller can continue with the next video.

    Args:
        video_id: ID of the video whose transcript is requested.

    Returns:
        The ``text`` of every transcript entry joined by single spaces, or
        ``""`` when no transcript could be retrieved.
    """
    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        return ' '.join(segment['text'] for segment in segments)
    except Exception as exc:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still stop a long-running cell, and
        # report the cause so an all-empty result is not mistaken for data.
        print(f"⚠️ No transcript for video {video_id}: {type(exc).__name__}")
        return ""

## 💬 Fetch Comments

In [6]:
def get_comments(video_id, max_comments=5):
    """Fetch the text of top-level comments for a video.

    Uses the module-level ``youtube`` API client and issues a single
    ``commentThreads.list`` request.

    Args:
        video_id: ID of the video whose comment threads are requested.
        max_comments: Passed to the API as ``maxResults``.

    Returns:
        A list with the ``textDisplay`` of each thread's top-level comment.
        Returns an empty list when the response has no items or when the
        request fails with an ``HttpError`` (the error is printed).
    """
    try:
        response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            maxResults=max_comments,
            textFormat="plainText"
        ).execute()
    except HttpError as e:
        # A 403 is not always "comments disabled" (quota and permission
        # errors also use 403), so look for the specific reason in the error
        # body before reporting it that way.
        body = getattr(e, 'content', b'') or b''
        body = body.decode('utf-8', errors='replace') if isinstance(body, bytes) else str(body)
        if e.resp.status == 403 and 'commentsDisabled' in body:
            print(f"🚫 Comments disabled for video: {video_id}")
        else:
            print(f"⚠️ Error for video {video_id}: {e}")
        return []

    return [
        thread['snippet']['topLevelComment']['snippet']['textDisplay']
        for thread in response.get('items', [])
    ]

## 🧪 Put It Together

In [7]:
%%time
# --- Tunable settings -------------------------------------------------------
# Number of videos (each with at least one comment) to include in the dataset.
desired_video_count = 15
# Upper bound on the comments requested per video.
comments_per_video = 50
# Destination of the collected rows.
output_path = "../data/youtube_data.csv"

collected = 0
data = []

# Fetch more candidates than needed, because videos without comments are skipped.
all_videos = get_trending_videos(max_results=50)

for vid in all_videos:
    if collected >= desired_video_count:
        break

    video_id = vid['video_id']
    comments = get_comments(video_id, max_comments=comments_per_video)

    if not comments:
        continue  # Skip videos with no usable comments

    # Score the transcript once per video; every comment row reuses the value.
    transcript = get_transcript(video_id)
    transcript_sentiment = analyzer.polarity_scores(transcript)['compound']

    video_url = f"https://www.youtube.com/watch?v={video_id}"

    # One output row per comment, carrying the video-level fields alongside it.
    for comment in comments:
        data.append({
            'video_title': vid['title'],
            'channel': vid['channel'],
            'comment': comment,
            'comment_sentiment': analyzer.polarity_scores(comment)['compound'],
            'transcript_sentiment': transcript_sentiment,
            'video_url': video_url
        })

    collected += 1

df = pd.DataFrame(data)

# Create the target directory first so a missing folder does not discard the
# results of all the API calls above.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)
print("✅ Finished collecting data.")


✅ Finished collecting data.
CPU times: user 342 ms, sys: 43.1 ms, total: 385 ms
Wall time: 11.5 s


In [8]:
# Preview the last 15 rows of the collected DataFrame.
df.tail(15)

Unnamed: 0,video_title,channel,comment,comment_sentiment,transcript_sentiment,video_url
735,Update On Willow. She’s Been Struggling!,Kristi Cates,Stay strong willow. ❤ to you,0.8176,0.0,https://www.youtube.com/watch?v=yubV23utfEA
736,Update On Willow. She’s Been Struggling!,Kristi Cates,Because I don't have to be able to get it to t...,0.0,0.0,https://www.youtube.com/watch?v=yubV23utfEA
737,Update On Willow. She’s Been Struggling!,Kristi Cates,Sweet willow prayers for you and woman.,0.4588,0.0,https://www.youtube.com/watch?v=yubV23utfEA
738,Update On Willow. She’s Been Struggling!,Kristi Cates,I am praying for Willow. Please give her a hug...,0.9712,0.0,https://www.youtube.com/watch?v=yubV23utfEA
739,Update On Willow. She’s Been Struggling!,Kristi Cates,🙏🏾🙏🏾🙏🏾🙏🏾 Lord please help Willow. I am going t...,0.8957,0.0,https://www.youtube.com/watch?v=yubV23utfEA
740,Update On Willow. She’s Been Struggling!,Kristi Cates,poor Willow! Hang in there Kristi. We know h...,0.674,0.0,https://www.youtube.com/watch?v=yubV23utfEA
741,Update On Willow. She’s Been Struggling!,Kristi Cates,WILLOW b Google be lying,-0.5267,0.0,https://www.youtube.com/watch?v=yubV23utfEA
742,Update On Willow. She’s Been Struggling!,Kristi Cates,OMG I'm crying. I love Willow.,0.2732,0.0,https://www.youtube.com/watch?v=yubV23utfEA
743,Update On Willow. She’s Been Struggling!,Kristi Cates,You got this Queen Willow. I'm praying for you.❤,0.7717,0.0,https://www.youtube.com/watch?v=yubV23utfEA
744,Update On Willow. She’s Been Struggling!,Kristi Cates,Oh please god no! I can’t lose sweet Willow!!!...,0.9094,0.0,https://www.youtube.com/watch?v=yubV23utfEA


In [9]:
# Print the DataFrame summary to check that no column contains missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 750 entries, 0 to 749
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   video_title           750 non-null    object 
 1   channel               750 non-null    object 
 2   comment               750 non-null    object 
 3   comment_sentiment     750 non-null    float64
 4   transcript_sentiment  750 non-null    float64
 5   video_url             750 non-null    object 
dtypes: float64(2), object(4)
memory usage: 35.3+ KB


In [10]:
# Count rows per video title; each row is one comment, so this is the number
# of comments collected for each video.
df["video_title"].value_counts()

video_title
FULL SEGMENT: Randy Orton ROCKS John Cena with the RKO: Raw highlights, April 21, 2025    50
Can We Beat Josh Allen & Jon Rahm in a Golf Match?                                        50
Carrie Underwood Praises with "How Great Thou Art!" | American Idol                       50
skibidi toilet 78                                                                         50
Cardinal Dolan on significance of Pope Francis dying after Easter                         50
Who could replace Pope Francis? Experts explain what could happen next                    50
Weapons | Teaser                                                                          50
Cleetus Gets a Talladega Masterclass from Dale Jr.                                        50
Original Movies in 2025: Oops, All Flops!                                                 50
NHL Highlights | Oilers vs. Kings | Gm 1 | April 21, 2025                                 50
Possible candidates to become Pope have already been discu

## 💾 Save to CSV

In [33]:
# Write the DataFrame to CSV without the index column.
# NOTE(review): the data-collection cell already writes this same path —
# confirm whether this second save is still needed.
df.to_csv("../data/youtube_data.csv", index=False)