## Getting API Keys

In [1]:
from dotenv import load_dotenv
import os

# Load variables from a .env file into the process environment so the key
# never has to be hard-coded in the notebook.
load_dotenv()

# os.getenv returns None when the variable is missing; fail fast here with a
# clear message instead of letting the missing key surface later as an opaque
# API error.
YT_KEY = os.getenv("YOUTUBE_API_KEY")
if not YT_KEY:
    raise RuntimeError(
        "YOUTUBE_API_KEY is not set. Add it to your .env file or environment."
    )

## Importing Necessary Libraries

In [2]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd

## 🔑 Authenticate with YouTube Data API v3

In [3]:
# Sentiment analyzer used later to score each comment; independent of the API client.
analyzer = SentimentIntensityAnalyzer()

# YouTube Data API v3 client, authenticated with the key read from the environment.
YOUTUBE_API_KEY = YT_KEY
youtube = build(
    serviceName="youtube",
    version="v3",
    developerKey=YOUTUBE_API_KEY,
)

## 🔥 Get Trending Videos (from YouTube)

In [4]:
def get_trending_videos(region_code='US', max_results=50, client=None):
    """Return basic metadata for the most popular videos in a region.

    Args:
        region_code: Region code passed to the API as ``regionCode``.
        max_results: Maximum number of videos to return. The API serves at
            most 50 videos per request, so larger values are fetched page by
            page using ``nextPageToken``.
        client: Optional API client exposing ``videos().list(...).execute()``.
            Defaults to the notebook-level ``youtube`` client.

    Returns:
        A list of dicts with the keys ``video_id``, ``title``, ``channel`` and
        ``description``; at most ``max_results`` entries, possibly fewer if
        the chart runs out. Empty when ``max_results`` is not positive.
    """
    api = youtube if client is None else client
    page_size_limit = 50  # per-request cap for videos.list
    videos = []
    page_token = None

    while len(videos) < max_results:
        params = {
            # Only snippet fields are read below, so nothing else is requested.
            'part': "snippet",
            'chart': "mostPopular",
            'regionCode': region_code,
            'maxResults': min(page_size_limit, max_results - len(videos)),
        }
        if page_token:
            params['pageToken'] = page_token

        response = api.videos().list(**params).execute()
        items = response.get('items', [])
        for item in items:
            snippet = item['snippet']
            videos.append({
                'video_id': item['id'],
                'title': snippet['title'],
                'channel': snippet['channelTitle'],
                'description': snippet['description'],
            })

        page_token = response.get('nextPageToken')
        if not page_token or not items:
            break  # no further pages, or an empty page: stop instead of looping

    # A page may contain more items than requested; never exceed the contract.
    return videos[:max_results]

## 💬 Fetch Comments

In [5]:
def _http_error_reasons(error):
    """Return the set of ``reason`` codes found in an API error's JSON body.

    Returns an empty set when the body is missing or not in the expected shape.
    """
    import json  # local import: keeps this cell runnable on its own

    try:
        content = error.content
        if isinstance(content, bytes):
            content = content.decode("utf-8")
        payload = json.loads(content)
        return {detail.get('reason') for detail in payload['error'].get('errors', [])}
    except (AttributeError, KeyError, TypeError, ValueError):
        return set()


def get_comments(video_id, max_comments=5, client=None):
    """Fetch the text of up to ``max_comments`` top-level comments for a video.

    Args:
        video_id: ID of the video whose comment threads are requested.
        max_comments: Maximum number of comments to return. The API serves at
            most 100 threads per request, so larger values are fetched page by
            page using ``nextPageToken``.
        client: Optional API client exposing
            ``commentThreads().list(...).execute()``. Defaults to the
            notebook-level ``youtube`` client.

    Returns:
        A list of plain-text comment strings. On an ``HttpError`` the error is
        printed and whatever was collected before the failure is returned
        (an empty list if the first request failed).
    """
    api = youtube if client is None else client
    page_size_limit = 100  # per-request cap for commentThreads.list
    comments = []
    page_token = None

    try:
        while len(comments) < max_comments:
            params = {
                'part': "snippet",
                'videoId': video_id,
                'maxResults': min(page_size_limit, max_comments - len(comments)),
                'textFormat': "plainText",
            }
            if page_token:
                params['pageToken'] = page_token

            response = api.commentThreads().list(**params).execute()
            items = response.get('items', [])
            comments.extend(
                item['snippet']['topLevelComment']['snippet']['textDisplay']
                for item in items
            )

            page_token = response.get('nextPageToken')
            if not page_token or not items:
                break  # no further pages, or an empty page
    except HttpError as e:
        # HTTP 403 is also returned for quota and permission problems, so only
        # report "comments disabled" when the error body actually says so.
        if e.resp.status == 403 and 'commentsDisabled' in _http_error_reasons(e):
            print(f"🚫 Comments disabled for video: {video_id}")
        else:
            print(f"⚠️ Error for video {video_id}: {e}")

    return comments[:max_comments]

## 🧪 Put It Together: Collect Comments with a Sentiment Polarity Score per Comment

In [None]:
%%time
desired_video_count = 15
collected = 0
data = []

all_videos = get_trending_videos(max_results=50)

for vid in all_videos:
    if collected >= desired_video_count:
        break

    video_id = vid['video_id']
    comments = get_comments(video_id, max_comments=50)

    if not comments:
        continue  # Skip video" if no usable comments
    
    video_url = f"https://www.youtube.com/watch?v={video_id}"

    for comment in comments:
        data.append({
            'video_title': vid['title'],
            'channel': vid['channel'],
            'comment': comment,

            # SENTIMENT POLARITY SCORE
            'comment_sentiment': analyzer.polarity_scores(comment)['compound'],
            'video_url': video_url
        })

    collected += 1

df = pd.DataFrame(data)
df.to_csv("../data/youtube_data.csv", index=False)
print("✅ Finished collecting data.")


✅ Finished collecting data.
CPU times: user 109 ms, sys: 13.2 ms, total: 122 ms
Wall time: 1.92 s


### This is how our `semi-structured` data looks

In [None]:
data[:5]  # First five records; `data` is a list of dicts (one per comment), i.e. JSON-like

[{'video_title': 'The Elder Scrolls IV: Oblivion Remastered - Full Reveal Stream',
  'channel': 'Bethesda Softworks',
  'comment': 'Came for the thumbnail, stayed for the vibes',
  'comment_sentiment': 0.0,
  'video_url': 'https://www.youtube.com/watch?v=Ed_E2crglcw'},
 {'video_title': 'The Elder Scrolls IV: Oblivion Remastered - Full Reveal Stream',
  'channel': 'Bethesda Softworks',
  'comment': 'That part at 2:45 Instant goosebumps',
  'comment_sentiment': 0.0,
  'video_url': 'https://www.youtube.com/watch?v=Ed_E2crglcw'},
 {'video_title': 'The Elder Scrolls IV: Oblivion Remastered - Full Reveal Stream',
  'channel': 'Bethesda Softworks',
  'comment': 'I guess Bethesda realized they should stop re releasing Skyrim, and picked a new champion. I wonder how many times Oblivion will be re released and re packaged over the next decade.',
  'comment_sentiment': 0.4019,
  'video_url': 'https://www.youtube.com/watch?v=Ed_E2crglcw'},
 {'video_title': 'The Elder Scrolls IV: Oblivion Remastere

### This is how it looks as `structured data`

In [7]:
# Show the last 15 rows of the DataFrame.
df.tail(15)

Unnamed: 0,video_title,channel,comment,comment_sentiment,video_url
734,"The Last Of Us - Well, They Really Did It",The Critical Drinker,"It’s game accurate, but Joel was the only reas...",0.0,https://www.youtube.com/watch?v=7US7_rtOkJY
735,"The Last Of Us - Well, They Really Did It",The Critical Drinker,Abby became more like a villain in this than i...,-0.2665,https://www.youtube.com/watch?v=7US7_rtOkJY
736,"The Last Of Us - Well, They Really Did It",The Critical Drinker,I never played the games and I did not see Joe...,-0.0336,https://www.youtube.com/watch?v=7US7_rtOkJY
737,"The Last Of Us - Well, They Really Did It",The Critical Drinker,Last of us 1 one of best ever games with best ...,0.222,https://www.youtube.com/watch?v=7US7_rtOkJY
738,"The Last Of Us - Well, They Really Did It",The Critical Drinker,literally the ugliest cast of characters in mo...,-0.25,https://www.youtube.com/watch?v=7US7_rtOkJY
739,"The Last Of Us - Well, They Really Did It",The Critical Drinker,It doesn't make sense why Joel is out on patro...,-0.6757,https://www.youtube.com/watch?v=7US7_rtOkJY
740,"The Last Of Us - Well, They Really Did It",The Critical Drinker,yep i figured they kill him off when he agreed...,-0.4385,https://www.youtube.com/watch?v=7US7_rtOkJY
741,"The Last Of Us - Well, They Really Did It",The Critical Drinker,you REALLY think pedro pasquell is a good acto...,0.4199,https://www.youtube.com/watch?v=7US7_rtOkJY
742,"The Last Of Us - Well, They Really Did It",The Critical Drinker,Why didn’t Abby stop omg,-0.296,https://www.youtube.com/watch?v=7US7_rtOkJY
743,"The Last Of Us - Well, They Really Did It",The Critical Drinker,Ellie looks like dome kind of alien halfbreed.,0.3612,https://www.youtube.com/watch?v=7US7_rtOkJY


In [8]:
# Summarize the DataFrame: columns, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 749 entries, 0 to 748
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   video_title        749 non-null    object 
 1   channel            749 non-null    object 
 2   comment            749 non-null    object 
 3   comment_sentiment  749 non-null    float64
 4   video_url          749 non-null    object 
dtypes: float64(1), object(4)
memory usage: 29.4+ KB


In [9]:
# Count how many rows (comments) were collected for each video title.
df["video_title"].value_counts()

video_title
Can We Beat Josh Allen & Jon Rahm in a Golf Match?                                        50
Carrie Underwood Praises with "How Great Thou Art!" | American Idol                       50
Watch the Destiny 2: The Edge of Fate Reveal on May 6 at 9 AM PT.                         50
Cardinal Dolan on significance of Pope Francis dying after Easter                         50
skibidi toilet 78                                                                         50
Who could replace Pope Francis? Experts explain what could happen next                    50
Weapons | Teaser                                                                          50
NHL Highlights | Oilers vs. Kings | Gm 1 | April 21, 2025                                 50
Squad is IN DANGER! 😳 #SquadVsMonsters                                                    50
Cleetus Gets a Talladega Masterclass from Dale Jr.                                        50
Original Movies in 2025: Oops, All Flops!                 

## 💾 Save to CSV

In [10]:
# Write df to CSV without the index column.
# NOTE(review): the collection cell above already writes df to this same path,
# so this save looks redundant — confirm before removing either one.
df.to_csv("../data/youtube_data.csv", index=False)