### Real-Time Public Sentiment Analysis on YouTube Movie Trailer Comments
### Task 1: Data Scraping from YouTube

In [None]:
import os
import re
import time

import pandas as pd
from googleapiclient.discovery import build
from tqdm import tqdm

# Read the API key from the YOUTUBE_API_KEY environment variable so the real
# credential never has to be saved inside the notebook. The literal placeholder
# is kept as a fallback so editing it in place still works as before.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "api_key")

# Shared YouTube Data API v3 client used by the helper functions below.
youtube = build('youtube', 'v3', developerKey=API_KEY)

# Search queries (movies and TV shows) whose trailer comments are scraped.
queries = [
    "Avengers Endgame Trailer",
    "Fast & Furious 7 Trailer",
    "Interstellar Trailer",
    "Oppenheimer Trailer",
    "Game of Thrones Trailer",
    "Breaking Bad Trailer",
    "Peaky Blinders Trailer",
    "Stranger Things Trailer",
    "Squid Game Trailer",
]

# Number of videos to fetch per search query.
VIDEOS_PER_QUERY = 12

# Maximum number of top-level comments to collect per video.
COMMENTS_PER_VIDEO = 1000


def search_videos(query, max_results=5):
    """Search YouTube and return a list of video IDs.

    Args:
        query: Free-text search string passed to ``search.list``.
        max_results: Maximum number of videos to request. A non-positive
            value returns an empty list without calling the API, so no
            quota is spent on a request that cannot yield results.

    Returns:
        A list of video ID strings. The list is empty when nothing matched
        or when the request failed (the failure is printed, not raised).
    """
    if max_results <= 0:
        return []

    try:
        response = youtube.search().list(
            q=query,
            part="id",
            type="video",
            maxResults=max_results,
        ).execute()
        # Tolerate a response without 'items' and skip any result that does
        # not carry a video ID instead of discarding the whole page.
        return [
            item['id']['videoId']
            for item in response.get('items', [])
            if 'videoId' in item.get('id', {})
        ]
    except Exception as e:
        # Best-effort scrape: report and continue with the next query.
        # The API error text embeds the request URL, which includes the
        # developer key, so the key is masked before printing.
        safe_error = re.sub(r"([?&]key=)[^&\s\"'>]+", r"\1REDACTED", str(e))
        print(f"Error searching videos for query '{query}': {safe_error}")
        return []

def get_comments(video_id, max_comments=500):
    """Fetch top-level comments for a given video ID.

    Pages through ``commentThreads.list`` until ``max_comments`` comments
    have been collected or no further page is available. If a request fails
    (for example because comments are disabled on the video), the error is
    printed and whatever was collected so far is returned, so one bad video
    does not abort the whole scrape.

    Args:
        video_id: ID of the YouTube video whose comments are fetched.
        max_comments: Maximum number of comments to return. A non-positive
            value returns an empty list without calling the API.

    Returns:
        A list of plain-text comment strings, at most ``max_comments`` long.
    """
    if max_comments <= 0:
        return []

    # commentThreads.list documents 1-100 as the accepted maxResults range.
    page_size = 100

    comments = []
    page_token = None
    try:
        while len(comments) < max_comments:
            params = {
                "part": "snippet",
                "videoId": video_id,
                "maxResults": page_size,
                "textFormat": "plainText",
            }
            if page_token:
                params["pageToken"] = page_token

            response = youtube.commentThreads().list(**params).execute()

            comments.extend(
                item['snippet']['topLevelComment']['snippet']['textDisplay']
                for item in response.get('items', [])
            )

            page_token = response.get('nextPageToken')
            if not page_token:
                break
    except Exception as e:
        # Best-effort scrape: report and keep what was already collected.
        # The API error text embeds the request URL, which includes the
        # developer key, so the key is masked before printing.
        safe_error = re.sub(r"([?&]key=)[^&\s\"'>]+", r"\1REDACTED", str(e))
        print(f"Skipping video {video_id} due to error: {safe_error}")

    # The last page may overshoot the limit; trim to the requested size.
    return comments[:max_comments]

# One dict per scraped comment; turned into a DataFrame after the crawl.
all_comments = []

for query in queries:
    print(f"\nSearching for videos with query: {query}")
    video_ids = search_videos(query, max_results=VIDEOS_PER_QUERY)

    for vid in tqdm(video_ids, desc=f"Fetching comments for query '{query}'"):
        comments = get_comments(vid, max_comments=COMMENTS_PER_VIDEO)
        # NOTE: 'video_title' stores the search query that found the video,
        # not the video's own title.
        all_comments.extend(
            {'video_id': vid, 'video_title': query, 'comment': c}
            for c in comments
        )
        # Pause between videos to stay gentle on the API.
        time.sleep(1)

# Convert to a DataFrame. Passing the columns explicitly guarantees that
# 'comment' exists even when nothing was scraped, so the de-duplication
# below cannot fail with a KeyError on an empty result.
df = pd.DataFrame(all_comments, columns=['video_id', 'video_title', 'comment'])

# Remove duplicates: identical comment text is kept only once, even when it
# appears under different videos.
df.drop_duplicates(subset='comment', inplace=True)

# Persist the result as CSV.
df.to_csv('youtube_comments.csv', index=False)
print(f"\nTotal comments collected: {len(df)}")
print("Data saved to youtube_comments.csv")


Searching for videos with query: Avengers Endgame Trailer


Fetching comments for query 'Avengers Endgame Trailer':  50%|█████     | 6/12 [00:17<00:15,  2.54s/it]

Skipping video gonAaBYuB-g due to error: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId=gonAaBYuB-g&maxResults=1000&textFormat=plainText&key=REDACTED&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.". Details: "[{'message': 'The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.', 'domain': 'youtube.commentThread', 'reason': 'commentsDisabled', 'location': 'videoId', 'locationType': 'parameter'}]">


Fetching comments for query 'Avengers Endgame Trailer': 100%|██████████| 12/12 [00:33<00:00,  2.78s/it]



Searching for videos with query: Fast & Furious 7 Trailer


Fetching comments for query 'Fast & Furious 7 Trailer': 100%|██████████| 12/12 [00:26<00:00,  2.22s/it]



Searching for videos with query: Interstellar Trailer


Fetching comments for query 'Interstellar Trailer': 100%|██████████| 12/12 [00:26<00:00,  2.21s/it]



Searching for videos with query: Oppenheimer Trailer


Fetching comments for query 'Oppenheimer Trailer': 100%|██████████| 12/12 [00:26<00:00,  2.22s/it]



Searching for videos with query: Game of Thrones Trailer


Fetching comments for query 'Game of Thrones Trailer':  17%|█▋        | 2/12 [00:05<00:27,  2.72s/it]

Skipping video bjqEWgDVPe0 due to error: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId=bjqEWgDVPe0&maxResults=1000&textFormat=plainText&key=REDACTED&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.". Details: "[{'message': 'The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.', 'domain': 'youtube.commentThread', 'reason': 'commentsDisabled', 'location': 'videoId', 'locationType': 'parameter'}]">


Fetching comments for query 'Game of Thrones Trailer':  50%|█████     | 6/12 [00:15<00:16,  2.74s/it]

Skipping video gcTkNV5Vg1E due to error: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId=gcTkNV5Vg1E&maxResults=1000&textFormat=plainText&key=REDACTED&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.". Details: "[{'message': 'The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.', 'domain': 'youtube.commentThread', 'reason': 'commentsDisabled', 'location': 'videoId', 'locationType': 'parameter'}]">


Fetching comments for query 'Game of Thrones Trailer': 100%|██████████| 12/12 [00:29<00:00,  2.49s/it]



Searching for videos with query: Breaking Bad Trailer


Fetching comments for query 'Breaking Bad Trailer': 100%|██████████| 12/12 [00:25<00:00,  2.09s/it]



Searching for videos with query: Peaky Blinders Trailer


Fetching comments for query 'Peaky Blinders Trailer':  92%|█████████▏| 23/25 [00:51<00:03,  1.97s/it]

Skipping video jImOqxQ0kkM due to error: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId=jImOqxQ0kkM&maxResults=1000&textFormat=plainText&key=REDACTED&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.". Details: "[{'message': 'The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.', 'domain': 'youtube.commentThread', 'reason': 'commentsDisabled', 'location': 'videoId', 'locationType': 'parameter'}]">


Fetching comments for query 'Peaky Blinders Trailer': 100%|██████████| 25/25 [00:54<00:00,  2.16s/it]



Searching for videos with query: Stranger Things Trailer


Fetching comments for query 'Stranger Things Trailer': 100%|██████████| 12/12 [00:39<00:00,  3.32s/it]



Searching for videos with query: Squid Game Trailer


Fetching comments for query 'Squid Game Trailer': 100%|██████████| 12/12 [00:41<00:00,  3.42s/it]



Total comments collected: 66803
Data saved to youtube_comments.csv
