In [1]:
# for data collection and extraction
from googleapiclient.discovery import build
import pandas as pd
import time
import os
from dotenv import load_dotenv

# Load environment variables via python-dotenv (presumably from a local .env file)
# so the API key does not have to be written into the notebook.
load_dotenv()
# os.getenv returns None when YOUTUBE_API_KEY is not set.
api_key = os.getenv("YOUTUBE_API_KEY")

def get_youtube_client():
    """Create a YouTube Data API v3 client authenticated with the module-level ``api_key``.

    Returns:
        The service object produced by ``googleapiclient.discovery.build``.
    """
    client = build(serviceName="youtube", version="v3", developerKey=api_key)
    return client

# Module-level client; get_vid_stats and search_videos read this global.
youtube = get_youtube_client()

# Smoke test: a single channels.list request to confirm the key is accepted.
# NOTE(review): the part list has a space after the comma; the recorded output
# shows the API accepted it, but "snippet,statistics" is the documented form.
request = youtube.channels().list(
    part="snippet, statistics",
    forUsername="GoogleDevelopers"
)
response = request.execute()

# Reaching this point means execute() did not raise.
print("API Key activated")
print(response)  # NOTE(review): dumps the entire response dict into the cell output

API Key activated
{'kind': 'youtube#channelListResponse', 'etag': 'jnXPevtMWjWJr2AiL_Vlc1opiZ0', 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5}, 'items': [{'kind': 'youtube#channel', 'etag': 'sngn-Wlf3mxG5qC-ZtnHoZB0N1c', 'id': 'UC_x5XG1OV2P6uZZ5FSM9Ttw', 'snippet': {'title': 'Google for Developers', 'description': 'Subscribe to join a community of creative developers and learn the latest in Google technology — from AI and cloud, to mobile and web.\n\nExplore more at developers.google.com\n\n', 'customUrl': '@googledevelopers', 'publishedAt': '2007-08-23T00:34:43Z', 'thumbnails': {'default': {'url': 'https://yt3.ggpht.com/2eI1TjX447QZFDe6R32K0V2mjbVMKT5mIfQR-wK5bAsxttS_7qzUDS1ojoSKeSP0NuWd6sl7qQ=s88-c-k-c0x00ffffff-no-rj', 'width': 88, 'height': 88}, 'medium': {'url': 'https://yt3.ggpht.com/2eI1TjX447QZFDe6R32K0V2mjbVMKT5mIfQR-wK5bAsxttS_7qzUDS1ojoSKeSP0NuWd6sl7qQ=s240-c-k-c0x00ffffff-no-rj', 'width': 240, 'height': 240}, 'high': {'url': 'https://yt3.ggpht.com/2eI1TjX447QZFDe6R32

In [8]:
# full stats for a yt vid
def get_vid_stats(video_id):
    """Fetch snippet, statistics and content details for one YouTube video.

    Uses the module-level ``youtube`` client to call ``videos().list`` with
    ``part="statistics,contentDetails,snippet"``.

    Args:
        video_id: ID of the video to look up.

    Returns:
        dict with the keys ``video_id``, ``title``, ``description``,
        ``channel_id``, ``channel_title``, ``category_id``, ``views``,
        ``likes``, ``comments``, ``duration`` and ``published_at``.
        ``views``, ``likes`` and ``comments`` are ints; a count that is absent
        from the response is reported as 0. Other absent fields are ``None``.

    Raises:
        IndexError: if the response contains no item for ``video_id``.
    """
    stats_req = youtube.videos().list(
        part="statistics,contentDetails,snippet", id=video_id
    )
    items = stats_req.execute().get("items") or []
    if not items:
        # Keep IndexError (what items[0] used to raise) but say which video failed.
        raise IndexError(f"No video data returned for video_id {video_id!r}")
    stats_item = items[0]

    # Tolerate a response that omits one of the requested sections.
    statistics = stats_item.get("statistics", {})
    snippet = stats_item.get("snippet", {})
    content_details = stats_item.get("contentDetails", {})

    return {
        "video_id": video_id,
        "title": snippet.get("title"),
        "description": snippet.get("description"),
        "channel_id": snippet.get("channelId"),
        "channel_title": snippet.get("channelTitle"),
        "category_id": snippet.get("categoryId"),
        "views": int(statistics.get("viewCount", 0)),
        "likes": int(statistics.get("likeCount", 0)),
        "comments": int(statistics.get("commentCount", 0)),
        "duration": content_details.get("duration"),
        "published_at": snippet.get("publishedAt")
    }

In [12]:
# search videos by keyword, keeping only those published on or after Jan 1 of start_year (default 2020)
def search_videos(query, max_results=100, start_year=2020):
    """Search YouTube for videos matching ``query`` and return their stats.

    Pages through ``youtube.search().list`` (at most 50 results per request)
    and calls ``get_vid_stats`` for every video id found, until
    ``max_results`` rows are collected or the API reports no further page.

    Args:
        query: Search term passed to the API as ``q``.
        max_results: Maximum number of rows to return.
        start_year: Only videos published on or after January 1st (UTC) of
            this year are requested.

    Returns:
        pandas.DataFrame with one row per video (the dicts produced by
        ``get_vid_stats``). Empty when nothing was collected.
    """
    video_data = []
    seen_ids = set()
    next_page_token = None
    published_after = f"{start_year}-01-01T00:00:00Z"

    while len(video_data) < max_results:
        request = youtube.search().list(
            part="snippet",
            q=query,
            type="video",
            maxResults=min(50, max_results - len(video_data)),
            pageToken=next_page_token,
            publishedAfter=published_after
        )
        response = request.execute()

        for item in response.get("items", []):
            video_id = item.get("id", {}).get("videoId")
            # Skip malformed entries and ids already collected from an earlier page.
            if not video_id or video_id in seen_ids:
                continue
            seen_ids.add(video_id)

            try:
                video_data.append(get_vid_stats(video_id))
            except (IndexError, KeyError):
                # The lookup returned nothing usable for this id; keep what has
                # been collected so far instead of aborting the whole run.
                print(f"Skipping video {video_id}: stats unavailable")
                continue

            if len(video_data) >= max_results:
                break

        next_page_token = response.get("nextPageToken")
        # Stop before sleeping when there is nothing left to fetch.
        if not next_page_token or len(video_data) >= max_results:
            break

        time.sleep(1) # pause between pages to go easy on the API quota

    return pd.DataFrame(video_data)

In [13]:
# collect videos for multiple keywords, one keyword at a time
def collect_keywords(keywords, max_results_per_keyword=100, start_year=2020):
    """Run ``search_videos`` for each keyword and stack the results.

    Args:
        keywords: Iterable of search terms; they are processed in order.
        max_results_per_keyword: Passed to ``search_videos`` as ``max_results``.
        start_year: Passed through to ``search_videos``.

    Returns:
        pandas.DataFrame containing every keyword's rows with a fresh 0..n-1
        index and an extra ``keyword`` column naming the search term that
        produced each row. An empty DataFrame when ``keywords`` is empty.
    """
    frames = []

    for keyword in keywords:
        print(f"Collecting videos for keyword: {keyword}")
        df = search_videos(keyword, max_results=max_results_per_keyword, start_year=start_year)
        # assign() returns a new frame, so the search result itself is not modified.
        frames.append(df.assign(keyword=keyword))

    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame.
        return pd.DataFrame()

    # Concatenate once instead of re-copying the accumulated frame per keyword.
    return pd.concat(frames, ignore_index=True)

def csv_save(df, filename="ytraw.csv"):
    """Write ``df`` to ``filename`` in CSV format, leaving out the index column."""
    df.to_csv(path_or_buf=filename, index=False)

In [14]:
# Collect raw data for every keyword and save it to ytraw.csv.
# NOTE: re-running this cell repeats all API requests and rewrites the CSV file.

keywords = ["skyrim","rimworld","minecraft"] # input your keywords

df = collect_keywords(keywords, max_results_per_keyword=100, start_year=2020)

csv_save(df, "ytraw.csv")

df.head() # can be changed to preview data in different ways

Collecting videos for keyword: skyrim
Collecting videos for keyword: rimworld
Collecting videos for keyword: minecraft


Unnamed: 0,video_id,title,description,channel_id,channel_title,category_id,views,likes,comments,duration,published_at,keyword
0,ISSjUbAVJa0,"Okay fine, I'll try Skyrim",Skyrim.. the game everyone says is their favor...,UCO9US9tHbfyTD3eGJEbD1Ig,Bind,22,2238569,75971,7808,PT38M23S,2025-04-14T20:00:58Z,skyrim
1,ST5mY0HE_7o,SKYRIM Full Movie 2025: Oblivion | FullHDvideo...,Skyrim Full Movie 2025: Oblivion | FullHDvideo...,UCpiLRBQAIDhzuxnDi9Oar4A,FullHDvideos4me,1,2246428,12513,236,PT2H28S,2025-05-03T09:00:38Z,skyrim
2,V2OoTDk9JiI,These NPCs don’t know they’re talking to the D...,,UCaHAsSOlU3yuYxohUrMVtBA,Skooma Sippa,20,4716091,161780,3087,PT36S,2025-08-28T17:14:52Z,skyrim
3,2AbEyr8nIcA,"""THAT"" Jumpscared Me 💀 #skyrim #skyrimannivers...",#Skyrim #Anniversary #skyrimae \n\nThe Elder S...,UCnVbLunqMpiik_WEA3dSVaw,Salmagros,20,4069162,106650,3046,PT7S,2024-09-09T06:02:45Z,skyrim
4,B-oe9ypNSb0,New Skyrim Mods You Should Try Before Starting...,🔗 https://ene.ba/MasterCheesey\n🎮 https://ene....,UCjpUc0ZOzwHYkFIs3U4Z9fg,Master Cheesey,20,3562,288,33,PT11M33S,2025-09-11T16:00:18Z,skyrim
