In [None]:
# libraries
import os
import requests
import pandas as pd
from datetime import datetime, timezone
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

In [None]:
# Configuration: API credential, target channel, and API root URL.
# `setdefault` installs the placeholder only when YOUTUBE_API_KEY is not already
# set, so a key exported in the shell (or set by an earlier cell) is never
# overwritten and no real credential has to be pasted into the notebook.
os.environ.setdefault("YOUTUBE_API_KEY", "YOUR KEY")  # export the variable, or replace the placeholder
API_KEY = os.getenv("YOUTUBE_API_KEY")
CHANNEL_ID = "UCldfgbzNILYZA4dmDt4Cd6A" # Secular Talk
BASE = "https://www.googleapis.com/youtube/v3"

In [None]:
# 1) Get uploads playlist ID
# Ask the channels endpoint for the channel's contentDetails and read the ID of
# its "uploads" playlist from relatedPlaylists.
channel_response = requests.get(
    f"{BASE}/channels",
    params={"part": "contentDetails", "id": CHANNEL_ID, "key": API_KEY},
    timeout=30,  # never hang the kernel on a stalled connection
)
# Fail here with the HTTP error (bad key, quota, ...) instead of a later KeyError.
channel_response.raise_for_status()
resp = channel_response.json()

channel_items = resp.get("items") or []
if not channel_items:
    raise ValueError(f"No channel returned for CHANNEL_ID={CHANNEL_ID!r}")

uploads_playlist = channel_items[0]["contentDetails"]["relatedPlaylists"]["uploads"]

In [None]:
# 2) Category lookup (US)
# Build a {categoryId: title} mapping from the videoCategories endpoint so the
# numeric category IDs on videos can be shown as readable names.
category_response = requests.get(
    f"{BASE}/videoCategories",
    params={"part": "snippet", "regionCode": "US", "key": API_KEY},
    timeout=30,  # never hang the kernel on a stalled connection
)
# Surface HTTP errors directly rather than failing on a missing "items" key.
category_response.raise_for_status()
cats = category_response.json()

category_map = {c["id"]: c["snippet"]["title"] for c in cats.get("items", [])}

In [None]:
# 3) Collect IDs of videos published on or after the cutoff (2015-01-01 UTC)
# Pages through the uploads playlist 50 items at a time and stops at the first
# item older than the cutoff.
# NOTE(review): stopping at the first older item assumes the playlist is
# returned newest-first — confirm for this channel.
cutoff = datetime(2015, 1, 1, tzinfo=timezone.utc)
video_ids = []
page_token = None
reached_cutoff = False  # explicit flag: the inner `break` alone cannot stop paging

while not reached_cutoff:
    params = {
        "part": "contentDetails",
        "playlistId": uploads_playlist,
        "maxResults": 50,
        "key": API_KEY,
    }
    if page_token:
        params["pageToken"] = page_token

    page_response = requests.get(f"{BASE}/playlistItems", params=params, timeout=30)
    page_response.raise_for_status()  # surface quota / auth errors immediately
    data = page_response.json()

    for item in data.get("items", []):
        details = item["contentDetails"]
        pub = details.get("videoPublishedAt")
        if pub is None:
            # No publish timestamp on this entry (presumably an unavailable
            # video — TODO confirm); it cannot be compared to the cutoff, so skip it.
            continue
        # fromisoformat on older Pythons rejects a trailing "Z"; spell out the offset.
        pub_dt = datetime.fromisoformat(pub.replace("Z", "+00:00"))
        if pub_dt < cutoff:
            reached_cutoff = True
            break
        video_ids.append(details["videoId"])

    # The token is refreshed unconditionally, so termination relies on
    # `reached_cutoff` (cutoff hit) or on running out of pages.
    page_token = data.get("nextPageToken")
    if not page_token:
        break

In [None]:
# 4) Fetch video details (batch of 50)
# Requests snippet, statistics and contentDetails for the collected IDs in
# comma-joined batches and flattens each video into one row.
rows = []
batch_size = 50  # IDs sent per videos request
detail_columns = [
    "videoId",
    "title",
    "description",
    "tags",
    "publishedAt",
    "duration",
    "category",
    "viewCount",
    "likeCount",
    "commentCount",
]

for i in range(0, len(video_ids), batch_size):
    chunk = ",".join(video_ids[i:i+batch_size])
    videos_response = requests.get(
        f"{BASE}/videos",
        params={
            "part": "snippet,statistics,contentDetails",
            "id": chunk,
            "key": API_KEY,
        },
        timeout=30,  # never hang the kernel on a stalled connection
    )
    videos_response.raise_for_status()  # surface quota / auth errors immediately
    vids = videos_response.json()

    for v in vids.get("items", []):
        sn = v["snippet"]
        st = v.get("statistics", {})
        cd = v["contentDetails"]
        cid = sn.get("categoryId")

        rows.append({
            "videoId": v["id"],
            "title": sn["title"],
            "description": sn["description"],
            "tags": sn.get("tags"),
            "publishedAt": sn["publishedAt"],
            "duration": cd["duration"],
            "category": category_map.get(cid),
            # Each counter may be absent from `statistics`; keep it as missing
            # rather than inventing a zero.
            "viewCount": int(st["viewCount"]) if "viewCount" in st else None,
            "likeCount": int(st["likeCount"]) if "likeCount" in st else None,
            "commentCount": int(st["commentCount"]) if "commentCount" in st else None,
        })

# Passing `columns` keeps the schema even when `rows` is empty, so the sort does
# not raise KeyError when no videos were collected.
df = pd.DataFrame(rows, columns=detail_columns).sort_values("publishedAt", ascending=False)
df.head()

In [None]:
# Parse the publishedAt strings into pandas datetime values
df["publishedAt"] = df["publishedAt"].pipe(pd.to_datetime)

In [None]:
# explore data structure
# Print pandas' summary of the frame as a quick sanity check on the fetched
# data before it is exported.
df.info()

In [None]:
# Write the collected table to data.csv, leaving out the index column
df.to_csv(path_or_buf="data.csv", index=False)