## Setup

In [19]:
from googleapiclient.discovery import build
import json
import re
from collections import Counter
from datetime import datetime
from googleapiclient.discovery import build

In [22]:
with open ("../performative.txt", "r") as f:
    lines = f.readlines()
    API_KEY = lines[2].strip()
    
yt = build("youtube", "v3", developerKey=API_KEY)

## Initial testing

In [24]:
request = yt.search().list(
    part = "snippet",
    q = "Mr. Beast", 
    type = "channel", 
    maxResults = 5  
)
response = request.execute()

In [25]:
print(json.dumps(response, indent=2))

{
  "kind": "youtube#searchListResponse",
  "etag": "Ba_X79mfdpCmvR9rUHTSHE8enS4",
  "nextPageToken": "CAUQAA",
  "regionCode": "CA",
  "pageInfo": {
    "totalResults": 43362,
    "resultsPerPage": 5
  },
  "items": [
    {
      "kind": "youtube#searchResult",
      "etag": "SSdzI1tGcjTe4GVpESYvgfP5nVU",
      "id": {
        "kind": "youtube#channel",
        "channelId": "UCX6OQ3DkcsbYNE6H8uQQuVA"
      },
      "snippet": {
        "publishedAt": "2012-02-20T00:43:50Z",
        "channelId": "UCX6OQ3DkcsbYNE6H8uQQuVA",
        "title": "MrBeast",
        "description": "SUBSCRIBE FOR A COOKIE! New MrBeast or MrBeast Gaming video every single Saturday at noon eastern time!",
        "thumbnails": {
          "default": {
            "url": "https://yt3.ggpht.com/nxYrc_1_2f77DoBadyxMTmv7ZpRZapHR5jbuYe7PlPd5cIRJxtNNEYyOC0ZsxaDyJJzXrnJiuDE=s88-c-k-c0xffffffff-no-rj-mo"
          },
          "medium": {
            "url": "https://yt3.ggpht.com/nxYrc_1_2f77DoBadyxMTmv7ZpRZapHR5jbuYe7Pl

In [26]:
def get_channel_data(channei_id):
    response = yt.channels().list()

## Getting data from api

### Helpers

In [23]:
CATEGORY_MAP = {
    "1":  "Film & Animation",
    "2":  "Autos & Vehicles",
    "10": "Music",
    "15": "Pets & Animals",
    "17": "Sports",
    "19": "Travel & Events",
    "20": "Gaming",
    "22": "People & Blogs",
    "23": "Comedy",
    "24": "Entertainment",
    "25": "News & Politics",
    "26": "Howto & Style",
    "27": "Education",
    "28": "Science & Technology",
    "29": "Nonprofits & Activism",
}

In [27]:
### REQUIRES: ascending sorted list of datetime objects
def calculate_upload_gap(dates):
    if len(dates) < 2:
        return None
    gaps = [(dates[i+1] - dates[i]).days for i in range(len(dates) - 1)]
    avg_days_between_uploads = round(sum(gaps) / len(gaps), 2)
    return avg_days_between_uploads

In [28]:
def to_seconds(duration):
    if not duration:
        return None
    match = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", duration)
    if not match:
        return None
    return int(match.group(1) or 0) * 3600 + int(match.group(2) or 0) * 60 + int(match.group(3) or 0)

### Getting data

In [None]:
def get_profile(channel_id):
    response = yt.channels().list(
        part="snippet,contentDetails,topicDetails,brandingSettings", #TODO: never used brandingSettings
        id=channel_id
    ).execute()

    item = response["items"][0]
    snippet = item["snippet"]
    uploads_id = item["contentDetails"]["relatedPlaylists"]["uploads"]
    raw_topics = item.get("topicDetails", {}).get("topicCategories", [])
    topics = [url.split("/")[-1].replace("_", " ") for url in raw_topics]

    return snippet, uploads_id, topics

In [30]:
def get_recent_videos_and_dates(uploads_id):
    response = yt.playlistItems().list(
        part="snippet,contentDetails",
        playlistId=uploads_id,
        maxResults=10
    ).execute()

    videos=[
        {
            "video_id": item["contentDetails"]["videoId"],
            "published_at": item["snippet"]["publishedAt"],
        }
        for item in response["items"]
    ]

    upload_dates = sorted([datetime.fromisoformat(v["published_at"].replace("Z", "+00:00")) for v in videos])
    video_ids = [v["video_id"] for v in videos]
    return video_ids, upload_dates


In [31]:
def get_videos_data(video_ids):
    response = yt.videos().list(
        part="snippet,contentDetails",
        id=",".join(video_ids)
    ).execute()

    tags, categories, durations, titles = [], [], [], []

    for v in response["items"]:
        tags.extend(v["snippet"].get("tags", []))
        categories.append(CATEGORY_MAP.get(v["snippet"].get("categoryId"), "Unknown"))
        durations.append(to_seconds(v["contentDetails"].get("duration")))
        titles.append(v["snippet"].get("title"))
    
    return tags, categories, durations, titles

In [32]:
def main():
    channel_id = "UCD1P9oDMsLuZyfJ2PEciTwg" # this channel: https://www.youtube.com/@MikeBartner
    snippet, uploads_id, topics = get_profile(channel_id)
    video_ids, upload_dates = get_recent_videos_and_dates(uploads_id)
    avg_upload_gap = calculate_upload_gap(upload_dates)
    tags, categories, durations, titles = get_videos_data(video_ids)

    return {
        "channel_id": channel_id,
        "channel_name": snippet.get("title"),
        "description": snippet.get("description"),
        "country": snippet.get("country"),
        "defaultLanguage": snippet.get("defaultLanguage"),
        "created_date": snippet.get("publishedAt"),
        "topics": topics,
        "aggregated_tags": list(set(tags)),
        "most_common_video_category": Counter(categories).most_common(1)[0][0] if categories else None,
        "all_video_categories": list(set(categories)),
        "avg_duration_seconds": round(sum(durations) / len(durations), 2) if durations else None,
        "avg_days_between_uploads": avg_upload_gap,
        "recent_video_titles": titles,
    }


In [33]:
channel_data = main()
print(json.dumps(channel_data, indent=2))

{
  "channel_id": "UCD1P9oDMsLuZyfJ2PEciTwg",
  "channel_name": "Mike Bartner",
  "description": "Just your average hockey fan that loves to make videos",
  "country": null,
  "defaultLanguage": null,
  "created_date": "2012-03-03T02:44:12Z",
  "topics": [
    "Ice hockey",
    "Sport"
  ],
  "aggregated_tags": [],
  "most_common_video_category": "Entertainment",
  "all_video_categories": [
    "Entertainment"
  ],
  "avg_duration_seconds": 584.9,
  "avg_days_between_uploads": 0.78,
  "recent_video_titles": [
    "My Reaction To The WILD Olympic Quarterfinals",
    "My Olympic Hockey Quarterfinal Predictions",
    "10 Takeaways From The Olympics Thus Far",
    "Frustrating win! #Hockey #OlympicHockey #USAHockey #AmericanHockey #Olympics",
    "Canada boutta hang 15 goals on France #Hockey #OlympicHockey #Olympics #HockeyCanada #USAHockey",
    "What a delight to watch #NHL #Hockey #TeamCanada #Canada #McDavid#MacKinnonCelebrini",
    "Put some respect on Slaf\u2019s name!!! #NHL #Hocke