In [1]:
# =====================================================
# SETUP: Imports, API key setup, YouTube API client, 
# and function to retrieve channel statistics
# =====================================================

In [2]:
# Python version check (requires >= 3.7)
# NOTE(review): `!python` runs whichever `python` executable the shell finds
# first, which is not necessarily the interpreter backing this kernel --
# confirm the two match before relying on this output.
!python --version

Python 3.13.5


In [25]:
# Standard library
import os
from datetime import timedelta
from itertools import islice

# Import YouTube API client and error handling
# !pip install google-api-python-client
import isodate  # For parsing ISO 8601 durations
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Import pandas for data manipulation
import pandas as pd

In [4]:
# -----------------------------
# YouTube API credentials
# -----------------------------

In [None]:
# Read the developer API key from the environment so the secret is never
# saved inside the notebook. Set YOUTUBE_API_KEY before starting Jupyter.
api_key = os.environ.get('YOUTUBE_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to your YouTube Data API developer key."
    )
channel_id = 'UCqECaJ8Gagnn7YCbPEzWH6g'               # Example channel ID (Taylor Swift)

# Build a YouTube API client object
youtube = build('youtube', "v3", developerKey=api_key)

In [6]:
# -----------------------------
# Function: Get channel stats
# -----------------------------

In [7]:
def get_channel_stats(youtube, channel_id):
    """
    Retrieves channel-level statistics and metadata using YouTube Data API.

    Args:
        youtube: Authorized YouTube API client
        channel_id (str): Unique channel ID

    Returns:
        dict: Channel metadata with the keys Channel_name, Subscribers,
              Views, Total_videos and playlist_id (the uploads playlist).
              Values are passed through exactly as the API returned them.
              Subscribers is None when the response carries no
              subscriberCount field.

    Raises:
        ValueError: If the API response contains no channel for channel_id.
    """
    # Request channel info: snippet (basic info), contentDetails (playlists),
    # and statistics (subs, views, etc.)
    response = youtube.channels().list(
        part='snippet,contentDetails,statistics',
        id=channel_id
    ).execute()

    # A lookup that matches nothing yields no usable 'items' entry; fail with
    # a message naming the ID instead of an opaque KeyError/IndexError.
    items = response.get('items') or []
    if not items:
        raise ValueError(f"No channel found for channel_id={channel_id!r}")

    channel = items[0]
    statistics = channel['statistics']

    return dict(
        Channel_name=channel['snippet']['title'],
        # NOTE(review): presumably absent when the channel hides its
        # subscriber count -- confirm against the API reference.
        Subscribers=statistics.get('subscriberCount'),
        Views=statistics['viewCount'],
        Total_videos=statistics['videoCount'],
        playlist_id=channel['contentDetails']['relatedPlaylists']['uploads']
    )

In [11]:
# -----------------------------
# Retrieve channel data
# -----------------------------

In [8]:
# Look up the statistics for the configured channel
channel_statistics = get_channel_stats(youtube=youtube, channel_id=channel_id)

# Wrap the single record in a one-row DataFrame (row label 1) for easier
# viewing & processing
channel_data = pd.DataFrame(data=channel_statistics, index=[1])
channel_data

Unnamed: 0,Channel_name,Subscribers,Views,Total_videos,playlist_id
1,Taylor Swift,61500000,40470597913,294,UUqECaJ8Gagnn7YCbPEzWH6g


In [12]:
# -----------------------------
# Data cleaning: Convert types
# -----------------------------

In [9]:
# Convert string numbers (from API) into numeric datatypes
# The API reports counts as strings; cast each count column to a numeric dtype
for count_column in ('Subscribers', 'Views', 'Total_videos'):
    channel_data[count_column] = pd.to_numeric(channel_data[count_column])

# Show the resulting dtype of every column
channel_data.dtypes

Channel_name    object
Subscribers      int64
Views            int64
Total_videos     int64
playlist_id     object
dtype: object

In [10]:
# -----------------------------
# Export data to CSV
# -----------------------------
# Writes the channel summary to ts_channel_data.csv, resolved relative to the
# current working directory. index=False leaves the DataFrame's row label out
# of the file; the "utf-8-sig" codec starts the file with a UTF-8 byte-order
# mark.
channel_data.to_csv('ts_channel_data.csv', index=False, encoding="utf-8-sig")

In [15]:
# =====================================================
# VIDEO DATA: Retrieve video IDs from channel playlist,
# fetch video details, clean data, and export to CSV
# =====================================================

In [16]:
# -----------------------------
# Function: Get all video IDs from a playlist
# -----------------------------

In [11]:
def get_video_id(youtube, playlist_id):
    """
    Retrieves all video IDs from a given YouTube playlist.

    Pages through playlistItems.list until the response no longer carries a
    nextPageToken, requesting 50 items per page.

    Args:
        youtube: Authorized YouTube API client
        playlist_id (str): ID of the playlist (usually the channel's upload playlist)

    Returns:
        list: All video IDs in the playlist, in the order the API returned them
    """
    video_ids = []
    next_page_token = None

    while True:
        params = dict(
            part='contentDetails',
            playlistId=playlist_id,
            # Ask for the largest page explicitly; without maxResults the API
            # falls back to its much smaller default page size, multiplying
            # the number of requests needed.
            maxResults=50,
        )
        # Only send pageToken once we actually have one (i.e. after page one)
        if next_page_token:
            params['pageToken'] = next_page_token

        response = youtube.playlistItems().list(**params).execute()

        video_ids.extend(
            item['contentDetails']['videoId'] for item in response.get('items', [])
        )

        # No token in the response means this was the last page
        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return video_ids

In [12]:
# Retrieve all video IDs from the channel's upload playlist. The playlist ID is
# taken from the channel lookup result rather than hard-coded, so switching
# `channel_id` automatically targets that channel's uploads.
video_ids = get_video_id(youtube, channel_statistics['playlist_id'])
video_ids

['9BTFD_gX6qQ',
 'zqaLqctWP28',
 'BLFnkfAc7iY',
 '0Xh9s_3uRO8',
 'w_DD-7_zVtw',
 '58zLBjzfd4M',
 'P0haCYjysUs',
 'PQp643val70',
 '-ddfFsLHNQs',
 '0GeyBM6NS5s',
 'UAtpbCEsJlA',
 'K-8dOw7yuPo',
 'w04K8z_nfDI',
 'AFzzgQ7uCw4',
 'ZwK7hrDC5UM',
 'g_meC49cN5k',
 'yj1_GJiJovg',
 'b6zPqm0lLQs',
 'VCTze4UuC9g',
 'Sl6en1NPTYM',
 'PQTA-EVc5DI',
 'HZIg5sQrPAM',
 '0GQ_QeZxdzo',
 'KJXP13hqf2I',
 'ZwlBxvnH-So',
 'F0TUVujmtxY',
 'AtDCWnV_Vzo',
 'BpkmUfv1I4Q',
 '56TZ3B8Qxsk',
 '2hBLC7E8v3A',
 'iMMUAd66vxo',
 'vOZFiX6hDXQ',
 'uEssK8o3jKg',
 'bAi80EylyXQ',
 'iY6Qhlua8Zw',
 '_PsBoqNwYo4',
 'wRKXAAV6jh4',
 'RQMz4JDbtmI',
 'w-FkV0EM_CU',
 'oaBJlKXBvjk',
 'gOtOWeD9YJk',
 'fcVUbmdQfaE',
 'i8_w_m6HLJ0',
 'OKWfv-x2rdU',
 'OOYlWF6V8t8',
 'Mxxswu7V1Us',
 'EVbtjaWXQVg',
 'FQyEZZPbOb0',
 'SBGdvxi2JmU',
 'GZ4vaTRn0HU',
 'U2W173hRfyA',
 'O3wlMR0y4a4',
 'Atdzfj8LcuY',
 'CCUr2pNJft4',
 'HzsQHfBA3MY',
 'q3zqJs7JUCQ',
 '4wOsiM2T_xc',
 '6HIA7ouBfGY',
 '0EKbEP2L32M',
 '0hYY86DmqPY',
 'GhXsnvmUQrg',
 'b7kmP1fsGg8',
 'QBllgP

In [None]:
# -----------------------------
# Function: Get details for each video
# -----------------------------

In [37]:
# helper function to chunk a list into smaller lists of size n

def chunked(seq, n=50):
    """
    Lazily split an iterable into consecutive lists of at most ``n`` items.

    Args:
        seq: Any iterable
        n (int): Maximum number of items per chunk (default 50)

    Yields:
        list: The next chunk; only the final chunk may be shorter than ``n``
    """
    source = iter(seq)
    # Two-argument iter() keeps calling the lambda until it returns the
    # sentinel -- here an empty list, meaning the source is exhausted.
    yield from iter(lambda: list(islice(source, n)), [])

In [38]:
def get_category_mapping(youtube, region="US"):
    """
    Build a lookup from video category ID to category title.

    Args:
        youtube: Authorized YouTube API client
        region (str): Value sent as the request's regionCode (default "US")

    Returns:
        dict: Category ID -> category title; empty when the response has no items
    """
    request = youtube.videoCategories().list(part="snippet", regionCode=region)
    response = request.execute()

    titles_by_id = {}
    for category in response.get("items", []):
        titles_by_id[category["id"]] = category["snippet"]["title"]
    return titles_by_id

In [31]:
def parse_duration_seconds(iso_str):
    """
    Convert an ISO 8601 duration string into whole seconds.

    Args:
        iso_str: Duration text handed to ``isodate.parse_duration``

    Returns:
        int | None: The duration's total seconds truncated to an int, or None
        when the input is empty/falsy or any exception occurs while parsing
    """
    seconds = None
    if iso_str:
        try:
            parsed = isodate.parse_duration(iso_str)
            seconds = int(parsed.total_seconds())
        except Exception:
            # Best effort: any parsing failure is reported as "unknown"
            seconds = None
    return seconds

In [39]:
def classify_video_type(snippet, content_details):
    """
    Label a video as "Live", "Upcoming Live", "Short" or "Regular".

    Args:
        snippet (dict): Video snippet; its "liveBroadcastContent" value is
            read, defaulting to "none" when absent
        content_details (dict): Video contentDetails; its "duration" value is
            read when the video is neither live nor upcoming

    Returns:
        str: "Live" / "Upcoming Live" based on the broadcast status; otherwise
        "Short" when the parsed duration is known and at most 60 seconds,
        else "Regular"
    """
    broadcast_labels = {"live": "Live", "upcoming": "Upcoming Live"}

    status = snippet.get("liveBroadcastContent", "none")
    if status in broadcast_labels:
        return broadcast_labels[status]

    seconds = parse_duration_seconds(content_details.get("duration"))
    # NOTE(review): duration <= 60s is used as a stand-in for "Short"; the
    # code has no other signal for it -- confirm this heuristic is acceptable.
    is_short = seconds is not None and seconds <= 60
    return "Short" if is_short else "Regular"

In [40]:
def get_video_details(youtube, video_ids, region_for_categories="US"):
    """
    Fetch metadata and statistics for each video ID.

    Video IDs are sent to videos.list in batches of 50, joined with commas.

    Args:
        youtube: Authorized YouTube API client
        video_ids: Iterable of video ID strings
        region_for_categories (str): Region passed to get_category_mapping
            when resolving category names (default "US")

    Returns:
        list[dict]: One dict per video item in the API responses, with the
        keys VideoId, Title, VideoType, LiveStatus, CategoryId, CategoryName,
        Published_date, DurationSeconds, Views, Likes and Comments.
        CategoryName falls back to "Unknown" for unmapped IDs. A missing like
        or comment count is recorded as 0, whereas a missing view count is
        recorded as None.
    """
    categories = get_category_mapping(youtube, region=region_for_categories)

    records = []
    for id_batch in chunked(video_ids, 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=",".join(id_batch)
        )
        response = request.execute()

        for video in response.get("items", []):
            snippet = video.get("snippet", {})
            statistics = video.get("statistics", {})
            content_details = video.get("contentDetails", {})

            category_id = snippet.get("categoryId")
            duration_seconds = parse_duration_seconds(content_details.get("duration"))

            # Key order here determines the column order of the DataFrame
            # later built from these records.
            record = {
                "VideoId": video.get("id"),
                "Title": snippet.get("title"),
                "VideoType": classify_video_type(snippet, content_details),
                "LiveStatus": snippet.get("liveBroadcastContent", "none"),
                "CategoryId": category_id,
                "CategoryName": categories.get(category_id, "Unknown"),
                "Published_date": snippet.get("publishedAt"),
                "DurationSeconds": duration_seconds,
                "Views": statistics.get("viewCount"),
                "Likes": statistics.get("likeCount", 0),
                "Comments": statistics.get("commentCount", 0),
            }
            records.append(record)

    return records

In [38]:
# -----------------------------
# Frame data into DataFrame
# -----------------------------

In [41]:
# Fetch one record per video, then load the records into a DataFrame
video_details = get_video_details(
    youtube=youtube,
    video_ids=video_ids,
    region_for_categories="US",
)
video_data = pd.DataFrame(data=video_details)

In [40]:
# -----------------------------
# Data cleaning & type conversion
# -----------------------------

In [42]:
# Parse the publish timestamps; errors="coerce" turns unparseable values into NaT
published_at = pd.to_datetime(video_data["Published_date"], errors="coerce")
video_data["Published_date"] = published_at

# Cast the count and duration columns to numbers; unparseable values become NaN
numeric_columns = ("Views", "Likes", "Comments", "DurationSeconds")
for column_name in numeric_columns:
    video_data[column_name] = pd.to_numeric(video_data[column_name], errors="coerce")

In [43]:
# Keep exactly these columns, in this order
column_order = [
    "VideoId",
    "Title",
    "VideoType",
    "LiveStatus",
    "CategoryId",
    "CategoryName",
    "Published_date",
    "DurationSeconds",
    "Views",
    "Likes",
    "Comments",
]
video_data = video_data[column_order]

# Preview the first rows
video_data.head()

Unnamed: 0,VideoId,Title,VideoType,LiveStatus,CategoryId,CategoryName,Published_date,DurationSeconds,Views,Likes,Comments
0,9BTFD_gX6qQ,She’s got 4️⃣ days left to rehearse for her bi...,Short,none,10,Music,2025-09-29 13:14:31+00:00,31,833643,80122,1589
1,zqaLqctWP28,The Life of a Showgirl: The Tiny Bubbles in Ch...,Short,none,10,Music,2025-08-25 20:02:56+00:00,16,580151,47982,2314
2,BLFnkfAc7iY,"The Life of a Showgirl: Baby, That’s Show Busi...",Short,none,10,Music,2025-08-21 18:04:53+00:00,16,507088,42579,1173
3,0Xh9s_3uRO8,The Life of a Showgirl: The Shiny Bug Vinyl Co...,Short,none,10,Music,2025-08-18 18:13:43+00:00,16,615830,48458,1368
4,w_DD-7_zVtw,"And, baby, that’s show business for you. New a...",Short,none,10,Music,2025-08-14 05:00:58+00:00,8,1588760,110471,3054


In [42]:
# -----------------------------
# Export full video dataset
# -----------------------------

In [44]:
# Writes the per-video table to ts_video_data.csv, resolved relative to the
# current working directory. index=False leaves the DataFrame's row labels out
# of the file; the 'utf-8-sig' codec starts the file with a UTF-8 byte-order
# mark.
video_data.to_csv('ts_video_data.csv', index=False, encoding='utf-8-sig')