# Load libs

In [2]:
import os
import requests

import google_auth_oauthlib.flow
import googleapiclient.discovery
import googleapiclient.errors

import pandas as pd
import json
import datetime
import re

# Do not cap the number of columns pandas shows when displaying a DataFrame.
pd.set_option('display.max_columns', None)

# Load .env & handle

In [7]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# API key does not have to be hard-coded in the notebook.
load_dotenv()
API_KEY = os.getenv("YOUTUBE_API_KEY")

# Fail fast when the key is missing or empty. requests omits query parameters
# whose value is None, so without this check the problem would only show up
# later as an API error on the first request.
if not API_KEY:
    raise RuntimeError(
        "Chưa thiết lập YOUTUBE_API_KEY (kiểm tra file .env hoặc biến môi trường)."
    )

In [8]:
# Channel handle sent as the `forHandle` parameter of the channels request.
HANDLE = "@tuki.music_official"

## 1. Lấy thông tin channel + uploads playlist

In [9]:
# Look up the channel by its handle, asking for the snippet, statistics and
# contentDetails parts in one call.
ch_resp = requests.get(
    "https://www.googleapis.com/youtube/v3/channels",
    params={
        # Comma-separated list without spaces, matching the playlists request.
        "part": "snippet,statistics,contentDetails",
        "forHandle": HANDLE,
        "key": API_KEY,
    },
    timeout=30,  # requests never times out by default; avoid hanging the kernel
)
# Raise on HTTP error statuses (e.g. a rejected API key) so they are reported
# as such instead of falling through to the "channel not found" branch below.
ch_resp.raise_for_status()
ch_res = ch_resp.json()

# A successful response with no items means no channel matched the handle.
if not ch_res.get("items"):
    raise SystemExit("Không tìm thấy channel với handle này.")

ch_item = ch_res["items"][0]
channel_id = ch_item["id"]
# Playlist ID found under contentDetails.relatedPlaylists.uploads.
upload_pl = ch_item["contentDetails"]["relatedPlaylists"]["uploads"]

print("Channel ID:", channel_id)
print("Title:", ch_item["snippet"]["title"])
# .get() because the statistics object may not carry a subscriberCount key.
print("Subs:", ch_item["statistics"].get("subscriberCount"))

Channel ID: UCp74ruKv2sUiI2HqEhH8pRw
Title: tuki.(16)
Subs: 1100000


In [4]:
# Display the raw JSON response of the channels request for inspection.
ch_res

{'kind': 'youtube#channelListResponse',
 'etag': 'gmB4e6iGPQXGYLe40Yrou1HXSa0',
 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5},
 'items': [{'kind': 'youtube#channel',
   'etag': '-Krf-eCpt5_PDvenghECPP5-vDY',
   'id': 'UCp74ruKv2sUiI2HqEhH8pRw',
   'snippet': {'title': 'tuki.(16)',
    'description': '高校二年生 16歳 弾き語り 誰かの心に響きますように。',
    'customUrl': '@tuki.music_official',
    'publishedAt': '2023-07-18T07:32:30.72368Z',
    'thumbnails': {'default': {'url': 'https://yt3.ggpht.com/56pJ8Rhb9L2wXD6sXgkOkFFSp29OfgvVK3GjkpuqSKv0e0bHi5p-p4S6hZbMhyaECRubkdfO0A=s88-c-k-c0x00ffffff-no-rj',
      'width': 88,
      'height': 88},
     'medium': {'url': 'https://yt3.ggpht.com/56pJ8Rhb9L2wXD6sXgkOkFFSp29OfgvVK3GjkpuqSKv0e0bHi5p-p4S6hZbMhyaECRubkdfO0A=s240-c-k-c0x00ffffff-no-rj',
      'width': 240,
      'height': 240},
     'high': {'url': 'https://yt3.ggpht.com/56pJ8Rhb9L2wXD6sXgkOkFFSp29OfgvVK3GjkpuqSKv0e0bHi5p-p4S6hZbMhyaECRubkdfO0A=s800-c-k-c0x00ffffff-no-rj',
      'width': 800,
     

In [10]:
# Flatten the top-level channel response into a one-row frame. Nested dict keys
# are joined with "__" (e.g. pageInfo__totalResults); the "items" list is kept
# as-is in a single column.
df_channel_dbt = pd.json_normalize(data=ch_res, sep="__")
df_channel_dbt

Unnamed: 0,kind,etag,items,pageInfo__totalResults,pageInfo__resultsPerPage
0,youtube#channelListResponse,gmB4e6iGPQXGYLe40Yrou1HXSa0,"[{'kind': 'youtube#channel', 'etag': '-Krf-eCp...",1,5


In [9]:
# Extract the fields of interest: one flat record per channel in the response.
channels = []
for item in ch_res.get("items", []):
    snippet = item["snippet"]
    stats = item["statistics"]

    record = {
        "channel_id": item["id"],
        "title": snippet["title"],
        "description": snippet["description"],
        # customUrl and country are read with .get(), so they become None when absent.
        "customUrl": snippet.get("customUrl"),
        "publishedAt": snippet["publishedAt"],
        "country": snippet.get("country"),
        "thumbnail_high": snippet["thumbnails"]["high"]["url"],
        "uploads_playlist": item["contentDetails"]["relatedPlaylists"]["uploads"],
        # Counters are converted with int(); a missing counter falls back to 0.
        "subscriberCount": int(stats.get("subscriberCount", 0)),
        "viewCount": int(stats.get("viewCount", 0)),
        "videoCount": int(stats.get("videoCount", 0)),
    }
    channels.append(record)

# Load the records into a DataFrame and display it.
df_channels = pd.DataFrame(channels)
df_channels

Unnamed: 0,channel_id,title,description,customUrl,publishedAt,country,thumbnail_high,uploads_playlist,subscriberCount,viewCount,videoCount
0,UCp74ruKv2sUiI2HqEhH8pRw,tuki.(16),高校二年生 16歳 弾き語り 誰かの心に響きますように。,@tuki.music_official,2023-07-18T07:32:30.72368Z,JP,https://yt3.ggpht.com/56pJ8Rhb9L2wXD6sXgkOkFFS...,UUp74ruKv2sUiI2HqEhH8pRw,1100000,552984374,330


## 2. Lấy ra playlists

In [12]:
# Fetch every playlist of the channel, following nextPageToken until the API
# stops returning one.
pl_items = []
page_token = None

while True:
    pl_resp = requests.get(
        "https://www.googleapis.com/youtube/v3/playlists",
        params={
            "part": "snippet,contentDetails",
            "channelId": channel_id,
            "maxResults": 50,
            # None on the first request; requests leaves None-valued params out.
            "pageToken": page_token,
            "key": API_KEY,
        },
        timeout=30,  # requests never times out by default; avoid hanging the kernel
    )
    # Without this check an error payload (no "items", no "nextPageToken")
    # would end the loop quietly with a partial or empty playlist list.
    pl_resp.raise_for_status()
    pl_res = pl_resp.json()

    pl_items.extend(pl_res.get("items", []))
    page_token = pl_res.get("nextPageToken")
    if not page_token:
        break

print("Số playlists:", len(pl_items))

Số playlists: 1


In [13]:
# Display the first collected playlist resource to inspect its structure.
pl_items[0]

{'kind': 'youtube#playlist',
 'etag': 'VpEo9bH9SdxqLdWVmR9rcJahWo0',
 'id': 'PLs0vU1OF8rpoVRDNU1PAaG7Y1QevGDDMA',
 'snippet': {'publishedAt': '2023-12-28T10:55:13.295457Z',
  'channelId': 'UCp74ruKv2sUiI2HqEhH8pRw',
  'title': 'ミュージックビデオ',
  'description': '',
  'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/0-MocwNWSPU/default.jpg',
    'width': 120,
    'height': 90},
   'medium': {'url': 'https://i.ytimg.com/vi/0-MocwNWSPU/mqdefault.jpg',
    'width': 320,
    'height': 180},
   'high': {'url': 'https://i.ytimg.com/vi/0-MocwNWSPU/hqdefault.jpg',
    'width': 480,
    'height': 360},
   'standard': {'url': 'https://i.ytimg.com/vi/0-MocwNWSPU/sddefault.jpg',
    'width': 640,
    'height': 480},
   'maxres': {'url': 'https://i.ytimg.com/vi/0-MocwNWSPU/maxresdefault.jpg',
    'width': 1280,
    'height': 720}},
  'channelTitle': 'tuki.(16)',
  'localized': {'title': 'ミュージックビデオ', 'description': ''}},
 'contentDetails': {'itemCount': 18}}

In [16]:
# Flatten the most recently fetched playlists response (pl_res); nested dict
# keys are joined with "__" and the "items" list stays in a single column.
df_playlist_dbt = pd.json_normalize(data=pl_res, sep="__")
df_playlist_dbt.iloc[:1]

Unnamed: 0,kind,etag,items,pageInfo__totalResults,pageInfo__resultsPerPage
0,youtube#playlistListResponse,Bys6gZ3JLTB8e4ymGdq-4uwndaA,"[{'kind': 'youtube#playlist', 'etag': 'VpEo9bH...",1,50


In [15]:
# Display the value of the "items" column at index label 0 of df_channel_dbt.
df_channel_dbt['items'][0]

[{'kind': 'youtube#channel',
  'etag': '-Krf-eCpt5_PDvenghECPP5-vDY',
  'id': 'UCp74ruKv2sUiI2HqEhH8pRw',
  'snippet': {'title': 'tuki.(16)',
   'description': '高校二年生 16歳 弾き語り 誰かの心に響きますように。',
   'customUrl': '@tuki.music_official',
   'publishedAt': '2023-07-18T07:32:30.72368Z',
   'thumbnails': {'default': {'url': 'https://yt3.ggpht.com/56pJ8Rhb9L2wXD6sXgkOkFFSp29OfgvVK3GjkpuqSKv0e0bHi5p-p4S6hZbMhyaECRubkdfO0A=s88-c-k-c0x00ffffff-no-rj',
     'width': 88,
     'height': 88},
    'medium': {'url': 'https://yt3.ggpht.com/56pJ8Rhb9L2wXD6sXgkOkFFSp29OfgvVK3GjkpuqSKv0e0bHi5p-p4S6hZbMhyaECRubkdfO0A=s240-c-k-c0x00ffffff-no-rj',
     'width': 240,
     'height': 240},
    'high': {'url': 'https://yt3.ggpht.com/56pJ8Rhb9L2wXD6sXgkOkFFSp29OfgvVK3GjkpuqSKv0e0bHi5p-p4S6hZbMhyaECRubkdfO0A=s800-c-k-c0x00ffffff-no-rj',
     'width': 800,
     'height': 800}},
   'localized': {'title': 'tuki.(16)',
    'description': '高校二年生 16歳 弾き語り 誰かの心に響きますように。'},
   'country': 'JP'},
  'contentDetails': {'relatedP

In [19]:
# Map the flattened API field names to the column names used in this notebook.
rename_map = {
    "id": "playlist_id",
    "snippet__channelId": "channel_id",
    "snippet__channelTitle": "channel_title",
    "snippet__title": "playlist_title",
    "snippet__description": "playlist_desc",
    "snippet__publishedAt": "published_at",
    "contentDetails__itemCount": "item_count",
    "snippet__thumbnails__default__url": "thumb_default",
    "snippet__thumbnails__medium__url": "thumb_medium",
    "snippet__thumbnails__high__url": "thumb_high",
}

# Columns kept in the tidy playlist table, in display order.
playlist_columns = [
    "playlist_id", "playlist_title", "playlist_desc",
    "published_at", "channel_id", "channel_title",
    "item_count", "thumb_default", "thumb_medium", "thumb_high",
]

# Flatten the nested JSON into one row per playlist (nested keys joined with
# "__"), then apply the standard column names.
df = pd.json_normalize(pl_items, sep="__").rename(columns=rename_map)

# When pl_items is empty the flattened frame has no columns at all. Add any
# expected column that is absent (filled with NaN) so the casts and the column
# selection below do not raise KeyError.
missing_columns = [col for col in playlist_columns if col not in df.columns]
df = df.reindex(columns=[*df.columns, *missing_columns])

# Cast types and parse timestamps; values that cannot be parsed are coerced to
# missing instead of raising.
df["published_at"] = pd.to_datetime(df["published_at"], errors="coerce")
df["item_count"] = pd.to_numeric(df["item_count"], errors="coerce").astype("Int64")

# Keep a compact subset of columns. .copy() makes df_playlists independent of
# df, so later edits to it do not act on a slice of df.
df_playlists = df[playlist_columns].copy()

df_playlists.head(1)

Unnamed: 0,playlist_id,playlist_title,playlist_desc,published_at,channel_id,channel_title,item_count,thumb_default,thumb_medium,thumb_high
0,PLs0vU1OF8rpoVRDNU1PAaG7Y1QevGDDMA,ミュージックビデオ,,2023-12-28 10:55:13.295457+00:00,UCp74ruKv2sUiI2HqEhH8pRw,tuki.(16),18,https://i.ytimg.com/vi/0-MocwNWSPU/default.jpg,https://i.ytimg.com/vi/0-MocwNWSPU/mqdefault.jpg,https://i.ytimg.com/vi/0-MocwNWSPU/hqdefault.jpg
