In [40]:
from googleapiclient.discovery import build
import datetime
import time
API_KEY = 'AIzaSyBBjHEU2XTcP1hn9GVEHt_f4I9jMSIFaPs'
youtube = build('youtube', 'v3', developerKey=API_KEY)


In [41]:
def get_video_data(video_id):
    request = youtube.videos().list(
        part='statistics, snippet',
        id=video_id
    )
    response = request.execute()

    if 'items' not in response or len(response['items']) == 0:
        return None  # Return None if no data

    item = response['items'][0]
    statistics = item['statistics']
    snippet = item['snippet']

    # Extracting required data
    title = snippet['title']
    views = int(statistics.get('viewCount', 0))
    likes = int(statistics.get('likeCount', 0))
    comments = int(statistics.get('commentCount', 0))
    publish_date = snippet['publishedAt']
    publish_days = (datetime.datetime.now() - datetime.datetime.fromisoformat(publish_date[:-1])).days

    return {
        'title': title,
        'views': views,
        'likes': likes,
        'comments': comments,
        'publish_days': publish_days,
    }

In [42]:
def get_video_ids_from_channel(channel_id, max_results=500):
    video_ids = []
    next_page_token = None

    while len(video_ids) < max_results:
        # Fetching at most 50 results at a time
        request = youtube.search().list(
            part='id',
            channelId=channel_id,
            maxResults=50,
            pageToken=next_page_token
        )
        response = request.execute()

        # Extract video IDs
        for item in response['items']:
            if item['id']['kind'] == 'youtube#video':
                video_ids.append(item['id']['videoId'])

        # Check if there's another page of results
        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return video_ids[:max_results]

In [43]:
def batch_video_data(video_ids):
    data = []
    for i in range(0, len(video_ids), 50):
        batch_ids = video_ids[i:i + 50]
        request = youtube.videos().list(
            part='statistics, snippet',
            id=','.join(batch_ids)
        )
        response = request.execute()

        for item in response['items']:
            snippet = item['snippet']
            statistics = item['statistics']
            publish_date = snippet['publishedAt']
            publish_days = (datetime.datetime.now() - datetime.datetime.fromisoformat(publish_date[:-1])).days
            views = int(statistics.get('viewCount', 0))

            # Filter out videos with 0 views
            if views > 0:
                data.append({
                    'video_id': item['id'],  # 添加 video_id
                    'title': snippet['title'],
                    'views': views,
                    'likes': int(statistics.get('likeCount', 0)),
                    'comments': int(statistics.get('commentCount', 0)),
                    'publish_days': publish_days
                })

        # 在每次处理一批视频后延时
        time.sleep(2.0)  # 延时 2 秒

    return data


In [48]:
def get_channel_video_data(channel_id, max_results=5):
    video_ids = get_video_ids_from_channel(channel_id, max_results)
    video_data = batch_video_data(video_ids)
    return video_data

In [45]:
def get_channel_subscriber_count(channel_id):
    request = youtube.channels().list(
        part='statistics',
        id=channel_id
    )
    response = request.execute()

    if 'items' not in response or len(response['items']) == 0:
        return None

    return int(response['items'][0]['statistics'].get('subscriberCount', 0))

In [46]:
from googleapiclient.errors import HttpError
def get_channel_id_by_handle(handle):
    try:
        request = youtube.channels().list(
            part='snippet',
            forHandle=handle
        )
        response = request.execute()

        if 'items' in response and len(response['items']) > 0:
            return response['items'][0]['id']
        print("無法找到頻道，請檢查 Handle 是否正確。")
        return None
    except HttpError as e:
        print(f"API 錯誤：{e}")
        return None


In [47]:
from tqdm import tqdm
import os
import pandas as pd


def batch_main(handles, csv_file='youtube_video_data.csv'):
    all_video_data = []

    # 使用 tqdm 進度條
    for handle in tqdm(handles, desc="處理頻道進度", unit="頻道"):
        print(f"正在處理頻道: {handle}")
        # 獲取頻道 ID
        channel_id = get_channel_id_by_handle(handle)
        if not channel_id:
            print(f"無法找到頻道: {handle}")
            continue

        # 獲取頻道視頻數據
        video_data = get_channel_video_data(channel_id)
        if not video_data:
            print(f"未能獲取 {handle} 的任何視頻數據。")
            continue

        # 獲取訂閱數
        subscribers = get_channel_subscriber_count(channel_id)
        for item in video_data:
            item['subscribers'] = subscribers
        # 將數據追加到總數據中
        all_video_data.extend(video_data)

    # 使用 tqdm 進度條寫入數據
    print("正在保存數據到 CSV 文件...")
    with tqdm(total=len(all_video_data), desc="保存數據進度", unit="影片") as pbar:
        if not os.path.exists(csv_file):
            # 如果文件不存在，創建並寫入表頭
            df = pd.DataFrame(all_video_data)
            df.to_csv(csv_file, index=False, mode='w', header=True)
        else:
            # 如果文件存在，追加數據（避免重複）
            existing_df = pd.read_csv(csv_file)
            new_df = pd.DataFrame(all_video_data)
            combined_df = pd.concat([existing_df, new_df]).drop_duplicates(subset='video_id')
            combined_df.to_csv(csv_file, index=False, mode='w', header=True)

        pbar.update(len(all_video_data))

    print("所有數據已成功保存到 CSV 文件：")
    print(pd.read_csv(csv_file))

In [49]:
handles = ['@woshiquanshu']
batch_main(handles)

處理頻道進度:   0%|          | 0/1 [00:00<?, ?頻道/s]

正在處理頻道: @woshiquanshu


處理頻道進度: 100%|██████████| 1/1 [00:02<00:00,  2.91s/頻道]


正在保存數據到 CSV 文件...


保存數據進度: 100%|██████████| 5/5 [00:00<00:00, 1016.31影片/s]

所有數據已成功保存到 CSV 文件：
      video_id                            title   views  likes  comments  \
0  FgLkUspIsL0     全職獵人405話完整解說：西索本尊現身，旅團黑幫皆有行動  256441   2960       504   
1  Fy_bHNbWdRU  拳叔講哆啦A夢：哆啦A夢把房子變身無敵列車頭！開著房子去兜風！  154404   1217        31   
2  61fYLew16O8    臥底名單洩露，庫拉索的選擇，柯南劇場版20 純黑的噩夢解說   77427    653        69   
3  SHyMV8kt3F0          獵人解說2:少數服從多數的陷阱，西索的放養計劃  239759   1263       150   
4  Htyr-4aOqe0      一反常态的基德，解救业火中的向日葵，柯南剧场版19解说   71886    557        64   

   publish_days  subscribers  
0            35       261000  
1          1849       261000  
2           319       261000  
3          1053       261000  
4           325       261000  





In [None]:
'''import json
import time 
import os
from googleapiclient.discovery import build

API_KEY = 'AIzaSyAZioI1gbrRVHsB8Dylb96npimTouclBB8'
youtube = build('youtube', 'v3', developerKey=API_KEY)

# 進度儲存檔案
PROGRESS_FILE = 'search_progress.json'

def save_progress(handles, next_page_token):
    """儲存搜尋進度到檔案"""
    progress_data = {
        "handles": sorted(list(handles)),  # 確保順序且無重複
        "nextPageToken": next_page_token
    }
    with open(PROGRESS_FILE, 'w', encoding='utf-8') as f:
        json.dump(progress_data, f, ensure_ascii=False, indent=4)

def load_progress():
    """載入搜尋進度"""
    if os.path.exists(PROGRESS_FILE):
        with open(PROGRESS_FILE, 'r', encoding='utf-8') as f:
            progress_data = json.load(f)
            return set(progress_data["handles"]), progress_data["nextPageToken"]
    return set(), None

def get_handles_from_keyword(keyword, max_results=50):
    handles, next_page_token = load_progress()

    while len(handles) < max_results:
        # 搜尋頻道
        search_request = youtube.search().list(
            part='snippet',
            q=keyword,
            type='channel',
            maxResults=50,
            pageToken=next_page_token
        )
        search_response = search_request.execute()

        for item in search_response['items']:
            channel_id = item['snippet']['channelId']

            # 在每次调用频道 API 前加入延时
            time.sleep(3.0)  # 延时 1.5 秒

            # 獲取頻道資訊以提取 handle
            channel_request = youtube.channels().list(
                part='snippet',
                id=channel_id
            )
            channel_response = channel_request.execute()

            for channel in channel_response['items']:
                custom_url = channel['snippet'].get('customUrl', None)
                title = channel['snippet']['title']
                handle = f"@{custom_url}" if custom_url else f"@{title.replace(' ', '').replace('-', '')}"
                handles.add(handle)

        # 儲存進度
        next_page_token = search_response.get('nextPageToken')
        save_progress(handles, next_page_token)

        if not next_page_token:
            break

        # 在每次分页请求之间延时
        time.sleep(3.0)  

    return sorted(list(handles))[:max_results]


# 搜尋台灣相關的頻道
keyword = "台灣"
handles = get_handles_from_keyword(keyword, max_results=10)
print("找到的 Handles:")
batch_main(handles)
'''

找到的 Handles:


處理頻道進度:   0%|          | 0/10 [00:00<?, ?頻道/s]

正在處理頻道: @-df219
请求受限，正在增加延时...


處理頻道進度:  10%|█         | 1/10 [00:10<01:33, 10.35s/頻道]

無法找到頻道: @-df219
正在處理頻道: @-duoduolovestaiwan5616
请求受限，正在增加延时...


處理頻道進度:  20%|██        | 2/10 [00:20<01:21, 10.17s/頻道]

無法找到頻道: @-duoduolovestaiwan5616
正在處理頻道: @-miuraboilertaiwan3201
请求受限，正在增加延时...


處理頻道進度:  30%|███       | 3/10 [00:30<01:10, 10.11s/頻道]

無法找到頻道: @-miuraboilertaiwan3201
正在處理頻道: @-mosachianchian
请求受限，正在增加延时...


處理頻道進度:  40%|████      | 4/10 [00:40<01:00, 10.08s/頻道]

無法找到頻道: @-mosachianchian
正在處理頻道: @1000taiwan
请求受限，正在增加延时...


處理頻道進度:  50%|█████     | 5/10 [00:50<00:50, 10.07s/頻道]

無法找到頻道: @1000taiwan
正在處理頻道: @1001taiwanstories
请求受限，正在增加延时...


處理頻道進度:  60%|██████    | 6/10 [01:00<00:40, 10.06s/頻道]

無法找到頻道: @1001taiwanstories
正在處理頻道: @13妹在台灣
请求受限，正在增加延时...


處理頻道進度:  70%|███████   | 7/10 [01:10<00:30, 10.05s/頻道]

無法找到頻道: @13妹在台灣
正在處理頻道: @486系列之吃遍全台灣
请求受限，正在增加延时...


處理頻道進度:  80%|████████  | 8/10 [01:20<00:20, 10.05s/頻道]

無法找到頻道: @486系列之吃遍全台灣
正在處理頻道: @57gofun
请求受限，正在增加延时...


處理頻道進度:  90%|█████████ | 9/10 [01:30<00:10, 10.05s/頻道]

無法找到頻道: @57gofun
正在處理頻道: @ablin
请求受限，正在增加延时...


處理頻道進度: 100%|██████████| 10/10 [01:40<00:00, 10.07s/頻道]


無法找到頻道: @ablin
正在保存數據到 CSV 文件...


保存數據進度: 0影片 [00:00, ?影片/s]

所有數據已成功保存到 CSV 文件：





EmptyDataError: No columns to parse from file