In [1]:
from tempfile import mkdtemp
import codecs

import os
import re
import time
import datetime
import pandas as pd
import urllib.request
from dotenv import load_dotenv
load_dotenv()

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.select import Select

import googleapiclient.errors
from googleapiclient.discovery import build

In [12]:
# Parameter settings
SCAN_LIMIT      = 10                            # number of channels taken from the head of the channel list
YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY')  # None when the variable is not defined

# Publication window handed to the search request (publishedAfter / publishedBefore).
# NOTE(review): RFC 3339 writes the UTC offset as "+09:00"; these values use "+0900".
# Confirm the API accepts the colon-less form before relying on it.
st = '2024-04-01T00:00:00+0900'
ed = '2024-05-01T00:00:00+0900'

# Build the folder path with os.path.join instead of the literal "..\csv":
# "\c" is an invalid escape sequence, and a backslash is only a path separator on
# Windows, so elsewhere the old value named a directory literally called "..\csv".
output_folder        = os.path.join("..", "csv")
filename_videos_info = os.path.join(output_folder, "all_videos_info.csv")

In [15]:
# Channel IDs whose videos will be collected

# Read all_livers_info.csv, keep channels with at least 100 subscribers and
# order them from most to fewest subscribers before taking the channelId column.
df = pd.read_csv("../csv/all_livers_info.csv")
channels_by_subscribers = df.query("subscriberCount>=100").sort_values(by='subscriberCount', ascending=False)
list_channelId = channels_by_subscribers["channelId"]

## Create the output CSV file (mode 'w' truncates any previous content)
# The header is written without a trailing newline: every data row appended
# later starts with its own "\n".
csv_header = "channelId,videoId,publishedAt,video_type,title,duration,viewCount,likeCount,favoriteCount,commentCount,description"
with open(filename_videos_info, mode='w', encoding='utf8') as header_file:
    header_file.write(csv_header)

In [4]:
# Set up the YouTube Data API client
# Stop here with an explicit message when no API key was loaded, instead of
# building a client that only fails later on its first request.
if not YOUTUBE_API_KEY:
    raise RuntimeError("YOUTUBE_API_KEY is not set; define it in the environment or in a .env file")

youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)

In [7]:
# Data collection

def _fetch_video_ids(client, channel_id, published_after, published_before):
    """Return the IDs of all videos found for one channel inside the publication window.

    Args:
        client: API client exposing ``search().list(...).execute()``.
        channel_id: channel whose videos are searched.
        published_after: lower bound passed as ``publishedAfter``.
        published_before: upper bound passed as ``publishedBefore``.

    Returns:
        list of video ID strings, in the order the search pages returned them.
    """
    video_ids = []
    page_token = ""

    while True:
        response_search = client.search().list(
            part            = "id",
            channelId       = channel_id,
            maxResults      = 50,
            publishedAfter  = published_after,
            publishedBefore = published_before,
            type            = "video",
            pageToken       = page_token,
        ).execute()

        # Walk the items that were actually returned. Indexing up to
        # pageInfo.resultsPerPage raises IndexError whenever a page holds
        # fewer items than that number.
        video_ids.extend(item["id"]["videoId"] for item in response_search.get("items", []))

        # No token in the response means this was the last page.
        page_token = response_search.get("nextPageToken")
        if not page_token:
            break

    return video_ids


def _format_video_row(channel_id, video_id, item):
    """Format one element of a videos().list response as a CSV line (without newline).

    Double quotes are stripped from title and description, and line breaks in the
    description are replaced by the two-character sequences ``\\n`` / ``\\r``, so
    that each video stays on a single line of the file.
    """
    snippet    = item["snippet"]
    statistics = item.get("statistics", {})

    title       = snippet["title"].replace('"','')
    publishedAt = snippet["publishedAt"]
    duration    = item["contentDetails"]["duration"]
    description = snippet["description"].replace('"','').replace('\n','\\n').replace('\r','\\r')

    # Like counts can be private and comments can be disabled, so those keys may
    # be absent. All counters are read the same way: a missing one becomes an
    # empty cell instead of aborting the whole run.
    viewCount     = statistics.get("viewCount", "")
    likeCount     = statistics.get("likeCount", "")
    favoriteCount = statistics.get("favoriteCount", "")
    commentCount  = statistics.get("commentCount", "")

    # Classify the video: presence of liveStreamingDetails wins, then a
    # "#shorts" tag in the title, otherwise it is a regular upload.
    if item.get('liveStreamingDetails') is not None:
        video_type = "live"
    elif "#shorts" in title:
        video_type = "short"
    else:
        video_type = "movie"

    return f'{channel_id},{video_id},{publishedAt},{video_type},"{title}",{duration},{viewCount},{likeCount},{favoriteCount},{commentCount},"{description}"'


for channelId in list_channelId[:SCAN_LIMIT]:

    # Collect every video ID of the channel first
    list_videoId = _fetch_video_ids(youtube, channelId, st, ed)

    # Then fetch the details of each video and append them to the CSV.
    # The file is opened once per channel rather than once per row.
    with open(filename_videos_info, mode='a', encoding='utf8') as f:
        for videoId in list_videoId:

            response_videos = youtube.videos().list(
                part = "snippet, statistics, contentDetails, liveStreamingDetails",
                id   = videoId,
                hl   = "ja_JP",
            ).execute()

            items = response_videos.get("items", [])
            if not items:
                # The details lookup returned nothing for this ID; skip it
                # instead of failing on items[0].
                continue

            # Rows start with "\n" because the header line has no trailing newline.
            f.write('\n' + _format_video_row(channelId, videoId, items[0]))

In [18]:
# Show the channel IDs selected for scanning (first SCAN_LIMIT entries, by position)
list_channelId.iloc[:SCAN_LIMIT]

1559    UCH4yRBPH2pDUjPeqomx8CTQ
2494    UCevD0wKzJFpfIkvHOiQsfLQ
3802    UCkPIfBOLoO0hVPG-tI2YeGg
5073    UCyKsg-57XC9pyHbP7v3kCPQ
3135    UCxCDE6QpDCy8ZB3cVetRILw
2051    UCs86f6tbWatcKVt7emv9hfQ
3930    UCFahBR2wixu0xOex84bXFvg
2759    UCmgWMQkenFc72QnYkdxdoKA
516     UC3Ruo_5doyu514PesWGvCAg
1063    UCHXgFLFyqR-XqtxfTH3qiYA
Name: channelId, dtype: object