In [57]:
from tempfile import mkdtemp
import codecs

import os
import re
import time
import datetime
import pandas as pd
import urllib.request
from dotenv import load_dotenv
load_dotenv()

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.select import Select

import googleapiclient.errors
from googleapiclient.discovery import build

In [58]:
# パラメータ設定
SCAN_LIMIT      = 10
YOUTUBE_API_KEY = os.getenv('YOUTUBE_API_KEY')

output_folder        = "..\csv"
filename_livers_info = os.path.join(output_folder,"all_livers_info.csv")

In [60]:
# データ取得対象となるchannelId

## featureのcsvファイルに乗っているchannelIdを取得
list_csv = ["collabo.csv", "customer.csv", "gender.csv", "genre.csv", "model.csv"]
list_channelId = []

for csv in list_csv:
    df = pd.read_csv(f"../csv/feature/{csv}")
    list_channelId = list_channelId + (list(df["channelId"]))

list_channelId = list(set(list_channelId))
list_channelId = [channelId for channelId in list_channelId if re.match("vtuber", channelId) is None]

## csvファイル作成
with open(filename_livers_info, mode='w', encoding='utf8') as f:
    f.write(f"channelId,title,publishedAt,viewCount,subscriberCount,hiddenSubscriberCount,videoCount,description")

In [28]:
# YouTube接続の設定
youtube = build('youtube', 'v3', developerKey=YOUTUBE_API_KEY)

In [56]:
#データの取得
for channelId in list_channelId:

    # チャンネルの概要を取得
    response_channel = youtube.channels().list(
        part = "snippet, statistics",
        id   = channelId,
        hl   = "ja_JP",
    ).execute()

    try:
        title                 = response_channel["items"][0]["snippet"]["title"].replace('"','')
        publishedAt           = response_channel["items"][0]["snippet"]["publishedAt"]
        viewCount             = response_channel["items"][0]["statistics"]["viewCount"]
        subscriberCount       = response_channel["items"][0]["statistics"]["subscriberCount"]
        hiddenSubscriberCount = response_channel["items"][0]["statistics"]["hiddenSubscriberCount"]
        videoCount            = response_channel["items"][0]["statistics"]["videoCount"]
        description           = response_channel["items"][0]["snippet"]["description"].replace('"','').replace('\n','\\n').replace('\r','\\r')
    except KeyError :
        continue

    with open(filename_livers_info, mode='a', encoding='utf8') as f:
        f.write(f'\n{channelId},"{title}",{publishedAt},{viewCount},{subscriberCount},{hiddenSubscriberCount},{videoCount},"{description}"')

In [55]:
list_channelId = [
    "UCAwaT53ahBc7GLPW9FBec2Q",
    "UCuJp0qNfZKNiwwLyRirrCnw",
    "UCo7p8OAchZoLUPYTAUDawhQ",
    "UCIIE8BmWQKEy2hXQMnAd9Mg"
]

In [14]:
response_channel 

{'kind': 'youtube#channelListResponse',
 'etag': 'RuuXzTIr0OoDqI4S0RU6n4FqKEM',
 'pageInfo': {'totalResults': 0, 'resultsPerPage': 5}}

In [33]:
len(list_channelId)

5795

In [20]:
re.match("vtuber", "vtuber8v6abBxKPsGVhuy9zWC") is None

True

In [32]:
list_channelId = [channelId for channelId in list_channelId if re.match("vtuber", channelId) is None]

list_channelId

['UCzukBm1qWPQhXXwJeq2GGBQ',
 'UCRXIvklRzeAupxbUxMWsivg',
 'UC_z0M7u_pjXSOJnJwXKzWhw',
 'UCahDtRMtHl65Wx9oMi7Ao6g',
 'UCMi8FegmEQgMWUg53GxMRgw',
 'UCMPGW-nQZ7iB4UFx5MaBxMg',
 'UC5HFF-sJHQW2GWPumvMktGA',
 'UCyb8GTIJu9MuljeU6vs1aeA',
 'UCz1oUvsUbxiRZRMLee4zPKg',
 'UCq2wzl2Ua9GRwh1-jBMsX_A',
 'UCx36leARC6Ji0GYknUajvwg',
 'UCqo5eaC6qaGeao7Afo2scOg',
 'UCcFI1DeYapnpYDsoOfp898w',
 'UCgD5sLbepUml9BdWCIJvOpw',
 'UCEzsociuFqVwgZuMaZqaCsg',
 'UCelew9wfONsgKeEvZEaLzsQ',
 'UCcBwExkkqUGhFvYYKVy4bjQ',
 'UC386ae2QrB9qweqAx1Md6Bw',
 'UCryG9x5LYY_ujfrf81RQd_g',
 'UCetzkIRn7voCG930wFzpvXA',
 'UCFbrIKQK8DUEQUMdnvI5dRQ',
 'UCGFteZphdMTdEnlbuRpe5UA',
 'UChQCSaihjFMAjnpeIcDqR0w',
 'UC7wq4kxtd6VhSRn0jmOexVQ',
 'UCiD0WR6Wx0uk88qWAdsUs4g',
 'UCWYyxAiljvdBjUeyS4dGf8Q',
 'UCtegeFdf4ELI-7mSkB5EJEw',
 'UCbQyUW-ZZepatnnzCq6hOrg',
 'UC7HixHznciTQf1DhlbBIZmA',
 'UCQqapTULnZ7Bvm970nYU61A',
 'UC62j-S5ZMLWJFmy8LbmW3CA',
 'UCWXSUHY9Tvy1qoGjpM7fIKg',
 'UC-Y_6gEbu4sw65fru_zRD_w',
 'UCL3DBXWkXjEjfm8qepxAqWA',
 'UCJGg50qNtOT

In [35]:
list_channelId

'UCL5RerzQqPdKmfqTdwXIE2Q'

In [51]:
list_channelId = list_channelId[list_channelId.index('UCL5RerzQqPdKmfqTdwXIE2Q'):]

In [50]:
## featureのcsvファイルに乗っているchannelIdを取得
list_csv = ["collabo.csv", "customer.csv", "gender.csv", "genre.csv", "model.csv"]
list_channelId = []

for csv in list_csv:
    df = pd.read_csv(f"../csv/feature/{csv}")
    list_channelId = list_channelId + (list(df["channelId"]))

list_channelId = list(set(list_channelId))
list_channelId = [channelId for channelId in list_channelId if re.match("vtuber", channelId) is None]

In [53]:
# チャンネルの概要を取得
response_channel = youtube.channels().list(
    part = "snippet, statistics",
    id   = "UCAwaT53ahBc7GLPW9FBec2Q",
    hl   = "ja_JP",
).execute()

In [54]:
response_channel

{'kind': 'youtube#channelListResponse',
 'etag': 'aN_3l3l_C0RqM688J216iVE17-A',
 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5},
 'items': [{'kind': 'youtube#channel',
   'etag': 'zee8loQ6iAYJf2C3B6Rqk1xyOes',
   'id': 'UCAwaT53ahBc7GLPW9FBec2Q',
   'snippet': {'title': 'イクトソウ -Ikuto Sou-',
    'description': "はろー！ゼロ年代の遺産💜義務教育の敗北💜\r\nバーチャル男の娘の『イクト ソウ』です(•̥ ̫ •̥)\n\nHello! My name is Ikuto Sou.\nI'm doing virtual YouTuber!\n\n💜Twitter💜\nhttps://mobile.twitter.com/tutumi111/\n\r\n壁に話し掛ける動画ばかり上げて\r\nインターネット狂人になっていましたが、\r\nやっぱりみんなに愛されたいです💜\r\n全然メンヘラじゃないよ！愛が重たいだけ！\r\n\r\nヤンデレ大全に載る鬱ギャルゲーの\r\nレビューを読むのが好きです₍ᐢ.ˬ.ᐢ₎\r\nあとはお絵描きも…ᐡ｡• ·̫ •｡ᐡ\r\n\r\n特技は保健室登校と、\r\n限られた素材で動画編集すること💜\r\nプリセットモーションだけで踊れるよ💜\r\n▶ https://youtu.be/hNsYhCrE2eg\r\n\r\nバーチャル世界で絶対に愛されたいです！\r\n愛をくれたら愛します💜\r\n\r\n制作物💜 #ソウ馬燈動画\r\nFA💜 #ソウのお絵描きレクイエム\r\n\r\n💜Youtube\r\n\u3000https://www.youtube.com/channel/UCAwaT53ahBc7GLPW9FBec2Q\r\n💜データベース（Vポス）\r\n\u3000https://vtuber-post.com/database/detail.php?id=UCAwaT53ahBc7GLPW9FBec2Q",