In [1]:
pip install google-api-python-client

Collecting google-api-python-client
  Downloading google_api_python_client-2.26.1-py2.py3-none-any.whl (7.6 MB)
Collecting google-auth<3.0.0dev,>=1.16.0
  Downloading google_auth-2.3.0-py2.py3-none-any.whl (154 kB)
Collecting google-api-core<3.0.0dev,>=1.21.0
  Downloading google_api_core-2.1.1-py2.py3-none-any.whl (95 kB)
Collecting httplib2<1dev,>=0.15.0
  Downloading httplib2-0.20.1-py3-none-any.whl (96 kB)
Collecting google-auth-httplib2>=0.1.0
  Downloading google_auth_httplib2-0.1.0-py2.py3-none-any.whl (9.3 kB)
Collecting uritemplate<5,>=3.0.0
  Downloading uritemplate-4.1.1-py2.py3-none-any.whl (10 kB)
Collecting googleapis-common-protos<2.0dev,>=1.6.0
  Downloading googleapis_common_protos-1.53.0-py2.py3-none-any.whl (198 kB)
Collecting pyasn1-modules>=0.2.1
  Downloading pyasn1_modules-0.2.8-py2.py3-none-any.whl (155 kB)
Collecting rsa<5,>=3.1.4
  Downloading rsa-4.7.2-py3-none-any.whl (34 kB)
Collecting pyasn1<0.5.0,>=0.4.6
  Downloading pyasn1-0.4.8-py2.py3-none-any.whl (77

In [2]:
from apiclient.discovery import build

In [3]:
import json
with open('secret.json') as f:
    secret = json.load(f)

In [4]:
import pandas as pd

def video_search(youtube, q='自動化', max_results=50):
  response = youtube.search().list(
    q=q,
    part="id,snippet",
    order='viewCount',
    type='video',
    maxResults=max_results
  ).execute()

  items_id = []
  items = response['items']
  for item in items:
      item_id = {}
      item_id['video_id'] = item['id']['videoId']
      item_id['channel_id'] = item['snippet']['channelId']
      items_id.append(item_id)
  df_video = pd.DataFrame(items_id)
  return df_video

In [5]:
DEVELOPER_KEY = secret['KEY']
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"

youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)

df_video = video_search(youtube, q='Python 自動化', max_results=30)
df_video[:3]

Unnamed: 0,video_id,channel_id
0,bsq2YY-XAEo,UC0xRMqPOyRNPTaL6BxhbCnQ
1,-5v7CuXm7Ns,UCGlgXjYVoHLD86TQQ799WIw
2,vRFVS_FNLwY,UCwhKqH0jDKm5vPOZ7WQ2R1A


In [6]:
channel_ids = df_video['channel_id'].unique().tolist()
channel_ids[:3]

['UC0xRMqPOyRNPTaL6BxhbCnQ',
 'UCGlgXjYVoHLD86TQQ799WIw',
 'UCwhKqH0jDKm5vPOZ7WQ2R1A']

In [29]:
subscriber_list = youtube.channels().list(
    id=','.join(channel_ids),
    part='statistics',
    fields='items(id,statistics(subscriberCount))'
).execute()
subscriber_list['items']
subscribers = []
for item in subscriber_list['items']:
    subscriber = {}
    subscriber['channel_id'] = item['id']
    subscriber['subscriber_count'] = int(item['statistics']['subscriberCount'])
    subscribers.append(subscriber)
subscribers[:3]

[{'channel_id': 'UCGlgXjYVoHLD86TQQ799WIw', 'subscriber_count': 128000},
 {'channel_id': 'UCMMjv61LfBy5J3AT8Ua0NGQ', 'subscriber_count': 138000},
 {'channel_id': 'UCGFxqqRaM97VjM5ToRfkoFg', 'subscriber_count': 5080}]

In [13]:
df_subscribers = pd.DataFrame(subscribers)
df_subscribers[:3]

Unnamed: 0,channel_id,subscriber_count
0,UCGlgXjYVoHLD86TQQ799WIw,128000
1,UCXjTiSGclQLVVU83GVrRM4w,1250000
2,UCrZOiJyMbXpZ8T5snCIJaLA,48400


In [14]:
df = pd.merge(left=df_video, right=df_subscribers, on='channel_id')
df.head(3)

Unnamed: 0,video_id,channel_id,subscriber_count
0,bsq2YY-XAEo,UC0xRMqPOyRNPTaL6BxhbCnQ,81800
1,VRFfAeW30qE,UC0xRMqPOyRNPTaL6BxhbCnQ,81800
2,KmqEyT7HznQ,UC0xRMqPOyRNPTaL6BxhbCnQ,81800


In [25]:
df_extracted = df[df['subscriber_count'] < 10000]
video_ids = df_extracted['video_id'].tolist()

In [30]:
videos_list = youtube.videos().list(
    id=','.join(video_ids),
    part='snippet,statistics',
    fields='items(id,snippet(title),statistics(viewCount))'
).execute()
items = videos_list['items']
items

[{'id': 'SPf_nINsCfw',
  'snippet': {'title': '【2021年版】Pythonで業務自動化(Excel,Outlook等)したいならこの本で決定'},
  'statistics': {'viewCount': '66113'}},
 {'id': 'nCfAtMooIQQ',
  'snippet': {'title': 'その面倒な作業、自動化してみない？【Pythonで自動化のすすめ】'},
  'statistics': {'viewCount': '36867'}},
 {'id': 'bgzHqKN6teo',
  'snippet': {'title': '面倒なExcel作業をPythonで自動化してみた【複数ファイルからデータを一瞬で抽出】'},
  'statistics': {'viewCount': '21660'}}]

In [31]:
videos_info = []
for item in items:
    video_info = {}
    video_info['video_id'] = item['id']
    video_info['title'] = item['snippet']['title']
    video_info['view_count'] = item['statistics']['viewCount']
    videos_info.append(video_info)
videos_info

[{'video_id': 'SPf_nINsCfw',
  'title': '【2021年版】Pythonで業務自動化(Excel,Outlook等)したいならこの本で決定',
  'view_count': '66113'},
 {'video_id': 'nCfAtMooIQQ',
  'title': 'その面倒な作業、自動化してみない？【Pythonで自動化のすすめ】',
  'view_count': '36867'},
 {'video_id': 'bgzHqKN6teo',
  'title': '面倒なExcel作業をPythonで自動化してみた【複数ファイルからデータを一瞬で抽出】',
  'view_count': '21660'}]

In [32]:
df_videos_info = pd.DataFrame(videos_info)
df_videos_info

Unnamed: 0,video_id,title,view_count
0,SPf_nINsCfw,"【2021年版】Pythonで業務自動化(Excel,Outlook等)したいならこの本で決定",66113
1,nCfAtMooIQQ,その面倒な作業、自動化してみない？【Pythonで自動化のすすめ】,36867
2,bgzHqKN6teo,面倒なExcel作業をPythonで自動化してみた【複数ファイルからデータを一瞬で抽出】,21660


In [33]:
results = pd.merge(left=df_extracted, right=df_videos_info, on='video_id')
results

Unnamed: 0,video_id,channel_id,subscriber_count,title,view_count
0,SPf_nINsCfw,UCGFxqqRaM97VjM5ToRfkoFg,5080,"【2021年版】Pythonで業務自動化(Excel,Outlook等)したいならこの本で決定",66113
1,nCfAtMooIQQ,UCNste53_VRtP6MGUfitOYQA,5110,その面倒な作業、自動化してみない？【Pythonで自動化のすすめ】,36867
2,bgzHqKN6teo,UCYPkI8lgfiNXmGxCglZhOwA,772,面倒なExcel作業をPythonで自動化してみた【複数ファイルからデータを一瞬で抽出】,21660


In [36]:
results = results.loc[:,['video_id', 'title', 'view_count', 'subscriber_count', 'channel_id']]
results

Unnamed: 0,video_id,title,view_count,subscriber_count,channel_id
0,SPf_nINsCfw,"【2021年版】Pythonで業務自動化(Excel,Outlook等)したいならこの本で決定",66113,5080,UCGFxqqRaM97VjM5ToRfkoFg
1,nCfAtMooIQQ,その面倒な作業、自動化してみない？【Pythonで自動化のすすめ】,36867,5110,UCNste53_VRtP6MGUfitOYQA
2,bgzHqKN6teo,面倒なExcel作業をPythonで自動化してみた【複数ファイルからデータを一瞬で抽出】,21660,772,UCYPkI8lgfiNXmGxCglZhOwA


## 処理をまとめる

In [58]:
def get_results(df_video, threshold=10000):
    channel_ids = df_video['channel_id'].unique().tolist()
    subscriber_list = youtube.channels().list(
        id=','.join(channel_ids),
        part='statistics',
        fields='items(id,statistics(subscriberCount))'
    ).execute()

    subscribers = []
    for item in subscriber_list['items']:
        subscriber = {}
        if len(item['statistics']) > 0:
            subscriber['channel_id'] = item['id']
            subscriber['subscriber_count'] = int(item['statistics']['subscriberCount'])
        else:
            subscriber['channel_id'] = item['id']
        subscribers.append(subscriber)
    df_subscribers = pd.DataFrame(subscribers)

    df = pd.merge(left=df_video, right=df_subscribers, on='channel_id')
    df_extracted = df[df['subscriber_count'] < threshold]
    video_ids = df_extracted['video_id'].tolist()

    videos_list = youtube.videos().list(
        id=','.join(video_ids),
        part='snippet,statistics',
        fields='items(id,snippet(title),statistics(viewCount))'
    ).execute()
    print(videos_list)
    items = videos_list['items']
    videos_info = []
    for item in items:
        video_info = {}
        video_info['video_id'] = item['id']
        video_info['title'] = item['snippet']['title']
        video_info['view_count'] = item['statistics']['viewCount']
        videos_info.append(video_info)

    df_videos_info = pd.DataFrame(videos_info)
    results = pd.merge(left=df_extracted, right=df_videos_info, on='video_id')
    results = results.loc[:,['video_id', 'title', 'view_count', 'subscriber_count', 'channel_id']]
    return results

df_video = video_search(youtube, q='Python 自動化', max_results=30)
get_results(df_video, threshold=10000)

{'items': [{'id': 'SPf_nINsCfw', 'snippet': {'title': '【2021年版】Pythonで業務自動化(Excel,Outlook等)したいならこの本で決定'}, 'statistics': {'viewCount': '66114'}}, {'id': 'nCfAtMooIQQ', 'snippet': {'title': 'その面倒な作業、自動化してみない？【Pythonで自動化のすすめ】'}, 'statistics': {'viewCount': '36867'}}, {'id': 'bgzHqKN6teo', 'snippet': {'title': '面倒なExcel作業をPythonで自動化してみた【複数ファイルからデータを一瞬で抽出】'}, 'statistics': {'viewCount': '21660'}}]}


Unnamed: 0,video_id,title,view_count,subscriber_count,channel_id
0,SPf_nINsCfw,"【2021年版】Pythonで業務自動化(Excel,Outlook等)したいならこの本で決定",66114,5080,UCGFxqqRaM97VjM5ToRfkoFg
1,nCfAtMooIQQ,その面倒な作業、自動化してみない？【Pythonで自動化のすすめ】,36867,5110,UCNste53_VRtP6MGUfitOYQA
2,bgzHqKN6teo,面倒なExcel作業をPythonで自動化してみた【複数ファイルからデータを一瞬で抽出】,21660,772,UCYPkI8lgfiNXmGxCglZhOwA
