In [1]:
import os
from datetime import datetime, timedelta

import pandas as pd
from apiclient.discovery import build
# Read the API key from the environment so the credential is never stored in
# the notebook or its history.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated.
YOUTUBE_DEVELOPER_KEY = os.environ.get('YOUTUBE_DEVELOPER_KEY')
if not YOUTUBE_DEVELOPER_KEY:
    raise RuntimeError(
        'Set the YOUTUBE_DEVELOPER_KEY environment variable to a YouTube Data API key.'
    )
# YouTube Data API v3 client shared by the helper functions in this notebook.
youtube = build('youtube', 'v3', developerKey=YOUTUBE_DEVELOPER_KEY)

In [2]:
def get_channel(channel_name):
    """Return the first channel search result for ``channel_name``.

    Runs ``search().list`` restricted to ``type='channel'`` on the module-level
    ``youtube`` client and returns the first entry of the response's ``items``.

    Args:
        channel_name: Free-text query used to search for the channel.

    Returns:
        The first search result item (a dict with the ``id`` and ``snippet``
        parts that were requested).

    Raises:
        IndexError: if the search returns no channel for the query.
    """
    response = youtube.search().list(q=channel_name, type='channel', part='id,snippet').execute()
    items = response['items']
    if not items:
        # Same exception type as plain indexing would raise, but with a
        # message that says which query came back empty.
        raise IndexError('no YouTube channel found for query %r' % (channel_name,))
    return items[0]


def get_videos(channel_id, part='id,snippet', limit=10):
    """Return up to ``limit`` items from a channel's uploads playlist.

    The uploads playlist ID is read from the channel's ``contentDetails`` and
    the playlist is then paged through with ``playlistItems().list`` on the
    module-level ``youtube`` client.

    Args:
        channel_id: ID of the channel whose uploads are listed.
        part: Comma-separated resource parts requested for each playlist item.
        limit: Maximum number of items to return.

    Returns:
        A list of playlist item dicts, never longer than ``limit``. It is
        shorter when the playlist has fewer items.

    Raises:
        IndexError: if the channel lookup returns an empty ``items`` list.
    """
    res = youtube.channels().list(id=channel_id,
                                  part='contentDetails').execute()
    playlist_id = res['items'][0]['contentDetails']['relatedPlaylists']['uploads']

    if limit <= 0:
        return []

    # Largest page the code requests; the YouTube Data API documents 50 as the
    # maximum accepted value of maxResults.
    max_page_size = 50

    videos = []
    next_page_token = None

    while len(videos) < limit:
        # Ask only for what is still missing so the last page cannot
        # overshoot the requested limit.
        page_size = min(limit - len(videos), max_page_size)
        res = youtube.playlistItems().list(playlistId=playlist_id,
                                           part=part,
                                           maxResults=page_size,
                                           pageToken=next_page_token).execute()
        videos.extend(res['items'])
        next_page_token = res.get('nextPageToken')

        if next_page_token is None:
            break

    # Defensive cap in case a page returns more items than were requested.
    return videos[:limit]

def get_videos_stats(video_ids):
    """Fetch the ``statistics`` part for every ID in ``video_ids``.

    The IDs are sent in batches of 50, comma-joined into a single ``id``
    argument per ``videos().list`` request, and the ``items`` of every
    response are concatenated in request order.

    Args:
        video_ids: Sequence of video ID strings.

    Returns:
        A list with the items of all responses.
    """
    batch_size = 50
    collected = []
    offset = 0
    while offset < len(video_ids):
        batch = video_ids[offset:offset + batch_size]
        response = youtube.videos().list(id=','.join(batch),
                                         part='statistics').execute()
        collected.extend(response['items'])
        offset += batch_size
    return collected

def parse_count(video):
    """Return ``(video id, view count)`` taken from one statistics item.

    The view count is returned exactly as stored under
    ``video['statistics']['viewCount']``; no conversion is applied.
    """
    video_id = video['id']
    view_count = video['statistics']['viewCount']
    return video_id, view_count

def parse_publish_date(video):
    """Return ``(video id, publish datetime, title)`` for one playlist item.

    ``snippet.publishedAt`` is parsed as a ``...Z`` timestamp. Values with
    fractional seconds (``2019-07-30T12:06:27.000Z``) and without them
    (``2019-07-30T12:06:27Z``) are both accepted, so the parser does not depend
    on the literal ``.000`` suffix.

    Args:
        video: Playlist item dict containing ``snippet.resourceId.videoId``,
            ``snippet.publishedAt`` and ``snippet.title``.

    Returns:
        A tuple ``(videoId, publishedAt, title)`` where ``publishedAt`` is a
        naive ``datetime``.

    Raises:
        ValueError: if ``publishedAt`` matches neither supported format.
    """
    snippet = video['snippet']
    video_id = snippet['resourceId']['videoId']
    raw_timestamp = snippet['publishedAt']

    for fmt in ('%Y-%m-%dT%H:%M:%S.%fZ', '%Y-%m-%dT%H:%M:%SZ'):
        try:
            published_at = datetime.strptime(raw_timestamp, fmt)
            break
        except ValueError:
            continue
    else:
        raise ValueError('unsupported publishedAt timestamp: %r' % (raw_timestamp,))

    return video_id, published_at, snippet['title']

In [46]:
# Look the channel up by name and keep the ID of the top search hit.
channel_name = 'Salmon House'
top_hit = get_channel(channel_name)
channel_id = top_hit['id']['channelId']
channel_id

'UCF4jQCA959hTvWNsXU5uHkA'

In [47]:
# Download the channel's upload entries, up to the given limit.
videos = get_videos(channel_id=channel_id, limit=6500)
videos

[{'kind': 'youtube#playlistItem',
  'etag': '"p4VTdlkQv3HQeTEaXgvLePAydmU/DHrpk4pD8r3xh0wcdOijZSuA2rQ"',
  'id': 'VVVGNGpRQ0E5NTloVHZXTnNYVTV1SGtBLkJ6YUw1QUROTDRJ',
  'snippet': {'publishedAt': '2019-07-30T12:06:27.000Z',
   'channelId': 'UCF4jQCA959hTvWNsXU5uHkA',
   'title': 'แรงสะใจต้องSHARP',
   'description': 'ไม่รู้โฆษณาอะไร แต่... #แรงสะใจต้องSHARP',
   'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/BzaL5ADNL4I/default.jpg',
     'width': 120,
     'height': 90},
    'medium': {'url': 'https://i.ytimg.com/vi/BzaL5ADNL4I/mqdefault.jpg',
     'width': 320,
     'height': 180},
    'high': {'url': 'https://i.ytimg.com/vi/BzaL5ADNL4I/hqdefault.jpg',
     'width': 480,
     'height': 360},
    'standard': {'url': 'https://i.ytimg.com/vi/BzaL5ADNL4I/sddefault.jpg',
     'width': 640,
     'height': 480},
    'maxres': {'url': 'https://i.ytimg.com/vi/BzaL5ADNL4I/maxresdefault.jpg',
     'width': 1280,
     'height': 720}},
   'channelTitle': 'Salmon House',
   'playlistId': 

In [48]:
# Pull the video ID out of every playlist entry.
video_ids = [entry['snippet']['resourceId']['videoId'] for entry in videos]
len(video_ids)

133

In [49]:
# Fetch the statistics item for every collected video ID.
stats = get_videos_stats(video_ids=video_ids)
len(stats)

133

In [50]:
# Order a copy of the statistics items from most to fewest views; viewCount is
# converted with int() so the comparison is numeric.
most_viewed = list(stats)
most_viewed.sort(key=lambda video: int(video['statistics']['viewCount']), reverse=True)

In [51]:
# One (videoId, viewCount) tuple per video, in most-viewed order.
counts = list(map(parse_count, most_viewed))
len(counts)

133

In [52]:
# Tabulate the (videoId, viewCount) tuples.
count_columns = ['videoId', 'viewCount']
df_count = pd.DataFrame(counts, columns=count_columns)
df_count.head()

Unnamed: 0,videoId,viewCount
0,u06GqlNiJUY,4450118
1,mcuUJw9hMmI,1701519
2,x0cz4Ac0d78,1243176
3,7j1moCi56tc,1214482
4,Dgi35zQ8Auk,1105252


In [53]:
# Convert viewCount to float so it sorts and aggregates numerically.
df_count = df_count.assign(viewCount=df_count["viewCount"].astype("float"))

In [54]:
# Preview the videos with the highest view counts.
df_count.sort_values(by='viewCount', ascending=False).head()

Unnamed: 0,videoId,viewCount
0,u06GqlNiJUY,4450118.0
1,mcuUJw9hMmI,1701519.0
2,x0cz4Ac0d78,1243176.0
3,7j1moCi56tc,1214482.0
4,Dgi35zQ8Auk,1105252.0


In [55]:
# One (videoId, publishedAt, title) tuple per playlist entry.
publish_dates = list(map(parse_publish_date, videos))
len(publish_dates)

133

In [56]:
# Tabulate the (videoId, publishedAt, title) tuples.
date_columns = ['videoId', 'publishedAt', 'title']
df_date = pd.DataFrame(publish_dates, columns=date_columns)
df_date.head()

Unnamed: 0,videoId,publishedAt,title
0,BzaL5ADNL4I,2019-07-30 12:06:27,แรงสะใจต้องSHARP
1,fSd64mVDFoU,2019-08-01 12:00:06,จะรุนแรงไปทำไม
2,B7UEhOgk5tw,2019-07-09 08:47:24,เรไร amulet ศรัทธาแก้ทุกปัญหา
3,uyoN0rZrRV4,2019-05-07 14:44:20,นักเลงกระบะซิ่งเขารู้กัน
4,AQajTmSjwaw,2019-03-27 08:30:00,หน้าใส ใจฟู ดูนี่


In [57]:
# Attach view counts to the publish-date table; rows follow df_date's order.
dfd = df_date.merge(df_count, how='inner', on='videoId')
dfd.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 133 entries, 0 to 132
Data columns (total 4 columns):
videoId        133 non-null object
publishedAt    133 non-null datetime64[ns]
title          133 non-null object
viewCount      133 non-null float64
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 5.2+ KB


In [58]:
# Preview the most recently published videos.
dfd.sort_values(by='publishedAt', ascending=False).head()

Unnamed: 0,videoId,publishedAt,title,viewCount
1,fSd64mVDFoU,2019-08-01 12:00:06,จะรุนแรงไปทำไม,8995.0
0,BzaL5ADNL4I,2019-07-30 12:06:27,แรงสะใจต้องSHARP,8127.0
2,B7UEhOgk5tw,2019-07-09 08:47:24,เรไร amulet ศรัทธาแก้ทุกปัญหา,51751.0
3,uyoN0rZrRV4,2019-05-07 14:44:20,นักเลงกระบะซิ่งเขารู้กัน,421383.0
4,AQajTmSjwaw,2019-03-27 08:30:00,หน้าใส ใจฟู ดูนี่,12395.0


In [59]:
# Same join with df_count on the left, so rows keep the most-viewed ordering.
df_count_date = df_count.merge(df_date, on='videoId', how='inner')
df_count_date.head(10)

Unnamed: 0,videoId,viewCount,publishedAt,title
0,u06GqlNiJUY,4450118.0,2016-08-09 12:56:36,วิดีโอสาธิตความปลอดภัยบนรถเมล์
1,mcuUJw9hMmI,1701519.0,2015-04-01 13:00:47,โก๋แก่ presents - Bangkok 1st Time : ตอนสั่งอา...
2,x0cz4Ac0d78,1243176.0,2015-03-01 13:34:13,โก๋แก่ presents - Bangkok 1st Time : ตอนเล่นหว...
3,7j1moCi56tc,1214482.0,2015-02-16 12:55:18,โก๋แก่ presents - Bangkok 1st Time : ตอนชื่อไท...
4,Dgi35zQ8Auk,1105252.0,2015-04-11 12:55:54,SIRI VS. VERY THAI SENTENCES
5,U9EsAHwCpiU,1093167.0,2017-06-28 14:05:38,Pre-Sum Skills [Okamoto x Salmon House]
6,-wxHJYriKuk,1008033.0,2015-03-16 12:59:59,โก๋แก่ presents - Bangkok 1st Time : ตอนพ่นกำแ...
7,TU7L0VmVYEk,928809.0,2015-04-16 12:29:57,โก๋แก่ presents - Bangkok 1st Time : ตอนเจอพริ...
8,3KWCtsiZJqQ,843879.0,2018-12-04 06:00:12,รีวิวเจอพ่อตาครั้งแรก : Chocolate Ville X Salm...
9,nqOVp_lz670,717782.0,2016-04-04 12:58:42,"A cameraman, literally."


In [17]:
# Export the ranked table to an Excel workbook.
# NOTE(review): this is an absolute, machine-specific path, and the file name
# does not mention the channel queried in this notebook ('Salmon House') -
# confirm this is the intended target before re-running.
output_path = 'c:\\aab\\excel\\PMC Official MV.xlsx'
df_count_date.to_excel(output_path)