In [1]:
import os
from datetime import datetime, timedelta

import pandas as pd
from apiclient.discovery import build
# Read the YouTube Data API key from the environment so the secret is never
# stored in the notebook itself. Export YOUTUBE_DEVELOPER_KEY before starting
# the kernel.
YOUTUBE_DEVELOPER_KEY = os.environ.get('YOUTUBE_DEVELOPER_KEY')
if not YOUTUBE_DEVELOPER_KEY:
    raise RuntimeError('Set the YOUTUBE_DEVELOPER_KEY environment variable '
                       'to a YouTube Data API v3 key before running this notebook.')
# Shared API client used by the helper functions in this notebook.
youtube = build('youtube', 'v3', developerKey=YOUTUBE_DEVELOPER_KEY)

In [2]:
def get_channel(channel_name):
    """Return the first channel search result for ``channel_name``.

    Runs a ``search().list`` query restricted to ``type='channel'`` with the
    ``id`` and ``snippet`` parts and returns the first entry of ``items``.

    Args:
        channel_name: Text used as the search query.

    Returns:
        The first search-result dict from the response's ``items`` list.

    Raises:
        IndexError: If the search returned no items.
    """
    response = youtube.search().list(q=channel_name, type='channel', part='id,snippet').execute()
    items = response['items']
    if not items:
        # Same exception type that indexing an empty list would raise, but
        # with a message that names the query that produced no results.
        raise IndexError('no YouTube channel found for {!r}'.format(channel_name))
    return items[0]


def get_videos(channel_id, part='id,snippet', limit=10):
    """Fetch up to ``limit`` items from a channel's uploads playlist.

    Looks up the channel's ``uploads`` playlist with ``channels().list`` and
    then pages through ``playlistItems().list`` until ``limit`` items have been
    collected or the API reports no further page.

    Args:
        channel_id: ID of the channel whose uploads are listed.
        part: ``part`` argument forwarded to ``playlistItems().list``.
        limit: Maximum number of playlist items to return.

    Returns:
        A list with at most ``limit`` playlist-item dicts, in the order the
        API returned them. Empty when ``limit`` is not positive.
    """
    res = youtube.channels().list(id=channel_id,
                                  part='contentDetails').execute()
    playlist_id = res['items'][0]['contentDetails']['relatedPlaylists']['uploads']

    videos = []
    next_page_token = None

    while len(videos) < limit:
        # Request only what is still missing, capped at 50 per page, so the
        # last page does not overshoot the requested limit.
        page_size = min(limit - len(videos), 50)
        res = youtube.playlistItems().list(playlistId=playlist_id,
                                           part=part,
                                           maxResults=page_size,
                                           pageToken=next_page_token).execute()
        videos += res['items']
        next_page_token = res.get('nextPageToken')

        if next_page_token is None:
            break

    # Guard against a page that returns more items than were asked for.
    return videos[:limit]

def get_videos_stats(video_ids):
    """Collect the ``statistics`` resources for the given video IDs.

    The IDs are sent to ``videos().list`` in comma-joined batches of up to 50,
    and the ``items`` of every response are concatenated in request order.

    Args:
        video_ids: Sequence of video ID strings.

    Returns:
        A list with the ``items`` from all batch responses.
    """
    batch_size = 50
    collected = []
    offset = 0
    while offset < len(video_ids):
        batch = video_ids[offset:offset + batch_size]
        response = youtube.videos().list(id=','.join(batch),
                                         part='statistics').execute()
        collected.extend(response['items'])
        offset += batch_size
    return collected

def parse_count(video):
    """Return ``(id, viewCount)`` taken from a video statistics dict.

    ``viewCount`` is passed through exactly as stored under ``statistics``.
    """
    video_id = video['id']
    view_count = video['statistics']['viewCount']
    return video_id, view_count

def parse_publish_date(video):
    """Return ``(videoId, publishedAt, title)`` for a playlist item dict.

    ``publishedAt`` is parsed into a naive ``datetime``. Timestamps with
    fractional seconds (``2019-06-18T15:41:06.000Z``) and without them
    (``2019-06-18T15:41:06Z``) are both accepted.

    Args:
        video: Dict with a ``snippet`` holding ``resourceId.videoId``,
            ``publishedAt`` and ``title``.

    Returns:
        A ``(video_id, published_at, title)`` tuple.

    Raises:
        ValueError: If ``publishedAt`` matches neither timestamp layout.
    """
    snippet = video['snippet']
    video_id = snippet['resourceId']['videoId']
    raw_timestamp = snippet['publishedAt']
    try:
        # %f accepts one to six fractional digits, which covers ".000".
        published_at = datetime.strptime(raw_timestamp, "%Y-%m-%dT%H:%M:%S.%fZ")
    except ValueError:
        # Fall back to the layout without a fractional-seconds component.
        published_at = datetime.strptime(raw_timestamp, "%Y-%m-%dT%H:%M:%SZ")
    return video_id, published_at, snippet['title']

In [3]:
# Resolve the channel name to its channel ID using the first search result.
channel_name = 'TheCaroleKing'
channel_id = get_channel(channel_name)['id']['channelId']
# Show the resolved ID as the cell output.
channel_id

'UCaS5rLrxc6pQDWwpWyQyXUg'

In [4]:
# Fetch the channel's uploads with a generous upper bound on the item count.
videos = get_videos(channel_id, limit=6500)
# Preview a single record instead of dumping the whole list into the output.
videos[:1]

[{'kind': 'youtube#playlistItem',
  'etag': '"p4VTdlkQv3HQeTEaXgvLePAydmU/hICALyJPUAdCBe_gcL0uETBIUfU"',
  'id': 'VVVhUzVyTHJ4YzZwUURXd3BXeVF5WFVnLlhkZWM0SVpKWTI4',
  'snippet': {'publishedAt': '2019-06-18T15:41:06.000Z',
   'channelId': 'UCaS5rLrxc6pQDWwpWyQyXUg',
   'title': 'Girl Power - Carole King',
   'description': 'Carole King was inspired to write "Girl Power" after Mia Hamm\'s iconic win.  Released in 2007 on Rockingale Records.',
   'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/Xdec4IZJY28/default.jpg',
     'width': 120,
     'height': 90},
    'medium': {'url': 'https://i.ytimg.com/vi/Xdec4IZJY28/mqdefault.jpg',
     'width': 320,
     'height': 180},
    'high': {'url': 'https://i.ytimg.com/vi/Xdec4IZJY28/hqdefault.jpg',
     'width': 480,
     'height': 360}},
   'channelTitle': 'TheCaroleKing',
   'playlistId': 'UUaS5rLrxc6pQDWwpWyQyXUg',
   'position': 0,
   'resourceId': {'kind': 'youtube#video', 'videoId': 'Xdec4IZJY28'}}},
 {'kind': 'youtube#playlistItem

In [5]:
# Pull the video ID out of each playlist item, keeping playlist order.
video_ids = [item['snippet']['resourceId']['videoId'] for item in videos]
len(video_ids)

124

In [6]:
# Fetch the statistics record for each collected video ID.
stats = get_videos_stats(video_ids)
# Cell output: number of statistics records returned.
len(stats)

124

In [7]:
# Order a copy of the statistics records from most to least viewed;
# int() is applied so the counts are compared numerically.
most_viewed = list(stats)
most_viewed.sort(key=lambda record: int(record['statistics']['viewCount']), reverse=True)

In [8]:
# Reduce each statistics record to an (id, viewCount) pair.
counts = list(map(parse_count, most_viewed))
len(counts)

124

In [9]:
# Tabulate the (id, viewCount) pairs, one row per video.
df_count = pd.DataFrame(counts, columns=['videoId', 'viewCount'])
df_count.head()

Unnamed: 0,videoId,viewCount
0,gc93thQVIV8,241628
1,cRPls7hjRfs,232062
2,vIBhzZxy3bs,158509
3,rJGPXP7i-RM,145338
4,zaPYz-6Bji4,134620


In [10]:
# Cast the view counts to float so they sort and compare numerically.
df_count = df_count.astype({"viewCount": "float"})

In [11]:
# Top rows by view count, highest first.
df_count.sort_values('viewCount', ascending=False).head()

Unnamed: 0,videoId,viewCount
0,gc93thQVIV8,241628.0
1,cRPls7hjRfs,232062.0
2,vIBhzZxy3bs,158509.0
3,rJGPXP7i-RM,145338.0
4,zaPYz-6Bji4,134620.0


In [12]:
# Reduce each playlist item to a (videoId, publishedAt, title) tuple.
publish_dates = list(map(parse_publish_date, videos))
len(publish_dates)

124

In [13]:
# Tabulate the publish-date tuples, one row per playlist item.
df_date = pd.DataFrame(publish_dates, columns=['videoId', 'publishedAt', 'title'])
df_date.head()

Unnamed: 0,videoId,publishedAt,title
0,Xdec4IZJY28,2019-06-18 15:41:06,Girl Power - Carole King
1,5eQU8tnNBdM,2019-05-28 15:06:57,Carole King - Live at Montreux Trailer 30sec.
2,JPczvGZoW9A,2018-12-21 07:58:53,Happy Holidays from Carole King Productions
3,cRPls7hjRfs,2018-11-13 16:38:38,"""It's Too Late"" - Carole King"
4,gc93thQVIV8,2018-10-29 15:00:17,Carole King - One (2018)


In [14]:
# Attach view counts to the date/title rows, matching on videoId.
dfd = df_date.merge(df_count, how='inner', on='videoId')
dfd.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 124 entries, 0 to 123
Data columns (total 4 columns):
videoId        124 non-null object
publishedAt    124 non-null datetime64[ns]
title          124 non-null object
viewCount      124 non-null float64
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 4.8+ KB


In [15]:
# Most recently published rows first.
dfd.sort_values('publishedAt', ascending=False).head()

Unnamed: 0,videoId,publishedAt,title,viewCount
6,ew_MFY5MWXc,2019-07-12 15:49:12,Welcome To My Living Room - Medley Part 2,924.0
0,Xdec4IZJY28,2019-06-18 15:41:06,Girl Power - Carole King,3296.0
1,5eQU8tnNBdM,2019-05-28 15:06:57,Carole King - Live at Montreux Trailer 30sec.,8096.0
2,JPczvGZoW9A,2018-12-21 07:58:53,Happy Holidays from Carole King Productions,1320.0
3,cRPls7hjRfs,2018-11-13 16:38:38,"""It's Too Late"" - Carole King",232062.0


In [16]:
# Join counts with dates/titles (count columns first), then keep the
# twelve rows with the highest view counts.
df_count_date = df_count.merge(df_date, on='videoId', how='inner')
songs = df_count_date.sort_values('viewCount', ascending=False).head(12)
songs

Unnamed: 0,videoId,viewCount,publishedAt,title
0,gc93thQVIV8,241628.0,2018-10-29 15:00:17,Carole King - One (2018)
1,cRPls7hjRfs,232062.0,2018-11-13 16:38:38,"""It's Too Late"" - Carole King"
2,vIBhzZxy3bs,158509.0,2014-03-13 15:14:39,"Carole King ""Beautiful"" with Lyrics"
3,rJGPXP7i-RM,145338.0,2018-04-05 00:15:37,"""So Far Away"" - Carole King"
4,zaPYz-6Bji4,134620.0,2017-01-26 19:08:08,One Small Voice - Carole King
5,0AjiuDHe9uc,58697.0,2016-09-27 15:08:50,"""Way Over Yonder"" - Carole King"
6,EiJ0uK8qYXM,55182.0,2009-11-24 01:44:44,Carole King • James Taylor | Troubadour Reunion
7,q1aZRgajWgY,46951.0,2013-08-02 13:12:11,"""I Believe In Loving You"" by Carole King"
8,RGpR9AzVyDk,46088.0,2016-05-11 02:55:33,"""The Loco Motion"" - Carole King"
9,1D6ECVuVxXc,42355.0,2011-02-03 01:56:23,Love Makes the World


In [17]:
# Select the videoId column of the top songs as a Series.
sr = songs.loc[:, 'videoId']
sr.shape

(12,)

In [18]:
# Convert the selected video IDs to a plain Python list.
vids = sr.tolist()
vids

['gc93thQVIV8',
 'cRPls7hjRfs',
 'vIBhzZxy3bs',
 'rJGPXP7i-RM',
 'zaPYz-6Bji4',
 '0AjiuDHe9uc',
 'EiJ0uK8qYXM',
 'q1aZRgajWgY',
 'RGpR9AzVyDk',
 '1D6ECVuVxXc',
 '-9JdmHOsWtU',
 'IrzjDVH4XcA']