In [1]:
import os
from datetime import date, datetime, timedelta

import pandas as pd
from apiclient.discovery import build
# Read the API key from the environment instead of embedding it in the
# notebook, so the credential is never saved or shared with the file.
# A KeyError here means the YOUTUBE_DEVELOPER_KEY variable is not set.
# NOTE(review): the key that used to be written on this line has been exposed
# and should be revoked.
YOUTUBE_DEVELOPER_KEY = os.environ['YOUTUBE_DEVELOPER_KEY']
youtube = build('youtube', 'v3', developerKey=YOUTUBE_DEVELOPER_KEY)
today = date.today()
today

datetime.date(2019, 9, 3)

In [2]:
def get_channel(channel_name):
    """Return the first channel search result for ``channel_name``.

    Issues a channel-only search (parts ``id`` and ``snippet``) through the
    module-level ``youtube`` client and returns the first entry of the
    response's ``items`` list. An empty ``items`` list raises ``IndexError``.
    """
    request = youtube.search().list(q=channel_name, type='channel', part='id,snippet')
    response = request.execute()
    matches = response['items']
    return matches[0]


def get_videos(channel_id, part='id,snippet', limit=10):
    """Fetch up to ``limit`` items from a channel's uploads playlist.

    Looks up the channel's ``uploads`` playlist id with ``channels().list``
    and then pages through ``playlistItems().list`` on the module-level
    ``youtube`` client until ``limit`` items were collected or the API stops
    returning a ``nextPageToken``.

    Args:
        channel_id: Id of the channel whose uploads are listed.
        part: Comma-separated resource parts requested for each playlist item.
        limit: Maximum number of playlist items to return.

    Returns:
        A list of playlist item dicts in the order the API returned them,
        never longer than ``limit``. Empty (and no request is made) when
        ``limit`` is not positive.

    Raises:
        IndexError: If the channel lookup returns an empty ``items`` list.
    """
    if limit <= 0:
        return []

    res = youtube.channels().list(id=channel_id,
                                  part='contentDetails').execute()
    playlist_id = res['items'][0]['contentDetails']['relatedPlaylists']['uploads']

    videos = []
    next_page_token = None

    while len(videos) < limit:
        # Request only what is still missing, never more than 50 per call
        # (the per-request ceiling this function has always applied), so the
        # final page does not overshoot ``limit``.
        page_size = min(limit - len(videos), 50)
        res = youtube.playlistItems().list(playlistId=playlist_id,
                                           part=part,
                                           maxResults=page_size,
                                           pageToken=next_page_token).execute()
        videos += res['items']
        next_page_token = res.get('nextPageToken')

        if next_page_token is None:
            break

    # Defensive cut in case a page holds more items than were requested.
    return videos[:limit]

def get_videos_stats(video_ids):
    """Collect the ``statistics`` resource for every id in ``video_ids``.

    The ids are sent to ``videos().list`` on the module-level ``youtube``
    client in comma-joined batches of 50, and the ``items`` of each response
    are concatenated in request order.
    """
    batch_size = 50
    collected = []
    offset = 0
    while offset < len(video_ids):
        batch = video_ids[offset:offset + batch_size]
        response = youtube.videos().list(id=','.join(batch),
                                         part='statistics').execute()
        collected.extend(response['items'])
        offset += batch_size
    return collected

def parse_count(video):
    """Return ``(id, viewCount)`` read from a single statistics item."""
    video_id = video['id']
    view_count = video['statistics']['viewCount']
    return video_id, view_count

def parse_publish_date(video):
    """Return ``(videoId, published datetime, title)`` for a playlist item.

    Args:
        video: A playlist item dict whose ``snippet`` holds ``resourceId``,
            ``publishedAt`` and ``title``.

    Returns:
        A tuple of the video id, the publish time as a naive ``datetime``
        and the title.

    Raises:
        ValueError: If ``publishedAt`` matches neither accepted layout.
    """
    snippet = video['snippet']
    raw = snippet['publishedAt']
    # Accept timestamps with or without a fractional-seconds part; a format
    # that hard-codes ".000Z" rejects e.g. "2019-08-23T04:00:03Z".
    if '.' in raw:
        fmt = "%Y-%m-%dT%H:%M:%S.%fZ"
    else:
        fmt = "%Y-%m-%dT%H:%M:%SZ"
    published = datetime.strptime(raw, fmt)
    return snippet['resourceId']['videoId'], published, snippet['title']

In [4]:
# Search for the channel by name and keep the channel id of the first hit.
channel_name = 'Taylor Swift'
channel_id = get_channel(channel_name)['id']['channelId']
channel_id

'UCANLZYMidaCbLQFWXBC95Jg'

In [5]:
# Fetch the channel's uploads. The limit is far above the number of items the
# recorded run returned (105), so it effectively means "everything".
videos = get_videos(channel_id, limit=6500)
# Show only a small sample: echoing every playlist item (with its full
# description text) floods the notebook output.
videos[:2]

[{'kind': 'youtube#playlistItem',
  'etag': '"8jEFfXBrqiSrcF6Ee7MQuz8XuAM/6R0oZqAb9czA_1O6kQsUoPHxfP8"',
  'id': 'VVVBTkxaWU1pZGFDYkxRRldYQkM5NUpnLkdURUZTdUZmZ25V',
  'snippet': {'publishedAt': '2019-08-23T04:00:03.000Z',
   'channelId': 'UCANLZYMidaCbLQFWXBC95Jg',
   'title': 'Taylor Swift - Death By A Thousand Cuts (Official Audio)',
   'description': 'Official audio for ‘Death By A Thousand Cuts’ performed by Taylor Swift. Off her new album ‘Lover.’ Stream/Download the album here: https://TaylorSwift.lnk.to/Loversu\n\n►Subscribe to Taylor Swift on YouTube: https://ts.lnk.to/subscribe\n\n►Exclusive Merch: https://store.taylorswift.com\n\n►Follow Taylor Swift online:\nInstagram: http://www.instagram.com/taylorswift\nFacebook: http://www.facebook.com/taylorswift\nTumblr: http://taylorswift.tumblr.com\nTwitter: http://www.twitter.com/taylorswift13\nWebsite: http://www.taylorswift.com\n\n►Follow Taylor Nation online:\nInstagram: http://www.instagram.com/taylornation\nTumblr: http://taylo

In [6]:
# Pull the video id out of each playlist item, keeping the playlist order.
video_ids = [item['snippet']['resourceId']['videoId'] for item in videos]
len(video_ids)

105

In [7]:
# Fetch the statistics item for every collected video id, then check how many
# items came back.
stats = get_videos_stats(video_ids)
len(stats)

105

In [8]:
# Order the statistics items from most to least viewed; each viewCount is
# converted with int() before it is compared.
most_viewed = sorted(
    stats,
    key=lambda item: int(item['statistics']['viewCount']),
    reverse=True,
)

In [9]:
# Reduce every statistics item to its (id, viewCount) pair.
counts = list(map(parse_count, most_viewed))
len(counts)

105

In [10]:
# Two-column frame built from the (id, viewCount) pairs.
df_count = pd.DataFrame(counts, columns=['videoId', 'viewCount'])
df_count.head()

Unnamed: 0,videoId,viewCount
0,nfWlot6h_JM,2816219421
1,e-ORhEE9VVg,2488668553
2,QcIy9NiNbmo,1311326305
3,3tmd-ClpJxA,1086151062
4,VuNIsY6JdUw,984083366


In [11]:
# Cast the view counts to float so they sort and compare numerically.
df_count = df_count.astype({"viewCount": "float"})

In [12]:
# Preview the five highest view counts.
df_count.sort_values('viewCount', ascending=False).head()

Unnamed: 0,videoId,viewCount
0,nfWlot6h_JM,2816219000.0
1,e-ORhEE9VVg,2488669000.0
2,QcIy9NiNbmo,1311326000.0
3,3tmd-ClpJxA,1086151000.0
4,VuNIsY6JdUw,984083400.0


In [13]:
# Reduce every playlist item to its (videoId, publishedAt, title) triple.
publish_dates = list(map(parse_publish_date, videos))
len(publish_dates)

105

In [14]:
# Frame of publish metadata, one row per playlist item.
df_date = pd.DataFrame(publish_dates, columns=['videoId', 'publishedAt', 'title'])
df_date.head(n=20)

Unnamed: 0,videoId,publishedAt,title
0,GTEFSuFfgnU,2019-08-23 04:00:03,Taylor Swift - Death By A Thousand Cuts (Offic...
1,8zdg-pDF10g,2019-08-23 04:00:02,Taylor Swift - Paper Rings (Official Audio)
2,p1cEvNn88jM,2019-08-23 04:00:06,Taylor Swift - I Forgot That You Existed (Offi...
3,2d1wKn-oJnA,2019-08-23 04:00:01,Taylor Swift - I Think He Knows (Official Audio)
4,VsKoOH6DVys,2019-08-23 04:00:03,Taylor Swift - London Boy (Official Audio)
5,Kwf7P2GNAVw,2019-08-23 04:00:05,Taylor Swift - Miss Americana & The Heartbreak...
6,VikHHWrgb4Y,2019-08-23 04:00:03,Taylor Swift - Cornelia Street (Official Audio)
7,u9raS7-NisU,2019-08-23 04:00:07,Taylor Swift - Daylight (Official Audio)
8,tMoW5G5LU08,2019-08-23 04:00:05,Taylor Swift - Soon You’ll Get Better (Officia...
9,eaP1VswBF28,2019-08-23 04:00:06,Taylor Swift - It’s Nice To Have A Friend (Off...


In [15]:
# Attach view counts to the publish metadata; the inner join keeps only video
# ids present in both frames.
dfd = df_date.merge(df_count, how='inner', on='videoId')
dfd.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 105 entries, 0 to 104
Data columns (total 4 columns):
videoId        105 non-null object
publishedAt    105 non-null datetime64[ns]
title          105 non-null object
viewCount      105 non-null float64
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 4.1+ KB


In [16]:
# Preview the twenty most recently published videos.
dfd.sort_values('publishedAt', ascending=False).head(20)

Unnamed: 0,videoId,publishedAt,title,viewCount
12,pHoHDNxay3A,2019-08-23 04:00:11,Taylor Swift - The Man (Official Audio),4464692.0
7,u9raS7-NisU,2019-08-23 04:00:07,Taylor Swift - Daylight (Official Audio),3376977.0
13,ic8j13piAhQ,2019-08-23 04:00:06,Taylor Swift - Cruel Summer (Official Audio),4169191.0
2,p1cEvNn88jM,2019-08-23 04:00:06,Taylor Swift - I Forgot That You Existed (Offi...,3067839.0
9,eaP1VswBF28,2019-08-23 04:00:06,Taylor Swift - It’s Nice To Have A Friend (Off...,1126069.0
5,Kwf7P2GNAVw,2019-08-23 04:00:05,Taylor Swift - Miss Americana & The Heartbreak...,2506256.0
8,tMoW5G5LU08,2019-08-23 04:00:05,Taylor Swift - Soon You’ll Get Better (Officia...,2274709.0
0,GTEFSuFfgnU,2019-08-23 04:00:03,Taylor Swift - Death By A Thousand Cuts (Offic...,1213049.0
6,VikHHWrgb4Y,2019-08-23 04:00:03,Taylor Swift - Cornelia Street (Official Audio),2644848.0
4,VsKoOH6DVys,2019-08-23 04:00:03,Taylor Swift - London Boy (Official Audio),2406202.0


In [17]:
# Join view counts with publish date and title; the inner join keeps only
# video ids present in both frames.
df_count_date = pd.merge(df_count, df_date, how='inner', on='videoId')
# The `encoding` keyword is no longer passed: `to_excel` never used it, later
# deprecated it, and current pandas rejects it with a TypeError.
# NOTE(review): absolute, machine-specific output path left unchanged;
# consider a configurable data directory.
df_count_date.to_excel('c:\\aab\\data\\taylor_swift.xlsx')
# Keep the eleven most viewed videos and show their ids and titles.
songs = df_count_date.sort_values(by=['viewCount'], ascending=[False]).head(11)
songs.videoId, songs.title

(0     nfWlot6h_JM
 1     e-ORhEE9VVg
 2     QcIy9NiNbmo
 3     3tmd-ClpJxA
 4     VuNIsY6JdUw
 5     IdneKLhsWOQ
 6     -CmadmM5cOk
 7     WA4iX5D9Z64
 8     7F37r50VUTQ
 9     AgFeZr5ptV8
 10    8xg3vE8Ie_E
 Name: videoId, dtype: object,
 0                           Taylor Swift - Shake It Off
 1                            Taylor Swift - Blank Space
 2           Taylor Swift - Bad Blood ft. Kendrick Lamar
 3               Taylor Swift - Look What You Made Me Do
 4                     Taylor Swift - You Belong With Me
 5                         Taylor Swift - Wildest Dreams
 6                                  Taylor Swift - Style
 7     Taylor Swift - We Are Never Ever Getting Back ...
 8     ZAYN, Taylor Swift - I Don’t Wanna Live Foreve...
 9                                     Taylor Swift - 22
 10                            Taylor Swift - Love Story
 Name: title, dtype: object)

In [18]:
artist = 'Taylor Swift'
print(artist)
# regex=False matches the artist name literally; by default `str.contains`
# interprets the pattern as a regular expression, so a name containing
# characters such as "." or "$" would match the wrong titles or none at all.
# NOTE(review): `filter` shadows the built-in of the same name; the name is
# kept because a later cell reads it.
filter = songs[songs['title'].str.contains(artist, regex=False)]
filter

Taylor Swift


Unnamed: 0,videoId,viewCount,publishedAt,title
0,nfWlot6h_JM,2816219000.0,2014-08-18 21:18:00,Taylor Swift - Shake It Off
1,e-ORhEE9VVg,2488669000.0,2014-11-10 17:05:44,Taylor Swift - Blank Space
2,QcIy9NiNbmo,1311326000.0,2015-05-18 04:00:00,Taylor Swift - Bad Blood ft. Kendrick Lamar
3,3tmd-ClpJxA,1086151000.0,2017-08-28 00:30:22,Taylor Swift - Look What You Made Me Do
4,VuNIsY6JdUw,984083400.0,2009-06-16 22:02:30,Taylor Swift - You Belong With Me
5,IdneKLhsWOQ,667058100.0,2015-08-31 00:55:00,Taylor Swift - Wildest Dreams
6,-CmadmM5cOk,598828700.0,2015-02-13 13:20:01,Taylor Swift - Style
7,WA4iX5D9Z64,581607000.0,2012-09-01 00:00:08,Taylor Swift - We Are Never Ever Getting Back ...
8,7F37r50VUTQ,562530400.0,2017-01-27 05:00:30,"ZAYN, Taylor Swift - I Don’t Wanna Live Foreve..."
9,AgFeZr5ptV8,519381200.0,2013-03-13 15:25:06,Taylor Swift - 22


In [19]:
# Keep only the video id column of the artist-filtered songs and check its size.
sr = filter['videoId']
sr.shape

(11,)

In [20]:
# Plain Python list of the selected video ids.
vids = sr.tolist()
vids

['nfWlot6h_JM',
 'e-ORhEE9VVg',
 'QcIy9NiNbmo',
 '3tmd-ClpJxA',
 'VuNIsY6JdUw',
 'IdneKLhsWOQ',
 '-CmadmM5cOk',
 'WA4iX5D9Z64',
 '7F37r50VUTQ',
 'AgFeZr5ptV8',
 '8xg3vE8Ie_E']