In [1]:
%load_ext nb_black
import os
from datetime import datetime, timedelta

import pandas as pd
from apiclient.discovery import build

# The API key is read from the environment so that it is never stored in the
# notebook itself; any key that was previously committed in source should be
# treated as exposed and rotated.
YOUTUBE_DEVELOPER_KEY = os.environ.get("YOUTUBE_DEVELOPER_KEY")
if not YOUTUBE_DEVELOPER_KEY:
    raise RuntimeError(
        "Set the YOUTUBE_DEVELOPER_KEY environment variable before running this notebook."
    )

# Shared YouTube Data API v3 client used by the helper functions below.
youtube = build("youtube", "v3", developerKey=YOUTUBE_DEVELOPER_KEY)

<IPython.core.display.Javascript object>

In [2]:
def get_channel(channel_name):
    """Return the first channel search result for ``channel_name``.

    Runs a ``search().list`` query restricted to ``type="channel"`` through
    the module-level ``youtube`` client and returns the first entry of the
    response's ``"items"`` list.

    Raises:
        IndexError: if the response's ``"items"`` list is empty.
    """
    request = youtube.search().list(
        q=channel_name, type="channel", part="id,snippet"
    )
    response = request.execute()
    return response["items"][0]


def get_videos(channel_id, part="id,snippet", limit=10):
    """Return up to ``limit`` playlist items from a channel's uploads playlist.

    The uploads playlist id is read from the channel's ``contentDetails``,
    then ``playlistItems().list`` is paged through until either the playlist
    is exhausted or ``limit`` items have been collected.

    Args:
        channel_id: id of the channel whose uploads are listed.
        part: ``part`` argument forwarded to ``playlistItems().list``.
        limit: maximum number of playlist items to return.

    Returns:
        A list of playlist item dicts, never longer than ``limit``.
    """
    res = youtube.channels().list(id=channel_id, part="contentDetails").execute()
    playlist_id = res["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]

    videos = []
    next_page_token = None

    while True:
        # Request only what is still missing (capped at 50 per page) so the
        # last page does not fetch more items than the caller asked for.
        remaining = limit - len(videos)
        res = (
            youtube.playlistItems()
            .list(
                playlistId=playlist_id,
                part=part,
                maxResults=min(remaining, 50),
                pageToken=next_page_token,
            )
            .execute()
        )
        videos += res["items"]
        next_page_token = res.get("nextPageToken")

        if next_page_token is None or len(videos) >= limit:
            break

    # Guarantee the documented upper bound even if a page returned more items
    # than requested.
    return videos[: max(limit, 0)]


def get_videos_stats(video_ids):
    """Fetch the ``statistics`` part for each id in ``video_ids``.

    The ids are sent to ``videos().list`` in comma-joined batches of 50 and
    the ``"items"`` of every response are concatenated in request order.

    Returns:
        A list of the returned item dicts; empty when ``video_ids`` is empty.
    """
    batch_size = 50
    collected = []
    for start in range(0, len(video_ids), batch_size):
        batch = video_ids[start : start + batch_size]
        request = youtube.videos().list(id=",".join(batch), part="statistics")
        response = request.execute()
        collected.extend(response["items"])
    return collected


def parse_count(video):
    """Return ``(id, viewCount)`` taken from a video statistics item.

    The view count is returned exactly as stored under
    ``video["statistics"]["viewCount"]``; no type conversion is applied.
    """
    video_id = video["id"]
    view_count = video["statistics"]["viewCount"]
    return video_id, view_count


def parse_publish_date(video):
    """Return ``(videoId, publishedAt, title)`` from a playlist item.

    ``publishedAt`` is parsed into a naive ``datetime``. Timestamps are
    accepted both with a fractional-seconds part (``...:47.000Z``) and
    without one (``...:47Z``), so the function does not depend on the
    fraction being the literal ``.000``.

    Raises:
        ValueError: if ``publishedAt`` matches neither timestamp layout.
    """
    snippet = video["snippet"]
    video_id = snippet["resourceId"]["videoId"]
    raw_timestamp = snippet["publishedAt"]

    # Try the fractional-seconds layout first, then the plain one.
    for layout in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"):
        try:
            published_at = datetime.strptime(raw_timestamp, layout)
            break
        except ValueError:
            continue
    else:
        raise ValueError(
            "unrecognised publishedAt timestamp: {!r}".format(raw_timestamp)
        )

    return video_id, published_at, snippet["title"]

<IPython.core.display.Javascript object>

In [3]:
# Resolve the channel name to its id via the first search hit.
channel_name = "IIIlllIII"
channel = get_channel(channel_name)
channel_id = channel["id"]["channelId"]
channel_id

'UCIXDyPryvE5t8ihkSoAi5Jw'

<IPython.core.display.Javascript object>

In [4]:
# Collect the channel's uploads; 6500 is only an upper bound on how many
# playlist items are requested.
videos = get_videos(channel_id, limit=6500)
# Preview the first couple of items instead of echoing the whole API response
# (full descriptions make the cell output unreadably long).
videos[:2]

[{'kind': 'youtube#playlistItem',
  'etag': '"j6xRRd8dTPVVptg711_CSPADRfg/IggIBHPu25AA2xprTb4Zo5bwVJE"',
  'id': 'VVVJWER5UHJ5dkU1dDhpaGtTb0FpNUp3LlR3QWpOS1RyOHJZ',
  'snippet': {'publishedAt': '2013-06-10T12:13:47.000Z',
   'channelId': 'UCIXDyPryvE5t8ihkSoAi5Jw',
   'title': 'The Beatles -  The Long and Winding Road (HQ Sound)',
   'description': "LYRICS:\n\nThe long and winding road\nThat leads to your door\nWill never disappear\nI've seen that road before\nIt always leads me here\nLead me to your door\n\nThe wild and windy night\nThat the rain washed away\nHas left a pool of tears\nCrying for the day\nWhy leave me standing here?\nLet me know the way\n\nMany times I've been alone\nAnd many times I've cried\nAnyway, you'll never know\nThe many ways I've tried\n\nAnd still they lead me back\nTo the long, winding road\nYou left me standing here\nA long, long time ago\nDon't leave me waiting here\nLead me to your door\n\nBut still they lead me back\nTo the long, winding road\nYou left m

<IPython.core.display.Javascript object>

In [5]:
# Extract the video id of every playlist item, preserving order.
video_ids = [item["snippet"]["resourceId"]["videoId"] for item in videos]
len(video_ids)

27

<IPython.core.display.Javascript object>

In [6]:
# Fetch the statistics item for every collected video id.
stats = get_videos_stats(video_ids)
# Number of statistics items that came back.
len(stats)

27

<IPython.core.display.Javascript object>

In [7]:
# Order the statistics items from most to least viewed, comparing on the
# integer value of viewCount.
most_viewed = list(stats)
most_viewed.sort(key=lambda item: int(item["statistics"]["viewCount"]), reverse=True)

<IPython.core.display.Javascript object>

In [8]:
# Reduce each statistics item to an (id, viewCount) pair.
counts = list(map(parse_count, most_viewed))
len(counts)

27

<IPython.core.display.Javascript object>

In [9]:
# Tabulate the (id, viewCount) pairs.
df_count = pd.DataFrame(counts, columns=["videoId", "viewCount"])
df_count.head(5)

Unnamed: 0,videoId,viewCount
0,qhtO1M1eVS0,2238520
1,qzRRAgAjGTo,2005451
2,kl2pFgEywng,1697712
3,33iz-1Mfus4,1323780
4,ZRtw61AVapM,949389


<IPython.core.display.Javascript object>

In [10]:
# Convert viewCount to a float column so it sorts numerically.
df_count = df_count.assign(viewCount=df_count["viewCount"].astype("float"))

<IPython.core.display.Javascript object>

In [11]:
# Top rows by view count, highest first.
df_count.sort_values("viewCount", ascending=False).head()

Unnamed: 0,videoId,viewCount
0,qhtO1M1eVS0,2238520.0
1,qzRRAgAjGTo,2005451.0
2,kl2pFgEywng,1697712.0
3,33iz-1Mfus4,1323780.0
4,ZRtw61AVapM,949389.0


<IPython.core.display.Javascript object>

In [12]:
# Reduce each playlist item to a (videoId, publishedAt, title) triple.
publish_dates = list(map(parse_publish_date, videos))
len(publish_dates)

27

<IPython.core.display.Javascript object>

In [13]:
# Tabulate the (videoId, publishedAt, title) triples.
df_date = pd.DataFrame(publish_dates, columns=["videoId", "publishedAt", "title"])
df_date.head(5)

Unnamed: 0,videoId,publishedAt,title
0,TwAjNKTr8rY,2013-06-10 12:13:47,The Beatles - The Long and Winding Road (HQ S...
1,qhtO1M1eVS0,2013-06-10 12:12:17,The Beatles - Let It Be (HQ Sound)
2,0usZlmrQ7Ow,2013-06-10 12:11:30,The Beatles - Something (HQ Sound)
3,UCyMx9Cmkxs,2013-06-10 12:09:23,The Beatles - The Ballad of John and Yoko (HQ ...
4,qzRRAgAjGTo,2013-06-10 12:09:17,The Beatles - Get Back (HQ Sound)


<IPython.core.display.Javascript object>

In [14]:
# Attach view counts to the publish-date table, keeping only ids present in
# both frames, then summarise the resulting columns and dtypes.
dfd = df_date.merge(df_count, how="inner", on="videoId")
dfd.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27 entries, 0 to 26
Data columns (total 4 columns):
videoId        27 non-null object
publishedAt    27 non-null datetime64[ns]
title          27 non-null object
viewCount      27 non-null float64
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 1.1+ KB


<IPython.core.display.Javascript object>

In [15]:
# Most recently published videos first.
dfd.sort_values("publishedAt", ascending=False).head()

Unnamed: 0,videoId,publishedAt,title,viewCount
0,TwAjNKTr8rY,2013-06-10 12:13:47,The Beatles - The Long and Winding Road (HQ S...,13297.0
1,qhtO1M1eVS0,2013-06-10 12:12:17,The Beatles - Let It Be (HQ Sound),2238520.0
2,0usZlmrQ7Ow,2013-06-10 12:11:30,The Beatles - Something (HQ Sound),14914.0
3,UCyMx9Cmkxs,2013-06-10 12:09:23,The Beatles - The Ballad of John and Yoko (HQ ...,108288.0
4,qzRRAgAjGTo,2013-06-10 12:09:17,The Beatles - Get Back (HQ Sound),2005451.0


<IPython.core.display.Javascript object>

In [18]:
# Join view counts with publish dates/titles on videoId (count columns first),
# then keep at most the 27 most-viewed rows.
df_count_date = df_count.merge(df_date, on="videoId", how="inner")
songs = df_count_date.sort_values("viewCount", ascending=False).head(27)
songs

Unnamed: 0,videoId,viewCount,publishedAt,title
0,qhtO1M1eVS0,2238520.0,2013-06-10 12:12:17,The Beatles - Let It Be (HQ Sound)
1,qzRRAgAjGTo,2005451.0,2013-06-10 12:09:17,The Beatles - Get Back (HQ Sound)
2,kl2pFgEywng,1697712.0,2013-06-10 12:08:37,The Beatles - Hey Jude (HQ Sound)
3,33iz-1Mfus4,1323780.0,2013-06-10 11:17:10,The Beatles - We Can Work It Out (HQ Sound)
4,ZRtw61AVapM,949389.0,2013-06-10 10:56:46,The Beatles - Can't Buy Me Love (HQ Sound)
5,nN9PMTW9QOc,939626.0,2013-06-10 11:40:10,"the Beatles - Hello, Goodbye (HQ Sound)"
6,fd93wXvBl7s,847247.0,2013-06-10 11:33:07,The Beatles - All You Need Is Love (HQ Sound)
7,mgj6AcQZeQA,808516.0,2013-06-10 10:54:56,The Beatles - I Want To Hold Your Hand (HQ Sound)
8,QDXPTPo2oZU,767511.0,2013-06-10 11:29:41,The Beatles - Penny Lane (HQ Sound)
9,IrCAhi4t5H8,706369.0,2013-06-10 11:16:19,The Beatles - Day Tripper (HQ Sound)


<IPython.core.display.Javascript object>

In [21]:
# Keep only the video-id column of the ranked table and check its length.
sr = songs.loc[:, "videoId"]
sr.shape

(12,)

<IPython.core.display.Javascript object>

In [22]:
# Plain Python list of the ranked video ids.
vids = sr.tolist()
vids

['qhtO1M1eVS0',
 'qzRRAgAjGTo',
 'kl2pFgEywng',
 '33iz-1Mfus4',
 'ZRtw61AVapM',
 'nN9PMTW9QOc',
 'fd93wXvBl7s',
 'mgj6AcQZeQA',
 'QDXPTPo2oZU',
 'IrCAhi4t5H8',
 'Q2f6QGqyWzE',
 'YHK6x85G5O0']

<IPython.core.display.Javascript object>