In [2]:
#%load_ext nb_black
import pandas as pd
from datetime import datetime, timedelta
#from google.oauth2 import service_account
from apiclient.discovery import build

YOUTUBE_DEVELOPER_KEY = "AIzaSyBYOWoFmf3cG5Ez653Qdmw9xHmchEMz4Ys"
youtube = build("youtube", "v3", developerKey=YOUTUBE_DEVELOPER_KEY)

In [3]:
def get_channel(channel_name):
    return (
        youtube.search()
        .list(q=channel_name, type="channel", part="id,snippet")
        .execute()["items"][0]
    )


def get_videos(channel_id, part="id,snippet", limit=10):
    res = youtube.channels().list(id=channel_id, part="contentDetails").execute()
    playlist_id = res["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]

    videos = []
    next_page_token = None

    while 1:
        res = (
            youtube.playlistItems()
            .list(
                playlistId=playlist_id,
                part=part,
                maxResults=min(limit, 50),
                pageToken=next_page_token,
            )
            .execute()
        )
        videos += res["items"]
        next_page_token = res.get("nextPageToken")

        if next_page_token is None or len(videos) >= limit:
            break

    return videos


def get_videos_stats(video_ids):
    stats = []
    for i in range(0, len(video_ids), 50):
        res = (
            youtube.videos()
            .list(id=",".join(video_ids[i : i + 50]), part="statistics")
            .execute()
        )
        stats += res["items"]

    return stats


def parse_count(video):
    return video["id"], video["statistics"]["viewCount"]


def parse_publish_date(video):
    return (
        video["snippet"]["resourceId"]["videoId"],
        datetime.strptime(video["snippet"]["publishedAt"], "%Y-%m-%dT%H:%M:%S.000Z"),
        video["snippet"]["title"],
    )

In [4]:
channel_name = "SVansay"
channel_id = get_channel(channel_name)["id"]["channelId"]
channel_id

'UC3KdZe9VSKpEZZELEETtEzQ'

In [5]:
videos = get_videos(channel_id, limit=6500)
videos

[{'kind': 'youtube#playlistItem',
  'etag': 'JfE6cvKOtpfJeHw1MnHHwWYjI8w',
  'id': 'VVUzS2RaZTlWU0twRVpaRUxFRVR0RXpRLlYwYVNPOW1KQXJj',
  'snippet': {'publishedAt': '2022-01-23T00:26:45Z',
   'channelId': 'UC3KdZe9VSKpEZZELEETtEzQ',
   'title': 'Down Home  (1963)  -  RICKY NELSON  -  Lyrics',
   'description': "Down Home  (1963)  -  RICKY NELSON  -  Lyrics\nDown home, oh, down home\nThere used to be rivers and trees\nFresh bread every single morning\nAnd sweet magnolia in the breeze\n\nOh, fishing lines and young dreams\nOh I hear them calling to me\nBut there's no way to get down home\n'Cause down home's just a memory\n\nWish I could leave this big town\nCity living ain't living to me\nBut there's no way to get down home\nNo you can't retrieve it\n'Cause once you leave it\n\nOh, down home's just a memory\nDown home\nOh, down home \n\nSVansay (Vanhxay)",
   'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/V0aSO9mJArc/default.jpg',
     'width': 120,
     'height': 90},
    'med

In [6]:
video_ids = list(map(lambda x: x["snippet"]["resourceId"]["videoId"], videos))
len(video_ids)

281

In [7]:
stats = get_videos_stats(video_ids)
len(stats)

281

In [8]:
most_viewed = sorted(
    stats, key=lambda x: int(x["statistics"]["viewCount"]), reverse=True
)

In [9]:
counts = [parse_count(video) for video in most_viewed]
len(counts)

281

In [10]:
df_count = pd.DataFrame(data=counts, columns=["videoId", "viewCount"])
df_count.head()

Unnamed: 0,videoId,viewCount
0,9IWxocGm21U,14441427
1,5GWDgirgsq4,14427359
2,AlSjx6F5Pl8,11303578
3,IVxFbpMhn64,9255196
4,vaX0iqyzK7Q,8836617


In [11]:
df_count["viewCount"] = df_count["viewCount"].astype("float")

In [12]:
df_count.sort_values(by=["viewCount"], ascending=[False]).head()

Unnamed: 0,videoId,viewCount
0,9IWxocGm21U,14441427.0
1,5GWDgirgsq4,14427359.0
2,AlSjx6F5Pl8,11303578.0
3,IVxFbpMhn64,9255196.0
4,vaX0iqyzK7Q,8836617.0


In [13]:
publish_dates = [parse_publish_date(video) for video in videos]
len(publish_dates)

ValueError: time data '2022-01-23T00:26:45Z' does not match format '%Y-%m-%dT%H:%M:%S.000Z'

In [14]:
df_date = pd.DataFrame(data=publish_dates, columns=["videoId", "publishedAt", "title"])
df_date.head()

NameError: name 'publish_dates' is not defined

In [14]:
dfd = pd.merge(df_date, df_count, on="videoId", how="inner")
dfd.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 244 entries, 0 to 243
Data columns (total 4 columns):
videoId        244 non-null object
publishedAt    244 non-null datetime64[ns]
title          244 non-null object
viewCount      244 non-null float64
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 9.5+ KB


In [15]:
dfd.sort_values(by=["publishedAt"], ascending=[False]).head()

Unnamed: 0,videoId,publishedAt,title,viewCount
0,Z5pfG2KXz4o,2019-12-24 18:19:29,Tequila (1961) - THE VENTURES - Music (G...,1335.0
1,c3iSNTiBq8Q,2019-12-21 18:43:29,Feelings (1978) - PERRY COMO - Lyrics,2833.0
2,JUqPXXCUOns,2019-11-01 22:59:31,Where Is Your Heart (1961) - CONNIE FRANCIS...,14553.0
3,Ud4OK1lwt50,2019-10-21 09:33:11,Unchained Melody (1965) - RIGHTEOUS BROTHER...,17514.0
4,2eXDr8KIM64,2019-10-18 22:51:00,Flaming Star (1960) - ELVIS PRESLEY - Lyrics,7502.0


In [16]:
df_count_date = pd.merge(df_count, df_date, how="inner", on="videoId")
# df_count_date.to_excel('c:\\aab\\data\\svansay.xlsx',encoding='utf-8')
songs = df_count_date.sort_values(by=["viewCount"], ascending=[False])
songs.title.head(36)

0     Tennessee Waltz ( 1959 ) - CONNIE FRANCIS - Ly...
1                               Aubrey - BREAD - Lyrics
2     Cotton Fields (1969)  -  CREEDENCE CLEARWATER ...
3     Sad Movies ( Make me cry ) - SUE THOMPSON - Wi...
4     The End Of The World - SKEETER DAVIS - With ly...
5     The Way It Used To Be ( 1969 ) - ENGELBERT HUM...
6     Judy Judy Judy ( 1963 ) - JOHNNY TILLOTSON - L...
7      Evergreen Tree ( 1960 ) - CLIFF RICHARD - Lyrics
8                   Diana ( 1957 ) - PAUL ANKA - Lyrics
9     Why Do I Love You So ( 1960 ) - JOHNNY TILLOTS...
10    You Don't Have To Say You Love Me ( 1966 ) - D...
11    Killing Me Softly With Her Song ( 1973 ) - PER...
12             More Than I Can Say - LEO SAYER - Lyrics
13    Oh! Suzanna ( Rare ) ( 1848's Cover by CONNIE ...
14           It's Impossible - PERRY COMO - With lyrics
15    No More ( Another version. Rare ) 1973 - ELVIS...
16    Summer Kisses Winter Tears ( 1960 ) - ELVIS PR...
17      Rhythm Of The Rain - THE CASCADES - With

In [18]:
artist = 'THE CASCADES'
print(artist)
filter = songs[songs['title'].str.contains(artist)]
filter.head(13)

THE CASCADES


Unnamed: 0,videoId,viewCount,publishedAt,title
17,pt57gA1_W7c,1438498.0,2014-04-19 07:43:04,Rhythm Of The Rain - THE CASCADES - With lyrics
37,ekYqiZvfi90,514870.0,2015-02-11 12:08:06,Dreamin' ( Rare ) 1960s - THE CASCADES - Lyrics


In [25]:
artist = 'Engelbert Humperdinck'
print(artist)
filter2 = songs[songs['title'].str.contains(artist)]
filter2.head(13)

Engelbert Humperdinck


Unnamed: 0,videoId,viewCount,publishedAt,title
89,EAM2ajaMx3I,95078.0,2017-02-23 23:36:57,There Goes My Everything ( 1967 ) - Engelbert...
211,4HUcr1AEHms,9797.0,2019-08-16 09:36:12,A Time For Us (Love Theme from Romeo & Juliet)...


In [26]:
sr = filter['videoId'].head(13)
sr.shape

(7,)

In [27]:
vids= sr.values.tolist()
vids

['pKqfYRpc_9U',
 'K81MZplOnfU',
 'u5KV1_lFwQo',
 '036Vm1d2FEI',
 'KsdTvFpggG4',
 'NjzxRRA1_4s',
 'B1fmyPVR078']

In [21]:
filter.to_excel('c:\\aab\\data\\connie_francis.xlsx',encoding='utf-8')