Terminal:

```
pip install google-api-python-client

pip install --upgrade google-api-python-client
```

In [72]:
from googleapiclient.discovery import build
from IPython.display import JSON

from pprint import pprint
import pandas as pd
import json

In [73]:
credentials_file_path = "../credentials.json"

# Load the API credentials from the JSON file. The encoding is passed
# explicitly so the file is read as UTF-8 regardless of the platform's
# default text encoding.
with open(credentials_file_path, "r", encoding="utf-8") as f:
    credentials = json.load(f)


In [74]:
# Build the YouTube (v3) API client used by every request below, authenticating
# with the developer key stored under 'youtube_api_key' in the credentials file.
youtube = build('youtube', 'v3', developerKey=credentials['youtube_api_key'])

In [75]:
def get_channel_stats(youtube, channel_ids):
    """
    Get channel statistics: title, subscriber count, view count, video count, upload playlist
    Params:
    
    youtube: the build object from googleapiclient.discovery
    channel_ids: list of channel IDs
    
    Returns:
    Dataframe containing the channel statistics for all channels in the provided list: title, subscriber count, view count, video count, upload playlist
    (columns: channelName, subscribers, views, totalVideos, playlistId).
    One row per channel returned by the API; a statistic missing from the
    response is stored as None. If no channel is returned the dataframe is
    empty but still has the columns above.
    
    """
    columns = ['channelName', 'subscribers', 'views', 'totalVideos', 'playlistId']
    all_data = []

    # Materialise the IDs so they can be sliced into batches.
    channel_ids = list(channel_ids)

    # Query in batches of 50 IDs (the same batch size get_video_details uses)
    # so a long ID list does not end up in a single oversized request.
    for start in range(0, len(channel_ids), 50):
        request = youtube.channels().list(
                    part='snippet,contentDetails,statistics',
                    id=','.join(channel_ids[start:start + 50]))
        response = request.execute()

        # 'items' may be missing from the response when nothing matched.
        for item in response.get('items', []):
            statistics = item['statistics']
            all_data.append(dict(
                channelName=item['snippet']['title'],
                # A counter may be left out of the response (e.g. a hidden
                # subscriber count); record None instead of raising KeyError.
                subscribers=statistics.get('subscriberCount'),
                views=statistics.get('viewCount'),
                totalVideos=statistics.get('videoCount'),
                playlistId=item['contentDetails']['relatedPlaylists']['uploads']))

    return pd.DataFrame(all_data, columns=columns)

def get_video_ids(youtube, playlist_id):
    """
    Collect the IDs of every video in a playlist, following pagination
    Params:
    
    youtube: the build object from googleapiclient.discovery
    playlist_id: playlist ID of the channel
    
    Returns:
    List of video IDs of all videos in the playlist, in the order the API
    returned them
    
    """
    collected = []

    # Arguments shared by every page request; 'pageToken' is added only once
    # the API has handed back a token for the next page.
    query = dict(part='contentDetails',
                 playlistId=playlist_id,
                 maxResults=50)

    while True:
        page = youtube.playlistItems().list(**query).execute()

        collected.extend(entry['contentDetails']['videoId']
                         for entry in page['items'])

        token = page.get('nextPageToken')
        if token is None:
            # No further page: everything has been collected.
            return collected
        query['pageToken'] = token

def get_video_details(youtube, video_ids):
    """
    Get video statistics of all videos with given IDs
    Params:
    
    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs
    
    Returns:
    Dataframe with one row per returned video: a 'video_id' column plus
        'channelTitle', 'title', 'description', 'tags', 'publishedAt'
        'viewCount', 'likeCount', 'favoriteCount', 'commentCount'
        'duration', 'definition', 'caption'
    A field that is missing from the response for a video is stored as None.
    """
    # Fields copied out of each part of the video resource. Built once rather
    # than per video. The statistics field is spelled 'favoriteCount' in the
    # API response; any other spelling would always yield None.
    stats_to_keep = {'snippet': ['channelTitle', 'title', 'description', 'tags', 'publishedAt'],
                     'statistics': ['viewCount', 'likeCount', 'favoriteCount', 'commentCount'],
                     'contentDetails': ['duration', 'definition', 'caption']
                    }

    all_video_info = []

    # Request the videos in batches of 50 IDs per call.
    for i in range(0, len(video_ids), 50):
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50])
        )
        response = request.execute()

        for video in response.get('items', []):
            video_info = {'video_id': video['id']}

            for part, fields in stats_to_keep.items():
                # A whole part, or a single field inside it, can be missing;
                # fall back to None instead of catching every exception.
                part_data = video.get(part) or {}
                for field in fields:
                    video_info[field] = part_data.get(field)

            all_video_info.append(video_info)

    return pd.DataFrame(all_video_info)

def get_comments_in_videos(youtube, video_ids):
    """
    Get top level comments as text from all videos with given IDs (only the first 10 comments due to quota limit of Youtube API)
    Params:
    
    youtube: the build object from googleapiclient.discovery
    video_ids: list of video IDs
    
    Returns:
    Dataframe with video IDs and associated top level comment in text
    (columns 'video_id' and 'comments', the latter a list of up to 10 strings).
    Videos whose comments could not be fetched are reported on stdout and
    left out of the dataframe.
    
    """
    all_comments = []
    
    for video_id in video_ids:
        try:
            # Only the 'snippet' part is read below, and only 10 threads are
            # kept, so neither replies nor a larger page is requested.
            request = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                maxResults=10
            )
            response = request.execute()

            comments_in_video = [thread['snippet']['topLevelComment']['snippet']['textOriginal']
                                 for thread in response['items'][:10]]
        except Exception:
            # Best effort: most likely comments are disabled on the video.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # and SystemExit still stop the loop.
            print('Could not get comments for video ' + video_id)
        else:
            all_comments.append({'video_id': video_id, 'comments': comments_in_video})
        
    return pd.DataFrame(all_comments)


In [76]:
# Create the request object: music videos (category 10) matching the query for
# region SG, ordered by view count. `fields` trims the response down to the
# values that are read below.
data = []

search_response = youtube.search().list(
    part="snippet",
    maxResults=50,  # documented range for search.list is 0-50 results per page
    q="official music video",
    regionCode="SG",
    type="video",
    videoCategoryId="10",
    order="viewCount",
    fields="items(id/videoId,snippet(channelId,channelTitle,description,title)),nextPageToken,pageInfo,prevPageToken,regionCode"
)

# Execute the request and get the response
response = search_response.execute()

# Flatten every search hit into one row; 'items' may be missing when the
# search returns nothing.
for item in response.get('items', []):
    snippet = item['snippet']
    data.append({
        'video_id': item['id']['videoId'],
        'title': snippet['title'],
        'channel_id': snippet['channelId'],
        'channel_title': snippet['channelTitle'],
        'description': snippet['description']
    })

# Video IDs in result order, reused by the statistics request further down
video_ids = [row['video_id'] for row in data]

df = pd.DataFrame(data)
df.to_csv('music_videos_testdata.csv', index = False)

df

Unnamed: 0,video_id,title,channel_id,channel_title,description
0,kJQP7kiw5Fk,Luis Fonsi - Despacito ft. Daddy Yankee,UCLp8RBhQHu9wSsq62j_Md6A,LuisFonsiVEVO,Despacito” disponible ya en todas las platafor...
1,JGwWNGJdvx8,Ed Sheeran - Shape of You (Official Music Video),UC0C-w0YjGpqDXGB8IHb662A,Ed Sheeran,The official music video for Ed Sheeran - Shap...
2,RgKAFK5djSk,Wiz Khalifa - See You Again ft. Charlie Puth [...,UCLxOm-UQ1gtyQgfz8Rn34iQ,Wiz Khalifa Music,Download the new Furious 7 Soundtrack Deluxe V...
3,OPf0YbXqDm0,Mark Ronson - Uptown Funk (Official Video) ft....,UCmfFGTSsfJVu6CGvL8r75qg,MarkRonsonVEVO,Official Video for Uptown Funk by Mark Ronson ...
4,09R8_2nJtjg,Maroon 5 - Sugar (Official Music Video),UCN1hnUccO4FD5WfM7ithXaw,Maroon5VEVO,Buy Sugar on iTunes: http://smarturl.it/M5V Si...
5,hT_nvWreIhg,OneRepublic - Counting Stars,UCQ5kHOKpF3-1_UCKaqXARRg,OneRepublicVEVO,Stream & Download OneRepublic's latest album “...
6,CevxZvSJLk8,Katy Perry - Roar,UC-8Q-hLdECwQmaWNwXitYDw,KatyPerryVEVO,"Katy Perry's new album ""Smile"" out August 28 -..."
7,pRpeEdMmmQ0,Shakira - Waka Waka (This Time for Africa) (Th...,UCGnjeahCJW1AF34HBmQTJ-Q,shakiraVEVO,"Watch the official music video for ""Waka Waka ..."
8,lp-EO5I60KA,Ed Sheeran - Thinking Out Loud (Official Music...,UC0C-w0YjGpqDXGB8IHb662A,Ed Sheeran,The official music video for Ed Sheeran - Thin...
9,0KSOMA3QBU0,Katy Perry - Dark Horse ft. Juicy J,UC-8Q-hLdECwQmaWNwXitYDw,KatyPerryVEVO,Listen to Katy's new song “Smile”: https://kat...


In [81]:
### getting video statistics (likes, views, comments etc.)

# NOTE(review): video_stats is initialised here but never filled in this cell.
video_stats = []
# A single videos.list request for all IDs collected from the search results,
# asking for the snippet, statistics and contentDetails parts of each video.
video_requests = youtube.videos().list(
    part = "snippet, statistics, contentDetails",
    id = video_ids
)

video_responses = video_requests.execute()

# Pretty-print the raw response to inspect its structure.
pprint(video_responses)


{'etag': 'uySQVLrPqUrocW-Uw2Kly0sMTew',
 'items': [{'contentDetails': {'caption': 'true',
                               'contentRating': {},
                               'definition': 'hd',
                               'dimension': '2d',
                               'duration': 'PT4M42S',
                               'licensedContent': True,
                               'projection': 'rectangular'},
            'etag': '7BrvFzp-L4BWKgicUhfSofib7iw',
            'id': 'kJQP7kiw5Fk',
            'kind': 'youtube#video',
            'snippet': {'categoryId': '10',
                        'channelId': 'UCLp8RBhQHu9wSsq62j_Md6A',
                        'channelTitle': 'LuisFonsiVEVO',
                        'defaultAudioLanguage': 'en-US',
                        'description': '“Despacito” disponible ya en todas las '
                                       'plataformas digitales: '
                                       'https://UMLE.lnk.to/DOoUzFp \n'
                        

In [86]:
# Like and view counts of the first video in the response. The values are
# strings (see the output below), not integers.
like_count = video_responses['items'][0]['statistics']["likeCount"]
view_count = video_responses['items'][0]['statistics']["viewCount"]
like_count, view_count

('52525215', '8325786249')

# @Ruikai: do the same as above to add the like and view counts to the songs dataframe (using a for loop like the one above). Then you can take that and start analysing it together with the lyrics.