# YouTube Analysis

**Usage**

- First, enable the YouTube Data API for your Google Cloud project using the following [link](https://console.cloud.google.com/apis/library/youtube.googleapis.com?project=youtube-368817).

**Pricing**

Google calculates your quota usage by assigning a cost to each request.
Different types of operations have different quota costs.
For example:

- a read operation that retrieves a list of resources -- channels, videos, playlists -- usually costs 1 unit;
- a write operation that creates, updates, or deletes a resource usually costs 50 units;
- a search request costs 100 units;
- a video upload costs 1600 units.

Projects that enable the YouTube Data API have a default quota allocation of 10,000 units per day.
You can see your quota usage on the [Quota](https://console.cloud.google.com/iam-admin/quotas?pli=1&project=google.com:api-project-314373636293) page.

**References**
- YouTube API https://developers.google.com/youtube/v3/getting-started
- API Reference https://developers.google.com/youtube/v3/docs

---
# Configuration

In [None]:
import os

import pandas as pd
from googleapiclient.discovery import build
from IPython.display import JSON

In [None]:
# API key passed to the YouTube client as ``developerKey``.
# The YOUTUBE_API_KEY environment variable takes precedence when it is set.
# NOTE(review): the literal below is a credential committed to source. It is
# kept only as a fallback so existing runs keep working -- revoke it, then
# drop the fallback.
key = os.environ.get('YOUTUBE_API_KEY', 'AIzaSyCGjSnYHipdnF_luhUXb0TZ6n2lSk20iLc')

In [None]:
# Channels analysed by this notebook.
channel_ids = [
    'UC0R7xhniDJ76wnFaParQzFQ',  # Domenico Greselin
    'UCay6iZuhXxp2EKYGalLo1bA',  # Marco Tomasin
]

# Single-channel shortcut: the first entry of the list above.
channel_id = channel_ids[0]

---
# Functions

In [None]:
def get_channel_stats(youtube, channels):
    """
    Get basic stats for input channels.

    All channel IDs are sent in one ``channels().list`` request.

    :param youtube: build object of Youtube API
    :param channels: list of channel IDs
    :return: pandas DataFrame with one row per channel returned by the API
        (columns: channel_name, description, country, subscribers, views,
        totalVideos, playlistId)
    """

    request = youtube.channels().list(
        part='snippet,contentDetails,statistics',
        # Use the function argument, not a notebook-level global.
        id=','.join(channels)
    )
    response = request.execute()

    out = []

    # A response without an 'items' key is treated as "no channels".
    for item in response.get('items', []):
        snippet = item['snippet']
        statistics = item['statistics']
        out.append({
            'channel_name': snippet['title'],
            'description': snippet['description'],
            # Optional field: stored as None when the channel has no country.
            'country': snippet.get('country'),
            # Stored as None when the count is not part of the response.
            'subscribers': statistics.get('subscriberCount'),
            'views': statistics['viewCount'],
            'totalVideos': statistics['videoCount'],
            'playlistId': item['contentDetails']['relatedPlaylists']['uploads'],
        })

    return pd.DataFrame(out)


def get_channel_playlists(youtube, channels):
    """
    Get list of playlists for input channels.

    Every result page is fetched for each channel, so channels with more
    playlists than fit in one page are returned in full.

    :param youtube: build object of Youtube API
    :param channels: list of channel IDs
    :return: pandas DataFrame with one row per playlist (columns: channel_id,
        channel_title, playlist_id, playlist_title, dt_publication, nr_videos)
    """

    out = []
    playlists_api = youtube.playlists()

    # loop through channel IDs
    for channel in channels:
        request = playlists_api.list(
            part='snippet,contentDetails',
            channelId=channel,
            maxResults=25
        )

        # list_next() returns None once there is no further page to request.
        while request is not None:
            response = request.execute()

            # loop through items of the current page
            for item in response.get('items', []):
                snippet = item['snippet']
                out.append({
                    'channel_id': snippet['channelId'],
                    'channel_title': snippet['channelTitle'],
                    'playlist_id': item['id'],
                    'playlist_title': snippet['title'],
                    'dt_publication': snippet['publishedAt'],
                    'nr_videos': item['contentDetails']['itemCount'],
                })

            request = playlists_api.list_next(request, response)

    return pd.DataFrame(out)


def get_playlist_videos(youtube, playlists):
    """
    Get list of videos for input playlists.

    Every result page is fetched for each playlist, so playlists with more
    items than fit in one page are returned in full.

    :param youtube: build object of Youtube API
    :param playlists: list of playlist IDs
    :return: pandas DataFrame with one row per playlist item (columns:
        channel_id, channel_title, playlist_id, video_id, video_title,
        dt_publication, video_description)
    """

    out = []
    items_api = youtube.playlistItems()

    # loop through playlist IDs
    for pl in playlists:
        request = items_api.list(
            part='snippet,contentDetails',
            playlistId=pl,
            maxResults=50
        )

        # list_next() returns None once there is no further page to request.
        while request is not None:
            response = request.execute()

            # loop through items of the current page
            for item in response.get('items', []):
                snippet = item['snippet']
                details = item['contentDetails']
                out.append({
                    'channel_id': snippet['channelId'],
                    'channel_title': snippet['channelTitle'],
                    'playlist_id': pl,
                    'video_id': details['videoId'],
                    'video_title': snippet['title'],
                    # Not guaranteed to be present on every item; None if missing.
                    'dt_publication': details.get('videoPublishedAt'),
                    'video_description': snippet['description'],
                })

            request = items_api.list_next(request, response)

    return pd.DataFrame(out)

def get_video_stats(youtube, videos):
    """
    Get basic stats for input videos.

    All video IDs are sent in one ``videos().list`` request.

    :param youtube: build object of Youtube API
    :param videos: list of video IDs
    :return: pandas DataFrame with one row per video returned by the API
        (snippet, contentDetails and statistics fields flattened into columns)
    """

    request = youtube.videos().list(
        part='snippet,contentDetails,statistics',
        id=','.join(videos)
    )
    response = request.execute()

    out = []

    # A response without an 'items' key is treated as "no videos".
    for item in response.get('items', []):
        snippet = item['snippet']
        details = item['contentDetails']
        statistics = item['statistics']

        out.append({
            'channel_id': snippet['channelId'],
            'channel_title': snippet['channelTitle'],
            'video_id': item['id'],
            'video_title': snippet['title'],
            'dt_publication': snippet['publishedAt'],
            'video_description': snippet['description'],
            'video_category': snippet['categoryId'],
            # Optional fields are read with .get() so a video lacking them
            # yields None instead of raising KeyError.
            'language': snippet.get('defaultAudioLanguage'),
            # Each column now reads the snippet field matching its name.
            'live_broadcast': snippet['liveBroadcastContent'],
            'tags': snippet.get('tags'),
            'duration': details['duration'],
            'dimension': details['dimension'],
            'definition': details['definition'],
            'caption': details['caption'],
            'licensed': details['licensedContent'],
            'projection': details['projection'],
            'views': statistics['viewCount'],
            'likes': statistics.get('likeCount'),
            'comments': statistics.get('commentCount'),
            'favorites': statistics['favoriteCount'],
        })

    return pd.DataFrame(out)

---
# Main

In [None]:
# Identify the Google API and version to talk to.
api_service_name, api_version = 'youtube', 'v3'

# Client object shared by all helper calls in the cells below.
youtube = build(
    serviceName=api_service_name,
    version=api_version,
    developerKey=key,
)

In [None]:
# Raw-API version of the helper call below, kept commented out for
# inspecting the unprocessed JSON response (channel statistics):
# request = youtube.channels().list(
#     part='snippet,contentDetails,statistics',
#     id=channel_ids
# )
# response = request.execute()
# JSON(response)

# Channel statistics as a DataFrame for the configured channel IDs.
get_channel_stats(youtube, channel_ids)

In [None]:
# Raw-API version of the helper call below, kept commented out for
# inspecting the unprocessed JSON response (playlists of one channel):
# request = youtube.playlists().list(
#     part='snippet, contentDetails',
#     channelId=channel_id,
#     maxResults=25
# )
# response = request.execute()
# JSON(response)

# Playlists as a DataFrame for the configured channel IDs.
get_channel_playlists(youtube, channel_ids)

In [None]:
# Raw-API version of the helper call below, kept commented out for
# inspecting the unprocessed JSON response (items of one playlist):
# request = youtube.playlistItems().list(
#     part='snippet, contentDetails',
#     playlistId='PLIczvbSeqUR8OUZP0JkSbCBPWtF5ZL3mY',
#     maxResults=25
# )
# response = request.execute()
# JSON(response)

# Videos of a single example playlist as a DataFrame.
get_playlist_videos(youtube, ['PLIczvbSeqUR8OUZP0JkSbCBPWtF5ZL3mY'])

In [None]:
# Raw-API version of the helper call below, kept commented out for
# inspecting the unprocessed JSON response (statistics of two videos):
# request = youtube.videos().list(
#         part='snippet,contentDetails,statistics',
#         id=','.join(['JGly94Fa_SE','Ks-_Mh1QhMc'])
#     )
# response = request.execute()
# JSON(response)

# Statistics of a single example video as a DataFrame.
get_video_stats(youtube, ['JGly94Fa_SE'])