In [None]:

import json
import os

import pandas as pd
from google.colab import files
from googleapiclient.discovery import build


In [None]:
# Read the YouTube Data API key from the environment instead of pasting it
# into the notebook, so the key is never saved or shared with the file.
# Set it before running this cell, e.g. with: %env YOUTUBE_API_KEY=<your key>
# Falls back to an empty string when the variable is not set.
youtube_apikey = os.environ.get("YOUTUBE_API_KEY", "")

# sample YouTube Channels
veritasium_channel_id = 'UCHnyfMqiRRG1u-2MsSQLbXA'
vox_channel_id = 'UCLXo7UDZvByw2ixzpQCufnA'

# Channels to look up; append more channel ids to this list as needed.
channel_ids = [
    veritasium_channel_id,
    vox_channel_id,
    # insert channel_id here
]


In [None]:
  # Name and version of the Google API the client is built for.
  api_service_name = "youtube"
  api_version = "v3"

  # Create the API client, authenticating with the developer key.
  youtube = build(
      serviceName=api_service_name,
      version=api_version,
      developerKey=youtube_apikey,
  )


## **Custom Function for Getting YouTube Channel Info**

In [None]:
  # Custom function to extract each YouTube channel's information: channel_ID,
  # title, customUrl, subscriberCount, videoCount and the uploads playlist_ID.
  def extract_yt_channel_info(youtube, channel_ids):
      """
      Get summary information for each of the given YouTube channels.

      Args:
          youtube (object): A Resource object with methods for interacting
              with the YouTube Data API.
          channel_ids (list): A list containing the unique YouTube channel
              ids provided by YouTube.

      Returns:
          A list of dictionaries, one per channel returned by the API, with
          the keys 'channel_ID', 'title', 'customUrl', 'subscriberCount',
          'videoCount' and 'playlist_ID'. A field the API did not return is
          stored as None. An empty ``channel_ids`` yields an empty list
          without calling the API.

      Raises:
          KeyError: If a returned channel has no
              contentDetails.relatedPlaylists.uploads entry.

      Example:
          channel_info = extract_yt_channel_info(youtube, ['UCHnyfMqiRRG1u-2MsSQLbXA'])

          returns the information of every channel id in the list
      """
      # Ids are sent in batches so long lists do not end up in one oversized
      # request. NOTE(review): 50 matches the API's documented per-request
      # result cap - confirm against the channels.list reference.
      max_ids_per_request = 50

      # Response section -> fields copied verbatim into the output record.
      info_to_keep = {
          'snippet': ['title', 'customUrl'],
          'statistics': ['subscriberCount', 'videoCount'],
      }

      channel_Data = []
      for start in range(0, len(channel_ids), max_ids_per_request):
          batch = channel_ids[start:start + max_ids_per_request]
          response = youtube.channels().list(
              part="snippet,contentDetails,statistics",
              id=','.join(batch),
          ).execute()

          # A response without an 'items' key is treated as "no channels".
          for item in response.get('items', []):
              data = {'channel_ID': item['id']}

              for section, fields in info_to_keep.items():
                  section_data = item.get(section) or {}
                  for field in fields:
                      # Missing fields become None instead of raising.
                      data[field] = section_data.get(field)

              data['playlist_ID'] = item['contentDetails']['relatedPlaylists']['uploads']

              channel_Data.append(data)

      return channel_Data



In [None]:
# Fetch the channel summaries (a list with one dictionary per channel).
channel_info = extract_yt_channel_info(youtube, channel_ids)

# Tabulate the summaries: one row per dictionary, one column per key.
channel_info_df = pd.DataFrame(channel_info)

print(channel_info_df)

                 channel_ID       title    customUrl subscriberCount  \
0  UCHnyfMqiRRG1u-2MsSQLbXA  Veritasium  @veritasium        17500000   
1  UCLXo7UDZvByw2ixzpQCufnA         Vox         @vox        12500000   

  videoCount               playlist_ID  
0        431  UUHnyfMqiRRG1u-2MsSQLbXA  
1       1788  UULXo7UDZvByw2ixzpQCufnA  


## **Get the playlist_ID of Each YouTube Channel**


In [None]:

# Pull the 'playlist_ID' value out of every channel record.
get_playlist_id = [item['playlist_ID'] for item in channel_info]

print(get_playlist_id)

['UUHnyfMqiRRG1u-2MsSQLbXA', 'UULXo7UDZvByw2ixzpQCufnA']


## **Function for Getting all Video_IDs in each YouTube Channel**


In [None]:
# Custom function to get all video ids contained in the given playlists
def get_video_ID(youtube, playlist_ids):
    """
    Collect the id of every video in each of the given playlists.

    Results are requested 50 at a time; as long as a response carries a
    'nextPageToken', the following page of the same playlist is requested.

    Args:
        youtube (object): A Resource object with methods for interacting
            with the YouTube Data API.
        playlist_ids (list): A list containing the unique playlist ids
            provided by YouTube.

    Returns:
        A list of dictionaries of the form {'video_ID': <id>}, in the order
        the API returned them, playlist by playlist. A response without an
        'items' key contributes no entries.

    Example:
        video_ids = get_video_ID(youtube, ['UUHnyfMqiRRG1u-2MsSQLbXA'])

        returns all the video ids of each playlist id in the list
    """
    video_AllData = []

    for playlist_id in playlist_ids:
        next_page_token = None

        # One loop serves both the first page and every following page.
        while True:
            request_params = {
                'part': "contentDetails",
                'playlistId': playlist_id,
                'maxResults': 50,
            }
            # The first request of a playlist is sent without a page token.
            if next_page_token is not None:
                request_params['pageToken'] = next_page_token

            response = youtube.playlistItems().list(**request_params).execute()

            for video in response.get('items', []):
                video_AllData.append(
                    {'video_ID': video['contentDetails']['videoId']}
                )

            next_page_token = response.get('nextPageToken')
            if next_page_token is None:
                break

    return video_AllData




In [None]:
# Collect one {'video_ID': ...} record per video across all playlists.
video_ids = get_video_ID(youtube, get_playlist_id)


# Flatten the records into a plain list of ids.
get_video_id_list = [video['video_ID'] for video in video_ids]

# Single-column frame holding the collected ids.
get_video_id_list_df = pd.DataFrame(get_video_id_list)

# check the df shape
print(get_video_id_list_df.shape)


(2214, 1)


## **Getting the Video Information for Each Video ID**

In [None]:
# Custom function to get the details of every given video id
def get_video_info(youtube, video_ids):
    """
    Get each video's data such as the following:
    title, published date, view count, like count, dislike count,
    favorite count, comment count, duration and tags.

    Ids are sent to the API in comma-separated batches instead of one
    request per video. The result keeps the order of ``video_ids``; ids the
    API returns nothing for are left out.

    Args:
        youtube (object): A Resource object with methods for interacting
            with the YouTube Data API.
        video_ids (list): A list containing the unique video ids provided
            by YouTube.

    Returns:
        A list of dictionaries with the keys 'video_ID', 'channel_ID',
        'title', 'publishedAt', 'viewCount', 'likeCount', 'dislikeCount',
        'FavoriteCount', 'commentCount', 'duration' and 'list_of_tags'.
        A field the API did not return is stored as None ('list_of_tags'
        defaults to an empty list).

    Example:
        video_data = get_video_info(youtube, ['qJZ1Ez28C-A'])

        returns the details of the video with the video id 'qJZ1Ez28C-A'
    """
    # NOTE(review): 50 matches the API's documented per-request result cap -
    # confirm against the videos.list reference.
    max_ids_per_request = 50

    # Response section -> {output key: API field name}.
    # 'FavoriteCount' is kept as the output key so existing column names do
    # not change, but it is read from the API's 'favoriteCount' field.
    # NOTE(review): 'dislikeCount' appears to be no longer returned publicly
    # (it is None in practice) - confirm before relying on it.
    info_to_keep = {
        'snippet': {
            'title': 'title',
            'publishedAt': 'publishedAt',
        },
        'statistics': {
            'viewCount': 'viewCount',
            'likeCount': 'likeCount',
            'dislikeCount': 'dislikeCount',
            'FavoriteCount': 'favoriteCount',
            'commentCount': 'commentCount',
        },
        'contentDetails': {
            'duration': 'duration',
        },
    }

    video_ids = list(video_ids)
    details_by_id = {}

    for start in range(0, len(video_ids), max_ids_per_request):
        batch = video_ids[start:start + max_ids_per_request]
        response = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(batch),
        ).execute()

        # A response without an 'items' key is treated as "no videos".
        for item in response.get('items', []):
            snippet = item.get('snippet') or {}

            data = {
                'video_ID': item['id'],
                'channel_ID': snippet.get('channelId'),
            }

            for section, fields in info_to_keep.items():
                section_data = item.get(section) or {}
                for output_key, api_field in fields.items():
                    # Missing fields become None instead of raising.
                    data[output_key] = section_data.get(api_field)

            data['list_of_tags'] = snippet.get('tags', [])

            details_by_id[item['id']] = data

    # Emit the records in the caller's order, independent of the order in
    # which the API listed the items of a batch.
    return [
        dict(details_by_id[video_id])
        for video_id in video_ids
        if video_id in details_by_id
    ]





In [None]:
# Look up the details of every collected video id.
video_data = get_video_info(youtube, get_video_id_list)

# Tabulate the details: one row per dictionary returned by get_video_info.
video_data_df = pd.DataFrame(video_data)

# Preview only the first rows of the frame.
print(video_data_df.head())

      video_ID                channel_ID  \
0  qJZ1Ez28C-A  UCHnyfMqiRRG1u-2MsSQLbXA   
1  dFVrncgIvos  UCHnyfMqiRRG1u-2MsSQLbXA   
2  mqo1lQL59VI  UCHnyfMqiRRG1u-2MsSQLbXA   
3  P_fHJIYENdI  UCHnyfMqiRRG1u-2MsSQLbXA   
4  88bMVbx1dzM  UCHnyfMqiRRG1u-2MsSQLbXA   

                                               title           publishedAt  \
0  Something Strange Happens When You Trust Quant...  2025-03-05T02:22:56Z   
1  The Google Interview Question Everyone Gets Wrong  2025-02-18T16:03:35Z   
2               This Is The Perfect Bowling Strategy  2025-02-14T16:49:48Z   
3             The Most Useful Thing AI Has Ever Done  2025-02-10T14:03:41Z   
4                  What if you just keep zooming in?  2025-02-05T15:02:34Z   

  viewCount likeCount dislikeCount FavoriteCount commentCount  duration  \
0   7581678    252933         None          None        17072  PT33M37S   
1   6032233    166658         None          None        16825  PT20M13S   
2   1433424     69211         None       

In [None]:
# Save the DataFrames as CSV files (index=False leaves out the row index)
video_data_df.to_csv('video_data.csv', index=False)
channel_info_df.to_csv('channel_data.csv', index=False)

# Save the video DataFrame as an Excel file
video_data_df.to_excel('video_data.xlsx', index=False)


In [None]:
# Download the channel data CSV file through the google.colab files helper
files.download('channel_data.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Download the video data Excel file through the google.colab files helper
files.download('video_data.xlsx')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>