In [53]:
import os
import pandas as pd

from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import TranscriptsDisabled
from dotenv import load_dotenv


In [32]:
# Get api keys
# load_dotenv() comes from python-dotenv; presumably it populates os.environ
# from a local .env file -- TODO confirm where that file is expected to live.
load_dotenv()
# Index os.environ directly so a missing key raises KeyError in this cell.
YOUTUBE_APIKEY = os.environ['YOUTUBE_APIKEY']
# Not referenced by any cell visible in this part of the notebook.
OPENAI_APIKEY = os.environ['OPENAI_APIKEY']

In [33]:
# Initialize the YouTube Data API v3 client shared by every cell below.
youtube = build(
    serviceName='youtube',
    version='v3',
    developerKey=YOUTUBE_APIKEY,
)

In [43]:
# Explore the channels.list endpoint, asking only for the `id` part of the
# channel matched by `forUsername`.
# Docs: https://developers.google.com/youtube/v3/docs/channels/list
username = 'NutritionFactsOrg'
request = youtube.channels().list(part="id", forUsername=username)
response = request.execute()
# Bare last expression so the notebook renders the raw response dict.
response

{'kind': 'youtube#channelListResponse',
 'etag': 'y2wbGNYxO1mQLSW8NgLvKwMoFvQ',
 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5},
 'items': [{'kind': 'youtube#channel',
   'etag': 'QrBFR4pa4iZJUXj0kXL8bnskofE',
   'id': 'UCddn8dUxYdgJz3Qr5mjADtA'}]}

In [44]:
# Same channels.list lookup as the previous cell, but requesting the
# `statistics` part (view, subscriber and video counts) instead of `id`.
username = 'NutritionFactsOrg'
request = youtube.channels().list(part="statistics", forUsername=username)
response = request.execute()
# Bare last expression so the notebook renders the raw response dict.
response

{'kind': 'youtube#channelListResponse',
 'etag': 'Nd35sJutPw9BcdfgNIQPGbqIVL8',
 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5},
 'items': [{'kind': 'youtube#channel',
   'etag': 'p4XTTWuzBdOOveeFuWt47K3KG8o',
   'id': 'UCddn8dUxYdgJz3Qr5mjADtA',
   'statistics': {'viewCount': '239767241',
    'subscriberCount': '1110000',
    'hiddenSubscriberCount': False,
    'videoCount': '2596'}}]}

In [45]:
# Get youtube channel ID with some better code!

def get_channel_id_by_username(username, client=None):
    """Look up a channel ID through the `forUsername` filter of channels.list.

    Args:
        username: Value sent to the API as `forUsername`.
        client: Optional YouTube Data API client to query. When omitted, the
            notebook-level `youtube` client is used.

    Returns:
        The `id` of the first channel in the response, or None when the
        response contains no channels.
    """
    api = youtube if client is None else client
    response = api.channels().list(part='id', forUsername=username).execute()

    # channels.list responses can omit the 'items' key altogether when nothing
    # matches, so read it defensively instead of indexing it directly.
    items = response.get('items') or []
    if not items:
        return None

    # Assuming the first item is the one we need.
    return items[0]['id']

# Name taken from the channel URL (https://www.youtube.com/@NutritionFactsOrg
# -> 'NutritionFactsOrg').
# NOTE(review): the part after '@' is a channel handle, while the lookup below
# goes through `forUsername`. It resolves for this channel (see the output),
# but confirm against the channels.list docs (`forHandle`) before reusing it
# for other channels.
username = 'NutritionFactsOrg'

channel_id = get_channel_id_by_username(username)
if not channel_id:
    print('Channel ID not found.')
else:
    print(f'Channel ID for {username}: {channel_id}')



Channel ID for NutritionFactsOrg: UCddn8dUxYdgJz3Qr5mjADtA


In [46]:
# How many videos does the channel have?

def get_channel_video_count(channel_id, client=None):
    """Return the video count reported in a channel's statistics.

    Args:
        channel_id: Channel ID sent to channels.list as `id`.
        client: Optional YouTube Data API client to query. When omitted, the
            notebook-level `youtube` client is used.

    Returns:
        The `statistics.videoCount` value exactly as the API delivers it (a
        string such as '2596' in this notebook's output), or None when the
        response contains no channels.
    """
    api = youtube if client is None else client
    response = api.channels().list(part='statistics', id=channel_id).execute()

    # channels.list responses can omit the 'items' key altogether when nothing
    # matches, so read it defensively instead of indexing it directly.
    items = response.get('items') or []
    if not items:
        return None

    # Assuming the channel is found and the first item is the target channel.
    return items[0]['statistics']['videoCount']

# Report the count for the channel resolved earlier; None means no channel
# came back from the lookup.
video_count = get_channel_video_count(channel_id)
if video_count is None:
    print("Channel not found or error occurred.")
else:
    print(f"The channel has {video_count} videos.")

The channel has 2596 videos.


In [57]:
# Get info on videos in the channel
#https://developers.google.com/youtube/v3/docs/search

def get_channel_video_data(channel_id, max_results=50, client=None):
    """Fetch a single page of video search results for a channel.

    Args:
        channel_id: Channel ID sent to search.list as `channelId`.
        max_results: Forwarded as `maxResults`, i.e. the number of results
            requested for this one page.
        client: Optional YouTube Data API client to query. When omitted, the
            notebook-level `youtube` client is used.

    Returns:
        A DataFrame with one row per result and the columns 'video_id',
        'playlist_id', 'title', 'description' and 'upload_date'. The columns
        are present even when the search returns nothing. 'playlist_id' is
        read from the result's `id` object and is empty in this notebook's
        output because the search is restricted to type="video".
    """
    columns = ['video_id', 'playlist_id', 'title', 'description', 'upload_date']
    api = youtube if client is None else client

    # Fetch videos from a specific channel.
    res = api.search().list(
        part="snippet",
        channelId=channel_id,
        maxResults=max_results,
        type="video",
    ).execute()

    videos = []
    # Tolerate a response without 'items' rather than raising KeyError.
    for item in res.get('items', []):
        snippet = item['snippet']
        videos.append({
            'video_id': item['id']['videoId'],
            'playlist_id': item['id'].get('playlistId'),
            'title': snippet.get('title'),
            'description': snippet.get('description'),
            'upload_date': snippet.get('publishedAt'),
        })

    # Passing `columns` keeps the schema stable for an empty result set.
    return pd.DataFrame(videos, columns=columns)

# Pull one page of search results (50 requested) for the channel.
df_videos = get_channel_video_data(channel_id=channel_id, max_results=50)
df_videos

Unnamed: 0,video_id,playlist_id,title,description,upload_date
0,hUi4ii-0Cfc,,Citrus Peels and Cancer: Zest for Life?,The reason eating citrus fruit appears to prot...,2016-05-18T11:56:55Z
1,gm-UcdE3nTQ,,Reversing Diabetes with Food,Type 2 diabetes can be reversed with severe ca...,2016-03-02T12:57:10Z
2,UVH7S6xCKIE,,Recipe: Morning Grain Bowl,Here's a delicious way to start the day! Morni...,2019-02-13T13:03:43Z
3,w5bG_0ubO0E,,"Should Probiotics Be Taken Before, During, or ...",Proper timing of probiotic supplements may imp...,2013-05-29T11:24:24Z
4,5fgVDT0qw88,,Should We Take a Multivitamin?,New subscribers to our e-newsletter always rec...,2014-01-31T12:11:34Z
5,E_3V58sPR9M,,How to Strengthen the Mind-Body Connection,Slow paced breathing at the right frequency ca...,2017-02-24T12:59:10Z
6,pSDedXTsQkE,,Which Fruits and Vegetables Boost DNA Repair?,There are 800 incidents of DNA damage in our b...,2016-05-16T11:40:30Z
7,-_w2WchC_fs,,Recipe: Soba Noodle Soup,"A light broth soup, packed with a rainbow of v...",2019-09-23T12:00:03Z
8,ZcL6qyc76Xg,,Best Food to Prevent Common Childhood Infections,Just a dusting of nutritional yeast worth of b...,2017-11-08T12:57:12Z
9,tYaWLWBcSLk,,How Not to Die from Diabetes,"Type 2 diabetes can be prevented, arrested, an...",2016-10-07T11:55:14Z


In [59]:
# Persist the single-page search results under ../data.
# NOTE(review): to_csv is called with its defaults, so the DataFrame index is
# presumably written as an extra leading column -- confirm that downstream
# readers expect it.
df_videos.to_csv(path_or_buf='../data/nutrition_facts_videos_50.csv')

In [61]:
# Get more than 50 videos

def get_channel_video_data(channel_id, max_results=50, client=None):
    """Page through video search results for a channel and collect them all.

    This redefinition replaces the single-page helper of the same name from
    earlier in the notebook. It follows `nextPageToken` until the API stops
    returning one.

    In this notebook the run returned 506 rows for a channel reporting 2596
    videos, so search does not enumerate the whole catalogue.

    Args:
        channel_id: Channel ID sent to search.list as `channelId`.
        max_results: Forwarded as `maxResults` on every request. It is the
            page size, not a cap on the total number of rows returned.
        client: Optional YouTube Data API client to query. When omitted, the
            notebook-level `youtube` client is used.

    Returns:
        A DataFrame with one row per result and the columns 'video_id',
        'playlist_id', 'title', 'description' and 'upload_date'. The columns
        are present even when the search returns nothing.
    """
    columns = ['video_id', 'playlist_id', 'title', 'description', 'upload_date']
    api = youtube if client is None else client

    videos = []
    next_page_token = None

    while True:
        # The first request carries pageToken=None; later ones carry the
        # token handed back by the previous page.
        res = api.search().list(
            part="snippet",
            channelId=channel_id,
            maxResults=max_results,
            type="video",
            pageToken=next_page_token,
        ).execute()

        # Tolerate a page without 'items' rather than raising KeyError.
        for item in res.get('items', []):
            snippet = item['snippet']
            videos.append({
                'video_id': item['id']['videoId'],
                'playlist_id': item['id'].get('playlistId'),
                'title': snippet.get('title'),
                'description': snippet.get('description'),
                'upload_date': snippet.get('publishedAt'),
            })

        # Stop once the API no longer advertises a further page.
        next_page_token = res.get('nextPageToken')
        if not next_page_token:
            break

    # Passing `columns` keeps the schema stable for an empty result set.
    return pd.DataFrame(videos, columns=columns)

# Re-run the search with pagination, using the default page size.
df_videos = get_channel_video_data(channel_id=channel_id)
df_videos

Unnamed: 0,video_id,playlist_id,title,description,upload_date
0,hUi4ii-0Cfc,,Citrus Peels and Cancer: Zest for Life?,The reason eating citrus fruit appears to prot...,2016-05-18T11:56:55Z
1,gm-UcdE3nTQ,,Reversing Diabetes with Food,Type 2 diabetes can be reversed with severe ca...,2016-03-02T12:57:10Z
2,E_3V58sPR9M,,How to Strengthen the Mind-Body Connection,Slow paced breathing at the right frequency ca...,2017-02-24T12:59:10Z
3,5fgVDT0qw88,,Should We Take a Multivitamin?,New subscribers to our e-newsletter always rec...,2014-01-31T12:11:34Z
4,ZcL6qyc76Xg,,Best Food to Prevent Common Childhood Infections,Just a dusting of nutritional yeast worth of b...,2017-11-08T12:57:12Z
...,...,...,...,...,...
502,Ld0BdqvMFI4,,Is Obesity Infectious?,Given the role our gut bacteria can play in af...,2017-01-20T13:10:50Z
503,6dmJpRfjvio,,Sugar Industry Attempts to Manipulate the Science,How the food industry responds to “health food...,2019-04-22T11:40:39Z
504,07vk6TuLw1Q,,Highlights from the 2020 Dietary Guidelines He...,I was honored to testify before the US governm...,2019-10-09T12:00:09Z
505,dfOdps2bppY,,9 out of 10 Women Misinformed about Mammograms,"Most women are just being told what to do, rat...",2018-01-22T13:08:13Z


In [62]:
# Persist the paginated search results under ../data.
# NOTE(review): to_csv is called with its defaults, so the DataFrame index is
# presumably written as an extra leading column -- confirm that downstream
# readers expect it.
df_videos.to_csv(path_or_buf='../data/nutrition_facts_videos_500.csv')

In [63]:


# Find the channel's "uploads" playlist via the contentDetails part of
# channels.list.
response = youtube.channels().list(part='contentDetails', id=channel_id).execute()
playlist_id = response['items'][0]['contentDetails']['relatedPlaylists']['uploads']

# Walk the uploads playlist 50 items at a time, following nextPageToken until
# the API stops returning one.
videos = []
next_page_token = None
while True:
    # A pageToken of None on the first pass requests the first page, the same
    # way the paginated search helper above passes it.
    response = youtube.playlistItems().list(
        part='snippet',
        playlistId=playlist_id,
        maxResults=50,
        pageToken=next_page_token,
    ).execute()

    for item in response['items']:
        snippet = item['snippet']
        videos.append({
            'videoId': snippet['resourceId']['videoId'],
            'title': snippet['title'],
            'description': snippet['description'],
            'publishedAt': snippet['publishedAt']
        })

    next_page_token = response.get('nextPageToken')
    if next_page_token is None:
        break

# One row per playlist item.
df = pd.DataFrame(videos)

df

Unnamed: 0,videoId,title,description,publishedAt
0,r3PuCQ8CxTc,Benefits and Side Effects of the Pneumonia Vac...,Randomized controlled trials have found that p...,2024-04-10T11:59:52Z
1,Oc8T8OGKnZ8,Live Q&A with Dr. Greger,"Join Dr. Greger on Thursday, April 25 at 3:00 ...",2024-04-09T19:50:43Z
2,oa6UtySJKFE,Benefits and Side Effects of the Flu Vaccine,Flu shots can prevent more than just the flu. ...,2024-04-08T11:59:55Z
3,wZLgy4dvM1Y,New Sponsorship: Big Broccoli,Help keep us ad- and sponsorship-free by makin...,2024-04-07T15:59:56Z
4,Et0lozAIbI8,Friday Favorites: Removing Warts with Duct Tape,Duct tape beat out cryotherapy (freezing) and ...,2024-04-05T11:59:52Z
...,...,...,...,...
2592,P_X3exQtuGA,The Healthiest Herbal Tea,New subscribers to our e-newsletter always rec...,2011-02-08T14:59:42Z
2593,ce-pvksbiwM,Update on Yerba Maté,New subscribers to our e-newsletter always rec...,2011-02-08T14:57:18Z
2594,1Yb5MjU38ng,Update on MSG,New subscribers to our e-newsletter always rec...,2011-02-08T14:48:31Z
2595,lD2RzNJYGxQ,Update on Juice Plus+®,New subscribers to our e-newsletter always rec...,2011-02-08T05:41:12Z


In [64]:
# Save video list
# df.to_csv('../data/nutrition_facts_videos_2597_2024-04-10.csv')

In [65]:
# Search did not return every video, so fetch the channel's "uploads"
# playlist ID, which is what lets us list all of them.
response = youtube.channels().list(
    part='contentDetails',
    id=channel_id,
).execute()
playlist_id = response['items'][0]['contentDetails']['relatedPlaylists']['uploads']

In [66]:
# Display the raw contentDetails response to see where the uploads playlist ID lives.
response

{'kind': 'youtube#channelListResponse',
 'etag': '7whv3ksMb8m-duWjB6ji9qJLgck',
 'pageInfo': {'totalResults': 1, 'resultsPerPage': 5},
 'items': [{'kind': 'youtube#channel',
   'etag': 'xeYzVPBU-CLqmwmPx42v0w0V8p8',
   'id': 'UCddn8dUxYdgJz3Qr5mjADtA',
   'contentDetails': {'relatedPlaylists': {'likes': '',
     'uploads': 'UUddn8dUxYdgJz3Qr5mjADtA'}}}]}