# Import Libraries

In [1]:
import requests
import pandas as pd
import time 
from dotenv import load_dotenv
import os

import warnings
warnings.filterwarnings("ignore")

# Setup environment: Store & Call API_KEY

**Remember: this is personal, sensitive information. Therefor, keep you API key safe!**

- How to hide you API key:
    - Go to tutorial [Soma, Jonathan. Hide API keys in Python scripts using python-dotenv, .env, and .gitignore, youtube.com](https://www.youtube.com/watch?v=YdgIWTYQ69A)
    - Or go to [README](xxx) file on this repo for a quick setup 

In [2]:
# Get dotenv environment
load_dotenv() # We'll get environment variables in the .env file to use when called 

# Call API key
API_KEY = os.getenv("API_KEY") # I stored key in the .env as API_KEY=xxxxxx
CHANNEL_ID = "UC176GAQozKKjhz62H8u9vQQ"

# Set pageToken & url
pageToken = ""
url = "https://www.googleapis.com/youtube/v3/search?key=" + API_KEY + "&channelId=" + CHANNEL_ID + "&part=snippet,id&order=date&maxResults=1000" + pageToken


# Making API call

In [3]:
# Make API call & store the data in json form in the "response" object
response = requests.get(url).json()

In [4]:
# Take a look at the json object to navigate the data we need
# response

# Let's look at the first video
response['items'][0]

{'kind': 'youtube#searchResult',
 'etag': 'xptPehm0NA3uRhDdtZIXTk1k_1k',
 'id': {'kind': 'youtube#video', 'videoId': 'K_MvDo7spEs'},
 'snippet': {'publishedAt': '2023-06-24T13:30:14Z',
  'channelId': 'UC176GAQozKKjhz62H8u9vQQ',
  'title': 'When Your Eyes Can’t See, but Your Brain is Still Watching',
  'description': 'Be one of the first 200 people to sign up with this link and get 20% off your subscription with Brilliant.org!',
  'thumbnails': {'default': {'url': 'https://i.ytimg.com/vi/K_MvDo7spEs/default.jpg',
    'width': 120,
    'height': 90},
   'medium': {'url': 'https://i.ytimg.com/vi/K_MvDo7spEs/mqdefault.jpg',
    'width': 320,
    'height': 180},
   'high': {'url': 'https://i.ytimg.com/vi/K_MvDo7spEs/hqdefault.jpg',
    'width': 480,
    'height': 360}},
  'channelTitle': 'Real Science',
  'liveBroadcastContent': 'none',
  'publishTime': '2023-06-24T13:30:14Z'}}

- Inside the array, inside the key 'items' we can get

    - videoId
    - title
    - description (but we will not collect this one)
    - publish date

# Navigate through json object to get the data

In [15]:
# For example, check video_id of the first video
video_id = response['items'][0]['id']['videoId']

# Check title of the first video
video_title = response['items'][0]['snippet']['title'].replace("&amp;", "")

# Check publish date
publish_date = response['items'][0]['snippet']['publishTime'].split("T")[0]

print("video_id: ", video_id)
print("video_title: ", video_title)
print("publish_date: ", publish_date)

SyntaxError: invalid syntax (1328927087.py, line 5)

# Collecting view, like, comment counts

In [6]:
# Create a second url call for stats (like, comment counts)
url_video_stats = "https://www.googleapis.com/youtube/v3/videos?id=" + video_id + "&part=statistics&key=" + API_KEY
response_video_stats = requests.get(url_video_stats).json()

# Note that we need 'video_id' for this (calling stats for each video)

In [7]:
response_video_stats

{'kind': 'youtube#videoListResponse',
 'etag': 'ysIj4nsZyWdgKaco7j_ygPi9zrI',
 'items': [{'kind': 'youtube#video',
   'etag': 'cH8EV1nZDo2dyteg3NnNnifIjBI',
   'id': 'K_MvDo7spEs',
   'statistics': {'viewCount': '104651',
    'likeCount': '6099',
    'favoriteCount': '0',
    'commentCount': '294'}}],
 'pageInfo': {'totalResults': 1, 'resultsPerPage': 1}}

Inside this json object, inside key 'statistics' we can get
- viewCount
- likeCount
- favoriteCount
- commentCount

In [8]:
# Let's look at view count
view_count = response_video_stats['items'][0]['statistics']['viewCount']

# Check like count
like_count = response_video_stats['items'][0]['statistics']['likeCount']

# Check favorite count
favorite_count = response_video_stats['items'][0]['statistics']['favoriteCount']

# Check comment count
comment_count = response_video_stats['items'][0]['statistics']['commentCount']

print("view_count: ", view_count)
print("like_count: ", like_count)
print("favorite_count: ", favorite_count)
print("comment_count: ", comment_count)

view_count:  104651
like_count:  6099
favorite_count:  0
comment_count:  294


# Collect data on multiple videos & store in pandas dataframe

In [19]:
# Create an empty dataframe
df = pd.DataFrame(columns = ['video_id', 'video_title', 'publish_date', 
                             'view_count', 'like_count', 'favorite_count', 'comment_count'])
df

Unnamed: 0,video_id,video_title,publish_date,view_count,like_count,favorite_count,comment_count


In [20]:
for video in response['items']:
    # Specify that we want to pick only the youtube video (not ads)
    
    string_replacement = {"&amp;": "", "&#39;": "'", "of:": "of"}
    if video['id']['kind'] == "youtube#video":  
        video_id = video['id']['videoId']
        video_title = video['snippet']['title']
        publish_date = video['snippet']['publishTime'].split("T")[0]
        
        # replace some unwanted strings in the video_title
        for k, v in string_replacement.items():
            video_title = video_title.replace(k, v)
            
        
        # Make API call for stats or each video_id that it loops through
        url_video_stats = "https://www.googleapis.com/youtube/v3/videos?id=" + video_id + "&part=statistics&key=" + API_KEY
        response_video_stats = requests.get(url_video_stats).json()
        
        view_count = response_video_stats['items'][0]['statistics']['viewCount']
        like_count = response_video_stats['items'][0]['statistics']['likeCount']
        favorite_count = response_video_stats['items'][0]['statistics']['favoriteCount']
        comment_count = response_video_stats['items'][0]['statistics']['commentCount']

          
        df = df.append({'video_id': video_id, 
                        'video_title': video_title,
                        'publish_date': publish_date,
                        'view_count': view_count,
                        'like_count': like_count,
                        'comment_count': comment_count}, ignore_index=True)
        
        time.sleep(3)

In [21]:
df

Unnamed: 0,video_id,video_title,publish_date,view_count,like_count,favorite_count,comment_count
0,K_MvDo7spEs,"When Your Eyes Can’t See, but Your Brain is St...",2023-06-24,104658,6099,,299
1,oJIvn18SSfc,Why Animals Get Creepier the Deeper You Go,2023-06-10,825520,27069,,1100
2,MQ8JC1d_wgY,The Insane Biology of The Mantis Shrimp,2023-05-27,478738,14423,,696
3,nPOQQp8CCls,The Insane Biology of Slime Mold,2023-05-13,464956,18147,,831
4,7JgaXMcNTJ8,The Secrets of the Oldest Footprints Ever Found,2023-04-29,361488,14768,,862
5,RwwgbVHE8Dk,The Insane Biology of Kangaroos,2023-04-15,407270,15223,,1203
6,aBeK88m_T6w,The Assassin's Favorite Plant,2023-03-31,264944,10318,,465
7,hph9OeKjg3w,Could Chat GPT Talk to Whales?,2023-03-18,964201,31047,,2755
8,LXnCCq6dkm8,The Insane Biology of The Gorilla,2023-02-28,726808,22991,,1425
9,8WaIycNrQMs,The Insane Evolution of Hibernation,2023-02-18,587196,18532,,701


#### Note:
See clean code in the youtube_api.py inside this repo

## Further study:

- [YouTube API to fetch all videos on a channel](https://stackoverflow.com/questions/18953499/youtube-api-to-fetch-all-videos-on-a-channel)