In [30]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build
import pandas as pd
import json

load_dotenv()
api_key = os.getenv('YOUTUBE_API_KEY')

if not api_key:
    print("API key not found. Check your .env file")
else:
    print("API key loaded successfully")
    youtube = build('youtube', 'v3', developerKey=api_key)

API key loaded successfully


In [31]:
# Test basic search
request = youtube.search().list(
    q='data science',
    part='snippet',
    maxResults=3
)

response = request.execute()


In [44]:
response

{'kind': 'youtube#searchListResponse',
 'etag': 'ptbDvkZCtGBSLFFpO67k8_KCEkM',
 'nextPageToken': 'CAMQAA',
 'regionCode': 'US',
 'pageInfo': {'totalResults': 1000000, 'resultsPerPage': 3},
 'items': [{'kind': 'youtube#searchResult',
   'etag': '30R3NGvaVteMYqTh6kPdz3jmDQE',
   'id': {'kind': 'youtube#channel', 'channelId': 'UCEBpSZhI1X8WaP-kY_2LLcg'},
   'snippet': {'publishedAt': '2017-08-07T15:17:05Z',
    'channelId': 'UCEBpSZhI1X8WaP-kY_2LLcg',
    'title': '365 Data Science',
    'description': 'At 365 Data Science we make #DataScience accessible to all by providing the highest quality online data science education.',
    'thumbnails': {'default': {'url': 'https://yt3.ggpht.com/-YcMhCGqQL5RY85jbwN7pw1Axda_dTemcKR-deDb_nV-ocr7eKeGpTzOn94yAQw1Yj9nEyQORdY=s88-c-k-c0xffffffff-no-rj-mo'},
     'medium': {'url': 'https://yt3.ggpht.com/-YcMhCGqQL5RY85jbwN7pw1Axda_dTemcKR-deDb_nV-ocr7eKeGpTzOn94yAQw1Yj9nEyQORdY=s240-c-k-c0xffffffff-no-rj-mo'},
     'high': {'url': 'https://yt3.ggpht.com/-

In [40]:
response.keys()

dict_keys(['kind', 'etag', 'nextPageToken', 'regionCode', 'pageInfo', 'items'])

In [43]:
response['kind']

'youtube#searchListResponse'

In [6]:
for item in response['items']:
    if item['id']['kind'] == 'youtube#video':
        print(f"- {item['snippet']['title']}")
        print(f"  Channel: {item['snippet']['channelTitle']}")

- What is Data Science?
  Channel: IBM Technology
- Intro to Data Science: What is Data Science?
  Channel: Steve Brunton


In [25]:
# for v in video:
#     print("Keys in this video:", list(v.keys()))

for v in video:
    if isinstance(v, dict):  # Check if 'v' is a dictionary
        print("Keys in this video:", list(v.keys()))
    else:
        print("Not a dictionary:", v)

Not a dictionary: kind
Not a dictionary: etag
Not a dictionary: id
Not a dictionary: snippet
Not a dictionary: statistics


In [8]:
video_ids=[]
for item in response['items']:
    if item['id']['kind'] == 'youtube#video':
        video_ids.append(item['id']['videoId'])

request = youtube.videos().list(
    part='snippet,statistics',
    id=','.join(video_ids)
)
video_details=request.execute()
videos_data=[]
for video in video_details['items']:
    video_info={
        'id': video['id'],
        'title': video['snippet']['title'],
        'channel': video['snippet']['channelTitle'],
        'views': video['statistics']['viewCount'],
        'likes': video['statistics']['likeCount'],
        'comments': video['statistics']['commentCount']
        # 'duration': video['contentDetails']['duration'] 
    }
    videos_data.append(video_info)
    

In [11]:
with open('../data/raw/sample_videos.json', 'w') as f:
    json.dump(videos_data, f, indent=2)

In [13]:
channel_id = video_details['items'][0]['snippet']['channelId']
channel_id

'UCKWaEZ-_VweaEx1j62do_vQ'

In [15]:
channel['statistics']

{'viewCount': '83562291',
 'subscriberCount': '1300000',
 'hiddenSubscriberCount': False,
 'videoCount': '1360'}

In [14]:
if video_ids:
    # Get channel ID from first video
    channel_id = video_details['items'][0]['snippet']['channelId']
    
    request = youtube.channels().list(
        id=channel_id,
        part='snippet,statistics'
    )
    channel_response = request.execute()
    
    if channel_response['items']:
        channel = channel_response['items'][0]
        print(f"Channel: {channel['snippet']['title']}")
        print(f"Subscribers: {channel['statistics'].get('subscriberCount', 'N/A')}")
        print(f"Total views: {channel['statistics'].get('viewCount', 'N/A')}")
        print(f"Video count: {channel['statistics'].get('videoCount', 'N/A')}")

Channel: IBM Technology
Subscribers: 1300000
Total views: 83562291
Video count: 1360


In [7]:
search_request = youtube.search().list(
    q='data science',
    part="snippet",
    maxResults=10,
    type="video"
)
search_response = search_request.execute()

video_ids = []

for item in search_response['items']:
    if item['id']['kind'] == 'youtube#video':
        video_ids.append(item['id']['videoId'])

video_request = youtube.videos().list(
    id=','.join(video_ids),
    part="snippet,statistics"
)
video_response = video_request.execute()

videos_data = []
for video in video_response.get('items', []):
    video_info = {
        'id': video['id'],
        'title': video['snippet']['title'],
        'description': video['snippet']['description'],
        'published_at': video['snippet']['publishedAt'],
        'views': video['statistics'].get('viewCount', 0),
        'likes': video['statistics'].get('likeCount', 0),
        'comments': video['statistics'].get('commentCount', 0),
        # 'duration': video['contentDetails']['duration']
    }
    videos_data.append(video_info)


In [8]:
videos_data

[{'id': 'RBSUwFGa6Fk',
  'title': 'What is Data Science?',
  'description': 'Want a career in Data Science? Start here → https://ibm.biz/BdK65F\nData Science touches almost every operation of a business. See how  → https://ibm.biz/BdPEfu\n\nData Science is the convergence of computer science, mathematics, and business expertise and helps entrepreneurs predict, diagnose, and solve their problems. Luv Aggarwal, a Data Solution Engineer at IBM, goes through the basics and explains how the discipline deploys data mining, data cleaning, machine learning and a variety of advanced analytics to yield actionable insights that will provide a roadmap for growth.  \n\nAI news moves fast. Sign up for a monthly newsletter for AI updates from IBM → https://ibm.biz/BdK65E\n \n#DataScience #BusinessSolutions #Lightboard #IBM #ComputerScience #Data #MachineLearning',
  'published_at': '2022-06-13T12:00:14Z',
  'views': '808254',
  'likes': '17146',
  'comments': '306'},
 {'id': 'FsSrzmRawUg',
  'title':

In [10]:
import pandas as pd

In [14]:
df = pd.DataFrame(videos_data)
df.head(10)

Unnamed: 0,id,title,description,published_at,views,likes,comments
0,RBSUwFGa6Fk,What is Data Science?,Want a career in Data Science? Start here → ht...,2022-06-13T12:00:14Z,808254,17146,306
1,FsSrzmRawUg,Intro to Data Science: What is Data Science?,This lecture provides an overview of the vario...,2019-06-06T05:19:06Z,126788,1831,48
2,9R3X0JoCLyU,The Complete Data Science Roadmap,Go from zero to a data scientist in 12 months....,2024-08-01T13:00:08Z,466542,14831,364
3,JL_grPUnXzY,What is Data Science? | Free Data Science Cour...,We will understand what is data science by loo...,2019-12-10T03:47:20Z,639139,8511,410
4,dcXqhMqhZUo,Data Analytics vs Data Science,Explore analytics tools and solutions → https:...,2023-11-08T12:00:38Z,682285,13858,231
5,X3paOmcrTjQ,Data Science In 5 Minutes | Data Science For B...,🔥Data Scientist Masters Program (Discount Code...,2018-12-04T14:30:01Z,4427568,62162,1101
6,-8A6SM7wDUo,Data Analyst vs Data Scientist | Difference be...,🔥🔥Difference between Data Analyst and Data Sci...,2024-01-21T15:00:00Z,88640,2319,14
7,ua-CiDNNj30,Learn Data Science Tutorial - Full Course for ...,Learn Data Science is this full tutorial cours...,2019-05-30T12:48:19Z,4070308,84727,1318
8,snIT3KdONmY,Uniform Distribution | Stats For Data Science ...,Euron - https://euron.one/\nResource Link : ht...,2025-07-31T10:30:35Z,22,1,0
9,PLVmp1V7kIY,🔥Salary of Data Scientist in US | Simplilearn ...,Curious about a career in data science? In thi...,2024-09-12T13:55:19Z,22069,606,0


In [13]:
df.dtypes

id              object
title           object
description     object
published_at    object
views           object
likes           object
comments        object
dtype: object

In [18]:
from datetime import datetime

In [19]:
def save_to_json(data, filename=None):
    if filename is None:
        timestamp= datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"../data/raw/youtube_data_{timestamp}.json"

    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, 'w') as f:
        json.dump(data, f, indent=2)

In [20]:
save_to_json(videos_data)