In [171]:
import os
from datetime import datetime

import isodate
import pandas as pd
from googleapiclient.discovery import build

In [172]:
# Configuration for the run.
# The API key is read from the YOUTUBE_API_KEY environment variable so that a
# real credential never has to be saved inside the notebook; the placeholder
# is kept as a fallback so the cell behaves as before when the variable is unset.
api_key = os.environ.get('YOUTUBE_API_KEY', 'YOUR_API_KEY')
channel_id = 'EXTRACTED_CHANNEL_ID'

# Client object for the 'youtube' service, version 'v3', used by every request below.
youtube = build('youtube', 'v3', developerKey=api_key)

In [173]:
# Fetch the channel's snippet and contentDetails so we can read its title and
# the ID of its 'uploads' playlist.
res = youtube.channels().list(id=channel_id, part='snippet,contentDetails').execute()

# An unknown or mistyped channel ID can yield a response whose 'items' list is
# missing or empty; fail with a clear message instead of an opaque
# KeyError/IndexError on the indexing below.
items = res.get('items') or []
if not items:
    raise ValueError(f"No YouTube channel found for channel_id={channel_id!r}")

channel = items[0]
channel_title = channel['snippet']['title']
playlist_id = channel['contentDetails']['relatedPlaylists']['uploads']

In [174]:
# Walk the playlist page by page (up to 50 entries per request) and collect
# the video ID of every entry into `video_ids`.
video_ids = []
next_page_token = None  # no token is sent with the first request
more_pages = True

while more_pages:
    page = youtube.playlistItems().list(
        playlistId=playlist_id,
        part='contentDetails',
        maxResults=50,
        pageToken=next_page_token
    ).execute()

    video_ids.extend(entry['contentDetails']['videoId'] for entry in page['items'])

    # A response without a (truthy) 'nextPageToken' is the last page.
    next_page_token = page.get('nextPageToken')
    more_pages = bool(next_page_token)

In [175]:
# Fetch snippet/statistics/contentDetails for the collected IDs in batches of
# 50 and flatten each video into one record dict appended to `videos`.
videos = []

for start in range(0, len(video_ids), 50):
    batch = video_ids[start:start + 50]
    response = youtube.videos().list(
        id=','.join(batch),
        part='snippet,statistics,contentDetails'
    ).execute()

    for video in response['items']:
        try:
            # ISO 8601 duration string -> length in minutes.
            parsed = isodate.parse_duration(video['contentDetails']['duration'])
            minutes = parsed.total_seconds() / 60

            snippet = video['snippet']
            title = snippet['title']
            published = snippet['publishedAt']

            # Counters absent from 'statistics' are recorded as 0.
            stats = video['statistics']
            record = {
                'channel_name': channel_title,
                'title': title,
                'publishedAt': published,
                'views': int(stats.get('viewCount', 0)),
                'likes': int(stats.get('likeCount', 0)),
                'comments': int(stats.get('commentCount', 0)),
                'duration_min': round(minutes, 2),
                # Flagged as a short when the unrounded length is under one minute.
                'is_short': minutes < 1.0
            }
            videos.append(record)
        except Exception as err:
            # Best-effort: report the problematic video and continue with the rest.
            print("Error with video:", video.get('id'), err)


In [176]:
# Build the per-video frame and derive the analysis columns.
# Columns are listed explicitly so that an empty `videos` list still produces
# a frame with the expected schema instead of a KeyError on 'publishedAt'.
df = pd.DataFrame(
    videos,
    columns=[
        'channel_name', 'title', 'publishedAt', 'views', 'likes',
        'comments', 'duration_min', 'is_short',
    ],
)
df['publishedAt'] = pd.to_datetime(df['publishedAt'])
# NOTE(review): weekday/hour are taken in whatever timezone the parsed
# timestamps carry (presumably UTC for API timestamps) - confirm before
# interpreting them as local posting times.
df['weekday'] = df['publishedAt'].dt.day_name()
df['hour'] = df['publishedAt'].dt.hour
df['title_length'] = df['title'].str.len()
# Engagement = (likes + comments) per view. Videos with zero views are masked
# to NaN first, so the ratio is NaN rather than inf (n/0) or an implicit 0/0.
df['engagement'] = (df['likes'] + df['comments']) / df['views'].where(df['views'] > 0)

In [177]:
# Write the frame to CSV without the index column, then report how many rows
# (videos) were collected.
total_videos = len(df)
df.to_csv('PATH_TO_SAVE_FILE', index=False)

print(" Data collection complete. Total videos fetched:", total_videos)


