In [None]:
import ast
import configparser
import os

import pandas as pd
from googleapiclient.discovery import build

In [None]:
# Read the YouTube Data API key from a local INI file and build the API client
# that the rest of the notebook uses through the module-level name `youtube`.
CONFIG_PATH = 'config.ini'

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means the config is missing.
# Fail here with a clear message instead of a later KeyError on 'youtube'.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f"Could not read API configuration from {CONFIG_PATH!r}")

api_key = config['youtube']['api_key']
youtube = build('youtube', 'v3', developerKey=api_key)

## Channel ID data from the text file

In [None]:
# Load the channel list, flip it so each category becomes a column, and label
# those columns with the category names.
channel_name = pd.read_csv('data_channels.txt').T
channel_name.columns = [
    'Music',
    'Gaming',
    'Auto and Vehicles',
    'Sports',
    'Travel',
]
# After the transpose the original 'Category' column is a row; remove it so
# only channel names remain.
channel_name = channel_name.drop(index='Category')

# Long format: one (Category, Channel) pair per row.
df_melted = pd.melt(channel_name, var_name='Category', value_name='Channel')

def get_channel_stats(channel_name):
    """Look up a channel by name and return its basic details and statistics.

    Runs a channel search for ``channel_name`` through the module-level
    ``youtube`` client, takes the first hit, then requests that channel's
    snippet and statistics. The details are printed as well as returned.

    Args:
        channel_name: Query text passed to the search request.

    Returns:
        A dict with the keys ``channel_title``, ``channel_id``,
        ``description``, ``view_count``, ``subscriber_count`` and
        ``video_count`` (a statistic absent from the response is reported as
        the string ``'N/A'``), or ``None`` when either the search or the
        channel request returns no items.
    """
    search_request = youtube.search().list(
        q=channel_name,
        part='snippet',
        type='channel',
        maxResults=1,  # only the top-ranked match is used
    )
    search_response = search_request.execute()

    matches = search_response.get('items', ())
    if len(matches) == 0:
        print(f"No channel found for: {channel_name}")
        return None

    channel_id = matches[0]['snippet']['channelId']

    # Second request: basic info (snippet) plus counters (statistics).
    details_request = youtube.channels().list(
        id=channel_id,
        part='snippet,statistics',
    )
    channel_response = details_request.execute()

    channels = channel_response.get('items', ())
    if len(channels) == 0:
        print(f"No statistics found for channel ID: {channel_id}")
        return None

    channel_data = channels[0]

    snippet = channel_data['snippet']
    channel_title = snippet['title']
    channel_description = snippet['description']

    statistics = channel_data['statistics']
    view_count = statistics.get('viewCount', 'N/A')
    subscriber_count = statistics.get('subscriberCount', 'N/A')
    video_count = statistics.get('videoCount', 'N/A')

    print(f"Channel Title: {channel_title}")
    print(f"Channel ID: {channel_id}")
    print(f"Description: {channel_description}")
    print(f"View Count: {view_count}")
    print(f"Subscriber Count: {subscriber_count}")
    print(f"Video Count: {video_count}")

    return {
        'channel_title': channel_title,
        'channel_id': channel_id,
        'description': channel_description,
        'view_count': view_count,
        'subscriber_count': subscriber_count,
        'video_count': video_count,
    }

In [None]:
# Resolve every channel name to its details/statistics (one dict per channel).
channel_data = df_melted['Channel'].apply(get_channel_stats)

# get_channel_stats returns None when a lookup finds nothing; json_normalize
# cannot handle None records, so keep only the successful lookups.
found = channel_data.notna()
channel_df = pd.json_normalize(channel_data[found].tolist())

# Attach categories positionally for the same rows that were kept, so a failed
# lookup cannot shift or blank out the category of the channels after it.
channel_df['Category'] = df_melted.loc[found, 'Category'].to_numpy()

channel_df.to_csv('ChannelID_Data')

In [None]:
def get_video_ids(channel_id, max_results=50):
    """Return the IDs of a channel's most-viewed videos.

    Issues a single video search, ordered by view count, through the
    module-level ``youtube`` client. Only one page is requested, so at most
    50 IDs are returned.

    Args:
        channel_id: ID of the channel whose videos are searched.
        max_results: Number of videos to request. Values above 50 are clamped
            to 50, the page-size ceiling this function has always used.
            Defaults to 50, which matches the previous hard-coded behaviour.

    Returns:
        A list of video ID strings in the order the API returned them. The
        list is empty when ``max_results`` is not positive (no request is
        made) or when the response carries no ``items``.
    """
    page_size = min(max_results, 50)
    if page_size <= 0:
        # Nothing requested: skip the API call rather than spend quota on it.
        return []

    response = youtube.search().list(
        channelId=channel_id,
        part='id',
        order='viewCount',
        maxResults=page_size,
        type='video',
    ).execute()

    # Tolerate a response without 'items' instead of raising KeyError.
    return [item['id']['videoId'] for item in response.get('items', [])]

In [None]:
# One list of video IDs per channel, indexed like channel_df.
video_id = channel_df.channel_id.apply(get_video_ids)

# Keep the single-column frame under its own name, then copy that column onto
# channel_df (assignment aligns on the shared index).
videoID_data = pd.DataFrame({'Video_ID': video_id})
channel_df['video_id'] = videoID_data['Video_ID']

In [None]:
def _format_video_ids(ids):
    """Return ``ids`` as a comma-separated string, or ``''`` when there are none."""
    if isinstance(ids, str):
        return ids.strip()
    try:
        return ",".join(str(video_id) for video_id in ids)
    except TypeError:
        # Not iterable (e.g. NaN/None for a channel without any video IDs).
        return ""


def _video_record(video, channel_id):
    """Flatten one ``videos.list`` item into a row dict; absent fields become NaN."""
    missing = float('nan')
    snippet = video.get("snippet", {})
    statistics = video.get("statistics", {})
    content_details = video.get("contentDetails", {})
    return {
        "channel_id": channel_id,
        "video_id": video.get("id", missing),
        "video_title": snippet.get("title", missing),
        "publish_date": snippet.get("publishedAt", missing),
        "view_count": statistics.get("viewCount", missing),
        "like_count": statistics.get("likeCount", missing),
        # NOTE(review): dislikeCount may no longer be returned for public
        # requests; the column is kept so the output schema does not change.
        "dislike_count": statistics.get("dislikeCount", missing),
        "comment_count": statistics.get("commentCount", missing),
        "default_language": snippet.get("defaultLanguage", missing),
        "duration": content_details.get("duration", missing),
        # NOTE(review): hasPaidProductPlacement may live under a separate
        # `paidProductPlacementDetails` part rather than contentDetails; if so
        # this is always NaN. Confirm against the videos resource reference.
        "has_paid_product_placement": content_details.get("hasPaidProductPlacement", missing),
    }


def get_video_data(video_data_path, channel_data):
    """Load per-video details from a CSV cache, or fetch them and create it.

    If ``video_data_path`` exists it is read and returned without any API
    call. Otherwise one ``videos.list`` request is made per row of
    ``channel_data`` through the module-level ``youtube`` client, and the
    combined result is written to ``video_data_path`` (without the index) when
    it contains at least one row.

    Fetching is best effort: a request that raises is reported with ``print``
    and skipped, and the remaining channels are still processed. Rows gathered
    from the successful requests are still written to the cache.

    Args:
        video_data_path: Path of the CSV cache file.
        channel_data: DataFrame with a ``channel_id`` column and a ``video_id``
            column. Each ``video_id`` entry may be an iterable of IDs or an
            already comma-separated string; entries with no IDs (NaN, None,
            empty) are skipped.

    Returns:
        A DataFrame with one row per video and the columns ``channel_id``,
        ``video_id``, ``video_title``, ``publish_date``, ``view_count``,
        ``like_count``, ``dislike_count``, ``comment_count``,
        ``default_language``, ``duration`` and ``has_paid_product_placement``.
        Fields absent from the API response are NaN.
    """
    if os.path.exists(video_data_path):
        return pd.read_csv(video_data_path)

    columns = [
        "channel_id", "video_id", "video_title", "publish_date", "view_count",
        "like_count", "dislike_count", "comment_count", "default_language",
        "duration", "has_paid_product_placement",
    ]

    # Collect plain dicts and build the frame once; concatenating a one-row
    # frame per video is quadratic in the number of videos.
    records = []
    for ids, channel_id in zip(channel_data["video_id"], channel_data["channel_id"]):
        id_param = _format_video_ids(ids)
        if not id_param:
            continue

        try:
            video_response = youtube.videos().list(
                part="snippet,statistics,contentDetails",
                id=id_param,
            ).execute()
        except Exception as e:
            # Deliberately broad: keep the run going for the other channels
            # whatever this request failed with.
            print(f"Error {e}")
            continue

        for video in video_response.get("items", []):
            records.append(_video_record(video, channel_id))

    video_data = pd.DataFrame(records, columns=columns)

    # Never cache an empty result, otherwise the next run would load it
    # instead of retrying the fetch.
    if len(video_data) > 0:
        video_data.to_csv(video_data_path, index=False)

    return video_data

In [None]:
video_data_path = 'video_data.csv'
video_data = get_video_data(video_data_path, channel_df)

# Persist without the index: writing it adds an 'Unnamed: 0' column that is
# read back from the cache and multiplies on every re-run. Skip empty results
# so a failed fetch is not stored as a cache hit for the next run.
if not video_data.empty:
    video_data.to_csv(video_data_path, index=False)