# Setup

In [9]:
import os
import googleapiclient.discovery
import googleapiclient.errors
import pandas as pd

In [10]:
# The API key is kept out of the notebook: it is the first line of a local
# file, with surrounding whitespace (including the trailing newline) removed.
with open('secret/secret.txt') as key_file:
    first_line = key_file.readline()
api_key = first_line.strip()

# Shared YouTube Data API v3 client used by the functions defined below.
youtube = googleapiclient.discovery.build(
    "youtube",
    "v3",
    developerKey=api_key,
)

# Load and Cache

In [11]:
def get_and_add_playlist_id_to_dataframe(dataframe, channel_ids, client=None):
    """Look up each channel's "uploads" playlist id and store it in ``dataframe``.

    One ``channels().list(part="contentDetails", id=...)`` request is issued
    per entry of ``channel_ids``; the value found at
    ``items[0].contentDetails.relatedPlaylists.uploads`` is collected, and the
    collected values are assigned to the ``playlist_id`` column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to annotate. It is modified in place (a ``playlist_id`` column is
        added or overwritten) and also returned.
    channel_ids : iterable of str
        Channel ids, one per row of ``dataframe`` and in row order.
    client : optional
        Object exposing the same ``channels().list(...).execute()`` call chain
        as the API client. Defaults to the notebook-level ``youtube`` client.

    Returns
    -------
    pandas.DataFrame
        The same ``dataframe`` object that was passed in.

    Raises
    ------
    ValueError
        Raised by pandas when the number of ``channel_ids`` does not match the
        number of rows in ``dataframe``.

    Notes
    -----
    A response without a usable first item (no ``items``, an empty list, or a
    missing nested key) yields ``None`` for that channel instead of aborting
    the whole loop. Errors raised by ``execute()`` itself still propagate.
    """
    if client is None:
        client = youtube  # notebook-level client created in the setup cell

    playlist_ids = []
    for channel_id in channel_ids:
        response = client.channels().list(
            part="contentDetails",
            id=channel_id,
        ).execute()
        try:
            uploads = response['items'][0]['contentDetails']['relatedPlaylists']['uploads']
        except (KeyError, IndexError):
            # Keep the list aligned with the rows: mark this channel as
            # unresolved rather than losing every lookup made so far.
            uploads = None
        playlist_ids.append(uploads)

    dataframe['playlist_id'] = playlist_ids
    return dataframe

In [12]:
import ast

def _fetch_video_rows(client, id_to_channel):
    """Request one batch of videos and return one record dict per returned item.

    ``id_to_channel`` maps each video id in the batch to the channel id it was
    listed under. Any field absent from a response item becomes NaN.
    """
    if not id_to_channel:
        return []

    response = client.videos().list(
        part="snippet,statistics",
        id=",".join(id_to_channel),
    ).execute()

    nan = float('nan')
    rows = []
    for video in response.get("items", []):
        statistics = video.get("statistics", {})
        snippet = video.get("snippet", {})
        video_id = video.get("id", nan)
        rows.append({
            # .get() so an item without a known id cannot abort the whole run.
            "channel_id": id_to_channel.get(video_id, nan),
            "video_id": video_id,
            "video_title": snippet.get("title", nan),
            "publish_date": snippet.get("publishedAt", nan),
            "view_count": statistics.get("viewCount", nan),
            "like_count": statistics.get("likeCount", nan),
            "dislike_count": statistics.get("dislikeCount", nan),
            "comment_count": statistics.get("commentCount", nan),
        })
    return rows


def get_video_data(video_data_path, channel_data, batch_size=50, client=None):
    """Return per-video metadata, reading a CSV cache or fetching from the API.

    If ``video_data_path`` exists it is read with ``pd.read_csv`` and returned
    without any API traffic. Otherwise the video ids listed in ``channel_data``
    are requested in batches through ``videos().list(part="snippet,statistics")``
    and the collected rows are written to ``video_data_path`` (only when at
    least one row was collected).

    Parameters
    ----------
    video_data_path : str
        Location of the CSV cache to read, or to create.
    channel_data : pandas.DataFrame
        Must provide a ``Video_ID`` column whose cells are string
        representations of Python lists of video ids (parsed with
        ``ast.literal_eval``) and a ``channel_id`` column. Not accessed when
        the cache file exists.
    batch_size : int, default 50
        Maximum number of video ids sent per request.
        NOTE(review): 50 is the commonly cited per-request cap for
        ``videos.list`` - confirm against the API reference before raising it.
    client : optional
        Object exposing the same ``videos().list(...).execute()`` call chain as
        the API client. Defaults to the notebook-level ``youtube`` client.

    Returns
    -------
    pandas.DataFrame
        Columns: channel_id, video_id, video_title, publish_date, view_count,
        like_count, dislike_count, comment_count.

    Notes
    -----
    Fetching is best-effort: any exception is printed as ``Error <message>``
    and the rows gathered before the failure are kept. Because those rows are
    also written to the cache, a later call will load the partial file instead
    of retrying - delete the CSV to force a complete refetch.
    """
    if os.path.exists(video_data_path):
        return pd.read_csv(video_data_path)

    if client is None:
        client = youtube  # notebook-level client created in the setup cell

    columns = ["channel_id", "video_id", "video_title", "publish_date",
               "view_count", "like_count", "dislike_count", "comment_count"]
    records = []
    pending = {}  # video id -> channel id for the batch being assembled

    try:
        for list_string, channel_id in zip(channel_data["Video_ID"], channel_data["channel_id"]):
            for video_id in ast.literal_eval(list_string):
                # Send the batch when it is full. Also send it before re-adding
                # an id it already holds, so an id listed twice is requested
                # twice instead of being collapsed by the dict.
                if video_id in pending or len(pending) >= batch_size:
                    records.extend(_fetch_video_rows(client, pending))
                    pending = {}
                pending[video_id] = channel_id

        # Whatever is left over after the last full batch.
        records.extend(_fetch_video_rows(client, pending))
    except Exception as e:
        print(f"Error {e}")

    # Build the frame once; appending row by row with pd.concat is quadratic.
    video_data = pd.DataFrame(records, columns=columns)

    if len(video_data) > 0:
        video_data.to_csv(video_data_path, index=False)

    return video_data

In [13]:
# CSV locations, relative to the notebook's working directory.
channel_data_path = "ChannelID_Data.csv"
video_data_path = "video_data.csv"

channel_data = pd.read_csv(channel_data_path)

# Load through the caching helper instead of reading the CSV directly: when
# video_data.csv already exists this is the same pd.read_csv call, and when it
# does not (fresh checkout, Restart & Run All) the data is fetched and the
# cache is created rather than failing with FileNotFoundError.
video_data = get_video_data(video_data_path, channel_data)
 

# Use Data

In [None]:
# Preview the first rows of both tables: channels first, then videos.
display(
    channel_data.head(),
    video_data.head(),
)