# Setup

In [1]:
import os
import googleapiclient.discovery
import googleapiclient.errors
import pandas as pd

In [2]:
# Channels to analyse, grouped by category.
# Layout: {category name: {channel display name: YouTube channel ID}}.
CHANNELS_BY_CATEGORY = {
    'Auto & Vehicles': {
        'Stokes twins squad': 'UCbp9MyKCTEww4CxEzc_Tp0Q',
        'DASH CAM CN': 'UCJZpPG8wegZJ3rsnc6-Bpnw',
        'Supercar Blondie': 'UC0_7evGVYla1lREeXjN_JLg',
        'The Classic Motorcycle Channel': 'UCtjWY31B54jqxRFQM9xyhDw',
        'Salai Sathish777': 'UCaEUPNkdnbQROROKm09SGoQ',
        'Exotic Rubber': 'UCF2eaKLl9I8yXg31FW3GH2g',
        'Sarah Lezito': 'UCT7dKiVfnUSRk7ckOfIwWvw',
        'San Razka': 'UCcDiv0JjkAnirZvcwcZYXTw',
        'Suvo Stunts': 'UCzRnN9ntMZf83mquKpT6a0w',
        # NOTE(review): this ID is 21 characters long, shorter than the
        # 24-character IDs around it -- possibly truncated; verify.
        'Nikolai Savic': 'UCqcRe8F3rETPbzOFJHzk',
    },
    'Entertainment': {
        'Anaya Kandhal': 'UCJTqi2KOenudYnEOUSAj5Hg',
        'Toys and Colors': 'UCIte5_YtyuMKsSytQDgWdMA',
        'J House jr.': 'UCGHGx_VOEWUE_WINENxxKbA',
        'Sierra & Rhia FAM': 'UCe6n0z9UbsxYCS8P83f84tw',
        'KL BRO Biju Rithvik': 'UCABNiUtltfp4piL69GD2BmA',
        'StarPlus': 'UCAGZZ-Ua-yB-fqObs1GKzag',
        'MrBeast': 'UClD832S2f_F_W4epwAd5sOw',
        'HAR PAL GEO': 'UCNoMrALPTsDJaNiIdMk_kCw',
        'Upin & Ipin': 'UCB8VtllrypUr7YP9WiYuXCA',
        'Dylan Anderson': 'UCRY8HfhFV5w9toYlR8bfupQ',
    },
    'Gaming': {
        'Linh Nhi Shorts': 'UCOGJ0sPUo9_39Qiru4Lx51w',
        'Animal World': 'UCEexsbjV02l4bPWfyNlMJ3A',
        'LankyBox': 'UCU7d91rxwcqHh-fSgCNU73Q',
        'Kissy Show': 'UCRA3T1IofrU3HSys0FJGbZQ',
        'FACT FIRE KING': 'UCDjrDvzuRUmwIBmY8GBOPVw',
        'Daquavis': 'UCvwYaSSj-ceh5-nfSv71WKw',
        'Sinotal Gaming': 'UCe6qB7dYiurwyKSxrgCdmLA',
        'LetsPlay': 'UCia7dsxxnoUsJMicn-0gbTw',
        'Nahz': 'UCq-BiqUyulZMOZEr0xtGOjg',
        'Canva India': 'UC2V8gNWRrOrBFOm08tNSugQ',
    },
    'Sports': {
        'Celine Dept': 'UCTq7Bs_Whk-WYwhU9CoezCQ',
        'WWE': 'UC2NNoTGy7-nNSA1DOefkCpA',
        'Red Bull': 'UCuLq0ME8tqK6pzCm9rpkwjQ',
        'House of Highlights': 'UCYXbyePIPf6lJkOuVvdugQw',
        'YOLO AVENTURAS': 'UCqTxve5-0sVxxBB8dX3DkFA',
        'SportsNation': 'UC7dzGIZnOoGVjODipT1183Q',
        'UR · Cristiano': 'UCHzgN0TNou2bCHkLsCV-tLw',
        'Omar Raja - ESPN': 'UCLFhLRABMCFl5H6hS-NgrPg',
        'ESPN': 'UCGVx9_dtZD0MuFdNxmNL1eg',
        'Score 90 Shorts': 'UCbVtlOOHT_3VMKhuM1d9ziA',
    },
    'Music': {
        'Seyhan Müzik': 'UCNqr9QkPw2ThS6reZ_VglVA',
        'T-Series': 'UCpN-WGQgGsOrXm7nHXDdTDA',
        'SHADE Of Love': 'UCR5hcvWZc9XlSEK7sPlMpTw',
        'Sony Music South': 'UCXNslTpQcYUNS1g6TEwQ_bQ',
        'El Payaso Plim Plim': 'UCHU3DmaXhrrZ9oBQF7TnpIQ',
        'Saregama Music': 'UCerohJ7AcKkilNALvAx6-_w',
        'Zee Music Company': 'UCE-C1Zk9eL5K-aGJcLGurrw',
        # Disabled entry (no channel ID recorded), so this category has 9 channels:
        # 'Prvnci':'',
        'Tips Official': 'UCohjG0ZUU_VTvgZ_EpM0OrA',
        'The Weeknd': 'UCPY2tjsvvV_Y1l_n9LxaLnQ',
    },
}

In [3]:
# The API key is kept out of the notebook: it is read from the first line of
# a local file, with surrounding whitespace removed.
with open('secret/secret.txt') as key_file:
    api_key = key_file.readline().strip()

# Shared YouTube Data API v3 client; the loader functions in this notebook
# use this module-level `youtube` object.
youtube = googleapiclient.discovery.build(
    "youtube",
    "v3",
    developerKey=api_key,
)

# Load and Cache

Loads channel data: looks up the uploads playlist ID for each channel.

In [4]:
def get_and_add_playlist_id_to_dataframe(dataframe, channel_ids):
    """Look up each channel's uploads playlist and store it in `dataframe`.

    One `channels.list` request is issued per channel ID through the
    module-level `youtube` client.

    Args:
        dataframe: DataFrame that receives a new ``playlist_id`` column. It is
            modified in place and must have one row per entry of
            `channel_ids`.
        channel_ids: Iterable of YouTube channel IDs, in row order.

    Returns:
        The same `dataframe` object, with ``playlist_id`` set. Channels for
        which the API returned no uploads playlist get ``None``.
    """
    playlist_ids = []
    for channel_id in channel_ids:
        response = youtube.channels().list(
            part="contentDetails",
            id=channel_id,
        ).execute()
        try:
            uploads = response['items'][0]['contentDetails']['relatedPlaylists']['uploads']
        except (KeyError, IndexError):
            # The response carries no usable item when the channel ID is
            # unknown or malformed. Record a gap instead of aborting the whole
            # lookup, so every other channel still gets its playlist.
            print(f"No uploads playlist found for channel {channel_id}")
            uploads = None
        playlist_ids.append(uploads)
    dataframe['playlist_id'] = playlist_ids
    return dataframe

Loads video data: fetches per-video statistics for each uploads playlist and caches the result as a CSV file.

In [5]:
# Column order of the returned frame / cached CSV.
_VIDEO_COLUMNS = [
    "video_id", "video_title", "view_count", "like_count",
    "dislike_count", "comment_count", "playlist_id",
]
_VIDEO_COUNT_COLUMNS = ["view_count", "like_count", "dislike_count", "comment_count"]

# playlistItems.list documents maxResults as 0-50, and videos.list is called
# with at most this many IDs per request.
_API_BATCH_LIMIT = 50


def _fetch_playlist_video_ids(playlist_id, limit):
    """Return up to `limit` video IDs of a playlist, following page tokens."""
    collected = []
    page_token = None
    while len(collected) < limit:
        params = {
            "part": "snippet",
            "playlistId": playlist_id,
            "maxResults": min(_API_BATCH_LIMIT, limit - len(collected)),
        }
        if page_token:
            params["pageToken"] = page_token
        response = youtube.playlistItems().list(**params).execute()

        for item in response.get("items", []):
            try:
                collected.append(item["snippet"]["resourceId"]["videoId"])
            except KeyError:
                # Items without a video ID are skipped.
                continue

        page_token = response.get("nextPageToken")
        if not page_token:
            break
    return collected[:limit]


def _fetch_video_rows(video_ids, playlist_id):
    """Return one record per video (title and statistics) for `video_ids`."""
    missing = float('nan')
    rows = []
    for start in range(0, len(video_ids), _API_BATCH_LIMIT):
        batch = video_ids[start:start + _API_BATCH_LIMIT]
        response = youtube.videos().list(
            part="snippet,statistics",
            id=",".join(batch),
        ).execute()

        for video in response.get("items", []):
            stats = video.get("statistics", {})
            rows.append({
                "video_id": video.get("id", missing),
                "video_title": video.get("snippet", {}).get("title", missing),
                "view_count": stats.get("viewCount", missing),
                "like_count": stats.get("likeCount", missing),
                # NOTE(review): dislikeCount appears to be absent from public
                # API responses nowadays, so this column is expected to be
                # NaN; it is kept for compatibility with the existing schema.
                "dislike_count": stats.get("dislikeCount", missing),
                "comment_count": stats.get("commentCount", missing),
                "playlist_id": playlist_id,
            })
    return rows


def get_video_data(video_data_path, channel_data, max_videos_per_playlist=100):
    """Load per-video statistics, from the CSV cache or from the YouTube API.

    If `video_data_path` exists it is read and returned as-is. Otherwise the
    first `max_videos_per_playlist` videos of every playlist in
    ``channel_data["playlist_id"]`` are fetched through the module-level
    `youtube` client, and the result is written to `video_data_path` when it
    is non-empty.

    Args:
        video_data_path: Path of the CSV cache file.
        channel_data: DataFrame with a ``playlist_id`` column. Null entries
            are skipped.
        max_videos_per_playlist: Upper bound on videos fetched per playlist.

    Returns:
        DataFrame with columns video_id, video_title, view_count, like_count,
        dislike_count, comment_count and playlist_id. Statistics missing from
        the API response are NaN; count columns are numeric, matching what a
        later read of the cache returns.

    Notes:
        API errors are printed and the affected playlist is skipped, so the
        cache may hold partial data. Delete the cache file to force a refetch.
    """
    if os.path.exists(video_data_path):
        return pd.read_csv(video_data_path)

    records = []
    for playlist_id in channel_data["playlist_id"]:
        if pd.isna(playlist_id):
            continue
        try:
            playlist_video_ids = _fetch_playlist_video_ids(playlist_id, max_videos_per_playlist)
            records.extend(_fetch_video_rows(playlist_video_ids, playlist_id))
        except Exception as e:
            # Best effort: a failing playlist must not discard the others.
            print(f"Error {e}")
            continue

    # Build the frame once instead of concatenating row by row.
    video_data = pd.DataFrame(records, columns=_VIDEO_COLUMNS)
    for column in _VIDEO_COUNT_COLUMNS:
        # The API returns counts as strings; convert so fresh and cached
        # results share the same dtypes.
        video_data[column] = pd.to_numeric(video_data[column], errors="coerce")

    if len(video_data) > 0:
        cache_dir = os.path.dirname(video_data_path)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        video_data.to_csv(video_data_path, index=False)

    return video_data

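Loads comment data. (Note: the cells below only create the cache directory and load the channel and video data; `comment_data_path` is defined but not yet used in this section.)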

In [6]:
# Directory holding cached API results (CSV files), relative to the
# notebook's working directory.
CACHE_PATH = ".cache"

# exist_ok=True makes this cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(CACHE_PATH, exist_ok=True)

In [None]:
# Input file with channel metadata; get_video_data reads its playlist_id column.
channel_data_path = "ChannelID_Data.csv"

# Cache file locations under CACHE_PATH.
video_data_path = f"{CACHE_PATH}/video_data.csv"
comment_data_path = f"{CACHE_PATH}/comment_data.csv"

channel_data = pd.read_csv(channel_data_path)
video_data = get_video_data(video_data_path, channel_data)

# Loaded tables, keyed by name, for the cells that follow.
data = dict(
    channel_data=channel_data,
    video_data=video_data,
)

# Use Data

In [None]:
# Preview the first rows of every loaded table, labelled with its key.
for table_name, table in data.items():
    print(table_name)
    display(table.head())