# User Guide

Import packages

In [1]:
%load_ext autoreload
%autoreload 2
import logging
import pandas as pd

from gembox.debug_utils import Debugger
from youtube_crawler import YoutubeVideoInfoCrawler, YoutubeCommentCrawler
from youtube_crawler.config.filter_enum import FilterSection, FilterLengthOption, FilterOrderByOption

## 1. YouTube Video Info Crawler

In [2]:
# Create the video-info crawler; its Debugger is configured at logging.INFO.
video_crawler = YoutubeVideoInfoCrawler(
    debug_tool=Debugger(level=logging.INFO),
)

In [4]:
n_target = 50
search_term = "Python"
filter_config = {
    FilterSection.ORDER_BY: FilterOrderByOption.VIEW_COUNT,
    FilterSection.LENGTH: FilterLengthOption.MEDIUM,
}

await video_crawler.start()
video_info_list = await video_crawler.crawl(search_term=search_term, n_target=n_target)
video_df = pd.DataFrame([video_info.to_dict() for video_info in video_info_list])

video_df

2023-08-19 21:16:18,677 - gembox.debug_utils_eb84_logger - INFO - Browser: Go to https://www.youtube.com/
2023-08-19 21:16:22,196 - gembox.debug_utils_eb84_logger - INFO - Typing Python in input#search...
2023-08-19 21:16:22,257 - gembox.debug_utils_eb84_logger - INFO - Python typed successfully in input#search
2023-08-19 21:16:22,358 - gembox.debug_utils_eb84_logger - INFO - waiting for new page to load...
2023-08-19 21:16:23,426 - gembox.debug_utils_eb84_logger - INFO - waiting for new page to load...
2023-08-19 21:16:24,436 - gembox.debug_utils_eb84_logger - INFO - Search for Python successfully after 2 retries.
2023-08-19 21:16:24,437 - gembox.debug_utils_eb84_logger - INFO - Scrolling and loading ytd-video-renderer...
2023-08-19 21:16:24,438 - gembox.debug_utils_eb84_logger - INFO - Inner Call Scrolling and loading...(selector=ytd-video-renderer, scroll_step=400, load_wait=40, same_th=20, threshold=50)
2023-08-19 21:16:28,066 - gembox.debug_utils_eb84_logger - INFO - Top unchanged

Unnamed: 0,video_id,title,video_url,is_short,view_count,publish_time,channel_name,channel_url,desc_text
0,kqtD5dpn9C8,Python for Beginners - Learn Python in 1 Hour,https://www.youtube.com/watch?v=kqtD5dpn9C8,False,1293万次观看,2年前,Programming with Mosh,https://www.youtube.com/@programmingwithmosh,"#Python, #MachineLearning, #WebDevelopment Pyt..."
1,_uQrJ0TkZlc,Python Tutorial - Python Full Course for Begin...,https://www.youtube.com/watch?v=_uQrJ0TkZlc,False,3498万次观看,4年前,Programming with Mosh,https://www.youtube.com/@programmingwithmosh,Python tutorial - Python full course for begin...
2,rfscVS0vtbw,学习Python - 初学者全程,https://www.youtube.com/watch?v=rfscVS0vtbw,False,4131万次观看,5年前,freeCodeCamp.org,https://www.youtube.com/@freecodecamp,本课程将向您全面介绍python中的所有核心概念。跟随视频，你很快就会成为一名python程...
3,nLRL_NcnK-4,Harvard CS50’s Introduction to Programming wit...,https://www.youtube.com/watch?v=nLRL_NcnK-4,False,165万次观看,3个月前,freeCodeCamp.org,https://www.youtube.com/@freecodecamp,Learn Python programming from Harvard Universi...
4,XKHEtdqhLK8,Python Full Course for free 🐍,https://www.youtube.com/watch?v=XKHEtdqhLK8,False,1208万次观看,2年前,Bro Code,https://www.youtube.com/@BroCodez,Python tutorial for beginners full course #pyt...
5,zdMUJJKFdsU,【python】4小时初学者教程 #python #python编程 #python入门,https://www.youtube.com/watch?v=zdMUJJKFdsU,False,165万次观看,2年前,GrandmaCan -我阿嬤都會,https://www.youtube.com/@GrandmaCan,python \npython 入门 \npython 教学 \npython 教程 \np...
6,ghFe0fexIVI,Find Wifi 📶 Password 🔑 using Python 😨,https://www.youtube.com/shorts/ghFe0fexIVI,True,137次观看,1天前,Aman Aadi,https://www.youtube.com/@AmanAadi_,In this video :- How to find wifi password usi...
7,t8pPdKYpowI,Python Tutorial for Beginners - Learn Python i...,https://www.youtube.com/watch?v=t8pPdKYpowI,False,457万次观看,2年前,TechWorld with Nana,https://www.youtube.com/@TechWorldwithNana,Python Tutorial for Beginners | Full Python Co...
8,OP5HcXJg2Aw,【機器學習2021】卷積神經網路 (Convolutional Neural Network...,https://www.youtube.com/watch?v=OP5HcXJg2Aw,False,19万次观看,2年前,Hung-yi Lee,https://www.youtube.com/@HungyiLeeNTU,ML2021 week3 3/12 Convolution Neural Network(C...
9,Sjds9-gf_80,30 Moments Python Swallows Prey On A Tree Bran...,https://www.youtube.com/watch?v=Sjds9-gf_80,False,63万次观看,7天前,The Hawk,https://www.youtube.com/@thehawkus,566.30 Moments Python Swallows Prey On A Tree ...


In [5]:
# Stop the video-info crawler that the search cell above started.
# NOTE(review): presumably this releases the crawler's browser session — confirm.
await video_crawler.stop()

## 2. YouTube Video Comment Crawler

In [6]:
# Create the comment crawler; its Debugger is configured at logging.INFO.
comment_crawler = YoutubeCommentCrawler(
    debug_tool=Debugger(level=logging.INFO),
)

In [7]:
test_video_url = video_df.video_url[0]
n_target = 50

await comment_crawler.start()
comment_list = await comment_crawler.crawl(video_url=test_video_url, n_target=n_target)
comment_df = pd.DataFrame([comment.to_dict() for comment in comment_list])

comment_df

2023-08-19 21:16:53,613 - gembox.debug_utils_881d_logger - INFO - Browser: Go to https://www.youtube.com/
2023-08-19 21:16:58,115 - gembox.debug_utils_881d_logger - INFO - Browser: Go to https://www.youtube.com/watch?v=kqtD5dpn9C8
2023-08-19 21:16:59,607 - gembox.debug_utils_881d_logger - INFO - Scrolling and loading ytd-comment-renderer...
2023-08-19 21:16:59,608 - gembox.debug_utils_881d_logger - INFO - Inner Call Scrolling and loading...(selector=ytd-comment-renderer, scroll_step=400, load_wait=40, same_th=20, threshold=50)
2023-08-19 21:16:59,815 - gembox.debug_utils_881d_logger - INFO - Top unchanged, Scroll top: 188.8000030517578, last top: 188.8000030517578, same count: 1, same_th: 20
2023-08-19 21:16:59,876 - gembox.debug_utils_881d_logger - INFO - Top unchanged, Scroll top: 188.8000030517578, last top: 188.8000030517578, same count: 2, same_th: 20
2023-08-19 21:16:59,923 - gembox.debug_utils_881d_logger - INFO - Top unchanged, Scroll top: 188.8000030517578, last top: 188.80000

Unnamed: 0,comment_id,is_reply,author_name,author_url,publish_time,parent_comment_id,video_id,author_thumbnail,content_text,like_count
0,Ugzby30dYHdobijJxnF4AaABAg,False,@programmingwithmosh,/channel/UCWv7vMbMWH4-V0ZXdmDpPBA,1年前,,kqtD5dpn9C8,https://yt3.ggpht.com/tBEPr-zTNXEeae7VZKSZYfiy...,Want to master Python? Get my complete Python...,499
1,UgzPbddLriMshHZ6-3F4AaABAg,False,@codingmadeclear2229,/channel/UCXabjleRKPYvWRmqDQIgJjQ,2年前,,kqtD5dpn9C8,https://yt3.ggpht.com/ytc/AOPolaSnLsxI6qTLP9eR...,"This guy, sat for 1 hour and talked about pyth...",1.6万
2,UgzLwXDNxg0zTrmsRiZ4AaABAg,False,@meilinfjellstad7168,/channel/UCZybS6NF3tXW7BeUQSpb24Q,2个月前,,kqtD5dpn9C8,https://yt3.ggpht.com/54Pm2TqFAeglB_Xp4yssVMDG...,Thanks Mosh! After struggling for 1 year with...,1723
3,UgxCBToChpmxqVVLaLh4AaABAg,False,@TamalaAikman-vm8jy,/channel/UC3C2Phc1Jnn3z7QoC5rTKOw,3个月前,,kqtD5dpn9C8,https://yt3.ggpht.com/ytc/AOPolaQs_yRtwGFmrrnZ...,This video was a game changer! Python is no l...,242
4,Ugwe9c509Y4PxlbmWOh4AaABAg,False,@user-vz3ln2yz4l,/channel/UCF8wMZTKqZtTWzsinot3NpA,1个月前,,kqtD5dpn9C8,https://yt3.ggpht.com/plz1njEKHNip-FxVhiX8EneD...,This is my first time learning programming lan...,67
5,UgyYSabcK6aBQt2NJBJ4AaABAg,False,@dcruan6771,/channel/UC4MtNbHXZieATD904ZmoNag,5天前,,kqtD5dpn9C8,https://yt3.ggpht.com/ytc/AOPolaRGR6l-7obIY-Bb...,"I use to always fail my coding classes, but yo...",3
6,UgwkDo11H4HZFr9Ox014AaABAg,False,@everestdennis4792,/channel/UCQ1soOGN5DAro2D1WWajpKQ,1年前,,kqtD5dpn9C8,https://yt3.ggpht.com/ytc/AOPolaQITfPdhrj5qvjg...,I'm literally only like 5 minutes in and this ...,2199
7,Ugz8ofa8mkjSuyETGpt4AaABAg,False,@nickygood3342,/channel/UCidXXZdcTpFQsIbneoPAldg,13天前,,kqtD5dpn9C8,https://yt3.ggpht.com/ytc/AOPolaQuEghVAvh8s43j...,Love the simplicity & wisdom with which you te...,3
8,Ugx6XvdBtSUEzd5KQEJ4AaABAg,False,@sumedhghawalkar3522,/channel/UClnZKOkmdojZcYyXry-aJgQ,2周前,,kqtD5dpn9C8,https://yt3.ggpht.com/ytc/AOPolaScXbMS5hJqXL1V...,Im a Masters student in Automotive Engineering...,8
9,Ugy8xdumN-fom_SozH14AaABAg,False,@SpaceTacos60316,/channel/UCpR8DujH7FNNd6snSlZmPmQ,1个月前,,kqtD5dpn9C8,https://yt3.ggpht.com/oKUXYRtt15Nq5-zvPrcSOLEs...,Thanks for this Mosh! Really easy to understan...,6


In [8]:
# Stop the comment crawler that the crawl cell above started.
# NOTE(review): presumably this releases the crawler's browser session — confirm.
await comment_crawler.stop()