# User Guide

Import packages

In [34]:
%load_ext autoreload
%autoreload 2
import logging
import pandas as pd

from utils.debug_utils import Debugger
from youtube_crawler import YoutubeVideoInfoCrawler, YoutubeCommentCrawler
from youtube_crawler.config.filter_enum import FilterSection, FilterLengthOption, FilterOrderByOption

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1. YouTube Video Info Crawler

Search YouTube for a term and collect the metadata of the matching videos into a DataFrame.

In [37]:
# INFO-level Debugger handed to the video-info crawler as its debug_tool.
video_debugger = Debugger(level=logging.INFO)
video_crawler = YoutubeVideoInfoCrawler(debug_tool=video_debugger)

In [39]:
# Search parameters: the term to look up and the value passed to crawl() as n_target.
n_target = 50
search_term = "Python"
# NOTE(review): filter_config is built here, but nothing in this cell hands it to the
# crawler -- crawl() below receives only search_term and n_target. Presumably it is
# meant to be forwarded to crawl(); confirm the parameter name against
# YoutubeVideoInfoCrawler.crawl before wiring it in.
filter_config = {
    FilterSection.ORDER_BY: FilterOrderByOption.VIEW_COUNT,
    FilterSection.LENGTH: FilterLengthOption.MEDIUM,
}

# Top-level await (IPython): start the crawler, then crawl for search_term.
await video_crawler.start()
video_info_list = await video_crawler.crawl(search_term=search_term, n_target=n_target)
# Each returned item exposes to_dict(); collect those dicts as the rows of a DataFrame.
video_df = pd.DataFrame([video_info.to_dict() for video_info in video_info_list])

# Last expression of the cell: display the resulting DataFrame.
video_df

2023-08-19 19:29:34,125 - utils.debug_utils_2c2dcbaa-b31c-4954-9dee-db9c2a4d6d4f_logger - INFO - Browser: Go to https://www.youtube.com/
2023-08-19 19:29:36,370 - utils.debug_utils_2c2dcbaa-b31c-4954-9dee-db9c2a4d6d4f_logger - INFO - Typing Python in input#search...
2023-08-19 19:29:36,478 - utils.debug_utils_2c2dcbaa-b31c-4954-9dee-db9c2a4d6d4f_logger - INFO - Python typed successfully in input#search
2023-08-19 19:29:36,556 - utils.debug_utils_2c2dcbaa-b31c-4954-9dee-db9c2a4d6d4f_logger - INFO - waiting for new page to load...
2023-08-19 19:29:37,555 - utils.debug_utils_2c2dcbaa-b31c-4954-9dee-db9c2a4d6d4f_logger - INFO - Search for Python successfully after 1 retries.
2023-08-19 19:29:37,555 - utils.debug_utils_2c2dcbaa-b31c-4954-9dee-db9c2a4d6d4f_logger - INFO - Scrolling and loading ytd-video-renderer...
2023-08-19 19:29:37,555 - utils.debug_utils_2c2dcbaa-b31c-4954-9dee-db9c2a4d6d4f_logger - INFO - Inner Call Scrolling and loading...(selector=ytd-video-renderer, scroll_step=400, 

Unnamed: 0,video_id,title,video_url,is_short,view_count,publish_time,channel_name,channel_url,desc_text
0,nLRL_NcnK-4,Harvard CS50’s Introduction to Programming wit...,https://www.youtube.com/watch?v=nLRL_NcnK-4,False,165万次观看,3个月前,freeCodeCamp.org,https://www.youtube.com/@freecodecamp,Learn Python programming from Harvard Universi...
1,lvH4-4iYjgs,給初學者的 Python 6 小時教學 (2023) #python教學 #完整課程 ...,https://www.youtube.com/watch?v=lvH4-4iYjgs,False,4.3万次观看,4周前,CodeShiba 程式柴,https://www.youtube.com/@CodeShiba,【 Python 】 給初學者的Python 6 小時教學(2023) ⭐️章節目錄⭐️ 0...
2,zdMUJJKFdsU,【python】4小时初学者教程 #python #python编程 #python入门,https://www.youtube.com/watch?v=zdMUJJKFdsU,False,165万次观看,2年前,GrandmaCan -我阿嬤都會,https://www.youtube.com/@GrandmaCan,python \npython 入门 \npython 教学 \npython 教程 \np...
3,XKHEtdqhLK8,Python Full Course for free 🐍,https://www.youtube.com/watch?v=XKHEtdqhLK8,False,1208万次观看,2年前,Bro Code,https://www.youtube.com/@BroCodez,Python tutorial for beginners full course #pyt...
4,rfscVS0vtbw,学习Python - 初学者全程,https://www.youtube.com/watch?v=rfscVS0vtbw,False,4131万次观看,5年前,freeCodeCamp.org,https://www.youtube.com/@freecodecamp,本课程将向您全面介绍python中的所有核心概念。跟随视频，你很快就会成为一名python程...
5,_uQrJ0TkZlc,Python Tutorial - Python Full Course for Begin...,https://www.youtube.com/watch?v=_uQrJ0TkZlc,False,3498万次观看,4年前,Programming with Mosh,https://www.youtube.com/@programmingwithmosh,Python tutorial - Python full course for begin...
6,Sjds9-gf_80,30 Moments Python Swallows Prey On A Tree Bran...,https://www.youtube.com/watch?v=Sjds9-gf_80,False,62万次观看,7天前,The Hawk,https://www.youtube.com/@thehawkus,566.30 Moments Python Swallows Prey On A Tree ...
7,FWGKsd68Aao,【爆箱】,https://www.youtube.com/watch?v=FWGKsd68Aao,False,48人正在观看,,爆箱兄弟,https://www.youtube.com/@TheUnboxBrothers0928,睇完片既兄弟你又訂閱左未？ 訂閱爆箱兄弟 ▷ https://goo.gl/6XrYsq 加...
8,UCFv9Zq6Ywo,"30 Moments When Cobra Confronts Python, What H...",https://www.youtube.com/watch?v=UCFv9Zq6Ywo,False,72万次观看,3个月前,The Hawk,https://www.youtube.com/@thehawkus,"377. 30 Moments When Cobra Confronts Python, W..."
9,3ztzyrxIAa8,20 Times Snakes Messed With The Wrong Opponent,https://www.youtube.com/watch?v=3ztzyrxIAa8,False,408万次观看,2个月前,Discoverize,https://www.youtube.com/@Discoverize,"For copyright matters, please contact: juliaba..."


## 2. YouTube Video Comment Crawler

Crawl the comments of a single video (here, the first result from section 1) into a DataFrame.

In [41]:
# INFO-level Debugger handed to the comment crawler as its debug_tool.
comment_debugger = Debugger(level=logging.INFO)
comment_crawler = YoutubeCommentCrawler(debug_tool=comment_debugger)

In [42]:
test_video_url = video_df.video_url[0]
n_target = 50

await comment_crawler.start()
comment_list = await comment_crawler.crawl(video_url=test_video_url, n_target=n_target)
comment_df = pd.DataFrame([comment.to_dict() for comment in comment_list])

comment_df

2023-08-19 19:31:11,664 - utils.debug_utils_4137c3ee-2af9-416f-ba4d-9eccafdb54f2_logger - INFO - Browser: Go to https://www.youtube.com/
2023-08-19 19:31:15,482 - utils.debug_utils_4137c3ee-2af9-416f-ba4d-9eccafdb54f2_logger - INFO - Browser: Go to https://www.youtube.com/watch?v=nLRL_NcnK-4
2023-08-19 19:31:17,489 - utils.debug_utils_4137c3ee-2af9-416f-ba4d-9eccafdb54f2_logger - INFO - Scrolling and loading ytd-comment-renderer...
2023-08-19 19:31:17,489 - utils.debug_utils_4137c3ee-2af9-416f-ba4d-9eccafdb54f2_logger - INFO - Inner Call Scrolling and loading...(selector=ytd-comment-renderer, scroll_step=400, load_wait=40, same_th=20, threshold=50)
2023-08-19 19:31:17,605 - utils.debug_utils_4137c3ee-2af9-416f-ba4d-9eccafdb54f2_logger - INFO - Top unchanged, Scroll top: 188.8000030517578, last top: 188.8000030517578, same count: 1, same_th: 20
2023-08-19 19:31:17,720 - utils.debug_utils_4137c3ee-2af9-416f-ba4d-9eccafdb54f2_logger - INFO - Top unchanged, Scroll top: 188.8000030517578, l

Unnamed: 0,comment_id,is_reply,author_name,author_url,publish_time,parent_comment_id,video_id,author_thumbnail,content_text,like_count
0,UgweEv9ZpDpLlmoEAGh4AaABAg,False,@tenamsb686,/channel/UCzTEZmIBjklFKbY6wUa9l7Q,2周前,,nLRL_NcnK-4,https://yt3.ggpht.com/ytc/AOPolaQyj5IDrUzPh9Wy...,"Dear beginners, I wish you to know a few thing...",252
1,UgwPcg4TzDT1JOBEJxl4AaABAg,False,@krishna_3406,/channel/UCwmZI0eYQhKfpbE_g7LM4cg,3个月前,,nLRL_NcnK-4,https://yt3.ggpht.com/ytc/AOPolaRNU1hDKEeEGQx4...,Course Contents \r\n(00:00:00) Introduction\r...,900
2,UgyIOF690CM-xQKNiwR4AaABAg,False,@timkohn4194,/channel/UCmBD-P_0ojjuiDeD_2BqEuw,3个月前,,nLRL_NcnK-4,https://yt3.ggpht.com/ytc/AOPolaRBJPMh1zja9V3x...,He is such an insanely good instructor. For th...,545
3,UgwHZq8K7mQw-92PH3x4AaABAg,False,@Teejito,/channel/UCX2oeeHLGgQBlRJov3HUq5Q,5天前,,nLRL_NcnK-4,https://yt3.ggpht.com/ytc/AOPolaTfpSlZGSe8Vgqk...,"David really is a great teacher, as someone wh...",7
4,Ugz4HNnaG1oO5ZLRtyp4AaABAg,False,@markkennedy9767,/channel/UCfQdrlEhcUULOciHiJ9_SFQ,6天前,,nLRL_NcnK-4,https://yt3.ggpht.com/ytc/AOPolaQJ7Jm2xn8gCHi3...,This is really good. He really goes into the d...,9
5,UgxN4DO0YK5k8yJdtYN4AaABAg,False,@PaulW-mc5fr,/channel/UCth0WrdTuwi6cxetaC7jGfw,3个月前,,nLRL_NcnK-4,https://yt3.ggpht.com/ytc/AOPolaQ1DLlQlvHAb3at...,"Firstly, a big thank you to the owners of this...",244
6,UgwnDPBMaikwI5SQQ6V4AaABAg,False,@JeremyJanzen,/channel/UC7beuFtPlQeIWhei2z3oEQA,2个月前,,nLRL_NcnK-4,https://yt3.ggpht.com/ytc/AOPolaTyTjuMZqPng2ac...,I’m not a beginner. But I found the OOP and U...,76
7,UgwhGLbcFUl46DyKEc14AaABAg,False,@summertube1,/channel/UCWpMU29011Y8vspH-X9SCeQ,2天前,,nLRL_NcnK-4,https://yt3.ggpht.com/ytc/AOPolaQ4h-6Qz7w33hL_...,When a teacher really knows his stuff. A huge ...,1
8,UgySfhOpJLP6h2QJVVh4AaABAg,False,@Mr786Addzy,/channel/UC79gzPrOH4eWxQ9j1ONKBGw,8天前,,nLRL_NcnK-4,https://yt3.ggpht.com/ytc/AOPolaTBseMkjXN8jN3y...,"This is priceless, thank you Free Code Camp fo...",1
9,UgxYFhiHMAy2ARRCb_J4AaABAg,False,@pardiedanser,/channel/UCf-0CO07zJK6HFeH2pgB8fA,2个月前,,nLRL_NcnK-4,https://yt3.ggpht.com/ytc/AOPolaQDBGRnCsQ2WKNA...,So privileged to get to watch this for free. W...,56
