In [None]:
import datetime
from webdriver import get_chrome_driver
from youtube import crawl_youtube_comments, get_video_titles_URLs
from utils import Time
from pandas import DataFrame

# Upper bound on the number of comments collected per video
max_comments_per_video = 100

# Selectors locating the video title and the comment text in the page HTML
selector_video_title = '#video-title'
selector_comment = '#content-text'

# The output file name embeds today's date formatted as YYMMDD
day = f'{datetime.date.today():%y%m%d}'
save_path = f'data/YT_cmts_{day}.txt'

# Trending-feed URLs keyed by category name
URL_list = {
    'Music': 'https://www.youtube.com/feed/trending?bp=4gINGgt5dG1hX2NoYXJ0cw%3D%3D',
    'Gaming': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
    'Movies': 'https://www.youtube.com/feed/trending?bp=4gIKGgh0cmFpbGVycw%3D%3D',
}

# Try switching to another category key from URL_list
URL = URL_list['Music']

In [None]:
# Open a Chrome driver (headless=False) and collect the video URLs and titles
# from the selected trending page.
driver = get_chrome_driver(headless=False)
try:
    # NOTE(review): the helper's name lists titles before URLs, yet the result is
    # unpacked as (urls, titles) — confirm the return order against the youtube module.
    urls, titles = get_video_titles_URLs(driver, selector_video_title, URL, location='한국')
finally:
    # Quit the driver even when the scrape raises, so it is never left open.
    driver.quit()

# Render the collected data as a table with pandas
df = DataFrame({'Title': titles, 'URL': urls})
styles = [
    {'selector': 'th', 'props': [('text-align', 'left')]},   # Header
    {'selector': 'td', 'props': [('text-align', 'left')]}    # Data cells
]
styled = df.style.set_table_styles(styles)
display(styled)

In [None]:
# Timestamp taken before the crawl; used below to report the elapsed time.
start_time = Time.now()

driver = get_chrome_driver(headless=False)
try:
    # Crawl comments for every collected video.
    # Presumably the results are written to save_path — confirm in the youtube module.
    crawl_youtube_comments(driver, urls, titles, selector_comment, save_path, max_comments_per_video, start_time)
finally:
    # Quit the driver even when the crawl raises, so it is never left open.
    driver.quit()

# Report the time elapsed since start_time
print('소요 시간: ' + Time.str_delta(start_time))

In [None]:
import matplotlib.pyplot as plt

# Configure the default matplotlib font family and size in a single call
plt.rcParams.update({
    "font.family": "NanumGothic",
    "font.size": 15,
})

# Check that the Korean font is applied (if the glyphs look broken, restart the session)
plt.text(x=0.2, y=0.3, s='한글', size=100)

In [None]:
from konlpy.tag import Okt
from konlpy.corpus import kolaw, kobill
from nltk import Text
import datetime
from utils.visualize import plot_frequency_chart, draw_word_cloud


# Maximum number of characters of the corpus to analyse; set to None to analyse the whole file.
max_corpus_chars = 100_000

print('명사 추출 중...')
# Dated path recomputed here with the same expression used in the configuration cell
day = datetime.date.today().strftime('%y%m%d')
save_path = f'data/YT_cmts_{day}.txt'
# Read inside a `with` block so the file handle is closed, and read at most
# max_corpus_chars characters instead of loading the whole file and slicing it.
with open(save_path, encoding='utf-8') as corpus_file:
    corpus = corpus_file.read(max_corpus_chars)

# Other example corpora
# corpus = kolaw.open('./constitution.txt').read() # comment out the read above and swap in this corpus
# corpus = kobill.open('./1809890.txt').read()

# Extract nouns with Okt, then plot the top-30 frequency chart and a word cloud
words = Text(Okt().nouns(corpus))
plot_frequency_chart(words, top_n=30)
draw_word_cloud(words, font_path='./source/font/주아체.ttf')