# YouTube Smart Crawling
* Author: 고지형, [iloveslowfood](https://github.com/iloveslowfood)
* 유튜브 채널의 기본 정보와 구독자 추이, 조회수 추이를 수집한다.
* 기본 정보: 영상 조회수, 영상 길이 등
* 구독자 추이, 조회수 추이는 최근 360일까지 수집된다.

In [6]:
import os
import time
import json
import glob
from tqdm import tqdm
import datetime
from datetime import timedelta
from tqdm import tqdm

import pandas as pd
import numpy as np

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys

## Manual
* INPUT: YTCCCrawler로 수집된 채널 URL 데이터 (`channel`, `url` 컬럼을 가진 데이터프레임)
* 크롤러 객체 생성
```python
crawler = SmartCrawler(
    driver_path, # 크롬드라이버 경로
    save_path # 수집한 데이터의 저장 경로
)
```
* 크롤러 실행
```python
crawler.work(channel_list) # channel_list: YTCCCrawler에 의해 수집된 URL 데이터
```

In [2]:
# Locations used by this notebook run.
driver_path = './drivers/chromedriver.exe'  # path to the chromedriver binary
save_path = '../raw/test'  # folder the crawled files are written to

# Channel name / URL table produced by the earlier channel crawl.
channel_list = pd.read_csv('../raw/channel_list_지형.csv')

# Build the crawler that drives both the trend and the metadata crawl.
crawler = SmartCrawler(driver_path, save_path)

In [None]:
# Crawl only the first 10 rows of channel_list.
crawler.work(channel_list.head(10))

In [7]:
class SmartCrawler:
    """Run the trend crawl and the metadata crawl for every channel in a table.

    For each channel a sub-folder named after the (sanitised) channel name is
    created under ``save_path`` and passed to ``TrendCrawler`` and
    ``MetaCrawler`` as their output directory.
    """

    def __init__(self, driver_path, save_path):
        """Remember the chromedriver path and the root output folder."""
        self.driver_path = driver_path
        self.save_path = save_path

    def work(self, channel_list: '채널명, 채널 url 컬럼을 지닌 데이터프레임'):
        """Crawl every channel listed in ``channel_list``.

        Args:
            channel_list: DataFrame with a ``channel`` column (channel name)
                and a ``url`` column (channel URL).

        A failure for one channel is reported on stdout and the loop moves on
        to the next channel, so a single bad channel does not stop the batch.
        """
        for _, channel in channel_list.iterrows():
            name, url = channel['channel'], channel['url']
            channel_save_path = os.path.join(self.save_path, self.correct_file_name(name))
            try:
                # makedirs also creates a missing save_path and tolerates
                # re-runs where the channel folder already exists.
                os.makedirs(channel_save_path, exist_ok=True)
            except OSError as error:
                print(f"Skipping channel '{name}': cannot create '{channel_save_path}' ({error!r})")
                continue

            trend_crawler = TrendCrawler(self.driver_path, channel_save_path)
            meta_crawler = MetaCrawler(self.driver_path, channel_save_path)

            try:
                # The metadata crawl only runs when the trend crawl succeeded.
                trend_crawler.work(name=name)
                meta_crawler.work(url=url)
            except Exception as error:
                # Best effort: report the failure instead of hiding it.
                print(f"Skipping channel '{name}': {error!r}")

    @staticmethod
    def correct_file_name(title):
        """Return ``title`` with the characters ``\\ / : * ? " < > |`` removed."""
        invalid_file_name_list = ['\\', '/', ':', '*', '?', '"', '<', '>', '|']
        for inv in invalid_file_name_list:
            title = title.replace(inv, '')
        return title


class TrendCrawler:
    URL = 'https://kr.noxinfluencer.com/'
    def __init__(self, driver_path, save_path):
        self.driver_path = driver_path
        self.save_path = save_path
    
    def work(self, name: '수집할 채널의 이름'):
        """Crawl the subscriber and view trends of one channel and save them.

        Opens a Chrome session on ``URL``, navigates to the channel called
        ``name``, scrapes the subscriber trend, the view trend and the
        cumulative view count at the first view-trend date, merges them and
        writes ``<name>_trend.csv`` into ``save_path``.

        Args:
            name: Name of the channel to collect.

        Any exception raised while scraping propagates to the caller; the
        browser session is shut down in every case.
        """
        self.driver = webdriver.Chrome(self.driver_path)
        try:
            self.driver.get(self.URL)
            self.into_channel(name)
            print(f'Getting channel trend from {name}...', end='\t')
            sub_trend = self.get_trend(name, trend_type='sub_trend')
            view_trend = self.get_trend(name, trend_type='view_trend')

            # The cumulative series is anchored at the first view-trend date.
            start_date = str(view_trend['date'].iloc[0])[:10]
            cumul_start_view = self.get_trend(name, trend_type='cumul_start_view', start_date=start_date)

            result = self.wrap(sub_trend, view_trend, cumul_start_view)
            file_name = f'{self.correct_file_name(name)}_trend.csv'
            result.to_csv(os.path.join(self.save_path, file_name), index=False)
            print(f'{file_name} saved.\n')
        finally:
            # quit() ends the whole session (browser and chromedriver), also
            # when a step above raised; close() alone only closes the window.
            self.driver.quit()
    
    def get_trend(self, name, trend_type, start_date: '누적 조회수 수집에 사용'=None):
        """Scrape one trend from the charts of the channel page that is open.

        Args:
            name: Channel name. Not used inside this method.
            trend_type: ``'sub_trend'``, ``'view_trend'`` or
                ``'cumul_start_view'``.
            start_date: ``'YYYY-MM-DD'`` string, only used for
                ``'cumul_start_view'``.

        Returns:
            * ``'cumul_start_view'``: whatever ``grope`` returns for it.
            * ``'sub_trend'``: DataFrame with columns ``date``, ``subscriber``.
            * ``'view_trend'``: DataFrame with columns ``date``, ``view``.
        """
        if trend_type == 'cumul_start_view':
            return self.grope(trend_type, start_date)

        graph_elements = self.grope(trend_type)
        if trend_type == 'sub_trend':
            date_list = []
            n_sub_list = []
            date = None  # date of the previously sampled point, if any
            for n in range(1, graph_elements['date_interval']):
                self.move_cursor(n_offset=n, origin=graph_elements['start_point'], pix=graph_elements['pix'])
                soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                info = soup.find_all('div', id="channel-history-sub-chart")[0].get_text()
                if '획기적' in info:
                    # A milestone tooltip is not parsed for a date: use the day
                    # after the previous sample. Without a usable previous date
                    # fall back to the 1900-01-01 placeholder that
                    # correct_timeline() looks for. (The former nested
                    # try/except could never reach that placeholder.)
                    try:
                        date = (pd.to_datetime(date) + timedelta(1)).strftime('%Y-%m-%d')
                    except (TypeError, ValueError):
                        date = pd.to_datetime('1900-01-01')
                    n_sub = self.calc_n_str(info.split(':')[-1].split('구독자 ')[-1])
                else:
                    # Regular tooltip: first 10 characters are the date.
                    date = info[:10]
                    n_sub = self.calc_n_str(info[10:])
                date_list.append(date)
                n_sub_list.append(n_sub)

            sub_trend = pd.DataFrame(dict(date=date_list, subscriber=n_sub_list))
            sub_trend['date'] = pd.to_datetime(sub_trend['date'])
            sub_trend = self.correct_timeline(sub_trend)
            # Neighbouring cursor positions can hit the same day.
            sub_trend = sub_trend.drop_duplicates(ignore_index=True)

            return sub_trend

        elif trend_type == 'view_trend':
            date_list = []
            n_view_list = []
            for n in range(1, graph_elements['date_interval']):
                self.move_cursor(n_offset=n, origin=graph_elements['start_point'], pix=graph_elements['pix'])
                soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                info = soup.find('div', id="channel-history-view-chart").find_all('div')[-1].get_text()
                value_text = info[10:]
                if '조회수' in value_text:
                    # Keep only the number in front of the '조회수' label.
                    value_text = value_text.strip().split('조회수')[0]

                date_list.append(info[:10])
                n_view_list.append(self.calc_n_str(value_text))

            view_trend = pd.DataFrame(dict(date=date_list, view=n_view_list))
            view_trend['date'] = pd.to_datetime(view_trend['date'])
            view_trend = self.correct_timeline(view_trend)
            view_trend = view_trend.drop_duplicates(ignore_index=True)

            return view_trend
        
    def grope(self, trend_type, start_date:'cumul_view_start(누적 조회수 초기값)를 구할 때만 사용'=None):
        """Locate the chart for ``trend_type`` and measure it.

        Args:
            trend_type: ``'sub_trend'``, ``'view_trend'`` or
                ``'cumul_start_view'``.
            start_date: ``'YYYY-MM-DD'`` string; only used for
                ``'cumul_start_view'``.

        Returns:
            For the two trend types a dict with ``date_interval`` (days
            between the detected edge dates), ``start_point`` (cursor offset
            of the start edge) and ``pix`` (offset units per day).
            For ``'cumul_start_view'`` the number parsed from the tooltip
            whose first 10 characters equal ``start_date``.

        Raises:
            NotImplementedError: for any other ``trend_type``.
            LookupError: when no tooltip matching ``start_date`` is found.
        """
        canvas_xpath = {
            'sub_trend': '//*[@id="channel-history-sub-chart"]/div[1]/canvas',
            'view_trend': '//*[@id="channel-history-view-chart"]/div[1]/canvas',
        }
        if trend_type in canvas_xpath:
            xpath = canvas_xpath[trend_type]
            self.element = WebDriverWait(self.driver, 10).until(
                lambda drv: drv.find_element_by_xpath(xpath))
        elif trend_type == 'cumul_start_view':
            # Only clicks the chart switch; self.element is NOT refreshed
            # here, so it must still be set from an earlier grope() call.
            self.driver.find_element_by_xpath('//*[@id="tab-channel"]/div[5]/div[1]/div/span[2]').click()
        else:
            raise NotImplementedError()

        # Offsets are measured from the centre of the canvas.
        width = self.element.size['width']
        origin = width // 2
        self.move_cursor(n_offset=0, origin=origin)

        start_origin = -origin + 60
        end_origin = origin - 20

        if trend_type in canvas_xpath:
            start_point, first_date = self.find_edges(start_origin, trend_type, 'start')
            end_point, last_date = self.find_edges(end_origin, trend_type, 'end')
            pix_interval = end_point - start_point
            date_interval = (last_date - first_date).days
            pix = pix_interval / date_interval
            return dict(date_interval=date_interval, start_point=start_point, pix=pix)

        # 'cumul_start_view': walk left one unit at a time until the tooltip
        # shows start_date. The walk is capped at the canvas width so a date
        # that never shows up ends in an error instead of an open-ended loop.
        for margin in range(width + 1):
            self.move_cursor(n_offset=0, origin=start_origin + 12 - margin)
            soup = BeautifulSoup(self.driver.page_source, 'html.parser')
            info = soup.find('div', id="channel-history-view-chart").find_all('div')[-1].get_text()
            if info[:10] != start_date:
                continue
            value_text = info[10:]
            if '조회수' in value_text:
                value_text = value_text.strip().split('조회수')[0]
            return self.calc_n_str(value_text)
        raise LookupError(f"no chart tooltip found for start date {start_date!r}")
    
    def find_edges(self, origin, trend_type, option, margin=-2) -> ('point', 'date'):
        '''Search for the position values of the two ends of the graph.

        The cursor is moved to offset ``origin`` on ``self.element``, the
        tooltip text is read from the page source, and ``origin`` is shifted
        by ``margin`` per step while the first 10 characters of the tooltip
        differ from ``standard`` (the tooltip text read at offset 0).

        Args:
            origin: Horizontal cursor offset at which probing starts.
            trend_type: ``'sub_trend'`` or ``'view_trend'``; any other value
                falls through and the method returns ``None``.
            option: ``'start'`` probes for the start edge; any other value
                probes for the end edge.
            margin: Offset added (start) or subtracted (end) per step.

        Returns:
            ``(point, date)``: the last ``origin`` recorded and the last date
            parsed from the tooltip as a pandas timestamp.

        NOTE(review): if the very first probe already equals ``standard`` the
        loop breaks before ``*_point`` / ``*_date`` are assigned, so the
        ``return`` raises ``UnboundLocalError`` -- confirm this cannot happen
        on the real page.
        '''
        if trend_type == 'sub_trend':

            # Reference tooltip: text shown with the cursor at offset 0.
            self.move_cursor(n_offset=0, origin=0)
            soup = BeautifulSoup(self.driver.page_source, 'html.parser')
            standard = soup.find_all('div', id="channel-history-sub-chart")[0].get_text()[:10]
            
            if option=='start':
                compare = None
                while True:
                    self.move_cursor(n_offset=0, origin=origin)
                    soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                    temp = soup.find_all('div', id="channel-history-sub-chart")[0].get_text()
                    compare = temp[:10]

                    if compare != standard:
                        if '획기적' not in compare: # a regular date, not a '획기적 사건' (milestone event) tooltip
                            origin += margin
                            start_date = pd.to_datetime(soup.find_all('div', id="channel-history-sub-chart")[0].get_text()[:10])
                            start_point = origin
                            continue

                        else: # the tooltip is a '획기적 사건' (milestone event)
                            # Step in the opposite direction until a regular
                            # date shows up, then use the day before it.
                            while True:
                                self.move_cursor(n_offset=0, origin=origin)
                                soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                                pseudo_start = soup.find_all('div', id="channel-history-sub-chart")[0].get_text()[:10]
                                start_point = origin
                                if '획기적' not in pseudo_start:
                                    start_date = pd.to_datetime(pseudo_start[:10]) - timedelta(1)
                                    break
                                else:
                                    origin -= margin
                                    continue
                            break
                    else:
                        break
                return start_point, start_date

            else:
                compare = None
                while True:
                    self.move_cursor(n_offset=0, origin=origin)
                    soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                    temp = soup.find_all('div', id="channel-history-sub-chart")[0].get_text()
                    compare = temp[:10]

                    if compare != standard:
                        if '획기적' not in compare: # a regular date, not a '획기적 사건' (milestone event) tooltip
                            origin -= margin
                            end_date = pd.to_datetime(soup.find_all('div', id="channel-history-sub-chart")[0].get_text()[:10])
                            end_point = origin
                            continue

                        else: # the tooltip is a '획기적 사건' (milestone event)
                            # NOTE(review): like the start branch this
                            # subtracts one day from the first regular date
                            # found -- confirm that is intended for the end edge.
                            while True:
                                self.move_cursor(n_offset=0, origin=origin)
                                soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                                pseudo_end = soup.find_all('div', id="channel-history-sub-chart")[0].get_text()[:10]
                                end_point = origin
                                if '획기적' not in pseudo_end:
                                    end_date = pd.to_datetime(pseudo_end[:10]) - timedelta(1)
                                    break
                                else:
                                    origin += margin
                                    continue
                            break
                    else:
                        break
                return end_point, end_date

        elif trend_type == 'view_trend':
            # Reference tooltip: text of the last <div> inside the view chart
            # with the cursor at offset 0.
            self.move_cursor(n_offset=0, origin=0)
            soup = BeautifulSoup(self.driver.page_source, 'html.parser')
            standard = soup.find('div', id="channel-history-view-chart").find_all('div')[-1].get_text()[:10]
            if option=='start':
                compare = None
                while True:
                    self.move_cursor(n_offset=0, origin=origin)
                    soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                    temp = soup.find('div', id="channel-history-view-chart").find_all('div')[-1].get_text()
                    compare = temp[:10]

                    if compare != standard:
                        # Record this probe and keep stepping by margin.
                        origin += margin
                        start_date = pd.to_datetime(compare)
                        start_point = origin
                        continue
                    else:
                        break
                return start_point, start_date

            else:
                compare = None
                while True:
                    self.move_cursor(n_offset=0, origin=origin)
                    soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                    temp = soup.find('div', id="channel-history-view-chart").find_all('div')[-1].get_text()
                    compare = temp[:10]

                    if compare != standard:
                        # Record this probe and keep stepping against margin.
                        origin -= margin
                        end_date = pd.to_datetime(compare)
                        end_point = origin
                        continue
                    else:
                        break
                return end_point, end_date
            
    def move_cursor(self, n_offset, origin, pix=2):
        """Move the mouse onto ``self.element``, then shift it horizontally.

        The horizontal shift is ``origin + pix * n_offset``; the vertical
        shift is always 0.
        """
        horizontal_shift = origin + pix*n_offset
        chain = webdriver.common.action_chains.ActionChains(self.driver)
        chain.move_to_element(self.element).move_by_offset(horizontal_shift, 0).perform()
        
    def into_channel(self, channel_name):
        """Search for ``channel_name`` on the site and open its channel page.

        Types the name into the header search box, waits for the result list
        (polling the page source every 0.5 s, 11 attempts in total) and opens
        the link of the first result.

        Raises:
            NotImplementedError: no search result link appeared in time.
            NameError: the first result's name differs from ``channel_name``;
                the browser window is closed before raising.
        """
        search_box = self.driver.find_element_by_xpath('//*[@id="header-search-input"]')
        search_box.clear()
        search_box.send_keys(channel_name)
        time.sleep(0.5)
        search_box.send_keys(Keys.ENTER)

        max_retries = 10
        for attempt in range(max_retries + 1):
            soup = BeautifulSoup(self.driver.page_source, 'html.parser')
            try:
                anchor = soup.find('div', class_='result').find('a', class_="channel-name ellipsis", href=True)
                link = self.URL[:-1] + anchor['href']
                break
            except (AttributeError, TypeError, KeyError):
                # The result block or its link is not in the page (yet).
                if attempt == max_retries:
                    raise NotImplementedError()
                time.sleep(0.5)

        compare = soup.find('a', class_='channel-name ellipsis').get_text().strip()

        if compare != channel_name.strip():  # the channel is not registered on the site
            print(f"Could not found channel '{channel_name.strip()}'")
            self.driver.close()
            raise NameError()

        self.driver.get(link)
        
    def wrap(self, sub_trend, view_trend, cumul_start_view):
        '''구독자 추이, 조회수 추이를 병합하는 함수'''
        result = sub_trend.merge(view_trend, how='outer', on='date').sort_values(by='date', ignore_index=True)
        result = self.imputate(result)
        result['subscriber'] = result['subscriber'].astype(int)
        result['view'] = result['view'].astype(int)
        result = self.add_cumul_view(result, cumul_start_view)
        return result
        
    @staticmethod
    def add_cumul_view(data:'Trend 데이터', cumul_start_view: '누적 조회수 초기값'):
        data['cumul_view'] = data['view'].cumsum() + cumul_start_view
        return data
    
    @staticmethod
    def calc_n_str(str_n):
        if '만' in str_n:
            num = str_n.split('만')[0]
            return int(float(num) * 10000)
        elif '천' in str_n:
            num = str_n.split('천')[0]
            return int(float(num) * 1000)
        else:
            return int(float(str_n))
        
    @staticmethod
    def correct_file_name(title):
        invalid_file_name_list = ['\\', '/', ':', '*', '?', '"', '<', '>', '|']
        for inv in invalid_file_name_list:
            if inv in title:
                title = title.replace(inv, '')
        return title

    @staticmethod
    def correct_timeline(data):
        idx_to_correct = data[data['date'] == pd.to_datetime('1900-01-01')].index.tolist()
        if idx_to_correct:
            idx_shift = list(np.array(idx_to_correct) + 1)
            data.loc[idx_to_correct, 'date'] = (data.loc[idx_shift, 'date'] - timedelta(1)).values
        return data
    
    @staticmethod
    def imputate(df):
        detect = df.isnull().sum()
        missing_cols = detect[detect != 0].index.tolist()
        for col in missing_cols:
            missing_idx = df[df[col].isnull()].index.tolist()
            for m in missing_idx:
                if m == 0:
                    upper_fill = df.loc[m+1:, col]
                    upper_bound = upper_fill[upper_fill.notnull()].tolist()[0]
                    fill_value = int(upper_bound)
                    pass
                elif m == df.shape[0]-1:
                    lower_fill = df.loc[:m-1, col]
                    lower_bound = lower_fill[lower_fill.notnull()].tolist()[-1]
                    fill_value = int(lower_bound)
                else:
                    lower_fill = df.loc[:m-1, col]
                    upper_fill = df.loc[m+1:, col]
                    lower_bound = lower_fill[lower_fill.notnull()].tolist()[-1]
                    upper_bound = upper_fill[upper_fill.notnull()].tolist()[0]
                    fill_value = int(np.mean([lower_bound, upper_bound]))
                df.loc[m, col] = fill_value
        return df
    
    
class MetaCrawler:
    def __init__(self, driver_path, save_path):
        self.driver_path = driver_path
        self.save_path = save_path
    
    def work(self, url: '수집할 채널의 url'):
        """Collect channel info and per-video metadata for one channel and save it.

        Args:
            url: URL of the channel to collect.

        Writes ``<channel>_meta.csv`` into ``save_path``.

        Raises:
            NotImplementedError: ``get_channel_info`` reported the channel
                as '채널 정지' (suspended).
        """
        self.driver = webdriver.Chrome(self.driver_path)
        try:
            self.driver.get(url)  # go to the channel page
            self.driver.maximize_window()
            channel_info = self.get_channel_info()
        finally:
            # Release the browser even when reading the channel page failed.
            self.driver.quit()

        channel = channel_info['channel']
        if isinstance(channel, pd.Series):
            # A one-row frame yields a Series here; unwrap it so the
            # comparison below is a plain boolean.
            channel = channel.iloc[0]
        if channel == '채널 정지':
            raise NotImplementedError()
        channel_name = self.correct_file_name(channel)

        meta = self.get_video_info(url)  # per-video info (no comment bodies)
        result = self.wrap(channel_info, meta)

        # One CSV per channel. wrap() returns the same frame it was given,
        # so a single write covers what used to be written twice.
        file_name = f'{channel_name}_meta.csv'
        result.to_csv(os.path.join(self.save_path, file_name), index=False)
        print(f"'{file_name}' saved.")
        print(f"All channels saved")
        
    def get_channel_info(self):
        """Read channel name, sign-up date and total view count from the open channel page.

        Returns:
            dict with keys ``channel``, ``sign_in`` and ``total_cumul_view``.
            When the page shows an alert element every value is the marker
            string '채널 정지' (suspended). Otherwise ``total_cumul_view`` is
            an int when the text parses as a number and the raw text if not.
        """
        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        if soup.find('yt-formatted-string', class_="style-scope yt-alert-renderer"):
            # Same type and keys as the normal result, so callers can read
            # info['channel'] in both cases.
            return dict(channel='채널 정지', sign_in='채널 정지', total_cumul_view='채널 정지')

        self.click(option='channel_info')
        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        channel_name = soup.find_all('yt-formatted-string', class_="style-scope ytd-channel-name")[0].get_text()

        about_rows = soup.find_all('yt-formatted-string', class_="style-scope ytd-channel-about-metadata-renderer")
        # The last two rows are read as "sign-up date" and "view count".
        sign_in = about_rows[-2].get_text().split('가입일: ')[-1].replace(' ', '')
        cumul_n_view = about_rows[-1].get_text().split('조회수 ')[-1][:-1].replace(',', '')
        try:
            cumul_n_view = int(float(cumul_n_view))
        except ValueError:
            pass  # not numeric: keep the raw text
        return dict(channel=channel_name, sign_in=sign_in, total_cumul_view=cumul_n_view)

    def get_video_info(self, url):
        """Collect the metadata of every video linked from the channel at ``url``.

        For each video the page is scrolled and parsed until the comment
        count can be read (6 attempts in total). A video whose comment count
        never becomes readable is skipped.

        Returns:
            DataFrame with one row per collected video (the dict returned by
            ``squeeze``); an empty DataFrame when nothing was collected.
        """
        self.driver = webdriver.Chrome(self.driver_path)
        rows = []
        try:
            self.driver.get(url)
            video_link_list = self.get_video_list()
            for video_url in tqdm(video_link_list):
                self.driver.get(video_url)
                for _ in range(6):  # first try + 5 retries
                    try:
                        self.scroll_down(n=1, time_sleep=1)
                        self.soup = BeautifulSoup(self.driver.page_source, 'html.parser')
                        n_comment = self.get_n_comment(self.soup)  # read the comment count
                        break
                    except Exception:
                        # The comment header is usually just not rendered yet.
                        continue
                else:
                    # Every attempt failed: skip this video. A success on the
                    # last attempt is kept (it used to be dropped).
                    continue
                rows.append(pd.Series(self.squeeze(n_comment)).to_frame().T)
        finally:
            # Release the browser also when a page could not be parsed.
            self.driver.quit()

        if not rows:
            return pd.DataFrame()
        # One concat at the end instead of re-copying the frame per video.
        return pd.concat(rows, ignore_index=True)
    
    
    def get_video_list(self):
        """Collect the links of the videos listed on the channel's video tab.

        Clicks the video tab, scrolls down (up to 101 scrolls, 1 s apart) and
        returns ``https://www.youtube.com`` + ``href`` for every thumbnail
        anchor that has an ``href`` attribute.
        """
        self.click(option='video')
        self.scroll_down(n=100, time_sleep=1)

        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        video_list = soup.select(selector='ytd-app')[0].find_all(class_='yt-simple-endpoint inline-block style-scope ytd-thumbnail')

        ytb_link = 'https://www.youtube.com'
        video_link_list = []
        for anchor in video_list:
            link = anchor.get('href')
            if link is None:
                # Thumbnail anchors without an href are skipped.
                continue
            video_link_list.append(ytb_link + link)
        print(f'Got {len(video_link_list)} video links.')
        return video_link_list
    
    @staticmethod
    def get_n_comment(soup):
        """Return the number of comments of a video page.

        Returns the string '댓글 사용 중지' when one of the formatted-string
        spans carries the "comments are turned off" message; otherwise the
        integer parsed from the comments header.
        """
        spans = soup.find_all('span', class_="style-scope yt-formatted-string")
        if any(span.get_text() == '댓글이 사용 중지되었습니다. ' for span in spans):
            return '댓글 사용 중지'

        selector = 'ytd-comments > ytd-item-section-renderer > div > ytd-comments-header-renderer > div > h2'
        header_text = soup.select(selector)[0].get_text()
        count_text = header_text.split(' ')[-1].split('개')[0]
        return int(count_text.replace(',', ''))
    
    def squeeze(self, n_comment, selector='ytd-app > div > ytd-page-manager > ytd-watch-flexy'):
        """Extract the metadata of the video page held in ``self.soup``.

        Args:
            n_comment: Comment count already read for this video; stored in
                the result unchanged.
            selector: CSS selector of the watch-page container.

        Returns:
            dict with keys ``channel``, ``subscribe``, ``title``, ``genre``,
            ``view``, ``duration`` (minutes, 2 decimals), ``date``,
            ``description``, ``comment``, ``like``, ``dislike``, ``thumbnail``.
            ``like`` / ``dislike`` are 0 when the counts cannot be read.
        """
        def duration_minutes(iso_duration):
            # Sum the H/M/S components of an ISO 8601 time duration such as
            # 'PT213S' or 'PT1H2M3S' and return it in minutes.
            seconds_per_unit = {'H': 3600, 'M': 60, 'S': 1}
            total_seconds = 0
            digits = ''
            parsed_any = False
            for ch in iso_duration.split('PT')[-1]:
                if ch.isdigit() or ch == '.':
                    digits += ch
                    continue
                if ch in seconds_per_unit and digits:
                    total_seconds += float(digits) * seconds_per_unit[ch]
                    parsed_any = True
                digits = ''
            if not parsed_any:
                raise ValueError(f'unsupported duration: {iso_duration!r}')
            return round(total_seconds / 60, 2)

        def aria_count(tags, index, prefix):
            # Number that follows `prefix` in the aria-label of tags[index];
            # 0 when the tag or label is missing (likes/dislikes hidden).
            try:
                label = tags[index].get_attribute_list('aria-label')[0]
                return int(label.split(prefix)[-1][:-1].replace(',', ''))
            except (IndexError, AttributeError, TypeError, ValueError):
                return 0

        temp = self.soup.select(selector)
        watch = temp[0]
        script = watch.find_all('script', class_="style-scope ytd-player-microformat-renderer", id='scriptTag')[0]
        try:
            video_basic_info = json.loads(script.text)
        except (TypeError, ValueError):
            # Fall back to .string when .text does not hold the JSON.
            video_basic_info = json.loads(script.string)

        channel_name = video_basic_info['author']
        title = video_basic_info['name']
        post_date = video_basic_info['uploadDate']
        duration = duration_minutes(video_basic_info['duration'])
        description = video_basic_info['description']
        thumbnail_url = video_basic_info['thumbnailUrl'][0]
        genre = video_basic_info['genre']
        isLive = video_basic_info.get('publication', False)

        if isLive:
            # Live / premiere videos: counts are read by position among all
            # yt-formatted-string tags.
            count_tags = watch.find_all('yt-formatted-string')
            n_like = aria_count(count_tags, 5, '좋아요 ')
            n_dislike = aria_count(count_tags, 6, '싫어요 ')
        else:
            # Everything else: counts are the first two toggle-button labels.
            count_tags = watch.find_all('yt-formatted-string', class_="style-scope ytd-toggle-button-renderer style-text")
            n_like = aria_count(count_tags, 0, '좋아요 ')
            n_dislike = aria_count(count_tags, 1, '싫어요 ')

        # Subscriber and view counts are read the same way for both kinds.
        n_subscribe = watch.find_all('yt-formatted-string', class_="style-scope ytd-video-owner-renderer", id="owner-sub-count")[0].get_text()
        n_subscribe = self.calc_n_subscribe(n_subscribe)
        n_view = watch.find_all('span', class_="view-count style-scope yt-view-count-renderer")[0].get_text()
        n_view = self.calc_n_view(n_view)

        return dict(channel=channel_name, subscribe=n_subscribe, title=title, genre=genre, view=n_view,
                    duration=duration, date=post_date, description=description, comment=n_comment,
                    like=n_like, dislike=n_dislike, thumbnail=thumbnail_url)
    
    
    def scroll_down(self, n, time_sleep):
        """Scroll the browser to the bottom of the page repeatedly.

        After each scroll the method sleeps ``time_sleep`` seconds and stops
        when the page height did not change or ``n`` further scrolls have
        already been done.
        """
        height_script = "return document.documentElement.scrollHeight"
        scrolls_done = 0
        while True:
            height_before = self.driver.execute_script(height_script)
            self.driver.execute_script(f"window.scrollTo(0, {height_before});")
            time.sleep(time_sleep)
            height_after = self.driver.execute_script(height_script)
            if height_after == height_before or scrolls_done == n:
                return
            scrolls_done += 1
            
            
    def click(self, option):
        """Click a tab of the channel header and wait one second.

        ``'video'`` clicks the second tab; ``'channel_info'`` clicks the last
        tab (its index is the number of header tabs found in the page).

        Raises:
            NotImplementedError: for any other ``option``.
        """
        page = BeautifulSoup(self.driver.page_source, 'html.parser')
        tab_count = len(page.find_all('paper-tab', class_="style-scope ytd-c4-tabbed-header-renderer"))

        if option == 'video':
            tab_xpath = '//*[@id="tabsContent"]/paper-tab[2]/div'
        elif option == 'channel_info':
            tab_xpath = f'//*[@id="tabsContent"]/paper-tab[{tab_count}]/div'
        else:
            raise NotImplementedError()

        self.driver.find_element_by_xpath(tab_xpath).click()
        time.sleep(1)
            
    @staticmethod
    def wrap(channel_info, meta):
        meta['sign_in'] = channel_info['sign_in']
        meta['total_cumul_view'] = channel_info['total_cumul_view']
        return meta
    
    @staticmethod    
    def correct_file_name(title):
        invalid_file_name_list = ['\\', '/', ':', '*', '?', '"', '<', '>', '|']
        for inv in invalid_file_name_list:
            if inv in title:
                title = title.replace(inv, '')
        return title
    
    @staticmethod    
    def calc_n_subscribe(text):
        mined_text = text.split('구독자 ')[-1]
        if '천' in text:
            return int(float(mined_text[:-2]) * 1000)
        elif '만' in text:
            return int(float(mined_text[:-2]) * 10000)
        else:
            try:
                output = int(float(mined_text))
            except:
                output = int(float(mined_text[:-1]))
            return output
        
    @staticmethod    
    def calc_n_view(text):
        return int(text.split('조회수 ')[-1][:-1].replace(',', ''))
    
    @staticmethod    
    def calc_comment_like(text):
        return int(text.split('명이')[0].split(' ')[-1].replace(',', ''))