# 목적
* 네이버 블로그에 검색어를 넣었을 때 나오는 블로그들을 크롤링
* 검색 시 특정 기간, 월별, 연별 검색에 따라 사용 방법이 다소 다름 (현재는 월별 검색에 최적화)

# 선행설치

In [1]:
#선행설치
# # 크롤링
# !pip install beautifulsoup4
# !pip install requests
# # 자연어처리
# !pip install konlpy (JDK 설치가 되어있어야함 )
# # 워드클라우드
# !pip install wordcloud

In [2]:
# import

from tqdm import tqdm_notebook  # 진행과정 시각화
from datetime import timedelta  # 시간날짜

import re
import os
import datetime
import pandas as pd
import numpy as np
import gc

import requests
import urllib.request
import urllib.error
import urllib.parse
from bs4 import BeautifulSoup  # 크롤링


from IPython.core.display import display, HTML
display(HTML('<style>.container {width:100% !important; }</style>'))

In [3]:
def del_outword(string):
    '''Strip emoji and other non-linguistic characters from crawled text.

    Two character-class patterns are applied in sequence: first the emoji
    and pictograph ranges, then stray Hangul jamo, a handful of punctuation
    marks, line breaks and zero-width / replacement characters.

    Emoji ranges taken from
    https://stackoverflow.com/questions/33404752/removing-emojis-from-a-string-in-python

    Parameters
    ----------
    string(string) : text to clean

    Returns
    -------
    string
        the input with every matching character removed
    '''
    # Emoji / pictograph ranges, joined into a single character class.
    emoji_ranges = (
        u"\U0001F600-\U0001F64F",  # emoticons
        u"\U0001F300-\U0001F5FF",  # symbols & pictographs
        u"\U0001F680-\U0001F6FF",  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF",  # flags (iOS)
        u"\U00002702-\U000027B0",
        u"\U0001f926-\U0001f937",
        u'\U00010000-\U0010ffff',
        u"\u200d",
        u"\u2640-\u2642",
        u"\u2600-\u2B55",
        u"\u23cf",
        u"\u23e9",
        u"\u231a",
        u"\u3030",
        u"\ufe0f",
    )
    emoji_re = re.compile("[" + "".join(emoji_ranges) + "]+", flags=re.UNICODE)

    # Characters that only add noise to the analysis: isolated jamo
    # (e.g. laughter such as "ㅋㅋ"), some special characters, line breaks.
    noise_re = re.compile(r'[ㄱ-ㅎㅏ-ㅣ!?~"^_\n\r#\ufeff\u200d\u200b\u7643\ufffd\u682e\u62c4]+')

    cleaned = string
    for remover in (emoji_re, noise_re):
        cleaned = remover.sub(r'', cleaned)
    return cleaned

In [4]:
def time_change(x):
    '''Convert a search-result date label into the first day of its month.

    Naver blog results sometimes show the posting date as a relative label
    ("N분 전", "N시간 전", "N일 전", "어제") instead of an absolute
    "YYYY.MM.DD." date. Relative labels are resolved against the current
    local time. Because the analysis is done per month, the day is always
    normalised to 1.

    Parameters
    ----------
    x(string or date/datetime) : date label taken from the search result.
        An absolute date may be written with or without the trailing dot.
        A date or datetime object is accepted as-is.

    Returns
    -------
    x : datetime
        midnight on the first day of the month the post was written in

    Raises
    ------
    ValueError
        if a string is neither a known relative label nor a YYYY.MM.DD date

    Example
    -------
    >>> time_change("2019.01.31.")
    datetime.datetime(2019, 1, 1, 0, 0)

    >>> time_change("2019.01.01")
    datetime.datetime(2019, 1, 1, 0, 0)

    >>> time_change("47분 전")   # result depends on the current time
    datetime.datetime(2020, 6, 1, 0, 0)
    '''
    if isinstance(x, str):
        label = x.strip()
        now = datetime.datetime.now()
        # Capture the whole number, not just its first digit, plus the unit.
        relative = re.search(r'([0-9]+)\s*(일|시간|분)\s*전', label)
        if relative:
            amount = int(relative.group(1))
            unit = {'일': 'days', '시간': 'hours', '분': 'minutes'}[relative.group(2)]
            moment = now - timedelta(**{unit: amount})
        elif label == "어제":
            moment = now - timedelta(days=1)
        else:
            # 'YYYY.MM.DD.' or 'YYYY.MM.DD': drop only a trailing dot so a
            # label without one does not lose its last digit.
            moment = datetime.datetime.strptime(
                label.rstrip('.').replace('.', '-'), '%Y-%m-%d')
    else:
        # Already a date-like object; only year and month are used below.
        moment = x

    # Normalise every date to the first day of its month.
    return datetime.datetime(moment.year, moment.month, 1)

In [5]:
def naver_blog_crawling(keyword, start_num=1, end_num=101, date_option=0, date_from='', date_to='', save=True):
    '''Crawl Naver blog search results for a keyword.

    Walks the search result pages (10 results per page), follows every
    result to the blog post itself, extracts the post text and collects
    everything in a DataFrame. Posts whose text cannot be extracted are kept
    with the placeholder text "X".

    Parameters
    ----------
    keyword(string) : search keyword; a mandatory word can be added in the
                      form "keyword +mandatory"
    start_num(int) : (default = 1) first result position, values ending in 1
                     are recommended
    end_num(int) : (default = 101) last result position, values ending in 1
                   are recommended. It is only used when the total number of
                   hits cannot be read from the first result page; otherwise
                   it is replaced by that total.
    date_option(int) : (default = 0) selects the search period
                         0 : all, 2 : 1 day, 3 : 1 week, 4 : 1 month,
                         6 : 6 months, 7 : 1 year, 8 : custom period
    date_from(YYYYMMDD) : (default = "") first day of the period when
                          date_option is 8
    date_to(YYYYMMDD) : (default = "") last day of the period when
                        date_option is 8
    save(bool) : (default = True) whether to write the result to a csv file
                 under ./output/크롤링/<keyword>/

    Returns
    -------
    crawling_df : DataFrame
        post_dates title  full_text         url
      0 2010-01-01 title  [full_text]       http://blog.naver.com/PostView.nhn?blogId=wend...

    real_length : int
        number of rows in crawling_df (after dropping duplicated urls)
    '''
    # Pattern used to pull the first href out of a result item.
    pattern = re.compile(r'href="[A-z0-9\:\/\&\;\.\?\=]+')

    # Result columns; the four lists are always appended to together.
    postdates = []
    strings = []
    urls = []
    titles = []
    # Bookkeeping of failed items (positions counted by `count`).
    output_error = []
    connection_error = []
    count = 1

    # Only the keyword, the period and the start position change per request.
    # NOTE(review): the keyword is inserted without URL-encoding, so a literal
    # "+" probably reaches the server as a space - confirm whether
    # urllib.parse.quote should be applied.
    base_url = 'https://search.naver.com/search.naver?date_from={date_from}&date_option={date_option}&date_to={date_to}&dup_remove=1&nso=&post_blogurl=&post_blogurl_without=&query={keyword}&sm=tab_pge&srchby=all&st=sim&where=post&start={start}'

    try:
        # Read the total number of hits from the header of the first page.
        search_list = base_url.format(keyword=keyword, start=start_num,
                                      date_option=date_option, date_from=date_from, date_to=date_to)
        response = requests.get(search_list)
        soup = BeautifulSoup(response.content, 'lxml')
        max_searched = soup.select("div.section_head")[0].text.split('/')[1]
        max_searched = int(re.sub('[가-힣 ,]+', "", max_searched))
        # NOTE(review): this replaces the caller's end_num even when the total
        # is larger - confirm whether min(end_num, max_searched) was intended.
        end_num = max_searched
    except Exception:
        # Best effort: if the total cannot be determined, keep the caller's
        # end_num. Exception (not a bare except) so Ctrl-C still interrupts.
        pass

    # Each result page starts at position i and lists 10 results.
    for i in tqdm_notebook(range(start_num, end_num+10, 10), desc = 'section'):
        search_list = base_url.format(keyword=keyword, start=i,
                                      date_option=date_option, date_from=date_from, date_to=date_to)
        response = requests.get(search_list)

        if response.status_code != 200:
            connection_error.append(count)
            count += 1
            continue

        soup = BeautifulSoup(response.content, 'lxml')
        sections = soup.find_all('li', attrs={'class': 'sh_blog_top'})

        for section in sections:
            try:
                # The href is extracted with a regular expression from the
                # raw markup of the result item.
                url = re.findall(pattern, str(section))[0].replace(
                    '?Redirect=Log&amp;logNo=', '/').replace('href="', '')
                title = section.select_one('a.txt84').text
                date = section.select_one('dd.txt_inline').text.strip()

                # The blog url only returns a frame page that cannot be
                # crawled directly ...
                frame_soup = BeautifulSoup(requests.get(url).text, 'lxml')

                # ... the real post is at the src of its #mainFrame element.
                real_blog_post_url = "http://blog.naver.com" + \
                    frame_soup.select('#mainFrame')[0].get('src')
                post_soup = BeautifulSoup(
                    requests.get(real_blog_post_url).text, 'lxml')
            except Exception:
                # Anything that fails up to here skips the item entirely.
                output_error.append(count)
                count += 1
                continue

            urls.append(real_blog_post_url)
            titles.append(title)
            postdates.append(date)

            # Post body: three page layouts are tried in turn.
            try:
                blog_post_content = post_soup.select('div#postViewArea')
                if not blog_post_content:
                    blog_post_content = post_soup.select(
                        'div.se-main-container')
                if not blog_post_content:
                    blog_post_content = post_soup.select(
                        'div.se_component_wrap.sect_dsc.__se_component_area')

                # Join all text fragments first, then clean once; cleaning
                # only removes single characters, so the result is the same
                # as cleaning after every fragment.
                string = "".join(
                    " " + sentence.replace('\xa0', " ")
                    for sentence in blog_post_content[0].stripped_strings)
                string = del_outword(string)

                # A body consisting only of blanks is stored as "X".
                if string.replace(" ", ""):
                    strings.append([string])
                else:
                    strings.append(["X"])
            except Exception:
                # Keep the row (url/title/date are already stored) with a
                # placeholder text.
                strings.append(["X"])
                output_error.append(count)
            count += 1

    crawling_df = pd.DataFrame(
        {"post_dates": postdates, "title": titles, "full_text": strings, "url": urls})
    crawling_df['post_dates'] = crawling_df['post_dates'].apply(time_change)
    # Remove posts that were found more than once.
    crawling_df.drop_duplicates("url", inplace=True)
    crawling_df.reset_index(drop=True, inplace=True)
    real_length = crawling_df.shape[0]

    if save:
        today = datetime.datetime.today().date()
        os.makedirs(
            './output/크롤링/{keyword}'.format(keyword=keyword), exist_ok=True)
        # File name: "period_keyword_length_date"
        # e.g. 20160501~20160531_물치리 +강원도_9_2020-05-07
        crawling_df.to_csv(f"./output/크롤링/{keyword}/{date_from}~{date_to}_{keyword}_{real_length}_{today}.csv", encoding='utf-8', index=False)
    return crawling_df, real_length

In [6]:
def make_date_list(start_year,end_year,month_gap = 1,year_gap = 1, types = 'month'):
    '''Build the lists of period start / end dates used for searching.

    In 'month' mode one period is produced for every month_gap-th month
    (starting with January) of every year_gap-th year; each period runs from
    the first to the last calendar day of that month. In 'year' mode one
    period per selected year is produced, running from January 1st to
    December 31st.

    Parameters
    ----------
    start_year(int) : first year to search
    end_year(int) : last year to search (inclusive)
    month_gap(int) : step between the months that are searched
    year_gap(int) : step between the years that are searched
    types(string) : 'year' or 'month' (case-insensitive)

    Returns
    -------
    start_date, end_date : list
        two lists of equal length holding "YYYYMMDD" strings

    Raises
    ------
    ValueError
        if month_gap or year_gap is smaller than 1, or types is neither
        'month' nor 'year'

    Examples
    --------
    >>> make_date_list(start_year = 2010, end_year = 2011,month_gap = 6,year_gap = 1, types = 'month')
    (['20100101', '20100701', '20110101', '20110701'], ['20100131', '20100731', '20110131', '20110731'])

    >>> make_date_list(start_year = 2010, end_year = 2013,year_gap = 1, types = 'year')
    (['20100101', '20110101', '20120101', '20130101'], ['20101231', '20111231', '20121231', '20131231'])
    '''
    # Local import so the function does not depend on the notebook's import cell.
    import calendar

    if month_gap < 1 or year_gap < 1:
        raise ValueError("month_gap and year_gap must be at least 1")
    mode = types.lower()
    if mode not in ('month', 'year'):
        raise ValueError("types must be 'month' or 'year', got {!r}".format(types))

    start_date = []
    end_date = []

    for year in range(start_year, end_year+1, year_gap):
        if mode == 'year':
            start_date.append(f'{year}0101')
            end_date.append(f'{year}1231')
            continue
        for month in range(1, 13, month_gap):
            # Real last day of the month (28/29/30/31) instead of always 31.
            last_day = calendar.monthrange(year, month)[1]
            start_date.append(f'{year}{month:02d}01')
            end_date.append(f'{year}{month:02d}{last_day:02d}')

    return start_date,end_date

In [7]:
# Settings for a single, fixed search period
mustword = "양양"
word_list = [
    "물치", "설악해수욕장", "낙산", "오산", "수산항", "동호", "하조대",
    "하광정리", "기사문", "동산", "인구", "광진", "남애",
]

# Result positions to crawl; date_option 8 selects a custom period.
start_num, end_num = 1, 701
date_option = 8
date_from, date_to = "20150101", "20151231"
today = datetime.date.today()

In [8]:
# Settings for a month-by-month search
mustword = "강릉"
word_list = ["가족", "커플", "홀로"]

# Result positions to crawl; date_option 8 selects a custom period.
start_num, end_num = 1, 991
date_option = 8

start_year, end_year = 2010, 2019
month_gap = year_gap = 1
today = datetime.date.today()

# Lists of period start / end dates, one pair per searched month
date_from, date_to = make_date_list(start_year, end_year, month_gap, year_gap)

In [9]:
# Stop words: load the '불용어' column of the stop-word workbook.
# read_excel takes no `encoding` argument (an .xlsx file carries its own
# encoding), and recent pandas versions raise TypeError when it is passed.
sw = pd.read_excel("stopword(cp949).xlsx")['불용어'].tolist()

In [10]:
def searching_all(word_list,date_from, date_to, start_num = 1, end_num = 991, date_option = 8, mustword = ""):
    '''Run naver_blog_crawling for many keywords and, optionally, many periods.

    Every word is crawled once per period. The crawl results are not
    returned; with its default save=True, naver_blog_crawling writes each
    result to a csv file.

    Parameters
    ----------
    word_list(list): words to search for
    date_from(list or string) : period start date(s), YYYYMMDD
    date_to(list or string) : period end date(s), YYYYMMDD; when date_from is
                              a list this must be a list of the same length
    start_num(int) : (default = 1) first result position
    end_num(int) : (default = 991) last result position
                   (Naver search shows at most 1000 results)
    date_option(int) : (default = 8) search period option, 8 = custom period
    mustword(string) : (default = '') mandatory word, appended to every
                       keyword as " +mustword"; nothing is appended when empty

    Raises
    ------
    ValueError
        if date_from is a list and date_to is not a list of the same length
    '''
    # One (from, to) pair per period; a single pair when plain dates are given.
    if isinstance(date_from, (list, tuple)):
        if not isinstance(date_to, (list, tuple)) or len(date_from) != len(date_to):
            raise ValueError("date_from and date_to must be lists of the same length")
        periods = list(zip(date_from, date_to))
    else:
        periods = None

    total_num = len(word_list)
    for num, word in enumerate(tqdm_notebook(word_list, desc = 'Total'), start=1):
        # Without a mandatory word the keyword must not end in a dangling " +".
        keyword = word + " +" + mustword if mustword else word
        print('{num}번째/{total_num}번째 {keyword}'.format(num = num, total_num = total_num,keyword = keyword))

        if periods is not None:
            # Yearly / monthly search: one crawl per period.
            for date_from_one, date_to_one in tqdm_notebook(periods, desc = "Time"):
                naver_blog_crawling(keyword, start_num, end_num, date_option, date_from_one, date_to_one)
        else:
            # Single period.
            naver_blog_crawling(keyword, start_num, end_num, date_option, date_from, date_to)
        # Release the crawled data of this keyword before starting the next one.
        gc.collect()

In [11]:
# Crawl every keyword for every configured period.
searching_all(
    word_list=word_list,
    date_from=date_from,
    date_to=date_to,
    start_num=start_num,
    end_num=end_num,
    date_option=date_option,
    mustword=mustword,
)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  from ipykernel import kernelapp as app


HBox(children=(FloatProgress(value=0.0, description='Total', max=3.0, style=ProgressStyle(description_width='i…

1번째/3번째 가족 +강릉


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, description='Time', max=120.0, style=ProgressStyle(description_width='…

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, description='section', max=54.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=52.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=48.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=47.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=49.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=45.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=64.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=62.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=46.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=51.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=40.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=46.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=57.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=57.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=48.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=56.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=55.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=70.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=79.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=88.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=67.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=63.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=66.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=81.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=124.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=110.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=74.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=68.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=81.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=85.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=103.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=101.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=74.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=96.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=79.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=75.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=94.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', style=ProgressStyle(description_width='initial'…




HBox(children=(FloatProgress(value=0.0, description='section', max=101.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=103.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=105.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=108.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=114.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=139.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=107.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=103.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=96.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=99.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=125.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=112.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=107.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=123.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=131.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=144.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=169.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=174.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=149.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=135.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=110.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=120.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=141.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=139.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=130.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=144.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=166.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=167.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=187.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=227.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=174.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=161.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=138.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=155.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=175.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=164.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=156.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=168.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=186.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=200.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=210.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=258.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=196.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=167.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=155.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=170.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=208.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=219.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=215.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=205.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=241.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=245.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=241.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=290.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=260.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=247.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=253.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=323.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=354.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=427.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=291.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=281.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=316.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=292.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=383.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=483.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=407.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=337.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=324.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=417.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=446.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=421.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=385.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=411.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=405.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=420.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=537.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=623.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=511.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=471.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=401.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=489.0, style=ProgressStyle(description_widt…



2번째/3번째 커플 +강릉


HBox(children=(FloatProgress(value=0.0, description='Time', max=120.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='section', max=9.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=8.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=10.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=9.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=9.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=10.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=12.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=10.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=9.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=9.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=8.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=8.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=8.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=8.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=9.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=12.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=9.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=13.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=12.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=14.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=9.0, style=ProgressStyle(description_width=…




HBox(children=(FloatProgress(value=0.0, description='section', max=10.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=10.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=12.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=18.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=25.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=26.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=22.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=29.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=27.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=29.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=25.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=19.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=22.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=23.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', style=ProgressStyle(description_width='initial'…




HBox(children=(FloatProgress(value=0.0, description='section', style=ProgressStyle(description_width='initial'…




HBox(children=(FloatProgress(value=0.0, description='section', max=36.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=32.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=36.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=24.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=26.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=32.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=29.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=22.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=21.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=27.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=23.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=29.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=33.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=28.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=34.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=27.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=31.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=41.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=43.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=36.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=36.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=30.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=37.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=38.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=37.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=34.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=44.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=47.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=47.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=55.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=61.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=43.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=44.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=37.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=39.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=44.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=37.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=38.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=56.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=50.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=54.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=55.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=66.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=47.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=46.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=37.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=46.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=57.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=58.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=59.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=56.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=58.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=59.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=67.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=65.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=55.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=52.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=55.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=59.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=124.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=99.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=67.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=68.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=70.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=87.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=87.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=106.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=82.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=78.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=74.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=95.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=108.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=110.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=108.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=96.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=94.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=104.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=136.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=148.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=101.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=106.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=104.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='section', max=124.0, style=ProgressStyle(description_widt…



3번째/3번째 홀로 +강릉


HBox(children=(FloatProgress(value=0.0, description='Time', max=120.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='section', max=12.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=13.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=12.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=11.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=12.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=10.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=12.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=14.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=15.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=11.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=12.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=11.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=12.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=16.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=12.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=15.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=14.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=18.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=13.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=20.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=14.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=21.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=20.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=24.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=19.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=16.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=15.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=11.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=15.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=14.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=18.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=18.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=15.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=21.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=17.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=14.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=18.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=20.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=14.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=15.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=18.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=20.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=20.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=21.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=19.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=17.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=20.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=17.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=20.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=22.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=19.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=17.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=21.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=18.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=27.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=26.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=20.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=19.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=19.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=20.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=23.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=24.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=21.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=20.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=24.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=26.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=21.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=25.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=25.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=27.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=24.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=22.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=24.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=25.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=21.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=23.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=24.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=28.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=25.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=32.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=32.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=22.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=22.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=22.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=26.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=28.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=26.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=22.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=25.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=25.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=21.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=26.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=26.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=24.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=24.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=24.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=34.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=63.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=38.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=27.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=29.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=30.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=29.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=37.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=49.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=35.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=30.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=38.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=41.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=41.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=34.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=34.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=38.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=36.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=37.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=46.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=42.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=41.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=42.0, style=ProgressStyle(description_width…




HBox(children=(FloatProgress(value=0.0, description='section', max=49.0, style=ProgressStyle(description_width…






In [12]:
def word_couple(df, year,stopword, custom_sw,keyword,verbose = False):
    '''Compute co-occurring noun pairs for the posts of a single year.

    Only the 'Noun' column is analysed. A document-term matrix is built
    from the posts of ``year``, the vocabulary is cut down to the most
    frequent words (aiming for roughly 70~80 of them), and every pair of
    remaining words that appears in the same post is counted.

    Parameter
    ---------
    df(DataFrame) : crawled data; must contain 'year' and 'Noun' columns.
        'Noun' is expected to hold the string form of a list of nouns
        (e.g. "['a', 'b']").
    year(int) : year to analyse
    stopword(list) : common stop words
    custom_sw(dictionary) : extra stop words per search keyword
    keyword(string) : search keyword; it is itself treated as a stop word
        and is used as the key into ``custom_sw``
    verbose(bool): print progress information

    Return
    ------
    word_couple_df : DataFrame sorted by 'freq' in descending order
        word1    word2    freq
    0   서핑     양양군   1187

    Notes
    -----
    The frequency threshold is found with a randomised search, so the
    result can differ between runs. ``CountVectorizer`` must already be
    available in the enclosing module/notebook.
    '''
    import random
    from itertools import combinations

    # Stop words = common list + the keyword + keyword-specific extras.
    # A set gives O(1) membership tests in the filtering step below.
    stop_words = set(stopword)
    stop_words.add(keyword)
    extra = custom_sw.get(keyword) if isinstance(custom_sw, dict) else None
    if isinstance(extra, (list, tuple, set, frozenset)):
        stop_words.update(extra)

    # Parse only the rows of the requested year; missing values become
    # empty strings so the regex below always receives text.
    year_noun = df.loc[df.year == year, 'Noun'].fillna("")
    year_noun = year_noun.apply(lambda x: re.sub(r"[\[\]' ]", "", x).split(','))
    year_noun = year_noun.apply(
        lambda sentence: [word for word in sentence if word not in stop_words])
    corpus = [" ".join(words) for words in year_noun]

    # DTM: one row per post, one column per word.
    vector = CountVectorizer()
    values = vector.fit_transform(corpus).toarray()
    # get_feature_names() was removed in newer scikit-learn releases.
    if hasattr(vector, "get_feature_names_out"):
        cols = vector.get_feature_names_out()
    else:
        cols = vector.get_feature_names()
    DTM_df = pd.DataFrame(values, columns=cols)

    # Search for a frequency threshold that leaves about 70~80 words.
    imsi = DTM_df.sum()
    min_v = 70
    max_v = 80
    start = 0
    end = int(imsi.max())
    fin = 0
    count = 0
    while (fin > max_v) or (fin < min_v):
        y = random.randint(start, end)
        fin = int((imsi > y).sum())

        if fin <= min_v:
            end = y
        elif fin >= max_v:
            start = y

        # Give up after a bounded number of draws; without this increment
        # the loop never ends when no threshold can hit the target band
        # (for example when the vocabulary has fewer than 70 words).
        count += 1
        if count > 15:
            break
    count_min = y

    select_cols = imsi[imsi > count_min].index.values
    DTM_df = DTM_df[select_cols]
    word_length = len(select_cols)

    # Count word pairs: for each post, every pair of words present in it
    # contributes the larger of the two in-post frequencies.
    count_dict = {}
    for _, row in DTM_df.iterrows():
        present = row[row > 0]
        for (word1, n1), (word2, n2) in combinations(present.items(), 2):
            pair = (word1, word2)
            count_dict[pair] = count_dict.get(pair, 0) + max(n1, n2)

    count_list = [[pair[0], pair[1], freq] for pair, freq in count_dict.items()]
    word_couple_df = pd.DataFrame(count_list, columns = ['word1','word2','freq'])
    word_couple_df = word_couple_df.sort_values(by='freq',ascending= False)
    word_couple_df.reset_index(drop =True,inplace = True)

    if verbose:
        print(keyword, year)
        print("연도별 대상 row수:",year_noun.shape[0])
        print("count_min:", count_min)
        print("단어 길이:", word_length)
        print("word_couple_df 길이:",word_couple_df.shape[0])
        print("------------------")

    return word_couple_df