<a href="https://colab.research.google.com/github/respect5716/webshooter/blob/main/examples/naver-movie-review.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Naver Movie Review

## 1. Setup

In [None]:
!pip install webshooter

In [1]:
import pandas as pd
from bs4 import BeautifulSoup

from webshooter import StaticScraper, url_to_soup, static_request

In [2]:
import logging

# Lower the root logger's threshold to INFO so INFO-level records are not filtered out.
logging.getLogger().setLevel(logging.INFO)

## 2. Define

In [3]:
# Scraper instance on which the functions below are registered via @app.register.
# NOTE(review): progbar presumably enables the progress bars seen in the run output — confirm against webshooter.
app = StaticScraper(progbar = True)
# Movie code to scrape; browse() reads this value back as app.v.movie_id.
app.set_var('movie_id', 187323)

## 3. Funcs

In [4]:
def get_page_url(movie_id, page, order='newest'):
    """Build the ``pointWriteFormList.naver`` URL for one page of a movie.

    Args:
        movie_id: Value placed in the ``code`` query parameter.
        page: Value placed in the ``page`` query parameter.
        order: Value placed in the ``order`` query parameter.

    Returns:
        The formatted URL as a string. Values are interpolated as-is,
        without URL-encoding.
    """
    url = f'https://movie.naver.com/movie/bi/mi/pointWriteFormList.naver?code={movie_id}&order={order}&page={page}'
    return url

@app.register('browse')
def browse():
    """Return the URL of every page to request for the configured movie.

    Fetches page 1, reads the total count from the first
    ``div.score_total strong em`` element (thousands separators removed),
    and builds one URL per page.

    Returns:
        A list of page URLs, numbered from 1 up to the computed page count.

    Raises:
        IndexError: If the count element is not found on page 1.
        ValueError: If the count text is not an integer.
    """
    first_page = url_to_soup(get_page_url(app.v.movie_id, 1))
    count_text = first_page.select('div.score_total strong em')[0].text
    total = int(count_text.replace(',', ''))

    # Ceiling division by 10: a partially filled last page still counts.
    # NOTE(review): assumes the site lists 10 entries per page — confirm.
    n_pages = -(-total // 10)

    return [get_page_url(app.v.movie_id, number) for number in range(1, n_pages + 1)]

@app.register('parse', multiprocess=True)
def parse(html):
    """Extract one record per list item from a page's HTML.

    Args:
        html: HTML text of a single page.

    Returns:
        A list of dicts with the keys ``text``, ``score``, ``spoiler``,
        ``date``, ``sympathy`` and ``not_sympathy``, one dict per
        ``div.score_result ul li`` element, in document order.

    Raises:
        IndexError: If an item lacks one of the expected elements.
        ValueError: If a score or vote count is not an integer.
    """
    document = BeautifulSoup(html, 'html.parser')

    records = []
    for position, item in enumerate(document.select('div.score_result ul li')):
        # The comment span's id carries the item's position within the page.
        comment = item.select(f'span#_filtered_ment_{position}')[0]
        body = comment.text.strip()
        # Flagged as spoiler when the item contains a span._unfold_ment element.
        has_spoiler = len(item.select('span._unfold_ment')) > 0
        rating = int(item.select('div.star_score em')[0].text)
        # The last <em> under <dt> holds the date text.
        written_at = item.select('dt em')[-1].text
        up_votes = int(item.select('a._sympathyButton strong')[0].text)
        down_votes = int(item.select('a._notSympathyButton strong')[0].text)

        records.append({
            'text': body,
            'score': rating,
            'spoiler': has_spoiler,
            'date': written_at,
            'sympathy': up_votes,
            'not_sympathy': down_votes,
        })

    return records

## 4. Run

In [5]:
# Run the scraper with the functions registered above and keep what it returns.
# NOTE(review): the result is presumably a pandas DataFrame (it exposes .head()) — confirm.
data = app.run()
# Last expression of the cell, so the notebook displays the first rows.
data.head()

INFO:root:All functions are registered!
INFO:root:Browsing started
INFO:root:Browsing finished
INFO:root:Requesting started


request:   0%|          | 0/206 [00:00<?, ?it/s]

INFO:root:Requesting finished
INFO:root:Parsing started


parse:   0%|          | 0/206 [00:00<?, ?it/s]

INFO:root:Parsing finished
INFO:root:Merging started
INFO:root:Merging finished
INFO:root:Postprocessing started
INFO:root:Postprocessing finished


Unnamed: 0,text,score,spoiler,date,sympathy,not_sympathy
0,알바한테 속지마세요 개씝노잼,2,False,2021.10.18 12:55,0,0
1,너무재미있게봤어요 유머도있고 액션도 좋아요ㅎㅎ 내기준1편보다 재밌었어요!,10,False,2021.10.18 12:51,0,0
2,푹 잤습니다.. 역대급 폭망영화,2,False,2021.10.18 12:47,0,0
3,별점 6.5 주고싶은데.. 평점이 너무 낮아 그냥 10으로 줌. 후반분 액션씬은 볼...,10,False,2021.10.18 12:40,0,0
4,마블영화중에 갠적으론 최악입니다 ㅠ,2,False,2021.10.18 12:39,0,0
