In [2]:
import re

import requests
from bs4 import BeautifulSoup

### 출력할 텍스트
menu = '''## 웹툰 크롤러 ##
1. 정보
2. 에피소드 목록
0. 나가기'''
info_msg = '''# 정보를 가져올 웹툰선택'''
episode_list_msg = '''# 에피소드 목록 ({title})'''

### 사용할 함수들
def webtoon_info(title):
    soup = BeautifulSoup(requests.get('https://comic.naver.com/webtoon/weekday.nhn').text)
    a_list = soup.select('a.title[title*="{}"]'.format(title))
    
    results = []
    for a in a_list:
        href = a['href']
        m = re.search(r'titleId=(\d+)', href)
        title_id = m.group(1)
        thumbnail = a.parent.select_one('img')['src']
        title = a.get_text(strip=True)
        cur_info = {
            'title': title,
            'title_id': title_id,
            'link': href,
            'thumbnail': thumbnail,
        }
        results.append(cur_info)
    return results


def webtoon_detail(title_id):
    # title_id에 해당하는 웹툰의 상세정보를 리턴해준다
    url = f'https://comic.naver.com/webtoon/list.nhn?titleId={title_id}'
    response = requests.get(url)
    html = response.text
    soup = BeautifulSoup(html)
    
    div_comicinfo = soup.select_one('div.comicinfo')
    div_detail = div_comicinfo.select_one('div.detail')
    
    title = div_detail.select_one('h2').contents[0].strip()
    author = div_detail.select_one('span.wrt_nm').get_text(strip=True)
    description = div_detail.select_one('p').get_text('\n', strip=True)
    
    # genre, age
    div_detail_info = div_detail.select_one('p.detail_info')
    genre = div_detail_info.select_one('span.genre').get_text(strip=True)
    age = div_detail_info.select_one('span.age').get_text(strip=True)
    
    # 모든 로직이 이 함수 안에 전부 존재
    return {
        'title': title,
        'description': description,
        'author': author,
        'genre': [g.strip() for g in genre.split(',')],
        'age': age,
    }


def episode_list(title_id):
    url = f'https://comic.naver.com/webtoon/list.nhn?titleId={title_id}'
    response = requests.get(url)
    html = response.text
    soup = BeautifulSoup(html)
    
    results = []
    table = soup.select_one('table.viewList')
    tr_list = table.select('tr')
    for tr in tr_list:
        # title클래스를 가진 td
        td_title = tr.select_one('td.title')
        # 가 없으면 넘어간다
        if not td_title:
            continue

        title = td_title.get_text(strip=True)

        td_rating = tr.select_one('td:nth-child(3)')
        rating = td_rating.select_one('strong').get_text(strip=True)

        td_date = tr.select_one('td.num')
        date = td_date.get_text(strip=True)

        episode = {
            'title': title,
            'rating': rating,
            'date': date,
        }
        results.append(episode)
    return results


def get_title_id(search_keyword, text='웹툰선택'):
    # 정보를 보고 싶을 때
    search_keyword = input('> 웹툰명을 입력하세요: ').strip()
    results = webtoon_info(search_keyword)

    print('# 검색 결과')
    for index, result in enumerate(results, start=1):
        print(f'{index}: {result["title"]}')
        
    choice = int(input(f'> {text}: '))
    # 선택한 번호 -1번 index의 웹툰이 선택한 웹툰(ex: 2일 경우 1번 index)
    webtoon = results[choice - 1]
    title_id = webtoon['title_id']
    return title_id


while True:
    print(menu)
    val = input('> 입력: ')
    print()
    if val == '1':
        title_id = get_title_id(val)
        info = webtoon_detail(title_id)
        print()
        print(f'# 정보 ({info["title"]})')
        print(f'설명: {info["description"]}')
        print(f'작가: {info["author"]}')
        print(f'장르: {info["genre"]}')
        print(f'연령제한: {info["age"]}')
        print()
            
    elif val == '2':
        # 에피소드 목록을 불러오고 싶을 때
        title_id = get_title_id(val)
        episodes = episode_list(title_id)
        for episode in episodes:
            print(episode['title'])
            print(f' 평점: {episode["rating"]}, 날짜: {episode["date"]}')
        print()
    elif val == '0':
        # 종료
        break


## 웹툰 크롤러 ##
1. 정보
2. 에피소드 목록
0. 나가기
> 입력: 

## 웹툰 크롤러 ##
1. 정보
2. 에피소드 목록
0. 나가기
> 입력: 0



In [8]:
import re

import requests
from bs4 import BeautifulSoup

class Webtoon:
    # 이 클래스에서 공통적으로 사용할 변수는 클래스변수로 선언
    URL_EPISODE_LIST = 'https://comic.naver.com/webtoon/list.nhn?titleId={id}'
    URL_WEBTOON_LIST = 'https://comic.naver.com/webtoon/weekday.nhn'
    WEBTOON_LIST_HTML = None

    
    def __init__(self, id, url_thumbnail, title):
        # 객체 초기화 메서드, 초기화시 주어진 매개변수들을 인스턴스의 속성으로 지정
        self.id = id
        self.url_thumbnail = url_thumbnail
        self.title = title
        
        self.author = None
        self.description = None
        self.genres = None
        self.age = None
        # 작가
        # 설명
        # 장르
        # 연령제한
        # 링크
    
    def __repr__(self):
        return f'Webtoon({self.title}, {self.id})'
    
    def get_detail_info(self):
        url = self.link
        response = requests.get(url)
        html = response.text
        soup = BeautifulSoup(html)

        div_comicinfo = soup.select_one('div.comicinfo')
        div_detail = div_comicinfo.select_one('div.detail')

        title = div_detail.select_one('h2').contents[0].strip()
        author = div_detail.select_one('span.wrt_nm').get_text(strip=True)
        description = div_detail.select_one('p').get_text('\n', strip=True)

        # genre, age
        div_detail_info = div_detail.select_one('p.detail_info')
        genre = div_detail_info.select_one('span.genre').get_text(strip=True)
        age = div_detail_info.select_one('span.age').get_text(strip=True)

        self.description = description
        self.author = author
        self.genres = [g.strip() for g in genre.split(',')]
        self.age = age
        
    def show_info(self):
        print(self.title)
        print(f' 작가 : {self.author}')
        print(f' 설명 : {self.description}')
        print(f' 장르 : {self.genrese}')
        print(f' 연령 : {self.age}')
    
    @classmethod
    def search(cls, keyword):
        if not cls.WEBTOON_LIST_HTML:
            response = requests.get(cls.URL_WEBTOON_LIST)
            cls.WEBTOON_LIST_HTML = response.text
        else:
            html = cls.WEBTOON_LIST_HTML
        
        soup = BeautifulSoup(cls.WEBTOON_LIST_HTML)
        css_selector = 'a.title[title*="{}"]'.format(keyword)
        a_list = soup.select(css_selector)

        results = []
        for a in a_list:
            href = a['href']
            m = re.search(r'titleId=(\d+)', href)
            title_id = m.group(1)
            thumbnail = a.parent.select_one('img')['src']
            title = a.get_text(strip=True)
            cur_info = {
                'title': title,
                'title_id': title_id,
                'link': href,
                'thumbnail': thumbnail,
            }
            results.append(cur_info)
    
        print('# 검색결과')
        for index, result in enumerate(results, start=1):
            print(f'{index}: {result["title"]}')
                  
        choice = int(input('> 선택 :'))
        selected = results[choice-1]
        instance =  cls(
            id=selected['title_id'],
            title=selected['title'],
            url_thumbnail=selected['thumbnail'],
        )
        return instance
    
    @property
    def link(self):
        return self.URL_EPISODE_LIST.format(id=self.id)
        
    def get_episode_list(self):
        # 이 웹툰이 가진 에피소드 목록을 리턴해준다
        pass

In [12]:
Webtoon.search('유')
self.get_detail_info('유미')
self.show_info()

# 검색결과
1: 유일무이 로맨스
2: 윌유메리미
3: 유미의 세포들
4: 유미의 세포들
5: 윌유메리미
6: 공유몽
7: 유령극단
> 선택 :1


NameError: name 'self' is not defined