## 특정 웹툰 페이지의 모든 image를 다운로드하기
- soup.select("img[src$='.jpg']")
- img 폴더를 생성하고 img 폴더 하위에 파일 저장

In [28]:
import requests
from bs4 import BeautifulSoup
import os

# Weekday listing page to scrape (the URL selects week=thu).
main_url = 'https://comic.naver.com/webtoon/weekdayList.nhn?week=thu'
res = requests.get(main_url)
html = res.text
soup = BeautifulSoup(html, 'html.parser')

# Collect the src attribute of every <img> whose URL ends in ".jpg".
img_urls = [img_tag['src'] for img_tag in soup.select("img[src$='.jpg']")]

# Create the output directory. exist_ok=True replaces the separate
# isdir()/mkdir() pair, so re-running the cell is safe and there is no
# window between the check and the creation.
os.makedirs('img', exist_ok=True)

# The listing page is sent as the referer with every image request
# (presumably the image host checks it -- not verified here).
req_header = {
    'referer': main_url
}

for img_url in img_urls:
    res = requests.get(img_url, headers=req_header)
    if not res.ok:
        # Do not store an HTTP error body under an image file name.
        print('Skipping {} (HTTP {})'.format(img_url, res.status_code))
        continue
    img_data = res.content
    # The file is named after the last path component of the image URL.
    file_name = os.path.basename(img_url)

    with open(os.path.join('img', file_name), 'wb') as file:
        print('Writing to {} ({} bytes)'.format(file_name, len(img_data)))
        file.write(img_data)

[<img alt="당신의 과녁" height="120" onerror="this.src='https://ssl.pstatic.net/static/comic/images/migration/common/blank.gif'" src="https://shared-comic.pstatic.net/thumb/webtoon/738194/thumbnail/thumbnail_IMAG04_28049435-312d-4253-8db2-afc549b8eda2.jpg" title="당신의 과녁" width="218"/>,
 <img alt="만물의 영장" height="120" onerror="this.src='https://ssl.pstatic.net/static/comic/images/migration/common/blank.gif'" src="https://shared-comic.pstatic.net/thumb/webtoon/729964/thumbnail/thumbnail_IMAG04_c05edb3a-a62b-469e-95d0-7338cd7ed81d.jpg" title="만물의 영장" width="218"/>,
 <img alt="별을 삼킨 너에게" height="120" onerror="this.src='https://ssl.pstatic.net/static/comic/images/migration/common/blank.gif'" src="https://shared-comic.pstatic.net/thumb/webtoon/748831/thumbnail/thumbnail_IMAG04_80132105-b0f1-40c9-b41b-b9b6dd401cbc.jpg" title="별을 삼킨 너에게" width="218"/>,
 <img alt="독립일기" height="90" onerror="this.src='https://ssl.pstatic.net/static/comic/images/migration/common/blank.gif'" src="https://shared-comic.p

## 특정 웹툰의 image 다운로드를 함수로 선언하기

In [119]:
def write_image(para_title, para_url):
    """Download every .jpg image of one webtoon episode page.

    The page at ``para_url`` is fetched and parsed, the ``src`` of each
    ``.wt_viewer img`` element ending in ``.jpg`` is collected, and every
    image is written to ``img/<para_title>/`` under the base name of its URL.
    One progress line is printed per file written.

    Args:
        para_title: Webtoon title; used as the sub-directory name under
            ``img/``.
        para_url: URL of the episode page to scrape. It is also sent as the
            ``referer`` header of each image request.

    Returns:
        None.
    """
    import requests
    from bs4 import BeautifulSoup
    import os

    res = requests.get(para_url)
    html = res.text
    soup = BeautifulSoup(html, 'html.parser')

    # Only images inside the viewer container are episode content.
    img_urls = [
        img_tag['src']
        for img_tag in soup.select(".wt_viewer img[src$='.jpg']")
    ]

    # makedirs also creates the parent 'img' directory when it is missing
    # and does not fail when the target directory already exists.
    dir_name = os.path.join('img', para_title)
    os.makedirs(dir_name, exist_ok=True)

    # Use the page the images were found on as the referer, instead of
    # depending on a module-level variable that may be undefined or stale.
    req_header = {
        'referer': para_url
    }

    for img_url in img_urls:
        res2 = requests.get(img_url, headers=req_header)
        if not res2.ok:
            # Do not store an HTTP error body under an image file name.
            print('Skipping {} (HTTP {})'.format(img_url, res2.status_code))
            continue
        img_data = res2.content
        file_name = os.path.basename(img_url)

        with open(os.path.join(dir_name, file_name), 'wb') as file:
            print('Writing to {} ({} bytes)'.format(file_name, len(img_data)))
            file.write(img_data)

In [79]:
write_image('마음의소리','https://comic.naver.com/webtoon/detail.nhn?titleId=20853&no=1236')

Writing to 20200720114244_fa96de82e0f47d6bf20dfc76b3944eb7_IMAG01_1.jpg (150250 bytes)
Writing to 20200720114244_fa96de82e0f47d6bf20dfc76b3944eb7_IMAG01_2.jpg (148647 bytes)
Writing to 20200720114244_fa96de82e0f47d6bf20dfc76b3944eb7_IMAG01_3.jpg (107514 bytes)
Writing to 20200720114244_fa96de82e0f47d6bf20dfc76b3944eb7_IMAG01_4.jpg (101797 bytes)
Writing to 20200720114244_fa96de82e0f47d6bf20dfc76b3944eb7_IMAG01_5.jpg (163050 bytes)
Writing to 20200720114244_fa96de82e0f47d6bf20dfc76b3944eb7_IMAG01_6.jpg (134504 bytes)
Writing to 20200720114244_fa96de82e0f47d6bf20dfc76b3944eb7_IMAG01_7.jpg (100417 bytes)
Writing to 20200720114244_fa96de82e0f47d6bf20dfc76b3944eb7_IMAG01_8.jpg (142715 bytes)
Writing to 20200720114244_fa96de82e0f47d6bf20dfc76b3944eb7_IMAG01_9.jpg (144513 bytes)
Writing to 20200720114244_fa96de82e0f47d6bf20dfc76b3944eb7_IMAG01_10.jpg (136698 bytes)
Writing to 20200720114244_fa96de82e0f47d6bf20dfc76b3944eb7_IMAG01_11.jpg (189110 bytes)
Writing to 20200720114244_fa96de82e0f47d6

## 웹툰 메인페이지 스크래핑
- 추천 웹툰의 title과 link를 가져오기
- [ {title: '제목', link: 'url'}, ... ]

In [130]:
## img tag title
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Webtoon main page; also the base used to resolve relative hrefs.
main_url = 'https://comic.naver.com/index.nhn'
res = requests.get(main_url)
html = res.text
soup = BeautifulSoup(html, 'html.parser')

# One dict per '.genreRecomInfo2' box:
#   title      - stripped text of the box's 'h6 a' anchor
#   link       - absolute URL built from the box's 'p a' anchor
#   tlist_link - absolute URL built from the box's 'h6 a' anchor
genre_recommand_urls = []
for div_tag in soup.select('.genreRecomInfo2'):
    title_anchors = div_tag.select('h6 a')
    episode_anchors = div_tag.select('p a')
    if not title_anchors or not episode_anchors:
        # Skip a box that lacks either anchor; otherwise the values would be
        # undefined or silently carried over from the previous box.
        continue

    # When a selector matches several anchors, the last one is used.
    h6_a_tag = title_anchors[-1]
    p_a_tag = episode_anchors[-1]

    title = h6_a_tag.text.strip()
    tlist_link = urljoin(main_url, h6_a_tag['href'])
    link = urljoin(main_url, p_a_tag['href'])
    genre_recommand_urls.append({'title': title, 'link': link, 'tlist_link': tlist_link})

# Bare expression so the notebook displays the collected list.
genre_recommand_urls

[{'title': '패밀리 사이즈',
  'link': 'https://comic.naver.com/webtoon/detail.nhn?titleId=626906&no=602',
  'tlist_link': 'https://comic.naver.com/webtoon/list.nhn?titleId=626906'},
 {'title': '마음의소리',
  'link': 'https://comic.naver.com/webtoon/detail.nhn?titleId=20853&no=1236',
  'tlist_link': 'https://comic.naver.com/webtoon/list.nhn?titleId=20853'},
 {'title': '윌유메리미',
  'link': 'https://comic.naver.com/webtoon/detail.nhn?titleId=616239&no=642',
  'tlist_link': 'https://comic.naver.com/webtoon/list.nhn?titleId=616239'},
 {'title': '오늘의 순정망화',
  'link': 'https://comic.naver.com/webtoon/detail.nhn?titleId=716857&no=205',
  'tlist_link': 'https://comic.naver.com/webtoon/list.nhn?titleId=716857'},
 {'title': '자판귀',
  'link': 'https://comic.naver.com/webtoon/detail.nhn?titleId=703850&no=124',
  'tlist_link': 'https://comic.naver.com/webtoon/list.nhn?titleId=703850'},
 {'title': '한림체육관',
  'link': 'https://comic.naver.com/webtoon/detail.nhn?titleId=743139&no=9',
  'tlist_link': 'https://comic.n

In [121]:
# Save the images of the linked episode for every scraped recommendation,
# one sub-directory per title.
for recommended in genre_recommand_urls:
    webtoon_title, episode_url = recommended['title'], recommended['link']
    write_image(webtoon_title, episode_url)
    

Writing to 20200717181605_906e36712889a5a922e1326491a1128f_IMAG01_1.jpg (145036 bytes)
Writing to 20200717181605_906e36712889a5a922e1326491a1128f_IMAG01_2.jpg (154780 bytes)
Writing to 20200717181605_906e36712889a5a922e1326491a1128f_IMAG01_3.jpg (204531 bytes)
Writing to 20200717181605_906e36712889a5a922e1326491a1128f_IMAG01_4.jpg (155246 bytes)
Writing to 20200717181605_906e36712889a5a922e1326491a1128f_IMAG01_5.jpg (179588 bytes)
Writing to 20200717181605_906e36712889a5a922e1326491a1128f_IMAG01_6.jpg (147698 bytes)
Writing to 20200717181605_906e36712889a5a922e1326491a1128f_IMAG01_7.jpg (198055 bytes)
Writing to 20200717181605_906e36712889a5a922e1326491a1128f_IMAG01_8.jpg (179605 bytes)
Writing to 20200717181605_906e36712889a5a922e1326491a1128f_IMAG01_9.jpg (142693 bytes)
Writing to 20200717181605_906e36712889a5a922e1326491a1128f_IMAG01_10.jpg (148089 bytes)
Writing to 20200716152201_0af758a65232bf334a14efccfb82d9a4_IMAG01_1.jpg (183439 bytes)
Writing to 20200716152201_0af758a65232bf33

Writing to 20200716175530_e9c8b8114831b7267e0082a6341a0b05_IMAG01_68.jpg (168857 bytes)
Writing to 20200716175530_e9c8b8114831b7267e0082a6341a0b05_IMAG01_69.jpg (94645 bytes)
Writing to 20200716175530_e9c8b8114831b7267e0082a6341a0b05_IMAG01_70.jpg (12197 bytes)
Writing to 20200614202804_4a512b7af6b92634efcc1466d9845ac9_IMAG01_1.jpg (110775 bytes)
Writing to 20200614202804_4a512b7af6b92634efcc1466d9845ac9_IMAG01_2.jpg (135349 bytes)
Writing to 20200614202804_4a512b7af6b92634efcc1466d9845ac9_IMAG01_3.jpg (136145 bytes)
Writing to 20200614202804_4a512b7af6b92634efcc1466d9845ac9_IMAG01_4.jpg (152036 bytes)
Writing to 20200614202804_4a512b7af6b92634efcc1466d9845ac9_IMAG01_5.jpg (118088 bytes)
Writing to 20200614202804_4a512b7af6b92634efcc1466d9845ac9_IMAG01_6.jpg (172562 bytes)
Writing to 20200614202804_4a512b7af6b92634efcc1466d9845ac9_IMAG01_7.jpg (75039 bytes)
Writing to 20200614202804_4a512b7af6b92634efcc1466d9845ac9_IMAG01_8.jpg (179168 bytes)
Writing to 20200614202804_4a512b7af6b92634e