## 네이버 웹툰 목록 크롤링

### 전체 코드

In [7]:
from collections import OrderedDict
from itertools import count
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

# Obtain a User-Agent string from fake_useragent's `chrome` attribute
# (presumably a Chrome browser UA string - confirm against the library docs).
ua = UserAgent()
ua_chrome = ua.chrome

# Request headers sent with every list-page request made by get_list().
headers = {
    'User-Agent': ua_chrome,
}

def get_list(title_id, timeout=10):
    """Collect the episodes of a webtoon by walking its list pages in order.

    Pages are requested starting at page 1. Crawling stops as soon as an
    episode URL that was already collected shows up again (presumably the
    site serves the last page again for out-of-range page numbers - confirm),
    or when a page contributes no episode at all.

    Args:
        title_id: Value sent as the ``titleId`` query parameter.
        timeout: Seconds passed to ``requests.get`` for each page request.

    Returns:
        An ``OrderedDict`` mapping each episode URL to a dict with the keys
        ``'url'``, ``'name'``, ``'img_url'`` and ``'is_up'``, in the order the
        episodes were encountered.

    Raises:
        requests.HTTPError: If a list page answers with an error status.
    """
    list_url = 'http://comic.naver.com/webtoon/list.nhn'
    ep_dict = OrderedDict()

    for page in count(1):
        params = {
            'titleId': title_id,
            'page': page,
        }
        print('try {}'.format(params))

        response = requests.get(
            list_url, params=params, headers=headers, timeout=timeout)
        # Fail loudly on an error page instead of parsing it as an episode list.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        found_new = False
        for tag in soup.select('.viewList tr'):
            # The attribute value is quoted because `detail.nhn` contains a
            # dot and is therefore not a valid unquoted CSS identifier.
            a_tags = tag.select('a[href*="detail.nhn"]')
            if not a_tags:
                # Header / banner rows carry no episode link.
                continue
            a_tag = a_tags[0]

            # Whether the row shows the "recently updated" icon.
            is_up = bool(tag.select('img[src*=ico_toonup]'))

            img_tag = a_tag.find('img')

            # urljoin resolves the relative href against the list URL.
            ep_url = urljoin(list_url, a_tag['href'])
            ep_name = img_tag['title']
            img_url = img_tag['src']

            # An already-seen episode means the pages started repeating.
            if ep_url in ep_dict:
                return ep_dict

            ep = {
                'url': ep_url,
                'name': ep_name,
                'img_url': img_url,
                'is_up': is_up,
            }

            ep_dict[ep_url] = ep
            found_new = True

            print(ep)

        # A page without any episode row would otherwise keep count(1)
        # running forever (e.g. unknown title id).
        if not found_new:
            return ep_dict

In [8]:
get_list(650305)

try {'titleId': 650305, 'page': 1}
{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=126&weekday=sat', 'name': '2부43화 무케 혼자 남아', 'img_url': 'http://thumb.comic.naver.net/webtoon/650305/126/thumbnail_202x120_0d55bd75-01b4-48e7-9af0-52393a0294b2.jpg', 'is_up': False}
{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=125&weekday=sat', 'name': '2부42화 가우리와 범찰', 'img_url': 'http://thumb.comic.naver.net/webtoon/650305/125/thumbnail_202x120_67cb84c7-fb36-4e8e-91b5-5f20eed077e9.jpg', 'is_up': False}
{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=124&weekday=sat', 'name': '2부41화 흰머리를 찾다', 'img_url': 'http://thumb.comic.naver.net/webtoon/650305/124/thumbnail_202x120_669727c2-0483-4f08-b653-3f0dd4bc35f8.jpg', 'is_up': False}
{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=123&weekday=sat', 'name': '2부40화 천남성 대 빠르3', 'img_url': 'http://thumb.comic.naver.net/webtoon/650305/123/thumbnail_202x120_cf432745-28cc-407e-8868-f

{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=76&weekday=sat', 'name': '76화 추이의 불', 'img_url': 'http://thumb.comic.naver.net/webtoon/650305/76/inst_thumbnail_20160826165415.jpg', 'is_up': False}
{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=75&weekday=sat', 'name': '75화 오해', 'img_url': 'http://thumb.comic.naver.net/webtoon/650305/75/inst_thumbnail_20160819163812.jpg', 'is_up': False}
{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=74&weekday=sat', 'name': '74화 누설(漏洩)', 'img_url': 'http://thumb.comic.naver.net/webtoon/650305/74/inst_thumbnail_20160812164555.jpg', 'is_up': False}
{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=73&weekday=sat', 'name': '73화 착호갑사를 뚫고', 'img_url': 'http://thumb.comic.naver.net/webtoon/650305/73/inst_thumbnail_20160805161149.jpg', 'is_up': False}
{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=72&weekday=sat', 'name': '72화 서서히 조여오는', 'img_url': 'http:

{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=36&weekday=sat', 'name': '36화 간악한 녹치', 'img_url': 'http://thumb.comic.naver.net/webtoon/650305/36/inst_thumbnail_20151120171921.jpg', 'is_up': False}
{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=35&weekday=sat', 'name': '35화 흰눈썹의 계략', 'img_url': 'http://thumb.comic.naver.net/webtoon/650305/35/inst_thumbnail_20151113120614.jpg', 'is_up': False}
{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=34&weekday=sat', 'name': '34화 무커의 정체', 'img_url': 'http://thumb.comic.naver.net/webtoon/650305/34/inst_thumbnail_20151106163905.jpg', 'is_up': False}
{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=33&weekday=sat', 'name': '33화 결전을 뒤로하고', 'img_url': 'http://thumb.comic.naver.net/webtoon/650305/33/inst_thumbnail_20151030184335.jpg', 'is_up': False}
{'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=32&weekday=sat', 'name': '32화 부활', 'img_url': 'http:

OrderedDict([('http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=126&weekday=sat',
              {'img_url': 'http://thumb.comic.naver.net/webtoon/650305/126/thumbnail_202x120_0d55bd75-01b4-48e7-9af0-52393a0294b2.jpg',
               'is_up': False,
               'name': '2부43화 무케 혼자 남아',
               'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=126&weekday=sat'}),
             ('http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=125&weekday=sat',
              {'img_url': 'http://thumb.comic.naver.net/webtoon/650305/125/thumbnail_202x120_67cb84c7-fb36-4e8e-91b5-5f20eed077e9.jpg',
               'is_up': False,
               'name': '2부42화 가우리와 범찰',
               'url': 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=125&weekday=sat'}),
             ('http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=124&weekday=sat',
              {'img_url': 'http://thumb.comic.naver.net/webtoon/650305/124/thumbnail_202x120_669727c

### 한 단계씩 보기

In [1]:
from collections import OrderedDict
from itertools import count
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup

In [2]:
from fake_useragent import UserAgent

ua = UserAgent()
ua_chrome = ua.chrome

headers = {
    'User-Agent': ua_chrome,
}

In [3]:
# Fetch page 1 of the episode list for titleId 650305 and parse it.
list_url = 'http://comic.naver.com/webtoon/list.nhn'
params = {
            'titleId': 650305,
            'page': 1,
        }
# Same User-Agent header as defined in the previous cell.
headers = {
    'User-Agent': ua_chrome,
}

html = requests.get(list_url, params=params, headers=headers).text
soup = BeautifulSoup(html, 'html.parser')
soup  # last expression of the cell: displays the parsed document


<!DOCTYPE html>

<html lang="ko">
<head>
<meta content="IE=edge, chrome=1" http-equiv="X-UA-Compatible"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
<title>호랑이형님 :: 네이버 만화</title>
<meta content="호랑이형님" property="og:title"/>
<meta content="http://thumb.comic.naver.net/webtoon/650305/thumbnail/title_thumbnail_20161209212128_t220x202.jpg" property="og:image"/>
<meta content="신비한 힘을 가진 아이를 이용하여 세상을 지배하려는 반인반수 흰눈썹! 
그리고 얼떨결에 아이의 보호자가 된 괴물호랑이 빠르와 착호갑사 지망생 가우리!
이제 힘을 합쳐 흰눈썹으로부터 아이와 세상을 지켜라!!!" property="og:description"/>
<meta content="http://comic.naver.com/webtoon/list.nhn?titleId=650305&amp;page=1" property="og:url"/>
<meta content="article" property="og:type"/>
<meta content="네이버 웹툰" property="og:article:author"/>
<meta content="http://comic.naver.com" property="og:article:author:url"/>
<link href="http://www.naver.com/favicon.ico" rel="shortcut icon" type="image/x-icon"/>
<script type="text/javascript">
		document.domain = "naver.com";
	</script>
<link href="/c

In [4]:
tr_tag_list = soup.select('.viewList tr')
tr_tag_list

[<tr>
 <th scope="col">이미지</th>
 <th scope="col">제목</th>
 <th scope="col">별점</th>
 <th scope="col">등록일</th>
 </tr>, <tr class="band_banner v2">
 <td colspan="4">
 <a href="#" onclick="moveBookstore(198411);clickcr(this,'lst.gostore','650305','',event);"><em class="ico_store2">NAVER 만화</em>다음화를 미리 만나보세요.</a>
 </td>
 </tr>, <tr>
 <td>
 <a href="/webtoon/detail.nhn?titleId=650305&amp;no=126&amp;weekday=sat" onclick="clickcr(this,'lst.img','650305','126',event)">
 <img alt="2부43화 무케 혼자 남아" height="41" onerror="this.src='http://static.comic.naver.net/staticImages/COMICWEB/NAVER/img/common/non71_41.gif'" src="http://thumb.comic.naver.net/webtoon/650305/126/thumbnail_202x120_0d55bd75-01b4-48e7-9af0-52393a0294b2.jpg" title="2부43화 무케 혼자 남아" width="71"/>
 <span class="mask"></span>
 </a>
 </td>
 <td class="title">
 <a href="/webtoon/detail.nhn?titleId=650305&amp;no=126&amp;weekday=sat" onclick="clickcr(this,'lst.title','650305','126',event)">2부43화 무케 혼자 남아</a>
 </td>
 <td>
 <div class="rating_ty

In [5]:
# Naive version: take element [0] of the match list for every row.
# Rows without an episode link (such as the <th> header row) produce an
# empty list, so this cell fails with IndexError.
for tag in tr_tag_list:
    a_tag = tag.select('a[href*=detail.nhn]')[0]
    print(a_tag, "\n")

IndexError: list index out of range

In [6]:
# Rows without an episode link (table header, banner) give an empty match
# list; skip them instead of failing on [0].
for tag in tr_tag_list:
    try:
        # The attribute value is quoted because `detail.nhn` contains a dot
        # and is therefore not a valid unquoted CSS identifier.
        a_tag = tag.select('a[href*="detail.nhn"]')[0]
    except IndexError:
        continue
    else:
        print(a_tag, "\n")

<a href="/webtoon/detail.nhn?titleId=650305&amp;no=126&amp;weekday=sat" onclick="clickcr(this,'lst.img','650305','126',event)">
<img alt="2부43화 무케 혼자 남아" height="41" onerror="this.src='http://static.comic.naver.net/staticImages/COMICWEB/NAVER/img/common/non71_41.gif'" src="http://thumb.comic.naver.net/webtoon/650305/126/thumbnail_202x120_0d55bd75-01b4-48e7-9af0-52393a0294b2.jpg" title="2부43화 무케 혼자 남아" width="71"/>
<span class="mask"></span>
</a> 

<a href="/webtoon/detail.nhn?titleId=650305&amp;no=125&amp;weekday=sat" onclick="clickcr(this,'lst.img','650305','125',event)">
<img alt="2부42화 가우리와 범찰" height="41" onerror="this.src='http://static.comic.naver.net/staticImages/COMICWEB/NAVER/img/common/non71_41.gif'" src="http://thumb.comic.naver.net/webtoon/650305/125/thumbnail_202x120_67cb84c7-fb36-4e8e-91b5-5f20eed077e9.jpg" title="2부42화 가우리와 범찰" width="71"/>
<span class="mask"></span>
</a> 

<a href="/webtoon/detail.nhn?titleId=650305&amp;no=124&amp;weekday=sat" onclick="clickcr(this,'lst




## 특정 에피소드의 이미지 다운받기

### 전체 코드

In [16]:
import os

def ep_download(ep_url, timeout=10):
    """Download every viewer image of one episode to disk.

    Images are stored as ``<webtoon name>/<episode name>/<image file name>``
    relative to the current working directory. Files that already exist are
    not downloaded again.

    Args:
        ep_url: URL of the episode detail page.
        timeout: Seconds passed to ``requests.get`` for every request.

    Returns:
        List of local paths, one per image found on the page, including the
        ones that were skipped because the file was already present.

    Raises:
        requests.HTTPError: If the episode page or an image answers with an
            error status. Nothing is written for a failed image, so a later
            run retries it instead of skipping a broken file.
        IndexError: If the page lacks the expected title elements.
    """
    response = requests.get(ep_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # The heading text contains line breaks / tabs; collapse every whitespace
    # run into a single space.
    webtoon_name = soup.select('.comicinfo .detail h2')[0].text
    webtoon_name = ' '.join(webtoon_name.split())
    ep_name = soup.select('.tit_area h3')[0].text

    # Identical for every image, so built once. The Referer is required for
    # the image request according to the notes in this notebook.
    headers = {
        'Referer': ep_url,
        'User-Agent': 'Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36',
    }

    dir_path = os.path.join(webtoon_name, ep_name)  # webtoon_name/ep_name
    img_path_list = []

    for tag in soup.select('.wt_viewer img'):
        img_url = tag['src']
        print(img_url)

        # Keep the file name used by the site.
        img_name = os.path.basename(img_url)
        img_path = os.path.join(dir_path, img_name)  # webtoon_name/ep_name/img_name
        print(img_path)

        img_path_list.append(img_path)

        # Already downloaded on an earlier run.
        if os.path.exists(img_path):
            print('SKIP \n')
            continue

        img_response = requests.get(img_url, headers=headers, timeout=timeout)
        # Never store an error body under an image file name.
        img_response.raise_for_status()

        os.makedirs(dir_path, exist_ok=True)
        with open(img_path, 'wb') as f:
            f.write(img_response.content)
        print('DOWMLOAD \n')

    print('COMPLETED! \n')
    return img_path_list

In [17]:
ep_download('http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=125&weekday=sat')

http://imgcomic.naver.net/webtoon/650305/125/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_1.jpg
호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_1.jpg
DOWMLOAD 

http://imgcomic.naver.net/webtoon/650305/125/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_2.jpg
호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_2.jpg
DOWMLOAD 

http://imgcomic.naver.net/webtoon/650305/125/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_3.jpg
호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_3.jpg
DOWMLOAD 

http://imgcomic.naver.net/webtoon/650305/125/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_4.jpg
호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_4.jpg
DOWMLOAD 

http://imgcomic.naver.net/webtoon/650305/125/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_5.jpg
호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_5.jpg
D

DOWMLOAD 

http://imgcomic.naver.net/webtoon/650305/125/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_43.jpg
호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_43.jpg
DOWMLOAD 

COMPLETED! 



['호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_1.jpg',
 '호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_2.jpg',
 '호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_3.jpg',
 '호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_4.jpg',
 '호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_5.jpg',
 '호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_6.jpg',
 '호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_7.jpg',
 '호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_8.jpg',
 '호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_9.jpg',
 '호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_10.jpg',
 '호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_11.jpg',
 '호랑이형님 이상규/2부42화 가

### 한 단계씩 보기

In [9]:
# Fetch one episode detail page (titleId 650305, episode no 126) and parse it.
ep_url = 'http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=126&weekday=sat'

html = requests.get(ep_url).text
soup = BeautifulSoup(html, 'html.parser')

In [10]:
soup.select('.wt_viewer img')

[<img alt="comic content" class="" id="content_image_0" oncontextmenu="return false" ondragstart="return false" onerror="this.src='http://static.comic.naver.net/staticImages/COMICWEB/NAVER/img/common/blank.gif'" onload="loadImageEvent(this);" onselectstart="return false" src="http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_1.jpg" title=""/>,
 <img alt="comic content" class="" id="content_image_1" oncontextmenu="return false" ondragstart="return false" onerror="this.src='http://static.comic.naver.net/staticImages/COMICWEB/NAVER/img/common/blank.gif'" onload="loadImageEvent(this);" onselectstart="return false" src="http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_2.jpg" title=""/>,
 <img alt="comic content" class="" id="content_image_2" oncontextmenu="return false" ondragstart="return false" onerror="this.src='http://static.comic.naver.net/staticImages/COMICWEB/NAVER/img/common/blank.gi

In [11]:
# Print the src URL of every <img> inside the .wt_viewer element.
for tag in soup.select('.wt_viewer img'):
    print(tag['src'])

http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_1.jpg
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_2.jpg
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_3.jpg
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_4.jpg
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_5.jpg
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_6.jpg
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_7.jpg
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_8.jpg
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_9.jpg
http://imgcomic.naver.net/webtoon/650305/126/2

##### 준비작업 : 저장될 이미지의 경로 설정 (디렉토리 트리)
- 에피소드별로 구분해서 저장하기 위해 웹툰명, 에피소드명 가져오기

In [12]:
soup.select('.comicinfo .detail h2')[0].text

'호랑이형님\r                \t 이상규\n'

- split() : 전달한 문자로 문자열을 나눔, 결과는 리스트(구분자 포함 안됨)
- join() : 리스트 같은 iterable 인자를 전달하여 문자열로 연결
- [파이썬 str 문자열 객체 메소드 함수 정리](http://withcoding.com/74)

In [13]:
# 공백으로 나눠서 ' ' 으로 연결
webtoon_name = ' '.join(soup.select('.comicinfo .detail h2')[0].text.split())
webtoon_name

'호랑이형님 이상규'

In [14]:
ep_name = soup.select('.tit_area h3')[0].text
ep_name

'2부43화 무케 혼자 남아'

#### Referer 헤더 추가 / 이미지 저장 경로 설정
- 이미지를 다운받고 싶은데 새로고침해보면 'The page cannot be displayed' 화면이 나온다?
- 이런 경우, **Referer 헤더**를 넣어줘야 함.

In [15]:
import os

# Referer header required for the image requests (see the note above this
# cell). Built once because it is the same for every image, and kept under
# its own name so the notebook-wide `headers` used by get_list() is not
# replaced.
img_headers = {
    'Referer': ep_url,
}

for tag in soup.select('.wt_viewer img'):
    img_url = tag['src']
    print(img_url)

    # Name and path of the file to save
    img_name = os.path.basename(img_url)  # keep the file name used by the site
    img_path = os.path.join(webtoon_name, ep_name, img_name)  # webtoon_name/ep_name/img_name
    print(img_path)

    # Create the target directory when it does not exist yet
    dir_path = os.path.dirname(img_path)  # webtoon_name/ep_name
    os.makedirs(dir_path, exist_ok=True)

    # Save the file if it is missing / skip it if it was already downloaded
    if not os.path.exists(img_path):
        img_response = requests.get(img_url, headers=img_headers, timeout=10)
        # Never store an error body under an image file name.
        img_response.raise_for_status()
        with open(img_path, 'wb') as f:
            f.write(img_response.content)
        print('DOWMLOAD')
    else:
        print('SKIP')

http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_1.jpg
호랑이형님 이상규/2부43화 무케 혼자 남아/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_1.jpg
DOWMLOAD
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_2.jpg
호랑이형님 이상규/2부43화 무케 혼자 남아/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_2.jpg
DOWMLOAD
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_3.jpg
호랑이형님 이상규/2부43화 무케 혼자 남아/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_3.jpg
DOWMLOAD
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_4.jpg
호랑이형님 이상규/2부43화 무케 혼자 남아/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_4.jpg
DOWMLOAD
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_5.jpg
호랑이형님 이상규/2부43화 무케 혼자 남아/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_5.jpg
DOWM

DOWMLOAD
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_43.jpg
호랑이형님 이상규/2부43화 무케 혼자 남아/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_43.jpg
DOWMLOAD
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_44.jpg
호랑이형님 이상규/2부43화 무케 혼자 남아/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_44.jpg
DOWMLOAD
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_45.jpg
호랑이형님 이상규/2부43화 무케 혼자 남아/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_45.jpg
DOWMLOAD
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_46.jpg
호랑이형님 이상규/2부43화 무케 혼자 남아/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_46.jpg
DOWMLOAD
http://imgcomic.naver.net/webtoon/650305/126/20170929220459_9011981cf980b88c69bf18fd5d9a54a5_IMAG01_47.jpg
호랑이형님 이상규/2부43화 무케 혼자 남아/20170929220459_9011981cf980b88c69bf18fd5d9a54a5

## 다운받은 이미지를 세로로 합치기

### 전체 코드

In [33]:
from PIL import Image

# Open every downloaded image, keeping the download order.
im_list = [Image.open(img_path) for img_path in img_path_list]

# The canvas is as wide as the widest image and as tall as all images
# stacked. The height is capped at 65500 because the result is saved as jpg;
# images that fall below the cap are presumably clipped in the saved file.
widths = [im.width for im in im_list]
heights = [im.height for im in im_list]
canvas_size = (max(widths), min(65500, sum(heights)))

print(canvas_size)

canvas = Image.new('RGB', canvas_size)

# Paste the images one under another; `top` is the y offset of the next one.
top = 0
for im in im_list:
    canvas.paste(im, (0, top))
    top = top + im.height

canvas.save('merged.jpg')

(690, 65500)


### 한 단계씩 보기

In [18]:
# Local paths of the episode's images, as returned by ep_download().
img_path_list = ep_download('http://comic.naver.com/webtoon/detail.nhn?titleId=650305&no=125&weekday=sat')

http://imgcomic.naver.net/webtoon/650305/125/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_1.jpg
호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_1.jpg
SKIP 

http://imgcomic.naver.net/webtoon/650305/125/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_2.jpg
호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_2.jpg
SKIP 

http://imgcomic.naver.net/webtoon/650305/125/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_3.jpg
호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_3.jpg
SKIP 

http://imgcomic.naver.net/webtoon/650305/125/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_4.jpg
호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_4.jpg
SKIP 

http://imgcomic.naver.net/webtoon/650305/125/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_5.jpg
호랑이형님 이상규/2부42화 가우리와 범찰/20170921210001_a6a7b744ea9aa2f3a436e9da6c0d7c69_IMAG01_5.jpg
SKIP 

http://img

In [19]:
from PIL import Image

In [20]:
# Open each downloaded file as a PIL image, keeping the download order.
im_list = [Image.open(path) for path in img_path_list]

In [21]:
im_list[0].size

(690, 1600)

In [22]:
print(im_list[0].width)
print(im_list[0].height)

690
1600


In [23]:
max(im.width for im in im_list)  # largest width among the images

690

In [24]:
sum(im.height for im in im_list)  # sum of all image heights

68797

In [25]:
canvas_size = (max(im.width for im in im_list), sum(im.height for im in im_list))

In [26]:
canvas = Image.new('RGB', canvas_size)  # create a new canvas

In [27]:
# Stack the images vertically: x stays at 0, y advances by each image height.
left, top = 0, 0
for im in im_list:
    offset = (left, top)
    canvas.paste(im, offset)
    top = top + im.height

In [28]:
canvas_size

(690, 68797)

In [29]:
canvas.save('merged.jpg')  # saving as jpg raises an error (the image is too large for the format)

OSError: encoder error -2 when writing image file

#### Tip : 포맷별 최대 지원 크기
- jpg는 최대 $2^{16}-1$ (65,535) 픽셀 -> 실제로 해보면 65,500까지만 지원됨.
- png는 최대 $2^{31}-1$ (2,147,483,647) 픽셀 (signed)

In [30]:
canvas.save('merged.png')  # save as png instead

In [32]:
# Imported under an alias: a plain `Image` would replace PIL's `Image` in the
# notebook namespace and break re-running the cells that call Image.open /
# Image.new.
from IPython.display import Image as IPyImage

IPyImage(filename='merged.png')  # the file is too large for IPython to show; open it directly in Finder

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.
