### 웹툰 이미지를 다운로드하여 로컬에 저장하기

In [3]:
import requests
import os

# Download a fixed set of webtoon images and store them under data/.

# Header sent with every image request; the referer is the episode page URL.
req_header_dict = {
    'referer': 'https://comic.naver.com/webtoon/detail?titleId=748105&no=145&weekday=thu',
}

# A real list (square brackets): braces would build a set, whose iteration
# order is not fixed and which cannot be indexed or sliced.
img_url_list = [
    'https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_1.jpg',
    'https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_2.jpg',
    'https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_3.jpg',
]

# open(..., 'wb') does not create missing directories, so make sure the
# target directory is there before the first write.
os.makedirs('data', exist_ok=True)

for img_url in img_url_list:
    res = requests.get(img_url, headers=req_header_dict)
    print(res.status_code)
    if not res.ok:
        # Nothing to save for a failed request; move on to the next URL.
        continue

    # Binary payloads are read through the .content attribute.
    img_data = res.content

    # Keep only the file name part of the URL and place it under data/.
    file_name = 'data/' + os.path.basename(img_url)

    # Store the bytes received from the server in a local file.
    with open(file_name, 'wb') as file:
        print(f'Write to file {file_name} ({len(img_data):,}) bytes')
        file.write(img_data)

200
Write to file data/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_1.jpg (115,541) bytes
200
Write to file data/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_2.jpg (117,154) bytes
200
Write to file data/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_3.jpg (139,410) bytes


### 웹툰의 특정회차의 모든 image 다운로드 하기
* Attribute Selector를 사용하여 jpg 파일명들을 모두 추출하여 리스트에 저장하기
* 리스트를 순회하면서 image 다운로드 하기

In [5]:
!pip install bs4

Collecting bs4
  Downloading bs4-0.0.1.tar.gz (1.1 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting beautifulsoup4
  Downloading beautifulsoup4-4.10.0-py3-none-any.whl (97 kB)
Collecting soupsieve>1.2
  Downloading soupsieve-2.3.1-py3-none-any.whl (37 kB)
Using legacy 'setup.py install' for bs4, since package 'wheel' is not installed.
Installing collected packages: soupsieve, beautifulsoup4, bs4
    Running setup.py install for bs4: started
    Running setup.py install for bs4: finished with status 'done'
Successfully installed beautifulsoup4-4.10.0 bs4-0.0.1 soupsieve-2.3.1

<!DOCTYPE html>

<html lang="ko">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
<title>독립일기 :: 네이버 만화</title>
<meta content="독립일기 - 시즌2 42화 시간제 캠핑" property="og:title"/>
<meta content="https://shared-comic.pstatic.net/thumb/webtoon/748105/145/thumbnail_600x315_753eff7c-e34c-4cfd-95ab-c7e76f88ddee.jpg" property="og:ima

In [15]:
import requests
from bs4 import BeautifulSoup

# Collect the URLs of all .jpg images referenced by one episode page.
main_url = 'https://comic.naver.com/webtoon/detail?titleId=748105&no=145&weekday=thu'
res = requests.get(main_url)

# Bind the list before the check so the prints below still work (with an
# empty list) when the request fails, instead of raising NameError or
# silently reusing a value left over from an earlier cell.
img_url_list = []

if res.ok:
    soup = BeautifulSoup(res.text, 'html.parser')
    # Attribute selector: <img> tags whose src value ends with ".jpg".
    img_tags = soup.select("img[src$='.jpg']")
    img_url_list = [img_tag['src'] for img_tag in img_tags]

print(len(img_url_list))
print(img_url_list[:4])

22
['https://shared-comic.pstatic.net/thumb/webtoon/748105/thumbnail/thumbnail_IMAG04_2b479f04-a16b-4fa2-9a05-bc60cd84022c.jpg', 'https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_1.jpg', 'https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_2.jpg', 'https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_3.jpg']


In [16]:
# Create the img directory.
import os

dir_path = 'img'

# exist_ok=True turns the call into a no-op when the directory is already
# there, so no separate (race-prone) isdir() check is needed beforehand.
os.makedirs(dir_path, exist_ok=True)

### 디렉토리 생성하는 2가지 함수
* os.mkdir() / os.makedirs()
  1. os.mkdir() 은 1개 폴더만 생성
  2. os.makedirs() 는 a/b/c 처럼 하위 폴더를 생성

* 리스트를 순회하면서 image 다운로드 하기

In [19]:
# Download every URL in img_url_list into the img directory.

# The referer header is identical for all images, so build it once
# instead of on every iteration.
req_header = {'referer': main_url}

# Make the cell independent of whether the directory was created earlier.
os.makedirs('img', exist_ok=True)

for idx, img_url in enumerate(img_url_list, 1):
    print(f'다운로드 번호{idx} URL = {img_url}')
    res = requests.get(img_url, headers=req_header)
    if not res.ok:
        # Report the failure instead of skipping the image silently.
        print(f'  skipped: HTTP {res.status_code}')
        continue

    img_data = res.content
    # Save under img/ using the file name part of the URL.
    file_name = os.path.join('img', os.path.basename(img_url))
    with open(file_name, 'wb') as file:
        file.write(img_data)

다운로드 번호1 URL = https://shared-comic.pstatic.net/thumb/webtoon/748105/thumbnail/thumbnail_IMAG04_2b479f04-a16b-4fa2-9a05-bc60cd84022c.jpg
다운로드 번호2 URL = https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_1.jpg
다운로드 번호3 URL = https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_2.jpg
다운로드 번호4 URL = https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_3.jpg
다운로드 번호5 URL = https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_4.jpg
다운로드 번호6 URL = https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_5.jpg
다운로드 번호7 URL = https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_6.jpg
다운로드 번호8 URL = https://image-comic.pstatic.net/webtoon/748105/145/20220126180009_7606331a182af3f83de6

In [23]:
# Remove a directory together with the files stored inside it
import os
import shutil

dir_path = 'img'
# rmtree is only called when the img path is actually present
if os.path.exists(dir_path):
    shutil.rmtree(dir_path)

* 웹툰의 타이틀과 특정회차 url을 아규먼트로 받아서 다운로드 하는 함수 구현하기

In [36]:
def download_image(title, round_url):
    """Download all .jpg images of one webtoon episode into ``img/<title>``.

    The whole ``img`` directory is removed first when it exists, then
    ``img/<title>`` is created from scratch, so files from earlier calls
    are discarded. Progress is reported with ``print``.

    Args:
        title: Webtoon title; used as the sub-directory name under ``img``.
        round_url: URL of the episode page. It is fetched to find the image
            tags and is also sent as the ``referer`` header of every image
            request.

    Returns:
        None.
    """
    import os
    import shutil

    import requests
    from bs4 import BeautifulSoup

    dir_path = 'img'
    # Computed unconditionally so the download loop below can never hit an
    # unbound name, whatever happens while preparing the directory.
    title_path = os.path.join(dir_path, title)

    # Start from an empty img directory.
    if os.path.exists(dir_path):
        shutil.rmtree(dir_path)
    print(title_path)
    os.makedirs(title_path)

    # Fetch the episode page to find out which images it references.
    res = requests.get(round_url)
    if not res.ok:
        print(f'Request failed ({res.status_code}): {round_url}')
        return

    # Parse the returned HTML and keep only <img> tags whose src attribute
    # ends with ".jpg".
    soup = BeautifulSoup(res.text, 'html.parser')
    img_tags = soup.select("img[src$='.jpg']")

    # The episode URL is sent as the referer header of each image request.
    req_header = {'referer': round_url}

    for idx, img_tag in enumerate(img_tags, 1):
        print(f'------> 다운로드 번호 {idx}')
        img_url = img_tag['src']

        # Request the image itself.
        res_img = requests.get(img_url, headers=req_header)
        if not res_img.ok:
            # Report the failure instead of skipping the image silently.
            print(f'Image request failed ({res_img.status_code}): {img_url}')
            continue

        # Binary image data.
        img_data = res_img.content

        # os.path.join picks the separator of the current platform; a
        # hard-coded backslash would end up inside the file name on
        # systems where it is not a path separator.
        file_name = os.path.join(title_path, os.path.basename(img_url))

        with open(file_name, 'wb') as file:
            print(f'{file_name}({len(img_data)}) bytes')
            file.write(img_data)

In [37]:
# Run the downloader for one episode page, using '독립일기' as the title argument.
download_image('독립일기','https://comic.naver.com/webtoon/detail?titleId=748105&no=145&weekday=thu')

img\독립일기
------> 다운로드 번호 1
img\독립일기\thumbnail_IMAG04_2b479f04-a16b-4fa2-9a05-bc60cd84022c.jpg(15357) bytes
------> 다운로드 번호 2
img\독립일기\20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_1.jpg(115541) bytes
------> 다운로드 번호 3
img\독립일기\20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_2.jpg(117154) bytes
------> 다운로드 번호 4
img\독립일기\20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_3.jpg(139410) bytes
------> 다운로드 번호 5
img\독립일기\20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_4.jpg(137058) bytes
------> 다운로드 번호 6
img\독립일기\20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_5.jpg(156686) bytes
------> 다운로드 번호 7
img\독립일기\20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_6.jpg(145753) bytes
------> 다운로드 번호 8
img\독립일기\20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_7.jpg(185411) bytes
------> 다운로드 번호 9
img\독립일기\20220126180009_7606331a182af3f83de682399ffcc311_IMAG01_8.jpg(156712) bytes
------> 다운로드 번호 10
img\독립일기\20220126180009_7606331a182af3f83de682399ffcc311_I

### Image File Upload
* https://httpbin.org/post URL로 요청을 보내서 다운로드 받은 img 파일을 업로드 하기
* requests.post() 함수를 사용하고, files 파라미터에 업로드할 image 파일 객체를 지정한다.
* files - (optional) Dictionary of 'filename' : file-objects for multipart encoding upload.

In [39]:
# Upload three local image files to httpbin.org as one multipart POST.
import os
from contextlib import ExitStack

import requests

# Form field name -> local file path. os.path.join keeps the paths valid
# on platforms where the backslash is not a path separator.
upload_paths = {
    'img1': os.path.join('data', 'f1.jpg'),
    'img2': os.path.join('data', 'f2.jpg'),
    'img3': os.path.join('data', 'f3.jpg'),
}

url = 'https://httpbin.org/post'

# ExitStack closes every opened file when the block ends, including when
# a later open() or the request itself raises.
with ExitStack() as stack:
    upload_file_dict = {
        field: stack.enter_context(open(path, 'rb'))
        for field, path in upload_paths.items()
    }
    res = requests.post(url, files=upload_file_dict)

print(res.status_code)
res_data = res.json()
# Print what the response JSON holds for the first uploaded file.
img1_data = res_data['files']['img1']
print(img1_data)

200
data:application/octet-stream;base64,/9j/4AAQSkZJRgABAgAAAQABAAD/2wBDAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRT/2wBDAQMEBAUEBQkFBQkUDQsNFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBT/wAARCAZAArIDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD9U6KKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooo