## 뉴스 제목 가져오기


In [33]:
import requests
import bs4
from bs4 import BeautifulSoup
from urllib.parse import urljoin

In [3]:
!pip show requests

Name: requests
Version: 2.25.1
Summary: Python HTTP for Humans.
Home-page: https://requests.readthedocs.io
Author: Kenneth Reitz
Author-email: me@kennethreitz.org
License: Apache 2.0
Location: c:\users\vega2\anaconda3\lib\site-packages
Requires: certifi, idna, chardet, urllib3
Required-by: Sphinx, jupyterlab-server, conda, conda-repo-cli, conda-build, anaconda-project, anaconda-client


In [4]:
#! pip install --upgrade requests

In [5]:
# Show which version of the requests package this kernel imported.
print(f'requests version {requests.__version__}')

requests version 2.25.1


In [10]:
# Show which version of the bs4 (BeautifulSoup) package this kernel imported.
print(f'bs4 version {bs4.__version__}')

bs4 version 4.9.3


In [35]:
# Fetch the Naver "Life/Culture" news page and collect article titles + links.
url = 'https://news.naver.com/main/main.naver?mode=LSD&mid=shm&sid1=103'
# Request header: browser information (user-agent)
req_header = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'
}

# A timeout keeps the cell from blocking indefinitely on an unresponsive server.
res = requests.get(url, headers=req_header, timeout=10)
print(res.ok)  # prints True for a successful response (not the numeric code)
print(res.status_code)

# (title, absolute URL) pairs found on the page; stays empty on failure.
news_links = []
if res.ok:
    # Page text, as seen with the browser's "view source"
    html = res.text
    # BeautifulSoup object that acts as the HTML parser
    soup = BeautifulSoup(html, 'html.parser')
    # Find <a> tags whose href attribute contains the substring "read.naver"
    atag_list = soup.select("a[href*='read.naver']")
    for atag in atag_list:
        # <a href="">title</a>  ->  atag.text is the title
        title = atag.text.strip()
        # Anchors without text (e.g. ones wrapping only an image) are skipped
        if title:
            # Resolve a possibly relative href against the page URL
            href = urljoin(url, atag['href'])
            news_links.append((title, href))
else:
    print('Error 발생 : ', res.status_code)

True
200


### 웹툰 이미지 다운로드 하기
* referer 헤더를 반드시 설정해야 합니다.

In [48]:
import requests
import os

# Download webtoon images; the image requests carry a referer header
# pointing at the episode page.
req_header = {
    'referer': 'https://comic.naver.com/webtoon/detail?titleId=183559&no=531&weekday=mon'
}
img_urls = [
    'https://image-comic.pstatic.net/webtoon/183559/531/20220204165510_02a7b0a87388bc8f89111d78c0c3ec1c_IMAG01_2.jpg',
    'https://image-comic.pstatic.net/webtoon/183559/531/20220204165510_02a7b0a87388bc8f89111d78c0c3ec1c_IMAG01_3.jpg',
    'https://image-comic.pstatic.net/webtoon/183559/531/20220204165510_02a7b0a87388bc8f89111d78c0c3ec1c_IMAG01_4.jpg'
]
for img_url in img_urls:
    # A timeout keeps one stalled download from hanging the whole loop.
    res = requests.get(img_url, headers=req_header, timeout=10)
    print(res.status_code)
    if not res.ok:
        # Nothing to save for a failed request; move on to the next image.
        continue

    # Binary payloads are read through the content attribute
    img_data = res.content
    # Keep only the file-name part of the URL
    file_name = os.path.basename(img_url)

    # open() modes: r(read), w(write), rb(read binary), wb(write binary), a(append)
    with open(file_name, 'wb') as file:
        print(f'writing to {file_name} ({len(img_data):,}) bytes')
        # Store the binary data as an image file
        file.write(img_data)

200
writing to 20220204165510_02a7b0a87388bc8f89111d78c0c3ec1c_IMAG01_2.jpg (209,392) bytes
200
writing to 20220204165510_02a7b0a87388bc8f89111d78c0c3ec1c_IMAG01_3.jpg (173,550) bytes
200
writing to 20220204165510_02a7b0a87388bc8f89111d78c0c3ec1c_IMAG01_4.jpg (120,260) bytes


### 웹툰 이미지 업로드 하기
* http://httpbin.org/post : 파일 업로드 요청을 테스트할 수 있는 URL

In [50]:
import requests

# Upload two local images as a multipart POST to httpbin.
url = 'http://httpbin.org/post'
# Open the files in a with-block so both handles are closed once the request
# has been sent (previously they were opened inline and never closed).
with open('f2.jpg', 'rb') as img1_file, open('f3.jpg', 'rb') as img2_file:
    # Form field name -> open binary file object
    upload_files_dict = {
        'img1': img1_file,
        'img2': img2_file,
    }
    # A timeout keeps the cell from blocking indefinitely on a stalled upload.
    res = requests.post(url, files=upload_files_dict, timeout=10)
# Last expression of the cell: the notebook displays the status code
res.status_code

200

In [55]:
#res.json()['files']['img1']

### urllib 기반 파파고 예제

In [56]:
import os
import sys
import urllib.error
import urllib.parse
import urllib.request

# Translate one English sentence to Korean through the Papago NMT open API,
# using only urllib.
# NOTE(review): the credentials below are hard-coded in the notebook; consider
# loading them from the environment and rotating these values.
client_id = "mldU421g1Whc8Jv0wPt5"  # Client ID issued by the Naver developer center
client_secret = "QEctC2tbME"  # Client Secret issued by the Naver developer center
encText = urllib.parse.quote("Yesterday all my troubles seemed so far away.")
data = "source=en&target=ko&text=" + encText
url = "https://openapi.naver.com/v1/papago/n2mt"
request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id", client_id)
request.add_header("X-Naver-Client-Secret", client_secret)
try:
    # Passing data makes this a POST; the with-block closes the connection.
    with urllib.request.urlopen(request, data=data.encode("utf-8")) as response:
        rescode = response.getcode()
        response_body = response.read()
except urllib.error.HTTPError as err:
    # urlopen raises HTTPError for 4xx/5xx answers, so the error code has to
    # be reported from here rather than from the status check below.
    print(f"Error Code:{err.code}")
else:
    if rescode == 200:
        print(response_body.decode('utf-8'))
    else:
        # rescode is an int; formatting avoids the str + int TypeError.
        print(f"Error Code:{rescode}")

{"message":{"result":{"srcLangType":"en","tarLangType":"ko","translatedText":"어제는 내 모든 고민이 너무 멀리 있는 것 같았어.","engineType":"N2MT","pivot":null,"dict":null,"tarDict":null,"modelVer":"Unknown"},"@type":"response","@service":"naverservice.nmt.proxy","@version":"1.0.0"}}


### requests 기반 파파고 사용 예제

In [59]:
import requests

# Papago NMT endpoint plus the application credentials placed in the
# request headers below.
# NOTE(review): credentials are hard-coded in the notebook; consider moving
# them out of the source.
url = "https://openapi.naver.com/v1/papago/n2mt"
client_id = "mldU421g1Whc8Jv0wPt5"
client_secret = "QEctC2tbME"

# Sentence to translate. Despite the name, it is stored as plain text here;
# no URL-encoding is applied in this cell.
enc_text = "Suddenly I'm not half the man I used to be."

# Request fields: source language, target language, and the text itself.
req_param = dict(source='en', target='ko', text=enc_text)

# Headers carrying the client id / secret pair.
req_header = {'X-Naver-Client-Id': client_id, 'X-Naver-Client-Secret': client_secret}

# Last expression of the cell: the notebook displays the header dict.
req_header

{'X-Naver-Client-Id': 'mldU421g1Whc8Jv0wPt5',
 'X-Naver-Client-Secret': 'QEctC2tbME'}