In [4]:
!pip show requests

Name: requests
Version: 2.25.1
Summary: Python HTTP for Humans.
Home-page: https://requests.readthedocs.io
Author: Kenneth Reitz
Author-email: me@kennethreitz.org
License: Apache 2.0
Location: c:\users\vega2\anaconda3\lib\site-packages
Requires: urllib3, certifi, chardet, idna
Required-by: Sphinx, jupyterlab-server, conda, conda-repo-cli, conda-build, anaconda-project, anaconda-client


### 네이버 뉴스 제목 가져오기
* requests library import
* user-agent 헤더 설정 필요

In [6]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

In [14]:
# Fetch the Naver News front page. The notebook notes state that a
# user-agent header must be set for this request.
url = 'https://news.naver.com/'
req_header = {
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
}
# requests applies no timeout by default; set one so the cell cannot hang
# indefinitely when the server does not answer.
res = requests.get(url, headers=req_header, timeout=10)
# Quick sanity check of the response object, HTTP status and success flag.
print(type(res))
print(res.status_code)
print(res.ok)

<class 'requests.models.Response'>
200
True


* 응답헤더들과 요청헤더값 확인하기

In [10]:
# Response headers (the headers attached to the response stored in `res`)
res.headers

{'date': 'Thu, 11 Nov 2021 02:29:29 GMT', 'cache-control': 'no-cache', 'expires': 'Thu, 01 Jan 1970 00:00:00 GMT', 'set-cookie': 'JSESSIONID=E6B1147D5C346C735C167B705DD3580D; Path=/main; HttpOnly', 'content-language': 'ko-KR', 'vary': 'Accept-Encoding', 'content-encoding': 'gzip', 'transfer-encoding': 'chunked', 'content-type': 'text/html;charset=EUC-KR', 'referrer-policy': 'unsafe-url', 'server': 'nfront'}

In [13]:
# Request headers (the headers of the request that produced `res`)
res.request.headers

{'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}

* response에 포함된 text 추출하기
* 파싱하기 위해서 BeautifulSoup 객체생성
* 특정 html 엘리먼트를 추출하기 위해서 BeautifulSoup 객체의 select(css 선택자) 함수 사용

In [31]:
# Parse the fetched page and print every article title with its absolute URL.
if res.ok:
    # res.text is the decoded page markup (what the browser's "view source" shows)
    html = res.text
    soup = BeautifulSoup(html, 'html.parser')
    print(type(soup))
    # Evaluate the CSS selector once and reuse the result for both the count
    # and the loop, instead of scanning the document twice.
    atags = soup.select("a[href*='read.naver']")
    print(len(atags))
    print(type(atags))
    for atag in atags:
        title = atag.text.strip()
        # Skip anchors whose text is empty after stripping whitespace.
        if title:
            # Resolve relative hrefs against the page URL.
            href = urljoin(url, atag['href'])
            print(title, href)
        

<class 'bs4.BeautifulSoup'>
197
<class 'bs4.element.ResultSet'>
수능 앞두고 고교 원격수업… 고3, 확진 땐 교육… https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=102&oid=005&aid=0001483474
오피스텔 바닥난방 120㎡까지 가능…건축기준 개… https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=101&oid=056&aid=0011154868
롯데, 크라우드 펀딩 플랫폼 '와디즈'에 800억 투자 https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=101&oid=374&aid=0000264058
[속보]2520명 확진, 사망 21명…위중증 473명 또 최다 https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=102&oid=421&aid=0005716322
정성호 “조건부 특검 수용”···이준석 “임명권은 야당이 행사하겠다" https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=100&oid=011&aid=0003984021
요소수 판매처 주유소로 제한...승용차 1대당 10ℓ까지만 판매 https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=102&oid=032&aid=0003109500
테슬라 대항마 '리비안' 상장 첫날 주가 30% 폭등…'빅3' 시총 넘었다 https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=104&oid=032&aid=0003109505
'정치 1번지' 종로 재보궐의 의미…대선 러닝메이트vs필승카드 [레이더P] http://news.naver.com/main/read.naver?

### 네이버 웹툰 Image 다운로드
* referer 라는 header 설정이 필요함
* binary data를 추출할때는 response.content 속성을 사용함

In [32]:
import requests
import os

In [44]:
# Download webtoon images. The notebook notes state that a referer header
# is required for these image requests.
req_header = {
    # Query parameters are joined with a literal '&'; the HTML entity '&amp;'
    # does not belong inside a URL sent in a header.
    'referer':'https://comic.naver.com/webtoon/detail?titleId=748105&no=123&weekday=thu'
}
img_urls = ['https://image-comic.pstatic.net/webtoon/748105/123/20211019104128_21e29756995a8563f22ee06b74862b3c_IMAG01_1.jpg',
            'https://image-comic.pstatic.net/webtoon/748105/123/20211019104128_21e29756995a8563f22ee06b74862b3c_IMAG01_2.jpg',
            'https://image-comic.pstatic.net/webtoon/748105/123/20211019104128_21e29756995a8563f22ee06b74862b3c_IMAG01_3.jpg']
for img_url in img_urls:
    # requests applies no timeout by default; avoid hanging on a stalled download.
    res = requests.get(img_url, headers=req_header, timeout=10)
    print(res.status_code)
    if not res.ok:
        # Do not store an error body on disk under an image file name.
        print(f'skipping {img_url}: HTTP {res.status_code}')
        continue
    # Binary payload: use .content (bytes) rather than .text
    img_data = res.content
    # Use the last path segment of the URL as the local file name.
    file_name = os.path.basename(img_url)
    # File mode 'wb' = write binary
    with open(file_name, 'wb') as file:
        print(f'writing to {file_name} ({len(img_data)}) bytes')
        file.write(img_data)

200
writing to 20211019104128_21e29756995a8563f22ee06b74862b3c_IMAG01_1.jpg (119570) bytes
200
writing to 20211019104128_21e29756995a8563f22ee06b74862b3c_IMAG01_2.jpg (83072) bytes
200
writing to 20211019104128_21e29756995a8563f22ee06b74862b3c_IMAG01_3.jpg (139399) bytes


### Image 업로드
* http://httpbin.org 에 다운로드 받은 image를 업로드하기
* requests의 post() 함수 사용, files 인자에 image data 설정하기

In [49]:
import requests
from contextlib import ExitStack

url = 'http://httpbin.org/post'

# ExitStack closes every file opened below when the block exits, even if the
# upload raises; previously the handles were never closed.
with ExitStack() as stack:
    # 'rb' = read binary mode; the dict keys are the multipart form field names.
    upload_files_dict = {
        field: stack.enter_context(open(path, 'rb'))
        for field, path in (('img1', 'f1.jpg'), ('img2', 'f2.jpg'), ('img3', 'f3.jpg'))
    }
    # requests applies no timeout by default; bound the upload time.
    res = requests.post(url, files=upload_files_dict, timeout=30)
print(res.status_code)
# res.json() holds the parsed JSON body of the response; evaluate it to inspect.

200


In [51]:
# Look up the 'img2' entry under 'files' in the JSON body of the upload response.
response_files = res.json()['files']
img2 = response_files['img2']
# Evaluate `img2` in a cell to inspect the value.

### Papago 번역 API 사용하기
* urllib 을 사용한 예제

In [52]:
import os
import sys
import urllib.error
import urllib.parse
import urllib.request

# Translate a Korean sentence to English through the Papago n2mt endpoint
# using only urllib.
#
# Credentials are read from the environment instead of being hardcoded in the
# notebook. Set NAVER_CLIENT_ID / NAVER_CLIENT_SECRET before running this cell.
client_id = os.environ["NAVER_CLIENT_ID"]  # Client ID issued by the developer center
client_secret = os.environ["NAVER_CLIENT_SECRET"]  # Client Secret issued by the developer center
# Percent-encode the text so it is safe inside the form-encoded body.
encText = urllib.parse.quote("반갑습니다")
data = "source=ko&target=en&text=" + encText
url = "https://openapi.naver.com/v1/papago/n2mt"
request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id",client_id)
request.add_header("X-Naver-Client-Secret",client_secret)
try:
    # Passing data= makes this a POST; the with-block closes the connection.
    with urllib.request.urlopen(request, data=data.encode("utf-8"), timeout=10) as response:
        rescode = response.getcode()
        response_body = response.read()
except urllib.error.HTTPError as err:
    # urlopen raises HTTPError for error statuses, so capture the code here
    # to make the error branch below reachable.
    rescode = err.code
    response_body = b""
if rescode == 200:
    print(response_body.decode('utf-8'))
else:
    # rescode is an int; convert it before concatenating to avoid a TypeError.
    print("Error Code:" + str(rescode))

{"message":{"@type":"response","@service":"naverservice.nmt.proxy","@version":"1.0.0","result":{"srcLangType":"ko","tarLangType":"en","translatedText":"Nice to meet you.","engineType":"PRETRANS","pivot":null}}}


* requests 사용한 예제로 변경하기

In [60]:
import os

import requests

# Translate an English sentence to Korean through the Papago n2mt endpoint
# using requests.
#
# Credentials are read from the environment instead of being hardcoded in the
# notebook. Set NAVER_CLIENT_ID / NAVER_CLIENT_SECRET before running this cell.
client_id = os.environ["NAVER_CLIENT_ID"]  # Client ID issued by the developer center
client_secret = os.environ["NAVER_CLIENT_SECRET"]  # Client Secret issued by the developer center
url = "https://openapi.naver.com/v1/papago/n2mt"

# Plain text is fine here: requests form-encodes the `data` dict itself.
encText = 'Yesterday all my troubles seemed so far away.'

# Request headers
req_header_dict = {
    "X-Naver-Client-Id":client_id,
    "X-Naver-Client-Secret":client_secret
}
# Request parameters (sent as the form-encoded POST body)
req_param_dict = {
    "source":"en",
    "target":"ko",
    "text":encText
}
# requests applies no timeout by default; avoid hanging on a stalled call.
res = requests.post(url, headers=req_header_dict, data=req_param_dict, timeout=10)
print(res.status_code)
if res.ok:
    # res.text is the raw string; res.json() is the parsed dict
    result = res.json()
    print(result)
    transText = result['message']['result']['translatedText']
    print(transText)
else:
    print(f'Error code : {res.status_code}')

200
{'message': {'@type': 'response', '@service': 'naverservice.nmt.proxy', '@version': '1.0.0', 'result': {'srcLangType': 'en', 'tarLangType': 'ko', 'translatedText': '어제는 내 모든 문제가 너무 멀리 있는 것 같았다.', 'engineType': 'N2MT', 'pivot': None}}}
어제는 내 모든 문제가 너무 멀리 있는 것 같았다.
