In [54]:
!pip show requests

Name: requests
Version: 2.25.1
Summary: Python HTTP for Humans.
Home-page: https://requests.readthedocs.io
Author: Kenneth Reitz
Author-email: me@kennethreitz.org
License: Apache 2.0
Location: c:\users\vega2\anaconda3\lib\site-packages
Requires: urllib3, certifi, idna, chardet
Required-by: Sphinx, jupyterlab-server, conda, conda-repo-cli, conda-build, anaconda-project, anaconda-client


In [55]:
!pip show beautifulsoup4

Name: beautifulsoup4
Version: 4.9.3
Summary: Screen-scraping library
Home-page: http://www.crummy.com/software/BeautifulSoup/bs4/
Author: Leonard Richardson
Author-email: leonardr@segfault.org
License: MIT
Location: c:\users\vega2\anaconda3\lib\site-packages
Requires: soupsieve
Required-by: conda-build


### 네이버 뉴스 제목 가져오기
* user-agent 헤더 값을 반드시 설정

In [56]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin


In [57]:
url = 'https://news.naver.com/'
req_header_dict = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
}
res = requests.get(url, headers=req_header_dict)

print(res.status_code)
print(res.ok)

200
True


In [58]:
if res.ok:
    # 소스보기 했을 때 보여지는 텍스트
    html = res.text
    #BeautifulSoup 객체생성
    soup = BeautifulSoup(html, 'html.parser')
    #print(len(soup.select("a[href*='read.naver']")))
    atag_list = soup.select("a[href*='read.naver']")
    print(type(atag_list))
    for idx, atag in enumerate(atag_list,1):
        #print(type(atag))
        if atag:
            title = atag.text.strip()
            href = urljoin(url, atag['href'])
        if title:
            print(f'{idx} - {title} - {href}')

<class 'bs4.element.ResultSet'>
1 - 시진핑 “기후변화·팬데믹 대응위해서는 중미 관… - https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=104&oid=011&aid=0003985779
2 - 더 걷힐 세금이 19조? 10조?…돈풀기 선거에 고… - https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=101&oid=421&aid=0005727966
3 - 애플과 소송 ‘포트나이트’ 게임 CEO “앱마켓 독점, 메타버스 저해” - https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=105&oid=028&aid=0002568037
4 - 손준성 측 “공수처 압색 절차 위반”…공수처 “적법 집행” - https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=102&oid=056&aid=0011157803
5 - 도이치모터스 권오수 회장, 주가조작 의혹에 묵묵부답 - https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=102&oid=028&aid=0002567967
6 - SK, 백신 이어 치료제까지 글로벌 생산 주도 - https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=105&oid=015&aid=0004629213
7 - 다주택자 역대 최대… 정부 대책 안 먹혔다 - https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1=101&oid=081&aid=0003230522
9 - 안철수 "제2의 조국자녀 없을 것" 메타버스에서 청년공약 발표 - https://news.naver.com/main/read.naver?mode=LSD&mid=shm&sid1

### 웹툰 이미지 다운로드 & 업로드
* referer 헤더 설정이 필요합니다.

In [60]:
import requests
import os

In [61]:
req_header = {
    'referer':'https://comic.naver.com/webtoon/detail?titleId=780506&no=32&amp;weekday=tue'
}
img_url_list = ['https://image-comic.pstatic.net/webtoon/780506/32/20211115143459_7181f6c9b607b12cf6f4da17353e77a3_IMAG01_1.jpg',
                'https://image-comic.pstatic.net/webtoon/780506/32/20211115143459_7181f6c9b607b12cf6f4da17353e77a3_IMAG01_2.jpg',
                'https://image-comic.pstatic.net/webtoon/780506/32/20211115143459_7181f6c9b607b12cf6f4da17353e77a3_IMAG01_3.jpg']

for img_url in img_url_list:
    res = requests.get(img_url, headers=req_header)
    print(res.status_code)
    if res.ok:
        #response 객체에 담겨진 binary data를 추출할때는 content 속성사용
        img_data = res.content
        file_name = os.path.basename(img_url)
        
        with open(file_name, 'wb') as file:
            print(f'writing to {file_name} ({len(img_data)}) bytes')
            file.write(img_data)

200
writing to 20211115143459_7181f6c9b607b12cf6f4da17353e77a3_IMAG01_1.jpg (135942) bytes
200
writing to 20211115143459_7181f6c9b607b12cf6f4da17353e77a3_IMAG01_2.jpg (177381) bytes
200
writing to 20211115143459_7181f6c9b607b12cf6f4da17353e77a3_IMAG01_3.jpg (137491) bytes


### 이미지 업로드
* http://httpbin.org/post  URL로 다운받은 img 파일을 업로드
* requests의 post() 함수를 사용하고, files 속성에 image data를 지정한다

In [62]:
import requests

upload_files_dict = {
    'img1': open('f1.jpg','rb'),
    'img2': open('f2.jpg','rb'),
    'img3': open('f3.jpg','rb')
}
url = "http://httpbin.org/post"
res = requests.post(url, files=upload_files_dict)
print(res.status_code)

200


In [63]:
img1_data = res.json()['files']['img1']
#img1_data

### 파파고 오픈 API 사용
* urllib 내부 라이브러리를 사용한 코드

In [64]:
import os
import sys
import urllib.request

client_id = "Txlmbz8_3MnvRcDWLoZo" # 개발자센터에서 발급받은 Client ID 값
client_secret = "pmYRthuGCZ" # 개발자센터에서 발급받은 Client Secret 값

encText = urllib.parse.quote("Suddenly, I'm not half the man I used to be")
data = "source=en&target=ko&text=" + encText
url = "https://openapi.naver.com/v1/papago/n2mt"

request = urllib.request.Request(url)
request.add_header("X-Naver-Client-Id",client_id)
request.add_header("X-Naver-Client-Secret",client_secret)
response = urllib.request.urlopen(request, data=data.encode("utf-8"))
rescode = response.getcode()
if(rescode==200):
    response_body = response.read()
    print(response_body.decode('utf-8'))
else:
    print("Error Code:" + rescode)

{"message":{"@type":"response","@service":"naverservice.nmt.proxy","@version":"1.0.0","result":{"srcLangType":"en","tarLangType":"ko","translatedText":"갑자기, 나는 예전의 절반도 되지 않았다.","engineType":"N2MT","pivot":null}}}


* requests를 사용한 코드로 변경하기

In [65]:
import requests

client_id = "Txlmbz8_3MnvRcDWLoZo" # 개발자센터에서 발급받은 Client ID 값
client_secret = "pmYRthuGCZ" # 개발자센터에서 발급받은 Client Secret 값
url = "https://openapi.naver.com/v1/papago/n2mt"

#request parameter(요청 파라미터)
req_param_dict = {
    "source":"en",
    "target":"ko",
    "text":"Suddenly, I'm not half the man I used to be"
}
#request header(요청 헤더)
req_header_dict = {
    "X-Naver-Client-Id":client_id,
    "X-Naver-Client-Secret":client_secret
}

res = requests.post(url, data=req_param_dict, headers=req_header_dict)
print(res.ok)
print(f' 응답 헤더 : {res.headers}')
print(f' 요청 헤더 : {res.request.headers}')
print(f' 응답 코드 : {res.status_code}')  

True
 응답 헤더 : {'Server': 'nginx', 'Date': 'Tue, 16 Nov 2021 12:47:28 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Keep-Alive': 'timeout=5', 'apigw-uuid': '16170b26-0798-44ee-a1d4-4dab81d7fd97', 'X-QUOTA': '43', 'Content-Encoding': 'gzip'}
 요청 헤더 : {'User-Agent': 'python-requests/2.25.1', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive', 'X-Naver-Client-Id': 'Txlmbz8_3MnvRcDWLoZo', 'X-Naver-Client-Secret': 'pmYRthuGCZ', 'Content-Length': '72', 'Content-Type': 'application/x-www-form-urlencoded'}
 응답 코드 : 200


In [66]:
if res.ok:
    print(type(res.text))
    print(type(res.json()))
    print(res.text)
    print(res.json()['message']['result']['translatedText'])

<class 'str'>
<class 'dict'>
{"message":{"@type":"response","@service":"naverservice.nmt.proxy","@version":"1.0.0","result":{"srcLangType":"en","tarLangType":"ko","translatedText":"갑자기, 나는 예전의 절반도 되지 않았다.","engineType":"N2MT","pivot":null}}}
갑자기, 나는 예전의 절반도 되지 않았다.
