#### 【 PYTHON LIB - urllib 】 
- 인터넷 주소(URL)와 관련된 처리를 하는 패키지
- 내부 모듈 5개
    * urllib.request
    * urllib.response
    * urllib.error
    * urllib.parse 
    * urllib.robotparser


[1] 모듈 로딩 <hr>

In [13]:
#from urllib import request as req                       ## - request 모듈 전체
from urllib.request import urlopen, urlretrieve          ## - request 모듈 내 함수 2개만 
import os                                                ## - 파일/폴더 경로 체크 

[2] URL에 해당하는 데이터 로딩<hr>

In [14]:
## =============================================================
## [2-1] urlretrieve: download the data behind a URL straight into a file
## =============================================================
IMG_URL   = r'https://search.pstatic.net/sunny/?src=https%3A%2F%2Fimages.rawpixel.com%2Fimage_800%2FczNmcy1wcml2YXRlL3Jhd3BpeGVsX2ltYWdlcy93ZWJzaXRlX2NvbnRlbnQvdXB3azYxODA3MDIzLXdpa2ltZWRpYS1pbWFnZS1qb2I2MTNfMi1sMGh0a2pjcC5qcGc.jpg&type=a340'
SAVE_FILE = './img.jpg'

# urlretrieve reports failure by raising, not by returning quietly, so the
# failure message must live in an except clause. Checking os.path.exists()
# afterwards is unreliable: a file left over from an earlier run would make
# the check pass even though this download never succeeded.
try:
    f_path, msg = urlretrieve(IMG_URL, SAVE_FILE)
except (OSError, ValueError) as err:
    # OSError covers URLError/HTTPError (network or HTTP failure) as well as
    # local write errors; ValueError is raised for a malformed URL.
    print('URL 체크 바랍니다. 저장 실패!')
    print(f'ERROR MSG : {err}')
else:
    # f_path is the local file name, msg holds the response headers.
    print(f'f_path => {f_path}')
    print(f'msg    => {msg}')


f_path => ./img.jpg
msg    => accept-ranges: bytes
cache-control: max-age=2592000
content-length: 13999
content-type: image/jpeg
expires: Fri, 21 Nov 2025 09:12:54 GMT
last-modified: Wed, 22 Oct 2025 09:12:54 GMT
p3p: CP="ALL CURa ADMa DEVa TAIa OUR BUS IND PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA POL HEA PRE LOC OTC"
date: Wed, 22 Oct 2025 09:12:54 GMT
referrer-policy: unsafe-url
server: Testa/6.2.4
age: 2487558
strict-transport-security: max-age=31536000
connection: close




In [15]:
## =================================================================
## [2-2] urlopen: open the URL and return its data wrapped in an
##       http.client.HTTPResponse object (nothing is written to disk)
## =================================================================
WEB_URL = r'https://docs.python.org/3/library/http.server.html#http.server.BaseHTTPRequestHandler.headers'

# NOTE: the same call works for the image URL from the urlretrieve cell;
# pass IMG_URL instead of WEB_URL to try it.

# The response keeps a network connection open, so use it as a context
# manager to guarantee it is closed once the body has been read.
with urlopen(WEB_URL) as imgReq:
    print(f'imgReq => {imgReq}')
    # Attribute: the response headers.
    print(f'속  성 : imgReq.headers => {imgReq.headers}')
    # Method: read() returns the whole body as bytes.
    print(f'메서드 : imgReq.read()  => {imgReq.read()}')

imgReq => <http.client.HTTPResponse object at 0x00000206DDFC6620>
속  성 : imgReq.headers => Connection: close
Content-Length: 94788
server: nginx
content-type: text/html
last-modified: Thu, 20 Nov 2025 01:03:16 GMT
etag: "691e68d4-17244"
x-clacks-overhead: GNU Terry Pratchett
strict-transport-security: max-age=315360000; includeSubDomains; preload
Via: 1.1 varnish, 1.1 varnish
Accept-Ranges: bytes
Age: 899
Date: Thu, 20 Nov 2025 04:13:32 GMT
X-Served-By: cache-lga21969-LGA, cache-icn1450074-ICN
X-Cache: HIT, HIT
X-Cache-Hits: 3, 0
X-Timer: S1763612012.461089,VS0,VE176
Vary: Accept-Encoding




In [18]:
# Read the raw image bytes from the web and save them to a file

# - Fetch from the web; the context manager closes the connection as soon
#   as the body has been read.
with urlopen(IMG_URL) as resObj:
    # - Extract the binary payload
    raw_data = resObj.read()

# The original cell also bound the response to imgReq via a chained
# assignment; keep that alias so any later use of the name still works.
imgReq = resObj

# - Name of the image file to write
SAVE_FILE = './img2.jpg'

# - Open the file in binary mode and write the bytes
with open(SAVE_FILE, mode="wb") as f:
    f.write(raw_data)

# Report success only after the file has been flushed and closed.
print("저장되었습니다...!")


저장되었습니다...!


In [20]:
# ==========================================
# Read raw text data from a URL and decode it
# ==========================================

# URL to download
# Alternative test URL: http://api.aoikujira.com/ip/ini
DATA_URL  = r'https://docs.python.org/3/library/http.server.html#http.server.BaseHTTPRequestHandler.headers'

# Download and read the body; the context manager closes the connection
# once we are done with the response.
with urlopen(DATA_URL) as res:
    data = res.read()
    # Use the charset declared in the Content-Type header when there is one,
    # falling back to UTF-8 when the server does not declare it.
    charset = res.headers.get_content_charset(failobj="utf-8")

# The body arrives as bytes
print('type(data) =', type(data))       # type check

# Binary => decode into str
text = data.decode(charset)
print('type(text) =', type(text))
print(text)


type(data) = <class 'bytes'>
type(text) = <class 'str'>
<!DOCTYPE html>

<html lang="en" data-content_root="../">
  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<meta property="og:title" content="http.server — HTTP servers" />
<meta property="og:type" content="website" />
<meta property="og:url" content="https://docs.python.org/3/library/http.server.html" />
<meta property="og:site_name" content="Python documentation" />
<meta property="og:description" content="Source code: Lib/http/server.py This module defines classes for implementing HTTP servers. Availability: not WASI. This module does not work or is not available on WebAssembly. See WebAssembly plat..." />
<meta property="og:image:width" content="1146" />
<meta property="og:image:height" content="600" />
<meta property="og:image" content="https://docs.python.org/3.14/_images/social_previews/sum

[3] URL 관련 예외처리 <hr>

In [22]:
# 모듈 로딩
from urllib.request import urlopen 
from urllib.error   import HTTPError, URLError

# Load the URL data, reporting HTTP-level and connection-level failures
try:
    html = urlopen('http://www.pythonscraping.com/pages/error.html')
except HTTPError as e:
    # The server answered, but with an error status (e.g. 404).
    # HTTPError is a subclass of URLError, so it must be caught first.
    print(f'ERROR MSG : {e}')
except URLError as u:
    # The request never got a valid HTTP answer (bad host, no network, ...).
    print(f'ERROR MSG : {u}')
else:
    # Success path: close the response once its body has been printed.
    with html:
        print("OK", html.read(), sep='\n\n')
finally:
    print('---END---')

ERROR MSG : HTTP Error 404: Not Found
---END---
