# URL操作したい


- `URL`の構成要素
  - `scheme://netloc/path;parameters?query#fragment`


In [25]:
# Sample URL shared by the examples in this notebook, written one component
# per line: scheme + host, path, query string, fragment.
TEST_URL = (
    "https://example.com"
    "/path1/path2/path3.html"
    "?key1=value1&key2=value2"
    "#fragment1"
)


# urlparse

- `urllib.parse.urlparse`を使って、URLをパースする
- `urlparse`の戻り値は`ParseResult`（名前付きタプルのサブクラス。属性名でもインデックスでも各要素を参照できる）
- URLにアクセスせずにURLを分解できる

In [26]:
from urllib.parse import urlparse


In [37]:
# Break the sample URL into its components; no network access is involved.
p = urlparse(TEST_URL)
print(f"p={p!r}")
# Show every component in tuple order, using the "p.<name>=<repr>" layout.
for field in p._fields:
    print(f"p.{field}={getattr(p, field)!r}")


p=ParseResult(scheme='https', netloc='example.com', path='/path1/path2/path3.html', params='', query='key1=value1&key2=value2', fragment='fragment1')
p.scheme='https'
p.netloc='example.com'
p.path='/path1/path2/path3.html'
p.params=''
p.query='key1=value1&key2=value2'
p.fragment='fragment1'


`allow_fragments=False`にすると、フラグメントは独立した要素として認識されず、直前の要素（この例ではクエリ）の一部として扱われ、`fragment`は空文字列になる

In [34]:
# Parse again with fragment recognition disabled and print only the query
# and fragment components (the full result is intentionally not printed).
p = urlparse(TEST_URL, allow_fragments=False)
for field in ("query", "fragment"):
    print(f"p.{field}={getattr(p, field)!r}")


p.query='key1=value1&key2=value2#fragment1'
p.fragment=''


# urlsplit

- `urlsplit`でURLを分割できる
- `urlsplit`の戻り値は`SplitResult`オブジェクト（`urlparse`と異なり、`params`は分離されず5要素になる）

In [35]:
from urllib.parse import urlsplit

# Split the sample URL; the result has no separate "params" component.
s = urlsplit(TEST_URL)
print(f"s={s!r}")
# Show every component in tuple order, using the "s.<name>=<repr>" layout.
for field in s._fields:
    print(f"s.{field}={getattr(s, field)!r}")


s=SplitResult(scheme='https', netloc='example.com', path='/path1/path2/path3.html', query='key1=value1&key2=value2', fragment='fragment1')
s.scheme='https'
s.netloc='example.com'
s.path='/path1/path2/path3.html'
s.query='key1=value1&key2=value2'
s.fragment='fragment1'


# urldefrag

- `urldefrag`でURLを「フラグメントを除いたURL」と「フラグメント」に分割できる
- `urldefrag` -> `DefragResult`オブジェクト

In [41]:
from urllib.parse import urldefrag

# Separate the fragment from the rest of the sample URL and show the result.
d = urldefrag(TEST_URL)
print(f"d={d!r}")


d=DefragResult(url='https://example.com/path1/path2/path3.html?key1=value1&key2=value2', fragment='fragment1')


# parse_qs

- `parse_qs`でクエリ文字列を辞書（キー → 値のリスト）に変換できる
- `parse_qsl`でクエリ文字列を`(キー, 値)`タプルのリストに変換できる

In [42]:
from urllib.parse import parse_qs, parse_qsl

# Parse the sample URL with both functions so that the query string from each
# result can be fed to parse_qs / parse_qsl in the following cells.
p, s = urlparse(TEST_URL), urlsplit(TEST_URL)


In [43]:
# Query string taken from the urlparse result, converted to a dict of lists.
parse_qs(qs=p.query)


{'key1': ['value1'], 'key2': ['value2']}

In [48]:
# Query string taken from the urlsplit result, converted to a dict of lists.
parse_qs(qs=s.query)


{'key1': ['value1'], 'key2': ['value2']}

In [45]:
# Query string taken from the urlparse result, converted to a list of
# (key, value) tuples.
parse_qsl(qs=p.query)


[('key1', 'value1'), ('key2', 'value2')]

In [54]:
import requests

# Pass the parse_qs result (a dict mapping each key to a list of values)
# as the request's query parameters, then show the URL that was requested.
# An explicit timeout is set because requests applies none by default, so
# this cell would otherwise block indefinitely if the host did not respond.
response = requests.get(
    url="https://httpbin.org/get",
    params=parse_qs(s.query),
    timeout=10,  # seconds
)
response.url


'https://httpbin.org/get?key1=value1&key2=value2'

In [55]:
# Pass the parse_qsl result (a list of (key, value) tuples) as the request's
# query parameters, then show the URL that was requested.
# An explicit timeout is set because requests applies none by default, so
# this cell would otherwise block indefinitely if the host did not respond.
response = requests.get(
    url="https://httpbin.org/get",
    params=parse_qsl(s.query),
    timeout=10,  # seconds
)
response.url


'https://httpbin.org/get?key1=value1&key2=value2'