11from __future__ import annotations
22
33import codecs
4- import unittest
54from typing import Any
65
76from w3lib .encoding import (
1413)
1514
1615
17- class RequestEncodingTests ( unittest . TestCase ) :
16+ class TestRequestEncoding :
1817 utf8_fragments = [
1918 # Content-Type as meta http-equiv
2019 b"""<meta http-equiv="content-type" content="text/html;charset=UTF-8" />""" ,
@@ -44,84 +43,84 @@ def test_bom(self):
4443 assert bom_encoding is not None
4544 assert bom is not None
4645 decoded = string [len (bom ) :].decode (bom_encoding )
47- self . assertEqual ( water_unicode , decoded )
46+ assert water_unicode == decoded
4847 # Body without BOM
4948 enc , bom = read_bom (b"foo" )
50- self . assertEqual ( enc , None )
51- self . assertEqual ( bom , None )
49+ assert enc is None
50+ assert bom is None
5251 # Empty body
5352 enc , bom = read_bom (b"" )
54- self . assertEqual ( enc , None )
55- self . assertEqual ( bom , None )
53+ assert enc is None
54+ assert bom is None
5655
def test_http_encoding_header(self):
    """Check charset extraction from a Content-Type header value."""
    header_value = "Content-Type: text/html; charset=ISO-8859-4"
    extracted = http_content_type_encoding(header_value)
    assert extracted == "iso8859-4"
    # A value without a charset declaration is expected to give None.
    assert http_content_type_encoding("something else") is None
6261
def test_html_body_declared_encoding(self):
    """Check encoding detection from declarations inside a bytes body."""
    # Every known-good fragment must be detected as utf-8; the fragment is
    # used as the assertion message so a failure identifies the input.
    for fragment in self.utf8_fragments:
        encoding = html_body_declared_encoding(fragment)
        assert encoding == "utf-8", fragment

    # Input with no declaration at all.
    assert html_body_declared_encoding(b"something else") is None

    # A meta charset placed after <body> must not be picked up.
    assert (
        html_body_declared_encoding(
            b"""
        <head></head><body>
        this isn't searched
        <meta charset="utf-8">
    """
        )
        is None
    )

    # An http-equiv value other than Content-Type must be ignored.
    assert (
        html_body_declared_encoding(
            b"""<meta http-equiv="Fake-Content-Type-Header" content="text/html; charset=utf-8">"""
        )
        is None
    )
8483
def test_html_body_declared_encoding_unicode(self):
    """Check that html_body_declared_encoding also accepts a str body."""
    # html_body_declared_encoding should work when unicode body is passed
    assert html_body_declared_encoding("something else") is None

    # Same fragments as the bytes test, decoded to str first.
    for fragment in self.utf8_fragments:
        encoding = html_body_declared_encoding(fragment.decode("utf8"))
        assert encoding == "utf-8", fragment

    # A meta charset placed after <body> must not be picked up.
    assert (
        html_body_declared_encoding(
            """
        <head></head><body>
        this isn't searched
        <meta charset="utf-8">
    """
        )
        is None
    )

    # An http-equiv value other than Content-Type must be ignored.
    assert (
        html_body_declared_encoding(
            """<meta http-equiv="Fake-Content-Type-Header" content="text/html; charset=utf-8">"""
        )
        is None
    )
109108
110109
class TestCodecsEncoding:
    """Tests for the encoding-name mapping done by ``resolve_encoding``."""

    def test_resolve_encoding(self):
        """Known labels map to the expected codec name; unknown ones to None."""
        assert resolve_encoding("latin1") == "cp1252"
        # Leading whitespace and different casing/punctuation are accepted.
        assert resolve_encoding(" Latin-1") == "cp1252"
        assert resolve_encoding("gb_2312-80") == "gb18030"
        assert resolve_encoding("unknown encoding") is None
117116
118117
class TestUnicodeDecoding:
    """Tests for ``to_unicode`` on valid and invalid UTF-8 input."""

    def test_utf8(self):
        """A well-formed two-byte sequence decodes to the matching character."""
        assert to_unicode(b"\xc2\xa3", "utf-8") == "\xa3"

    def test_invalid_utf8(self):
        """A stray lead byte becomes U+FFFD and the rest still decodes."""
        assert to_unicode(b"\xc2\xc2\xa3", "utf-8") == "\ufffd\xa3"
125124
126125
127126def ct (charset : str | None ) -> str | None :
@@ -132,14 +131,14 @@ def norm_encoding(enc: str) -> str:
132131 return codecs .lookup (enc ).name
133132
134133
135- class HtmlConversionTests ( unittest . TestCase ) :
134+ class TestHtmlConversion :
def test_unicode_body(self):
    """Check that a cp1251 body is decoded back to the original text."""
    unicode_string = "\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0447\u0435\u0441\u043a\u0438\u0439 \u0442\u0435\u043a\u0441\u0442"
    original_string = unicode_string.encode("cp1251")
    # Only the decoded body is checked here; the reported encoding is ignored.
    _encoding, body_unicode = html_to_unicode(ct("cp1251"), original_string)
    # check body_as_unicode
    assert isinstance(body_unicode, str)
    assert body_unicode == unicode_string
143142
144143 def _assert_encoding (
145144 self ,
@@ -150,15 +149,14 @@ def _assert_encoding(
150149 ) -> None :
151150 assert not isinstance (body , str )
152151 encoding , body_unicode = html_to_unicode (ct (content_type ), body )
153- self . assertTrue ( isinstance (body_unicode , str ) )
154- self . assertEqual ( norm_encoding (encoding ), norm_encoding (expected_encoding ) )
152+ assert isinstance (body_unicode , str )
153+ assert norm_encoding (encoding ) == norm_encoding (expected_encoding )
155154
156155 if isinstance (expected_unicode , str ):
157- self . assertEqual ( body_unicode , expected_unicode )
156+ assert body_unicode == expected_unicode
158157 else :
159- self .assertTrue (
160- body_unicode in expected_unicode ,
161- f"{ body_unicode } is not in { expected_unicode } " ,
158+ assert body_unicode in expected_unicode , (
159+ f"{ body_unicode } is not in { expected_unicode } "
162160 )
163161
164162 def test_content_type_and_conversion (self ):
@@ -227,8 +225,8 @@ def _assert_encoding_detected(
227225 ) -> None :
228226 assert not isinstance (body , str )
229227 encoding , body_unicode = html_to_unicode (ct (content_type ), body , ** kwargs )
230- self . assertTrue ( isinstance (body_unicode , str ) )
231- self . assertEqual ( norm_encoding (encoding ), norm_encoding (expected_encoding ) )
228+ assert isinstance (body_unicode , str )
229+ assert norm_encoding (encoding ) == norm_encoding (expected_encoding )
232230
233231 def test_BOM (self ):
234232 # utf-16 cases already tested, as is the BOM detection function
0 commit comments