/
uscis_glossary_test.py
120 lines (80 loc) · 3.2 KB
/
uscis_glossary_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# pyright: reportUntypedFunctionDecorator=false
# pyright: reportOptionalMemberAccess=false
# pyright: reportUnusedImport=false
from more_itertools import first, last
from scrapy.http.response.html import HtmlResponse
from pytest import fixture, mark
from public_law.dates import today
from public_law.metadata import Subject
from public_law.models.glossary import GlossaryParseResult
from public_law.text import URL, NonemptyString
from public_law.parsers.usa.uscis_glossary import parse_glossary
def parsed_fixture(filename: str, url: str) -> GlossaryParseResult:
    """Parse a saved HTML fixture as if it had been fetched from *url*.

    The fixture is read from ``tests/fixtures/usa/<filename>`` (relative to
    the current working directory), wrapped in a Scrapy ``HtmlResponse``
    carrying the given URL, and handed to ``parse_glossary``.
    """
    fixture_path = f"tests/fixtures/usa/{filename}"
    with open(fixture_path, encoding="utf8") as fixture_file:
        page_source = fixture_file.read()

    response = HtmlResponse(url=url, body=page_source, encoding="UTF-8")
    return parse_glossary(response)
def parsed_glossary() -> GlossaryParseResult:
    """Return the parse result for the saved USCIS glossary fixture page."""
    fixture_name = "uscis-glossary.html"
    source_url = "https://www.uscis.gov/tools/glossary"
    return parsed_fixture(filename=fixture_name, url=source_url)
# Parse the fixture once, when this module is imported; the tests in this
# module read from these shared module-level results instead of re-parsing.
GLOSSARY = parsed_glossary()
# Shortcut to the metadata portion of the parse result.
METADATA = GLOSSARY.metadata
#
# Metadata tests
#
def test_gets_the_name():
    """The metadata title is the glossary's name."""
    title = METADATA.dcterms_title
    assert title == "USCIS Glossary"
def test_gets_the_url():
    """The metadata source is the URL the page was parsed with."""
    source_url = METADATA.dcterms_source
    assert source_url == "https://www.uscis.gov/tools/glossary"
def test_gets_the_author():
    """The metadata creator is the public.law site."""
    creator = METADATA.dcterms_creator
    assert creator == "https://public.law"
def test_the_original_creator():
    """The source creator recorded in the metadata is USCIS."""
    source_creator = METADATA.publiclaw_sourceCreator
    assert source_creator == "U.S. Citizenship and Immigration Services"
def test_gets_coverage():
    """The metadata coverage is the USA."""
    coverage = METADATA.dcterms_coverage
    assert coverage == "USA"
def test_gets_the_source_modified_date():
    """The source-modified field is reported as the string "unknown"."""
    source_modified = METADATA.publiclaw_sourceModified
    assert source_modified == "unknown"
def test_gets_the_scrape_date():
    """The metadata modified date equals today's date."""
    scrape_date = METADATA.dcterms_modified
    assert scrape_date == today()
def test_subjects():
    """The metadata carries the two expected immigration-law subjects, in order."""
    loc_subject = Subject(
        uri=URL("http://id.loc.gov/authorities/subjects/sh85042790"),
        rdfs_label=NonemptyString("Emigration and immigration law"),
    )
    wikidata_subject = Subject(
        uri=URL("https://www.wikidata.org/wiki/Q231147"),
        rdfs_label=NonemptyString("immigration law"),
    )
    expected_subjects = (loc_subject, wikidata_subject)

    assert METADATA.dcterms_subject == expected_subjects
#
# Content tests
#
def test_phrase():
    """The first glossary entry has the expected phrase."""
    first_entry = first(GLOSSARY.entries)
    assert first_entry.phrase == "Alien Registration Number"
def test_definition():
    """The first entry's definition keeps its HTML markup, including the link."""
    expected_html = (
        '<p>A unique seven-, eight- or nine-digit number assigned to a noncitizen '
        'by the Department of Homeland Security. Also see '
        '<a aria-label="Show glossary definition for USCIS Number" data-entity-substitution="canonical" data-entity-type="node" data-lang="en" data-linktype="glossary" data-nid="50674" href="#">USCIS Number</a>.'
        '</p>'
    )
    first_entry = first(GLOSSARY.entries)

    assert first_entry.definition == expected_html
def test_gets_proper_number_of_entries():
    """The fixture page yields 266 glossary entries."""
    # Materialize into a tuple before counting, as the original does.
    all_entries = tuple(GLOSSARY.entries)
    assert len(all_entries) == 266
def test_gets_the_last_phrase():
    """The final glossary entry has the expected phrase."""
    final_phrase = last(GLOSSARY.entries).phrase
    assert final_phrase == "Withdrawal"
def test_gets_the_last_definition():
    """The final entry's definition matches the expected HTML paragraph."""
    expected_html = (
        '<p>This is an arriving noncitizen’s voluntary retraction of an application '
        'for admission to the United States in lieu of a removal hearing before an '
        'immigration judge or an expedited removal.</p>'
    )
    final_definition = last(GLOSSARY.entries).definition

    assert final_definition == expected_html