-
Notifications
You must be signed in to change notification settings - Fork 66
Expand file tree
/
Copy pathtest_features.py
More file actions
92 lines (70 loc) · 2.75 KB
/
Copy pathtest_features.py
File metadata and controls
92 lines (70 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from fake_news.utils.features import compute_bin_idx
from fake_news.utils.features import normalize_and_clean_counts
from fake_news.utils.features import normalize_and_clean_party_affiliations
from fake_news.utils.features import normalize_and_clean_speaker_title
from fake_news.utils.features import normalize_and_clean_state_info
from fake_news.utils.features import normalize_labels
def test_compute_bin_idx():
    """Check the index ``compute_bin_idx`` reports for several values.

    Every case uses the same list of bin edges. The values probed are the
    lowest edge, a value lying between two edges, a value equal to an
    interior edge, and the highest edge.
    """
    edges = [0, 4, 10, 12]
    # (value, expected bin index) pairs, checked in this order.
    cases = [
        (0, 0),
        (3, 1),
        (4, 1),
        (12, 3),
    ]
    for value, expected_idx in cases:
        assert compute_bin_idx(value, edges) == expected_idx
def test_normalize_labels():
    """Check that ``normalize_labels`` converts string labels to booleans.

    "pants-fire", "barely-true" and "false" are expected to become ``False``;
    "true", "half-true" and "mostly-true" are expected to become ``True``.
    A field other than "label" is expected to come back unchanged.
    """
    # One row per raw label, paired with the boolean it should map to.
    label_to_truth = [
        ("pants-fire", False),
        ("barely-true", False),
        ("false", False),
        ("true", True),
        ("half-true", True),
        ("mostly-true", True),
    ]
    raw_rows = [{"label": text} for text, _ in label_to_truth]
    wanted_rows = [{"label": truth} for _, truth in label_to_truth]

    # Only the first datapoint carries an extra field, which must be preserved.
    raw_rows[0]["ignored_field"] = "blah"
    wanted_rows[0]["ignored_field"] = "blah"

    assert normalize_labels(raw_rows) == wanted_rows
def test_normalize_speaker_title():
    """Check the cleaned form of two raw ``speaker_title`` values.

    The hyphenated, trailing-space title is expected to come back as
    "mr president", and the upper-case, spaced-out title as
    "u.s. congressman". A field other than "speaker_title" is expected to
    be returned untouched.
    """
    president_row = {"speaker_title": "mr-president ", "ignored_label": "true"}
    congressman_row = {"speaker_title": " U. S. CONGRESSMAN"}

    cleaned_rows = normalize_and_clean_speaker_title(
        [president_row, congressman_row]
    )

    assert cleaned_rows == [
        {"speaker_title": "mr president", "ignored_label": "true"},
        {"speaker_title": "u.s. congressman"},
    ]
def test_normalize_party_affiliations():
    """Check the cleaned form of two raw ``party_affiliation`` values.

    "democrat" is expected to pass through unchanged, while "boston tea" is
    expected to be replaced by "none". A field other than
    "party_affiliation" is expected to be returned untouched.
    """
    raw_rows = [
        {"party_affiliation": "democrat", "ignored_label": "true"},
        {"party_affiliation": "boston tea"},
    ]
    wanted_rows = [
        {"party_affiliation": "democrat", "ignored_label": "true"},
        {"party_affiliation": "none"},
    ]

    cleaned_rows = normalize_and_clean_party_affiliations(raw_rows)
    assert cleaned_rows == wanted_rows
def test_normalize_state_info():
    """Check the cleaned form of two raw ``state_info`` values.

    The padded, misspelt " Virgina " is expected to come back as "virginia",
    and the padded, upper-case " TEX " as "texas". A field other than
    "state_info" is expected to be returned untouched.
    """
    # The misspelling and surrounding whitespace are deliberate test input.
    virginia_row = {"state_info": " Virgina ", "ignored_label": "true"}
    texas_row = {"state_info": " TEX "}

    wanted_rows = [
        {"state_info": "virginia", "ignored_label": "true"},
        {"state_info": "texas"},
    ]

    cleaned_rows = normalize_and_clean_state_info([virginia_row, texas_row])
    assert cleaned_rows == wanted_rows
def test_normalize_counts():
    """Check that count fields given as float-like strings become integers.

    "23.0" under "barely_true_count" is expected to come back as ``23`` and
    "1.0" under "false_count" as ``1``. A non-count field is expected to be
    returned untouched.
    """
    raw_rows = [
        {"barely_true_count": "23.0", "ignored_label": "true"},
        {"false_count": "1.0"},
    ]

    cleaned_rows = normalize_and_clean_counts(raw_rows)

    assert cleaned_rows == [
        {"barely_true_count": 23, "ignored_label": "true"},
        {"false_count": 1},
    ]