-
Notifications
You must be signed in to change notification settings - Fork 1
/
rist_løg
executable file
·276 lines (249 loc) · 9.18 KB
/
rist_løg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
#!/usr/bin/env python3
#
# Generate burgerforslag text.
#
# Simple usage: The sole argument is a path to a borgerforslag json file.
# Prints the resulting burgerforslag prettified to standard output.
#
# Advanced usage: The first argument is '-b', the next N arguments are input
# json paths, and the final N arguments are output json paths. Use this for
# batch processing.
import sys
import os.path
import re
import random
import json
import copy
import textwrap
# 0. Ubestemt ental, fælleskøn
# 1. Ubestemt ental, intetkøn
# 2. Bestemt ental
# 3. Ubestemt flertal
# 4. Bestemt flertal
replacement_nouns = [
('burger', None, 'burgeren', 'burgere', 'burgerne'),
('tomatskive', None, 'tomatskiven', 'tomatskiver', 'tomatskiverne'),
(None, None, None, 'ristede løg', 'de ristede løg'),
('bøf', None, 'bøffen', 'bøffer', 'bøfferne'),
('bolle', None, 'bollen', 'boller', 'bollerne'),
('syltet agurk', None, 'den syltede agurk', 'syltede agurker', 'de syltede agurker'),
('osteskive', None, 'osteskiven', 'osteskiver', 'osteskiverne'),
('baconskive', None, 'baconskiven', 'baconskiver', 'baconskiverne'),
('vegetarbøf', None, 'vegetarbøffen', 'vegetarbøffer', 'vegetarbøfferne'),
(None, None, 'mayonnaisen', None, None),
(None, None, 'ketchuppen', None, None),
(None, None, 'remouladen', None, None),
('pomfrit', None, 'pomfritten', 'pomfritter', 'pomfritterne'),
('løgring', None, 'løgringen', 'løgringe', 'løgringene'),
('salatskive', None, 'salatskiven', 'salatskiver', 'salatskiverne'),
('champignon', None, 'champignonen', 'champignoner', 'champignonerne'),
(None, None, 'sennepen', None, None),
(None, None, 'guacamolen', None, None),
('röstibolle', None, 'röstibollen', 'röstiboller', 'röstibollerne'),
(None, None, None, 'syltede rødbeder', 'de syltede rødbeder'),
(None, 'Happy Meal', None, 'Happy Meals', None),
(None, None, None, 'nuggets', None),
('Whopper', None, 'Whopperen', 'Whoppers', None),
('pickle', None, None, 'pickles', None),
(None, 'løg', 'løget', 'løg', 'løgene'),
('salat', None, 'salaten', 'salater', 'salaterne'),
(None, 'salatblad', 'salatbladet', 'salatblade', 'salatbladene'),
('Big Mac', None, None, 'Big Macs', None),
('nummer 21', None, None, 'nummer 21', None),
('cheeseburger', None, 'cheeseburgeren', 'cheeseburgere', 'cheeseburgerne'),
('sesambolle', None, 'sesambollen', 'sesamboller', 'sesambollerne'),
('briochebolle', None, 'briochebollen', 'briocheboller', 'briochebollerne'),
(None, None, 'den hemmelige dressing', None, None),
]
# 0. Ental, fælleskøn
# 1. Ental, intetkøn
# 2. Flertal/bestemthed
replacement_adjectives = [
('sjasket', 'sjasket', 'sjaskede'),
('well-done', 'well-done', 'well-done'),
('optøet', 'optøet', 'optøede'),
('friturestegt', 'friturestegt', 'friturestegte'),
('hjemmelavet', 'hjemmelavet', 'hjemmelavede'),
('gennemstegt', 'gennemstegt', 'gennemstegte'),
('saltet', 'saltet', 'saltede'),
]
# -------------------------------------------------------------------
replacement_nouns_kind = []
for i in range(5):
ws = []
for r in replacement_nouns:
if r[i] is not None:
ws.append(r[i])
replacement_nouns_kind.append(ws)
replacement_adjectives_kind = []
for i in range(3):
ws = []
for r in replacement_adjectives:
if r[i] is not None:
ws.append(r[i])
replacement_adjectives_kind.append(ws)
_basedir = os.path.dirname(__file__)
# Regarding the nouns and adjectives files: They contain all *unique*
# words. For example, 'helt' is not present in any of the files,
# since it can both be a noun and an adjective.
def load_nouns():
    """Load the noun word lists from ``words/navneord`` next to this script.

    Returns a list of five sets of words, one per grammatical form:
    indefinite singular common gender, indefinite singular neuter,
    definite singular, indefinite plural and definite plural. Forms that are
    spread over several files are merged into a single set.

    Raises OSError if a word-list file cannot be opened.
    """
    kinds = [
        ['navneord-ubestemt_ental-fælleskøn'],
        ['navneord-ubestemt_ental-intetkøn'],
        ['navneord-bestemt_ental-fælleskøn',
         'navneord-bestemt_ental-intetkøn'],
        ['navneord-ubestemt_flertal-fælleskøn',
         'navneord-ubestemt_flertal-intetkøn',
         'navneord_flertal-ubestemt'],
        ['navneord-bestemt_flertal-fælleskøn',
         'navneord-bestemt_flertal-intetkøn',
         'navneord_flertal-bestemt'],
    ]
    words_dir = os.path.join(_basedir, 'words', 'navneord')
    nouns = []
    for filenames in kinds:
        kind_nouns = set()
        for name in filenames:
            # Decode explicitly as UTF-8 instead of the locale default, so
            # Danish letters (æ, ø, å) load identically on every platform.
            # NOTE(review): assumes the word lists are stored as UTF-8.
            with open(os.path.join(words_dir, name), encoding='utf-8') as f:
                # One word per line.
                kind_nouns.update(f.read().strip().split('\n'))
        nouns.append(kind_nouns)
    return nouns
def load_adjectives():
    """Load the adjective word lists from ``words/tillægsord`` next to this script.

    Returns a list of three sets of words, one per file: common gender,
    neuter gender and the generic form. Every word listed in the ``ignore``
    file is removed from each set.

    Raises OSError if a word-list file cannot be opened.
    """
    words_dir = os.path.join(_basedir, 'words', 'tillægsord')

    def read_words(name):
        # Decode explicitly as UTF-8 instead of the locale default, so
        # Danish letters (æ, ø, å) load identically on every platform.
        # NOTE(review): assumes the word lists are stored as UTF-8.
        with open(os.path.join(words_dir, name), encoding='utf-8') as f:
            # One word per line.
            return f.read().strip().split('\n')

    ignore_words = set(read_words('ignore'))
    kinds = [
        ['tillægsord-fælleskøn'],
        ['tillægsord-intetkøn'],
        ['tillægsord-generisk'],
    ]
    adjectives = []
    for filenames in kinds:
        kind_adjectives = set()
        for name in filenames:
            kind_adjectives.update(read_words(name))
        adjectives.append(kind_adjectives.difference(ignore_words))
    return adjectives
# Load the word lists once at start-up; is_noun() and fix_word() read these
# module-level lists of sets.
nouns = load_nouns()
adjectives = load_adjectives()
def is_noun(words, i):
    """Return True if ``words[i]`` exists and occurs in any noun word list.

    An index outside ``words`` yields False rather than an error, so callers
    can probe the position right after the last word.
    """
    try:
        candidate = words[i]
    except IndexError:
        return False
    return any(candidate in noun_group for noun_group in nouns)
def find_words(s):
    """Split *s* into lower-cased words and summarise where each one occurs.

    Returns a pair ``(words, summary)``: ``words`` is the list of all
    ``\\w+`` matches of the lower-cased text in order, and ``summary`` is a
    list of ``(word, positions, count)`` tuples, one per distinct word in
    order of first appearance, where ``positions`` is the set of indices
    into ``words`` and ``count`` is the number of occurrences.
    """
    words = re.findall(r'\w+', s.lower())
    occurrences = {}
    for position, word in enumerate(words):
        entry = occurrences.setdefault(word, [set(), 0])
        entry[0].add(position)
        entry[1] += 1
    summary = [(word, positions, count)
               for word, (positions, count) in occurrences.items()]
    return words, summary
def fix_case(orig, new):
    """Return *new* with the capitalisation pattern of *orig* applied.

    If every character of *orig* is upper case the result is fully upper
    case; otherwise, if *orig* starts with an upper-case character, only the
    first character of *new* is upper-cased; otherwise *new* is returned
    unchanged.
    """
    every_char_upper = all(ch.isupper() for ch in orig)
    if every_char_upper:
        return new.upper()
    if orig[:1].isupper():
        head, tail = new[:1], new[1:]
        return head.upper() + tail
    return new
def fix_word(word, word_indices, words,
             replacement_nouns_kind_loc,
             replacement_adjectives_kind_loc):
    """Pick a replacement for *word*, or return None if it is left alone.

    word          -- the lower-cased word to replace.
    word_indices  -- positions of *word* in *words*.
    words         -- all words of the text, in order.
    replacement_nouns_kind_loc, replacement_adjectives_kind_loc
                  -- per-form pools of replacement words; the chosen word is
                     removed from its pool, so these lists are mutated.

    A word qualifies only if its classification is unambiguous: it is in
    exactly one noun list and no adjective list, or it is in exactly one
    adjective list, no noun list, and at least one of its occurrences is
    directly followed by a noun. The replacement is drawn at random from the
    pool of the matching grammatical form; an empty pool yields None.
    """
    # Step 1: make sure the word has exactly one usable classification.
    matched = False
    for noun_group in nouns:
        if word not in noun_group:
            continue
        if matched:
            # Listed under two noun forms: ambiguous.
            return None
        matched = True
    for adjective_group in adjectives:
        if word not in adjective_group:
            continue
        if matched:
            # Already classified as a noun or as another adjective form.
            return None
        # Only accept an adjective that precedes a noun somewhere.
        if any(is_noun(words, idx + 1) for idx in word_indices):
            matched = True
    if not matched:
        return None

    # Step 2: draw a replacement of the same grammatical form, nouns first.
    lookups = (
        (nouns, replacement_nouns_kind_loc),
        (adjectives, replacement_adjectives_kind_loc),
    )
    for groups, pools in lookups:
        for kind, group in enumerate(groups):
            if word not in group:
                continue
            pool = pools[kind]
            if pool:
                word_fixed = random.choice(pool)
                # Each replacement word is handed out at most once.
                pool.remove(word_fixed)
                return word_fixed
    return None
def fix(conv, s):
    """Return *s* with every word that has an entry in *conv* replaced.

    conv -- mapping from lower-cased word to its replacement.
    s    -- the text to rewrite.

    Words are the ``\\w+`` matches of *s*; each is looked up lower-cased and
    its replacement is re-cased with fix_case() to follow the original word.
    Words without an entry are kept as they are.
    """
    def substitute(match):
        original = match.group(0)
        key = original.lower()
        if key not in conv:
            return original
        return fix_case(original, conv[key])

    return re.sub(r'\w+', substitute, s)
# -------------------------------------------------------------------
# Command-line entry point.
#   Simple mode: the sole argument is an input json path; the result is
#                pretty-printed to standard output.
#   Batch mode:  '-b' followed by N input json paths and then N output json
#                paths; each result is written to its output file.
if len(sys.argv) < 2:
    sys.exit('usage: {0} INPUT.json\n'
             '       {0} -b INPUT.json... OUTPUT.json...'.format(sys.argv[0]))

fname = sys.argv[1]
if fname != '-b':
    inputs = [fname]
    outputs = [None]
else:
    args = sys.argv[2:]
    # The paths are split down the middle. With an odd count the split would
    # pair an input path with another input path as its output and overwrite
    # it, so refuse to run instead.
    if len(args) % 2 != 0:
        sys.exit('error: -b needs the same number of input and output paths '
                 '(got {} paths)'.format(len(args)))
    inputs = args[:len(args) // 2]
    outputs = args[len(args) // 2:]

for input_json_file, output_json_file in zip(inputs, outputs):
    # Read as UTF-8 rather than the locale default so Danish text decodes
    # the same way on every platform.
    with open(input_json_file, encoding='utf-8') as f:
        borgerforslag = json.load(f)
    title = borgerforslag['title']
    teaser = borgerforslag['teaser']
    body = borgerforslag['body']

    # Prioritize replacing words that are most used. The shuffle before the
    # (stable) sort randomises the order among equally frequent words.
    words_orig, words = find_words(title + ' ' + teaser + ' ' + body)
    random.shuffle(words)
    words.sort(key=lambda t: t[2], reverse=True)

    # Work on copies: fix_word() removes every replacement it hands out, and
    # each input file must start from the full pools.
    replacement_nouns_kind_loc = copy.deepcopy(replacement_nouns_kind)
    replacement_adjectives_kind_loc = copy.deepcopy(replacement_adjectives_kind)

    # Map each lower-cased word to its replacement.
    conv = {}
    for w, w_indices, _ in words:
        # Fixed substitutions take precedence over the random ones.
        if 'borger' in w:
            conv[w] = w.replace('borger', 'burger')
        elif w.startswith('lov'):
            conv[w] = w.replace('lov', 'menu', 1)
        elif w.endswith('lov'):
            conv[w] = re.sub(r'lov$', 'menu', w)
        elif w.endswith('loven'):
            conv[w] = re.sub(r'loven$', 'menuen', w)
        elif w.endswith('lovens'):
            conv[w] = re.sub(r'lovens$', 'menuens', w)
        else:
            fixed = fix_word(w, w_indices, words_orig,
                             replacement_nouns_kind_loc,
                             replacement_adjectives_kind_loc)
            if fixed:
                conv[w] = fixed

    res = {
        'title': fix(conv, title),
        'teaser': fix(conv, teaser),
        'body': fix(conv, body),
        'id': borgerforslag['id'],
        'approved': borgerforslag['approved']
    }

    if output_json_file:
        # ensure_ascii=False emits æ/ø/å literally, so the file encoding must
        # be able to represent them: write UTF-8 explicitly.
        with open(output_json_file, 'w', encoding='utf-8') as f:
            json.dump(res, f, sort_keys=True, indent=2, ensure_ascii=False)
    else:
        # Wrap every line separately so existing line breaks are kept.
        def fill(s):
            return '\n'.join(textwrap.fill(line) for line in s.split('\n'))

        print('Title:\n{line}\n{title}\n\nTeaser:\n{line}\n{teaser}\n\nBody:\n{line}\n{body}'.format(
            line='-' * 70, title=fill(res['title']),
            teaser=fill(res['teaser']), body=fill(res['body'])))