-
Notifications
You must be signed in to change notification settings - Fork 4
/
babel_data.py
34 lines (25 loc) · 990 Bytes
/
babel_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import datetime
import random
from typing import List
import babel
from babel.dates import format_date
def writelines(lines: List[str], fname: str) -> None:
    """Write each string in ``lines`` to the file ``fname``, one per line.

    A newline is appended to every entry. The file is opened in write
    mode, so any existing content is replaced.

    Args:
        lines: Strings to write; each becomes one line of the file.
        fname: Path of the file to create or overwrite.
    """
    # Build the terminated lines before opening the file so that a bad entry
    # (e.g. a non-string) fails before the target file is truncated.
    terminated = [line + "\n" for line in lines]
    # Pin the encoding: the text may contain arbitrary Unicode, and the
    # platform default encoding is not guaranteed to be able to represent it.
    with open(fname, "w", encoding="utf-8") as f:
        f.writelines(terminated)
# Format names passed as the ``format`` argument of ``format_date``; one is
# picked at random for every generated sample.
formats = ["short", "medium", "long"]
# Locale identifiers reported by Babel's locale data; one is picked at random
# for every generated sample.
locales = babel.localedata.locale_identifiers()
def create(prefix: str, N: int, create_vocab: bool):
    """Generate ``N`` random (localized date string, ISO date) pairs on disk.

    Each sample is a date chosen at random between 1900-01-01 and
    ``200 * 365`` days later (inclusive). The target is the date's ISO
    representation (``date.isoformat()``); the source is the same date
    rendered by ``format_date`` with a randomly chosen entry of the
    module-level ``formats`` and ``locales``.

    Sources are written to ``<prefix>-source.txt`` and targets to
    ``<prefix>-target.txt``, one sample per line and in matching order.

    Args:
        prefix: Filename prefix for the source/target files.
        N: Number of samples to generate.
        create_vocab: When true, also write the set of distinct characters
            occurring in the sources and in the targets to
            ``source-vocab.txt`` and ``target-vocab.txt`` (fixed names that
            do not use ``prefix``), one character per line.
    """
    startdate = datetime.date(1900, 1, 1)
    max_offset_days = 200 * 365
    target = []
    source = []
    for _ in range(N):
        date = startdate + datetime.timedelta(random.randint(0, max_offset_days))
        target.append(date.isoformat())
        datestr = format_date(
            date,
            format=random.choice(formats),
            locale=random.choice(locales),
        )
        source.append(datestr)

    writelines(source, "%s-source.txt" % prefix)
    writelines(target, "%s-target.txt" % prefix)

    if create_vocab:
        # Sort the characters: iteration order of a set of strings can differ
        # between interpreter runs, which would make the vocab files (and any
        # index derived from line position) non-reproducible.
        writelines(sorted(set("".join(source))), "source-vocab.txt")
        writelines(sorted(set("".join(target))), "target-vocab.txt")