forked from timvieira/arsenal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
alphabet.py
129 lines (104 loc) · 3.02 KB
/
alphabet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
class Alphabet(object):
    """
    Bijective mapping from strings to integers.

    Every new string is assigned the next unused integer, starting at 0.
    Two switches control what happens when an unknown string is looked up:

    - ``stop_growth()``: unknown strings map to ``None`` and are not added.
    - ``freeze()``: unknown strings raise ``ValueError``.  This check is made
      before the ``stop_growth()`` check, so it wins when both are set.

    >>> a = Alphabet()
    >>> [a[x] for x in 'abcd']
    [0, 1, 2, 3]
    >>> list(map(a.lookup, range(4)))
    ['a', 'b', 'c', 'd']
    >>> a.stop_growth()
    >>> a['e']
    >>> a.freeze()
    >>> a.add('z')
    Traceback (most recent call last):
      ...
    ValueError: Alphabet is frozen. Key "z" not found.
    >>> print(a.plaintext())
    a
    b
    c
    d
    """

    # Types accepted as keys: ``basestring`` on Python 2, ``str`` on Python 3
    # (where the name ``basestring`` no longer exists).
    try:
        _string_types = (basestring,)
    except NameError:
        _string_types = (str,)

    def __init__(self):
        self._mapping = {}   # str -> int
        self._flip = {}      # int -> str; timv: consider using array or list
        self._i = 0          # next integer to hand out
        self._frozen = False
        self._growing = True

    def freeze(self):
        """Make lookups of unknown keys raise ``ValueError`` from now on."""
        self._frozen = True

    def stop_growth(self):
        """Make lookups of unknown keys return ``None`` instead of adding them."""
        self._growing = False

    @classmethod
    def from_iterable(cls, s):
        """Build an alphabet from the strings in `s` and return it frozen."""
        inst = cls()
        for x in s:
            inst.add(x)
        inst.freeze()
        return inst

    def keys(self):
        """Return an iterator over the known strings (underlying dict order)."""
        # iter(dict) yields keys on both Python 2 and 3; dict.iterkeys() is
        # Python 2 only.
        return iter(self._mapping)

    def imap(self, seq, emit_none=False):
        """
        Lazily apply the alphabet to each item of `seq`.

        By default `None` results are not emitted, so note that the output
        sequence may have fewer items than `seq`.  Pass ``emit_none=True`` to
        keep them and get exactly one output per input.
        """
        if emit_none:
            for s in seq:
                yield self[s]
        else:
            for s in seq:
                x = self[s]
                if x is not None:
                    yield x

    def map(self, seq, *args, **kwargs):
        """Same as `imap`, but return the results as a list."""
        return list(self.imap(seq, *args, **kwargs))

    def add_many(self, x):
        """Look up (and thereby add, when allowed) every key in `x`."""
        for k in x:
            self.add(k)

    def lookup(self, i):
        """
        Return the string mapped to integer `i`.

        ``None`` is passed through unchanged; an integer that was never
        assigned raises ``KeyError``.
        """
        if i is None:
            return None
        # assert isinstance(i, int)
        return self._flip[i]

    def lookup_many(self, x):
        """Lazily yield ``lookup(k)`` for every `k` in `x`."""
        for k in x:
            yield self.lookup(k)

    def __contains__(self, k):
        # Kept as an assert (rather than a raise) so callers see the same
        # exception type as before; note that it is skipped under ``-O``.
        assert isinstance(k, self._string_types)
        return k in self._mapping

    def __getitem__(self, k):
        """
        Return the integer for string `k`, assigning a new one if needed.

        Raises ``ValueError`` if `k` is unknown and is not a string, or if
        `k` is unknown and the alphabet is frozen.  Returns ``None`` if `k`
        is unknown and growth has been stopped.
        """
        try:
            return self._mapping[k]
        except KeyError:
            pass
        if not isinstance(k, self._string_types):
            raise ValueError("Invalid key (%s): only strings allowed." % (k,))
        if self._frozen:
            raise ValueError('Alphabet is frozen. Key "%s" not found.' % (k,))
        if not self._growing:
            return None
        x = self._mapping[k] = self._i
        self._flip[x] = k
        self._i += 1
        return x

    # `add` is an alias: looking a key up is what inserts it.
    add = __getitem__

    def __iter__(self):
        """Yield the known strings in the order of their integers."""
        for i in range(len(self)):
            yield self._flip[i]

    def enum(self):
        """Yield ``(integer, string)`` pairs in increasing integer order."""
        for i in range(len(self)):
            yield (i, self._flip[i])

    def __len__(self):
        return len(self._mapping)

    def plaintext(self):
        """Return all strings, one per line, in integer order."""
        return '\n'.join(self)

    @classmethod
    def load(cls, filename):
        """
        Read one key per line from `filename` and return a frozen alphabet.

        Surrounding whitespace is stripped from every line, so keys that
        contain newlines or leading/trailing whitespace do not round-trip
        through `save`/`load`.
        """
        def read_keys(f):
            for raw in f:
                # On Python 3 binary reads give bytes, which must be decoded
                # to text; on Python 2 bytes *is* str and is used as-is.
                if not isinstance(raw, str):
                    raw = raw.decode('utf-8')
                yield raw.strip()

        with open(filename, 'rb') as f:
            # from_iterable consumes the generator before the file is closed.
            return cls.from_iterable(read_keys(f))

    def save(self, filename):
        """Write `plaintext()` to `filename` (UTF-8 when encoding is needed)."""
        data = self.plaintext()
        # The file is opened in binary mode, so text must be encoded first.
        # Python 2 byte strings are already bytes and are written unchanged.
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        with open(filename, 'wb') as f:
            f.write(data)
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod
    testmod()