forked from Acid-ZdS/PyAcid
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.py
215 lines (167 loc) · 4.97 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#!/usr/bin/env python3.4
# coding: utf-8
"""
Declares the Parser class, which can transform a code string into an AST.
Contributors: myrma
"""
import functools
from collections import defaultdict
from acid.parser.ast import *
from acid.parser.lexer import TokenType, tokenize
from acid.parser.types import SourcePos
from acid.exception import ParseError
# todo: refactor consume_stmt and consume_expr, register_stmt and register_expr
class Parser:
"""
Registers some consumers to parse the AST.
"""
consumers = defaultdict(list)
def __init__(self, code, path=None):
self.path = path
self.code = code
self.token_queue = list(tokenize(self.code)) # the tokenized string
if self.token_queue:
self.end_pos = self.token_queue[-1].pos
else:
self.end_pos = SourcePos(1, 1)
self.error = None
@classmethod
def from_file(cls, path):
with open(path) as file:
code = file.read()
return cls(code, path)
@classmethod
def from_string(cls, code, path=None):
"""
Parses the given code, without needing to instantiate a Parser object.
"""
parser = cls(code, path)
ast = parser.run()
return ast
@classmethod
def register(cls, node_type, priority=1):
"""
Registers a given consumer function with a priority. `priority` is an
integer defining the order in which expression types try to parse from
the token queue. The closest this number if from 1, the highest will be
its priority.
`priority` must be greater than one (not strictly).
"""
def _decorator_wrapper(consumer):
@functools.wraps(consumer)
def _consumer_wrapper(self):
# copies the token list
tmp_queue = self.token_queue[:]
try:
node = consumer(self)
if node is None:
raise ParseError(self.code, pos, 'Consumer returned None')
except ParseError:
# restore previous token list value
# assign tmp_queue to reference token_queue
self.token_queue[:] = tmp_queue
raise
except IndexError:
# assign tmp_queue to reference token_queue
self.token_queue[:] = tmp_queue
# when the user tries to call token_queue.pop(0) but all
# tokens were consumed
raise ParseError(
self.code,
self.end_pos,
'Unexpected EOF') from None
else:
return node
# decrement because highest priority is 1, not 0
_consumer_wrapper.priority = priority - 1
cls.consumers[node_type].append(_consumer_wrapper)
return _decorator_wrapper
def get_consumer_queue(self, node_type):
"""
Returns the list of consumers that parses nodes of a give type, taking
into account the priorities.
"""
consumers = list(self.consumers[node_type])
# reverse MRO: walks down the subclass tree
for sub_node_type in node_type.sub_types():
consumers.extend(self.consumers[sub_node_type])
# sort the list by priority
consumers.sort(key=lambda cons: cons.priority)
return consumers
def consume(self, node_type):
"""
Tries to consume a node of type `node_type` from the token list.
This does not affect the list if the function failed to parse.
"""
consumers = self.get_consumer_queue(node_type)
# tries every concrete nodes of type node_type
for consumer in consumers:
try:
node = consumer(self)
except ParseError as e:
error = e
continue
else:
return node
else:
# when every node has been tried, but none succeeded to parse
raise error
def parse(self, node_type):
"""
Tries to parse a node of type `node_type` from the token list.
This does not affect the list if the function failed to parse.
Fails if the entire token list is not matched.
"""
consumers = self.get_consumer_queue(node_type)
# tries every concrete nodes of type node_type
for consumer in consumers:
try:
tmp_queue = self.token_queue[:]
node = consumer(self)
# raises a ParseError if tokens are remaining unconsumed
if self.token_queue:
err = ParseError(
self.code,
self.token_queue[0].pos,
'The entire code could not be consumed.')
self.token_queue[:] = tmp_queue
raise err
except ParseError as e:
error = e
continue
else:
return node
else:
# when every node has been tried, but none succeeded to parse
raise error
def expect(self, token_type):
"""
Tries to consume a single token from the token queue.
Returns the token if the next token is of the given type, raises a
ParseError otherwise.
"""
token = self.token_queue.pop(0)
# if the next token is not of the expected type
if token.type != token_type:
msg = 'Expected {}, got {}'.format(token_type.name, token.type.name)
raise ParseError(self.code, token.pos, msg)
return token
def many(self, node_type):
"""
Consumes zero or more occurences of a node of a given type.
"""
consumed = []
while True:
try:
nxt = self.consume(node_type)
except ParseError:
break
else:
consumed.append(nxt)
return consumed
def run(self):
"""
Parses a given string into a Program object.
"""
program = self.parse(Program)
return program