-
Notifications
You must be signed in to change notification settings - Fork 0
/
grammar.go
267 lines (235 loc) · 7.08 KB
/
grammar.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
package lr
import (
"fmt"
"github.com/npillmayer/gorgo/lr/iteratable"
)
// --- Symbol ----------------------------------------------------------------
// Symbols' values must adhere to these ranges.
const (
EpsilonType = 0
EOFType = -1 // pseudo terminal token for end of input
NonTermType = -1000 // IDs of terminals MUST be in { -2 … -999 }
)
// Serial no. for lrSymbol IDs
var lrSymbolIDSerial = NonTermType - 1
// Symbol is a symbol type used for grammars and grammar builders.
type Symbol struct {
Name string // visual representation, if any
Value int // ID or token value
}
func (lrsym *Symbol) String() string {
//return fmt.Sprintf("<%s|%d>", lrsym.Name, lrsym.Value)
return fmt.Sprintf("%s", lrsym.Name)
}
// IsTerminal returns true if this symbol represents a terminal.
func (lrsym *Symbol) IsTerminal() bool {
return lrsym.Value > NonTermType
}
// Token is just an alias for lrsym.Value.
func (lrsym *Symbol) Token() int {
return lrsym.Value
}
func newSymbol(s string) *Symbol {
lrSymbolIDSerial--
return &Symbol{
Name: s,
Value: lrSymbolIDSerial,
}
}
// --- Rules -----------------------------------------------------------------
// A Rule is a type for rules of a grammar. Rules cannot be shared between grammars.
type Rule struct {
Serial int // order number of this rule within a grammar
LHS *Symbol // symbols of left hand side
rhs []*Symbol // symbols of right hand side
}
func newRule() *Rule {
r := &Rule{}
r.rhs = make([]*Symbol, 0, 5)
return r
}
func (r *Rule) String() string {
return fmt.Sprintf("%v ➞ %v", r.LHS, r.rhs)
}
// RHS gets the right hand side of a rule as a shallow copy. Clients should treat
// it as read-only.
func (r *Rule) RHS() []*Symbol {
dup := make([]*Symbol, len(r.rhs))
copy(dup, r.rhs)
return dup
}
// IsEps returns true if this an epsilon-rule.
func (r *Rule) IsEps() bool {
return len(r.rhs) == 0
}
// Check if two RHS are identical. If parameter prefix is true, this function
// returns true when handle is a prefix of r, even if handle is of length 0.
func (r *Rule) eqRHS(handle []*Symbol, prefix bool) bool {
if r.rhs == nil && handle == nil {
return true
}
if r.rhs == nil || handle == nil {
return false
}
if !prefix && (len(r.rhs) != len(handle)) {
return false
}
for i := range r.rhs {
if i >= len(handle) {
return true
}
if r.rhs[i] != handle[i] {
return false
}
}
return true
}
// --- Grammar ---------------------------------------------------------------
// Grammar is a type for a grammar. Usually created using a GrammarBuilder.
type Grammar struct {
Name string // a grammar has a name, for documentation only
rules []*Rule // grammar productions, first one is start rule
Epsilon *Symbol // a special symbol representing epsilon
EOF *Symbol // a special symbol representing end of input
nonterminals map[int]*Symbol // all non-terminals
terminals map[int]*Symbol // all terminals
//terminalsByToken map[int]*Symbol // terminals, indexed by token value
}
func newLRGrammar(gname string) *Grammar {
g := &Grammar{}
g.Name = gname
g.rules = make([]*Rule, 0, 30)
g.terminals = make(map[int]*Symbol)
g.nonterminals = make(map[int]*Symbol)
//g.terminalsByToken = make(map[int]Symbol)
g.Epsilon = &Symbol{Name: "_eps", Value: EpsilonType}
g.EOF = &Symbol{Name: "_eof", Value: EOFType}
return g
}
// Size returns the number of rules in the grammar.
func (g *Grammar) Size() int {
return len(g.rules)
}
// Rule gets a grammar rule.
func (g *Grammar) Rule(no int) *Rule {
if no < 0 || no >= len(g.rules) {
return nil
}
return g.rules[no]
}
// Terminal returns the terminal symbol for a given token value, if it
// is defined in the grammar.
func (g *Grammar) Terminal(tokval int) *Symbol {
if t, ok := g.terminals[tokval]; ok {
return t
}
return nil
}
// SymbolByName gets a symbol for a given name, if found in the grammar.
func (g *Grammar) SymbolByName(name string) *Symbol {
var found *Symbol
g.EachSymbol(func(sym *Symbol) interface{} {
if sym.Name == name {
found = sym
}
return nil
})
return found
}
// FindNonTermRules returns a set of Earley-items, where each item stems from
// a rule with a given LHS and the dot is at position 0.
func (g *Grammar) FindNonTermRules(sym *Symbol, includeEpsRules bool) *iteratable.Set {
iset := iteratable.NewSet(0)
for _, r := range g.rules {
if r.LHS == sym {
if !r.IsEps() || includeEpsRules {
item, _ := StartItem(r)
iset.Add(item)
}
}
}
return iset
}
func (g *Grammar) matchesRHS(lhs *Symbol, handle []*Symbol) (*Rule, int) {
//func (g *Grammar) matchesRHS(lhs Symbol, handle []Symbol, prefix bool) (*Rule, int) {
for i, r := range g.rules {
if r.LHS == lhs {
if r.eqRHS(handle, false) {
return r, i
}
}
}
return nil, -1
}
// EachNonTerminal iterates over all non-terminal symbols of the grammar.
// Return values of the mapper function for all non-terminals are returned as an
// array.
func (g *Grammar) EachNonTerminal(mapper func(sym *Symbol) interface{}) []interface{} {
var r = make([]interface{}, 0, len(g.nonterminals))
for _, A := range g.nonterminals {
r = append(r, mapper(A))
}
return r
}
// EachTerminal iterates over all terminals of the grammar.
// Return values of the mapper function for all terminals are returned as an array.
func (g *Grammar) EachTerminal(mapper func(sym *Symbol) interface{}) []interface{} {
var r = make([]interface{}, 0, len(g.terminals))
for _, B := range g.terminals {
r = append(r, mapper(B))
}
return r
}
// EachSymbol iterates over all symbols of the grammar.
// Return values of the mapper function are returned as an array.
func (g *Grammar) EachSymbol(mapper func(sym *Symbol) interface{}) []interface{} {
var r = make([]interface{}, 0, len(g.terminals)+len(g.nonterminals))
for _, A := range g.nonterminals {
r = append(r, mapper(A))
}
for _, B := range g.terminals {
r = append(r, mapper(B))
}
return r
}
func (g *Grammar) resolveOrDefineNonTerminal(s string) *Symbol {
for _, nt := range g.nonterminals {
if nt.Name == s {
return nt
}
}
lrsym := newSymbol(s)
g.nonterminals[lrsym.Value] = lrsym
return lrsym
}
func (g *Grammar) resolveOrDefineTerminal(s string, tokval int) *Symbol {
for _, t := range g.terminals {
if t.Name == s {
return t
}
}
t := &Symbol{Name: s, Value: tokval}
g.terminals[tokval] = t
// if g.terminalsByToken[tokval] != nil {
// T().Errorf("duplicate terminal symbol for token value = %d", tokval)
// // proceed with fingers crossed
// }
//g.terminalsByToken[tokval] = lrsym
return t
}
// Dump is a debugging helper: dump symbols and rules to stdout.
func (g *Grammar) Dump() {
T().Debugf("--- %s --------------------------------------------\n", g.Name)
T().Debugf("epsilon = %d\n", g.Epsilon.Value)
T().Debugf("EOF = %d\n", g.EOF.Value)
for _, A := range g.nonterminals {
T().Debugf("N %v = %d\n", A, A.Value)
}
for _, A := range g.terminals {
T().Debugf("T %v = %d\n", A, A.Token())
}
for _, r := range g.rules {
T().Debugf("%3d: %s\n", r.Serial, r.String())
}
T().Debugf("-------------------------------------------------------")
}