-
Notifications
You must be signed in to change notification settings - Fork 6
/
ast.go
545 lines (474 loc) · 17.3 KB
/
ast.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
// Package ast declares the types used to represent syntax trees for bibtex
// files.
package ast
import (
gotok "go/token"
"github.com/jschaf/bibtex/token"
)
// Node is the interface shared by the syntax tree node types declared in this
// file.
type Node interface {
// Pos reports the position at which the node starts.
Pos() gotok.Pos
// End reports the position at which the node stops.
// NOTE(review): implementations in this file are not uniform (for example,
// leaf nodes return the position just past their text, whereas the
// declaration nodes return the position of their closing brace itself);
// confirm the intended contract before slicing source text with it.
End() gotok.Pos
// Kind reports the NodeKind constant identifying the node's concrete type.
Kind() NodeKind
}
// NodeKind identifies the concrete type of a Node. Each node type's Kind
// method returns one of the constants below.
type NodeKind int

// The node kinds. The values are consecutive, starting at zero, and are used
// as indexes into kindNames.
const (
	KindTexComment NodeKind = iota
	KindTexCommentGroup
	KindBadExpr
	KindIdent
	KindNumber
	KindAuthors
	KindAuthor
	KindUnparsedText
	KindParsedText
	KindText
	KindTextComma
	KindTextEscaped
	KindTextHyphen
	KindTextMath
	KindTextNBSP
	KindTextSpace
	KindTextMacro
	KindConcatExpr
	KindBadStmt
	KindTagStmt
	KindBadDecl
	KindAbbrevDecl
	KindBibDecl
	KindPreambleDecl
	KindFile
	KindPackage
)

// kindNames holds the display name of every NodeKind, indexed by the kind's
// value. Each name is the constant's identifier without the "Kind" prefix.
var kindNames = [...]string{
	KindTexComment:      "TexComment",
	KindTexCommentGroup: "TexCommentGroup",
	KindBadExpr:         "BadExpr",
	KindIdent:           "Ident",
	KindNumber:          "Number",
	KindAuthors:         "Authors",
	KindAuthor:          "Author",
	KindUnparsedText:    "UnparsedText",
	KindParsedText:      "ParsedText",
	KindText:            "Text",
	KindTextComma:       "TextComma",
	KindTextEscaped:     "TextEscaped",
	KindTextHyphen:      "TextHyphen",
	KindTextMath:        "TextMath",
	KindTextNBSP:        "TextNBSP",
	KindTextSpace:       "TextSpace",
	KindTextMacro:       "TextMacro",
	KindConcatExpr:      "ConcatExpr",
	KindBadStmt:         "BadStmt",
	KindTagStmt:         "TagStmt",
	KindBadDecl:         "BadDecl",
	KindAbbrevDecl:      "AbbrevDecl",
	KindBibDecl:         "BibDecl",
	KindPreambleDecl:    "PreambleDecl",
	KindFile:            "File",
	KindPackage:         "Package",
}

// String returns the display name of k from kindNames. A value outside the
// declared range yields "UnknownKind" instead of panicking on the array
// index, matching how TextDelimiter.String treats unknown values.
func (k NodeKind) String() string {
	if k < 0 || int(k) >= len(kindNames) {
		return "UnknownKind"
	}
	return kindNames[k]
}
// Expr is the interface implemented by all expression nodes.
type Expr interface {
Node
// exprNode is a marker method; the implementations in this file have empty
// bodies.
exprNode()
}
// Stmt is the interface implemented by all statement nodes, like bibtex entry
// tags.
type Stmt interface {
Node
// stmtNode is a marker method; the implementations in this file have empty
// bodies.
stmtNode()
}
// Decl is the interface implemented by all declaration nodes, like the
// @article, @STRING, @COMMENT, and @PREAMBLE entries.
type Decl interface {
Node
// declNode is a marker method; the implementations in this file have empty
// bodies.
declNode()
}
// ----------------------------------------------------------------------------
// Comments
// TexComment is one %-style comment.
type TexComment struct {
	Start gotok.Pos // where the '%' that opens the comment sits
	Text  string    // the comment's text, without the '\n'
}

// Pos returns Start, the position of the comment's '%'.
func (c *TexComment) Pos() gotok.Pos {
	return c.Start
}

// End returns Start advanced by the byte length of Text.
func (c *TexComment) End() gotok.Pos {
	return c.Start + gotok.Pos(len(c.Text))
}

// Kind returns KindTexComment.
func (c *TexComment) Kind() NodeKind {
	return KindTexComment
}
// TexCommentGroup is a run of comments that no other token and no empty line
// interrupts.
type TexCommentGroup struct {
	List []*TexComment // never empty: len(List) > 0
}

// Pos returns the start of the first comment. List must be non-empty.
func (g *TexCommentGroup) Pos() gotok.Pos {
	first := g.List[0]
	return first.Pos()
}

// End returns the end of the last comment. List must be non-empty.
func (g *TexCommentGroup) End() gotok.Pos {
	last := g.List[len(g.List)-1]
	return last.End()
}

// Kind returns KindTexCommentGroup.
func (g *TexCommentGroup) Kind() NodeKind {
	return KindTexCommentGroup
}
// ----------------------------------------------------------------------------
// Expressions
// TextDelimiter names the delimiter recorded on a ParsedText node.
type TextDelimiter int

// The known delimiters. QuoteDelimiter is the zero value.
const (
	QuoteDelimiter TextDelimiter = iota
	BraceDelimiter
)

// String returns the constant's identifier, or "UnknownDelimiter" for any
// value that is not one of the declared delimiters.
func (t TextDelimiter) String() string {
	if t == QuoteDelimiter {
		return "QuoteDelimiter"
	}
	if t == BraceDelimiter {
		return "BraceDelimiter"
	}
	return "UnknownDelimiter"
}
// An expression is represented by a tree consisting of one or more of the
// following concrete expressions.
type (
// A BadExpr node is a placeholder for expressions containing syntax errors
// for which no correct expression nodes can be created.
BadExpr struct {
From, To gotok.Pos // position range of bad expression
}
// An Ident node represents an identifier like a bibtex citation key or tag
// key.
Ident struct {
NamePos gotok.Pos // identifier position
Name string // identifier name
Obj *Object // denoted object; or nil
}
// A Number node represents an unquoted number, like:
// year = 2004
Number struct {
ValuePos gotok.Pos // number position
Value string // the number, kept as a string
}
// An Authors node represents a list of authors, typically in the author or
// editor fields of a bibtex declaration.
Authors []*Author
// An Author node represents a single bibtex author.
Author struct {
From, To gotok.Pos // position range of the author
First Expr // given name
Prefix Expr // often called the 'von' part
Last Expr // family name
Suffix Expr // often called the 'jr' part
}
// An UnparsedText is a bibtex string as it appears in source. Only appears
// when Mode.ParseStrings == 0 is passed to ParseFile.
UnparsedText struct {
ValuePos gotok.Pos // literal position
Type token.Token // token.String or token.BraceString
Value string // excluding delimiters
}
// A ParsedText node represents a parsed bibtex string.
ParsedText struct {
Opener gotok.Pos // opening delimiter
Depth int // the brace depth
Delim TextDelimiter // which delimiter encloses the text
Values []Expr // Text, ParsedText, or any of the Text* types
Closer gotok.Pos // closing delimiter
}
// A Text node is a string of simple text.
Text struct {
ValuePos gotok.Pos // literal position
Value string // the text itself
}
// A TextComma node is a string of exactly 1 comma. Useful because a comma has
// semantic meaning for parsing authors as a separator for names.
TextComma struct {
ValuePos gotok.Pos // literal position
}
// A TextEscaped node is a string of exactly 1 escaped character. The only
// escapable characters are:
// '\\', '$', '&', '%', '{', '}'
// In all other cases, a backslash is interpreted as the start of a TeX macro.
TextEscaped struct {
ValuePos gotok.Pos // literal position
Value string // the escaped char without the backslash
}
// A TextHyphen node is a string of exactly 1 hyphen (-). Hyphens are
// important in some cases of parsing author names so keep it as a separate
// node.
TextHyphen struct {
ValuePos gotok.Pos // literal position
}
// A TextMath node is the string delimited by dollar signs, representing Math
// in TeX.
TextMath struct {
ValuePos gotok.Pos // literal position
Value string // the text in-between the $...$, not including the $'s.
}
// A TextNBSP node is a single non-breaking space, represented in TeX as '~'.
TextNBSP struct {
ValuePos gotok.Pos // literal position
}
// A TextSpace node is any consecutive whitespace '\n', '\r', '\t', ' ' in a
// bibtex string.
TextSpace struct {
ValuePos gotok.Pos // literal position
Value string // the whitespace characters
}
// A TextMacro node represents a piece of ParsedText that's a latex macro
// invocation.
TextMacro struct {
Cmd gotok.Pos // command position
Name string // command name without backslash, e.g. 'url'
Values []Expr // parameters: Text, ParsedText, or TextMacro
RBrace gotok.Pos // position of the closing }, if any
}
// A ConcatExpr node represents a bibtex concatenation like:
// "foo" # "bar"
ConcatExpr struct {
X Expr // left operand
OpPos gotok.Pos // NOTE(review): presumably the position of the '#' - confirm
Y Expr // right operand
}
)
// Pos returns From, the start of the bad expression's range.
func (b *BadExpr) Pos() gotok.Pos {
	return b.From
}

// End returns To, the end of the bad expression's range.
func (b *BadExpr) End() gotok.Pos {
	return b.To
}

// Kind returns KindBadExpr.
func (b *BadExpr) Kind() NodeKind {
	return KindBadExpr
}

// exprNode marks BadExpr as an Expr.
func (b *BadExpr) exprNode() {}
// Pos returns NamePos, the position of the identifier.
func (id *Ident) Pos() gotok.Pos {
	return id.NamePos
}

// End returns NamePos advanced by the byte length of Name.
func (id *Ident) End() gotok.Pos {
	return id.NamePos + gotok.Pos(len(id.Name))
}

// Kind returns KindIdent.
func (id *Ident) Kind() NodeKind {
	return KindIdent
}

// exprNode marks Ident as an Expr.
func (id *Ident) exprNode() {}
// Pos returns ValuePos, the position of the number.
func (n *Number) Pos() gotok.Pos {
	return n.ValuePos
}

// End returns ValuePos advanced by the byte length of Value.
func (n *Number) End() gotok.Pos {
	return n.ValuePos + gotok.Pos(len(n.Value))
}

// Kind returns KindNumber.
func (n *Number) Kind() NodeKind {
	return KindNumber
}

// exprNode marks Number as an Expr.
func (n *Number) exprNode() {}
// Pos returns the From position of the first author, or gotok.NoPos when the
// list is empty.
func (x Authors) Pos() gotok.Pos {
	if len(x) == 0 {
		return gotok.NoPos
	}
	return x[0].From
}

// End returns the To position of the last author, or gotok.NoPos when the
// list is empty.
func (x Authors) End() gotok.Pos {
	if len(x) == 0 {
		return gotok.NoPos
	}
	return x[len(x)-1].To
}

// Kind returns KindAuthors.
func (x Authors) Kind() NodeKind { return KindAuthors }

// exprNode marks Authors as an Expr.
func (Authors) exprNode() {}
// Pos returns From, the start of the author's range.
func (x *Author) Pos() gotok.Pos { return x.From }

// End returns To, the end of the author's range.
func (x *Author) End() gotok.Pos { return x.To }

// Kind returns KindAuthor.
func (x *Author) Kind() NodeKind { return KindAuthor }

// IsEmpty reports whether all four name parts (First, Prefix, Last, Suffix)
// are *Text nodes holding the empty string.
func (x *Author) IsEmpty() bool {
	return x.hasTextParts("", "", "", "")
}

// IsOthers returns true if this author was created from the "and others"
// suffix in the authors field: Last is the text "others" and the remaining
// parts are empty text.
func (x *Author) IsOthers() bool {
	return x.hasTextParts("", "", "others", "")
}

// hasTextParts reports whether every name part is a *Text node whose Value
// equals the corresponding argument. A part that is nil, or that holds any
// other Expr type, never matches.
func (x *Author) hasTextParts(first, prefix, last, suffix string) bool {
	parts := [...]struct {
		expr Expr
		want string
	}{
		{x.First, first},
		{x.Prefix, prefix},
		{x.Last, last},
		{x.Suffix, suffix},
	}
	for _, p := range parts {
		if txt, ok := p.expr.(*Text); !ok || txt.Value != p.want {
			return false
		}
	}
	return true
}

// exprNode marks Author as an Expr.
func (x *Author) exprNode() {}
// Pos returns ValuePos, the position of the literal.
func (u *UnparsedText) Pos() gotok.Pos {
	return u.ValuePos
}

// End returns ValuePos advanced by the byte length of Value.
// NOTE(review): Value is documented as excluding the delimiters, so they are
// not counted here - confirm that this is intended.
func (u *UnparsedText) End() gotok.Pos {
	return u.ValuePos + gotok.Pos(len(u.Value))
}

// Kind returns KindUnparsedText.
func (u *UnparsedText) Kind() NodeKind {
	return KindUnparsedText
}

// exprNode marks UnparsedText as an Expr.
func (u *UnparsedText) exprNode() {}
// Pos returns Opener, the position of the opening delimiter.
func (x *ParsedText) Pos() gotok.Pos { return x.Opener }

// End returns the position at which the text stops. It prefers Closer, the
// position of the closing delimiter, the same way TextMacro.End prefers
// RBrace. When Closer is unset it falls back to the end of the last value,
// and to Opener when there are no values.
//
// Earlier versions returned the start of the last value, which left that
// value outside the range Pos..End.
func (x *ParsedText) End() gotok.Pos {
	if x.Closer != gotok.NoPos {
		return x.Closer
	}
	if n := len(x.Values); n > 0 {
		return x.Values[n-1].End()
	}
	return x.Opener
}

// Kind returns KindParsedText.
func (x *ParsedText) Kind() NodeKind { return KindParsedText }

// exprNode marks ParsedText as an Expr.
func (*ParsedText) exprNode() {}
// Pos returns ValuePos, the position of the text.
func (t *Text) Pos() gotok.Pos {
	return t.ValuePos
}

// End returns ValuePos advanced by the byte length of Value.
func (t *Text) End() gotok.Pos {
	return t.ValuePos + gotok.Pos(len(t.Value))
}

// Kind returns KindText.
func (t *Text) Kind() NodeKind {
	return KindText
}

// exprNode marks Text as an Expr.
func (t *Text) exprNode() {}
// Pos returns ValuePos, the position of the comma.
func (c *TextComma) Pos() gotok.Pos {
	return c.ValuePos
}

// End returns the position one byte past ValuePos, the width of ",".
func (c *TextComma) End() gotok.Pos {
	const width = len(",")
	return c.ValuePos + gotok.Pos(width)
}

// Kind returns KindTextComma.
func (c *TextComma) Kind() NodeKind {
	return KindTextComma
}

// exprNode marks TextComma as an Expr.
func (c *TextComma) exprNode() {}
// Pos returns ValuePos, the position of the escape sequence.
func (e *TextEscaped) Pos() gotok.Pos {
	return e.ValuePos
}

// End returns ValuePos advanced by one byte for the backslash plus the byte
// length of Value.
func (e *TextEscaped) End() gotok.Pos {
	width := len(`\`) + len(e.Value)
	return e.ValuePos + gotok.Pos(width)
}

// Kind returns KindTextEscaped.
func (e *TextEscaped) Kind() NodeKind {
	return KindTextEscaped
}

// exprNode marks TextEscaped as an Expr.
func (e *TextEscaped) exprNode() {}
// Pos returns ValuePos, the position of the hyphen.
func (h *TextHyphen) Pos() gotok.Pos {
	return h.ValuePos
}

// End returns the position one byte past ValuePos, the width of "-".
func (h *TextHyphen) End() gotok.Pos {
	const width = len("-")
	return h.ValuePos + gotok.Pos(width)
}

// Kind returns KindTextHyphen.
func (h *TextHyphen) Kind() NodeKind {
	return KindTextHyphen
}

// exprNode marks TextHyphen as an Expr.
func (h *TextHyphen) exprNode() {}
// Pos returns ValuePos, the position of the math text.
func (m *TextMath) Pos() gotok.Pos {
	return m.ValuePos
}

// End returns ValuePos advanced by the byte length of Value plus two bytes
// for the pair of '$' delimiters.
func (m *TextMath) End() gotok.Pos {
	const dollars = 2 * len("$")
	return m.ValuePos + gotok.Pos(dollars+len(m.Value))
}

// Kind returns KindTextMath.
func (m *TextMath) Kind() NodeKind {
	return KindTextMath
}

// exprNode marks TextMath as an Expr.
func (m *TextMath) exprNode() {}
// Pos returns ValuePos, the position of the '~'.
func (s *TextNBSP) Pos() gotok.Pos {
	return s.ValuePos
}

// End returns the position one byte past ValuePos, the width of "~".
func (s *TextNBSP) End() gotok.Pos {
	const width = len("~")
	return s.ValuePos + gotok.Pos(width)
}

// Kind returns KindTextNBSP.
func (s *TextNBSP) Kind() NodeKind {
	return KindTextNBSP
}

// exprNode marks TextNBSP as an Expr.
func (s *TextNBSP) exprNode() {}
// Pos returns ValuePos, the position of the whitespace.
func (s *TextSpace) Pos() gotok.Pos {
	return s.ValuePos
}

// End returns ValuePos advanced by the byte length of Value.
func (s *TextSpace) End() gotok.Pos {
	return s.ValuePos + gotok.Pos(len(s.Value))
}

// Kind returns KindTextSpace.
func (s *TextSpace) Kind() NodeKind {
	return KindTextSpace
}

// exprNode marks TextSpace as an Expr.
func (s *TextSpace) exprNode() {}
// Pos returns Cmd, the position of the command.
func (x *TextMacro) Pos() gotok.Pos { return x.Cmd }

// End returns RBrace when a closing brace was recorded. Otherwise it returns
// the end of the last parameter, or Cmd when there are no parameters.
//
// Earlier versions returned the start of the last parameter, which left that
// parameter outside the range Pos..End.
//
// NOTE(review): a macro with neither RBrace nor parameters still reports
// End == Pos; the command name itself is not counted - confirm whether that
// is intended.
func (x *TextMacro) End() gotok.Pos {
	if x.RBrace != gotok.NoPos {
		return x.RBrace
	}
	if n := len(x.Values); n > 0 {
		return x.Values[n-1].End()
	}
	return x.Cmd
}

// Kind returns KindTextMacro.
func (x *TextMacro) Kind() NodeKind { return KindTextMacro }

// exprNode marks TextMacro as an Expr.
func (*TextMacro) exprNode() {}
// Pos returns the start of the left operand X.
func (x *ConcatExpr) Pos() gotok.Pos { return x.X.Pos() }

// End returns the end of the right operand Y, so that the range Pos..End
// spans both operands. Earlier versions returned Y's start, which left the
// right operand outside the range.
func (x *ConcatExpr) End() gotok.Pos { return x.Y.End() }

// Kind returns KindConcatExpr.
func (x *ConcatExpr) Kind() NodeKind { return KindConcatExpr }

// exprNode marks ConcatExpr as an Expr.
func (*ConcatExpr) exprNode() {}
// ----------------------------------------------------------------------------
// Statements
// A statement is represented by a tree consisting of one or more of the
// following concrete statement nodes.
type (
// A BadStmt node is a placeholder for statements containing syntax errors
// for which no correct statement nodes can be created.
BadStmt struct {
From, To gotok.Pos // position range of bad statement
}
// A TagStmt node represents a tag in a BibDecl or AbbrevDecl, e.g.
// author = "foo".
TagStmt struct {
Doc *TexCommentGroup // associated documentation; or nil
NamePos gotok.Pos // identifier position
Name string // identifier name, normalized with lowercase
RawName string // identifier name as it appeared in source
Value Expr // denoted expression
}
)
// Pos returns From, the start of the bad statement's range.
func (s *BadStmt) Pos() gotok.Pos {
	return s.From
}

// End returns To, the end of the bad statement's range.
func (s *BadStmt) End() gotok.Pos {
	return s.To
}

// Kind returns KindBadStmt.
func (s *BadStmt) Kind() NodeKind {
	return KindBadStmt
}

// stmtNode marks BadStmt as a Stmt.
func (s *BadStmt) stmtNode() {}
// Pos returns NamePos, the position of the tag's identifier.
func (x *TagStmt) Pos() gotok.Pos { return x.NamePos }

// End returns the end of Value, so that the range Pos..End spans the whole
// tag. Earlier versions returned Value's start, which left the value outside
// the range. Value must be non-nil.
func (x *TagStmt) End() gotok.Pos { return x.Value.End() }

// Kind returns KindTagStmt.
func (x *TagStmt) Kind() NodeKind { return KindTagStmt }

// stmtNode marks TagStmt as a Stmt.
func (*TagStmt) stmtNode() {}
// ----------------------------------------------------------------------------
// Declarations
// A declaration is represented by one of the following declaration nodes.
type (
// A BadDecl node is a placeholder for declarations containing syntax errors
// for which no correct declaration nodes can be created.
BadDecl struct {
From, To gotok.Pos // position range of bad declaration
}
// An AbbrevDecl node represents a bibtex abbreviation, like:
// @STRING { foo = "bar" }
AbbrevDecl struct {
Doc *TexCommentGroup // associated documentation; or nil
Entry gotok.Pos // position of the "@STRING" token
Tag *TagStmt // the abbreviation's tag, like foo = "bar" in the example above
RBrace gotok.Pos // position of the closing right brace token: "}".
}
// A BibDecl node represents a bibtex entry, like:
// @article { author = "bar" }
BibDecl struct {
Type string // type of entry, e.g. "article"
Doc *TexCommentGroup // associated documentation; or nil
Entry gotok.Pos // position of the start token, e.g. "@article"
Key *Ident // the first key in the declaration
ExtraKeys []*Ident // any other keys in the declaration, usually nil
Tags []*TagStmt // all tags in the declaration
RBrace gotok.Pos // position of the closing right brace token: "}".
}
// A PreambleDecl node represents a bibtex preamble, like:
// @PREAMBLE { "foo" }
PreambleDecl struct {
Doc *TexCommentGroup // associated documentation; or nil
Entry gotok.Pos // position of the "@PREAMBLE" token
Text Expr // The content of the preamble node
RBrace gotok.Pos // position of the closing right brace token: "}"
}
)
// Pos returns From, the start of the bad declaration's range.
func (d *BadDecl) Pos() gotok.Pos {
	return d.From
}

// End returns To, the end of the bad declaration's range.
func (d *BadDecl) End() gotok.Pos {
	return d.To
}

// Kind returns KindBadDecl.
func (d *BadDecl) Kind() NodeKind {
	return KindBadDecl
}

// declNode marks BadDecl as a Decl.
func (d *BadDecl) declNode() {}
// Pos returns Entry, the position of the "@STRING" token.
func (d *AbbrevDecl) Pos() gotok.Pos {
	return d.Entry
}

// End returns RBrace, the position of the closing right brace.
func (d *AbbrevDecl) End() gotok.Pos {
	return d.RBrace
}

// Kind returns KindAbbrevDecl.
func (d *AbbrevDecl) Kind() NodeKind {
	return KindAbbrevDecl
}

// declNode marks AbbrevDecl as a Decl.
func (d *AbbrevDecl) declNode() {}
// Pos returns Entry, the position of the entry's start token.
func (d *BibDecl) Pos() gotok.Pos {
	return d.Entry
}

// End returns RBrace, the position of the closing right brace.
func (d *BibDecl) End() gotok.Pos {
	return d.RBrace
}

// Kind returns KindBibDecl.
func (d *BibDecl) Kind() NodeKind {
	return KindBibDecl
}

// declNode marks BibDecl as a Decl.
func (d *BibDecl) declNode() {}
// Pos returns Entry, the position of the "@PREAMBLE" token.
func (d *PreambleDecl) Pos() gotok.Pos {
	return d.Entry
}

// End returns RBrace, the position of the closing right brace.
func (d *PreambleDecl) End() gotok.Pos {
	return d.RBrace
}

// Kind returns KindPreambleDecl.
func (d *PreambleDecl) Kind() NodeKind {
	return KindPreambleDecl
}

// declNode marks PreambleDecl as a Decl.
func (d *PreambleDecl) declNode() {}
// ----------------------------------------------------------------------------
// Files and packages
// File is the node for a single bibtex source file.
//
// Comments lists every comment group in the source file in order of
// appearance. Groups that other nodes also point to, through fields such as
// Doc, are included in that list.
//
// When source code containing comments is printed (the original note cites
// packages go/format and go/printer), comments are interspersed between
// tokens according to their position. After removing or moving syntax tree
// nodes, the comments near them must therefore be removed from Comments as
// well, or moved by updating their positions. A CommentMap may be used to
// facilitate some of these operations.
//
// Whether and how a comment belongs to a node is decided by the program that
// manipulates the tree: apart from comments attached directly to nodes, the
// remaining comments are "free-floating".
type File struct {
	Name       string
	Doc        *TexCommentGroup   // associated documentation; or nil
	Entries    []Decl             // top-level entries; or nil
	Scope      *Scope             // package scope (this file only)
	Unresolved []*Ident           // unresolved identifiers in this file
	Comments   []*TexCommentGroup // list of all comments in the source file
}

// Pos always returns position 1.
func (f *File) Pos() gotok.Pos {
	return gotok.Pos(1)
}

// End returns the end of the last entry, or position 1 when the file has no
// entries.
func (f *File) End() gotok.Pos {
	if len(f.Entries) == 0 {
		return gotok.Pos(1)
	}
	last := f.Entries[len(f.Entries)-1]
	return last.End()
}

// Kind returns KindFile.
func (f *File) Kind() NodeKind {
	return KindFile
}
// Package groups a set of source files that together form a single, unified
// bibliography.
type Package struct {
	Scope   *Scope             // package scope across all files
	Objects map[string]*Object // map of package id -> package object
	Files   map[string]*File   // Bibtex source files by filename
}

// Pos always returns gotok.NoPos.
func (p *Package) Pos() gotok.Pos {
	return gotok.NoPos
}

// End always returns gotok.NoPos.
func (p *Package) End() gotok.Pos {
	return gotok.NoPos
}

// Kind returns KindPackage.
func (p *Package) Kind() NodeKind {
	return KindPackage
}