Skip to content

Commit 3ff9869

Browse files
committed
implements strongSpaces parsing mode
1 parent 798ae48 commit 3ff9869

File tree

8 files changed

+123
-43
lines changed

8 files changed

+123
-43
lines changed

compiler/canonicalizer.nim

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ proc hashSym(c: var MD5Context, s: PSym) =
8181

8282
proc hashTree(c: var MD5Context, n: PNode) =
8383
if n == nil:
84-
c &= "noTreeKind"
84+
c &= "\255"
8585
return
8686
var k = n.kind
8787
md5Update(c, cast[cstring](addr(k)), 1)
@@ -107,7 +107,7 @@ proc hashTree(c: var MD5Context, n: PNode) =
107107
proc hashType(c: var MD5Context, t: PType) =
108108
# modelled after 'typeToString'
109109
if t == nil:
110-
c &= "noTypeKind"
110+
c &= "\254"
111111
return
112112

113113
var k = t.kind
@@ -168,7 +168,7 @@ proc canonConst(n: PNode): TUid =
168168
c.hashType(n.typ)
169169
md5Final(c, MD5Digest(result))
170170

171-
proc canonSym(s: PSym): TUid
171+
proc canonSym(s: PSym): TUid =
172172
var c: MD5Context
173173
md5Init(c)
174174
c.hashSym(s)

compiler/lexer.nim

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ type
110110
fNumber*: BiggestFloat # the parsed floating point literal
111111
base*: TNumericalBase # the numerical base; only valid for int
112112
# or float literals
113+
strongSpaceA*: int8 # leading spaces of an operator
114+
strongSpaceB*: int8 # trailing spaces of an operator
113115
literal*: string # the parsed (string) literal; and
114116
# documentation comments are here too
115117
line*, col*: int
@@ -119,6 +121,7 @@ type
119121
indentAhead*: int # if > 0 an indendation has already been read
120122
# this is needed because scanning comments
121123
# needs so much look-ahead
124+
strongSpaces*: bool
122125

123126

124127
var gLinesCompiled*: int # all lines that have been compiled
@@ -183,6 +186,7 @@ proc initToken*(L: var TToken) =
183186
L.tokType = tkInvalid
184187
L.iNumber = 0
185188
L.indent = 0
189+
L.strongSpaceA = 0
186190
L.literal = ""
187191
L.fNumber = 0.0
188192
L.base = base10
@@ -192,6 +196,7 @@ proc fillToken(L: var TToken) =
192196
L.tokType = tkInvalid
193197
L.iNumber = 0
194198
L.indent = 0
199+
L.strongSpaceA = 0
195200
setLen(L.literal, 0)
196201
L.fNumber = 0.0
197202
L.base = base10
@@ -634,6 +639,14 @@ proc getOperator(L: var TLexer, tok: var TToken) =
634639
h = h !& ord(c)
635640
inc(pos)
636641
endOperator(L, tok, pos, h)
642+
# advance pos but don't store it in L.bufpos so the next token (which might
643+
# be an operator too) gets the preceding spaces:
644+
tok.strongSpaceB = 0
645+
while buf[pos] == ' ':
646+
inc pos
647+
inc tok.strongSpaceB
648+
if buf[pos] in {CR, LF, nimlexbase.EndOfFile}:
649+
tok.strongSpaceB = -1
637650

638651
proc scanComment(L: var TLexer, tok: var TToken) =
639652
var pos = L.bufpos
@@ -677,10 +690,12 @@ proc scanComment(L: var TLexer, tok: var TToken) =
677690
proc skip(L: var TLexer, tok: var TToken) =
678691
var pos = L.bufpos
679692
var buf = L.buf
693+
tok.strongSpaceA = 0
680694
while true:
681695
case buf[pos]
682696
of ' ':
683697
inc(pos)
698+
inc(tok.strongSpaceA)
684699
of Tabulator:
685700
lexMessagePos(L, errTabulatorsAreNotAllowed, pos)
686701
inc(pos)
@@ -691,6 +706,7 @@ proc skip(L: var TLexer, tok: var TToken) =
691706
while buf[pos] == ' ':
692707
inc(pos)
693708
inc(indent)
709+
tok.strongSpaceA = 0
694710
if buf[pos] > ' ':
695711
tok.indent = indent
696712
break

compiler/parser.nim

Lines changed: 45 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ type
3838
inSemiStmtList: int
3939

4040
proc parseAll*(p: var TParser): PNode
41-
proc openParser*(p: var TParser, filename: string, inputstream: PLLStream)
4241
proc closeParser*(p: var TParser)
4342
proc parseTopLevelStmt*(p: var TParser): PNode
4443
# implements an iterator. Returns the next top-level statement or
@@ -50,7 +49,6 @@ proc parseString*(s: string, filename: string = "", line: int = 0): PNode
5049
# correct error messages referring to the original source.
5150

5251
# helpers for the other parsers
53-
proc getPrecedence*(tok: TToken): int
5452
proc isOperator*(tok: TToken): bool
5553
proc getTok*(p: var TParser)
5654
proc parMessage*(p: TParser, msg: TMsgKind, arg: string = "")
@@ -77,14 +75,17 @@ proc parseCase(p: var TParser): PNode
7775
proc getTok(p: var TParser) =
7876
rawGetTok(p.lex, p.tok)
7977

80-
proc openParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream) =
78+
proc openParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream,
79+
strongSpaces=false) =
8180
initToken(p.tok)
8281
openLexer(p.lex, fileIdx, inputStream)
8382
getTok(p) # read the first token
8483
p.firstTok = true
84+
p.strongSpaces = strongSpaces
8585

86-
proc openParser*(p: var TParser, filename: string, inputStream: PLLStream) =
87-
openParser(p, filename.fileInfoIdx, inputstream)
86+
proc openParser*(p: var TParser, filename: string, inputStream: PLLStream,
87+
strongSpaces=false) =
88+
openParser(p, filename.fileInfoIdx, inputstream, strongSpaces)
8889

8990
proc closeParser(p: var TParser) =
9091
closeLexer(p.lex)
@@ -193,34 +194,52 @@ proc isSigilLike(tok: TToken): bool {.inline.} =
193194
proc isLeftAssociative(tok: TToken): bool {.inline.} =
194195
result = tok.tokType != tkOpr or relevantOprChar(tok.ident) != '^'
195196

196-
proc getPrecedence(tok: TToken): int =
197+
proc getPrecedence(tok: TToken, strongSpaces: bool): int =
198+
template considerStrongSpaces(x): expr =
199+
x + (if strongSpaces: 100 - tok.strongSpaceA.int*10 else: 0)
200+
197201
case tok.tokType
198202
of tkOpr:
199203
let L = tok.ident.s.len
200204
let relevantChar = relevantOprChar(tok.ident)
201205

202-
template considerAsgn(value: expr) =
203-
result = if tok.ident.s[L-1] == '=': 1 else: value
206+
template considerAsgn(value: expr) =
207+
result = if tok.ident.s[L-1] == '=': 1 else: considerStrongSpaces(value)
204208

205209
case relevantChar
206210
of '$', '^': considerAsgn(10)
207211
of '*', '%', '/', '\\': considerAsgn(9)
208-
of '~': result = 8
212+
of '~': result = considerStrongSpaces(8)
209213
of '+', '-', '|': considerAsgn(8)
210214
of '&': considerAsgn(7)
211-
of '=', '<', '>', '!': result = 5
215+
of '=', '<', '>', '!': result = considerStrongSpaces(5)
212216
of '.': considerAsgn(6)
213-
of '?': result = 2
217+
of '?': result = considerStrongSpaces(2)
214218
else: considerAsgn(2)
215219
of tkDiv, tkMod, tkShl, tkShr: result = 9
216220
of tkIn, tkNotin, tkIs, tkIsnot, tkNot, tkOf, tkAs: result = 5
217-
of tkDotDot: result = 6
221+
of tkDotDot: result = considerStrongSpaces(6)
218222
of tkAnd: result = 4
219223
of tkOr, tkXor: result = 3
220-
else: result = - 10
221-
222-
proc isOperator(tok: TToken): bool =
223-
result = getPrecedence(tok) >= 0
224+
else: result = -10
225+
226+
proc isOperator(tok: TToken): bool =
227+
tok.tokType in {tkOpr, tkDiv, tkMod, tkShl, tkShr, tkIn, tkNotin, tkIs,
228+
tkIsnot, tkNot, tkOf, tkAs, tkDotDot, tkAnd, tkOr, tkXor}
229+
230+
proc isUnary(p: TParser): bool =
231+
p.strongSpaces and p.tok.tokType in {tkOpr, tkDotDot} and
232+
p.tok.strongSpaceB == 0 and
233+
p.tok.strongSpaceA > 0
234+
235+
proc checkBinary(p: TParser) {.inline.} =
236+
# we don't check '..' here as that's too annoying
237+
if p.strongSpaces and p.tok.tokType == tkOpr:
238+
if p.tok.strongSpaceB > 0 and p.tok.strongSpaceA != p.tok.strongSpaceB:
239+
parMessage(p, errGenerated, "number of spaces around '$#' not consistent"%
240+
prettyTok(p.tok))
241+
elif p.tok.strongSpaceA notin {0,1,2,4,8}:
242+
parMessage(p, errGenerated, "number of spaces must be 0,1,2,4 or 8")
224243

225244
#| module = stmt ^* (';' / IND{=})
226245
#|
@@ -650,6 +669,7 @@ proc primarySuffix(p: var TParser, r: PNode): PNode =
650669
while p.tok.indent < 0:
651670
case p.tok.tokType
652671
of tkParLe:
672+
if p.strongSpaces and p.tok.strongSpaceA > 0: break
653673
result = namedParams(p, result, nkCall, tkParRi)
654674
if result.len > 1 and result.sons[1].kind == nkExprColonExpr:
655675
result.kind = nkObjConstr
@@ -664,8 +684,10 @@ proc primarySuffix(p: var TParser, r: PNode): PNode =
664684
result = dotExpr(p, result)
665685
result = parseGStrLit(p, result)
666686
of tkBracketLe:
687+
if p.strongSpaces and p.tok.strongSpaceA > 0: break
667688
result = namedParams(p, result, nkBracketExpr, tkBracketRi)
668689
of tkCurlyLe:
690+
if p.strongSpaces and p.tok.strongSpaceA > 0: break
669691
result = namedParams(p, result, nkCurlyExpr, tkCurlyRi)
670692
of tkSymbol, tkAccent, tkIntLit..tkCharLit, tkNil, tkCast:
671693
if p.inPragma == 0:
@@ -695,10 +717,11 @@ proc primary(p: var TParser, mode: TPrimaryMode): PNode
695717
proc simpleExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode =
696718
result = primary(p, mode)
697719
# expand while operators have priorities higher than 'limit'
698-
var opPrec = getPrecedence(p.tok)
720+
var opPrec = getPrecedence(p.tok, p.strongSpaces)
699721
let modeB = if mode == pmTypeDef: pmTypeDesc else: mode
700722
# the operator itself must not start on a new line:
701-
while opPrec >= limit and p.tok.indent < 0:
723+
while opPrec >= limit and p.tok.indent < 0 and not isUnary(p):
724+
checkBinary(p)
702725
var leftAssoc = ord(isLeftAssociative(p.tok))
703726
var a = newNodeP(nkInfix, p)
704727
var opNode = newIdentNodeP(p.tok.ident, p) # skip operator:
@@ -710,7 +733,7 @@ proc simpleExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode =
710733
addSon(a, result)
711734
addSon(a, b)
712735
result = a
713-
opPrec = getPrecedence(p.tok)
736+
opPrec = getPrecedence(p.tok, p.strongSpaces)
714737

715738
proc simpleExpr(p: var TParser, mode = pmNormal): PNode =
716739
result = simpleExprAux(p, -1, mode)
@@ -1933,7 +1956,9 @@ proc parseString(s: string, filename: string = "", line: int = 0): PNode =
19331956
stream.lineOffset = line
19341957

19351958
var parser: TParser
1936-
openParser(parser, filename, stream)
1959+
# XXX for now the builtin 'parseStmt/Expr' functions do not know about strong
1960+
# spaces...
1961+
openParser(parser, filename, stream, false)
19371962

19381963
result = parser.parseAll
19391964
closeParser(parser)

compiler/pragmas.nim

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,25 +97,21 @@ proc makeExternImport(s: PSym, extname: string) =
9797
incl(s.flags, sfImportc)
9898
excl(s.flags, sfForward)
9999

100-
const invalidIdentChars = AllChars - IdentChars
101-
102100
proc validateExternCName(s: PSym, info: TLineInfo) =
103101
## Validates that the symbol name in s.loc.r is a valid C identifier.
104102
##
105103
## Valid identifiers are those alphanumeric including the underscore not
106104
## starting with a number. If the check fails, a generic error will be
107105
## displayed to the user.
108106
let target = ropeToStr(s.loc.r)
109-
if target.len < 1 or (not (target[0] in IdentStartChars)) or
110-
(not target.allCharsInSet(IdentChars)):
107+
if target.len < 1 or target[0] notin IdentStartChars or
108+
not target.allCharsInSet(IdentChars):
111109
localError(info, errGenerated, "invalid exported symbol")
112110

113111
proc makeExternExport(s: PSym, extname: string, info: TLineInfo) =
114112
setExternName(s, extname)
115-
case gCmd
116-
of cmdCompileToC, cmdCompileToCpp, cmdCompileToOC:
113+
if gCmd in {cmdCompileToC, cmdCompileToCpp, cmdCompileToOC}:
117114
validateExternCName(s, info)
118-
else: discard
119115
incl(s.flags, sfExportc)
120116

121117
proc processImportCompilerProc(s: PSym, extname: string) =

compiler/syntaxes.nim

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@ type
1717
TFilterKind* = enum
1818
filtNone, filtTemplate, filtReplace, filtStrip
1919
TParserKind* = enum
20-
skinStandard, skinBraces, skinEndX
20+
skinStandard, skinStrongSpaces, skinBraces, skinEndX
2121

2222
const
23-
parserNames*: array[TParserKind, string] = ["standard", "braces", "endx"]
24-
filterNames*: array[TFilterKind, string] = ["none", "stdtmpl", "replace",
25-
"strip"]
23+
parserNames*: array[TParserKind, string] = ["standard", "strongspaces",
24+
"braces", "endx"]
25+
filterNames*: array[TFilterKind, string] = ["none", "stdtmpl", "replace",
26+
"strip"]
2627

27-
type
28+
type
2829
TParsers*{.final.} = object
2930
skin*: TParserKind
3031
parser*: TParser
@@ -54,7 +55,7 @@ proc parseFile(fileIdx: int32): PNode =
5455

5556
proc parseAll(p: var TParsers): PNode =
5657
case p.skin
57-
of skinStandard:
58+
of skinStandard, skinStrongSpaces:
5859
result = parser.parseAll(p.parser)
5960
of skinBraces:
6061
result = pbraces.parseAll(p.parser)
@@ -65,7 +66,7 @@ proc parseAll(p: var TParsers): PNode =
6566

6667
proc parseTopLevelStmt(p: var TParsers): PNode =
6768
case p.skin
68-
of skinStandard:
69+
of skinStandard, skinStrongSpaces:
6970
result = parser.parseTopLevelStmt(p.parser)
7071
of skinBraces:
7172
result = pbraces.parseTopLevelStmt(p.parser)
@@ -170,7 +171,9 @@ proc openParsers(p: var TParsers, fileIdx: int32, inputstream: PLLStream) =
170171
else: s = inputstream
171172
case p.skin
172173
of skinStandard, skinBraces, skinEndX:
173-
parser.openParser(p.parser, fileIdx, s)
174+
parser.openParser(p.parser, fileIdx, s, false)
175+
of skinStrongSpaces:
176+
parser.openParser(p.parser, fileIdx, s, true)
174177

175-
proc closeParsers(p: var TParsers) =
178+
proc closeParsers(p: var TParsers) =
176179
parser.closeParser(p.parser)

doc/manual.txt

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -480,8 +480,8 @@ precedence and associativity; this is useful for meta programming.
480480
Associativity
481481
-------------
482482

483-
All binary operators are left-associative, except binary operators whose
484-
relevant char is ``^``.
483+
Binary operators whose relevant character is ``^`` are right-associative, all
484+
other binary operators are left-associative.
485485

486486
Precedence
487487
----------
@@ -508,14 +508,54 @@ Precedence level Operators Relevant char
508508
7 ``+ -`` ``+ ~ |`` OP7
509509
6 ``&`` ``&`` OP6
510510
5 ``..`` ``.`` OP5
511-
4 ``== <= < >= > != in not_in is isnot not of`` ``= < > !`` OP4
511+
4 ``== <= < >= > != in notin is isnot not of`` ``= < > !`` OP4
512512
3 ``and`` OP3
513513
2 ``or xor`` OP2
514514
1 ``@ : ?`` OP1
515515
0 (lowest) *assignment operator* (like ``+=``, ``*=``) OP0
516516
================ =============================================== ================== ===============
517517

518518

519+
Strong spaces
520+
-------------
521+
522+
The number of spaces preceding a non-keyword operator affects precedence
523+
if the experimental parser directive ``#!strongSpaces`` is used. Indentation
524+
is not used to determine the number of spaces. If 2 or more operators have the
525+
same number of preceding spaces the precedence table applies, so ``1 + 3 * 4``
526+
is still parsed as ``1 + (3 * 4)``, but ``1+3 * 4`` is parsed as ``(1+3) * 4``:
527+
528+
.. code-block:: nimrod
529+
#! strongSpaces
530+
if foo+4 * 4 == 8 and b&c | 9 ++
531+
bar:
532+
echo ""
533+
# is parsed as
534+
if ((foo+4)*4 == 8) and (((b&c) | 9) ++ bar): echo ""
535+
536+
537+
Furthermore whether an operator is used as a prefix operator is affected by the
538+
number of spaces:
539+
540+
.. code-block:: nimrod
541+
#! strongSpaces
542+
echo $foo
543+
# is parsed as
544+
echo($foo)
545+
546+
This also affects whether ``[]``, ``{}``, ``()`` are parsed as constructors
547+
or as accessors:
548+
549+
.. code-block:: nimrod
550+
#! strongSpaces
551+
echo (1,2)
552+
# is parsed as
553+
echo((1,2))
554+
555+
556+
Grammar
557+
-------
558+
519559
The grammar's start symbol is ``module``.
520560

521561
.. include:: grammar.txt

0 commit comments

Comments
 (0)