Skip to content

Commit

Permalink
DOC Add EBNF grammar of NGLess language
Browse files Browse the repository at this point in the history
This also led to a minor improvement in the grammar as implemented in
Parse.hs
  • Loading branch information
luispedro committed Dec 28, 2018
1 parent 5d66989 commit 930294d
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 3 deletions.
2 changes: 1 addition & 1 deletion NGLess/Parse.hs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ base_expression = pexpression
<|> rawexpr
<|> (Lookup Nothing <$> variable)

pexpression = operator '(' *> expression <* operator ')'
pexpression = operator '(' *> innerexpression <* operator ')'

tokf :: (Token -> Maybe a) -> Parser a
tokf f = token (show .snd) fst (f . snd)
Expand Down
123 changes: 121 additions & 2 deletions docs/sources/Language.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ This document describes the NGLess language.
## Tokenization

Tokenization follows the standard C-family rules. A word is anything that
matches `[A-Za-z_]`. The language is case-sensitive. All files are assumed to
be in UTF-8.
matches `[A-Za-z_][A-Za-z_0-9]*`. The language is case-sensitive. All files are
assumed to be in UTF-8.

Both LF and CRLF are accepted as line endings (Unix-style LF is preferred).

Expand Down Expand Up @@ -207,3 +207,122 @@ built-in functions or those added by modules can be used.
Methods are called using the syntax `object . methodName ( <ARGS> )`. As with
functions, one argument may be unnamed, all others must be passed by name.

## Grammar


This is the extended Backus-Naur form grammar for the NGLess language (using
the [ISO
14977](https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form)
conventions). Briefly, the comma (`,`) is used for concatenation, `[x]`
denotes _optional_, and `{x}` denotes _zero or more of `x`_.


string = ? a quoted string, produced by the tokenizer ? ;
word = ? a word produced by the tokenizer ? ;

eol =
';'
| '\n', {'\n'}
;


ngless = [header], body;

header = {eol}, ngless_version, {eol}, {import}, {eol} ;

ngless_version = "ngless", string, eol ;

import = ["local"], "import", string, "version", string, eol ;

body = {expression, eol} ;

expression =
conditional
| "discard"
| "continue"
| assignment
| innerexpression
;

innerexpression = left_expression, binop, innerexpression
| left_expression
;

left_expression = uoperator
| method_call
| indexexpr
| base_expression
;

base_expression = pexpression
| funccall
| listexpr
| constant
| variable
;

pexpression = '(', innerexpression, ')' ;

constant =
"true"
| "True"
| "false"
| "False"
| double
| integer
| symbol
;

double = integer, '.', integer ;
integer = digit, {digit} ;
digit = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' ;
symbol = '{', word, '}' ;


indentation = ' ', {' '} ;
binop = '+' | '-' | '*' | "!=" | "==" | "</>" | "<=" | "<" | ">=" | ">" ;

uoperator =
lenop
| unary_minus
| not_expr
;

lenop = "len", '(', expression, ')' ;
unary_minus = '-', base_expression ;
not_expr = "not", innerexpression ;

funccall = paired
| word, '(', innerexpression, kwargs, ')', [ funcblock ]
;

(* paired is a special-case function with two arguments *)
paired = "paired", '(', innerexpression, ',', innerexpression, kwargs, ')' ;

funcblock = "using", '|', [ variablelist ], '|', ':', block ;


kwargs = {',', variable, '=', innerexpression} ;

assignment = variable, '=', expression ;

method_call = base_expression, '.', word, '(', [ method_args ], ')';
method_args =
innerexpression, kwargs
| variable, '=', innerexpression, kwargs
; (* note that kwargs is defined as starting with a comma *)

indexexpr = base_expression, '[', [ indexing ], ']' ;

indexing = [ innerexpression ], ':', [ innerexpression ] ;

listexpr = '[', [ list_contents ] , ']' ;
list_contents = innerexpression, {',', innerexpression } ;

conditional = "if", innerexpression, ':', block, [ elseblock ] ;
elseblock = "else", ':', block ;
block = eol, indentation, expression, eol, {indentation, expression, eol} ;

variablelist = variable, {',', variable} ;
variable = word ;

0 comments on commit 930294d

Please sign in to comment.