Commit: examples/uc: Add uc example to show the interaction between terms and genlex.
Showing 6 changed files with 741 additions and 0 deletions.
Makefile:
all: lexer

# uc.ebnf -> uc.json
%.json: %.ebnf
	terms -indent -start File -skip "whitespace,comment" -o $@ $<

# uc.json -> lexer and token packages
lexer: uc.json
	genlex $<

clean:
	rm -rf token lexer
	rm -f uc.json

.PHONY: all clean
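Running make drives a two-stage pipeline: the pattern rule invokes terms to derive uc.json from the uc.ebnf grammar (with File as the start production and the whitespace and comment terminals marked as skipped), and the lexer target then feeds uc.json to genlex, which emits the lexer and token packages shown below. make clean removes all generated artifacts.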
The generated lexer package:
// generated by speak; DO NOT EDIT.

// Package lexer implements lexical analysis of the source language.
package lexer

import (
	"io"
	"io/ioutil"
	"regexp"

	"github.com/mewmew/speak/examples/uc/token"
	"github.com/pkg/errors"
)

// regstr specifies a regular expression for identifying the tokens of the input
// grammar.
const regstr = `^(('(?:\\n|a)')|([A-Z_a-z][0-9A-Z_a-z]*)|([0-9][0-9]*)|(!)|(!=)|(&&)|(\()|(\))|(\*)|(\+)|(,)|(-)|(/)|(;)|(<)|(<=)|(=)|(==)|(>)|(>=)|(\[)|(\])|(else)|(if)|(return)|(typedef)|(while)|(\{)|(\})|(//(?-s:.)*\n|#(?-s:.)*\n|/\*[^\*]*\*/)|([\t-\r ]))`

// reg is a compiled version of regstr with leftmost-longest matching enabled.
var reg *regexp.Regexp

func init() {
	// Compile regexp for identifying tokens and enforce leftmost-longest
	// matching.
	reg = regexp.MustCompile(regstr)
	reg.Longest()
}

// A Lexer lexes the source input into a slice of tokens.
type Lexer struct {
	// Source input.
	input []byte
	// Current position in the source input.
	pos int
}

// New returns a new scanner lexing from r.
func New(r io.Reader) (*Lexer, error) {
	input, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	return NewFromBytes(input), nil
}

// Open returns a new scanner lexing from path.
func Open(path string) (*Lexer, error) {
	input, err := ioutil.ReadFile(path)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	return NewFromBytes(input), nil
}

// NewFromString returns a new scanner lexing from input.
func NewFromString(input string) *Lexer {
	return NewFromBytes([]byte(input))
}

// NewFromBytes returns a new scanner lexing from input.
func NewFromBytes(input []byte) *Lexer {
	return &Lexer{input: input}
}

// Scan lexes and returns the next token of the source input.
func (l *Lexer) Scan() (*token.Token, error) {
	// Handle EOF.
	if l.pos >= len(l.input) {
		return nil, errors.WithStack(io.EOF)
	}
	input := l.input[l.pos:]
	// Identify token locations matching start of input.
	loc, err := tokenLocs(input)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	n, id, err := locateTokens(input, loc)
	if err != nil {
		return nil, errors.WithStack(err)
	}
	lit := input[:n]
	tok := &token.Token{
		Pos: l.pos,
		ID:  id,
		Lit: lit,
	}
	l.pos += n
	return tok, nil
}

// locateTokens searches for the longest token that matches the start of the
// input.
func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
	n = -1
	for i := 0; i < len(token.IDs); i++ {
		start := loc[2*i]
		if start == -1 {
			continue
		}
		if start != 0 {
			return 0, 0, errors.Errorf("invalid start index of token; expected 0, got %d", start)
		}
		end := loc[2*i+1]
		if n != -1 {
			return 0, 0, errors.Errorf("ambiguity detected; input matches both token %q and token %q", input[:n], input[:end])
		}
		n = end
		id = token.ID(i)
	}
	if n == -1 {
		// No matching token located.
		return 0, 0, errors.Errorf("unable to identify valid token at %q", input)
	}
	return n, id, nil
}

// tokenLocs returns the start and end location of each token type that matches
// the start of the input.
func tokenLocs(input []byte) ([]int, error) {
	loc := reg.FindSubmatchIndex(input)
	if loc == nil {
		// No submatch located.
		return nil, errors.Errorf("unable to identify valid token at %q", input)
	}
	// Validate submatch indices length; expecting two indices - start and end -
	// per submatch, and in total 2 + (number of tokens) submatches.
	got := len(loc)
	want := 2 * (2 + len(token.IDs))
	if got != want {
		return nil, errors.Errorf("invalid number of submatches; expected %d, got %d", want, got)
	}
	// Skip the first two submatches as they do not identify specific tokens.
	loc = loc[2*2:]
	return loc, nil
}
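For context, here is a minimal driver sketch (not part of the commit) showing how the generated API fits together. It assumes lexing from a file path; the file name is hypothetical. Note that Scan wraps io.EOF with errors.WithStack, so the loop unwraps the error with errors.Cause before comparing.

package main

import (
	"fmt"
	"io"
	"log"

	"github.com/mewmew/speak/examples/uc/lexer"
	"github.com/pkg/errors"
)

func main() {
	// Open a lexer reading from a uc source file (hypothetical path).
	l, err := lexer.Open("quicksort.c")
	if err != nil {
		log.Fatalf("%+v", err)
	}
	// Scan tokens until EOF. Scan wraps io.EOF with errors.WithStack, so
	// unwrap the cause before comparing against io.EOF.
	for {
		tok, err := l.Scan()
		if err != nil {
			if errors.Cause(err) == io.EOF {
				break
			}
			log.Fatalf("%+v", err)
		}
		fmt.Println(tok)
	}
}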
The uc source example (quick sort):
// This program illustrates the quick sort algorithm by sorting an
// array of char and printing the intermediate results.
//
// Adapted from N. Wirth: Algorithms + Data Structures = Programs

void putstring(char s[]);

char eol[2];
int n;

void sort(char a[], int l, int r) {
	int i;
	int j;
	char x;
	char w;

	i = l;
	j = r;
	x = a[(l+r) / 2];

	while (i <= j) {
		while (a[i] < x) i = i + 1;
		while (x < a[j]) j = j - 1;
		if (i <= j) {
			w = a[i];
			a[i] = a[j];
			a[j] = w;
			i = i + 1;
			j = j - 1;
		}
	}

	putstring(a);
	putstring(eol);
	if (l < j) sort(a, l, j);
	if (i < r) sort(a, i, r);
}

int main(void)
{
	char s[27];
	int i;
	char t;
	int q;

	eol[0] = '\n';
	eol[1] = 0;

	n = 26;

	s[n] = 0;

	i = 0;

	// Fill the string with random-looking data.
	q = 11;
	while (i < n) {
		t = q - (q / 26) * 26;
		s[i] = 'a' + t;
		i = i + 1;
		q = q + 17;
	}

	putstring(s);    // print it ...
	putstring(eol);
	sort(s, 0, n-1); // sort it ...
	putstring(s);    // and print again
	putstring(eol);
}
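Note how tightly the generated lexer is coupled to the grammar: the char_lit alternative in regstr, ('(?:\\n|a)'), matches only the two character literals '\n' and 'a', which are exactly the ones appearing in this source file, presumably because the uc.ebnf grammar defines char_lit over just those literals.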
The generated token package:
// generated by speak; DO NOT EDIT.

// Package token defines constants representing the lexical tokens of the source
// language.
package token

import "fmt"

// A Token represents a lexical token of the source language.
type Token struct {
	// Start position in the source input.
	Pos int
	// Token type.
	ID ID
	// Token literal.
	Lit []byte
}

// String returns the string representation of the token.
func (tok *Token) String() string {
	return fmt.Sprintf("Pos: %d, ID: %s, Lit: %q", tok.Pos, tok.ID, tok.Lit)
}

// ID is the set of lexical tokens of the source language.
type ID uint

// String returns the string representation of the token ID.
func (id ID) String() string {
	if int(id) < len(IDs) {
		return IDs[id]
	}
	return fmt.Sprintf("<unknown token ID %d>", uint(id))
}

// IDs specifies the string representation of each token ID.
var IDs = [...]string{
	"name(0, `char_lit`)",
	"name(1, `ident`)",
	"name(2, `int_lit`)",
	"token(3, `!`)",
	"token(4, `!=`)",
	"token(5, `&&`)",
	"token(6, `(`)",
	"token(7, `)`)",
	"token(8, `*`)",
	"token(9, `+`)",
	"token(10, `,`)",
	"token(11, `-`)",
	"token(12, `/`)",
	"token(13, `;`)",
	"token(14, `<`)",
	"token(15, `<=`)",
	"token(16, `=`)",
	"token(17, `==`)",
	"token(18, `>`)",
	"token(19, `>=`)",
	"token(20, `[`)",
	"token(21, `]`)",
	"token(22, `else`)",
	"token(23, `if`)",
	"token(24, `return`)",
	"token(25, `typedef`)",
	"token(26, `while`)",
	"token(27, `{`)",
	"token(28, `}`)",
	"skip(29, `comment`)",
	"skip(30, `whitespace`)",
}
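The IDs table doubles as metadata: each entry encodes whether the ID names a terminal class (name), a literal token (token), or a skipped terminal (skip). The package exports no predicate for this, but a caller can recover it from the string form; the helper below is a hypothetical sketch built on that observation, not part of the generated package.

package main

import (
	"fmt"
	"strings"

	"github.com/mewmew/speak/examples/uc/token"
)

// isSkip reports whether id identifies a skipped terminal (comment or
// whitespace), based on the "skip(...)" prefix of its string representation.
// Hypothetical helper; not part of the generated package.
func isSkip(id token.ID) bool {
	return strings.HasPrefix(id.String(), "skip(")
}

func main() {
	fmt.Println(token.ID(23))         // token(23, `if`)
	fmt.Println(isSkip(token.ID(30))) // true: skip(30, `whitespace`)
	fmt.Println(isSkip(token.ID(1)))  // false: name(1, `ident`)
}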
(The remaining files of the commit, uc.ebnf and uc.json per the Makefile, are not shown here.)