examples/uc: Add uc example to show the interaction between terms and genlex.
mewmew committed Feb 13, 2017
1 parent 41a3902 commit 476be9c
Showing 6 changed files with 741 additions and 0 deletions.
15 changes: 15 additions & 0 deletions examples/uc/Makefile
@@ -0,0 +1,15 @@
all: lexer

# uc.ebnf -> uc.json
%.json: %.ebnf
terms -indent -start File -skip "whitespace,comment" -o $@ $<

# uc.json -> lexer and token packages
lexer: uc.json
genlex $<

clean:
rm -rf token lexer
rm -f uc.json

.PHONY: all clean
135 changes: 135 additions & 0 deletions examples/uc/lexer/lexer.go
@@ -0,0 +1,135 @@
// generated by speak; DO NOT EDIT.

// Package lexer implements lexical analysis of the source language.
package lexer

import (
"io"
"io/ioutil"
"regexp"

"github.com/mewmew/speak/examples/uc/token"
"github.com/pkg/errors"
)

// regstr specifies a regular expression for identifying the tokens of the input
// grammar.
const regstr = `^(('(?:\\n|a)')|([A-Z_a-z][0-9A-Z_a-z]*)|([0-9][0-9]*)|(!)|(!=)|(&&)|(\()|(\))|(\*)|(\+)|(,)|(-)|(/)|(;)|(<)|(<=)|(=)|(==)|(>)|(>=)|(\[)|(\])|(else)|(if)|(return)|(typedef)|(while)|(\{)|(\})|(//(?-s:.)*\n|#(?-s:.)*\n|/\*[^\*]*\*/)|([\t-\r ]))`

// reg is a compiled version of regstr with leftmost-longest matching enabled.
var reg *regexp.Regexp

func init() {
// Compile regexp for identifying tokens and enforce leftmost-longest
// matching.
reg = regexp.MustCompile(regstr)
reg.Longest()
}

// A Lexer lexes the source input into tokens.
type Lexer struct {
// Source input.
input []byte
// Current position in the source input.
pos int
}

// New returns a new scanner lexing from r.
func New(r io.Reader) (*Lexer, error) {
input, err := ioutil.ReadAll(r)
if err != nil {
return nil, errors.WithStack(err)
}
return NewFromBytes(input), nil
}

// Open returns a new scanner lexing from path.
func Open(path string) (*Lexer, error) {
input, err := ioutil.ReadFile(path)
if err != nil {
return nil, errors.WithStack(err)
}
return NewFromBytes(input), nil
}

// NewFromString returns a new scanner lexing from input.
func NewFromString(input string) *Lexer {
return NewFromBytes([]byte(input))
}

// NewFromBytes returns a new scanner lexing from input.
func NewFromBytes(input []byte) *Lexer {
return &Lexer{input: input}
}

// Scan lexes and returns the next token of the source input.
func (l *Lexer) Scan() (*token.Token, error) {
// Handle EOF.
if l.pos >= len(l.input) {
return nil, errors.WithStack(io.EOF)
}
input := l.input[l.pos:]
// Identify token locations matching start of input.
loc, err := tokenLocs(input)
if err != nil {
return nil, errors.WithStack(err)
}
n, id, err := locateTokens(input, loc)
if err != nil {
return nil, errors.WithStack(err)
}
lit := input[:n]
tok := &token.Token{
Pos: l.pos,
ID: id,
Lit: lit,
}
l.pos += n
return tok, nil
}

// locateTokens searches for the longest token that matches the start of the
// input.
func locateTokens(input []byte, loc []int) (n int, id token.ID, err error) {
n = -1
for i := 0; i < len(token.IDs); i++ {
start := loc[2*i]
if start == -1 {
continue
}
if start != 0 {
return 0, 0, errors.Errorf("invalid start index of token; expected 0, got %d", start)
}
end := loc[2*i+1]
if n != -1 {
return 0, 0, errors.Errorf("ambiguity detected; input matches both token %q and token %q", input[:n], input[:end])
}
n = end
id = token.ID(i)
}
if n == -1 {
// no matching token located.
return 0, 0, errors.Errorf("unable to identify valid token at %q", input)
}
return n, id, nil
}

// tokenLocs returns the start and end location of each token type that matches
// the start of the input.
func tokenLocs(input []byte) ([]int, error) {
loc := reg.FindSubmatchIndex(input)
if loc == nil {
// no submatch located.
return nil, errors.Errorf("unable to identify valid token at %q", input)
}
// Validate submatch indices length; expecting two indices - start and end -
// per submatch, and in total 2 + (number of tokens) submatches.
got := len(loc)
want := 2 * (2 + len(token.IDs))
if got != want {
return nil, errors.Errorf("invalid number of submatches; expected %d, got %d", want, got)
}
// Skip the first two submatches as they do not identify specific tokens.
loc = loc[2*2:]
return loc, nil
}
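
The generated API is small enough to exercise by hand. The sketch below is a hypothetical driver, not part of this commit; it assumes it is run from the examples/uc directory, opens testdata/input.c with the generated lexer, and prints every token until Scan reports io.EOF.

// Hypothetical driver; not generated by speak and not part of this commit.
package main

import (
	"fmt"
	"io"
	"log"

	"github.com/mewmew/speak/examples/uc/lexer"
	"github.com/pkg/errors"
)

func main() {
	// Open the sample input shipped with the example (assumes the working
	// directory is examples/uc).
	l, err := lexer.Open("testdata/input.c")
	if err != nil {
		log.Fatalf("%+v", err)
	}
	for {
		tok, err := l.Scan()
		if err != nil {
			// Scan wraps io.EOF with a stack trace, so unwrap before comparing.
			if errors.Cause(err) == io.EOF {
				break
			}
			log.Fatalf("%+v", err)
		}
		// Prints position, token ID and literal via Token.String.
		fmt.Println(tok)
	}
}

Note that whitespace and comments are returned as ordinary tokens with skip IDs; filtering them out is left to the caller.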
75 changes: 75 additions & 0 deletions examples/uc/testdata/input.c
@@ -0,0 +1,75 @@
// This program illustrates the quick sort algorithm by sorting an
// array of char and printing the intermediate results.
//
// Adapted from N.Wirth: Algorithms + Data Structures = Programs


void putstring(char s[]);

char eol[2];
int n;


void sort(char a[], int l, int r) {
int i;
int j;
char x;
char w;


i = l;
j = r;
x = a[(l+r) / 2];

while ( i<= j) {
while (a[i] < x) i = i + 1;
while (x < a[j]) j = j - 1;
if (i<= j) {
w = a[i];
a[i] = a[j];
a[j] = w;
i = i + 1;
j = j - 1;
}
}

putstring (a);
putstring (eol);
if (l < j) sort(a, l,j);
if (i < r) sort(a, i, r);

}

int main(void)
{
char s[27];
int i;
char t;
int q;

eol[0] = '\n';
eol[1] = 0;

n = 26;

s[n] = 0;

i = 0;

// Fill the string with random-looking data
q = 11;
while (i<n) {
t = q - (q / 26)*26;
s[i] = 'a'+t;
i = i + 1;
q = q + 17;
}


putstring (s); // print it ...
putstring (eol);
sort(s, 0, n-1); // sort it ...
putstring(s); // and print again
putstring (eol);

}
68 changes: 68 additions & 0 deletions examples/uc/token/token.go
@@ -0,0 +1,68 @@
// generated by speak; DO NOT EDIT.

// Package token defines constants representing the lexical tokens of the source
// language.
package token

import "fmt"

// A Token represents a lexical token of the source language.
type Token struct {
// Start position in the source input.
Pos int
// Token type.
ID ID
// Token literal.
Lit []byte
}

// String returns the string representation of the token.
func (tok *Token) String() string {
return fmt.Sprintf("Pos: %d, ID: %s, Lit: %q", tok.Pos, tok.ID, tok.Lit)
}

// ID is the set of lexical tokens of the source language.
type ID uint

// String returns the string representation of the token ID.
func (id ID) String() string {
if int(id) < len(IDs) {
return IDs[id]
}
return fmt.Sprintf("<unknown token ID %d>", uint(id))
}

// IDs specifies the string representation of each token ID.
var IDs = [...]string{
"name(0, `char_lit`)",
"name(1, `ident`)",
"name(2, `int_lit`)",
"token(3, `!`)",
"token(4, `!=`)",
"token(5, `&&`)",
"token(6, `(`)",
"token(7, `)`)",
"token(8, `*`)",
"token(9, `+`)",
"token(10, `,`)",
"token(11, `-`)",
"token(12, `/`)",
"token(13, `;`)",
"token(14, `<`)",
"token(15, `<=`)",
"token(16, `=`)",
"token(17, `==`)",
"token(18, `>`)",
"token(19, `>=`)",
"token(20, `[`)",
"token(21, `]`)",
"token(22, `else`)",
"token(23, `if`)",
"token(24, `return`)",
"token(25, `typedef`)",
"token(26, `while`)",
"token(27, `{`)",
"token(28, `}`)",
"skip(29, `comment`)",
"skip(30, `whitespace`)",
}
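
For reference, the two String methods compose as follows; this is a small, hypothetical illustration (not part of the commit) whose expected output is read directly off the IDs table and the Token.String format above.

// Hypothetical illustration of the token package's String methods; not part
// of this commit.
package main

import (
	"fmt"

	"github.com/mewmew/speak/examples/uc/token"
)

func main() {
	// ID.String indexes into the IDs table.
	fmt.Println(token.ID(23)) // token(23, `if`)

	// Token.String combines position, ID and literal.
	tok := &token.Token{Pos: 42, ID: token.ID(1), Lit: []byte("main")}
	fmt.Println(tok) // Pos: 42, ID: name(1, `ident`), Lit: "main"
}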
