tokenize c/c++ source code
JavaScript C
Latest commit 690c737 Oct 11, 2016 substack forgot to track files
Permalink
Failed to load latest commit information.
example forgot to track files Oct 12, 2016
test remove leading +- from numbers, use unary operator instead Oct 12, 2016
.travis.yml drop 0.8 Sep 17, 2013
LICENSE package.json etc Sep 17, 2013
array.js forgot to track files Oct 12, 2016
index.js array form Oct 12, 2016
package.json 1.1.0 Oct 12, 2016
readme.markdown array form Oct 12, 2016
rules.js forgot to track files Oct 12, 2016

readme.markdown

c-tokenizer

tokenize C/C++ source code

build status

example

var tokenize = require('c-tokenizer')
var t = tokenize(function (src, token) {
  console.log(token.type + ' => ' + JSON.stringify(src))
})
process.stdin.pipe(t)

For the input file main.c:

#include "stdio.h"
#include "stdlib.h"

int main(int argc, char **argv) {
  printf("%d\n", foo(atoi(argv[1])));
  return 0;
}

output:

$ node example/tokens.js < example/main.c
directive => "#include"
whitespace => " "
quote => "\"stdio.h\""
whitespace => "\n"
directive => "#include"
whitespace => " "
quote => "\"stdlib.h\""
whitespace => "\n\n"
identifier => "int"
whitespace => " "
identifier => "main"
open paren => "("
identifier => "int"
whitespace => " "
identifier => "argc"
operator => ","
whitespace => " "
identifier => "char"
whitespace => " "
operator => "**"
identifier => "argv"
close paren => ")"
whitespace => " "
open curly => "{"
whitespace => "\n  "
identifier => "printf"
open paren => "("
quote => "\"%d\\n\""
operator => ","
whitespace => " "
identifier => "foo"
open paren => "("
identifier => "atoi"
open paren => "("
identifier => "argv"
open square => "["
number => "1"
close square => "]"
close paren => ")"
close paren => ")"
close paren => ")"
operator => ";"
whitespace => "\n  "
identifier => "return"
whitespace => " "
number => "0"
operator => ";"
whitespace => "\n"
close curly => "}"
whitespace => "\n"

or as an array instead of a stream:

var tokenize = require('c-tokenizer/array')
var src = process.argv[2]
var tokens = tokenize(src)
tokens.forEach(function (t) {
  console.log(JSON.stringify(t))
})

api

var tokenize = require('c-tokenizer')
var tokenizeArray = require('c-tokenizer/array')

var t = tokenize(cb)

Return a new tokenize through stream with C/C++ syntax rules loaded into it.

Each parsed token will fire the cb(src, token) callback.

Each token has:

  • token.type - string type
  • token.source - original source string

t.addRule(regex, name)

Add additional rules as regex with a name.

var tokens = tokenizeArray(src)

Return an array of tokens for the c source string src.

Each token has:

  • token.type - string type
  • token.source - original source string

install

With npm do:

npm install c-tokenizer

license

MIT