Skip to content

Commit

Permalink
Add Janet Lexer (#2557)
Browse files Browse the repository at this point in the history
  • Loading branch information
sogaiu committed Nov 19, 2023
1 parent 446b45c commit d92f2cc
Show file tree
Hide file tree
Showing 87 changed files with 1,130 additions and 1 deletion.
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Other contributors, listed alphabetically, are:
* chebee7i -- Python traceback lexer improvements
* Hiram Chirino -- Scaml and Jade lexers
* Mauricio Caceres -- SAS and Stata lexers.
* Michael Camilleri, John Gabriele, sogaiu -- Janet lexer
* Ian Cooper -- VGL lexer
* David Corbett -- Inform, Jasmin, JSGF, Snowball, and TADS 3 lexers
* Leaf Corcoran -- MoonScript lexer
Expand Down
1 change: 1 addition & 0 deletions pygments/lexers/_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@
'JMESPathLexer': ('pygments.lexers.jmespath', 'JMESPath', ('jmespath', 'jp'), ('*.jp',), ()),
'JSLTLexer': ('pygments.lexers.jslt', 'JSLT', ('jslt',), ('*.jslt',), ('text/x-jslt',)),
'JagsLexer': ('pygments.lexers.modeling', 'JAGS', ('jags',), ('*.jag', '*.bug'), ()),
'JanetLexer': ('pygments.lexers.lisp', 'Janet', ('janet',), ('*.janet', '*.jdn'), ('text/x-janet', 'application/x-janet')),
'JasminLexer': ('pygments.lexers.jvm', 'Jasmin', ('jasmin', 'jasminxt'), ('*.j',), ()),
'JavaLexer': ('pygments.lexers.jvm', 'Java', ('java',), ('*.java',), ('text/x-java',)),
'JavascriptDjangoLexer': ('pygments.lexers.templates', 'JavaScript+Django/Jinja', ('javascript+django', 'js+django', 'javascript+jinja', 'js+jinja'), ('*.js.j2', '*.js.jinja2'), ('application/x-javascript+django', 'application/x-javascript+jinja', 'text/x-javascript+django', 'text/x-javascript+jinja', 'text/javascript+django', 'text/javascript+jinja')),
Expand Down
319 changes: 318 additions & 1 deletion pygments/lexers/lisp.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

__all__ = ['SchemeLexer', 'CommonLispLexer', 'HyLexer', 'RacketLexer',
'NewLispLexer', 'EmacsLispLexer', 'ShenLexer', 'CPSALexer',
'XtlangLexer', 'FennelLexer']
'XtlangLexer', 'FennelLexer', 'JanetLexer']


class SchemeLexer(RegexLexer):
Expand Down Expand Up @@ -2848,3 +2848,320 @@ class FennelLexer(RegexLexer):
(r'#', Punctuation),
]
}


class JanetLexer(RegexLexer):
    """A lexer for the Janet programming language.

    Comments, numbers (radix, hex and decimal forms), strings and buffers,
    long-strings, reader-macro prefixes, collection delimiters, keywords
    and symbols are tokenized.  Special forms, builtin macros and builtin
    functions are only highlighted when they directly follow an opening
    parenthesis (call position); builtin variables and the constants
    ``nil``, ``true`` and ``false`` are highlighted anywhere.

    .. versionadded:: 2.17.0
    """
    name = 'Janet'
    url = 'https://janet-lang.org/'
    aliases = ['janet']
    filenames = ['*.janet', '*.jdn']
    mimetypes = ['text/x-janet', 'application/x-janet']

    # XXX: gets too slow
    #flags = re.MULTILINE | re.VERBOSE

    special_forms = (
        'break', 'def', 'do', 'fn', 'if', 'quote', 'quasiquote', 'splice',
        'set', 'unquote', 'upscope', 'var', 'while'
    )

    builtin_macros = (
        '%=', '*=', '++', '+=', '--', '-=', '->', '->>', '-?>',
        '-?>>', '/=', 'and', 'as->', 'as-macro', 'as?->',
        'assert', 'case', 'catseq', 'chr', 'comment', 'compif',
        'comptime', 'compwhen', 'cond', 'coro', 'def-',
        'default', 'defdyn', 'defer', 'defmacro', 'defmacro-',
        'defn', 'defn-', 'delay', 'doc', 'each', 'eachk',
        'eachp', 'edefer', 'ev/do-thread', 'ev/gather',
        'ev/spawn', 'ev/spawn-thread', 'ev/with-deadline',
        'ffi/defbind', 'fiber-fn', 'for', 'forever', 'forv',
        'generate', 'if-let', 'if-not', 'if-with', 'import',
        'juxt', 'label', 'let', 'loop', 'match', 'or', 'prompt',
        'protect', 'repeat', 'seq', 'short-fn', 'tabseq',
        'toggle', 'tracev', 'try', 'unless', 'use', 'var-',
        'varfn', 'when', 'when-let', 'when-with', 'with',
        'with-dyns', 'with-syms', 'with-vars',
        # obsolete builtin macros
        'eachy'
    )

    builtin_functions = (
        '%', '*', '+', '-', '/', '<', '<=', '=', '>', '>=',
        'abstract?', 'accumulate', 'accumulate2', 'all',
        'all-bindings', 'all-dynamics', 'any?', 'apply',
        'array', 'array/clear', 'array/concat', 'array/ensure',
        'array/fill', 'array/insert', 'array/new',
        'array/new-filled', 'array/peek', 'array/pop',
        'array/push', 'array/remove', 'array/slice',
        'array/trim', 'array/weak', 'array?', 'asm',
        'bad-compile', 'bad-parse', 'band', 'blshift', 'bnot',
        'boolean?', 'bor', 'brshift', 'brushift', 'buffer',
        'buffer/bit', 'buffer/bit-clear', 'buffer/bit-set',
        'buffer/bit-toggle', 'buffer/blit', 'buffer/clear',
        'buffer/fill', 'buffer/format', 'buffer/from-bytes',
        'buffer/new', 'buffer/new-filled', 'buffer/popn',
        'buffer/push', 'buffer/push-at', 'buffer/push-byte',
        'buffer/push-string', 'buffer/push-word',
        'buffer/slice', 'buffer/trim', 'buffer?', 'bxor',
        'bytes?', 'cancel', 'cfunction?', 'cli-main', 'cmp',
        'comp', 'compare', 'compare<', 'compare<=', 'compare=',
        'compare>', 'compare>=', 'compile', 'complement',
        'count', 'curenv', 'debug', 'debug/arg-stack',
        'debug/break', 'debug/fbreak', 'debug/lineage',
        'debug/stack', 'debug/stacktrace', 'debug/step',
        'debug/unbreak', 'debug/unfbreak', 'debugger',
        'debugger-on-status', 'dec', 'deep-not=', 'deep=',
        'defglobal', 'describe', 'dictionary?', 'disasm',
        'distinct', 'div', 'doc*', 'doc-format', 'doc-of',
        'dofile', 'drop', 'drop-until', 'drop-while', 'dyn',
        'eflush', 'empty?', 'env-lookup', 'eprin', 'eprinf',
        'eprint', 'eprintf', 'error', 'errorf',
        'ev/acquire-lock', 'ev/acquire-rlock',
        'ev/acquire-wlock', 'ev/all-tasks', 'ev/call',
        'ev/cancel', 'ev/capacity', 'ev/chan', 'ev/chan-close',
        'ev/chunk', 'ev/close', 'ev/count', 'ev/deadline',
        'ev/full', 'ev/give', 'ev/give-supervisor', 'ev/go',
        'ev/lock', 'ev/read', 'ev/release-lock',
        'ev/release-rlock', 'ev/release-wlock', 'ev/rselect',
        'ev/rwlock', 'ev/select', 'ev/sleep', 'ev/take',
        'ev/thread', 'ev/thread-chan', 'ev/write', 'eval',
        'eval-string', 'even?', 'every?', 'extreme', 'false?',
        'ffi/align', 'ffi/call', 'ffi/calling-conventions',
        'ffi/close', 'ffi/context', 'ffi/free', 'ffi/jitfn',
        'ffi/lookup', 'ffi/malloc', 'ffi/native',
        'ffi/pointer-buffer', 'ffi/pointer-cfunction',
        'ffi/read', 'ffi/signature', 'ffi/size', 'ffi/struct',
        'ffi/trampoline', 'ffi/write', 'fiber/can-resume?',
        'fiber/current', 'fiber/getenv', 'fiber/last-value',
        'fiber/maxstack', 'fiber/new', 'fiber/root',
        'fiber/setenv', 'fiber/setmaxstack', 'fiber/status',
        'fiber?', 'file/close', 'file/flush', 'file/lines',
        'file/open', 'file/read', 'file/seek', 'file/tell',
        'file/temp', 'file/write', 'filter', 'find',
        'find-index', 'first', 'flatten', 'flatten-into',
        'flush', 'flycheck', 'freeze', 'frequencies',
        'from-pairs', 'function?', 'gccollect', 'gcinterval',
        'gcsetinterval', 'gensym', 'get', 'get-in', 'getline',
        'getproto', 'group-by', 'has-key?', 'has-value?',
        'hash', 'idempotent?', 'identity', 'import*', 'in',
        'inc', 'index-of', 'indexed?', 'int/s64',
        'int/to-bytes', 'int/to-number', 'int/u64', 'int?',
        'interleave', 'interpose', 'invert', 'juxt*', 'keep',
        'keep-syntax', 'keep-syntax!', 'keys', 'keyword',
        'keyword/slice', 'keyword?', 'kvs', 'last', 'length',
        'lengthable?', 'load-image', 'macex', 'macex1',
        'maclintf', 'make-env', 'make-image', 'map', 'mapcat',
        'marshal', 'math/abs', 'math/acos', 'math/acosh',
        'math/asin', 'math/asinh', 'math/atan', 'math/atan2',
        'math/atanh', 'math/cbrt', 'math/ceil', 'math/cos',
        'math/cosh', 'math/erf', 'math/erfc', 'math/exp',
        'math/exp2', 'math/expm1', 'math/floor', 'math/gamma',
        'math/gcd', 'math/hypot', 'math/lcm', 'math/log',
        'math/log-gamma', 'math/log10', 'math/log1p',
        'math/log2', 'math/next', 'math/pow', 'math/random',
        'math/rng', 'math/rng-buffer', 'math/rng-int',
        'math/rng-uniform', 'math/round', 'math/seedrandom',
        'math/sin', 'math/sinh', 'math/sqrt', 'math/tan',
        'math/tanh', 'math/trunc', 'max', 'max-of', 'mean',
        'memcmp', 'merge', 'merge-into', 'merge-module', 'min',
        'min-of', 'mod', 'module/add-paths',
        'module/expand-path', 'module/find', 'module/value',
        'nan?', 'nat?', 'native', 'neg?', 'net/accept',
        'net/accept-loop', 'net/address', 'net/address-unpack',
        'net/chunk', 'net/close', 'net/connect', 'net/flush',
        'net/listen', 'net/localname', 'net/peername',
        'net/read', 'net/recv-from', 'net/send-to',
        'net/server', 'net/setsockopt', 'net/shutdown',
        'net/write', 'next', 'nil?', 'not', 'not=', 'number?',
        'odd?', 'one?', 'os/arch', 'os/cd', 'os/chmod',
        'os/clock', 'os/compiler', 'os/cpu-count',
        'os/cryptorand', 'os/cwd', 'os/date', 'os/dir',
        'os/environ', 'os/execute', 'os/exit', 'os/getenv',
        'os/isatty', 'os/link', 'os/lstat', 'os/mkdir',
        'os/mktime', 'os/open', 'os/perm-int', 'os/perm-string',
        'os/pipe', 'os/posix-exec', 'os/posix-fork',
        'os/proc-close', 'os/proc-kill', 'os/proc-wait',
        'os/readlink', 'os/realpath', 'os/rename', 'os/rm',
        'os/rmdir', 'os/setenv', 'os/shell', 'os/sigaction',
        'os/sleep', 'os/spawn', 'os/stat', 'os/strftime',
        'os/symlink', 'os/time', 'os/touch', 'os/umask',
        'os/which', 'pairs', 'parse', 'parse-all',
        'parser/byte', 'parser/clone', 'parser/consume',
        'parser/eof', 'parser/error', 'parser/flush',
        'parser/has-more', 'parser/insert', 'parser/new',
        'parser/produce', 'parser/state', 'parser/status',
        'parser/where', 'partial', 'partition', 'partition-by',
        'peg/compile', 'peg/find', 'peg/find-all', 'peg/match',
        'peg/replace', 'peg/replace-all', 'pos?', 'postwalk',
        'pp', 'prewalk', 'prin', 'prinf', 'print', 'printf',
        'product', 'propagate', 'put', 'put-in', 'quit',
        'range', 'reduce', 'reduce2', 'repl', 'require',
        'resume', 'return', 'reverse', 'reverse!',
        'run-context', 'sandbox', 'scan-number', 'setdyn',
        'signal', 'slice', 'slurp', 'some', 'sort', 'sort-by',
        'sorted', 'sorted-by', 'spit', 'string',
        'string/ascii-lower', 'string/ascii-upper',
        'string/bytes', 'string/check-set', 'string/find',
        'string/find-all', 'string/format', 'string/from-bytes',
        'string/has-prefix?', 'string/has-suffix?',
        'string/join', 'string/repeat', 'string/replace',
        'string/replace-all', 'string/reverse', 'string/slice',
        'string/split', 'string/trim', 'string/triml',
        'string/trimr', 'string?', 'struct', 'struct/getproto',
        'struct/proto-flatten', 'struct/to-table',
        'struct/with-proto', 'struct?', 'sum', 'symbol',
        'symbol/slice', 'symbol?', 'table', 'table/clear',
        'table/clone', 'table/getproto', 'table/new',
        'table/proto-flatten', 'table/rawget', 'table/setproto',
        'table/to-struct', 'table/weak', 'table/weak-keys',
        'table/weak-values', 'table?', 'take', 'take-until',
        'take-while', 'thaw', 'trace', 'true?', 'truthy?',
        'tuple', 'tuple/brackets', 'tuple/setmap',
        'tuple/slice', 'tuple/sourcemap', 'tuple/type',
        'tuple?', 'type', 'unmarshal', 'untrace', 'update',
        'update-in', 'values', 'varglobal', 'walk',
        'warn-compile', 'xprin', 'xprinf', 'xprint', 'xprintf',
        'yield', 'zero?', 'zipcoll',
        # obsolete builtin functions
        'tarray/buffer', 'tarray/copy-bytes', 'tarray/length',
        'tarray/new', 'tarray/properties', 'tarray/slice',
        'tarray/swap-bytes', 'thread/close', 'thread/current',
        'thread/exit', 'thread/new', 'thread/receive',
        'thread/send'
    )

    builtin_variables = (
        'debugger-env', 'default-peg-grammar', 'janet/build',
        'janet/config-bits', 'janet/version', 'load-image-dict',
        'make-image-dict', 'math/-inf', 'math/e', 'math/inf',
        'math/int-max', 'math/int-min', 'math/int32-max',
        'math/int32-min', 'math/nan', 'math/pi', 'module/cache',
        'module/loaders', 'module/loading', 'module/paths',
        'root-env', 'stderr', 'stdin', 'stdout'
    )

    constants = (
        'false', 'nil', 'true'
    )

    # XXX: this form not usable to pass to `suffix=`
    #_token_end = r'''
    #    (?=            # followed by one of:
    #        \s         #   whitespace
    #      | \#         #   comment
    #      | [)\]}]     #   end delimiters
    #      | $          #   end of file
    #    )
    #'''

    # ...so, express it like this
    #
    # `}` is listed alongside `)` and `]`: it closes structs and tables,
    # and it can never be part of a symbol, so a word followed by `}`
    # (e.g. the `nil` in `{:a nil}`) has definitely ended.
    _token_end = r'(?=\s|#|[)\]}]|$)'

    _first_char = r'[a-zA-Z!$%&*+\-./<=>?@^_]'
    _rest_char = rf'([0-9:]|{_first_char})'

    valid_name = rf'{_first_char}({_rest_char})*'

    _radix_unit = r'[0-9a-zA-Z][0-9a-zA-Z_]*'

    # exponent marker, optional sign, one or more alphanumeric
    _radix_exp = r'&[+-]?[0-9a-zA-Z]+'

    # 2af3__bee_
    _hex_unit = r'[0-9a-fA-F][0-9a-fA-F_]*'

    # 12_000__
    _dec_unit = r'[0-9][0-9_]*'

    # E-23
    # lower or uppercase e, optional sign, one or more digits
    _dec_exp = r'[eE][+-]?[0-9]+'

    # NOTE: rule order matters -- RegexLexer tries the rules of a state
    # top to bottom, so the number rules must precede the symbol rules
    # and the specific word lists must precede the catch-all symbol rule.
    tokens = {
        'root': [
            (r'#.*$', Comment.Single),

            (r'\s+', Whitespace),

            # radix number
            #
            # form with a dot after the leading digits is tried first
            (rf'''(?x)
                  [+-]? [0-9]{{1,2}} r {_radix_unit} \. ({_radix_unit})?
                  ({_radix_exp})?
               ''',
             Number),

            (rf'''(?x)
                  [+-]? [0-9]{{1,2}} r (\.)? {_radix_unit}
                  ({_radix_exp})?
               ''',
             Number),

            # hex number
            (rf'(?x) [+-]? 0x {_hex_unit} \. ({_hex_unit})?',
             Number.Hex),

            (rf'(?x) [+-]? 0x (\.)? {_hex_unit}',
             Number.Hex),

            # decimal number
            (rf'(?x) [+-]? {_dec_unit} \. ({_dec_unit})? ({_dec_exp})?',
             Number.Float),

            (rf'(?x) [+-]? (\.)? {_dec_unit} ({_dec_exp})?',
             Number.Float),

            # strings and buffers
            (r'@?"', String, 'string'),

            # long-strings and long-buffers
            #
            #   non-empty content enclosed by a pair of n-backticks
            #   with optional leading @
            (r'@?(`+)(.|\n)+?\1', String),

            # things that hang out on front
            #
            #   ' ~ , ; |
            (r"['~,;|]", Operator),

            # collection delimiters
            #
            #   @( ( )
            #   @[ [ ]
            #   @{ { }
            (r'@?[(\[{]|[)\]}]', Punctuation),

            # constants
            #
            # `Keyword.Constant` is the standard token type; a misspelt
            # attribute such as `Keyword.Constants` would silently create
            # a new, unstyled token subtype.
            (words(constants, suffix=_token_end), Keyword.Constant),

            # keywords
            (rf'(:({_rest_char})+|:)', Name.Constant),

            # symbols
            (words(builtin_variables, suffix=_token_end),
             Name.Variable.Global),

            # the following three are only recognized in call position,
            # i.e. immediately after an opening parenthesis
            (words(special_forms, prefix=r'(?<=\()', suffix=_token_end),
             Keyword.Reserved),

            (words(builtin_macros, prefix=r'(?<=\()', suffix=_token_end),
             Name.Builtin),

            (words(builtin_functions, prefix=r'(?<=\()', suffix=_token_end),
             Name.Function),

            # other symbols
            (valid_name, Name.Variable),
        ],
        'string': [
            # \uXXXX and \UXXXXXX unicode escapes
            (r'\\(u[0-9a-fA-F]{4}|U[0-9a-fA-F]{6})', String.Escape),
            # \xHH byte escape
            (r'\\x[0-9a-fA-F]{2}', String.Escape),
            # any other backslash + single character
            (r'\\.', String.Escape),
            (r'"', String, '#pop'),
            (r'[^\\"]+', String),
        ]
    }
6 changes: 6 additions & 0 deletions tests/snippets/janet/bool_lit-false.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---input---
false

---tokens---
'false' Keyword.Constants
'\n' Text.Whitespace
6 changes: 6 additions & 0 deletions tests/snippets/janet/bool_lit-true.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---input---
true

---tokens---
'true' Keyword.Constants
'\n' Text.Whitespace
9 changes: 9 additions & 0 deletions tests/snippets/janet/buf_lit-multiline.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---input---
@"this is the first line
and what is this one?"

---tokens---
'@"' Literal.String
'this is the first line\nand what is this one?' Literal.String
'"' Literal.String
'\n' Text.Whitespace
8 changes: 8 additions & 0 deletions tests/snippets/janet/buf_lit-simple.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---input---
@"good bye"

---tokens---
'@"' Literal.String
'good bye' Literal.String
'"' Literal.String
'\n' Text.Whitespace
12 changes: 12 additions & 0 deletions tests/snippets/janet/buf_lit-with-escape.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---input---
@"ant\fbee\rcougar"

---tokens---
'@"' Literal.String
'ant' Literal.String
'\\f' Literal.String.Escape
'bee' Literal.String
'\\r' Literal.String.Escape
'cougar' Literal.String
'"' Literal.String
'\n' Text.Whitespace

0 comments on commit d92f2cc

Please sign in to comment.