Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Janet Lexer #2557

Merged
merged 14 commits into from
Nov 19, 2023
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Other contributors, listed alphabetically, are:
* chebee7i -- Python traceback lexer improvements
* Hiram Chirino -- Scaml and Jade lexers
* Mauricio Caceres -- SAS and Stata lexers.
* Michael Camilleri, John Gabriele, sogaiu -- Janet lexer
* Ian Cooper -- VGL lexer
* David Corbett -- Inform, Jasmin, JSGF, Snowball, and TADS 3 lexers
* Leaf Corcoran -- MoonScript lexer
Expand Down
1 change: 1 addition & 0 deletions pygments/lexers/_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@
'JMESPathLexer': ('pygments.lexers.jmespath', 'JMESPath', ('jmespath', 'jp'), ('*.jp',), ()),
'JSLTLexer': ('pygments.lexers.jslt', 'JSLT', ('jslt',), ('*.jslt',), ('text/x-jslt',)),
'JagsLexer': ('pygments.lexers.modeling', 'JAGS', ('jags',), ('*.jag', '*.bug'), ()),
'JanetLexer': ('pygments.lexers.lisp', 'Janet', ('janet',), ('*.janet', '*.jdn'), ('text/x-janet', 'application/x-janet')),
'JasminLexer': ('pygments.lexers.jvm', 'Jasmin', ('jasmin', 'jasminxt'), ('*.j',), ()),
'JavaLexer': ('pygments.lexers.jvm', 'Java', ('java',), ('*.java',), ('text/x-java',)),
'JavascriptDjangoLexer': ('pygments.lexers.templates', 'JavaScript+Django/Jinja', ('javascript+django', 'js+django', 'javascript+jinja', 'js+jinja'), ('*.js.j2', '*.js.jinja2'), ('application/x-javascript+django', 'application/x-javascript+jinja', 'text/x-javascript+django', 'text/x-javascript+jinja', 'text/javascript+django', 'text/javascript+jinja')),
Expand Down
350 changes: 349 additions & 1 deletion pygments/lexers/lisp.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

__all__ = ['SchemeLexer', 'CommonLispLexer', 'HyLexer', 'RacketLexer',
'NewLispLexer', 'EmacsLispLexer', 'ShenLexer', 'CPSALexer',
'XtlangLexer', 'FennelLexer']
'XtlangLexer', 'FennelLexer', 'JanetLexer']


class SchemeLexer(RegexLexer):
Expand Down Expand Up @@ -2846,3 +2846,351 @@ class FennelLexer(RegexLexer):
(r'#', Punctuation),
]
}


class JanetLexer(RegexLexer):
    """A lexer for the Janet programming language.

    The lexer is a single-state (``root``) regex lexer.  Numbers, strings,
    prefix operators and collection delimiters are recognised first; bare
    words are then classified as constants, keywords, builtin variables,
    special forms, builtin macros or builtin functions, and anything left
    over is treated as an ordinary symbol.

    Special forms, builtin macros and builtin functions are only highlighted
    when they appear directly after an opening parenthesis (call position).

    .. versionadded:: 2.17.0
    """
    name = 'Janet'
    url = 'https://janet-lang.org/'
    aliases = ['janet']
    filenames = ['*.janet', '*.jdn']
    mimetypes = ['text/x-janet', 'application/x-janet']

    # XXX: gets too slow
    #flags = re.MULTILINE | re.VERBOSE

    # Words highlighted as Keyword.Reserved when in call position.
    special_forms = (
        'break', 'def', 'do', 'fn', 'if', 'quote', 'quasiquote', 'splice',
        'set', 'unquote', 'upscope', 'var', 'while'
    )

    # Words highlighted as Name.Builtin when in call position.
    builtin_macros = (
        '%=', '*=', '++', '+=', '--', '-=', '->', '->>', '-?>',
        '-?>>', '/=', 'and', 'as->', 'as-macro', 'as?->',
        'assert', 'case', 'catseq', 'chr', 'comment', 'compif',
        'comptime', 'compwhen', 'cond', 'coro', 'def-',
        'default', 'defdyn', 'defer', 'defmacro', 'defmacro-',
        'defn', 'defn-', 'delay', 'doc', 'each', 'eachk',
        'eachp', 'edefer', 'ev/do-thread', 'ev/gather',
        'ev/spawn', 'ev/spawn-thread', 'ev/with-deadline',
        'ffi/defbind', 'fiber-fn', 'for', 'forever', 'forv',
        'generate', 'if-let', 'if-not', 'if-with', 'import',
        'juxt', 'label', 'let', 'loop', 'match', 'or', 'prompt',
        'protect', 'repeat', 'seq', 'short-fn', 'tabseq',
        'toggle', 'tracev', 'try', 'unless', 'use', 'var-',
        'varfn', 'when', 'when-let', 'when-with', 'with',
        'with-dyns', 'with-syms', 'with-vars',
        # obsolete builtin macros
        'eachy'
    )

    # Words highlighted as Name.Function when in call position.
    builtin_functions = (
        '%', '*', '+', '-', '/', '<', '<=', '=', '>', '>=',
        'abstract?', 'accumulate', 'accumulate2', 'all',
        'all-bindings', 'all-dynamics', 'any?', 'apply',
        'array', 'array/clear', 'array/concat', 'array/ensure',
        'array/fill', 'array/insert', 'array/new',
        'array/new-filled', 'array/peek', 'array/pop',
        'array/push', 'array/remove', 'array/slice',
        'array/trim', 'array/weak', 'array?', 'asm',
        'bad-compile', 'bad-parse', 'band', 'blshift', 'bnot',
        'boolean?', 'bor', 'brshift', 'brushift', 'buffer',
        'buffer/bit', 'buffer/bit-clear', 'buffer/bit-set',
        'buffer/bit-toggle', 'buffer/blit', 'buffer/clear',
        'buffer/fill', 'buffer/format', 'buffer/from-bytes',
        'buffer/new', 'buffer/new-filled', 'buffer/popn',
        'buffer/push', 'buffer/push-at', 'buffer/push-byte',
        'buffer/push-string', 'buffer/push-word',
        'buffer/slice', 'buffer/trim', 'buffer?', 'bxor',
        'bytes?', 'cancel', 'cfunction?', 'cli-main', 'cmp',
        'comp', 'compare', 'compare<', 'compare<=', 'compare=',
        'compare>', 'compare>=', 'compile', 'complement',
        'count', 'curenv', 'debug', 'debug/arg-stack',
        'debug/break', 'debug/fbreak', 'debug/lineage',
        'debug/stack', 'debug/stacktrace', 'debug/step',
        'debug/unbreak', 'debug/unfbreak', 'debugger',
        'debugger-on-status', 'dec', 'deep-not=', 'deep=',
        'defglobal', 'describe', 'dictionary?', 'disasm',
        'distinct', 'div', 'doc*', 'doc-format', 'doc-of',
        'dofile', 'drop', 'drop-until', 'drop-while', 'dyn',
        'eflush', 'empty?', 'env-lookup', 'eprin', 'eprinf',
        'eprint', 'eprintf', 'error', 'errorf',
        'ev/acquire-lock', 'ev/acquire-rlock',
        'ev/acquire-wlock', 'ev/all-tasks', 'ev/call',
        'ev/cancel', 'ev/capacity', 'ev/chan', 'ev/chan-close',
        'ev/chunk', 'ev/close', 'ev/count', 'ev/deadline',
        'ev/full', 'ev/give', 'ev/give-supervisor', 'ev/go',
        'ev/lock', 'ev/read', 'ev/release-lock',
        'ev/release-rlock', 'ev/release-wlock', 'ev/rselect',
        'ev/rwlock', 'ev/select', 'ev/sleep', 'ev/take',
        'ev/thread', 'ev/thread-chan', 'ev/write', 'eval',
        'eval-string', 'even?', 'every?', 'extreme', 'false?',
        'ffi/align', 'ffi/call', 'ffi/calling-conventions',
        'ffi/close', 'ffi/context', 'ffi/free', 'ffi/jitfn',
        'ffi/lookup', 'ffi/malloc', 'ffi/native',
        'ffi/pointer-buffer', 'ffi/pointer-cfunction',
        'ffi/read', 'ffi/signature', 'ffi/size', 'ffi/struct',
        'ffi/trampoline', 'ffi/write', 'fiber/can-resume?',
        'fiber/current', 'fiber/getenv', 'fiber/last-value',
        'fiber/maxstack', 'fiber/new', 'fiber/root',
        'fiber/setenv', 'fiber/setmaxstack', 'fiber/status',
        'fiber?', 'file/close', 'file/flush', 'file/lines',
        'file/open', 'file/read', 'file/seek', 'file/tell',
        'file/temp', 'file/write', 'filter', 'find',
        'find-index', 'first', 'flatten', 'flatten-into',
        'flush', 'flycheck', 'freeze', 'frequencies',
        'from-pairs', 'function?', 'gccollect', 'gcinterval',
        'gcsetinterval', 'gensym', 'get', 'get-in', 'getline',
        'getproto', 'group-by', 'has-key?', 'has-value?',
        'hash', 'idempotent?', 'identity', 'import*', 'in',
        'inc', 'index-of', 'indexed?', 'int/s64',
        'int/to-bytes', 'int/to-number', 'int/u64', 'int?',
        'interleave', 'interpose', 'invert', 'juxt*', 'keep',
        'keep-syntax', 'keep-syntax!', 'keys', 'keyword',
        'keyword/slice', 'keyword?', 'kvs', 'last', 'length',
        'lengthable?', 'load-image', 'macex', 'macex1',
        'maclintf', 'make-env', 'make-image', 'map', 'mapcat',
        'marshal', 'math/abs', 'math/acos', 'math/acosh',
        'math/asin', 'math/asinh', 'math/atan', 'math/atan2',
        'math/atanh', 'math/cbrt', 'math/ceil', 'math/cos',
        'math/cosh', 'math/erf', 'math/erfc', 'math/exp',
        'math/exp2', 'math/expm1', 'math/floor', 'math/gamma',
        'math/gcd', 'math/hypot', 'math/lcm', 'math/log',
        'math/log-gamma', 'math/log10', 'math/log1p',
        'math/log2', 'math/next', 'math/pow', 'math/random',
        'math/rng', 'math/rng-buffer', 'math/rng-int',
        'math/rng-uniform', 'math/round', 'math/seedrandom',
        'math/sin', 'math/sinh', 'math/sqrt', 'math/tan',
        'math/tanh', 'math/trunc', 'max', 'max-of', 'mean',
        'memcmp', 'merge', 'merge-into', 'merge-module', 'min',
        'min-of', 'mod', 'module/add-paths',
        'module/expand-path', 'module/find', 'module/value',
        'nan?', 'nat?', 'native', 'neg?', 'net/accept',
        'net/accept-loop', 'net/address', 'net/address-unpack',
        'net/chunk', 'net/close', 'net/connect', 'net/flush',
        'net/listen', 'net/localname', 'net/peername',
        'net/read', 'net/recv-from', 'net/send-to',
        'net/server', 'net/setsockopt', 'net/shutdown',
        'net/write', 'next', 'nil?', 'not', 'not=', 'number?',
        'odd?', 'one?', 'os/arch', 'os/cd', 'os/chmod',
        'os/clock', 'os/compiler', 'os/cpu-count',
        'os/cryptorand', 'os/cwd', 'os/date', 'os/dir',
        'os/environ', 'os/execute', 'os/exit', 'os/getenv',
        'os/isatty', 'os/link', 'os/lstat', 'os/mkdir',
        'os/mktime', 'os/open', 'os/perm-int', 'os/perm-string',
        'os/pipe', 'os/posix-exec', 'os/posix-fork',
        'os/proc-close', 'os/proc-kill', 'os/proc-wait',
        'os/readlink', 'os/realpath', 'os/rename', 'os/rm',
        'os/rmdir', 'os/setenv', 'os/shell', 'os/sigaction',
        'os/sleep', 'os/spawn', 'os/stat', 'os/strftime',
        'os/symlink', 'os/time', 'os/touch', 'os/umask',
        'os/which', 'pairs', 'parse', 'parse-all',
        'parser/byte', 'parser/clone', 'parser/consume',
        'parser/eof', 'parser/error', 'parser/flush',
        'parser/has-more', 'parser/insert', 'parser/new',
        'parser/produce', 'parser/state', 'parser/status',
        'parser/where', 'partial', 'partition', 'partition-by',
        'peg/compile', 'peg/find', 'peg/find-all', 'peg/match',
        'peg/replace', 'peg/replace-all', 'pos?', 'postwalk',
        'pp', 'prewalk', 'prin', 'prinf', 'print', 'printf',
        'product', 'propagate', 'put', 'put-in', 'quit',
        'range', 'reduce', 'reduce2', 'repl', 'require',
        'resume', 'return', 'reverse', 'reverse!',
        'run-context', 'sandbox', 'scan-number', 'setdyn',
        'signal', 'slice', 'slurp', 'some', 'sort', 'sort-by',
        'sorted', 'sorted-by', 'spit', 'string',
        'string/ascii-lower', 'string/ascii-upper',
        'string/bytes', 'string/check-set', 'string/find',
        'string/find-all', 'string/format', 'string/from-bytes',
        'string/has-prefix?', 'string/has-suffix?',
        'string/join', 'string/repeat', 'string/replace',
        'string/replace-all', 'string/reverse', 'string/slice',
        'string/split', 'string/trim', 'string/triml',
        'string/trimr', 'string?', 'struct', 'struct/getproto',
        'struct/proto-flatten', 'struct/to-table',
        'struct/with-proto', 'struct?', 'sum', 'symbol',
        'symbol/slice', 'symbol?', 'table', 'table/clear',
        'table/clone', 'table/getproto', 'table/new',
        'table/proto-flatten', 'table/rawget', 'table/setproto',
        'table/to-struct', 'table/weak', 'table/weak-keys',
        'table/weak-values', 'table?', 'take', 'take-until',
        'take-while', 'thaw', 'trace', 'true?', 'truthy?',
        'tuple', 'tuple/brackets', 'tuple/setmap',
        'tuple/slice', 'tuple/sourcemap', 'tuple/type',
        'tuple?', 'type', 'unmarshal', 'untrace', 'update',
        'update-in', 'values', 'varglobal', 'walk',
        'warn-compile', 'xprin', 'xprinf', 'xprint', 'xprintf',
        'yield', 'zero?', 'zipcoll',
        # obsolete builtin functions
        'tarray/buffer', 'tarray/copy-bytes', 'tarray/length',
        'tarray/new', 'tarray/properties', 'tarray/slice',
        'tarray/swap-bytes', 'thread/close', 'thread/current',
        'thread/exit', 'thread/new', 'thread/receive',
        'thread/send'
    )

    # Words highlighted as Name.Variable.Global wherever they appear.
    builtin_variables = (
        'debugger-env', 'default-peg-grammar', 'janet/build',
        'janet/config-bits', 'janet/version', 'load-image-dict',
        'make-image-dict', 'math/-inf', 'math/e', 'math/inf',
        'math/int-max', 'math/int-min', 'math/int32-max',
        'math/int32-min', 'math/nan', 'math/pi', 'module/cache',
        'module/loaders', 'module/loading', 'module/paths',
        'root-env', 'stderr', 'stdin', 'stdout'
    )

    # Words highlighted as Keyword.Constant wherever they appear.
    constants = (
        'false', 'nil', 'true'
    )

    # Lookahead that must follow a bare word for it to count as complete.
    # Without it, `words()` would match a prefix of a longer symbol
    # (e.g. `nil` inside `nil?`).
    #
    # XXX: this form not usable to pass to `suffix=`
    #_token_end = r'''
    #  (?=            # followed by one of:
    #    \s           #   whitespace
    #    | \#         #   comment
    #    | [)\]}]     #   end delimiters
    #    | $          #   end of file
    #  )
    #'''

    # ...so, express it like this.  `}` is part of the delimiter set so
    # that a word directly before the end of a struct or table, as in
    # `{:a true}`, is still recognised.
    _token_end = r'(?=\s|#|[)\]}]|$)'

    # A symbol may not start with a digit or a colon, but may contain them
    # after the first character.
    _first_char = r'[a-zA-Z!$%&*+./<?=>@^_-]'
    _following_chars_mb = r'[0-9:a-zA-Z!$%&*+./<?=>@^_-]*'

    valid_name = _first_char + _following_chars_mb

    _sign_mb = r'[+-]?'

    # The fragments below contain whitespace and `#` comments, so they are
    # only valid when interpolated into a pattern that starts with `(?x)`.

    _radix_unit = r'''
        [0-9a-zA-Z]+        # one or more alphanumeric
        (                   # optional group of:
          _*                #   zero or more underscores
          [0-9a-zA-Z]+      #   one or more alphanumeric
          _*                #   zero or more underscores
        )*
    '''

    _radix_exp_mb = rf'''
        (
          &                 # exponent marker
          {_sign_mb}        # optional sign
          [0-9a-zA-Z]+      # one or more alphanumeric
        )?
    '''

    # 2af3__bee_
    _hex_unit = r'''
        [0-9a-fA-F]+        # one or more hex digits
        (                   # followed by zero or more groups of:
          _*                #   zero or more underscores
          [0-9a-fA-F]+      #   one or more hex digits
          _*                #   zero or more underscores
        )*
    '''

    # 12_000__
    _dec_unit = r'''
        [0-9]+              # one or more digits
        (                   # followed by zero or more groups of:
          _*                #   zero or more underscores
          [0-9]+            #   one or more digits
          _*                #   zero or more underscores
        )*
    '''

    # E-23
    _dec_exp_mb = rf'''
        (                   # optional group of:
          [eE]              #   lower or uppercase e
          {_sign_mb}        #   optional sign
          [0-9]+            #   one or more digits
        )?
    '''

    # The `@`-prefixed alternatives come before their bare counterparts so
    # that the two-character opener is consumed as one token.
    _collection_delims = r'''(?x)
        (
          @\( |             # arrays and tuples
          \(  |
          \)  |
          @\[ |
          \[  |
          \]  |
          @\{ |             # tables and structs
          \{  |
          \}
        )
    '''

    # Rule order matters: the first matching rule wins.
    tokens = {
        'root': [
            (r'#.*$', Comment.Single),

            (r'\s+', Whitespace),

            # radix number
            # (with a radix point, fractional part optional)
            (rf'''(?x)
                  {_sign_mb} [0-9][0-9]? r {_radix_unit} \. ({_radix_unit})?
                  {_radix_exp_mb}
               ''',
             Number),

            # (without a radix point, or with a leading one)
            (rf'''(?x)
                  {_sign_mb} [0-9][0-9]? r (\.)? {_radix_unit}
                  {_radix_exp_mb}
               ''',
             Number),

            # hex number
            (rf'(?x) {_sign_mb} 0x {_hex_unit} \. ({_hex_unit})?',
             Number.Hex),

            (rf'(?x) {_sign_mb} 0x (\.)? {_hex_unit}',
             Number.Hex),

            # decimal number
            (rf'(?x) {_sign_mb} {_dec_unit} \. ({_dec_unit})? {_dec_exp_mb}',
             Number.Float),

            (rf'(?x) {_sign_mb} (\.)? {_dec_unit} {_dec_exp_mb}',
             Number.Float),

            # strings and buffers
            (r'@?"(\\.|[^"])*"', String),

            # long-strings and long-buffers
            # (closed by the same number of backticks that opened them)
            (r'(?s)@?(`+).+?\1', String),

            # things that hang out on front
            (r"('|~|,|;|\|)", Operator),

            # collection delimiters
            (_collection_delims, Punctuation),

            # constants
            # (Keyword.Constant is the standard Pygments token type; the
            # previous `Keyword.Constants` was an ad-hoc subtype that
            # formatters and styles only know as a plain Keyword)
            (words(constants, suffix=_token_end), Keyword.Constant),

            # keywords
            (r'(:' + _following_chars_mb + r'|:)', Name.Constant),

            # symbols
            (words(builtin_variables, suffix=_token_end),
             Name.Variable.Global),

            # the lookbehind restricts the next three rules to the head
            # position of a parenthesised form
            (words(special_forms, prefix=r'(?<=\()', suffix=_token_end),
             Keyword.Reserved),

            (words(builtin_macros, prefix=r'(?<=\()', suffix=_token_end),
             Name.Builtin),

            (words(builtin_functions, prefix=r'(?<=\()', suffix=_token_end),
             Name.Function),

            # other symbols
            (valid_name, Name.Variable),
        ]
    }
6 changes: 6 additions & 0 deletions tests/snippets/janet/bool_lit-false.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---input---
false

---tokens---
'false' Keyword.Constants
'\n' Text.Whitespace
6 changes: 6 additions & 0 deletions tests/snippets/janet/bool_lit-true.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---input---
true

---tokens---
'true' Keyword.Constants
'\n' Text.Whitespace
7 changes: 7 additions & 0 deletions tests/snippets/janet/buf_lit-multiline.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---input---
@"this is the first line
and what is this one?"

---tokens---
'@"this is the first line\nand what is this one?"' Literal.String
'\n' Text.Whitespace
Loading