Fix highlighting of regex token in Javascript mode.
fixes #119.
Atsuo Ishimoto committed Mar 22, 2015
1 parent 1bea5b6 commit a76de0d
Showing 4 changed files with 66 additions and 71 deletions.
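
Background: in JavaScript source a '/' can begin either a regular-expression literal or the division operator, and a highlighter has to look backwards to tell them apart. The fix below replaces the old token-class check with a lookback that skips comments and examines the last significant text before the '/': if it ends with an operator or opening bracket (the new OPERATORS set), the '/' opens a regex; if it ends with an identifier or other value, it is division. A minimal character-level sketch of that heuristic (illustrative only; the commit's _is_regex works on highlighter tokens, and starts_regex is an invented name):

    # Simplified, character-level model of the regex-vs-division decision.
    # The real _is_regex consults highlighter tokens and skips comments.
    OPERATORS = set('+-*/~&?:|=%;<>^({[,')

    def starts_regex(text, pos):
        """True if the '/' at text[pos] likely begins a regex literal."""
        before = text[:pos].rstrip()
        if not before:
            return True                 # nothing precedes: '/abc/' is a regex
        return before[-1] in OPERATORS  # after an operator: regex; else division

    assert starts_regex('x = /ab/', 4)      # '/' follows '='
    assert not starts_regex('a /abc/', 2)   # '/' follows the identifier 'a'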
84 changes: 46 additions & 38 deletions kaa/filetype/javascript/javascriptmode.py
@@ -1,4 +1,5 @@
from collections import namedtuple
+import kaa
from kaa.filetype.default import defaultmode
from kaa.highlight import Tokenizer, Span, Keywords, EndSection, SingleToken
from kaa.theme import Theme, Style
@@ -9,43 +10,52 @@


class JSRegex(Span):
+    OPERATORS = set('+\-*/~&?:|=%;<>^({[,:]')

-    def on_start(self, tokenizer, doc, pos, match):
-        valid_tokens = (
-            tokenizer.tokens.keywords,
-            tokenizer.tokens.punctuation1)
-        ignore_tokens = (tokenizer.tokens.comment1, tokenizer.tokens.comment2)
+    def _is_regex(self, tokenizer, doc, pos):
+        # check if the current token is a valid regex expression or not
+        ignore_tokens = {tokenizer.tokens.comment1, tokenizer.tokens.comment2}
        p = pos - 1
        while p >= 0:
-            style = doc.styles.getints(p, p + 1)[0]
-            token = tokenizer.get_token(style)
-            # if token is subtokenizer, get actual token inside subtokenizer.
-            if token:
-                token = token.get_token(style)
-
-            if not token or token in ignore_tokens:
-                # ignore comment tokens
-                oldp = p
-                p = doc.styles.rfindint([style], 0, p, comp_ne=True)
-                if p == -1:
-                    break
-                continue
-
-            if token in valid_tokens:
-                # regex can be put here.
-                break
-
-            if token not in tokenizer.tokens:
-                # Token is not JS token. May be embedded in HTML.
-                break
-
-            ret = yield from tokenizer.tokens.punctuation1.on_start(
+            _tokenizer, token, prev = tokenizer.highlighter.get_prev_token(doc, p)
+
+            if token is None:
+                return True
+            else:
+                end_prev_token = token.find_token_end(doc, prev)
+
+                if end_prev_token <= p:
+                    s = doc.gettext(end_prev_token, p + 1).strip()
+                    if s:
+                        if s[-1] in self.OPERATORS:
+                            # the current token begins just after an operator.
+                            return True
+                        else:
+                            return False
+
+            if tokenizer is not _tokenizer:
+                return True
+
+            if not token:
+                return True
+
+            if token not in ignore_tokens:
+                return False
+
+            p = prev - 1
+
+        return True
+
+    def on_start(self, tokenizer, doc, pos, match):
+        if not self._is_regex(tokenizer, doc, pos):
+            # this '/' is not a regex but the divide operator
+            ret = yield from tokenizer.tokens.punctuation.on_start(
                tokenizer, doc, pos, match)
            return ret
-
-        ret = yield from super().on_start(tokenizer, doc, pos, match)
-        return ret
+        else:
+            ret = yield from super().on_start(tokenizer, doc, pos, match)
+            return ret

    def resume_pos(self, highlighter, tokenizer, doc, pos):
        t, token, p = self.get_prev_token(tokenizer, doc, pos)
@@ -58,7 +68,7 @@ def resume_pos(self, highlighter, tokenizer, doc, pos):
def build_tokenizer(stop=None, terminates=None):
    JSTOKENS = namedtuple(
        'jstokens', ['stop', 'keywords', 'number', 'comment1', 'comment2',
-                     'string1', 'string2', 'regex', 'punctuation1', 'punctuation2'])
+                     'string1', 'string2', 'regex', 'punctuation'])

    keywords = Keywords(
        'javascript-keyword', 'keyword',
@@ -80,15 +90,13 @@ def build_tokenizer(stop=None, terminates=None):
    regex = JSRegex('javascript-regex', 'string',
                    r'/', r'/\w*', escape='\\')

-    punctuation1 = SingleToken('javascript-punctuation1', 'default',
-                               [r'[+\-*/~&?:|=%;<>^({[,:]'])
+    punctuation = SingleToken('javascript-punctuation', 'default',
+                              [r'[+\-*/~&?:|=%;<>^(){}[],:'])

-    punctuation2 = SingleToken('javascript-punctuation2', 'default',
-                               [r'\S'])
-
    tokens = JSTOKENS(
        stop, keywords, number, comment1, comment2, string1, string2,
-        regex, punctuation1, punctuation2)
+        regex, punctuation)

    return Tokenizer(tokens, terminates=terminates)
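
The control flow above: _is_regex walks backwards with highlighter.get_prev_token, and comment tokens (ignore_tokens) merely push the scan further back, so a comment sitting between an operator and the '/' does not change the outcome. A toy model of that loop (the token kinds and the function name are invented for illustration; this is not kaa's API):

    COMMENT, OPERATOR, IDENT = 'comment', 'operator', 'ident'

    def is_regex_after(preceding):
        # preceding: kinds of the tokens before the '/', latest last
        for kind in reversed(preceding):
            if kind == COMMENT:
                continue                # comments never decide the question
            return kind == OPERATOR     # operator -> regex; value -> division
        return True                     # nothing significant before the '/'

    assert is_regex_after([IDENT, OPERATOR, COMMENT])      # 'a = /*c*/ /re/'
    assert not is_regex_after([OPERATOR, IDENT, COMMENT])  # '+ a /*c*/ / b'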

38 changes: 13 additions & 25 deletions kaa/highlight.py
@@ -1,4 +1,5 @@
import collections
+import kaa
from kaa import document
from kaa import doc_re

@@ -38,6 +39,15 @@ def find_token_top(self, doc, pos):
                return p + 1
        return 0

+    def find_token_end(self, doc, pos):
+        # Returns the end position of the current token
+        if 0 < pos < len(doc.styles):
+            p = doc.styles.findint(self.get_tokenids(), pos, len(doc.styles),
+                                   comp_ne=True)
+            if p != -1:
+                return p
+        return len(doc.styles)
+
    def get_prev_token(self, tokenizer, doc, pos):
        """Find previous token"""

@@ -203,6 +213,7 @@ def find_token_top(self, doc, pos):
    def resume_pos(self, highlighter, tokenizer, doc, pos):
        ret = self.find_token_top(doc, pos)
        if ret > 0:
+            # resume highlighting before this token
            return highlighter.get_resume_pos(doc, ret)
        return 0

@@ -487,6 +498,8 @@ def get_prev_token(self, doc, pos):
                # (e.g. white spaces)
                tokenizer, token = pair
                if token:
+                    # if the token is a subtokenizer, get the actual token inside it.
+                    token = token.get_token(style)
                    return tokenizer, token, pos

            pos = doc.styles.rfindint([style], 0, pos, comp_ne=True)
@@ -497,31 +510,6 @@ def get_resume_pos(self, doc, pos):
        if pos == 0:
            return pos

-        # check a character proceeding to updated pos
-        # pos -= 1
-        # style = doc.styles.getints(pos, pos + 1)[0]
-        # if style == 0:
-        #     # not highlighted yet.
-        #     p = doc.styles.rfindint([0], 0, pos, comp_ne=True)
-        #     if p != -1:
-        #         return p + 1
-        #     return 0
-        #
-        # pair = self.tokenids.get(style)
-        # if not pair:
-        #     # Invalid. this style is not set by tokenizer.
-        #     return 0
-        #
-        # tokenizer, token = self.tokenids.get(style)
-        # if not token:
-        #     # token is not defined here. (e.g. white spaces)
-        #     # resume at beggining of this non-defined area.
-        #     p = doc.styles.rfindint([style], 0, pos, comp_ne=True)
-        #     if p != -1:
-        #         return p + 1
-        #     return 0
-        # return token.resume_pos(self, tokenizer, doc, pos)

        tokenizer, token, pos = self.get_prev_token(doc, pos)
        if not token:
            return 0
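
The new find_token_end helper above mirrors the existing find_token_top: it scans doc.styles forward from pos for the first position whose style id no longer belongs to this token, so the two methods bracket a token's styled run. A rough model of that scan, assuming doc.styles acts like a flat list of style ints and findint(..., comp_ne=True) returns the first index holding a different value, or -1:

    def find_first_ne(styles, ids, start, end):
        # first index in styles[start:end] whose style is NOT in ids, else -1
        for i in range(start, end):
            if styles[i] not in ids:
                return i
        return -1

    styles = [5, 5, 5, 0, 0, 7]   # a token styled 5, whitespace, another token
    assert find_first_ne(styles, {5}, 1, len(styles)) == 3   # token ends at 3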
1 change: 0 additions & 1 deletion test/test_htmlmode.py
@@ -239,7 +239,6 @@ def test_javascriptattr4(self):
    def test_javascriptattr5(self):
        hl = highlight.Highlighter(tokenizers=self.tokenizers)
        doc = self._getdoc('''<a ona='/a/\'''')
-
        assert [
            (0, 1, hl.tokenizers[0].tokens.htmltag.span_lt),
            (1, 2, hl.tokenizers[0].tokens.htmltag.span_elemname),
14 changes: 7 additions & 7 deletions test/test_javascriptmode.py
@@ -19,12 +19,12 @@ def test_regex(self):

    def test_not_regex(self):
        hl = highlight.Highlighter(tokenizers=self.tokenizers)
-        doc = self._getdoc('a /abc/')
+        doc = self._getdoc('if a /abc/')
        hl.update_style(doc)
-        styles = doc.styles.getints(0, 7)
-        assert styles == ([self.tokenizers[0].tokens.punctuation2.tokenid] +
-                          [self.tokenizers[0].nulltoken] +
-                          [self.tokenizers[0].tokens.punctuation1.tokenid] +
-                          [self.tokenizers[0].tokens.punctuation2.tokenid]*3 +
-                          [self.tokenizers[0].tokens.punctuation1.tokenid])
+        styles = doc.styles.getints(0, 10)
+        assert styles == ([self.tokenizers[0].tokens.keywords.tokenid]*2 +
+                          [self.tokenizers[0].nulltoken]*3 +
+                          [self.tokenizers[0].tokens.punctuation.tokenid] +
+                          [self.tokenizers[0].nulltoken]*3 +
+                          [self.tokenizers[0].tokens.punctuation.tokenid])
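
Reading the updated expectation character by character, 'if a /abc/' now styles as: 'if' as keyword (2 positions), ' a ' as null (3), the first '/' as punctuation, 'abc' as null (3), and the closing '/' as punctuation. Because the slash follows the identifier 'a' rather than an operator, both slashes highlight as plain division instead of a regex literal, which is the behaviour this test pins down.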
