Skip to content
This repository has been archived by the owner on May 22, 2019. It is now read-only.

Commit

Permalink
Apply suggestions from @vmarkovtsev
Browse files Browse the repository at this point in the history
Signed-off-by: Irina <irenekhismatullina@gmail.com>
  • Loading branch information
irinakhismatullina committed Apr 12, 2019
1 parent 41ab565 commit badee8d
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
6 changes: 3 additions & 3 deletions sourced/ml/algorithms/token_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ class TokenParser:
# if you do not want to filter small tokens, set min_split_length=1.
SAVE_TOKEN_STYLE = False # whether to yield metadata that can be used to reconstruct the initial
# identifier.
ATTACH_UPPER = False # True to attach the last of several uppercase letters in a row to
# the next token. Example: 'HTMLResponce' -> ["html", "responce"] if True,
# 'HTMLResponce' -> ["htmlr", "esponce"] if False.
ATTACH_UPPER = True # True to attach the last of several uppercase letters in a row to
# the next token. Example: 'HTMLResponse' -> ["html", "response"] if True,
# 'HTMLResponse' -> ["htmlr", "esponse"] if False.

def __init__(self, stem_threshold=STEM_THRESHOLD, max_token_length=MAX_TOKEN_LENGTH,
min_split_length=MIN_SPLIT_LENGTH, single_shot=DEFAULT_SINGLE_SHOT,
Expand Down
4 changes: 2 additions & 2 deletions sourced/ml/tests/test_token_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

class TokenParserTests(unittest.TestCase):
def setUp(self):
self.tp = TokenParser(stem_threshold=4, max_token_length=20)
self.tp = TokenParser(stem_threshold=4, max_token_length=20, attach_upper=False)
self.tp._single_shot = False

def test_process_token(self):
Expand Down Expand Up @@ -55,7 +55,7 @@ def test_process_token(self):

def test_process_token_with_attach_upper(self):
tp = TokenParser(stem_threshold=100, single_shot=True, max_token_length=100,
min_split_length=1, attach_upper=True)
min_split_length=1)
tokens = [
("UpperCamelCase", ["upper", "camel", "case"]),
("camelCase", ["camel", "case"]),
Expand Down

0 comments on commit badee8d

Please sign in to comment.