Merge pull request #81 from rillian/warnings
Fix remaining travis warnings
ageorgou committed Jun 25, 2019
2 parents c32960b + 6959cd9 commit 61cc9e5
Showing 3 changed files with 91 additions and 50 deletions.
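A note on what was being warned about: the bulk of this diff removes backslashes before characters like &, #, ! and = that either trigger Python's invalid-escape DeprecationWarning in plain string literals (reported by pycodestyle as W605) or are simply redundant noise in raw regexes. That this is the class of Travis warning being silenced is an assumption based on the pattern of the changes. A minimal sketch of the behaviour on CPython 3.6+:

import warnings

warnings.simplefilter("always")

# Compiling source that contains an unrecognised escape in a plain
# (non-raw) string literal warns on Python 3.6+:
compile("pattern = '\\='", "<demo>", "exec")
# -> DeprecationWarning: invalid escape sequence '\='

# The fixes used throughout this commit: drop the needless backslash,
# or keep genuinely special characters escaped inside a raw string.
compile("pattern = '='", "<demo>", "exec")     # no warning
compile(r"pattern = r'\.'", "<demo>", "exec")  # raw string: no warning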
14 changes: 4 additions & 10 deletions .travis.yml
@@ -38,13 +38,6 @@ before_install:
jython -c "print ''";
jython -c "import sys; print sys.version"
fi
-if [[ $TRAVIS_OS_NAME == 'osx' ]]; then
-brew update
-brew upgrade
-brew upgrade python
-brew install python3
-python3 --version
-fi
install:
- |
@@ -55,7 +48,8 @@ install:
fi
$PIP install wheel
$PIP install setuptools
-$PIP install ply pep8 mako
+$PIP install ply mako
+$PIP install pycodestyle
if [ "$MYPYTHON" != "jython" ]; then
$PIP install --upgrade pytest pytest-cov codecov
fi
@@ -67,12 +61,12 @@ script:
$MYPYTHON -c "from pyoracc import _generate_parsetab; _generate_parsetab()"
echo "Running tests"
if [ "$MYPYTHON" == "jython" ]; then
-py.test
+pytest
else
pytest --cov=pyoracc
fi
-- pep8 --exclude=parsetab.py .
+- pycodestyle --exclude=parsetab.py

after_success:
- |
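Aside on the pep8 to pycodestyle switch above: the pep8 package was renamed to pycodestyle upstream, and the old entry point prints a deprecation notice on every run, presumably one of the remaining Travis warnings. For reference, a hedged sketch of the equivalent check driven from Python, assuming pycodestyle's documented StyleGuide API (it mirrors the old pep8 module):

import pycodestyle

# Rough programmatic equivalent of `pycodestyle --exclude=parsetab.py`;
# StyleGuide and check_files carry over from the old pep8 API.
style = pycodestyle.StyleGuide(exclude=["parsetab.py"])
report = style.check_files(["."])
print("%d style issue(s) found" % report.total_errors)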
80 changes: 40 additions & 40 deletions pyoracc/atf/common/atflex.py
@@ -69,16 +69,16 @@ def resolve_keyword(self, value, source, fallback=None, extra=None):

states = AtfLexicon.STATES

-t_AMPERSAND = r'\&'
-t_HASH = r'\#'
-t_EXCLAIM = r'\!'
+t_AMPERSAND = r'&'
+t_HASH = r'#'
+t_EXCLAIM = r'!'
t_QUERY = r'\?'
t_STAR = r'\*'
t_DOLLAR = r'\$'
-t_MINUS = r'\-'
-t_FROM = r'\<\<'
-t_TO = r'\>\>'
-t_COMMA = r'\,'
+t_MINUS = r'-'
+t_FROM = r'<<'
+t_TO = r'>>'
+t_COMMA = r','
t_PARBAR = r'\|\|'

t_INITIAL_transctrl_PARENTHETICALID = r'\([^\n\r]*\)'
@@ -88,22 +88,22 @@ def t_INITIAL_transctrl_WHITESPACE(self, t):
# NO TOKEN

def t_MULTILINGUAL(self, t):
-r'\=\='
+r'=='
t.lexer.push_state("text")
return t

def t_EQUALBRACE(self, t):
-r'^\=\{'
+r'^=\{'
t.lexer.push_state('text')
return t

def t_EQUALS(self, t):
-r'\='
+r'='
t.lexer.push_state('flagged')
return t

def t_INITIAL_parallel_labeled_COMMENT(self, t):
-r'^\#+(?![a-zA-Z]+\:)'
+r'^#+(?![a-zA-Z]+:)'
# Negative lookahead to veto protocols as comments
t.lexer.push_state('absorb')
return t
@@ -121,7 +121,7 @@ def t_NEWLINE(self, t):
return t

def t_INITIAL_parallel_labeled_ATID(self, t):
-r'^\@[a-zA-Z][a-zA-Z0-9\[\]]*\+?'
+r'^@[a-zA-Z][a-zA-Z0-9\[\]]*\+?'
t.value = t.value[1:]
t.lexpos += 1
t.type = self.resolve_keyword(t.value,
@@ -171,13 +171,13 @@ def t_INITIAL_parallel_labeled_ATID(self, t):
return t

def t_labeled_OPENR(self, t):
-r'\@\('
+r'@\('
t.lexer.push_state("para")
t.lexer.push_state("transctrl")
return t

def t_INITIAL_parallel_labeled_HASHID(self, t):
-r'\#[a-zA-Z][a-zA-Z0-9\[\]]+\:'
+r'#[a-zA-Z][a-zA-Z0-9\[\]]+:'
# Note that \:? absorbs a trailing colon in protocol keywords
t.value = t.value[1:-1]
t.lexpos += 1
@@ -213,19 +213,19 @@ def t_INITIAL_parallel_labeled_HASHID(self, t):
return t

def t_LINELABEL(self, t):
-r'^[^\ \t\n]*\.'
+r'^[^ \t\n]*\.'
t.value = t.value[:-1]
t.lexer.push_state('text')
return t

def t_score_SCORELABEL(self, t):
-r'^[^.:\ \t\#][^.:\ \t]*\:'
+r'^[^.: \t#][^.: \t]*:'
t.value = t.value[:-1]
t.lexer.push_state('text')
return t

def t_ID(self, t):
-u'[a-zA-Z0-9][a-zA-Z\'\u2019\xb4\/\.0-9\:\-\[\]_\u2080-\u2089]*'
+r'[a-zA-Z0-9][a-zA-Z0-9/.:_\-\[\]' u'\'\u2019\xb4\u2080-\u2089]*'
t.value = t.value.replace(u'\u2019', "'")
t.value = t.value.replace(u'\xb4', "'")
t.type = self.resolve_keyword(t.value,
@@ -271,7 +271,7 @@ def t_flagged_text_lemmatize_transctrl_nonequals_absorb_NEWLINE(self, t):
# Unicode 2032 is PRIME
# All of these could be used as prime
def t_transctrl_ID(self, t):
-u'[a-zA-Z0-9][a-zA-Z\'\u2019\u2032\u02CA\xb4\/\.0-9\:\-\[\]_' \
+r'[a-zA-Z0-9][a-zA-Z0-9/.:_\-\[\]' u'\'\u2019\u2032\u02CA\xb4' \
u'\u2080-\u2089]*'
t.value = t.value.replace(u'\u2019', "'")
t.value = t.value.replace(u'\u2032', "'")
@@ -306,7 +306,7 @@ def t_transctrl_ID(self, t):
t_parallel_QUERY = r'\?'

def t_parallel_LINELABEL(self, t):
-r'^([^\.\ \t]*)\.[\ \t]*'
+r'^([^. \t]*)\.[ \t]*'
t.value = t.value.strip(" \t.")
return t

@@ -315,7 +315,7 @@ def t_parallel_labeled_DOLLAR(self, t):
t.lexer.push_state("absorb")
return t

-t_transctrl_MINUS = r'\-\ '
+t_transctrl_MINUS = r'- '

def t_transctrl_CLOSER(self, t):
r'\)'
@@ -347,12 +347,12 @@ def t_labeled_NEWLINE(self, t):
# Flag characters (#! etc ) don't apply in translations
# But reference anchors ^1^ etc do.
# lines beginning with a space are continuations
-white = r'[\ \t]*'
+white = r'[ \t]*'
# translation_regex1 and translation_regex2 are identical apart from the
# fact that the first character may not be a ?
# We are looking for a string that does not start with ?; it may include
# newlines if they are followed by whitespace.
-translation_regex1 = r'([^\?\^\n\r]|([\n\r](?=[ \t])))'
+translation_regex1 = r'([^?\^\n\r]|([\n\r](?=[ \t])))'
translation_regex2 = r'([^\^\n\r]|([\n\r](?=[ \t])))*'
translation_regex = white + translation_regex1 + translation_regex2 + white

@@ -366,7 +366,7 @@ def t_parallel_interlinear_ID(self, t):
return t

def t_parallel_labeled_AMPERSAND(self, t):
-r'\&'
+r'&'
# New document, so leave translation state
t.lexer.pop_state()
return t
@@ -383,9 +383,9 @@ def t_parallel_labeled_AMPERSAND(self, t):
# Used for states where only flag# characters! and ^1^ references
# Are separately tokenised

-nonflagnonwhite = r'[^\ \t\#\!\^\*\?\n\r\=]'
-internalonly = r'[^\n\^\r\=]'
-nonflag = r'[^\ \t\#\!\^\*\?\n\r\=]'
+nonflagnonwhite = r'[^ \t#!\^*?\n\r=]'
+internalonly = r'[^\n\^\r=]'
+nonflag = r'[^ \t#!\^*?\n\r=]'
many_int_then_nonflag = '(' + internalonly + '*' + nonflag + '+' + ')'
many_nonflag = nonflag + '*'
intern_or_nonflg = '(' + many_int_then_nonflag + '|' + many_nonflag + ')'
@@ -399,17 +399,17 @@ def t_flagged_ID(self, t):
t.value = t.value.strip()
return t

-t_flagged_HASH = r'\#'
-t_flagged_EXCLAIM = r'\!'
+t_flagged_HASH = r'#'
+t_flagged_EXCLAIM = r'!'
t_flagged_QUERY = r'\?'
t_flagged_STAR = r'\*'
-t_flagged_parallel_para_HAT = r'[\ \t]*\^[\ \t]*'
-t_flagged_EQUALS = r'\='
+t_flagged_parallel_para_HAT = r'[ \t]*\^[ \t]*'
+t_flagged_EQUALS = r'='
# --- Rules for paragraph state----------------------------------
# Free text, ended by double new line

terminates_para = \
"(\#|\@[^i][^\{]|\&|\Z|(^[0-9]+[\'\u2019\u2032\u02CA\xb4]?\.))"
r'(#|@[^i][^{]|&|\Z|(^[0-9]+' u'[\'\u2019\u2032\u02CA\xb4]?\\.))'

@lex.TOKEN(r'([^\^\n\r]|(\r?\n(?!\s*\r?\n)(?!' +
terminates_para + ')))+')
@@ -428,9 +428,9 @@ def t_para_NEWLINE(self, t):
# BUT, exceptionally to fix existing bugs in active members of corpus,
# it is also ended by an @label or an @(), or a new document,
# Or a linelabel, or the end of the stream. Importantly it does not end
-# by @i{xxx} which is used for un translated words.
-# and these tokens are not absorbed by this token
-# Translation paragraph state is ended by a double newline
+# by @i{xxx} which is used for untranslated words.
+# Those tokens are not absorbed by this token.
+# Translation paragraph state is ended by a double newline.
@lex.TOKEN(r'\r?\n(?=' + terminates_para + ')')
def t_para_MAGICNEWLINE(self, t):
t.lexer.lineno += t.value.count("\n")
@@ -441,11 +441,11 @@ def t_para_MAGICNEWLINE(self, t):
# --- RULES FOR THE nonequals STATE -----
# Absorb everything except an equals
def t_nonequals_ID(self, t):
-r'[^\=\n\r]+'
+r'[^=\n\r]+'
t.value = t.value.strip()
return t

-t_nonequals_EQUALS = r'\='
+t_nonequals_EQUALS = r'='

# --- RULES FOR THE absorb STATE -----
# Absorb everything
@@ -455,15 +455,15 @@ def t_absorb_ID(self, t):
return t

# --- RULES FOR THE text STATE ----
-t_text_ID = r'[^\ \t \n\r]+'
+t_text_ID = r'[^ \t\n\r]+'

def t_text_SPACE(self, t):
-r'[\ \t]'
+r'[ \t]'
# No token generated

# --- RULES FOR THE lemmatize STATE
-t_lemmatize_ID = r'[^\;\n\r]+'
-t_lemmatize_SEMICOLON = r'\;[\ \t]*'
+t_lemmatize_ID = r'[^;\n\r]+'
+t_lemmatize_SEMICOLON = r';[ \t]*'

# Error handling rule
def t_ANY_error(self, t):
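Context for why these regexes live in bare t_NAME strings and method docstrings: PLY builds the tokenizer by reflecting over every t_ prefixed definition, so each literal is read both by the Python compiler (where stray backslashes warn) and by the re module. A minimal self-contained sketch of the convention, with illustrative token names rather than pyoracc's actual lexicon:

import ply.lex as lex

tokens = ("EQUALS", "ID")

# A plain t_NAME string is that token's regex; punctuation with no
# special meaning to re, such as '=', needs no backslash.
t_EQUALS = r"="
t_ignore = " \t"

def t_ID(t):
    r"[a-zA-Z0-9]+"
    # Function rules take their regex from the docstring and can
    # post-process t.value before returning the token.
    return t

def t_error(t):
    t.lexer.skip(1)

lexer = lex.lex()
lexer.input("foo = bar")
print([tok.type for tok in lexer])  # ['ID', 'EQUALS', 'ID']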
47 changes: 47 additions & 0 deletions pyoracc/test/atf/test_atflexer.py
@@ -574,6 +574,19 @@ def test_hash_note_UPPERCASE(self):
"LINELABEL"] + ["ID"] * 6 + ["NEWLINE", "NOTE", "ID", "NEWLINE"]
)

+    def test_hash_note_multiline(self):
+        # Notes can be free text until a double-newline.
+        line = "a-šar _saḫar.ḫi.a_ bu-bu-su-nu"
+        self.compare_tokens(
+            "1. " + line + "\n" +
+            "#note: Does this combine with the next line?\n"
+            "It should.\n\n",
+            ["LINELABEL"] + ["ID"] * len(line.split()) + ["NEWLINE"] +
+            ["NOTE", "ID", "NEWLINE"],
+            ['1'] + line.split() +
+            [None, None, "Does this combine with the next line?\nIt should."]
+        )

def test_open_text_with_dots(self):
# This must not come out as a linelabel of Hello.
self.compare_tokens(
@@ -896,6 +909,40 @@ def test_note_ended_by_strucuture(self):
["REVERSE"]
)

+    def compare_note_ended_by_line(self, line_label):
+        'Helper for Note para state termination.'
+        # Sample text.
+        line1 = u"a-šar _saḫar.ḫi.a_ bu-bu-su-nu"
+        line2 = u"a-kal-ši-na ṭi-id-di"
+        # Generate the successive line numbers in the same style.
+        label1 = line_label
+        next_label = int(label1[:1]) + 1
+        if _pyversion() == 2:
+            label2 = unicode(next_label) + label1[1:]
+        else:
+            label2 = str(next_label) + label1[1:]
+        self.compare_tokens(
+            label1 + ". " + line1 + "\n" +
+            "#note: Does this combine with the next line?\n" +
+            label2 + ". " + line2 + "\n",
+            ["LINELABEL"] + ["ID"] * len(line1.split()) + ["NEWLINE"] +
+            ["NOTE", "ID", "NEWLINE"] +
+            ["LINELABEL"] + ["ID"] * len(line2.split()) + ["NEWLINE"],
+            [label1] + line1.split() +
+            [None, None, "Does this combine with the next line?", None] +
+            [label2] + line2.split() + [None]
+        )
+
+    def test_note_ended_by_line(self):
+        'Notes can be free text until the next line label.'
+        for label in ["1",
+                      "2'",
+                      u"3\u2019",
+                      u"4\u2032",
+                      u"5\u02CA",
+                      u"6\xb4"]:
+            self.compare_note_ended_by_line(label)

def test_milestone(self):
self.compare_tokens(
"@tablet\n" +
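The new tests rely on the suite's compare_tokens helper, which is defined elsewhere in test_atflexer.py and not shown in this diff. A hedged sketch of its assumed shape (the lexer class name, the .lexer attribute, and the convention that None skips a value check are inferences from the calls above, not the actual implementation):

from pyoracc.atf.common.atflex import AtfLexer

def compare_tokens(content, expected_types, expected_values=None):
    # Tokenise the ATF content and compare token types in order.
    lexer = AtfLexer().lexer
    lexer.input(content)
    tokens = list(lexer)
    assert [tok.type for tok in tokens] == list(expected_types)
    if expected_values is not None:
        # A None entry (e.g. for NEWLINE) skips that token's value check.
        for tok, expected in zip(tokens, expected_values):
            if expected is not None:
                assert tok.value == expected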
