Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

updated tests, ran tests on Python 2 + 3, corrected errors

  • Loading branch information...
commit 7ed41fb501a6154f33382e7e95cca26bca8ce0c4 1 parent dae178a
@proycon authored
View
11 evaluation.py
@@ -329,7 +329,7 @@ def startcommand(self, command, cwd, stdout, stderr, *arguments, **parameters):
else:
cmd += ' ' + key + str(value)
if printcommand:
- print("STARTING COMMAND: " + cmd.encode('utf-8'),file=stderr)
+ print("STARTING COMMAND: " + cmd, file=stderr)
self.begintime = datetime.datetime.now()
if not cwd:
@@ -350,7 +350,7 @@ def score(self):
def delete(self):
- raise Exception("Not implemented yet, make sure to overload the delete() method")
+ pass
def sample(self, size):
"""Return a sample of the input data"""
@@ -437,7 +437,10 @@ def __init__(self, experimentclass, inputdata, size, parameterscope, poolsize=1,
self.constraintfunc = lambda x: True
#compute all parameter combinations:
- verboseparameterscope = [ self._combine(x,y) for x,y in parameterscope.items() ]
+ if isinstance(parameterscope, dict):
+ verboseparameterscope = [ self._combine(x,y) for x,y in parameterscope.items() ]
+ else:
+ verboseparameterscope = [ self._combine(x,y) for x,y in parameterscope ]
self.parametercombinations = [ (x,0) for x in itertools.product(*verboseparameterscope) if self.constraintfunc(dict(x)) ] #generator
def _combine(self,name, values): #TODO: can't we do this inline in a list comprehension?
@@ -472,7 +475,7 @@ def test(self,i=None):
for parameters,score in self.parametercombinations:
experiment = self.ExperimentClass(data, **dict(parameters))
experiment.run()
- newparametercombinations.append( (parameters, experiment.scofe()) )
+ newparametercombinations.append( (parameters, experiment.score()) )
if self.delete:
experiment.delete()
else:
View
23 formats/folia.py
@@ -2029,15 +2029,6 @@ def relaxng(cls, includechildren=True,extraattribs = None, extraelements=None):
E = ElementMaker(namespace="http://relaxng.org/ns/structure/1.0",nsmap={None:'http://relaxng.org/ns/structure/1.0' , 'folia': "http://ilk.uvt.nl/folia", 'xml' : "http://www.w3.org/XML/1998/namespace"})
return E.define( E.element(E.text(), E.optional( E.attribute(name='offset')), E.optional( E.attribute(name='class')),name=cls.XMLTAG ), name=cls.XMLTAG, ns=NSFOLIA)
-class String(AbstractElement):
- """String"""
- ACCEPTED_DATA = (TextContent,Alignment,Description, Metric, Correction)
- XMLTAG = 'str'
- REQUIRED_ATTRIBS = ()
- OPTIONAL_ATTRIBS = (Attrib.CLASS,Attrib.ANNOTATOR,Attrib.CONFIDENCE, Attrib.DATETIME)
- ANNOTATIONTYPE = AnnotationType.STRING
- OCCURRENCES = 0 #Number of times this element may occur in its parent (0=unlimited)
- OCCURRENCESPERSET = 0 #Number of times this element may occur per set (0=unlimited)
class Linebreak(AbstractStructureElement):
@@ -2667,6 +2658,8 @@ def resolve(self):
return l
+
+
class ErrorDetection(AbstractExtendedTokenAnnotation):
ANNOTATIONTYPE = AnnotationType.ERRORDETECTION
XMLTAG = 'errordetection'
@@ -2860,6 +2853,16 @@ def select(self, cls, set=None, recursive=True, ignorelist=[], node=None):
Original.ACCEPTED_DATA = (AbstractTokenAnnotation, Word, TextContent, Correction, Description, Metric)
+
+class String(AbstractElement):
+ """String"""
+ ACCEPTED_DATA = (TextContent,Alignment,Description, Metric, Correction)
+ XMLTAG = 'str'
+ REQUIRED_ATTRIBS = ()
+ OPTIONAL_ATTRIBS = (Attrib.CLASS,Attrib.ANNOTATOR,Attrib.CONFIDENCE, Attrib.DATETIME)
+ ANNOTATIONTYPE = AnnotationType.STRING
+ OCCURRENCES = 0 #Number of times this element may occur in its parent (0=unlimited)
+ OCCURRENCESPERSET = 0 #Number of times this element may occur per set (0=unlimited)
class Alternative(AbstractElement, AllowTokenAnnotation, AllowGenerateID):
"""Element grouping alternative token annotation(s). Multiple alternative elements may occur, each denoting a different alternative. Elements grouped inside an alternative block are considered dependent."""
@@ -5001,7 +5004,7 @@ def validate(filename,schema=None,deep=False):
doc = Document(tree=doc, deepvalidation=True)
XML2CLASS = {}
-for c in vars().values():
+for c in list(vars().values()):
try:
if c.XMLTAG:
XML2CLASS[c.XMLTAG] = c
View
16 search.py
@@ -55,6 +55,14 @@ def __eq__(self):
"""Implement an equality test in the derived method, based only on the state's content (not its path etc!)"""
raise Exception("Classes derived from AbstractSearchState must define an __eq__() method!")
+ def __lt__(self, other):
+ assert isinstance(other, AbstractSearchState)
+ return self.score() < other.score()
+
+ def __gt__(self, other):
+ assert isinstance(other, AbstractSearchState)
+ return self.score() > other.score()
+
def __hash__(self):
"""Return a unique hash for this state, based on its ID"""
@@ -106,9 +114,7 @@ def __init__(self, **kwargs):
self.traversed = 0 #Count of number of nodes visited
self.solutions = 0 #Counts the number of solutions
self.debug = 0
- if not hasattr(self,'fringe'):
- self.fringe = [] #this is just here for pylint to stop complaining
- raise Exception("No fringe initialised") #subclasses should have assigned one already!!!
+
for key, value in kwargs.items():
if key == 'graph':
self.usememory = value #search space is a graph? memory required to keep visited states
@@ -374,7 +380,7 @@ def __iter__(self):
pass
- if not self.usememory or (self.usememory and not hash(state) in self.visited):
+ if not self.usememory or (self.usememory and not hash(state) in self._visited):
self.traversed += 1
#Evaluate state
@@ -425,7 +431,7 @@ def __iter__(self):
if self.debug:
print("\t[pynlpl debug] Expanded " + str(statecount) + " states, " + str(offers) + " offered to successor pool",file=stderr)
if self.keeptraversal: self._traversal.append(state)
- if self.usememory: self.visited[hash(state)] = True
+ if self.usememory: self._visited[hash(state)] = True
self.prune(state) #calls prune method (does nothing by default in this search!!!)
else:
View
16 tests/cgn.py
@@ -12,14 +12,22 @@
#
#----------------------------------------------------------------
+
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
import sys
import os
import unittest
-import codecs
-sys.path.append(sys.path[0] + '/../../')
-os.environ['PYTHONPATH'] = sys.path[0] + '/../../'
-from StringIO import StringIO
+
+if sys.version < '3':
+ from StringIO import StringIO
+else:
+ from io import StringIO
+
import lxml.etree
from pynlpl.formats import cgn
View
8 tests/datatypes.py
@@ -1,12 +1,16 @@
#!/usr/bin/env python
#-*- coding:utf-8 -*-
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from pynlpl.common import u
+
import os
import sys
import unittest
-sys.path.append(sys.path[0] + '/../../')
-os.environ['PYTHONPATH'] = sys.path[0] + '/../../'
from pynlpl.datatypes import PriorityQueue
View
30 tests/evaluation.py
@@ -11,14 +11,17 @@
#
#-------------------------------------------------------------
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+from pynlpl.common import u
+
import sys
import os
import unittest
import random
-sys.path.append(sys.path[0] + '/../../')
-os.environ['PYTHONPATH'] = sys.path[0] + '/../../'
-
from pynlpl.evaluation import AbstractExperiment, WPSParamSearch, ExperimentPool, ClassEvaluation
class ParamExperiment(AbstractExperiment):
@@ -43,8 +46,8 @@ def sample(inputdata,n):
class PoolExperiment(AbstractExperiment):
def start(self):
- self.runcommand('sleep',None,None,None,str(self.parameters['duration']))
- print "STARTING: sleep " + str(self.parameters['duration'])
+ self.startcommand('sleep',None,None,None,str(self.parameters['duration']))
+ print("STARTING: sleep " + str(self.parameters['duration']))
class WPSTest(unittest.TestCase):
@@ -61,10 +64,11 @@ class ExperimentPoolTest(unittest.TestCase):
def test_pool(self):
pool = ExperimentPool(4)
for i in range(0,15):
- pool.append( PoolExperiment(None, duration=random.randint(1,60)) )
+ pool.append( PoolExperiment(None, duration=random.randint(1,6)) )
for experiment in pool.run():
- print "DONE: sleep " + str(experiment.parameters['duration'])
- self.assertEqual(1, False)
+ print("DONE: sleep " + str(experiment.parameters['duration']))
+
+ self.assertTrue(True) #if we got here, no exceptions were raised and it's okay
class ClassEvaluationTest2(unittest.TestCase):
def setUp(self):
@@ -74,9 +78,9 @@ def setUp(self):
def test001(self):
e = ClassEvaluation(self.goals, self.observations)
- print
- print e
- print e.confusionmatrix()
+ print()
+ print(e)
+ print(e.confusionmatrix())
class ClassEvaluationTest(unittest.TestCase):
@@ -90,8 +94,8 @@ def test001(self):
e = ClassEvaluation(self.goals, self.observations)
print
- print e
- print e.confusionmatrix()
+ print(e)
+ print(e.confusionmatrix())
self.assertEqual(e.tp['cat'], 5)
View
1  tests/search.py
@@ -64,7 +64,6 @@ def score(self):
totaldistortion += tokendistortion
return totaldistortion
-
def expand(self):
#Operator: Swap two consecutive pairs
l = len(self.tokens)
View
7 tests/statistics.py
@@ -10,14 +10,15 @@
# Licensed under GPLv3
#
#----------------------------------------------------------------
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
import sys
import os
import unittest
-sys.path.append(sys.path[0] + '/../../')
-os.environ['PYTHONPATH'] = sys.path[0] + '/../../'
-
from pynlpl.statistics import FrequencyList, HiddenMarkovModel
from pynlpl.textprocessors import Windower
View
16 tests/textprocessors.py
@@ -12,14 +12,16 @@
#
#----------------------------------------------------------------
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
import sys
import os
import unittest
-sys.path.append(sys.path[0] + '/../../')
-os.environ['PYTHONPATH'] = sys.path[0] + '/../../'
-
-from pynlpl.textprocessors import Windower, crude_tokenizer, strip_accents, calculate_overlap
+from pynlpl.textprocessors import Windower, tokenise, strip_accents, calculate_overlap
text = "This is a test .".split(" ")
@@ -54,12 +56,12 @@ class CrudeTokenizerTest(unittest.TestCase):
def test_tokenize(self):
"""Crude tokeniser"""
global text
- self.assertEqual(crude_tokenizer("This is a test."),text)
+ self.assertEqual(tokenise("This is a test."),text)
class StripAccentTest(unittest.TestCase):
def test_strip_accents(self):
- """Strip Accents"""
- self.assertEqual(strip_accents(u"áàâãāĝŭçñßt"),"aaaaagucnt")
+ """Strip Accents"""
+ self.assertEqual(strip_accents("áàâãāĝŭçñßt"),"aaaaagucnt")
class OverlapTest(unittest.TestCase):
def test_overlap_subset(self):
View
12 textprocessors.py
@@ -33,9 +33,10 @@
import string
import io
import array
-from statistics import FrequencyList
-from datatypes import intarraytobytearray, bytearraytoint, containsnullbyte
from itertools import permutations
+from pynlpl.statistics import FrequencyList
+from pynlpl.datatypes import intarraytobytearray, bytearraytoint, containsnullbyte
+
class Windower(object):
@@ -206,9 +207,12 @@ def tokenise(line):
if buffer:
tokens.append(buffer)
buffer = ''
+ if c in string.punctuation:
+ tokens.append(c)
else:
buffer += c
- if buffer: tokens.append(buffer)
+ if buffer:
+ tokens.append(buffer)
return tokens
@@ -221,7 +225,7 @@ def strip_accents(s, encoding= 'utf-8'):
return unicodedata.normalize('NFKD', unicode(s,encoding)).encode('ASCII', 'ignore')
else:
if isinstance(s,bytes): s = str(s,encoding)
- return unicodedata.normalize('NFKD', s).encode('ASCII', 'ignore')
+ return str(unicodedata.normalize('NFKD', s).encode('ASCII', 'ignore'),'ascii')
def swap(tokens, maxdist=2):
"""Perform a swap operation on a sequence of tokens, exhaustively swapping all tokens up to the maximum specified distance. This is a subset of all permutations."""
Please sign in to comment.
Something went wrong with that request. Please try again.