Skip to content
Browse files

added packaging for Python 3 + fixes

  • Loading branch information...
1 parent 7b949c7 commit dae178a2f90144d7fc50b409d0fb49d8aa8a17df @proycon committed
Showing with 30 additions and 19 deletions.
  1. +1 −1 clients/freeling.py
  2. +4 −2 formats/folia.py
  3. +15 −5 setup.py
  4. +4 −4 statistics.py
  5. +5 −6 tagger.py
  6. +1 −1 textprocessors.py
View
2 clients/freeling.py
@@ -44,7 +44,7 @@ def process(self, sourcewords, debug=False):
"""Process a list of words, passing it to the server and realigning the output with the original words"""
if isinstance( sourcewords, list ) or isinstance( sourcewords, tuple ):
- sourcewords_s = u" ".join(sourcewords)
+ sourcewords_s = " ".join(sourcewords)
else:
sourcewords_s = sourcewords
sourcewords = sourcewords.split(' ')
View
6 formats/folia.py
@@ -57,6 +57,8 @@
FOLIAVERSION = '0.9.1'
LIBVERSION = '0.9.1.31' #== FoLiA version + library revision
+#0.9.1.31 is the first version with Python 3 support
+
NSFOLIA = "http://ilk.uvt.nl/folia"
NSDCOI = "http://lands.let.ru.nl/projects/d-coi/ns/1.0"
@@ -1842,7 +1844,7 @@ def __init__(self, doc, *args, **kwargs):
self.value = u(kwargs['value'])
del kwargs['value']
elif not kwargs['value']:
- self.value = u""
+ self.value = ""
del kwargs['value']
else:
raise Exception("Invalid value: " + repr(kwargs['value']))
@@ -4395,7 +4397,7 @@ def words(self, index = None):
def text(self, retaintokenisation=False):
"""Returns the text of the entire document (returns a unicode instance)"""
- s = u""
+ s = ""
for c in self.data:
if s: s += "\n\n\n"
try:
View
20 setup.py
@@ -1,26 +1,34 @@
#! /usr/bin/env python
# -*- coding: utf8 -*-
+from __future__ import print_function
+
+
import os
import sys
from setuptools import setup, find_packages
os.chdir(os.path.dirname(sys.argv[0]))
if not os.path.exists('pynlpl'):
- print >>sys.stderr, "Preparing build"
- if not os.path.exists('build'): os.mkdir('build')
+ print("Preparing build",file=sys.stderr)
+ if os.path.exists('build'):
+ os.system('rm -Rf build')
+ os.mkdir('build')
os.chdir('build')
if not os.path.exists('pynlpl'): os.mkdir('pynlpl')
os.system('cp -Rpdf ../* pynlpl/ 2> /dev/null')
os.system('mv -f pynlpl/setup.py pynlpl/setup.cfg .')
- os.system('cp -f pynlpl/README .')
+ os.system('cp -f pynlpl/README .')
+
+ #Do not include unfinished WIP modules:
+ os.system('rm -f pynlpl/formats/colibri.py pynlpl/formats/alpino.py pynlpl/foliaprocessing.py pynlpl/grammar.py')
def read(fname):
return open(os.path.join(os.path.dirname(__file__), fname)).read()
setup(
name = "PyNLPl",
- version = "0.5.4.3",
+ version = "0.6.0",
author = "Maarten van Gompel",
author_email = "proycon@anaproy.nl",
description = ("PyNLPl, pronounced as 'pineapple', is a Python library for Natural Language Processing. It contains various modules useful for common, and less common, NLP tasks. PyNLPl can be used for example the computation of n-grams, frequency lists and distributions, language models. There are also more complex data types, such as Priority Queues, and search algorithms, such as Beam Search."),
@@ -33,6 +41,8 @@ def read(fname):
"Development Status :: 4 - Beta",
"Topic :: Text Processing :: Linguistic",
"Programming Language :: Python :: 2.6",
+ "Programming Language :: Python :: 2.7",
+ "Programming Language :: Python :: 3",
"Operating System :: POSIX",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
@@ -40,5 +50,5 @@ def read(fname):
],
#include_package_data=True,
#package_data = {'': ['*.wsgi','*.js','*.xsl','*.gif','*.png','*.xml','*.html','*.jpg','*.svg','*.rng'] },
- install_requires=['lxml >= 2.2']
+ install_requires=['lxml >= 2.2','httplib2 >= 0.6']
)
View
8 statistics.py
@@ -21,7 +21,7 @@
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
-from pynlpl.common import u
+from pynlpl.common import u, isstring
import sys
if sys.version < '3':
from codecs import getwriter
@@ -181,10 +181,10 @@ def output(self,delimiter = '\t', addnormalised=False):
for type, count in self:
if isinstance(type,tuple) or isinstance(type,list):
if addnormalised:
- yield u" ".join((u(x) for x in type)) + delimiter + str(count) + delimiter + str(count/self.total)
+ yield " ".join((u(x) for x in type)) + delimiter + str(count) + delimiter + str(count/self.total)
else:
- yield u" ".join((u(x) for x in type)) + delimiter + str(count)
- elif isinstance(type,str) or (sys.version < '3' and isinstance(type,unicode)):
+ yield " ".join((u(x) for x in type)) + delimiter + str(count)
+ elif isstring(type):
if addnormalised:
yield type + delimiter + str(count) + delimiter + str(count/self.total)
else:
View
11 tagger.py
@@ -178,12 +178,11 @@ def process(self, words, debug=False):
postags.append('?')
return words, postags, lemmas
elif self.mode == 'treetagger':
- s = u' '.join(words)
- if isinstance(s, unicode):
- s = s.encode('utf-8')
+ s = " ".join(words)
+ s = u(s)
p = subprocess.Popen([self.tagger], shell=False, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- (out, err) = p.communicate(s)
+ (out, err) = p.communicate(s.encode('utf-8'))
newwords = []
postags = []
@@ -215,7 +214,7 @@ def flush(sentences):
for sentence in sentences:
out = ""
p = subprocess.Popen([self.tagger], shell=False, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- (results, err) = p.communicate(u"\n".join(sentences).encode('utf-8'))
+ (results, err) = p.communicate("\n".join(sentences).encode('utf-8'))
for line in results.split('\n'):
line = line.strip()
if line:
@@ -276,7 +275,7 @@ def tag(self, f_in, f_out,oneperline=False, debug=False):
if line.strip():
words = line.strip().split(' ')
words, postags, lemmas = self.process(words, debug)
- out = u""
+ out = ""
for word, pos, lemma in zip(words,postags, lemmas):
if word is None: word = ""
if lemma is None: lemma = "?"
View
2 textprocessors.py
@@ -109,7 +109,7 @@ def __init__(self, stream, filternontext=True):
self.filternontext = filternontext
def __iter__(self):
- eosmarkers = ('.',':','?','!','"',"'",u"",u"",u"")
+ eosmarkers = ('.',':','?','!','"',"'","","","")
emptyline = 0
buffer = ""
for line in self.stream:

0 comments on commit dae178a

Please sign in to comment.
Something went wrong with that request. Please try again.