Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Updated tools for Python 3 compatibility

  • Loading branch information...
commit 9302cd21843dda046b7c87f16deb01b7c5289a10 1 parent 95945a7
@proycon authored
View
8 tools/classdecode.py
@@ -1,9 +1,15 @@
#!/usr/bin/env python
#-*- coding:utf-8 -*-
+
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
from pynlpl.textprocessors import Classer
import sys
classer = Classer(sys.argv[1])
for line in classer.decodefile(sys.argv[2]):
- print " ".join(line).encode('utf-8')
+ print(" ".join(line))
View
8 tools/classencode.py
@@ -1,6 +1,12 @@
#!/usr/bin/env python
#-*- coding:utf-8 -*-
+
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
from pynlpl.textprocessors import Classer
from pynlpl.statistics import FrequencyList
import sys
@@ -12,7 +18,7 @@
freqlist = FrequencyList()
for i, line in enumerate(f):
if (i % 10000 == 0):
- print >>sys.stderr, "\tLine " + str(i+1)
+ print("\tLine " + str(i+1),file=sys.stderr)
line = ['<s>'] + line.strip().split(' ') + ['</s>']
freqlist.append(line)
View
35 tools/freqlist.py
@@ -12,30 +12,31 @@
#
###############################################################
+
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
import getopt
import sys
-import os
import codecs
-if __name__ == "__main__":
- sys.path.append(sys.path[0] + '/../..')
- os.environ['PYTHONPATH'] = sys.path[0] + '/../..'
-
from pynlpl.statistics import FrequencyList, Distribution
from pynlpl.textprocessors import Windower, crude_tokenizer
def usage():
- print >>sys.stderr,"freqlist.py -n 1 file1 (file2) etc.."
- print >>sys.stderr,"\t-n number n-gram size (default: 1)"
- print >>sys.stderr,"\t-i case-insensitve"
- print >>sys.stderr,"\t-e encoding (default: utf-8)"
+ print("freqlist.py -n 1 file1 (file2) etc..",file=sys.stderr)
+ print("\t-n number n-gram size (default: 1)",file=sys.stderr)
+ print("\t-i case-insensitve",file=sys.stderr)
+ print("\t-e encoding (default: utf-8)",file=sys.stderr)
try:
opts, files = getopt.getopt(sys.argv[1:], "hn:ie:", ["help"])
-except getopt.GetoptError, err:
+except getopt.GetoptError as err:
# print help information and exit:
- print str(err)
+ print(str(err),file=sys.stderr)
usage()
sys.exit(2)
@@ -52,7 +53,7 @@ def usage():
elif o == "-e":
encoding = a
else:
- print >>sys.stderr, "ERROR: Unknown option:",o
+ print("ERROR: Unknown option:",o,file=sys.stderr)
sys.exit(1)
if not files:
@@ -75,11 +76,11 @@ def usage():
if isinstance(type,tuple) or isinstance(type,list):
type = " ".join(type)
s = type + "\t" + str(count) + "\t" + str(dist[type]) + "\t" + str(dist.information(type))
- print s.encode('utf-8')
+ print(s)
-print >>sys.stderr, "Tokens: ", freqlist.tokens()
-print >>sys.stderr, "Types: ", len(freqlist)
-print >>sys.stderr, "Type-token ratio: ", freqlist.typetokenratio()
-print >>sys.stderr, "Entropy: ", dist.entropy()
+print("Tokens: ", freqlist.tokens(),file=sys.stderr)
+print("Types: ", len(freqlist),file=sys.stderr)
+print("Type-token ratio: ", freqlist.typetokenratio(),file=sys.stderr)
+print("Entropy: ", dist.entropy(),file=sys.stderr)
View
22 tools/reflow.py
@@ -1,28 +1,28 @@
#! /usr/bin/env python
# -*- coding: utf8 -*-
-import codecs
+
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
import sys
-import os
+import io
import getopt
-if __name__ == "__main__":
- sys.path.append(sys.path[0] + '/../..')
- os.environ['PYTHONPATH'] = sys.path[0] + '/../..'
-
from pynlpl.textprocessors import ReflowText
try:
opts, args = getopt.getopt(sys.argv[1:], "to:")
-except getopt.GetoptError, err:
+except getopt.GetoptError as err:
# print help information and exit:
- print str(err) # will print something like "option -a not recognized"
- self.usage()
+ print(str(err)) # will print something like "option -a not recognized"
sys.exit(2)
for filename in sys.argv[1:]:
- f = codecs.open(filename, 'r', 'utf-8')
+ f = io.open(filename, 'r', encoding='utf-8')
for line in ReflowText(f):
- print line.encode('utf-8')
+ print(line)
f.close()
View
25 tools/sampler.py
@@ -16,13 +16,14 @@
#
###############################################################
+
+from __future__ import print_function
+from __future__ import unicode_literals
+from __future__ import division
+from __future__ import absolute_import
+
import getopt
import sys
-import os
-
-if __name__ == "__main__":
- sys.path.append(sys.path[0] + '/../..')
- os.environ['PYTHONPATH'] = sys.path[0] + '/../..'
from pynlpl.evaluation import filesampler
@@ -33,9 +34,9 @@ def usage():
try:
opts, args = getopt.getopt(sys.argv[1:], "ht:d:", ["help"])
-except getopt.GetoptError, err:
+except getopt.GetoptError as err:
# print help information and exit:
- print str(err)
+ print(str(err))
usage()
sys.exit(2)
@@ -49,7 +50,7 @@ def usage():
try:
testsetsize = float(a)
except:
- print >>sys.stderr,"ERROR: Invalid testsize"
+ print("ERROR: Invalid testsize",file=sys.stderr)
sys.exit(2)
elif o == "-d":
try:
@@ -58,21 +59,21 @@ def usage():
try:
devsetsize = float(a)
except:
- print >>sys.stderr,"ERROR: Invalid devsetsize"
+ print("ERROR: Invalid devsetsize",file=sys.stderr)
sys.exit(2)
elif o == "-h":
usage()
sys.exit(0)
else:
- print >>sys.stderr,"ERROR: No such option: ",o
+ print("ERROR: No such option: ",o,file=sys.stderr)
sys.exit(2)
if testsetsize == 0:
- print >>sys.stderr,"ERROR: Specify at least a testset size!"
+ print("ERROR: Specify at least a testset size!",file=sys.stderr)
usage()
sys.exit(2)
elif len(args) == 0:
- print >>sys.stderr,"ERROR: Specify at least one file!"
+ print("ERROR: Specify at least one file!",file=sys.stderr)
usage()
sys.exit(2)
Please sign in to comment.
Something went wrong with that request. Please try again.