/
example.py
executable file
·98 lines (71 loc) · 4.34 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python
#####################################################################
# SemEval 2014 task 5 - L2 Writing Assistant
# Example system in Python
#
#
# This example system takes two arguments, the trial/test set and the file to
# output to.
#
# Instead of translating the fragments from L1 to L2, this example
# simply converts the fragments to uppercase.
#
#####################################################################
#We import from the future to get Python 3 like functionality in Python 2.6+, if you're already on three this does nothing
from __future__ import print_function, unicode_literals, division, absolute_import
import sys
#Here we import the format library for this task:
import libsemeval2014task5.format as format
#Exactly two command-line arguments are expected: the trial/test set to load
#and the filename to write the system output to.
try:
    inputfilename, outputfilename = sys.argv[1:]
except ValueError:
    #Tuple unpacking raises ValueError when the number of arguments is not exactly two.
    #Only that error is caught, so that e.g. KeyboardInterrupt is not swallowed.
    print("ERROR: Specify a trial/test set to load and an output filename", file=sys.stderr)
    sys.exit(2)
#We open the XML reader
reader = format.Reader(inputfilename)
try:
    #At the same time, we open the XML writer for writing the system output
    writer = format.Writer(outputfilename, reader.L1, reader.L2)
    try:
        #Then we iterate over all sentence pairs in the set
        for sentencepair in reader:
            #The following attributes are available
            # - sentencepair.id
            # - sentencepair.L1
            # - sentencepair.L2
            # - sentencepair.input - A tuple of words and a Fragment instance, representing the input sentence (L2) with L1 fragment
            # - sentencepair.output - A tuple of words and a Fragment instance, representing the reference sentence (L2) with the system translation of the fragment (L2)
            # - sentencepair.ref - A tuple of words and a Fragment instance, representing the reference sentence (L2) with the correct L2 fragment

            #We iterate over the fragment in the input (there will be only one iteration by default, each sentence in this task only has one fragment)
            for leftcontext, fragment, rightcontext in sentencepair.inputfragments():
                #leftcontext is a tuple of words to the left of the fragment
                #rightcontext is a tuple of words to the right of the fragment
                #fragment is an instance of format.Fragment. fragment.value is a tuple of the words that are in the fragment, or None if unknown
                assert isinstance(fragment, format.Fragment)

                ###############################################################################################
                if fragment.value is None:
                    #The fragment's value is unknown, so there is nothing to convert:
                    #we output an empty fragment that carries the same ID
                    translatedfragment = format.Fragment(None, fragment.id)
                else:
                    #Convert the fragment's textual value to uppercase, this is our dummy translation step
                    translatedvalue = tuple(word.upper() for word in fragment.value)
                    #now we create a new fragment for the new value, it must carry the same ID
                    translatedfragment = format.Fragment(translatedvalue, fragment.id)
                ###############################################################################################

                #if you can not provide a translation at all (the system doesn't know),
                #then you can create an empty fragment as follows:
                # translatedfragment = format.Fragment(None, fragment.id)

                #In out-of-five mode, you may add up to four extra alternatives.
                #This you can do as follows (note that the words are passed as a single tuple):
                # translatedfragment.alternatives.append( format.Alternative(("your","alternative")) )

                #Now we can set the system output by copying the input sentence
                #(the context after all will stay the same, we only change the fragment)
                #And then replacing the old fragment with the translated one. All done in this single method:
                sentencepair.output = sentencepair.replacefragment(fragment, translatedfragment, sentencepair.input)

                #write this modified sentencepair to the output file, using the writer:
                writer.write(sentencepair)

                #(If you want to create a new sentencepair from scratch, use something like:
                # newsentencepair = format.SentencePair(id,input,output,ref)
                #the arguments input,output,ref may be set to None if not applicable)

                #Just for fun, let's also print the input and output with nice coloured markers. Omit the two arguments to get a plain string
                print("Input: " + sentencepair.inputstr(True,"blue"))
                print("Output: " + sentencepair.outputstr(True,"yellow"))

                #There will be only one iteration, so this is not necessary, but just to prevent confusion:
                break
    finally:
        #Close the writer even if processing failed halfway, so it is never left open
        writer.close()
finally:
    #Don't forget to close the reader as well; this runs last, also when opening the writer failed
    reader.close()