This repository has been archived by the owner on Apr 23, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
translate
executable file
·257 lines (232 loc) · 8.32 KB
/
translate
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#!/usr/bin/env python
# translate
# http://perigee.tremby.net/utils/translate
#
# Patches are welcome
# Python script making use of Google's translation API to translate text from
# the commandline
# See http://code.google.com/apis/ajaxlanguage/documentation/#fonje
#
# Copyright 2009 Bart Nagel (bart@tremby.net)
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program. If not, see <http://www.gnu.org/licenses/>.
import urllib
import urllib2
import sys
import os
import re
import htmlentitydefs
import json
# Pick the JSON decoding function for the running interpreter: from Python 2.6
# on, the "json" module imported above provides loads(); on anything older the
# module is assumed to be python-json, whose decoder is called read().
jsondecode = json.loads if sys.version_info >= (2, 6) else json.read

# Endpoint the translation request is sent to.
# An alternative, "http://www.google.com/uds/Gtranslate", is used in the
# documentation's example code but is possibly not meant for general usage.
# NOTE(review): this belongs to Google's AJAX Language API, which is believed
# to have been retired -- confirm the endpoint still answers before relying
# on it.
translateurl = "http://ajax.googleapis.com/ajax/services/language/translate"
def usage(stdout=False):
    """Print the command-line synopsis followed by the long help text.

    stdout -- when true the text goes to standard output; otherwise (the
              default) it goes to standard error.

    The program name shown in the synopsis is the basename of the
    module-level "progname".
    """
    out = sys.stdout if stdout else sys.stderr
    lead = "Usage: " + os.path.basename(progname)
    # continuation lines are padded so the options line up under the first one
    pad = " " * len(lead)
    print >> out, lead + " [--help|-h]"
    for option in (
        " [--verbose|-v]",
        " [(--source|-s) <source language>]",
        " [(--destination|--dest|-d) <destination language>]",
        " [(--preserve-newlines|-p)]",
        " [(--filename|--file|-f) <filename>|<text to translate> ...]",
    ):
        print >> out, pad + option
    helptext = """
Translate some text from one language to another, giving the result on standard
output.
By default, translate runs as a filter (accepting text to translate on standard
input and outputting the result to standard output).
Source language defaults to auto-detection (which can be specified with "-s -")
and destination language defaults to English. Languages should be entered as
language codes, for instance en, de, es. Inspect http://translate.google.com for
a full list of supported languages. If auto-detection is used, the language
detected will be shown on stderr.
You can set the environment variable "TRANSLATE_DEST_LANG" to your preferred
destination language to omit the "-d" parameter. For example (bash):
export TRANSLATE_DEST_LANG=de
If there are no non-option arguments the text to translate is taken from stdin
by default (which is the same as giving "-f -"). If there are non-option
arguments these are instead taken as the source text. If you want to use it this
way, it's best to give the argument "--" to show that no more options will
appear. Alternatively a file can be used as input by using the -f option.
The --preserve-newlines or -p switch can be given to preserve newlines.
All information and error messages go to standard error and so quiet operation
is possible by redirecting stderr to /dev/null, for example
translate -d de -f story_en.txt 2>/dev/null
The --verbose or -v switch enables verbose output (to stderr)."""
    print >> out, helptext
# unescape HTML entities
def htmlentities_decode(s):
    """Return s with HTML character references replaced by their characters.

    Decimal ("&#65;"), hexadecimal ("&#x41;" or "&#X41;") and named ("&amp;")
    references are decoded.  A reference that cannot be resolved -- an unknown
    name, malformed digits, or a number the interpreter cannot turn into a
    character -- is left in the text exactly as it was found.
    """
    # unichr() is the Python 2 spelling; fall back to chr() where it is absent
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    def htmlentities_decode_single(e):
        entity = e.group(0)
        try:
            if entity[:2] != "&#":
                # named reference: strip the "&" and ";" and look the name up
                return to_char(htmlentitydefs.name2codepoint[entity[1:-1]])
            if entity[2:3] in ("x", "X"):
                return to_char(int(entity[3:-1], 16))
            return to_char(int(entity[2:-1]))
        except (KeyError, ValueError, OverflowError):
            # KeyError: unknown name; ValueError: bad digits or code point out
            # of range; OverflowError: number too large for unichr()/chr()
            return entity

    return re.sub(r"&#?\w+;", htmlentities_decode_single, s)
# get and handle commandline arguments
#
# The loop below consumes sys.argv from the front.  Every branch that accepts
# an argument ends in "continue"; a branch that falls out of the if/elif chain
# has already printed a complaint and lands on the usage()/exit(1) pair at the
# bottom of the loop body.
verbose = False
preservenewlines = False
langsource = None        # None: no source language given (auto-detect)
langdest = "en"
nomoreoptions = False    # becomes True once "--" has been seen
sourcetext = None        # text collected from non-option arguments
file = None              # filename, sys.stdin, or None when not requested
progname = sys.argv.pop(0)

# the environment can replace the default destination language; a -d option
# is processed afterwards and therefore still overrides it
envlangdest = os.getenv("TRANSLATE_DEST_LANG")
if envlangdest is not None:
    langdest = envlangdest

while sys.argv:
    arg = sys.argv.pop(0)

    # only the first "--" ends option processing; any later "--" is ordinary
    # text and must not be dropped
    if arg == "--" and not nomoreoptions:
        nomoreoptions = True
        continue

    # startswith() rather than arg[0]: an empty argument ("") must be taken
    # as (empty) text instead of raising IndexError
    if not nomoreoptions and arg.startswith("-"):
        if arg in ("--verbose", "-v"):
            verbose = True
            continue
        elif arg in ("--help", "-h"):
            usage(True)
            sys.exit(0)
        elif arg in ("--source", "-s"):
            try:
                langsource = sys.argv.pop(0)
            except IndexError:
                print >> sys.stderr, "Option \"%s\" requires a parameter" % arg
            else:
                # "-" explicitly asks for auto-detection
                if langsource == "-":
                    langsource = None
                continue
        elif arg in ("--destination", "--dest", "-d"):
            try:
                langdest = sys.argv.pop(0)
            except IndexError:
                print >> sys.stderr, "Option \"%s\" requires a parameter" % arg
            else:
                continue
        elif arg in ("--filename", "--file", "-f"):
            if file is not None:
                print >> sys.stderr, "Duplicate file argument"
            else:
                try:
                    file = sys.argv.pop(0)
                except IndexError:
                    print >> sys.stderr, "Option \"%s\" requires a parameter" % arg
                else:
                    # "-" stands for standard input
                    if file == "-":
                        file = sys.stdin
                    continue
        elif arg in ("--preserve-newlines", "-p"):
            preservenewlines = True
            continue
        else:
            print >> sys.stderr, "Unknown option \"%s\"" % arg
    elif file is None:
        # non-option arguments are joined with single spaces
        if sourcetext is None:
            sourcetext = arg
        else:
            sourcetext += " " + arg
        continue
    else:
        print >> sys.stderr, "Unexpected argument \"%s\" since we are reading from a file or stdin" % arg

    # problem with arguments
    usage()
    sys.exit(1)
# final error checking and setup
# (text arguments given before a -f option are only detectable here)
if file is not None and sourcetext is not None:
    print >> sys.stderr, "Unexpected non-option argument since we are reading from a file or stdin"
    usage()
    sys.exit(1)
# neither text nor a file was given: act as a filter on standard input
if file is None and sourcetext is None:
    file = sys.stdin

# get source text if we don't already have it
if file is None:
    if verbose: print >> sys.stderr, "Reading from non-option commandline arguments"
else:
    if file is sys.stdin:
        if verbose: print >> sys.stderr, "Reading from stdin"
    else:
        if verbose: print >> sys.stderr, "Reading from file \"%s\"" % file
        try:
            fileobject = open(file)
        except (IOError, OSError):
            print >> sys.stderr, "Could not open file \"%s\"" % file
            sys.exit(2)
        # from here on "file" is always a file object
        file = fileobject
    # only I/O failures are reported as read errors; an interrupt (Ctrl-C)
    # while waiting on stdin is allowed to propagate
    try:
        try:
            sourcetext = file.read()
        finally:
            # close the handle even when read() fails
            file.close()
    except (IOError, OSError):
        print >> sys.stderr, "Could not read from file \"%s\"" % file.name
        sys.exit(3)

# knock off trailing newline if we're preserving them -- Google adds one
if preservenewlines and sourcetext.endswith("\n"):
    sourcetext = sourcetext[:-1]
# in verbose mode, echo the input and the language pair to stderr
if verbose:
    print >> sys.stderr, "input text:\n-----\n%s\n-----" % sourcetext
    if langsource is not None:
        print >> sys.stderr, "Translating from %s to %s" % (langsource, langdest)
    else:
        print >> sys.stderr, "Auto-detecting input language, translating to %s" % langdest

# build the ordered list of form fields to send to Google; the source half of
# "langpair" is left empty when no source language was given
data = [("v", "1.0")]
if langsource is None:
    data.append(("langpair", "|" + langdest))
else:
    data.append(("langpair", langsource + "|" + langdest))
data.append(("q", sourcetext))
# the "format" field is only sent when newlines are to be preserved
if preservenewlines:
    data.append(("format", "text"))
# send request to Google
# The form fields are attached as request data and the JSON reply is decoded
# into "response".  Any ordinary failure (network, HTTP, malformed JSON) is
# reported and ends the program with status 4.
if verbose: print >> sys.stderr, "Sending data to Google, waiting for response..."
try:
    request = urllib2.Request(translateurl)
    request.add_data(urllib.urlencode(data))
    request.add_header("Referer", "http://perigee.tremby.net/utils/translate")
    connection = urllib2.urlopen(request)
    try:
        response = jsondecode(connection.read())
    finally:
        # release the connection whether or not reading/decoding worked
        connection.close()
except Exception:
    # "Exception" rather than a bare except, so that Ctrl-C and sys.exit()
    # are not misreported as a failure to reach Google
    print >> sys.stderr, "Error getting response from Google or parsing response:"
    print >> sys.stderr, sys.exc_info()
    sys.exit(4)
# deal with response
if response["responseStatus"] == 400:
print >> sys.stderr, "Translation failed: %s" % response["responseDetails"]
sys.exit(5)
elif response["responseStatus"] == 200:
if response["responseDetails"] is not None:
print >> sys.stderr, "Response details: %s" % response["responseDetails"]
if "detectedSourceLanguage" in response["responseData"]:
print >> sys.stderr, "Detected source language: %s" % response["responseData"]["detectedSourceLanguage"]
decodedresponse = htmlentities_decode(response["responseData"]["translatedText"])
try:
print decodedresponse
except UnicodeEncodeError:
print decodedresponse.encode("UTF-8")
except:
print >> sys.stderr, "Error printing response to stdout:"
print >> sys.stderr, sys.exc_info()
sys.exit(6)
sys.exit(0)
else:
print >> sys.stderr, "Unexpected response:"
print >> sys.stderr, response
sys.exit(128)
# vi: set ts=4 sts=4 sw=4 tw=80