This repository has been archived by the owner on Apr 19, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
126 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# -*- test-case-name: omnipresence.plugins.wwwjdic.test_wwwjdic -*- | ||
"""Event plugins for searching WWWJDIC.""" | ||
|
||
|
||
import re | ||
import urllib | ||
|
||
from bs4 import BeautifulSoup | ||
from twisted.internet.defer import inlineCallbacks, returnValue | ||
from twisted.web.client import readBody | ||
try: | ||
from waapuro import romanize | ||
except ImportError: | ||
romanize = None | ||
|
||
from ...plugin import EventPlugin, UserVisibleError | ||
from ...web.http import default_agent | ||
|
||
|
||
# NOTE: plain raw strings are used instead of the ``ur''`` prefix, which
# is a syntax error on Python 3.  Both patterns are pure ASCII, so they
# match Unicode text identically on either major version.

#: A regex for identifying pronunciations in a JDIC entry, if present.
PRONUNCIATIONS_RE = re.compile(r'\[([^\]]+)\]')

#: A regex for identifying markings at the end of a kana pronunciation.
MARKINGS_RE = re.compile(r'(?:\([^)]+\))+$')
|
||
|
||
class Default(EventPlugin):
    """Look up a query in WWWJDIC and reply with one line per entry."""

    def __init__(self):
        #: Web agent used to issue the lookup request.
        self.agent = default_agent
        #: Callable that romanizes a kana string, or a false value to
        #: leave entries without romanizations.
        self.romanize = romanize

    def _add_romanizations(self, result):
        """Return *result* with a romanization after each kana reading.

        The readings are taken from the first bracketed section of the
        entry.  An entry without a bracketed section is treated as
        having its readings in its first whitespace-delimited word.
        *result* must not begin with whitespace.
        """
        match = PRONUNCIATIONS_RE.search(result)
        if match is None:
            start = 0
            end = len(result.split(None, 1)[0])
        else:
            start, end = match.span(1)
        with_romanizations = []
        for pronunciation in result[start:end].split(u';'):
            # Set aside trailing markings such as "(P)" so that only
            # the reading itself is handed to the romanizer.
            markings = MARKINGS_RE.search(pronunciation)
            suffix = u''
            if markings is not None:
                pronunciation = pronunciation[:markings.start()]
                suffix = u' ' + markings.group(0)
            with_romanizations.append(
                pronunciation +
                u' (' + self.romanize(pronunciation) + u')' +
                suffix)
        return (result[:start] +
                u'; '.join(with_romanizations) +
                result[end:])

    @inlineCallbacks
    def on_command(self, msg):
        """Search WWWJDIC for ``msg.content``.

        Fires with a list of formatted entry strings, one for each
        non-blank line of the ``<pre>`` element in the response.

        Raises `UserVisibleError` if the query is empty or the response
        contains no ``<pre>`` element.
        """
        if not msg.content:
            raise UserVisibleError('Please specify a search query.')
        q = urllib.quote_plus(msg.content)
        response = yield self.agent.request(
            'GET',
            'http://www.edrdg.org/cgi-bin/wwwjdic/wwwjdic?1ZUJ{}'.format(q))
        content = yield readBody(response)
        soup = BeautifulSoup(content)
        if not soup.pre:
            raise UserVisibleError('No results found for \x02{}\x02.'
                                   .format(msg.content))
        # Use get_text() rather than .string: the latter is None as
        # soon as the element holds more than a single text node.
        raw = soup.pre.get_text().strip().splitlines()
        results = []
        for result in raw:
            # Normalize surrounding whitespace up front so that the
            # offsets used for romanization and the trailing-slash
            # check below both see the actual entry text.
            result = result.strip()
            if not result:
                continue
            # Find the kana pronunciations and add their romanizations.
            if self.romanize:
                result = self._add_romanizations(result)
            # Strip off the trailing slash for the last gloss, then
            # replace the first slash with nothing and the remaining
            # ones with semicolons, in an approximation of the Web
            # interface.
            if result.endswith(u'/'):
                result = result[:-1].rstrip()
            result = result.replace(u'/', u'', 1)
            result = result.replace(u'/', u'; ')
            results.append(result)
        returnValue(results)

    def on_cmdhelp(self, msg):
        """Return the help text for this command."""
        # Imported here because the module does not import ``collapse``
        # at the top level.
        from ...message import collapse
        return collapse("""\
            \x1Fquery\x1F - Look up a Japanese word or phrase in Jim
            Breen's WWWJDIC <http://wwwjdic.org/>.
            """)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# -*- coding: utf-8 -*- | ||
"""Unit tests for the wwwjdic event plugin.""" | ||
# pylint: disable=missing-docstring,too-few-public-methods | ||
|
||
|
||
from ...message import collapse | ||
from ...test.helpers import AbstractCassetteTestCase | ||
|
||
from . import Default | ||
|
||
|
||
class WWWJDICTestCase(AbstractCassetteTestCase):
    """Cassette-backed tests for the WWWJDIC ``Default`` plugin."""

    command_class = Default

    @staticmethod
    def romanize(string):
        """Stand-in romanizer that returns the same marker for any input."""
        return 'mogiroomazi'

    def setUp(self):
        super(WWWJDICTestCase, self).setUp()
        # Swap in the stub above so the expected replies do not depend
        # on the output of a real romanizer.
        self.command.romanize = WWWJDICTestCase.romanize

    @AbstractCassetteTestCase.use_cassette('wwwjdic/no-results')
    def test_no_results(self):
        """A query with no matches produces a user-visible error."""
        query = 'slartibartfast'
        expected = 'No results found for \x02slartibartfast\x02.'
        return self.assert_error(query, expected)

    @AbstractCassetteTestCase.use_cassette('wwwjdic/some-results')
    def test_some_results(self):
        """Each matching entry is reformatted and romanized."""
        entries = [
            u"""天の川(P);天の河(P)
                [あまのがわ (mogiroomazi) (P); あまのかわ (mogiroomazi)]
                (n) Milky Way; (P)""",
            u"""天の川銀河
                [あまのがわぎんが (mogiroomazi); あまのかわぎんが (mogiroomazi)]
                (n) Milky Way Galaxy""",
        ]
        expected = map(collapse, entries)
        return self.assert_reply('amanogawa', expected)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"recorded_with": "Stenographer 0.1-dev", "http_interactions": [{"response": {"status": {"message": "OK", "code": 200}, "http_version": "1.1", "body": {"base64_string": "H4sIAAAAAAAAAy2Oy07DMBBF9/mKqfdhisoCgWOptY0ISh8LV1aXUWoUq6kT7CmQv6dR2Mzo6NzRXL5Qe2lOBw3vZlvB4bipSgksR7QriaiMmsXTw/IRTKxD8uT7UHeIesdExic7Lb1Wgm+1WUNLNOTu6+a/Cyb7QC5QbsbBMWhmKhi5X8KWrt0rNG0dk6PiaN7yZya4KU2lhbX2Q5XyBWwfz6B8Grp65DjLjOP8brNXpzsNItv1cK2paV2CHxcdfPa3cL7PCNT6BBc3LqYYx/8TnGv/AYzDF4r+AAAA", "encoding": "utf-8"}, "headers": {"Content-Encoding": ["gzip"], "Access-Control-Allow-Origin": ["*"], "Date": ["Mon, 10 Aug 2015 18:21:36 GMT"], "Vary": ["Accept-Encoding"], "Server": ["Apache/2.2.29 (Unix) mod_ssl/2.2.29 OpenSSL/1.0.1f DAV/2 mod_wsgi/4.2.8 Python/2.6.4 PHP/5.4.9 mod_perl/2.0.4 Perl/v5.12.2"], "Access-Control-Allow-Credentials": ["false"], "Content-Type": ["text/html; charset=UTF-8"]}}, "recorded_at": "Mon, 10 Aug 2015 18:21:36 -0000", "request": {"method": "GET", "uri": "http://www.edrdg.org/cgi-bin/wwwjdic/wwwjdic?1ZUJslartibartfast", "body": {"string": "", "encoding": "utf-8"}, "headers": {"Accept-Encoding": ["gzip"], "User-Agent": ["Omnipresence/3.0alpha1 (+bot; https://github.com/kxz/omnipresence)"]}}}]} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"recorded_with": "Stenographer 0.1-dev", "http_interactions": [{"recorded_at": "Mon, 10 Aug 2015 18:22:46 -0000", "request": {"uri": "http://www.edrdg.org/cgi-bin/wwwjdic/wwwjdic?1ZUJamanogawa", "method": "GET", "body": {"encoding": "utf-8", "string": ""}, "headers": {"User-Agent": ["Omnipresence/3.0alpha1 (+bot; https://github.com/kxz/omnipresence)"], "Accept-Encoding": ["gzip"]}}, "response": {"headers": {"Access-Control-Allow-Credentials": ["false"], "Content-Type": ["text/html; charset=UTF-8"], "Access-Control-Allow-Origin": ["*"], "Date": ["Mon, 10 Aug 2015 18:22:46 GMT"], "Content-Encoding": ["gzip"], "Server": ["Apache/2.2.29 (Unix) mod_ssl/2.2.29 OpenSSL/1.0.1f DAV/2 mod_wsgi/4.2.8 Python/2.6.4 PHP/5.4.9 mod_perl/2.0.4 Perl/v5.12.2"], "Vary": ["Accept-Encoding"]}, "body": {"base64_string": "H4sIAAAAAAAAA7NRdPF3DokMcFXwCPH1UQgIdfLxdFZQ0tXXDzd21td3CXGBSJjoGRgqhBQl5hVnlmTm5yXm6Ou7+inZcdmAZEGUq6OLnY2va4ijQkZJSYFuamFpZpmtknN+XklqXoluSGVBqpJCMoRnq1SSWlGin1GSm2OtkJyRWFScWmIbGuKma6FkZxPiGeLjahceHu7l4ulspRCeX5Si4JJZXJCTWGmjD5HkstGHWOfk7xIJ5BWAcFGqHdfTJSsfN657un2uRoCmNYTzbNNmIEch+nFj0+PGfUCBx409j5v6QQqQhLqBQrEK+hp5mgq+mTnZlQrhiZX6QDX6CCNfdjUAzcIw6HFj3+OmyUA2unFwCXRzFdwTcxIrKvWBvgA72kYf6g19SFACADGwpz6SAQAA", "encoding": "utf-8"}, "status": {"message": "OK", "code": 200}, "http_version": "1.1"}}]} |