Skip to content
This repository has been archived by the owner on Apr 19, 2019. It is now read-only.

Commit

Permalink
wwwjdic: New plugin
Browse files Browse the repository at this point in the history
  • Loading branch information
kxz committed Aug 10, 2015
1 parent 28dffc6 commit 3ffb2c5
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 0 deletions.
87 changes: 87 additions & 0 deletions omnipresence/plugins/wwwjdic/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# -*- test-case-name: omnipresence.plugins.wwwjdic.test_wwwjdic -*-
"""Event plugins for searching WWWJDIC."""


import re
import urllib

from bs4 import BeautifulSoup
from twisted.internet.defer import inlineCallbacks, returnValue
from twisted.web.client import readBody
try:
from waapuro import romanize
except ImportError:
romanize = None

from ...plugin import EventPlugin, UserVisibleError
from ...web.http import default_agent


#: A regex for identifying pronunciations in a JDIC entry, if present.
#: Matches a square-bracketed span; group 1 is the text between the
#: brackets, which the plugin splits on semicolons into readings.
PRONUNCIATIONS_RE = re.compile(ur'\[([^\]]+)\]')

#: A regex for identifying markings at the end of a kana pronunciation.
#: Matches one or more consecutive parenthesized groups, such as "(P)",
#: anchored at the end of the string.
MARKINGS_RE = re.compile(ur'(?:\([^)]+\))+$')


class Default(EventPlugin):
    """Event plugin that looks up a query in WWWJDIC.

    The query is sent to the WWWJDIC endpoint at edrdg.org and every
    non-blank line of the ``<pre>`` element in the response becomes one
    reply string.  When a romanizer is available, a romanization is
    inserted after each reading.
    """

    def __init__(self):
        # Kept as instance attributes rather than used as globals; the
        # unit tests replace ``romanize`` with a stub.  ``romanize`` is
        # None when the optional ``waapuro`` import failed.
        self.agent = default_agent
        self.romanize = romanize

    @inlineCallbacks
    def on_command(self, msg):
        """Search WWWJDIC for ``msg.content``.

        Fires with a list of unicode strings, one per non-blank line
        of the response's ``<pre>`` element.

        Raises `UserVisibleError` if the query is empty or the response
        contains no ``<pre>`` element.
        """
        if not msg.content:
            raise UserVisibleError('Please specify a search query.')
        # NOTE(review): on Python 2, urllib.quote_plus raises KeyError
        # for unicode input containing non-ASCII characters; this
        # assumes msg.content is a byte string -- confirm.
        q = urllib.quote_plus(msg.content)
        response = yield self.agent.request(
            'GET',
            'http://www.edrdg.org/cgi-bin/wwwjdic/wwwjdic?1ZUJ{}'.format(q))
        content = yield readBody(response)
        soup = BeautifulSoup(content)
        pre = soup.pre
        if not pre:
            raise UserVisibleError('No results found for \x02{}\x02.'
                                   .format(msg.content))
        # get_text() instead of .string: .string is None when the
        # element is empty or has more than one child, which would make
        # the chained .strip() raise AttributeError.
        results = []
        for line in pre.get_text().strip().splitlines():
            # Strip up front so the offsets computed by the helpers are
            # relative to the text they actually slice.
            line = line.strip()
            if not line:
                continue
            if self.romanize:
                line = self._add_romanizations(line)
            results.append(self._reformat_glosses(line))
        returnValue(results)

    def _add_romanizations(self, line):
        """Return *line* with a romanization after each reading.

        *line* must be non-empty with no leading whitespace.  The
        readings are taken from the bracketed list if one is present;
        otherwise the first whitespace-delimited token of the line is
        used instead.
        """
        match = PRONUNCIATIONS_RE.search(line)
        if match is None:
            start = 0
            end = len(line.split(None, 1)[0])
        else:
            start = match.start(1)
            end = match.end(1)
        with_romanizations = []
        for reading in line[start:end].split(u';'):
            # Trailing markings such as "(P)" are detached before
            # romanizing and re-attached after the romanization.
            markings = MARKINGS_RE.search(reading)
            suffix = u''
            if markings is not None:
                reading = reading[:markings.start()]
                suffix = u' ' + markings.group(0)
            with_romanizations.append(
                reading + u' (' + self.romanize(reading) + u')' + suffix)
        return line[:start] + u'; '.join(with_romanizations) + line[end:]

    @staticmethod
    def _reformat_glosses(line):
        """Return *line* with its slash-delimited glosses reformatted.

        Strips off the trailing slash for the last gloss, then replaces
        the first slash with nothing and the remaining ones with
        semicolons, in an approximation of the Web interface.
        """
        line = line.strip()
        # Only drop the final character when it really is the closing
        # slash, so an unexpectedly formatted line is not truncated.
        if line.endswith(u'/'):
            line = line[:-1].rstrip()
        line = line.replace(u'/', u'', 1)
        return line.replace(u'/', u'; ')

    def on_cmdhelp(self, msg):
        """Return the help text for this command; *msg* is not used."""
        # Imported locally because this module does not import
        # ``collapse`` at the top level, so calling it here used to
        # raise NameError.
        from ...message import collapse
        # Continuation lines stay flush-left so the literal's contents
        # are unchanged.
        return collapse("""\
\x1Fquery\x1F - Look up a Japanese word or phrase in Jim
Breen's WWWJDIC <http://wwwjdic.org/>.
""")
37 changes: 37 additions & 0 deletions omnipresence/plugins/wwwjdic/test_wwwjdic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
"""Unit tests for the wwwjdic event plugin."""
# pylint: disable=missing-docstring,too-few-public-methods


from ...message import collapse
from ...test.helpers import AbstractCassetteTestCase

from . import Default


class WWWJDICTestCase(AbstractCassetteTestCase):
    """Run the WWWJDIC plugin against recorded HTTP cassettes."""

    command_class = Default

    @staticmethod
    def romanize(string):
        """Stub romanizer: return a fixed marker for any input."""
        return 'mogiroomazi'

    def setUp(self):
        super(WWWJDICTestCase, self).setUp()
        # Replace whatever romanizer the plugin picked up at import time
        # (possibly None) with the deterministic stub above.
        self.command.romanize = self.romanize

    @AbstractCassetteTestCase.use_cassette('wwwjdic/no-results')
    def test_no_results(self):
        """A query with no matches produces the no-results error."""
        query = 'slartibartfast'
        message = 'No results found for \x02slartibartfast\x02.'
        return self.assert_error(query, message)

    @AbstractCassetteTestCase.use_cassette('wwwjdic/some-results')
    def test_some_results(self):
        """Each matching entry is returned with stub romanizations."""
        # The literals are passed through collapse() before comparison;
        # their continuation lines are kept flush-left so the string
        # contents are unchanged.
        expected = [collapse(entry) for entry in [
            u"""天の川(P);天の河(P)
[あまのがわ (mogiroomazi) (P); あまのかわ (mogiroomazi)]
(n) Milky Way; (P)""",
            u"""天の川銀河
[あまのがわぎんが (mogiroomazi); あまのかわぎんが (mogiroomazi)]
(n) Milky Way Galaxy"""]]
        return self.assert_reply('amanogawa', expected)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"recorded_with": "Stenographer 0.1-dev", "http_interactions": [{"response": {"status": {"message": "OK", "code": 200}, "http_version": "1.1", "body": {"base64_string": "H4sIAAAAAAAAAy2Oy07DMBBF9/mKqfdhisoCgWOptY0ISh8LV1aXUWoUq6kT7CmQv6dR2Mzo6NzRXL5Qe2lOBw3vZlvB4bipSgksR7QriaiMmsXTw/IRTKxD8uT7UHeIesdExic7Lb1Wgm+1WUNLNOTu6+a/Cyb7QC5QbsbBMWhmKhi5X8KWrt0rNG0dk6PiaN7yZya4KU2lhbX2Q5XyBWwfz6B8Grp65DjLjOP8brNXpzsNItv1cK2paV2CHxcdfPa3cL7PCNT6BBc3LqYYx/8TnGv/AYzDF4r+AAAA", "encoding": "utf-8"}, "headers": {"Content-Encoding": ["gzip"], "Access-Control-Allow-Origin": ["*"], "Date": ["Mon, 10 Aug 2015 18:21:36 GMT"], "Vary": ["Accept-Encoding"], "Server": ["Apache/2.2.29 (Unix) mod_ssl/2.2.29 OpenSSL/1.0.1f DAV/2 mod_wsgi/4.2.8 Python/2.6.4 PHP/5.4.9 mod_perl/2.0.4 Perl/v5.12.2"], "Access-Control-Allow-Credentials": ["false"], "Content-Type": ["text/html; charset=UTF-8"]}}, "recorded_at": "Mon, 10 Aug 2015 18:21:36 -0000", "request": {"method": "GET", "uri": "http://www.edrdg.org/cgi-bin/wwwjdic/wwwjdic?1ZUJslartibartfast", "body": {"string": "", "encoding": "utf-8"}, "headers": {"Accept-Encoding": ["gzip"], "User-Agent": ["Omnipresence/3.0alpha1 (+bot; https://github.com/kxz/omnipresence)"]}}}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"recorded_with": "Stenographer 0.1-dev", "http_interactions": [{"recorded_at": "Mon, 10 Aug 2015 18:22:46 -0000", "request": {"uri": "http://www.edrdg.org/cgi-bin/wwwjdic/wwwjdic?1ZUJamanogawa", "method": "GET", "body": {"encoding": "utf-8", "string": ""}, "headers": {"User-Agent": ["Omnipresence/3.0alpha1 (+bot; https://github.com/kxz/omnipresence)"], "Accept-Encoding": ["gzip"]}}, "response": {"headers": {"Access-Control-Allow-Credentials": ["false"], "Content-Type": ["text/html; charset=UTF-8"], "Access-Control-Allow-Origin": ["*"], "Date": ["Mon, 10 Aug 2015 18:22:46 GMT"], "Content-Encoding": ["gzip"], "Server": ["Apache/2.2.29 (Unix) mod_ssl/2.2.29 OpenSSL/1.0.1f DAV/2 mod_wsgi/4.2.8 Python/2.6.4 PHP/5.4.9 mod_perl/2.0.4 Perl/v5.12.2"], "Vary": ["Accept-Encoding"]}, "body": {"base64_string": "H4sIAAAAAAAAA7NRdPF3DokMcFXwCPH1UQgIdfLxdFZQ0tXXDzd21td3CXGBSJjoGRgqhBQl5hVnlmTm5yXm6Ou7+inZcdmAZEGUq6OLnY2va4ijQkZJSYFuamFpZpmtknN+XklqXoluSGVBqpJCMoRnq1SSWlGin1GSm2OtkJyRWFScWmIbGuKma6FkZxPiGeLjahceHu7l4ulspRCeX5Si4JJZXJCTWGmjD5HkstGHWOfk7xIJ5BWAcFGqHdfTJSsfN657un2uRoCmNYTzbNNmIEch+nFj0+PGfUCBx409j5v6QQqQhLqBQrEK+hp5mgq+mTnZlQrhiZX6QDX6CCNfdjUAzcIw6HFj3+OmyUA2unFwCXRzFdwTcxIrKvWBvgA72kYf6g19SFACADGwpz6SAQAA", "encoding": "utf-8"}, "status": {"message": "OK", "code": 200}, "http_version": "1.1"}}]}

0 comments on commit 3ffb2c5

Please sign in to comment.