Fake a pull request #4

Closed
wants to merge 1 commit into
from
View
23 LICENSE.txt
@@ -1,23 +0,0 @@
-Copyright 2012 Keith Mitchell. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
View
22 README.md
@@ -1,22 +0,0 @@
-l2cs
-====
-
-`l2cs` - "lucene to CloudSearch" - is a module for converting search queries from [Apache lucene's base syntax](http://lucene.apache.org/core/3_6_0/queryparsersyntax.html) into an [Amazon CloudSearch boolean query](http://docs.amazonwebservices.com/cloudsearch/latest/developerguide/booleansearch.html).
-
-
-Install
--------
-
-Run `python setup.py install` to install the module.
-
-
-Thanks
-------
-
-Many thanks to Matt Chaput, whose [whoosh module](https://bitbucket.org/mchaput/whoosh/overview) is a dependency and key component of `l2cs`.
-
-
-License
--------
-
-Please see `LICENSE.txt` in the source.
View
5 debian/changelog
@@ -1,5 +0,0 @@
-l2cs (0.1.0-1) natty; urgency=low
-
- * source package automatically created by stdeb 0.6.0+git
-
- -- Keith Mitchell <kemitche@reddit.com> Thu, 17 May 2012 15:57:52 -0700
View
1 debian/compat
@@ -1 +0,0 @@
-7
View
13 debian/control
@@ -1,13 +0,0 @@
-Source: l2cs
-Maintainer: Keith Mitchell <kemitche@reddit.com>
-Section: python
-Priority: optional
-Build-Depends: python-setuptools (>= 0.6b3), python-all (>= 2.6.6-3), debhelper (>= 7)
-Standards-Version: 3.9.1
-
-Package: python-l2cs
-Architecture: all
-Depends: ${misc:Depends}, ${python:Depends}
-Breaks: ${python:Breaks}
-Description: Converts queries from Lucene to Amazon CloudSearch syntax
-
View
1 debian/pydist-overrides
@@ -1 +0,0 @@
-whoosh python-whoosh
View
9 debian/rules
@@ -1,9 +0,0 @@
-#!/usr/bin/make -f
-
-# This file was automatically generated by stdeb 0.6.0+git at
-# Thu, 17 May 2012 15:57:52 -0700
-
-%:
- dh $@ --with python2 --buildsystem=python_distutils
-
-
View
1 debian/source/format
@@ -1 +0,0 @@
-3.0 (quilt)
View
244 l2cs.py
@@ -1,244 +0,0 @@
-#!/usr/bin/env python
-'''
-l2cs (lucene to CloudSearch) - is a module for converting search queries
-from Apache lucene's base syntax
-(http://lucene.apache.org/core/3_6_0/queryparsersyntax.html)
-into an Amazon CloudSearch boolean query
-(http://docs.amazonwebservices.com/cloudsearch/latest/developerguide/booleansearch.html).
-'''
-
-import sys
-
-import whoosh.qparser
-import whoosh.qparser.plugins
-import whoosh.qparser.syntax
-import whoosh.query
-
-
-__version__ = "0.1.0"
-
-
-HANDLERS = {}
-
-
def handler(classes):
    '''Build a decorator that registers a function in HANDLERS.

    classes -- iterable of query classes the decorated function renders.

    Returns a decorator that stores the function in HANDLERS under each
    class in `classes` and then returns the function unchanged.

    Raises ValueError, when the decorator is applied, if one of the
    classes already has a handler.
    '''
    # Review note (spladug, May 30, 2012): taking *classes would let callers
    # do away with the tuples. The single-iterable signature is kept so the
    # existing @handler((...)) call sites keep working.
    def decorator(fn):
        for cls in classes:
            if cls in HANDLERS:
                # Fill in the placeholder so the error names the class.
                raise ValueError("%s already has a handler" % (cls,))
            HANDLERS[cls] = fn
        return fn
    return decorator
-
-
@handler((whoosh.query.Term, whoosh.query.Phrase, whoosh.query.Prefix))
def build_field(clause):
    '''Yield the output pieces for a Term, Phrase or Prefix clause.

    Clauses carrying a true `integer_field` attribute are written as
    `fieldname:text`. Everything else is written as
    `(field fieldname '...')` with single quotes in the text escaped as
    backslash-quote; a Prefix additionally gets a trailing `*`.
    '''
    if getattr(clause, "integer_field", False):
        yield clause.fieldname
        yield ':'
        yield clause.text
        return

    yield "(field "
    yield clause.fieldname
    yield " '"
    if isinstance(clause, whoosh.query.Term):
        yield clause.text.replace(r"'", r"\'")
    elif isinstance(clause, whoosh.query.Prefix):
        yield clause.text.replace(r"'", r"\'")
        yield '*'
    elif isinstance(clause, whoosh.query.Phrase):
        # Escape every word, the last one included, so a quote in the final
        # word cannot end the literal early. Joining also copes with an
        # empty word list instead of indexing words[-1].
        yield " ".join(word.replace(r"'", r"\'") for word in clause.words)
    yield "')"
-
@handler((whoosh.query.And, whoosh.query.Or, whoosh.query.Not))
def build_grouper(clause):
    '''Yield the pieces of an And/Or/Not clause.

    The output is `(` followed by the lower-cased class name, then each
    child rendered through walk_clause and preceded by a space, then `)`.
    '''
    operator = clause.__class__.__name__.lower()
    yield "("
    yield operator
    for child in clause.children():
        yield " "
        for fragment in walk_clause(child):
            yield fragment
    yield ")"
-
-
@handler((whoosh.query.AndNot,))
def build_compound(clause):
    '''Yield the pieces of an AndNot clause as `(and X (not Y))`.

    The clause must have exactly two children: the first is rendered as-is,
    the second is wrapped in `(not ...)`.
    '''
    yield '(and '
    wanted, unwanted = clause.children()
    for fragment in walk_clause(wanted):
        yield fragment
    yield ' (not '
    for fragment in walk_clause(unwanted):
        yield fragment
    yield '))'
-
-
def walk_clause(clause):
    '''Yield the output pieces for `clause` using its registered handler.

    The handler is looked up in HANDLERS by the clause's exact class.

    Raises KeyError (on first iteration) if no handler is registered for
    that class.
    '''
    cls = clause.__class__
    try:
        handler_fn = HANDLERS[cls]
    except KeyError:
        # Same exception type as the plain dict lookup, but say what failed.
        raise KeyError("no handler registered for %s clauses" % cls.__name__)
    for piece in handler_fn(clause):
        yield piece
-
-
class IntNode(whoosh.qparser.syntax.WordNode):
    '''Word node whose text is the string form of an integer.

    Queries built from this node are tagged with `integer_field = True`.
    '''
    def __init__(self, value):
        '''Convert `value` with int(); a ValueError from int() propagates.'''
        number = int(value)
        self.__int_value = number
        whoosh.qparser.syntax.WordNode.__init__(self, str(number))

    def query(self, parser):
        '''Return the WordNode query with `integer_field` set to True.'''
        built = whoosh.qparser.syntax.WordNode.query(self, parser)
        built.integer_field = True
        return built
-
-
class PseudoFieldPlugin(whoosh.qparser.plugins.PseudoFieldPlugin):
    '''Pseudo-field plugin that routes every listed field name to one
    overridable method, modify_node(fieldname, node).
    '''
    def __init__(self, fieldnames):
        '''Map each name in `fieldnames` to a one-argument callable that
        forwards to self.modify_node with that name bound.
        '''
        mapping = dict(
            (name, self.modify_node_fn(name, self.modify_node))
            for name in fieldnames
        )
        super(PseudoFieldPlugin, self).__init__(mapping)

    @staticmethod
    def modify_node_fn(fname, base_fn):
        '''Return a function of `node` that calls base_fn(fname, node).'''
        return lambda node: base_fn(fname, node)

    def modify_node(self, fieldname, node):
        '''Subclass hook; the base implementation always raises.'''
        raise NotImplementedError
-
-
class IntNodePlugin(PseudoFieldPlugin):
    '''Turns text nodes on the configured fields into IntNode instances.'''
    def modify_node(self, fieldname, node):
        '''Return an IntNode for `node`, or None if conversion fails.

        Nodes without text are returned unchanged. A ValueError during
        conversion results in None.
        '''
        if not node.has_text:
            return node
        try:
            int_node = IntNode(node.text)
            int_node.set_fieldname(fieldname)
        except ValueError:
            return None
        return int_node
-
-
class YesNoPlugin(PseudoFieldPlugin):
    '''Maps yes/no style text on the configured fields to IntNode 1 or 0.'''
    def modify_node(self, fieldname, node):
        '''Return IntNode(1) for "yes", "y" or "1" and IntNode(0) for any
        other text; nodes without text are returned unchanged.
        '''
        if not node.has_text:
            return node
        flag = 1 if node.text in ("yes", "y", "1") else 0
        flag_node = IntNode(flag)
        flag_node.set_fieldname(fieldname)
        return flag_node
-
-
class FieldAliasPlugin(PseudoFieldPlugin):
    '''Rewrites alias field names to the field name they stand for.'''
    def __init__(self, aliases):
        '''`aliases` maps a real field name to a list of its aliases.

        It is inverted into self.aliases (alias -> field name), and every
        alias is registered as a pseudo-field.
        '''
        self.aliases = dict(
            (alias, fieldname)
            for fieldname, alias_list in aliases.items()
            for alias in alias_list
        )
        super(FieldAliasPlugin, self).__init__(self.aliases.keys())

    def modify_node(self, fieldname, node):
        '''Point a text node at the aliased field; always return `node`.'''
        if node.has_text:
            target = self.aliases[fieldname]
            node.set_fieldname(target)
        return node
-
-
class PlusMinusPlugin(whoosh.qparser.plugins.PlusMinusPlugin):
    '''PlusMinus plugin that follows the parser's default grouping.

    The stock plugin ignores the parser's default grouping and always uses
    "OR" groupings; this version builds the default group from
    parser.group() instead.
    '''
    def do_plusminus(self, parser, group):
        '''Sort the nodes of a flat group into "required", "default" and
        "banned" subgroups according to the plus and minus nodes that
        precede them, and return the combined group.
        '''
        syntax = whoosh.qparser.syntax
        required = syntax.AndGroup()
        banned = syntax.OrGroup()
        default = parser.group()

        # Bucket that receives the next ordinary node.
        target = default
        for node in group:
            if isinstance(node, self.Plus):
                # "+" sends the following node to the required bucket.
                target = required
                continue
            if isinstance(node, self.Minus):
                # "-" sends the following node to the banned bucket.
                target = banned
                continue
            # Ordinary node: file it, then go back to the default bucket.
            target.append(node)
            target = default

        result = default
        if required:
            result = syntax.AndMaybeGroup([required, result])
        if banned:
            result = syntax.AndNotGroup([result, banned])
        return result
-
-
# Plugin instances that make_parser hands to the QueryParser by default.
DEFAULT_PLUGINS = (
    whoosh.qparser.plugins.WhitespacePlugin(),
    whoosh.qparser.plugins.SingleQuotePlugin(),
    whoosh.qparser.plugins.FieldsPlugin(),
    whoosh.qparser.plugins.PhrasePlugin(),
    whoosh.qparser.plugins.PrefixPlugin(),
    whoosh.qparser.plugins.GroupPlugin(),
    # The AndMaybe and Require operators are passed as None here.
    whoosh.qparser.plugins.OperatorsPlugin(AndMaybe=None, Require=None),
    whoosh.qparser.plugins.EveryPlugin(),
    # Local subclass defined in this module, not the whoosh original.
    PlusMinusPlugin(),
)
-
-
def make_parser(default_field='text', plugins=DEFAULT_PLUGINS, schema=None,
                int_fields=None, yesno_fields=None, aliases=None):
    '''Create a whoosh QueryParser set up for this module.

    default_field -- field used for terms that name no field
    plugins       -- plugins passed to the QueryParser
    schema        -- schema passed to the QueryParser
    int_fields    -- field names handled by IntNodePlugin
    yesno_fields  -- field names handled by YesNoPlugin
    aliases       -- {field name: [alias, ...]} handled by FieldAliasPlugin

    The three optional plugins are added only when their argument is truthy.
    '''
    parser = whoosh.qparser.QueryParser(default_field, schema, plugins=plugins)
    optional_plugins = (
        (int_fields, IntNodePlugin),
        (yesno_fields, YesNoPlugin),
        (aliases, FieldAliasPlugin),
    )
    for config, plugin_cls in optional_plugins:
        if config:
            parser.add_plugin(plugin_cls(config))
    return parser
-
-
def convert(query, parser):
    '''Parse `query` with `parser` and return the pieces produced by
    walk_clause joined into one string.
    '''
    return ''.join(walk_clause(parser.parse(query)))
-
-
def __sample_parser():
    '''Return a parser with sample int, yes/no and alias fields.'''
    options = {
        "int_fields": ["count", "number"],
        "yesno_fields": ["active", "ready"],
        "aliases": {"alias": ["alias1", "alias2"]},
    }
    return make_parser(**options)
-
-
def main(args):
    '''For command line experimentation.

    args -- argv-style list; everything after the first element is joined
    with spaces and used as the Lucene query. The query, its parsed
    representation and the converted form are printed to stdout.
    '''
    query = ' '.join(args[1:])
    # Single-argument print(...) parses on both Python 2 and Python 3 and
    # prints the same text as the old two-item print statements.
    print("Lucene input: %s" % query)
    parser = __sample_parser()
    parsed = parser.parse(query)
    print("Parsed representation: %s" % repr(parsed))
    print("Lucene form: %s" % str(parsed))
    cloudsearch_query = ''.join(walk_clause(parsed))
    print("Cloudsearch form: %s" % cloudsearch_query)


if __name__ == '__main__':
    main(sys.argv)
View
23 setup.py
@@ -1,23 +0,0 @@
#!/usr/bin/env python2.7
'''Packaging script for the l2cs module.'''

try:
    from setuptools import setup, find_packages
except ImportError:
    # setuptools is missing: bootstrap it through ez_setup, then retry.
    from ez_setup import use_setuptools
    use_setuptools()
    from setuptools import setup, find_packages

setup(
    name='l2cs',
    version="0.1.0",
    author='Keith Mitchell',
    author_email='kemitche@reddit.com',
    description=("Rewrites queries from lucene syntax to"
                 " Amazon Cloudsearch syntax"),
    license='BSD',
    url="http://github.com/kemitche/l2cs",
    install_requires=["whoosh"],
    # l2cs is a single top-level module (l2cs.py). find_packages() only
    # reports packages, so the module has to be listed explicitly or it
    # is left out of the installation.
    py_modules=["l2cs"],
    packages=find_packages(exclude=['ez_setup']),
    include_package_data=True,
    test_suite="test_l2cs",
)
View
87 test_l2cs.py
@@ -1,87 +0,0 @@
-#!/usr/bin/env python
-
-import unittest
-
-import l2cs
-
-
class l2csTester(unittest.TestCase):
    '''Compares l2cs output against expected strings for sample queries.'''

    def setUp(self):
        self.parser = l2cs.make_parser(
            int_fields=["count", "number"],
            yesno_fields=["active", "ready"],
            aliases={"alias": ["alias1", "alias2"]},
        )

    def tearDown(self):
        self.parser = None

    def _run_test(self, input_, expected, parser=None):
        '''Parse `input_`, render it, and assert it equals `expected`.

        Uses self.parser unless a truthy `parser` is supplied.
        '''
        active_parser = parser if parser else self.parser
        parsed = active_parser.parse(input_)
        result = ''.join(l2cs.walk_clause(parsed))
        details = (input_, parsed, result, expected)
        errmsg = "\ninput: %s\nparsed: %s\nresult: %s\nexpected: %s" % details
        self.assertEqual(result, expected, errmsg)

    # basic fields
    def test_fields1(self):
        self._run_test("foo",
                       "(field text 'foo')")

    def test_fields2(self):
        self._run_test("foo:bar",
                       "(field foo 'bar')")

    # phrases
    def test_phrases1(self):
        self._run_test('"foo bar baz"',
                       "(field text 'foo bar baz')")

    # AND clauses
    def test_and1(self):
        self._run_test("foo AND bar",
                       "(and (field text 'foo') (field text 'bar'))")

    def test_and2(self):
        self._run_test("foo AND bar:baz",
                       "(and (field text 'foo') (field bar 'baz'))")

    # OR clauses
    def test_or1(self):
        self._run_test("foo OR bar",
                       "(or (field text 'foo') (field text 'bar'))")

    def test_or2(self):
        self._run_test("bar:baz OR foo",
                       "(or (field bar 'baz') (field text 'foo'))")

    # NOT clauses
    def test_not1(self):
        self._run_test("NOT foo",
                       "(not (field text 'foo'))")

    def test_not2(self):
        self._run_test("baz NOT bar",
                       "(and (field text 'baz') (not (field text 'bar')))")

    def test_not3(self):
        self._run_test("foo:bar NOT foo:baz",
                       "(and (field foo 'bar') (not (field foo 'baz')))")

    def test_not4(self):
        self._run_test("bar AND foo:-baz",
                       "(and (field text 'bar') (not (field text 'baz')))")

    # quotes
    def test_quote1(self):
        self._run_test("hello:\"goodbye you're sir\"",
                       "(field hello 'goodbye you\\'re sir')")

    def test_quote2(self):
        self._run_test("hello:\"goodbye you''re sir\"",
                       "(field hello 'goodbye you\\'\\'re sir')")

    # int fields
    def test_int1(self):
        self._run_test("count:12",
                       "count:12")

    def test_int2(self):
        self._run_test("count:foo number:12 foo:bar",
                       "(and number:12 (field foo 'bar'))")

    # yes/no fields
    def test_yesno1(self):
        self._run_test("ready:yes active:n",
                       "(and ready:1 active:0)")

    # prefixes
    def test_prefix1(self):
        self._run_test("foo:bar*",
                       "(field foo 'bar*')")

    # Aliases
    def test_alias1(self):
        self._run_test("alias1:foo",
                       "(field alias 'foo')")

    def test_alias2(self):
        '''Make sure that referencing the base of the alias still works'''
        self._run_test("alias:foo",
                       "(field alias 'foo')")
-
-if __name__ == '__main__':
- unittest.main()