
bug 839214 - add elasticutils and pyes

1 parent 66e051f commit cb8f1ca70941b88bb7ab780956d02401ae35d016 @groovecoder committed Feb 19, 2013
Showing with 9,126 additions and 0 deletions.
  1. +3 −0 .gitmodules
  2. +2 −0 kuma.pth
  3. BIN packages/pyes/._AUTHORS
  4. BIN packages/pyes/._Changelog
  5. BIN packages/pyes/._MANIFEST.in
  6. BIN packages/pyes/._README
  7. BIN packages/pyes/._README.rst
  8. BIN packages/pyes/._pavement.py
  9. BIN packages/pyes/._setup.py
  10. +9 −0 packages/pyes/AUTHORS
  11. 0 packages/pyes/Changelog
  12. +28 −0 packages/pyes/LICENSE
  13. +19 −0 packages/pyes/MANIFEST.in
  14. +316 −0 packages/pyes/PKG-INFO
  15. +300 −0 packages/pyes/README
  16. +300 −0 packages/pyes/README.rst
  17. +2 −0 packages/pyes/THANKS
  18. +2 −0 packages/pyes/TODO
  19. +169 −0 packages/pyes/contrib/mailman/archive_and_index.py
  20. +147 −0 packages/pyes/pavement.py
  21. BIN packages/pyes/pyes/.___init__.py
  22. +42 −0 packages/pyes/pyes/__init__.py
  23. +206 −0 packages/pyes/pyes/connection.py
  24. +207 −0 packages/pyes/pyes/connection_http.py
  25. +77 −0 packages/pyes/pyes/convert_errors.py
  26. +125 −0 packages/pyes/pyes/djangoutils.py
  27. +1,017 −0 packages/pyes/pyes/es.py
  28. +80 −0 packages/pyes/pyes/exceptions.py
  29. +254 −0 packages/pyes/pyes/facets.py
  30. +205 −0 packages/pyes/pyes/fakettypes.py
  31. +389 −0 packages/pyes/pyes/filters.py
  32. +43 −0 packages/pyes/pyes/highlight.py
  33. +455 −0 packages/pyes/pyes/mappings.py
  34. +219 −0 packages/pyes/pyes/pyesthrift/Rest.py
  35. +1 −0 packages/pyes/pyes/pyesthrift/__init__.py
  36. +9 −0 packages/pyes/pyes/pyesthrift/constants.py
  37. +30 −0 packages/pyes/pyes/pyesthrift/simple_test.py
  38. +408 −0 packages/pyes/pyes/pyesthrift/ttypes.py
  39. +1,227 −0 packages/pyes/pyes/query.py
  40. +186 −0 packages/pyes/pyes/query_extra.py
  41. +120 −0 packages/pyes/pyes/rivers.py
  42. +47 −0 packages/pyes/pyes/scriptfields.py
  43. +60 −0 packages/pyes/pyes/tests/__init__.py
  44. +110 −0 packages/pyes/pyes/tests/aliases.py
  45. +84 −0 packages/pyes/pyes/tests/attachments.py
  46. +49 −0 packages/pyes/pyes/tests/bulk.py
  47. +57 −0 packages/pyes/pyes/tests/cluster.py
  48. +29 −0 packages/pyes/pyes/tests/dump_curl.py
  49. +75 −0 packages/pyes/pyes/tests/errors.py
  50. +25 −0 packages/pyes/pyes/tests/es_related.py
  51. +113 −0 packages/pyes/pyes/tests/facets.py
  52. +93 −0 packages/pyes/pyes/tests/geoloc.py
  53. +51 −0 packages/pyes/pyes/tests/highlight.py
  54. +149 −0 packages/pyes/pyes/tests/indexing.py
  55. +23 −0 packages/pyes/pyes/tests/issue6.py
  56. +25 −0 packages/pyes/pyes/tests/mapping_parser.py
  57. +130 −0 packages/pyes/pyes/tests/multifield.py
  58. +79 −0 packages/pyes/pyes/tests/percolator.py
  59. +45 −0 packages/pyes/pyes/tests/pyestest.py
  60. +181 −0 packages/pyes/pyes/tests/queries.py
  61. +50 −0 packages/pyes/pyes/tests/reindex.py
  62. +70 −0 packages/pyes/pyes/tests/rivers.py
  63. +58 −0 packages/pyes/pyes/tests/serialize.py
  64. +63 −0 packages/pyes/pyes/tests/utils.py
  65. +13 −0 packages/pyes/pyes/urllib3/__init__.py
  66. +462 −0 packages/pyes/pyes/urllib3/connectionpool.py
  67. 0 packages/pyes/pyes/urllib3/contrib/__init__.py
  68. +88 −0 packages/pyes/pyes/urllib3/contrib/ntlmpool.py
  69. +44 −0 packages/pyes/pyes/urllib3/filepost.py
  70. +149 −0 packages/pyes/pyes/utils.py
  71. +24 −0 packages/pyes/setup.cfg
  72. +82 −0 packages/pyes/setup.py
  73. +1 −0 src/elasticutils
3 .gitmodules
@@ -115,3 +115,6 @@
[submodule "src/pystatsd"]
path = src/pystatsd
url = git://github.com/jsocol/pystatsd.git
+[submodule "src/elasticutils"]
+ path = src/elasticutils
+ url = git://github.com/mozilla/elasticutils.git
2 kuma.pth
@@ -41,6 +41,7 @@ packages/jsonpickle
packages/lockfile
packages/recaptcha-client
packages/beautifulsoup
+packages/pyes
src
src/django-cronjobs
src/django-cache-machine
@@ -76,3 +77,4 @@ src/basket-client
src/pytidylib
src/pystatsd
src/django-statsd
+src/elasticutils
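
A .pth file such as kuma.pth is processed by Python's site module at interpreter startup: each line naming an existing directory is appended to sys.path, relative to the directory containing the .pth file. That is what makes the vendored packages/pyes tree and the src/elasticutils submodule importable. A minimal sketch of the effect, assuming the elasticutils checkout exposes an elasticutils package at its top level:

    >>> import sys
    >>> any(p.endswith('packages/pyes') for p in sys.path)
    True
    >>> import pyes           # resolved via packages/pyes/pyes/
    >>> import elasticutils   # resolved via src/elasticutils
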
BIN packages/pyes/._AUTHORS
Binary file not shown.
BIN packages/pyes/._Changelog
Binary file not shown.
BIN packages/pyes/._MANIFEST.in
Binary file not shown.
BIN packages/pyes/._README
Binary file not shown.
BIN packages/pyes/._README.rst
Binary file not shown.
BIN packages/pyes/._pavement.py
Binary file not shown.
BIN packages/pyes/._setup.py
Binary file not shown.
9 packages/pyes/AUTHORS
@@ -0,0 +1,9 @@
+Originally based on pyelasticsearch by Robert Eanes and Matt Dennewitz
+
+Ordered by date of first contribution:
+ Alberto Paro <alberto.paro@gmail.com>
+ sandymahalo
+ andrei
+ Tavis Aitken
+ Richard Boulton
+ matterkkila
0 packages/pyes/Changelog
No changes.
28 packages/pyes/LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2009, Ask Solem
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+Neither the name of Ask Solem nor the names of its contributors may be used
+to endorse or promote products derived from this software without specific
+prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
19 packages/pyes/MANIFEST.in
@@ -0,0 +1,19 @@
+include AUTHORS
+include Changelog
+include README
+include README.rst
+include MANIFEST.in
+include LICENSE
+include TODO
+include THANKS
+include pavement.py
+include setup.cfg
+recursive-include bin *
+recursive-include pyes *.py
+recursive-include contrib *
+recursive-include examples *
+prune docs/*
+prune tests/*.pyc
+prune contrib/*.pyc
+prune pyes/*.pyc
+prune bin/*.pyc
316 packages/pyes/PKG-INFO
@@ -0,0 +1,316 @@
+Metadata-Version: 1.0
+Name: pyes
+Version: 0.16.0
+Summary: Python Elastic Search driver
+Home-page: http://github.com/aparo/pyes/
+Author: Alberto Paro
+Author-email: alberto.paro@gmail.com
+License: BSD
+Description: =============================
+ pyes - Python ElasticSearch
+ =============================
+
+ :Web: http://pypi.python.org/pypi/pyes/
+ :Download: http://pypi.python.org/pypi/pyes/
+ :Source: http://github.com/aparo/pyes/
+ :Keywords: search, elasticsearch, distributed search
+
+ --
+
+ pyes is a connector for using elasticsearch from Python.
+
+ This version requires elasticsearch 0.15 or above.
+
+ Features
+ ========
+
+ - Thrift/HTTP protocols
+ - Bulk insert/delete
+ - Index management
+ - Every search query type
+ - Facet Support
+ - Geolocalization support
+ - Highlighting
+ - River support
+
+ Connecting
+ ==========
+
+ These functions are taken from pycassa.
+
+ Import the module:
+
+ >>> import pyes
+
+ pyes can use either the standard HTTP protocol or Thrift. If your port starts with "92", the HTTP protocol is used; otherwise Thrift is used.
+
+
+ For a single connection (which is _not_ thread-safe), pass a list of servers.
+
+ For thrift:
+
+ >>> conn = pyes.ES() # Defaults to connecting to the server at '127.0.0.1:9500'
+ >>> conn = pyes.ES(['127.0.0.1:9500'])
+
+ For http:
+
+ >>> conn = pyes.ES(['127.0.0.1:9200'])
+
+ Connections are robust to server failures. Upon a disconnection, the client will attempt to connect to each server in the list in turn. If no server is available, it raises a NoServerAvailable exception.
+
+ Timeouts are also supported and should be used in production to prevent a thread from freezing while waiting for the server to return.
+
+ >>> conn = pyes.ES(timeout=3.5) # 3.5 second timeout
+ (Make some pyes calls and the connection to the server suddenly becomes unresponsive.)
+
+ Traceback (most recent call last):
+ ...
+ pyes.connection.NoServerAvailable
+
+ Note that this only handles socket timeouts.
+
+
+ Usage
+ =====
+
+ Creating a connection:
+
+ >>> from pyes import *
+ >>> conn = ES('127.0.0.1:9500')
+
+ Deleting an index:
+
+ >>> try:
+ >>> conn.delete_index("test-index")
+ >>> except:
+ >>> pass
+
+ (an exception is raised if the index is not present)
+
+ Create an index:
+
+ >>> conn.create_index("test-index")
+
+ Creating a mapping:
+
+ >>> mapping = { u'parsedtext': {'boost': 1.0,
+ >>> 'index': 'analyzed',
+ >>> 'store': 'yes',
+ >>> 'type': u'string',
+ >>> "term_vector" : "with_positions_offsets"},
+ >>> u'name': {'boost': 1.0,
+ >>> 'index': 'analyzed',
+ >>> 'store': 'yes',
+ >>> 'type': u'string',
+ >>> "term_vector" : "with_positions_offsets"},
+ >>> u'title': {'boost': 1.0,
+ >>> 'index': 'analyzed',
+ >>> 'store': 'yes',
+ >>> 'type': u'string',
+ >>> "term_vector" : "with_positions_offsets"},
+ >>> u'pos': {'store': 'yes',
+ >>> 'type': u'integer'},
+ >>> u'uuid': {'boost': 1.0,
+ >>> 'index': 'not_analyzed',
+ >>> 'store': 'yes',
+ >>> 'type': u'string'}}
+ >>> conn.put_mapping("test-type", {'properties':mapping}, ["test-index"])
+
+ Index some documents:
+
+ >>> conn.index({"name":"Joe Tester", "parsedtext":"Joe Testere nice guy", "uuid":"11111", "position":1}, "test-index", "test-type", 1)
+ >>> conn.index({"name":"Bill Baloney", "parsedtext":"Joe Testere nice guy", "uuid":"22222", "position":2}, "test-index", "test-type", 2)
+
+ Refresh an index:
+
+ >>> conn.refresh(["test-index"])
+
+ Execute a query:
+
+ >>> q = TermQuery("name", "joe")
+ >>> result = conn.search(query=q)
+
+ For more examples, look at the tests.
+
+
+ Changelog
+ =========
+
+ Note for next release - the order of geolocation parameters expected by
+ elasticsearch changed between ES 0.14.4 and ES 0.15, from [lat, lon] to [lon,
+ lat]. Clients will need to update accordingly, or use an object with named
+ parameters.
+
+ v. 0.16.0:
+
+ Updated documentation.
+
+ Added TextQuery and some clean up of code.
+
+ Added percolator (matterkkila).
+
+ Added date_histogram facet (zebuline).
+
+ Added script fields to Search object, also add "fields" to TermFacet (aguereca).
+
+ Added analyze_wildcard param to StringQuery (available for ES 0.16.0) (zebuline).
+
+ Add ScriptFields object used as parameter script_fields of Search object (aguereca).
+
+ Add IdsQuery, IdsFilter and deleteByQuery (aguereca).
+
+ Bulk delete (acdha).
+
+ v. 0.15.0:
+
+ Only require simplejson for python < 2.6 (matterkkila)
+
+ Added basic version support to ES.index and Search (merrellb)
+
+ Added scan method to ES. This is only supported on ES Master (pre 0.16) (merrellb)
+
+ Added GeoPointField to mapping types (merrellb)
+
+ Disable thrift in setup.py.
+
+ Added missing _routing property in ObjectField
+
+ Added ExistsFilter
+
+ Improved HasChildren
+
+ Add min_similarity and prefix_length to flt.
+
+ Added _scope to HasChildQuery. (andreiz)
+
+ Added parent/child document in test indexing. Added _scope to HasChildFilter.
+
+ Added MissingFilter as a subclass of TermFilter
+
+ Fixed error in checking TermsQuery (merrellb)
+
+ If an analyzer is set on a field, the returned mapping will have an analyzer
+
+ Add a specific error subtype for mapper parsing exceptions (rboulton)
+
+ Add support for Float numeric field mappings (rboulton)
+
+ ES.get() now accepts "fields" as well as other keyword arguments (eg "routing") (rboulton)
+
+ Allow dump_curl to be passed a filehandle (or still a filename), don't force filenames to be in /tmp, and add a basic test of it.
+
+ Add alias handling (rboulton)
+
+ Add ElasticSearchIllegalArgumentException - used for example when writing to an alias which refers to more than one index. (rboulton)
+
+ Handle errors produced by deleting a missing document, and add a test for it. (rboulton)
+
+ Split Query object into a Search object, for the search specific parts, and a Query base class. Allow ES.search() to take a query or a search object. Make some of the methods of Query base classes chainable, where that is an obviously reasonable thing to do. (rboulton)
+
+ v. 0.14.0: Added delete of mapping type.
+
+ Embedded urllib3 to be buildout safe and for users sake.
+
+ Some code cleanup.
+
+ Added reindex by query (usable only with my elasticsearch git branch).
+
+ Added contrib with mailman indexing.
+
+ Autodetect if django is available and added related functions.
+
+ Code cleanup and PEP8.
+
+ Reactivated the morelikethis query.
+
+ Fixed river support plus unittest. (Tavis Aitken)
+
+ Added autorefresh to sync search and write.
+
+ Added QueryFilter.
+
+ Forced name attribute in multifield declaration.
+
+ Added is_empty to ConstantScoreQuery and fixed some bad behaviour.
+
+ Added CustomScoreQuery.
+
+ Added parent/children indexing.
+
+ Added dump commands in a script file "curl" way.
+
+ Added a lot of fixes from Richard Boulton.
+
+ v. 0.13.1: Added jython support (HTTP only for now).
+
+ v. 0.13.0: API Changes: errors -> exceptions.
+
+ Splitting of query/filters.
+
+ Added open/close of index.
+
+ Added the number of retries if server is down.
+
+ Refactored Range query. (Andrei)
+
+ Improved HTTP connection timeout/retries. (Sandymahalo)
+
+ Cleanup some imports. (Sandymahalo)
+
+ v. 0.12.1: Added collecting server info.
+
+ Version 0.12 or above requirement.
+
+ Fixed attachment plugin.
+
+ Updated bulk insert to use new api.
+
+ Added facet support (except geotypes).
+
+ Added river support.
+
+ Cleaned up some methods.
+
+ Added default_indexes variable.
+
+ Added datetime deserialization.
+
+ Improved performance and memory usage in bulk insert replacing list with StringIO.
+
+ Initial propagation of elasticsearch exception to python.
+
+ v. 0.12.0: added http transport, added autodetect of transport, updated thrift interface.
+
+ v. 0.10.3: added bulk insert, explain and facet.
+
+ v. 0.10.2: added new geo query type.
+
+ v. 0.10.1: added new connection pool system based on pycassa one.
+
+ v. 0.10.0: initial working version.
+
+
+ TODO
+ ----
+
+ - more docs
+ - more tests
+ - cleanup
+ - add coverage
+ - add jython native client protocol
+
+ License
+ =======
+
+ This software is licensed under the ``New BSD License``. See the ``LICENSE``
+ file in the top distribution directory for the full license text.
+
+ .. # vim: syntax=rst expandtab tabstop=4 shiftwidth=4 shiftround
+
+Platform: any
+Classifier: Development Status :: 2 - Pre-Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
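
The description above promises failover across the server list, a NoServerAvailable exception once every server is down, and timeout coverage for socket operations only. A minimal defensive sketch built from the calls it documents (addresses are illustrative):

    >>> import pyes
    >>> from pyes.connection import NoServerAvailable
    >>> conn = pyes.ES(['127.0.0.1:9200', '127.0.0.1:9201'], timeout=3.5)
    >>> try:
    ...     conn.refresh(["test-index"])
    ... except NoServerAvailable:
    ...     pass  # every server in the list was tried and marked dead
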
300 packages/pyes/README
@@ -0,0 +1,300 @@
+=============================
+ pyes - Python ElasticSearch
+=============================
+
+:Web: http://pypi.python.org/pypi/pyes/
+:Download: http://pypi.python.org/pypi/pyes/
+:Source: http://github.com/aparo/pyes/
+:Keywords: search, elasticsearch, distributed search
+
+--
+
+pyes is a connector for using elasticsearch from Python.
+
+This version requires elasticsearch 0.15 or above.
+
+Features
+========
+
+- Thrift/HTTP protocols
+- Bulk insert/delete
+- Index management
+- Every search query type
+- Facet Support
+- Geolocalization support
+- Highlighting
+- River support
+
+Connecting
+==========
+
+These functions are taken from pycassa.
+
+Import the module:
+
+ >>> import pyes
+
+pyes can use either the standard HTTP protocol or Thrift. If your port starts with "92", the HTTP protocol is used; otherwise Thrift is used.
+
+
+For a single connection (which is _not_ thread-safe), pass a list of servers.
+
+For thrift:
+
+ >>> conn = pyes.ES() # Defaults to connecting to the server at '127.0.0.1:9500'
+ >>> conn = pyes.ES(['127.0.0.1:9500'])
+
+For http:
+
+ >>> conn = pyes.ES(['127.0.0.1:9200'])
+
+Connections are robust to server failures. Upon a disconnection, the client will attempt to connect to each server in the list in turn. If no server is available, it raises a NoServerAvailable exception.
+
+Timeouts are also supported and should be used in production to prevent a thread from freezing while waiting for the server to return.
+
+ >>> conn = pyes.ES(timeout=3.5) # 3.5 second timeout
+ (Make some pyes calls and the connection to the server suddenly becomes unresponsive.)
+
+ Traceback (most recent call last):
+ ...
+ pyes.connection.NoServerAvailable
+
+Note that this only handles socket timeouts.
+
+
+Usage
+=====
+
+Creating a connection:
+
+ >>> from pyes import *
+ >>> conn = ES('127.0.0.1:9500')
+
+Deleting an index:
+
+ >>> try:
+ >>> conn.delete_index("test-index")
+ >>> except:
+ >>> pass
+
+(an exception is raised if the index is not present)
+
+Create an index:
+
+ >>> conn.create_index("test-index")
+
+Creating a mapping:
+
+ >>> mapping = { u'parsedtext': {'boost': 1.0,
+ >>> 'index': 'analyzed',
+ >>> 'store': 'yes',
+ >>> 'type': u'string',
+ >>> "term_vector" : "with_positions_offsets"},
+ >>> u'name': {'boost': 1.0,
+ >>> 'index': 'analyzed',
+ >>> 'store': 'yes',
+ >>> 'type': u'string',
+ >>> "term_vector" : "with_positions_offsets"},
+ >>> u'title': {'boost': 1.0,
+ >>> 'index': 'analyzed',
+ >>> 'store': 'yes',
+ >>> 'type': u'string',
+ >>> "term_vector" : "with_positions_offsets"},
+ >>> u'pos': {'store': 'yes',
+ >>> 'type': u'integer'},
+ >>> u'uuid': {'boost': 1.0,
+ >>> 'index': 'not_analyzed',
+ >>> 'store': 'yes',
+ >>> 'type': u'string'}}
+ >>> conn.put_mapping("test-type", {'properties':mapping}, ["test-index"])
+
+Index some documents:
+
+ >>> conn.index({"name":"Joe Tester", "parsedtext":"Joe Testere nice guy", "uuid":"11111", "position":1}, "test-index", "test-type", 1)
+ >>> conn.index({"name":"Bill Baloney", "parsedtext":"Joe Testere nice guy", "uuid":"22222", "position":2}, "test-index", "test-type", 2)
+
+Refresh an index:
+
+ >>> conn.refresh(["test-index"])
+
+Execute a query:
+
+ >>> q = TermQuery("name", "joe")
+ >>> result = conn.search(query=q)
+
+For more examples, look at the tests.
+
+
+Changelog
+=========
+
+Note for next release - the order of geolocation parameters expected by
+elasticsearch changed between ES 0.14.4 and ES 0.15, from [lat, lon] to [lon,
+lat]. Clients will need to update accordingly, or use an object with named
+parameters.
+
+v. 0.16.0:
+
+ Updated documentation.
+
+ Added TextQuery and some clean up of code.
+
+ Added percolator (matterkkila).
+
+ Added date_histogram facet (zebuline).
+
+ Added script fields to Search object, also add "fields" to TermFacet (aguereca).
+
+ Added analyze_wildcard param to StringQuery (available for ES 0.16.0) (zebuline).
+
+ Add ScriptFields object used as parameter script_fields of Search object (aguereca).
+
+ Add IdsQuery, IdsFilter and deleteByQuery (aguereca).
+
+ Bulk delete (acdha).
+
+v. 0.15.0:
+
+ Only require simplejson for python < 2.6 (matterkkila)
+
+ Added basic version support to ES.index and Search (merrellb)
+
+ Added scan method to ES. This is only supported on ES Master (pre 0.16) (merrellb)
+
+ Added GeoPointField to mapping types (merrellb)
+
+ Disable thrift in setup.py.
+
+ Added missing _routing property in ObjectField
+
+ Added ExistsFilter
+
+ Improved HasChildren
+
+ Add min_similarity and prefix_length to flt.
+
+ Added _scope to HasChildQuery. (andreiz)
+
+ Added parent/child document in test indexing. Added _scope to HasChildFilter.
+
+ Added MissingFilter as a subclass of TermFilter
+
+ Fixed error in checking TermsQuery (merrellb)
+
+ If an analyzer is set on a field, the returned mapping will have an analyzer
+
+ Add a specific error subtype for mapper parsing exceptions (rboulton)
+
+ Add support for Float numeric field mappings (rboulton)
+
+ ES.get() now accepts "fields" as well as other keyword arguments (eg "routing") (rboulton)
+
+ Allow dump_curl to be passed a filehandle (or still a filename), don't force filenames to be in /tmp, and add a basic test of it.
+
+ Add alias handling (rboulton)
+
+ Add ElasticSearchIllegalArgumentException - used for example when writing to an alias which refers to more than one index. (rboulton)
+
+ Handle errors produced by deleting a missing document, and add a test for it. (rboulton)
+
+ Split Query object into a Search object, for the search specific parts, and a Query base class. Allow ES.search() to take a query or a search object. Make some of the methods of Query base classes chainable, where that is an obviously reasonable thing to do. (rboulton)
+
+v. 0.14.0: Added delete of mapping type.
+
+ Embedded urllib3 to be buildout safe and for users sake.
+
+ Some code cleanup.
+
+ Added reindex by query (usable only with my elasticsearch git branch).
+
+ Added contrib with mailman indexing.
+
+ Autodetect if django is available and added related functions.
+
+ Code cleanup and PEP8.
+
+ Reactivated the morelikethis query.
+
+ Fixed river support plus unittest. (Tavis Aitken)
+
+ Added autorefresh to sync search and write.
+
+ Added QueryFilter.
+
+ Forced name attribute in multifield declaration.
+
+ Added is_empty to ConstantScoreQuery and fixed some bad behaviour.
+
+ Added CustomScoreQuery.
+
+ Added parent/children indexing.
+
+ Added dump commands in a script file "curl" way.
+
+ Added a lot of fixes from Richard Boulton.
+
+v. 0.13.1: Added jython support (HTTP only for now).
+
+v. 0.13.0: API Changes: errors -> exceptions.
+
+ Splitting of query/filters.
+
+ Added open/close of index.
+
+ Added the number of retries if server is down.
+
+ Refactored Range query. (Andrei)
+
+ Improved HTTP connection timeout/retries. (Sandymahalo)
+
+ Cleanup some imports. (Sandymahalo)
+
+v. 0.12.1: Added collecting server info.
+
+ Version 0.12 or above requirement.
+
+ Fixed attachment plugin.
+
+ Updated bulk insert to use new api.
+
+ Added facet support (except geotypes).
+
+ Added river support.
+
+ Cleaned up some methods.
+
+ Added default_indexes variable.
+
+ Added datetime deserialization.
+
+ Improved performance and memory usage in bulk insert replacing list with StringIO.
+
+ Initial propagation of elasticsearch exception to python.
+
+v. 0.12.0: added http transport, added autodetect of transport, updated thrift interface.
+
+v. 0.10.3: added bulk insert, explain and facet.
+
+v. 0.10.2: added new geo query type.
+
+v. 0.10.1: added new connection pool system based on pycassa one.
+
+v. 0.10.0: initial working version.
+
+
+TODO
+----
+
+- more docs
+- more tests
+- cleanup
+- add coverage
+- add jython native client protocol
+
+License
+=======
+
+This software is licensed under the ``New BSD License``. See the ``LICENSE``
+file in the top distribution directory for the full license text.
+
+.. # vim: syntax=rst expandtab tabstop=4 shiftwidth=4 shiftround
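
Read in sequence, the usage snippets above compose into one index-and-search round trip. A condensed sketch, reusing only the calls and values shown (TermQuery arrives via the wildcard import):

    >>> from pyes import *
    >>> conn = ES('127.0.0.1:9500')
    >>> try:
    ...     conn.delete_index("test-index")
    ... except Exception:
    ...     pass                       # the index may not exist yet
    >>> conn.create_index("test-index")
    >>> conn.index({"name": "Joe Tester"}, "test-index", "test-type", 1)
    >>> conn.refresh(["test-index"])   # make the new document searchable
    >>> result = conn.search(query=TermQuery("name", "joe"))
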
300 packages/pyes/README.rst
@@ -0,0 +1,300 @@
+=============================
+ pyes - Python ElasticSearch
+=============================
+
+:Web: http://pypi.python.org/pypi/pyes/
+:Download: http://pypi.python.org/pypi/pyes/
+:Source: http://github.com/aparo/pyes/
+:Keywords: search, elasticsearch, distributed search
+
+--
+
+pyes is a connector for using elasticsearch from Python.
+
+This version requires elasticsearch 0.15 or above.
+
+Features
+========
+
+- Thrift/HTTP protocols
+- Bulk insert/delete
+- Index management
+- Every search query type
+- Facet Support
+- Geolocalization support
+- Highlighting
+- River support
+
+Connecting
+==========
+
+These functions are taken from pycassa.
+
+Import the module:
+
+ >>> import pyes
+
+pyes can use either the standard HTTP protocol or Thrift. If your port starts with "92", the HTTP protocol is used; otherwise Thrift is used.
+
+
+For a single connection (which is _not_ thread-safe), pass a list of servers.
+
+For thrift:
+
+ >>> conn = pyes.ES() # Defaults to connecting to the server at '127.0.0.1:9500'
+ >>> conn = pyes.ES(['127.0.0.1:9500'])
+
+For http:
+
+ >>> conn = pyes.ES(['127.0.0.1:9200'])
+
+Connections are robust to server failures. Upon a disconnection, the client will attempt to connect to each server in the list in turn. If no server is available, it raises a NoServerAvailable exception.
+
+Timeouts are also supported and should be used in production to prevent a thread from freezing while waiting for the server to return.
+
+ >>> conn = pyes.ES(timeout=3.5) # 3.5 second timeout
+ (Make some pyes calls and the connection to the server suddenly becomes unresponsive.)
+
+ Traceback (most recent call last):
+ ...
+ pyes.connection.NoServerAvailable
+
+Note that this only handles socket timeouts.
+
+
+Usage
+=====
+
+Creating a connection:
+
+ >>> from pyes import *
+ >>> conn = ES('127.0.0.1:9500')
+
+Deleting an index:
+
+ >>> try:
+ >>> conn.delete_index("test-index")
+ >>> except:
+ >>> pass
+
+(an exception is raised if the index is not present)
+
+Create an index:
+
+ >>> conn.create_index("test-index")
+
+Creating a mapping:
+
+ >>> mapping = { u'parsedtext': {'boost': 1.0,
+ >>> 'index': 'analyzed',
+ >>> 'store': 'yes',
+ >>> 'type': u'string',
+ >>> "term_vector" : "with_positions_offsets"},
+ >>> u'name': {'boost': 1.0,
+ >>> 'index': 'analyzed',
+ >>> 'store': 'yes',
+ >>> 'type': u'string',
+ >>> "term_vector" : "with_positions_offsets"},
+ >>> u'title': {'boost': 1.0,
+ >>> 'index': 'analyzed',
+ >>> 'store': 'yes',
+ >>> 'type': u'string',
+ >>> "term_vector" : "with_positions_offsets"},
+ >>> u'pos': {'store': 'yes',
+ >>> 'type': u'integer'},
+ >>> u'uuid': {'boost': 1.0,
+ >>> 'index': 'not_analyzed',
+ >>> 'store': 'yes',
+ >>> 'type': u'string'}}
+ >>> conn.put_mapping("test-type", {'properties':mapping}, ["test-index"])
+
+Index some documents:
+
+ >>> conn.index({"name":"Joe Tester", "parsedtext":"Joe Testere nice guy", "uuid":"11111", "position":1}, "test-index", "test-type", 1)
+ >>> conn.index({"name":"Bill Baloney", "parsedtext":"Joe Testere nice guy", "uuid":"22222", "position":2}, "test-index", "test-type", 2)
+
+Refresh an index:
+
+ >>> conn.refresh(["test-index"])
+
+Execute a query:
+
+ >>> q = TermQuery("name", "joe")
+ >>> result = conn.search(query=q)
+
+For more examples, look at the tests.
+
+
+Changelog
+=========
+
+Note for next release - the order of geolocation parameters expected by
+elasticsearch changed between ES 0.14.4 and ES 0.15, from [lat, lon] to [lon,
+lat]. Clients will need to update accordingly, or use an object with named
+parameters.
+
+v. 0.16.0:
+
+ Updated documentation.
+
+ Added TextQuery and some clean up of code.
+
+ Added percolator (matterkkila).
+
+ Added date_histogram facet (zebuline).
+
+ Added script fields to Search object, also add "fields" to TermFacet (aguereca).
+
+ Added analyze_wildcard param to StringQuery (available for ES 0.16.0) (zebuline).
+
+ Add ScriptFields object used as parameter script_fields of Search object (aguereca).
+
+ Add IdsQuery, IdsFilter and deleteByQuery (aguereca).
+
+ Bulk delete (acdha).
+
+v. 0.15.0:
+
+ Only require simplejson for python < 2.6 (matterkkila)
+
+ Added basic version support to ES.index and Search (merrellb)
+
+ Added scan method to ES. This is only supported on ES Master (pre 0.16) (merrellb)
+
+ Added GeoPointField to mapping types (merrellb)
+
+ Disable thrift in setup.py.
+
+ Added missing _routing property in ObjectField
+
+ Added ExistsFilter
+
+ Improved HasChildren
+
+ Add min_similarity and prefix_length to flt.
+
+ Added _scope to HasChildQuery. (andreiz)
+
+ Added parent/child document in test indexing. Added _scope to HasChildFilter.
+
+ Added MissingFilter as a subclass of TermFilter
+
+ Fixed error in checking TermsQuery (merrellb)
+
+ If an analyzer is set on a field, the returned mapping will have an analyzer
+
+ Add a specific error subtype for mapper parsing exceptions (rboulton)
+
+ Add support for Float numeric field mappings (rboulton)
+
+ ES.get() now accepts "fields" as well as other keyword arguments (eg "routing") (rboulton)
+
+ Allow dump_curl to be passed a filehandle (or still a filename), don't force filenames to be in /tmp, and add a basic test of it.
+
+ Add alias handling (rboulton)
+
+ Add ElasticSearchIllegalArgumentException - used for example when writing to an alias which refers to more than one index. (rboulton)
+
+ Handle errors produced by deleting a missing document, and add a test for it. (rboulton)
+
+ Split Query object into a Search object, for the search specific parts, and a Query base class. Allow ES.search() to take a query or a search object. Make some of the methods of Query base classes chainable, where that is an obviously reasonable thing to do. (rboulton)
+
+v. 0.14.0: Added delete of mapping type.
+
+ Embedded urllib3 to be buildout safe and for users sake.
+
+ Some code cleanup.
+
+ Added reindex by query (usable only with my elasticsearch git branch).
+
+ Added contrib with mailman indexing.
+
+ Autodetect if django is available and added related functions.
+
+ Code cleanup and PEP8.
+
+ Reactivated the morelikethis query.
+
+ Fixed river support plus unittest. (Tavis Aitken)
+
+ Added autorefresh to sync search and write.
+
+ Added QueryFilter.
+
+ Forced name attribute in multifield declaration.
+
+ Added is_empty to ConstantScoreQuery and fixed some bad behaviour.
+
+ Added CustomScoreQuery.
+
+ Added parent/children indexing.
+
+ Added dump commands in a script file "curl" way.
+
+ Added a lot of fixes from Richard Boulton.
+
+v. 0.13.1: Added jython support (HTTP only for now).
+
+v. 0.13.0: API Changes: errors -> exceptions.
+
+ Splitting of query/filters.
+
+ Added open/close of index.
+
+ Added the number of retries if server is down.
+
+ Refactored Range query. (Andrei)
+
+ Improved HTTP connection timeout/retries. (Sandymahalo)
+
+ Cleanup some imports. (Sandymahalo)
+
+v. 0.12.1: Added collecting server info.
+
+ Version 0.12 or above requirement.
+
+ Fixed attachment plugin.
+
+ Updated bulk insert to use new api.
+
+ Added facet support (except geotypes).
+
+ Added river support.
+
+ Cleaned up some methods.
+
+ Added default_indexes variable.
+
+ Added datetime deserialization.
+
+ Improved performance and memory usage in bulk insert replacing list with StringIO.
+
+ Initial propagation of elasticsearch exception to python.
+
+v. 0.12.0: added http transport, added autodetect of transport, updated thrift interface.
+
+v. 0.10.3: added bulk insert, explain and facet.
+
+v. 0.10.2: added new geo query type.
+
+v. 0.10.1: added new connection pool system based on pycassa one.
+
+v. 0.10.0: initial working version.
+
+
+TODO
+----
+
+- more docs
+- more tests
+- cleanup
+- add coverage
+- add jython native client protocol
+
+License
+=======
+
+This software is licensed under the ``New BSD License``. See the ``LICENSE``
+file in the top distribution directory for the full license text.
+
+.. # vim: syntax=rst expandtab tabstop=4 shiftwidth=4 shiftround
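
The port-prefix autodetection described above means the same constructor selects the transport, so switching protocols is just a matter of the port number; a minimal sketch:

    >>> import pyes
    >>> http_conn = pyes.ES(['127.0.0.1:9200'])    # port starts with "92": HTTP
    >>> thrift_conn = pyes.ES(['127.0.0.1:9500'])  # any other port: Thrift
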
2 packages/pyes/THANKS
@@ -0,0 +1,2 @@
+Thanks to kimchy and lukasvlcek.
+
2 packages/pyes/TODO
@@ -0,0 +1,2 @@
+Please see our Issue Tracker at GitHub:
+ http://github.com/aparo/pyes/issues
169 packages/pyes/contrib/mailman/archive_and_index.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2010 by the Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+# USA.
+
+"""This is a template for constructing an external archiver for situations
+where one wants to archive posts in Mailman's pipermail archive, but also
+wants to invoke some other process on the archived message after its URL
+and/or path are known.
+
+It assumes this is invoked by mm_cfg.py settings like
+PUBLIC_EXTERNAL_ARCHIVER = '/path/to/Ext_Arch.py %(hostname)s %(listname)s'
+PRIVATE_EXTERNAL_ARCHIVER = '/path/to/Ext_Arch.py %(hostname)s %(listname)s'
+
+The path in the sys.path.insert() below must be adjusted to the actual path
+to Mailman's bin/ directory, or you can simply put this script in Mailman's
+bin/ directory and it will work without the sys.path.insert() and of course,
+you must add the code you want to the ext_process function.
+"""
+
+import sys
+sys.path.insert(0, '/usr/local/mailman/bin') # path to your mailman dir
+import paths
+
+import os
+import email
+import time
+
+from cStringIO import StringIO
+
+from Mailman import Message
+from Mailman import MailList
+from Mailman.Archiver import HyperArch
+from Mailman.Logging.Syslog import syslog
+from Mailman.Logging.Utils import LogStdErr
+
+# For debugging, log stderr to Mailman's 'debug' log
+LogStdErr('debug', 'mailmanctl', manual_reprime=0)
+
+def ext_process(listname, hostname, url, filepath, msg):
+ """Here's where you put your code to deal with the just archived message.
+
+ Arguments here are the list name, the host name, the URL to the just
+ archived message, the file system path to the just archived message and
+ the message object.
+
+ These can be replaced or augmented as needed.
+ """
+ from pyes import ES
+ from pyes.exceptions import ClusterBlockException, NoServerAvailable
+ import datetime
+
+ # CHANGE these settings to reflect your configuration
+ _ES_SERVERS = ['127.0.0.1:9500'] # I prefer thrift
+ _indexname = "mailman"
+ _doctype = "mail"
+ date = datetime.datetime.today()
+
+ try:
+ iconn = ES(_ES_SERVERS)
+ status = None
+ try:
+ status = iconn.status(_indexname)
+ syslog('debug', 'Indexer status: %s' % status)
+ except:
+ iconn.create_index(_indexname)
+ time.sleep(1)
+ status = iconn.status(_indexname)
+ mappings = { u'text': {'boost': 1.0,
+ 'index': 'analyzed',
+ 'store': 'yes',
+ 'type': u'string',
+ "term_vector" : "with_positions_offsets"},
+ u'url': {'boost': 1.0,
+ 'index': 'not_analyzed',
+ 'store': 'yes',
+ 'type': u'string',
+ "term_vector" : "no"},
+ u'title': {'boost': 1.0,
+ 'index': 'analyzed',
+ 'store': 'yes',
+ 'type': u'string',
+ "term_vector" : "with_positions_offsets"},
+ u'date': {'store': 'yes',
+ 'type': u'date'}}
+ time.sleep(1)
+ status = iconn.put_mapping(_doctype, mappings, _indexname)
+
+
+ data = dict(url=url,
+ title=msg.get('subject'),
+ date=date,
+ text=str(msg)
+ )
+ iconn.index(data, _indexname, _doctype)
+
+ syslog('debug', 'listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
+ listname, hostname, url, filepath, msg)
+ except ClusterBlockException:
+ syslog('error', 'Cluster in recovery state: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
+ listname, hostname, url, filepath, msg)
+ except NoServerAvailable:
+ syslog('error', 'No server available: listname: %s, hostname: %s, url: %s, path: %s, msg: %s',
+ listname, hostname, url, filepath, msg)
+ except:
+ import traceback
+ syslog('error', 'Unknown: listname: %s, hostname: %s, url: %s, path: %s, msg: %s\nstacktrace: %s',
+ listname, hostname, url, filepath, msg, repr(traceback.format_exc()))
+
+ return
+
+def main():
+ """This is the mainline.
+
+ It first invokes the pipermail archiver to add the message to the archive,
+ then calls the function above to do whatever with the archived message
+ after its URL and path are known.
+ """
+
+ listname = sys.argv[2]
+ hostname = sys.argv[1]
+
+ # We must get the list unlocked here because it is already locked in
+ # ArchRunner. This is safe because we aren't actually changing our list
+ # object. ArchRunner's lock plus pipermail's archive lock will prevent
+ # any race conditions.
+ mlist = MailList.MailList(listname, lock=False)
+
+ # We need a seekable file for processUnixMailbox()
+ f = StringIO(sys.stdin.read())
+
+ # If we don't need a Message.Message instance, we can skip the next and
+ # the imports of email and Message above.
+ msg = email.message_from_file(f, Message.Message)
+
+ h = HyperArch.HyperArchive(mlist)
+ # Get the message number for the next message
+ sequence = h.sequence
+ # and add the message.
+ h.processUnixMailbox(f)
+ f.close()
+
+ # Get the archive name, etc.
+ archive = h.archive
+ msgno = '%06d' % sequence
+ filename = msgno + '.html'
+ filepath = os.path.join(h.basedir, archive, filename)
+ h.close()
+
+ url = '%s%s/%s' % (mlist.GetBaseArchiveURL(), archive, filename)
+
+ ext_process(listname, hostname, url, filepath, msg)
+
+if __name__ == '__main__':
+ main()
147 packages/pyes/pavement.py
@@ -0,0 +1,147 @@
+from paver.easy import *
+from paver import doctools
+from paver.setuputils import setup
+
+options(
+ sphinx=Bunch(builddir=".build"),
+)
+
+def sphinx_builddir(options):
+ return path("docs") / options.sphinx.builddir / "html"
+
+
+@task
+def clean_docs(options):
+ sphinx_builddir(options).rmtree()
+
+
+@task
+@needs("clean_docs", "paver.doctools.html")
+def html(options):
+ destdir = path("Documentation")
+ destdir.rmtree()
+ builtdocs = sphinx_builddir(options)
+ builtdocs.move(destdir)
+
+
+@task
+@needs("paver.doctools.html")
+def qhtml(options):
+ destdir = path("Documentation")
+ builtdocs = sphinx_builddir(options)
+ sh("rsync -az %s/ %s" % (builtdocs, destdir))
+
+
+@task
+@needs("clean_docs", "paver.doctools.html")
+def ghdocs(options):
+ builtdocs = sphinx_builddir(options)
+ sh("sphinx-to-github", cwd=builtdocs)
+ sh("git checkout gh-pages && \
+ cp -r %s/* . && \
+ git commit . -m 'Rendered documentation for Github Pages.' && \
+ git push origin gh-pages && \
+ git checkout master" % builtdocs)
+
+
+@task
+@needs("clean_docs", "paver.doctools.html")
+def upload_pypi_docs(options):
+ builtdocs = path("docs") / options.builddir / "html"
+ sh("python setup.py upload_sphinx --upload-dir='%s'" % (builtdocs))
+
+
+@task
+@needs("upload_pypi_docs", "ghdocs")
+def upload_docs(options):
+ pass
+
+
+@task
+def autodoc(options):
+ sh("contrib/release/doc4allmods pyes")
+
+
+@task
+def verifyindex(options):
+ sh("contrib/release/verify-reference-index.sh")
+
+
+@task
+def flakes(options):
+ sh("find pyes -name '*.py' | xargs pyflakes")
+
+
+@task
+def clean_readme(options):
+ path("README").unlink()
+ path("README.rst").unlink()
+
+
+@task
+@needs("clean_readme")
+def readme(options):
+ sh("python contrib/release/sphinx-to-rst.py docs/templates/readme.txt \
+ > README.rst")
+ sh("ln -sf README.rst README")
+
+
+@task
+def bump(options):
+ sh("bump -c pyes")
+
+
+@task
+@cmdopts([
+ ("coverage", "c", "Enable coverage"),
+ ("quick", "q", "Quick test"),
+ ("verbose", "V", "Make more noise"),
+])
+def test(options):
+ cmd = "CELERY_LOADER=default nosetests"
+ if getattr(options, "coverage", False):
+ cmd += " --with-coverage3"
+ if getattr(options, "quick", False):
+ cmd = "QUICKTEST=1 SKIP_RLIMITS=1 %s" % cmd
+ if getattr(options, "verbose", False):
+ cmd += " --verbosity=2"
+ sh(cmd)
+
+
+@task
+@cmdopts([
+ ("noerror", "E", "Ignore errors"),
+])
+def pep8(options):
+ noerror = getattr(options, "noerror", False)
+ return sh("""find . -name "*.py" | xargs pep8 | perl -nle'\
+ print; $a=1 if $_}{exit($a)'""", ignore_error=noerror)
+
+
+@task
+def removepyc(options):
+ sh("find . -name '*.pyc' | xargs rm")
+
+
+@task
+@needs("removepyc")
+def gitclean(options):
+ sh("git clean -xdn")
+
+
+@task
+@needs("removepyc")
+def gitcleanforce(options):
+ sh("git clean -xdf")
+
+
+@task
+@needs("pep8", "autodoc", "verifyindex", "test", "gitclean")
+def releaseok(options):
+ pass
+
+
+@task
+@needs("releaseok", "removepyc", "upload_docs")
+def release(options):
+ pass
BIN packages/pyes/pyes/.___init__.py
Binary file not shown.
42 packages/pyes/pyes/__init__.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import logging
+logger = logging.getLogger(__name__)
+
+VERSION = (0, 16, 0)
+
+__version__ = ".".join(map(str, VERSION[0:3])) + "".join(VERSION[3:])
+__author__ = "Alberto Paro"
+__contact__ = "alberto.paro@gmail.com"
+__homepage__ = "http://github.com/aparo/pyes/"
+__docformat__ = "restructuredtext"
+
+
+def is_stable_release():
+ if len(VERSION) > 3 and isinstance(VERSION[3], basestring):
+ return False
+ return not VERSION[1] % 2
+
+
+def version_with_meta():
+ return "%s (%s)" % (__version__,
+ is_stable_release() and "stable" or "unstable")
+
+from es import ES, file_to_attachment, decode_json
+from query import *
+from rivers import *
+from filters import *
+#from highlight import HighLighter
+from utils import *
+try:
+ #useful for additional query extra features
+ from query_extra import *
+except ImportError:
+ pass
+
+try:
+ #useful for additional features for django users
+ from djangoutils import *
+except ImportError:
+ pass
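
With VERSION = (0, 16, 0) as defined above, is_stable_release() tests the minor version for evenness and version_with_meta() reports the result; a quick sketch:

    >>> import pyes
    >>> pyes.__version__
    '0.16.0'
    >>> pyes.version_with_meta()   # minor version 16 is even, hence stable
    '0.16.0 (stable)'
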
206 packages/pyes/pyes/connection.py
@@ -0,0 +1,206 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+__author__ = 'Alberto Paro'
+
+"""
+Work taken from pycassa
+"""
+import logging
+import random
+import socket
+import threading
+import time
+
+from thrift import Thrift
+from thrift.transport import TTransport
+from thrift.transport import TSocket
+from thrift.protocol import TBinaryProtocol
+from pyesthrift import Rest
+
+from exceptions import NoServerAvailable
+
+__all__ = ['connect', 'connect_thread_local', 'NoServerAvailable']
+
+DEFAULT_SERVER = '127.0.0.1:9500'
+#API_VERSION = VERSION.split('.')
+
+log = logging.getLogger('pyes')
+
+class ClientTransport(object):
+ """Encapsulation of a client session."""
+
+ def __init__(self, server, framed_transport, timeout, recycle):
+ host, port = server.split(":")
+ socket = TSocket.TSocket(host, int(port))
+ if timeout is not None:
+ socket.setTimeout(timeout*1000.0)
+ if framed_transport:
+ transport = TTransport.TFramedTransport(socket)
+ else:
+ transport = TTransport.TBufferedTransport(socket)
+ protocol = TBinaryProtocol.TBinaryProtocolAccelerated(transport)
+ client = Rest.Client(protocol)
+ transport.open()
+
+# server_api_version = client.describe_version().split('.', 1)
+# assert server_api_version[0] == API_VERSION[0], \
+# "Thrift API version mismatch. " \
+# "(Client: %s, Server: %s)" % (API_VERSION[0], server_api_version[0])
+
+ self.client = client
+ self.transport = transport
+
+ if recycle:
+ self.recycle = time.time() + recycle + random.uniform(0, recycle * 0.1)
+ else:
+ self.recycle = None
+
+
+def connect(servers=None, framed_transport=False, timeout=None,
+ retry_time=60, recycle=None, round_robin=None, max_retries=3):
+ """
+ Constructs a single ElasticSearch connection. Connects to a randomly chosen
+ server on the list.
+
+ If the connection fails, it will attempt to connect to each server on the
+ list in turn until one succeeds. If it is unable to find an active server,
+ it will throw a NoServerAvailable exception.
+
+ Failing servers are kept on a separate list and eventually retried, no
+ sooner than `retry_time` seconds after failure.
+
+ Parameters
+ ----------
+ servers : [server]
+ List of ES servers with format: "hostname:port"
+
+ Default: ['127.0.0.1:9500']
+ framed_transport: bool
+ If True, use a TFramedTransport instead of a TBufferedTransport
+ timeout: float
+ Timeout in seconds (e.g. 0.5)
+
+ Default: None (it will stall forever)
+ retry_time: float
+ Minimum time in seconds until a failed server is reinstated. (e.g. 0.5)
+
+ Default: 60
+ recycle: float
+ Max time in seconds before an open connection is closed and returned to the pool.
+
+ Default: None (Never recycle)
+
+ max_retries: int
+ Max number of retries when the connection is down
+
+ round_robin: bool
+ *DEPRECATED*
+
+ Returns
+ -------
+ ES client
+ """
+
+ if servers is None:
+ servers = [DEFAULT_SERVER]
+ return ThreadLocalConnection(servers, framed_transport, timeout,
+ retry_time, recycle, max_retries=max_retries)
+
+connect_thread_local = connect
+
+
+class ServerSet(object):
+ """Automatically balanced set of servers.
+ Manages a separate stack of failed servers, and automatic
+ retrial."""
+
+ def __init__(self, servers, retry_time=10):
+ self._lock = threading.RLock()
+ self._servers = list(servers)
+ self._retry_time = retry_time
+ self._dead = []
+
+ def get(self):
+ self._lock.acquire()
+ try:
+ if self._dead:
+ ts, revived = self._dead.pop()
+ if ts > time.time(): # Not yet, put it back
+ self._dead.append((ts, revived))
+ else:
+ self._servers.append(revived)
+ log.info('Server %r reinstated into working pool', revived)
+ if not self._servers:
+ log.critical('No servers available')
+ raise NoServerAvailable()
+ return random.choice(self._servers)
+ finally:
+ self._lock.release()
+
+ def mark_dead(self, server):
+ self._lock.acquire()
+ try:
+ self._servers.remove(server)
+ self._dead.insert(0, (time.time() + self._retry_time, server))
+ finally:
+ self._lock.release()
+
+
+class ThreadLocalConnection(object):
+ def __init__(self, servers, framed_transport=False, timeout=None,
+ retry_time=10, recycle=None, max_retries=3):
+ self._servers = ServerSet(servers, retry_time)
+ self._framed_transport = framed_transport
+ self._timeout = timeout
+ self._recycle = recycle
+ self._max_retries = max_retries
+ self._local = threading.local()
+
+ def __getattr__(self, attr):
+ def _client_call(*args, **kwargs):
+
+ for retry in xrange(self._max_retries+1):
+ try:
+ conn = self._ensure_connection()
+ return getattr(conn.client, attr)(*args, **kwargs)
+ except (Thrift.TException, socket.timeout, socket.error), exc:
+ log.exception('Client error: %s', exc)
+ self.close()
+
+ if retry < self._max_retries:
+ continue
+
+ raise NoServerAvailable
+
+ setattr(self, attr, _client_call)
+ return getattr(self, attr)
+
+ def _ensure_connection(self):
+ """Make certain we have a valid connection and return it."""
+ conn = self.connect()
+ if conn.recycle and conn.recycle < time.time():
+ log.debug('Client session expired after %is. Recycling.', self._recycle)
+ self.close()
+ conn = self.connect()
+ return conn
+
+ def connect(self):
+ """Create new connection unless we already have one."""
+ if not getattr(self._local, 'conn', None):
+ try:
+ server = self._servers.get()
+ log.debug('Connecting to %s', server)
+ self._local.conn = ClientTransport(server, self._framed_transport,
+ self._timeout, self._recycle)
+ except (Thrift.TException, socket.timeout, socket.error):
+ log.warning('Connection to %s failed.', server)
+ self._servers.mark_dead(server)
+ return self.connect()
+ return self._local.conn
+
+ def close(self):
+ """If a connection is open, close its transport."""
+ if self._local.conn:
+ self._local.conn.transport.close()
+ self._local.conn = None
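
ThreadLocalConnection builds its API lazily: the first access to any client method defines a _client_call wrapper, caches it via setattr, and that wrapper reconnects and retries up to max_retries times before raising NoServerAvailable, while failed servers are parked in the ServerSet for later retrial. A minimal usage sketch of the thrift entry point:

    >>> from pyes.connection import connect
    >>> conn = connect(['127.0.0.1:9500'], timeout=1.0, max_retries=3)
    >>> # Any method of the generated thrift Rest.Client can now be called
    >>> # directly on conn; retries and reconnection happen transparently.
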
207 packages/pyes/pyes/connection_http.py
@@ -0,0 +1,207 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+__author__ = 'Alberto Paro'
+
+"""
+Work taken from pycassa
+"""
+import logging
+import random
+import threading
+import time
+import urllib
+from pyes.exceptions import NoServerAvailable
+import urllib3
+from httplib import HTTPConnection
+from fakettypes import *
+import socket
+import sys
+__all__ = ['connect', 'connect_thread_local']
+
+DEFAULT_SERVER = '127.0.0.1:9200'
+#API_VERSION = VERSION.split('.')
+
+log = logging.getLogger('pyes')
+
+class TimeoutHttpConnectionPool(urllib3.HTTPConnectionPool):
+ def _new_conn(self):
+ """
+ Return a fresh HTTPConnection with timeout passed
+ """
+ self.num_connections += 1
+ log.info("Starting new HTTP connection (%d): %s" % (self.num_connections, self.host))
+ if sys.version_info < (2, 6):
+ return HTTPConnection(host=self.host, port=int(self.port))
+ return HTTPConnection(host=self.host, port=self.port, timeout=self.timeout)
+
+
+class ClientTransport(object):
+ """Encapsulation of a client session."""
+
+ def __init__(self, server, framed_transport, timeout, recycle):
+ host, port = server.split(":")
+ self.client = TimeoutHttpConnectionPool(host, port, timeout)
+ setattr(self.client, "execute", self.execute)
+ if recycle:
+ self.recycle = time.time() + recycle + random.uniform(0, recycle * 0.1)
+ else:
+ self.recycle = None
+
+ def execute(self, request):
+ """
+ Execute a request and return a response
+ """
+ uri = request.uri
+ if request.parameters:
+ uri += '?' + urllib.urlencode(request.parameters)
+ response = self.client.urlopen(Method._VALUES_TO_NAMES[request.method], uri, body=request.body, headers=request.headers)
+ return RestResponse(status=response.status, body=response.data, headers=response.headers)
+
+def connect(servers=None, framed_transport=False, timeout=None,
+ retry_time=60, recycle=None, round_robin=None, max_retries=3):
+ """
+ Constructs a single ElasticSearch connection. Connects to a randomly chosen
+ server on the list.
+
+ If the connection fails, it will attempt to connect to each server on the
+ list in turn until one succeeds. If it is unable to find an active server,
+ it will throw a NoServerAvailable exception.
+
+ Failing servers are kept on a separate list and eventually retried, no
+ sooner than `retry_time` seconds after failure.
+
+ Parameters
+ ----------
+ servers : [server]
+ List of ES servers with format: "hostname:port"
+
+ Default: ['127.0.0.1:9200']
+ framed_transport: bool
+ If True, use a TFramedTransport instead of a TBufferedTransport
+ timeout: float
+ Timeout in seconds (e.g. 0.5)
+
+ Default: None (it will stall forever)
+ retry_time: float
+ Minimum time in seconds until a failed server is reinstated. (e.g. 0.5)
+
+ Default: 60
+ recycle: float
+ Max time in seconds before an open connection is closed and returned to the pool.
+
+ Default: None (Never recycle)
+ max_retries: int
+ Max number of retries when the connection is down
+
+ round_robin: bool
+ *DEPRECATED*
+
+ Returns
+ -------
+ ES client
+ """
+
+ if servers is None:
+ servers = [DEFAULT_SERVER]
+ return ThreadLocalConnection(servers, framed_transport, timeout,
+ retry_time, recycle, max_retries=max_retries)
+
+connect_thread_local = connect
+
+
+class ServerSet(object):
+ """Automatically balanced set of servers.
+ Manages a separate stack of failed servers, and automatic
+ retrial."""
+
+ def __init__(self, servers, retry_time=10):
+ self._lock = threading.RLock()
+ self._servers = list(servers)
+ self._retry_time = retry_time
+ self._dead = []
+
+ def get(self):
+ self._lock.acquire()
+ try:
+ if self._dead:
+ ts, revived = self._dead.pop()
+ if ts > time.time(): # Not yet, put it back
+ self._dead.append((ts, revived))
+ else:
+ self._servers.append(revived)
+ log.info('Server %r reinstated into working pool', revived)
+ if not self._servers:
+ log.critical('No servers available')
+ raise NoServerAvailable()
+ return random.choice(self._servers)
+ finally:
+ self._lock.release()
+
+ def mark_dead(self, server):
+ self._lock.acquire()
+ try:
+ self._servers.remove(server)
+ self._dead.insert(0, (time.time() + self._retry_time, server))
+ finally:
+ self._lock.release()
+
+
+class ThreadLocalConnection(object):
+ def __init__(self, servers, framed_transport=False, timeout=None,
+ retry_time=10, recycle=None, max_retries=3):
+ self._servers = ServerSet(servers, retry_time)
+ self._framed_transport = framed_transport #not used in http
+ self._timeout = timeout
+ self._recycle = recycle
+ self._max_retries = max_retries
+ self._local = threading.local()
+
+ def __getattr__(self, attr):
+ def _client_call(*args, **kwargs):
+
+ for retry in xrange(self._max_retries+1):
+ try:
+ conn = self._ensure_connection()
+ return getattr(conn.client, attr)(*args, **kwargs)
+ except (socket.timeout, socket.error), exc:
+ log.exception('Client error: %s', exc)
+ self.close()
+
+ if retry < self._max_retries:
+ continue
+
+ raise urllib3.MaxRetryError
+
+ setattr(self, attr, _client_call)
+ return getattr(self, attr)
+
+ def _ensure_connection(self):
+ """Make certain we have a valid connection and return it."""
+ conn = self.connect()
+ if conn.recycle and conn.recycle < time.time():
+ log.debug('Client session expired after %is. Recycling.', self._recycle)
+ self.close()
+ conn = self.connect()
+ return conn
+
+ def connect(self):
+ """Create new connection unless we already have one."""
+ if not getattr(self._local, 'conn', None):
+ try:
+ server = self._servers.get()
+ log.debug('Connecting to %s', server)
+ self._local.conn = ClientTransport(server, self._framed_transport,
+ self._timeout, self._recycle)
+ except (socket.timeout, socket.error):
+ log.warning('Connection to %s failed.', server)
+ self._servers.mark_dead(server)
+ return self.connect()
+ return self._local.conn
+
+ def close(self):
+ """If a connection is open, close it."""
+# if self._local.conn:
+# self._local.conn.transport.close()
+ self._local.conn = None
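
ClientTransport.execute() above adapts urllib3 to the thrift-style request/response objects from fakettypes, and the setattr in __init__ exposes it through the same retrying proxy. A sketch of a raw call; the RestRequest field names are assumptions read off how execute() consumes them:

    >>> from pyes.connection_http import connect
    >>> from pyes.fakettypes import Method, RestRequest
    >>> conn = connect(['127.0.0.1:9200'], timeout=2.0)
    >>> req = RestRequest(method=Method.GET, uri='/_cluster/health',
    ...                   parameters={}, headers={}, body=None)
    >>> resp = conn.execute(req)  # RestResponse(status=..., body=..., headers=...)
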
77 packages/pyes/pyes/convert_errors.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""Routines for converting error responses to appropriate exceptions.
+
+"""
+
+__author__ = 'Richard Boulton'
+
+__all__ = ['raise_if_error']
+
+import pyes.exceptions
+
+# Patterns used to map exception strings to classes.
+
+# First, exceptions for which the messages start with the error name,
+# and then contain the error description wrapped in [].
+exceptions_by_name = dict((name, getattr(pyes.exceptions, name))
+ for name in (
+ 'ElasticSearchIllegalArgumentException',
+ 'IndexAlreadyExistsException',
+ 'IndexMissingException',
+ 'SearchPhaseExecutionException',
+ 'ReplicationShardOperationFailedException',
+ 'ClusterBlockException',
+ 'MapperParsingException',
+ )
+)
+
+# Second, patterns for exceptions where the message is just the error
+# description, and doesn't contain an error name. These patterns are matched
+# at the end of the exception.
+exception_patterns_trailing = {
+ '] missing': pyes.exceptions.NotFoundException,
+ '] Already exists': pyes.exceptions.AlreadyExistsException,
+}
+
+def raise_if_error(status, result):
+ """Raise an appropriate exception if the result is an error.
+
+ Any result with a status code of 400 or higher is considered an error.
+
+ The exception raised will either be an ElasticSearchException, or a more
+ specific subclass of ElasticSearchException if the type is recognised.
+
+ The status code and result can be retrieved from the exception by accessing its
+ status and result properties.
+
+ """
+ assert isinstance(status, int)
+
+ if status < 400:
+ return
+
+ if status == 404 and isinstance(result, dict) and result.get('ok'):
+ raise pyes.exceptions.NotFoundException("Item not found", status, result)
+
+ if not isinstance(result, dict) or 'error' not in result:
+ raise pyes.exceptions.ElasticSearchException("Unknown exception type", status, result)
+
+ error = result['error']
+ bits = error.split('[', 1)
+ if len(bits) == 2:
+ excClass = exceptions_by_name.get(bits[0], None)
+ if excClass is not None:
+ msg = bits[1]
+ if msg.endswith(']'):
+ msg = msg[:-1]
+ raise excClass(msg, status, result)
+
+ for pattern, excClass in exception_patterns_trailing.iteritems():
+ if not error.endswith(pattern):
+ continue
+ # For these exceptions, the returned value is the whole descriptive
+ # message.
+ raise excClass(error, status, result)
+
+ raise pyes.exceptions.ElasticSearchException(error, status, result)
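
A sketch of the contract raise_if_error() implements, with the error string following the Name[description] shape the parser above expects:

    >>> import pyes.exceptions
    >>> from pyes.convert_errors import raise_if_error
    >>> raise_if_error(200, {'ok': True})   # status < 400: returns silently
    >>> try:
    ...     raise_if_error(400, {'error': 'IndexMissingException[[test-index] missing]'})
    ... except pyes.exceptions.IndexMissingException:
    ...     pass  # the leading name selects the matching exception subclass
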
125 packages/pyes/pyes/djangoutils.py
@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+__author__ = 'Alberto Paro'
+__all__ = ["get_values"]
+
+# useful to raise an ImportError early if django is not installed
+import django
+
+from types import NoneType
+import datetime
+
+#--- taken from http://djangosnippets.org/snippets/2278/
+
+def get_values(instance, go_into={}, exclude=(), extra=(), skip_none=False):
+ """
+ Transforms a django model instance into an object that can be used for
+ serialization.
+ @param instance(django.db.models.Model) - the model in question
+ @param go_into(dict) - relations with other models that need expanding
+ @param exclude(tuple) - fields that will be ignored
+ @param extra(tuple) - additional functions/properties which are not fields
+ @param skip_none(bool) - skip None field
+
+ Usage:
+ get_values(MyModel.objects.get(pk=187),
+ {'user': {'go_into': ('clan',),
+ 'exclude': ('crest_blob',),
+ 'extra': ('get_crest_path',)}},
+ ('image'))
+
+ """
+ from django.db.models.manager import Manager
+ from django.db.models import Model
+
+ SIMPLE_TYPES = (int, long, str, list, dict, tuple, bool, float,
+ unicode, NoneType)
+
+ if not isinstance(instance, Model):
+ raise TypeError("Argument is not a Model")
+
+ value = {
+ 'pk': instance.pk,
+ }
+
+ # check for simple string instead of tuples
+ # and dicts; this is shorthand syntax
+ if isinstance(go_into, str):
+ go_into = {go_into: {}}
+
+ if isinstance(exclude, str):
+ exclude = (exclude,)
+
+ if isinstance(extra, str):
+ extra = (extra,)
+
+ # process the extra properties/function/whatever
+ for field in extra:
+ property = getattr(instance, field)
+
+ if callable(property):
+ property = property()
+
+ if skip_none and property is None:
+ continue
+ elif isinstance(property, SIMPLE_TYPES):
+ value[field] = property
+ else:
+ value[field] = repr(property)
+
+ field_options = instance._meta.get_all_field_names()
+ for field in field_options:
+ try:
+ property = getattr(instance, field)
+ except Exception:
+ # accessing some related fields can raise; skip those
+ continue
+ if skip_none and property is None:
+ continue
+
+ if field in exclude or field[0] == '_' or isinstance(property, Manager):
+ # if it's in the exclude tuple, ignore it
+ # if it's a "private" field, ignore it
+ # if it's an instance of manager (this means a more complicated
+ # relationship), ignore it
+ continue
+ elif field in go_into:
+ # if it's in the go_into dict, make a recursive call for that field
+ try:
+ field_go_into = go_into[field].get('go_into', {})
+ except AttributeError:
+ field_go_into = {}
+
+ try:
+ field_exclude = go_into[field].get('exclude', ())
+ except AttributeError:
+ field_exclude = ()
+
+ try:
+ field_extra = go_into[field].get('extra', ())
+ except AttributeError:
+ field_extra = ()
+
+ value[field] = get_values(property,
+ field_go_into,
+ field_exclude,
+ field_extra, skip_none=skip_none)
+ else:
+ if isinstance(property, Model):
+ # if it's a model, we need its PK #
+ value[field] = property.pk
+ elif isinstance(property, (datetime.date,
+ datetime.time,
+ datetime.datetime)):
+ value[field] = property
+ else:
+ # else, we just put the value #
+ if callable(property):
+ property = property()
+
+ if isinstance(property, SIMPLE_TYPES):
+ value[field] = property
+ else:
+ value[field] = repr(property)
+
+ return value
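
A minimal sketch of get_values in use; the BlogPost model, its fields, and
the import path are assumptions for illustration only:

    from pyes.djangoutils import get_values
    from myapp.models import BlogPost  # hypothetical model

    post = BlogPost.objects.get(pk=1)
    doc = get_values(post,
                     go_into={'author': {'exclude': ('password',)}},
                     exclude=('raw_html',),
                     skip_none=True)
    # doc is a plain dict (pk, simple fields, a nested dict for 'author'),
    # ready to be serialized to JSON and indexed.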
View
1,017 packages/pyes/pyes/es.py
@@ -0,0 +1,1017 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from __future__ import with_statement
+
+__author__ = 'Alberto Paro'
+__all__ = ['ES', 'file_to_attachment', 'decode_json']
+
+try:
+ # For Python >= 2.6
+ import json
+except ImportError:
+ # For Python < 2.6 or people using a newer version of simplejson
+ import simplejson as json
+
+import logging
+from datetime import date, datetime
+import base64
+import time
+from StringIO import StringIO
+from decimal import Decimal
+
+try:
+ from connection import connect as thrift_connect
+ from pyesthrift.ttypes import Method, RestRequest
+ thrift_enable = True
+except ImportError:
+ from fakettypes import Method, RestRequest
+ thrift_enable = False
+
+from connection_http import connect as http_connect
+log = logging.getLogger('pyes')
+from mappings import Mapper
+
+from convert_errors import raise_if_error
+import pyes.exceptions
+
+def file_to_attachment(filename):
+ """
+ Convert a file to an attachment dict with base64-encoded content
+ """
+ with open(filename, 'rb') as _file:
+ return {'_name':filename,
+ 'content':base64.b64encode(_file.read())
+ }
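+
+# For example, file_to_attachment("report.pdf") (hypothetical filename)
+# returns {'_name': 'report.pdf', 'content': '<base64-encoded file body>'}.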
+
+class ESJsonEncoder(json.JSONEncoder):
+ def default(self, value):
+ """Convert rogue and mysterious data types.
+ Conversion notes:
+
+ - ``datetime.date`` and ``datetime.datetime`` objects are
+ converted into datetime strings.
+ """
+
+ if isinstance(value, datetime):
+ return value.strftime("%Y-%m-%dT%H:%M:%S")
+ elif isinstance(value, date):
+ dt = datetime(value.year, value.month, value.day, 0, 0, 0)
+ return dt.strftime("%Y-%m-%dT%H:%M:%S")
+ elif isinstance(value, Decimal):
+ return float(str(value))
+ else:
+ # use no special encoding and hope for the best
+ return value
+
+class ESJsonDecoder(json.JSONDecoder):
+ def __init__(self, *args, **kwargs):
+ kwargs['object_hook'] = self.dict_to_object
+ json.JSONDecoder.__init__(self, *args, **kwargs)
+
+ def string_to_datetime(self, obj):
+ """Decode a datetime string to a datetime object
+ """
+ if isinstance(obj, basestring) and len(obj) == 19:
+ try:
+ # strptime lives in the time module; parse, then build a datetime
+ return datetime(*time.strptime(obj, "%Y-%m-%dT%H:%M:%S")[:6])
+ except ValueError:
+ pass
+ return obj
+
+ def dict_to_object(self, d):
+ """
+ Decode datetime value from string to datetime
+ """
+ for k, v in d.items():
+ if isinstance(v, basestring) and len(v) == 19:
+ try:
+ d[k] = datetime(*time.strptime(v, "%Y-%m-%dT%H:%M:%S")[:6])
+ except ValueError:
+ pass
+ elif isinstance(v, list):
+ d[k] = [self.string_to_datetime(elem) for elem in v]
+ return d
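+
+# Round-trip sketch: the two classes are symmetric for datetimes, e.g.
+#   json.dumps({'when': datetime(2013, 2, 19)}, cls=ESJsonEncoder)
+#     -> '{"when": "2013-02-19T00:00:00"}'
+#   json.loads('{"when": "2013-02-19T00:00:00"}', cls=ESJsonDecoder)
+#     -> {u'when': datetime.datetime(2013, 2, 19, 0, 0)}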
+
+class ES(object):
+ """
+ ES connection object.
+ """
+
+ def __init__(self, server, timeout=5.0, bulk_size=400,
+ encoder=None, decoder=None,
+ max_retries=3, autorefresh=False,
+ default_indexes=['_all'],
+ dump_curl=False):
+ """
+ Init an ES connection object
+
+ server: the server name; it can also be a list of servers
+ timeout: timeout for a call, in seconds
+ bulk_size: number of items to queue before flushing a bulk operation
+ encoder: a custom JSON encoder class (defaults to ESJsonEncoder)
+ max_retries: maximum number of retries when a server is down
+ autorefresh: whether to refresh the indices before a query, if needed
+
+ dump_curl: If truthy, every query is dumped as a curl command. If this
+ is set to a string value, it names the file the output is written to.
+ Otherwise, it should be an object with a write() method, to which the
+ output is written.
+
+ """
+ self.timeout = timeout
+ self.max_retries = max_retries
+ self.cluster = None
+ self.debug_dump = False
+ self.cluster_name = "undefined"
+ self.connection = None
+ self.autorefresh = autorefresh
+ self.refreshed = True
+ if dump_curl:
+ if isinstance(dump_curl, basestring):
+ self.dump_curl = open(dump_curl, "wb")
+ elif hasattr(dump_curl, 'write'):
+ self.dump_curl = dump_curl
+ else:
+ raise TypeError("dump_curl parameter must be supplied with a "
+ "string or an object with a write() method")
+ else:
+ self.dump_curl = None
+
+ #used in bulk
+ self.bulk_size = bulk_size #size of the bulk
+ self.bulk_data = StringIO()
+ self.bulk_items = 0
+
+ self.info = {} #info about the current server
+ self.encoder = encoder
+ if self.encoder is None:
+ self.encoder = ESJsonEncoder
+ self.decoder = decoder
+ if self.decoder is None:
+ self.decoder = ESJsonDecoder
+ if isinstance(server, (str, unicode)):
+ self.servers = [server]
+ else:
+ self.servers = server
+ self.default_indexes = default_indexes
+ self._init_connection()
+
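+ # Usage sketch (the server address is an assumption; a port starting
+ # with "92", e.g. 9200, makes _init_connection pick the HTTP transport):
+ #   es = ES("127.0.0.1:9200", dump_curl="queries.curl")
+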
+ def __del__(self):
+ """
+ Destructor
+ """
+ if self.bulk_items > 0:
+ self.flush()
+
+ def _init_connection(self):
+ """
+ Create initial connection pool
+ """
+ # detect the connection type from the first server's port: HTTP ports
+ # conventionally start with "92" (e.g. 9200); anything else needs thrift
+ port = self.servers[0].split(":")[1]
+ if port.startswith("92"):
+ self.connection = http_connect(self.servers, timeout=self.timeout, max_retries=self.max_retries)
+ return
+ if not thrift_enable:
+ raise RuntimeError("If you want to use thrift, please install pythrift")
+ self.connection = thrift_connect(self.servers, timeout=self.timeout, max_retries=self.max_retries)
+
+ def _discovery(self):
+ """
+ Discover the other servers in the cluster by asking the given server
+ for its nodes
+ """
+ data = self.cluster_nodes()
+ self.cluster_name = data["cluster_name"]
+ for _, nodedata in data["nodes"].items():
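+ # e.g. 'inet[/192.168.1.1:9200]' becomes 'http://192.168.1.1:9200'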
+ server = nodedata['http_address'].replace("]", "").replace("inet[", "http:/")
+ if server not in self.servers:
+ self.servers.append(server)
+ self._init_connection()
+ return self.servers
+
+ def _send_request(self, method, path, body=None, params={}):
+ # prepare the request
+ if not path.startswith("/"):
+ path = "/" + path
+ if not self.connection:
+ self._init_connection()
+ if body:
+ if isinstance(body, dict):
+ body = json.dumps(body, cls=self.encoder)
+ else:
+ body = ""
+ request = RestRequest(method=Method._NAMES_TO_VALUES[method.upper()], uri=path, parameters=params, headers={}, body=body)
+ if self.dump_curl is not None:
+ self._dump_curl_request(request)
+
+ # execute the request
+ response = self.connection.execute(request)
+
+ # handle the response
+ try:
+ decoded = json.loads(response.body, cls=self.decoder)
+ except ValueError:
+ try:
+ decoded = json.loads(response.body, cls=ESJsonDecoder)
+ except ValueError:
+ # The only known place where we get back a body which can't be
+ # parsed as JSON is when no handler is found for a request URI.
+ # In this case, the body is actually a good message to return
+ # in the exception.
+ raise pyes.exceptions.ElasticSearchException(response.body, response.status, response.body)
+ if response.status != 200:
+ raise_if_error(response.status, decoded)
+ return decoded
+
+ def _make_path(self, path_components):
+ """
+ Smush together the path components. Empty components will be ignored.
+ """
+ path_components = [str(component) for component in path_components if component]
+ path = '/'.join(path_components)
+ if not path.startswith('/'):
+ path = '/' + path
+ return path
+
+ def _query_call(self, query_type, query, indexes=None, doc_types=None, **query_params):
+ """
+ This can be used for search and count calls.
+ These are identical api calls, except for the type of query.
+ """
+ if self.autorefresh and not self.refreshed:
+ self.refresh(indexes)
+ querystring_args = query_params
+ indexes = self._validate_indexes(indexes)
+ if doc_types is None:
+ doc_types = []
+ if isinstance(doc_types, basestring):
+ doc_types = [doc_types]
+ body = query
+ path = self._make_path([','.join(indexes), ','.join(doc_types), query_type])
+ response = self._send_request('GET', path, body, querystring_args)
+ return response
+
+ def _validate_indexes(self, indexes=None):
+ """Return a valid list of indexes.
+
+ `indexes` may be a string or a list of strings.
+ If `indexes` is not supplied, returns the default_indexes.
+
+ """
+ indexes = indexes or self.default_indexes
+ if isinstance(indexes, basestring):
+ indexes = [indexes]
+ return indexes
+
+ def _dump_curl_request(self, request):
+ self.dump_curl.write("# [%s]\n" % datetime.now().isoformat())
+ self.dump_curl.write("curl -X" + Method._VALUES_TO_NAMES[request.method])
+ self.dump_curl.write(" http://" + self.servers[0] + request.uri)
+ if request.body:
+ self.dump_curl.write(" -d \"%s\"" % request.body.replace('"', '\\"'))
+ self.dump_curl.write("\n")
+
+ #---- Admin commands
+ def status(self, indexes=None):
+ """
+ Retrieve the status of one or more indices
+ """
+ indexes = self._validate_indexes(indexes)
+ path = self._make_path([','.join(indexes), '_status'])
+ return self._send_request('GET', path)
+
+ def create_index(self, index, settings=None):
+ """
+ Creates an index with optional settings.
+ Settings must be a dictionary which will be converted to JSON.
+ Elasticsearch also accepts yaml, but we are only passing JSON.
+ """
+ return self._send_request('PUT', index, settings)
+
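+ # e.g. (the settings dict here is illustrative, not the only valid shape):
+ #   es.create_index('twitter', {'index': {'number_of_shards': 1}})
+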
+ def create_index_if_missing(self, index, settings=None):
+ """Creates an index if it doesn't already exist.
+
+ If supplied, settings must be a dictionary.
+