diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..60c018b --- /dev/null +++ b/.coveragerc @@ -0,0 +1,7 @@ +[run] +source = esengine + +[report] +omit = + */python?.?/* + */site-packages/nose/* diff --git a/.coveralls.yml b/.coveralls.yml new file mode 100644 index 0000000..6ff03ac --- /dev/null +++ b/.coveralls.yml @@ -0,0 +1 @@ +repo_token: DzZ30nm43hTFokYZPwlYbIBLGju5D0QI4 diff --git a/.landscape.yaml b/.landscape.yaml new file mode 100644 index 0000000..e65b0f4 --- /dev/null +++ b/.landscape.yaml @@ -0,0 +1,22 @@ +pylint: + disable: + - bare-except + - unused-argument + - pointless-string-statement + - too-many-locals + - too-many-arguments + - protected-access + - unused-variable + - super-on-old-class + +pep8: + disable: + - E1002 + +ignore-paths: + - tests + +requirements: + - test.req + +#max-line-length: 120 diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..7fc1e70 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,15 @@ +language: python +before_install: + - curl -O https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-1.7.3.deb && sudo dpkg -i --force-confnew elasticsearch-1.7.3.deb +before_script: + - sleep 10 +python: + - "2.7" +services: elasticsearch +install: + - "pip install --upgrade -r test.req" +script: make test +after_success: + - coveralls +notifications: + slack: catholabs:9yCjbY6Jgn3Xdy9hwq6PyLEJ diff --git a/LICENSE b/LICENSE index 8cdb845..47ed0f1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,340 +1,22 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. 
By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Lesser General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. 
If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. 
You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. 
If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. 
(This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. 
Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. 
- -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. 
If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. 
- - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - {description} - Copyright (C) {year} {fullname} - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. 
Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - {signature of Ty Coon}, 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. - +Copyright (c) 2015 CathoLabs.com / Catho.com.br + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f1f34b6 --- /dev/null +++ b/Makefile @@ -0,0 +1,21 @@ +.PHONY: test +test: pep8 + py.test -v --cov=esengine -l --tb=short --maxfail=1 tests/ + +.PHONY: install +install: + python setup.py develop + +.PHONY: pep8 +pep8: + @flake8 esengine --ignore=F403 + +.PHONY: sdist +sdist: test + @python setup.py sdist upload + +.PHONY: clean +clean: + @find ./ -name '*.pyc' -exec rm -f {} \; + @find ./ -name 'Thumbs.db' -exec rm -f {} \; + @find ./ -name '*~' -exec rm -f {} \; diff --git a/README.md b/README.md new file mode 100644 index 0000000..5776dd6 --- /dev/null +++ b/README.md @@ -0,0 +1,322 @@ +[![Travis CI](http://img.shields.io/travis/catholabs/esengine.svg)](https://travis-ci.org/catholabs/esengine) +[![Coverage Status](http://img.shields.io/coveralls/catholabs/esengine.svg)](https://coveralls.io/r/catholabs/esengine) +[![Code Health](https://landscape.io/github/catholabs/esengine/development/landscape.svg?style=flat)](https://landscape.io/github/catholabs/esengine/development) +Small Acts Manifesto + +# ESEngine - ElasticSearch ODM +## (Object Document Mapper) inspired by MongoEngine + +

+ EsEngine +

+ + +# install + +ESengine depends on elasticsearch-py (Official E.S Python library) so the installation +depends on the version of elasticsearch you are using. + + +## Elasticsearch 2.x + +```bash +pip install esengine[es2] +``` + +## Elasticsearch 1.x + +```bash +pip install esengine[es1] +``` + +## Elasticsearch 0.90.x + +```bash +pip install esengine[es0] +``` + +The above command will install esengine and the elasticsearch library specific for your ES version. + + +> Alternatively you can install elasticsearch library before esengine + +pip install ```` + +- for 2.0 + use "elasticsearch>=2.0.0,<3.0.0" +- for 1.0 + use "elasticsearch>=1.0.0,<2.0.0" +- under 1.0 use "elasticsearch<1.0.0" + +Then install esengine + +```bash +pip install esengine +``` + +# Getting started + +```python +from elasticsearch import Elasticsearch +from esengine import Document, StringField + +es = Elasticsearch(host='host', port=port) +``` + +# Defining a document + +```python +class Person(Document): + _doctype = "person" + _index = "universe" + + name = StringField() + +``` + +> If you do not specify an "id" field, ESEngine will automatically add "id" as StringField. It is recommended that when specifying you use StringField for ids. + +# Indexing + +```python +person = Person(id=1234, name="Gonzo") +person.save(es=es) +``` + +# Getting by id + +```python +Person.get(id=1234, es=es) +``` + +# filtering by IDS + +```python +ids = [1234, 5678, 9101] +power_trio = Person.filter(ids=ids) +``` + + +# filtering by fields + +```python +Person.filter(name="Gonzo", es=es) +``` + +# Searching + +ESengine does not try to create abstraction for query building, +by default ESengine only implements search transport receiving a raw ES query +in form of a Python dictionary. 
+ +```python +query = { + "query": { + "filtered": { + "query": { + "match_all": {} + }, + "filter": { + "ids": { + "values": list(ids) + } + } + } + } +} +Person.search(query, size=10, es=es) +``` + +# Default connection + +By default ESEngine does not try to implicitly create a connection for you, +but you can easily achieve this overwriting the **get_es** method and returning a +default connection or using any kind of technique as RoundRobin or Mocking for tests. +Also you can set the **_es** attribute pointing to a function generating the connection client +or the client instance as the following example: + +```python + +from elasticsearch import Elasticsearch +from esengine import Document, StringField +from esengine.utils import validate_client + + +class Person(Document): + _doctype = "person" + _index = "universe" + _es = Elasticsearch(host='10.0.0.0') + + name = StringField() + +``` + +# Now you can use the document transport methods omitting ES instance + + +```python +person = Person(id=1234, name="Gonzo") +person.save() + +Person.get(id=1234) + +Person.filter(name="Gonzo") +``` + + +# Updating + +## A single document + +A single document can be updated simply using the **.save()** method + +```python + +person = Person.get(id=1234) +person.name = "Another Name" +person.save() + +``` + +## Updating a Resultset + +The Document methods **.get**, **.filter** and **.search** will return an instance +of **ResultSet** object. This object is an Iterator containing the **hits** reached by +the filtering or search process and exposes some CRUD methods[ **update**, **delete** and **reload** ] +to deal with its results. + + +```python +people = Person.filter(field='value') +people.update(another_field='another_value') +``` + +> When updating documents sometimes you need the changes done in the E.S index reflected in the objects +of the **ResultSet** iterator, so you can use **.reload** method to perform that action. 
+ + +## The use of **reload** method + +```python +people = Person.filter(field='value') +print people +... + +# Updating another field on both instances +people.update(another_field='another_value') +print people +... + +# Note that in E.S index the values were changed but the current ResultSet is not updated by default +# you have to trigger a reload +people.reload() + +print people +... + + +``` + +## Deleting documents + + +### A ResultSet + +```python +people = Person.all() +people.delete() +``` + +### A single document + +```python +Person.get(id=123).delete() +``` + +# Bulk operations + +ESEngine takes advantage of elasticsearch-py helpers for bulk actions, +the **ResultSet** object uses **bulk** method to **update** and **delete** documents. + +But you can use it in an explicit way using Document's **update_all**, **save_all** and **delete_all** methods. + +### Let's create a bunch of document instances + + +```python +top_5_racing_bikers = [] + +for name in ['Eddy Merckx', + 'Bernard Hinault', + 'Jacques Anquetil', + 'Sean Kelly', + 'Lance Armstrong']: + top_5_racing_bikers.append(Person(name=name)) +``` + +### Save it all + +```python +Person.save_all(top_5_racing_bikers) +``` + +### Using the **create** shortcut + +The above could be achieved using **create** shortcut + + +#### A single + +```python +Person.create(name='Eddy Merckx', active=False) +``` + +> Create will return the instance of the indexed Document + +#### All using list comprehension + +```python +top_5_racing_bikers = [ + Person.create(name=name, active=False) + for name in ['Eddy Merckx', + 'Bernard Hinault', + 'Jacques Anquetil', + 'Sean Kelly', + 'Lance Armstrong'] +] + +``` +> NOTE: **.create** method will automatically save the document to the index, and +will not raise an error if there is a document with the same ID (if specified), it will update it acting as upsert. 
+ +### Updating all + +Turning the field **active** to **True** for all documents + +```python +Person.update_all(top_5_racing_bikers, active=True) +``` + +### Deleting all + +```python +Person.delete_all(top_5_racing_bikers) +``` + + +### Chunk size + +chunk_size is the number of docs in one chunk sent to ES (default: 500) +you can change using **meta** argument. + +```python +Person.update_all( + top_5_racing_bikers, # the documents + active=True, # values to be changed + meta={'chunk_size': 200} # meta data passed to **bulk** operation +) +``` + +# Contribute + +ESEngine is OpenSource! join us! diff --git a/es_engine/__init__.py b/es_engine/__init__.py deleted file mode 100644 index bc7c984..0000000 --- a/es_engine/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from es_engine.embedded_document import EmbeddedDocument # noqa -from es_engine.fields import IntegerField # noqa -from es_engine.fields import DateField # noqa -from es_engine.fields import StringField # noqa -from es_engine.fields import FloatField # noqa -from es_engine.document import Document # noqa - -# TODO: unit tests diff --git a/es_engine/bases/document.py b/es_engine/bases/document.py deleted file mode 100644 index e58ea96..0000000 --- a/es_engine/bases/document.py +++ /dev/null @@ -1,39 +0,0 @@ -class BaseDocument(object): - def _initialize_multi_fields(self): - for key, field_class in self.__class__._fields.items(): - if field_class._multi: - setattr(self, key, []) - else: - setattr(self, key, None) - - def __init__(self, *args, **kwargs): - klass = self.__class__.__name__ - if not hasattr(self, '__doc_type__'): - raise ValueError('{} have no __doc_type__ field'.format(klass)) - if not hasattr(self, '__index__'): - raise ValueError('{} have no __index__ field'.format(klass)) - self._initialize_multi_fields() - for key, value in kwargs.iteritems(): - setattr(self, key, value) - - def __setattr__(self, key, value): - if (not key.startswith('_')) and key not in self._fields: - raise KeyError('`{}` is an 
invalid field'.format(key)) - super(BaseDocument, self).__setattr__(key, value) - - def to_dict(self): - result = {} - for field_name, field_class in self._fields.iteritems(): - value = getattr(self, field_name) - field_class.validate(field_name, value) - result.update({field_name: field_class.to_dict(value)}) - return result - - @classmethod - def from_dict(cls, dct): - params = {} - for field_name, field_class in cls._fields.iteritems(): - serialized = dct.get(field_name) - value = field_class.from_dict(serialized) - params[field_name] = value - return cls(**params) diff --git a/es_engine/document.py b/es_engine/document.py deleted file mode 100644 index ba19fad..0000000 --- a/es_engine/document.py +++ /dev/null @@ -1,61 +0,0 @@ -import elasticsearch.helpers as eh - -from es_engine.bases.document import BaseDocument -from es_engine.bases.metaclass import ModelMetaclass - - -class Document(BaseDocument): - __metaclass__ = ModelMetaclass - - def save(self, es): - doc = self.to_dict() - es.index(index=self.__index__, - doc_type=self.__doc_type__, - id=self.id, - body=doc) - - @classmethod - def get(cls, es, id=None, ids=None): - if id is not None and ids is not None: - raise ValueError('id and ids can not be passed together.') - if id is not None: - res = es.get(index=cls.__index__, - doc_type=cls.__doc_type__, - id=id) - return cls.from_dict(dct=res['_source']) - if ids is not None: - query = { - "query": { - "filtered": { - "query": {"match_all": {}}, - "filter": { - "ids": { - "values": list(ids) - } - } - } - }} - resp = es.search( - index=cls.__index__, - doc_type=cls.__doc_type__, - body=query, - size=len(ids) - ) - result = [] - for obj in resp['hits']['hits']: - result.append(cls.from_dict(dct=obj['_source']['doc'])) - return result - - @classmethod - def save_all(cls, es, docs): - updates = [ - { - '_op_type': 'index', - '_index': cls.__index__, - '_type': cls.__doc_type__, - '_id': doc.id, - 'doc': doc.to_dict() - } - for doc in docs - ] - eh.bulk(es, 
updates) diff --git a/es_engine/fields.py b/es_engine/fields.py deleted file mode 100644 index 99cc695..0000000 --- a/es_engine/fields.py +++ /dev/null @@ -1,45 +0,0 @@ -from datetime import datetime - -from es_engine.bases.field import BaseField - - -class IntegerField(BaseField): - __type__ = int - - -class StringField(BaseField): - __type__ = unicode - - -class FloatField(BaseField): - __type__ = float - - -class DateField(BaseField): - __type__ = datetime - - def to_dict(self, value): - return value.strftime("%Y-%m-%d %H:%M:%S") - - def from_dict(self, serialized): - if self._multi: - values = [] - for elem in serialized: - if isinstance(elem, self.__type__): - values.append(elem) - elif isinstance(elem, basestring): - date = datetime.strptime(elem, "%Y-%m-%d %H:%M:%S") - values.append(date) - else: - raise ValueError('Expected str or date. {} found'.format( - elem.__class__) - ) - return values - else: - if isinstance(serialized, self.__type__): - return serialized - elif isinstance(serialized, basestring): - return datetime.strptime(serialized, "%Y-%m-%d %H:%M:%S") - raise ValueError('Expected str or date. 
{} found'.format( - serialized.__class__) - ) diff --git a/esengine/__init__.py b/esengine/__init__.py new file mode 100644 index 0000000..3ee3cc5 --- /dev/null +++ b/esengine/__init__.py @@ -0,0 +1,3 @@ +from esengine.embedded_document import EmbeddedDocument # noqa +from esengine.document import Document # noqa +from esengine.fields import * # noqa diff --git a/es_engine/bases/__init__.py b/esengine/bases/__init__.py similarity index 100% rename from es_engine/bases/__init__.py rename to esengine/bases/__init__.py diff --git a/esengine/bases/document.py b/esengine/bases/document.py new file mode 100644 index 0000000..92d6174 --- /dev/null +++ b/esengine/bases/document.py @@ -0,0 +1,54 @@ +import warnings +from esengine.fields import StringField + + +class BaseDocument(object): + _strict = False + + def _initialize_multi_fields(self): + for key, field_class in self.__class__._fields.items(): + if field_class._multi: + setattr(self, key, []) + else: + setattr(self, key, None) + + def __init__(self, *args, **kwargs): + klass = self.__class__.__name__ + if not hasattr(self, '_doctype'): + raise ValueError('{} have no _doctype attribute'.format(klass)) + if not hasattr(self, '_index'): + raise ValueError('{} have no _index attribute'.format(klass)) + id_field = self.__class__._fields.get("id") + if id_field and not isinstance(id_field, StringField): + warnings.warn( + 'To avoid mapping problems, ' + 'it is recommended to define the id field as a StringField' + ) + self._initialize_multi_fields() + for key, value in kwargs.iteritems(): + setattr(self, key, value) + + def __setattr__(self, key, value): + if (not key.startswith('_')) and key not in self._fields: + raise KeyError('`{}` is an invalid field'.format(key)) + field_instance = self._fields.get(key) + if field_instance and not self._strict: + value = field_instance.from_dict(value) + super(BaseDocument, self).__setattr__(key, value) + + def to_dict(self): + result = {} + for field_name, field_instance in 
self._fields.iteritems(): + value = getattr(self, field_name) + field_instance.validate(field_name, value) + result.update({field_name: field_instance.to_dict(value)}) + return result + + @classmethod + def from_dict(cls, dct): + params = {} + for field_name, field_instance in cls._fields.iteritems(): + serialized = dct.get(field_name) + value = field_instance.from_dict(serialized) + params[field_name] = value + return cls(**params) diff --git a/es_engine/bases/field.py b/esengine/bases/field.py similarity index 58% rename from es_engine/bases/field.py rename to esengine/bases/field.py index 4d7f249..a6f7cbb 100644 --- a/es_engine/bases/field.py +++ b/esengine/bases/field.py @@ -1,16 +1,16 @@ from collections import Iterable -from es_engine.exceptions import RequiredField, InvalidMultiField -from es_engine.exceptions import FieldTypeMismatch +from esengine.exceptions import RequiredField, InvalidMultiField +from esengine.exceptions import FieldTypeMismatch class BaseField(object): def __init__(self, field_type=None, required=False, multi=False, **kwargs): if field_type is not None: - self.__type__ = field_type - self._required = required - self._multi = multi + self._type = field_type + self._required = required or getattr(self, '_required', False) + self._multi = multi or getattr(self, '_multi', False) for key, value in kwargs.iteritems(): setattr(self, key, value) @@ -23,18 +23,19 @@ def validate(self, field_name, value): if not isinstance(value, Iterable): raise InvalidMultiField(field_name) for elem in value: - if not isinstance(elem, self.__type__): - raise FieldTypeMismatch(field_name, self.__type__, + if not isinstance(elem, self._type): + raise FieldTypeMismatch(field_name, self._type, elem.__class__) else: - if not isinstance(value, self.__type__): - raise FieldTypeMismatch(field_name, self.__type__, + if not isinstance(value, self._type): + raise FieldTypeMismatch(field_name, self._type, value.__class__) def to_dict(self, value): return value def 
from_dict(self, serialized): - if self._multi: - return [self.__type__(x) for x in serialized] - return self.__type__(serialized) + if serialized is not None: + if self._multi: + return [self._type(x) for x in serialized] + return self._type(serialized) diff --git a/es_engine/bases/metaclass.py b/esengine/bases/metaclass.py similarity index 55% rename from es_engine/bases/metaclass.py rename to esengine/bases/metaclass.py index 79c3a01..1bd396c 100644 --- a/es_engine/bases/metaclass.py +++ b/esengine/bases/metaclass.py @@ -1,14 +1,20 @@ -from es_engine.bases.field import BaseField +from esengine.fields import StringField +from esengine.bases.field import BaseField class ModelMetaclass(type): def __new__(mcls, name, bases, attrs): # noqa attrs['_fields'] = {} + for base in bases: + if hasattr(base, '_autoid'): + if base._autoid and 'id' not in attrs: + attrs['id'] = StringField() + break for key, value in attrs.iteritems(): if isinstance(value, BaseField): attrs['_fields'][key] = value cls = type.__new__(mcls, name, bases, attrs) if any(x.__name__ == 'EmbeddedDocument' for x in bases): - cls.__type__ = cls + cls._type = cls return cls diff --git a/esengine/bases/result.py b/esengine/bases/result.py new file mode 100644 index 0000000..a619b65 --- /dev/null +++ b/esengine/bases/result.py @@ -0,0 +1,77 @@ +# coding: utf-8 +import time +import elasticsearch.helpers as eh + + +class ResultSet(object): + def __init__(self, values, model, query=None, + size=None, es=None, meta=None): + self._model = model + self._values = values + self._query = query + self._es = model.get_es(es) + self._size = size + self._meta = meta + self._all_values = [] + + def __iter__(self): + return self.values + + @property + def values(self): + return ( + self._model.from_dict(dct=value) + for value in self._values + ) + + @property + def all_values(self): + if not self._all_values: + self._all_values = [i for i in self.values] + return self._all_values + + def __getitem__(self, item): + return 
self.all_values[item] + + def reload(self, sleep=1): + time.sleep(sleep) + self._all_values = [] + resp = self._es.search( + index=self._model._index, + doc_type=self._model._doctype, + body=self._query, + size=self._size or len(self._values) + ) + self._values = [obj['_source'] for obj in resp['hits']['hits']] + + def update(self, meta=None, **kwargs): + if kwargs: + actions = [ + { + '_op_type': 'update', + '_index': self._model._index, + '_type': self._model._doctype, + '_id': doc.id, + 'doc': kwargs + } + for doc in self.values + ] + eh.bulk(self._es, actions, **meta if meta else {}) + + def delete(self, meta=None, **kwargs): + actions = ( + { + '_op_type': 'delete', + '_index': self._model._index, + '_type': self._model._doctype, + '_id': doc.id, + } + for doc in self.values + ) + eh.bulk(self._es, actions, **meta if meta else {}) + + def __unicode__(self): + return unicode(self.__unicode__()) + + def __str__(self): + return "".format(i=self) diff --git a/esengine/document.py b/esengine/document.py new file mode 100644 index 0000000..ce54d4c --- /dev/null +++ b/esengine/document.py @@ -0,0 +1,328 @@ +import elasticsearch.helpers as eh + +from esengine.bases.document import BaseDocument +from esengine.bases.metaclass import ModelMetaclass +from esengine.bases.result import ResultSet +from esengine.utils import validate_client + + +class Document(BaseDocument): + """ + Base Document to be extended in your models definitions + + >>> from elasticsearch import Elasticsearch + >>> from esengine import Document, StringField + >>> class MyDoc(Document): + ... _autoid = True + ... _index = 'indexname' + ... _doctype = 'doctypename' + ... _mapping = {} + ... 
name = StringField() + + >>> obj = MyDoc(name="Gonzo") + >>> obj.save(es=Elasticsearch()) + + >>> MyDoc.filter(name="Gonzo") + + """ + + __metaclass__ = ModelMetaclass + + # If _autoid is set to False the id Field will not be automatically + # included in the Document model and you will need to specify a field + # called 'id' preferably a StringField + _autoid = True + + # If mapping is not specified it will be generated using the document + # model fields and its default patterns and types + # any field mapping can be overwritten by specifying in the following + # instance _mapping dictionary + _mapping = {} + + @classmethod + def get_es(cls, es): + """ + This proxy-method allows the client overwrite + and the use of a default client for a document. + Document transport methods should use cls.get_es(es).method() + This method also validades that the connection is a valid ES client. + :return: elasticsearch.ElasticSearch() instance or equivalent client + """ + if not es and hasattr(cls, '_es'): + es = cls._es if not callable(cls._es) else cls._es() + validate_client(es) + return es + + def save(self, es=None): + """ + Save current instance of a Document + + >>> obj = Document(field='value') + >>> obj.save() + + :param es: ES client or None (if implemented a default in Model) + :return: Nothing or raise error + """ + doc = self.to_dict() + saved_document = self.get_es(es).index( + index=self._index, + doc_type=self._doctype, + id=self.id, # noqa + body=doc + ) + if saved_document.get('created'): + self.id = saved_document['_id'] + + def delete(self, es=None): + """ + Delete current instance of a Document + + >>> obj = Document.get(id=123) + >>> obj.delete() + + :param es: ES client or None (if implemented a default in Model) + :return: Nothing or raise error + """ + self.get_es(es).delete( + index=self._index, + doc_type=self._doctype, + id=self.id, # noqa + ) + + @classmethod + def create(cls, es=None, **kwargs): + """ + Creates and returns an instance of the 
Document + + >>> Document.create(field='value') + + + :param es: ES client or None (if implemented a default in Model) + :param kwargs: fields and its values + :return: Instance of the Document created + """ + instance = cls(**kwargs) + instance.save(es) + return instance + + @classmethod + def all(cls, *args, **kwargs): + """ + Returns a ResultSet with all documents without filtering + A semantic shortcut to filter() without keys + + :param: < See filter parameters> + :return: A ResultSet with all documents in the index/type + """ + return cls.filter(*args, **kwargs) + + @classmethod + def get(cls, id, es=None, **kwargs): # noqa + """ + A get query returning a single document by _id or _uid + + >>> Document.get(id=123) + + :param id: The _id or _uid of the object + :param es: ES client or None (if implemented a default in Model) + :param kwargs: extra key=value to be passed to es client + :return: A single Doc object + """ + es = cls.get_es(es) + res = es.get(index=cls._index, + doc_type=cls._doctype, + id=id, + **kwargs) + return cls.from_dict(dct=res['_source']) + + @classmethod + def filter(cls, es=None, ids=None, size=None, **filters): + """ + A match_all query with filters + + >>> Document.filter(ids=[123, 456]) + >>> Document.filter(name="Gonzo", city="Tunguska", size=10) + + :param es: ES client or None (if implemented a default in Model) + :param ids: Filtering by _id or _uid + :param size: size of result, default 100 + :param filters: key=value parameters + :return: Iterator of Doc objets + """ + + es = cls.get_es(es) + + if ids and filters: + raise ValueError( + "You can't specify ids together with other filters" + ) + + if ids: + query = { + "query": { + "filtered": { + "query": {"match_all": {}}, + "filter": {"ids": {"values": list(ids)}} + } + } + } + elif filters: + query = { + "query": { + "bool": { + "must": [ + {"match": {key: value}} + for key, value in filters.items() + ] + } + } + } + else: + query = { + "query": { + "match_all": {} + } + } + + 
size = len(ids) if ids else size + search_args = dict( + index=cls._index, + doc_type=cls._doctype, + body=query + ) + if size: + search_args['size'] = size + resp = es.search(**search_args) + return cls.build_result(resp, es=es, query=query, size=size) + + @classmethod + def search(cls, query, es=None, **kwargs): + """ + Takes a raw ES query in form of a dict and + return Doc instances iterator + + >>> query = { + ... "query": { + ... "bool": { + ... "must": [ + ... {"match": {"name": "Gonzo"}} + ... ] + ... } + ... } + ...} + >>> results = Document.search(query, size=10) + + :param query: raw query + :param es: ES client or None (if implemented a default in Model) + :param kwargs: extra key=value to be passed to es client + :return: Iterator of Doc objets + """ + es = cls.get_es(es) + resp = es.search( + index=cls._index, + doc_type=cls._doctype, + body=query, + **kwargs + ) + return cls.build_result( + resp, es=es, query=query, size=kwargs.get('size') + ) + + @classmethod + def build_result(cls, resp, query=None, es=None, size=None): + """ + Takes ES client response having ['hits']['hits'] + and turns it to an generator of Doc objects + :param resp: ES client raw results + :param query: The query used to build the results + :return: ResultSet: a generator of Doc objects + """ + # FIxme: should pass meta data and _scores + return ResultSet( + values=[obj['_source'] for obj in resp['hits']['hits']], + model=cls, + query=query, + size=size, + es=cls.get_es(es) + ) + + @classmethod + def save_all(cls, docs, es=None, **kwargs): + """ + Save various Doc instances in bulk + + >>> docs = (Document(value=value) for value in [1, 2, 3]) + >>> Document.save_all(docs) + + :param docs: Iterator of Document instances + :param es: ES client or None (if implemented a default in Model) + :param kwargs: Extra params to be passed to streaming_bulk + :return: Nothing or Raise error + """ + actions = [ + { + '_op_type': 'index', + '_index': cls._index, + '_type': cls._doctype, + 
'_id': doc.id, + '_source': doc.to_dict() + } + for doc in docs + ] + eh.bulk(cls.get_es(es), actions, **kwargs) + + @classmethod + def update_all(cls, docs, es=None, meta=None, **kwargs): + """ + Update various Doc instances in bulk + + >>> docs = (Document(value=value) for value in [1, 2, 3]) + # change all values to zero + >>> Document.update_all(docs, value=0) + + :param docs: Iterator of Document instances + :param es: ES client or None (if implemented a default in Model) + :param kwargs: Extra params to be passed to streaming_bulk + :return: Nothing or Raise error + """ + actions = ( + { + '_op_type': 'update', + '_index': cls._index, + '_type': cls._doctype, + '_id': doc.id, + 'doc': kwargs + } + for doc in docs + ) + eh.bulk(cls.get_es(es), actions, **meta if meta else {}) + + @classmethod + def delete_all(cls, docs, es=None, **kwargs): + """ + Delete various Doc instances in bulk + + >>> docs = (Document(value=value) for value in [1, 2, 3]) + >>> Document.delete_all(docs) + + :param docs: Iterator of Document instances + :param es: ES client or None (if implemented a default in Model) + :param kwargs: Extra params to be passed to streaming_bulk + :return: Nothing or Raise error + """ + actions = [ + { + '_op_type': 'delete', + '_index': cls._index, + '_type': cls._doctype, + '_id': doc.id, + } + for doc in docs + ] + eh.bulk(cls.get_es(es), actions, **kwargs) + + def __unicode__(self): + return unicode(self.__str__()) + + def __str__(self): + return "<{0} {1}>".format(self.__class__.__name__, self.to_dict()) diff --git a/es_engine/embedded_document.py b/esengine/embedded_document.py similarity index 89% rename from es_engine/embedded_document.py rename to esengine/embedded_document.py index 60da750..ccfb271 100644 --- a/es_engine/embedded_document.py +++ b/esengine/embedded_document.py @@ -1,9 +1,9 @@ from collections import Iterable -from es_engine.bases.field import BaseField -from es_engine.bases.metaclass import ModelMetaclass -from 
es_engine.exceptions import RequiredField, InvalidMultiField -from es_engine.exceptions import FieldTypeMismatch +from esengine.bases.field import BaseField +from esengine.bases.metaclass import ModelMetaclass +from esengine.exceptions import RequiredField, InvalidMultiField +from esengine.exceptions import FieldTypeMismatch class EmbeddedDocument(BaseField): @@ -24,7 +24,7 @@ def to_dict(self, value): def _validate_element(self, field_name, elem): if not isinstance(elem, EmbeddedDocument): - raise FieldTypeMismatch(field_name, self.__class__.__type__, + raise FieldTypeMismatch(field_name, self.__class__._type, elem.__class__) for field_name, field_class in self._fields.iteritems(): value = getattr(elem, field_name) diff --git a/es_engine/exceptions.py b/esengine/exceptions.py similarity index 89% rename from es_engine/exceptions.py rename to esengine/exceptions.py index 17d71c5..5f76f44 100644 --- a/es_engine/exceptions.py +++ b/esengine/exceptions.py @@ -1,3 +1,7 @@ +class ClientError(Exception): + pass + + class RequiredField(Exception): pass diff --git a/esengine/fields.py b/esengine/fields.py new file mode 100644 index 0000000..a8b3fb3 --- /dev/null +++ b/esengine/fields.py @@ -0,0 +1,65 @@ +__all__ = [ + 'IntegerField', 'StringField', 'FloatField', + 'DateField', 'BooleanField', 'GeoField' +] + +from datetime import datetime +from esengine.bases.field import BaseField + + +class IntegerField(BaseField): + _type = int + + +class StringField(BaseField): + _type = unicode + + +class FloatField(BaseField): + _type = float + + +class BooleanField(BaseField): + _type = bool + + +class GeoField(FloatField): + _multi = True + + +class DateField(BaseField): + _type = datetime + + @property + def _date_format(self): + return getattr(self, 'date_format', "%Y-%m-%d %H:%M:%S") + + def to_dict(self, value): + if value: + return value.strftime(self._date_format) + + def from_dict(self, serialized): + if serialized: + if self._multi: + values = [] + for elem in serialized: + 
if isinstance(elem, self._type): + values.append(elem) + elif isinstance(elem, basestring): + date = datetime.strptime(elem, self._date_format) + values.append(date) + else: + raise ValueError( + 'Expected str or date. {} found'.format( + elem.__class__ + ) + ) + return values + else: + if isinstance(serialized, self._type): + return serialized + elif isinstance(serialized, basestring): + return datetime.strptime(serialized, self._date_format) + raise ValueError('Expected str or date. {} found'.format( + serialized.__class__) + ) diff --git a/esengine/utils.py b/esengine/utils.py new file mode 100644 index 0000000..4336c69 --- /dev/null +++ b/esengine/utils.py @@ -0,0 +1,25 @@ +# coding: utf-8 + +from esengine.exceptions import ClientError + + +def validate_client(es): + """ + A valid ES client is a interface which must implements at least + "index" and "search" public methods. + preferably an elasticsearch.ElasticSearch() instance + :param es: + :return: None + """ + + if not es: + raise ClientError("ES client cannot be Nonetype") + + try: + if not callable(es.index) or not callable(es.search) or \ + not callable(es.get): + raise ClientError( + "index or search or get Interface is not callable" + ) + except AttributeError as e: + raise ClientError(str(e)) diff --git a/example.py b/example.py new file mode 100644 index 0000000..a1b375c --- /dev/null +++ b/example.py @@ -0,0 +1,149 @@ +# coding: utf-8 + +import time +import datetime +from elasticsearch import Elasticsearch +from esengine import ( + Document, StringField, IntegerField, BooleanField, + FloatField, GeoField, DateField +) + + +class ExampleDoc(Document): + _index = 'esengine_test' + _doctype = 'example' + _es = Elasticsearch() + + name = StringField() + age = IntegerField() + active = BooleanField() + weight = FloatField() + location = GeoField() + birthday = DateField(date_format="%Y-%m-%d") + city = StringField() + +######################################################################## +instances = 
[] +gonzo = ExampleDoc( + id=123456, + name="Gonzo", + age="2", + active=True, + weight="30.5", + location=[0.345, 1.456], + city="Tunguska" +) +gonzo.birthday = '2015-01-01' +gonzo.save() +instances.append(gonzo) + + +mongo = ExampleDoc( + id=789100, + name="Mongo", + age="3", + active=False, + weight="10.5", + location=[0.342, 2.456], + birthday=datetime.datetime.today(), + city="Tunguska" +) +mongo.save() +instances.append(mongo) + + + +######################################################################## + +for instance in instances: + print instance + + print "get by id=", instance.id, ExampleDoc.get(id=instance.id) + + print "Filter by name=", instance.name, [ + item.to_dict() for item in ExampleDoc.filter(name=instance.name, size=2) + ] + + print "Filter by name='" + instance.name + "', active=", instance.active, [ + item.to_dict() + for item in ExampleDoc.filter(name="Gonzo", active=instance.active, size=2) + ] + + QUERY = { + "query": { + "bool": { + "must": [ + {"match": {"name": instance.name}} + ] + } + } + } + + print "Search by query:", QUERY, [ + item.to_dict() + for item in ExampleDoc.search(QUERY) + ] + print "#" * 120 + + +for instance in instances: + print instance.name, "Old age:", instance.age + instance.age += 1 + print instance.name, "New age:", instance.age + +ExampleDoc.save_all(instances) + +for instance in instances: + print instance.name, "Saved age is now:", instance.age + +for instance in instances: + print "{i.name} activation is {i.active}".format(i=instance) + +######################################################################## + +time.sleep(2) + +print "updating turning activations to True" + +QUERY = { + "query": { + "bool": { + "must": [ + {"match": {"city": "Tunguska"}} + ] + } + } +} + +print "for", QUERY + +results = ExampleDoc.search(QUERY) +for res in results: + print res + + +results.update(active=True) +results.reload() +for res in results: + print "{i.name} activation is {i.active}".format(i=res) + +print "Will 
update the names to Jonson" + +results.update(name="Jonson") +results.reload() +for res in results: + print "{i.name} activation is {i.active}".format(i=res) + +print "Updating using Model.update_all" +ExampleDoc.update_all(results, city="Itapopoca") +time.sleep(1) +results = ExampleDoc.filter(city="Itapopoca") +for res in results: + print "{i.name} city is {i.city}".format(i=res) + +print "All documents" +for doc in ExampleDoc.all(): + print doc.to_dict() + +print "Deleting everything" +results.delete() \ No newline at end of file diff --git a/octosearch.gif b/octosearch.gif new file mode 100644 index 0000000..c881043 Binary files /dev/null and b/octosearch.gif differ diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 47ecfbb..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -elasticsearch==2.1.0 -pytest==2.8.2 -pytest-cov==2.2.0 diff --git a/setup.py b/setup.py index a5303c8..a70db8a 100644 --- a/setup.py +++ b/setup.py @@ -1,24 +1,16 @@ -from pip.req import parse_requirements +# coding: utf-8 try: from setuptools import setup, find_packages except ImportError: from distutils.core import setup, find_packages -links = [] -requires = [] -for item in parse_requirements('requirements.txt'): - if item.url: - links.append(str(item.url)) - if item.req: - requires.append(str(item.req)) - setup( name='esengine', - version="0.0.1", + version="0.0.3", url='https://github.com/catholabs/ESengine', - license='CATHO LICENSE', + license='MIT', author="Catholabs", author_email="catholabs@catho.com", description='Elasticsearch models inspired on mongo engine ORM', @@ -27,6 +19,19 @@ include_package_data=True, zip_safe=False, platforms='any', - install_requires=requires, - dependency_links=links + extras_require={ + "es0": ["elasticsearch<1.0.0"], + "es1": ["elasticsearch>=1.0.0,<2.0.0"], + "es2": ["elasticsearch>=2.0.0,<3.0.0"] + }, + tests_require=[ + "pytest==2.8.3", + "pytest-cov==2.2.0", + "flake8==2.5.0", + "pep8-naming==0.3.3", + 
"flake8-debugger==1.4.0", + "flake8-print==2.0.1", + "flake8-todo==0.4", + "radon==1.2.2" + ] ) diff --git a/test.req b/test.req new file mode 100644 index 0000000..009d06f --- /dev/null +++ b/test.req @@ -0,0 +1,15 @@ +# base +elasticsearch>=1.0.0,<2.0.0 + +# testing +coveralls +pytest==2.8.3 +pytest-cov==2.2.0 + +# style check +flake8==2.5.0 +pep8-naming==0.3.3 +flake8-debugger==1.4.0 +flake8-print==2.0.1 +flake8-todo==0.4 +radon==1.2.2 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..1c94e09 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,117 @@ +# content of conftest.py +import pytest +import elasticsearch.helpers as eh_original +from esengine import Document +from esengine.fields import IntegerField, StringField, FloatField + +_INDEX = 'index' +_DOC_TYPE = 'doc_type' + +class ES(object): + test_id = 100 + test_ids = [100, 101] + + def index(self, *args, **kwargs): + assert kwargs['index'] == _INDEX + assert kwargs['doc_type'] == _DOC_TYPE + assert kwargs['id'] == self.test_id + assert 'body' in kwargs + kwargs['created'] = True + kwargs['_id'] = self.test_id + return kwargs + + def get(self, *args, **kwargs): + assert kwargs['index'] == _INDEX + assert kwargs['doc_type'] == _DOC_TYPE + assert kwargs['id'] == self.test_id + return { + '_source': { + 'id': self.test_id + } + } + + def search(self, *args, **kwargs): + assert kwargs['index'] == _INDEX + assert kwargs['doc_type'] == _DOC_TYPE + docs = [] + for _id in self.test_ids: + doc = { + '_source': { + 'id': _id + } + } + docs.append(doc) + return { + 'hits': { + 'hits': docs + } + } + + +class D(Document): + _index = _INDEX + _doctype = _DOC_TYPE + id = IntegerField() + + +class DW(D): + _es = ES() + id = IntegerField() # ID hould be inherited + document_id = StringField() + house_number = IntegerField() + height = FloatField() + + +# def pytest_runtest_setup(item): +# # called for running each test in 'a' directory +# print("setting up", 
item) + + +@pytest.fixture(scope="module") +def INDEX(): + return 'index' + + +@pytest.fixture(scope="module") +def DOC_TYPE(): + return 'doc_type' + + +@pytest.fixture(scope="module") +def QUERY(): + return { + "query": { + "bool": { + "must": [ + {"match": {"name": "Gonzo"}} + ] + } + } + } + + +@pytest.fixture(scope="module") +def MockES(): + return ES + + +@pytest.fixture(scope="module") +def eh(): + def bulk(es, actions): + for action in actions: + assert action['_op_type'] in ['index', 'update', 'delete'] + assert action['_index'] == _INDEX + assert action['_type'] == _DOC_TYPE + + eh_original.bulk = bulk + return eh_original + + +@pytest.fixture(scope="module") +def Doc(): + return D + + +@pytest.fixture(scope="module") +def DocWithDefaultClient(): + return DW \ No newline at end of file diff --git a/tests/test_base_document.py b/tests/test_base_document.py index 1c35e5c..1efba75 100644 --- a/tests/test_base_document.py +++ b/tests/test_base_document.py @@ -1,9 +1,10 @@ import pytest -from es_engine.bases.document import BaseDocument -from es_engine.bases.field import BaseField +from esengine.bases.document import BaseDocument +from esengine.bases.field import BaseField +from esengine.fields import StringField, IntegerField -from es_engine.exceptions import FieldTypeMismatch +from esengine.exceptions import FieldTypeMismatch def test_raise_when_doc_has_no_doc_type(): @@ -13,16 +14,16 @@ def test_raise_when_doc_has_no_doc_type(): def test_raise_when_doc_has_no_index(): class WhitoutIndex(BaseDocument): - __doc_type__ = 'test' + _doctype = 'test' class WhitIndex(BaseDocument): - __doc_type__ = 'test' - __index__ = 'test' + _doctype = 'test' + _index = 'test' _fields = {} with pytest.raises(ValueError) as ex: WhitoutIndex() - assert str(ex.value) == '{} have no __index__ field'.format( + assert str(ex.value) == '{} have no _index attribute'.format( WhitoutIndex.__name__ ) WhitIndex() @@ -30,12 +31,12 @@ class WhitIndex(BaseDocument): def 
test_raise_if_doc_has_no_fields(): class WhitoutFields(BaseDocument): - __doc_type__ = 'test' - __index__ = 'test' + _doctype = 'test' + _index = 'test' class WhitFields(BaseDocument): - __doc_type__ = 'test' - __index__ = 'test' + _doctype = 'test' + _index = 'test' _fields = {} with pytest.raises(AttributeError) as ex: @@ -49,12 +50,19 @@ class WhitFields(BaseDocument): def test_doc_set_kwargs(): class Doc(BaseDocument): - __doc_type__ = 'test' - __index__ = 'test' + _doctype = 'test' + _index = 'test' _fields = {} def __setattr__(self, key, value): - super(BaseDocument, self).__setattr__(key, value) + if key not in self._fields: + if isinstance(value, basestring): + self._fields[key] = StringField() + elif isinstance(value, int): + self._fields[key] = IntegerField() + else: + self._fields[key] = StringField(_multi=True) + super(Doc, self).__setattr__(key, value) x = Doc(asdf='0', x=10, value=['a', 'b'], _value='aaa') assert x.asdf == '0' @@ -65,8 +73,8 @@ def __setattr__(self, key, value): def test_raise_if_attr_not_in_fields(): class Doc(BaseDocument): - __doc_type__ = 'test' - __index__ = 'test' + _doctype = 'test' + _index = 'test' _fields = {} with pytest.raises(KeyError) as ex: @@ -79,14 +87,15 @@ def pass_func(self): pass class Doc(BaseDocument): - __doc_type__ = 'test' - __index__ = 'test' - _fields = {} - Doc._fields['asdf'] = 1 + _doctype = 'test' + _index = 'test' + _fields = {"asdf": 1} Doc._initialize_multi_fields = pass_func - doc = Doc(asdf='0') - assert doc.asdf == '0' + doc = Doc() + with pytest.raises(AttributeError) as ex: + doc.asdf = "0" + assert ex.message == "'int' object has no attribute 'from_dict'" doc.__setattr__('_test', 10) assert doc._test == 10 @@ -94,8 +103,8 @@ class Doc(BaseDocument): def test_doc_initialize_multi_fields(): class Doc(BaseDocument): - __doc_type__ = 'test' - __index__ = 'test' + _doctype = 'test' + _index = 'test' _fields = { 'multiple': BaseField(field_type=int, multi=True), 'simple': BaseField(field_type=int) @@ 
-107,8 +116,8 @@ class Doc(BaseDocument): def test_doc_to_dict(): class Doc(BaseDocument): - __doc_type__ = 'test' - __index__ = 'test' + _doctype = 'test' + _index = 'test' _fields = { 'multiple': BaseField(field_type=int, multi=True), 'simple': BaseField(field_type=int) @@ -119,8 +128,9 @@ class Doc(BaseDocument): def test_doc_to_dict_call_validate(): class Doc(BaseDocument): - __doc_type__ = 'test' - __index__ = 'test' + _doctype = 'test' + _index = 'test' + _strict = True _fields = { 'multiple': BaseField(field_type=int, multi=True), 'simple': BaseField(field_type=int) @@ -128,13 +138,15 @@ class Doc(BaseDocument): doc = Doc(multiple=[1, 2], simple="10") with pytest.raises(FieldTypeMismatch) as ex: doc.to_dict() - assert str(ex.value) == "`simple` expected ``, actual ``" # noqa + assert str(ex.value) == ( + "`simple` expected ``, actual ``" + ) def test_doc_from_dict(): class Doc(BaseDocument): - __doc_type__ = 'test' - __index__ = 'test' + _doctype = 'test' + _index = 'test' _fields = { 'multiple': BaseField(field_type=int, multi=True), 'simple': BaseField(field_type=int) diff --git a/tests/test_base_field.py b/tests/test_base_field.py index 7d64047..6baf87f 100644 --- a/tests/test_base_field.py +++ b/tests/test_base_field.py @@ -1,9 +1,9 @@ import pytest -from es_engine.bases.field import BaseField +from esengine.bases.field import BaseField -from es_engine.exceptions import RequiredField, InvalidMultiField -from es_engine.exceptions import FieldTypeMismatch +from esengine.exceptions import RequiredField, InvalidMultiField +from esengine.exceptions import FieldTypeMismatch def test_raise_when_required_fild_has_empty_value(): diff --git a/tests/test_document.py b/tests/test_document.py index afe9890..ff62f63 100644 --- a/tests/test_document.py +++ b/tests/test_document.py @@ -1,108 +1,144 @@ import pytest -from es_engine.document import Document -from es_engine.fields import IntegerField - - -class Doc(Document): - __index__ = 'index' - __doc_type__ = 
'doc_type' - id = IntegerField() - - -class MockES(object): - test_id = 100 - test_ids = [100, 101] - - def index(self, *args, **kwargs): - assert kwargs['index'] == Doc.__index__ - assert kwargs['doc_type'] == Doc.__doc_type__ - assert kwargs['id'] == self.test_id - assert 'body' in kwargs - - def get(self, *args, **kwargs): - assert kwargs['index'] == Doc.__index__ - assert kwargs['doc_type'] == Doc.__doc_type__ - assert kwargs['id'] == self.test_id - return { - '_source': { - 'id': self.test_id - } - } - - def search(self, *args, **kwargs): - assert kwargs['index'] == Doc.__index__ - assert kwargs['doc_type'] == Doc.__doc_type__ - assert kwargs['size'] == len(self.test_ids) - query = { - "query": { - "filtered": { - "query": {"match_all": {}}, - "filter": { - "ids": { - "values": self.test_ids - } - } - } - } - } - assert kwargs['body'] == query - docs = [] - for id in self.test_ids: - doc = { - '_source': { - 'doc': { - 'id': self.test_id - } - } - } - docs.append(doc) - return { - 'hits': { - 'hits': docs - } - } - - -def test_document_save(): - Doc(id=MockES.test_id).save(MockES()) - - -def test_raise_when_pass_id_and_ids_to_doc_get(): - with pytest.raises(ValueError) as ex: - Doc.get(MockES(), id=1, ids=[1, 2]) - assert str(ex.value) == 'id and ids can not be passed together.' 
- - -def test_doc_get(): - doc = Doc.get(MockES(), id=MockES.test_id) +# import elasticsearch.helpers as eh +# from esengine.document import Document +# from esengine.fields import IntegerField, StringField, FloatField +from esengine.exceptions import ClientError + + +def test_build_result(Doc, MockES): + resp = MockES().search(index='index', doc_type='doc_type', size=2) + results = Doc.build_result(resp, es=MockES(), size=2) + for res in results: + print res, res.id + assert res.id in MockES.test_ids + + +def test_doc_search(Doc, QUERY, MockES): + docs = Doc.search(QUERY, es=MockES(), size=2) + for doc in docs: + assert doc.id in MockES.test_ids + + +def test_document_save(Doc, MockES): + Doc(id=MockES.test_id).save(es=MockES()) + + +def test_get_with_id(Doc, MockES): + assert Doc.get(id=MockES.test_id, es=MockES()).id == MockES.test_id + + +def test_doc_get(Doc, MockES): + doc = Doc.get(id=MockES.test_id, es=MockES()) assert doc.id == MockES.test_id -def test_doc_get_ids(): - docs = Doc.get(MockES(), ids=MockES.test_ids) +def test_filter_by_ids(Doc, MockES): + docs = Doc.filter(ids=MockES.test_ids, es=MockES()) for doc in docs: assert doc.id in MockES.test_ids -def mock_bulk(es, updates): - assert updates == [ - { - '_op_type': 'index', - '_index': Doc.__index__, - '_type': Doc.__doc_type__, - '_id': doc, - 'doc': {'id': doc} - } - for doc in MockES.test_ids - ] +def test_raise_if_filter_by_ids_and_filters(Doc, MockES): + with pytest.raises(ValueError): + Doc.filter(ids=MockES.test_ids, es=MockES(), filters={"name": "Gonzo"}) + + +def test_update_all(DocWithDefaultClient, QUERY, eh): + docs = DocWithDefaultClient.search(QUERY, size=2) + DocWithDefaultClient.update_all(docs, document_id=1) + + +def test_delete_all(DocWithDefaultClient, QUERY, eh): + docs = DocWithDefaultClient.search(QUERY, size=2) + DocWithDefaultClient.delete_all(docs) -def test_save_all(): - import elasticsearch.helpers as eh - eh.bulk = mock_bulk +def test_save_all(Doc, MockES, eh): docs = [ 
Doc(id=doc) for doc in MockES.test_ids ] - Doc.save_all(MockES(), docs) + Doc.save_all(docs, es=MockES()) + + +def test_client_not_defined(Doc, MockES): + doc = Doc(id=MockES.test_id) + with pytest.raises(ClientError): + doc.save() + +def test_default_client(DocWithDefaultClient, MockES): + try: + doc = DocWithDefaultClient(id=MockES.test_id) + doc.save() + DocWithDefaultClient.get(id=MockES.test_id) + except ClientError: + pytest.fail("Doc has no default connection") + + +def test_get_es_with_invalid_client(Doc): + with pytest.raises(ClientError): + Doc.get_es(int) + + +def test__es_is_invalid(Doc): + class DocWithInvalidES(Doc): + _es = int + with pytest.raises(ClientError): + DocWithInvalidES.get_es(None) + + +def test_unicode_representation(Doc, MockES): + doc = Doc(id=MockES.test_id) + assert doc.__unicode__() == u"" + + +def test_str_representation(Doc, MockES): + doc = Doc(id=MockES.test_id) + assert doc.__str__() == "" + + +def test_default_client_injected(Doc, MockES): + try: + Doc._es = MockES() + doc = Doc(id=MockES.test_id) + doc.save() + Doc.get(id=MockES.test_id) + except ClientError: + pytest.fail("Doc has no default connection") + + +def test_default_client_injected_as_lambda(Doc, MockES): + try: + Doc._es = classmethod(lambda cls: MockES()) + doc = Doc(id=MockES.test_id) + doc.save() + Doc.get(id=MockES.test_id) + except ClientError: + pytest.fail("Doc has no default connection") + + +def test_compare_attributed_values_against_fields(DocWithDefaultClient, MockES): + doc = DocWithDefaultClient(id=MockES.test_id) + doc.document_id = 123456 + doc.house_number = "42" + + with pytest.raises(KeyError): # invalid field + doc.name = 'Bruno' + with pytest.raises(ValueError): # uncastable + doc.height = "2 mtrs" + + # TODO: commented asserts will be possible when move to descriptors + # Because only with descriptors we can overwrite compare methods + assert doc.house_number == 42 + # assert doc.house_number == "42" + # assert doc.house_number in ['42'] + 
assert doc.house_number in [42] + assert not doc.house_number != 42 + # assert not doc.house_number != "42" + # assert doc.document_id == 123456 + assert doc.document_id == "123456" + assert doc.document_id in ['123456'] + # assert doc.document_id in [123456] + # assert not doc.document_id != 123456 + assert not doc.document_id != "123456" \ No newline at end of file diff --git a/tests/test_embedded_document.py b/tests/test_embedded_document.py index fb0fea3..acdfc20 100644 --- a/tests/test_embedded_document.py +++ b/tests/test_embedded_document.py @@ -1,9 +1,9 @@ import pytest -from es_engine.embedded_document import EmbeddedDocument -from es_engine.exceptions import RequiredField, InvalidMultiField -from es_engine.exceptions import FieldTypeMismatch -from es_engine.fields import IntegerField +from esengine.embedded_document import EmbeddedDocument +from esengine.exceptions import RequiredField, InvalidMultiField +from esengine.exceptions import FieldTypeMismatch +from esengine.fields import IntegerField class TowFields(EmbeddedDocument): @@ -59,7 +59,7 @@ def test_raise_when_multi_fild_type_missmatch(): field.validate(field_name, [10, 'asdf']) assert str(ex.value) == "`{}` expected `{}`, actual ``".format( field_name, - TowFields.__type__ + TowFields._type ) diff --git a/tests/test_fields.py b/tests/test_fields.py index 99c12a4..2ad6d03 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -1,6 +1,6 @@ import pytest from datetime import datetime -from es_engine.fields import DateField +from esengine.fields import DateField def test_date_field_to_dict(): diff --git a/tests/test_metaclass.py b/tests/test_metaclass.py index feb2218..3c8ecf0 100644 --- a/tests/test_metaclass.py +++ b/tests/test_metaclass.py @@ -1,6 +1,6 @@ -from es_engine.bases.metaclass import ModelMetaclass -from es_engine.bases.field import BaseField -from es_engine.embedded_document import EmbeddedDocument +from esengine.bases.metaclass import ModelMetaclass +from esengine.bases.field 
import BaseField +from esengine.embedded_document import EmbeddedDocument def test_derived_class_has_fields_attr(): @@ -28,5 +28,27 @@ def test_has_typefield_if_is_EmbeddedDocument(): # noqa (EmbeddedDocument,), {} ) - assert hasattr(obj, '__type__') - assert getattr(obj, '__type__') is obj + assert hasattr(obj, '_type') + assert getattr(obj, '_type') is obj + + +def test_id_injected_when_autoid(): + class Base(object): + __metaclass__ = ModelMetaclass + _autoid = True + + class Derived(Base): + pass + + assert hasattr(Derived, 'id') + + +def test_id_not_injected_when_not_autoid(): + class Base(object): + __metaclass__ = ModelMetaclass + _autoid = False + + class Derived(Base): + pass + + assert not hasattr(Derived, 'id') \ No newline at end of file diff --git a/tests/test_results.py b/tests/test_results.py new file mode 100644 index 0000000..af0ed71 --- /dev/null +++ b/tests/test_results.py @@ -0,0 +1,26 @@ +import pytest + +from esengine.bases.result import ResultSet + + +def test_resultset_has_values(MockES, INDEX, DOC_TYPE, Doc): + resp = MockES().search(index=INDEX, doc_type=DOC_TYPE, size=2) + values=[obj['_source'] for obj in resp['hits']['hits']] + results = ResultSet( + values=values, + model=Doc + ) + assert results._values == values + for result in results: + assert result.id in MockES().test_ids + + +def test_get_item_by_index(DocWithDefaultClient, MockES, QUERY): + results = DocWithDefaultClient.search(QUERY) + assert results[0].id == MockES().test_ids[0] + + +def test_get_item_by_index_1(DocWithDefaultClient, MockES, QUERY): + results = DocWithDefaultClient.search(QUERY) + assert results[-1].id == MockES().test_ids[-1] + diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..e389648 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,46 @@ +import pytest +from esengine.utils import validate_client +from esengine.exceptions import ClientError + + +class InvalidInterfaceClient(object): + pass + + +class 
InvalidClient(object): + index = 1 + search = 2 + get = 3 + + +class Client(object): + def index(self, *args, **kwargs): + return {"_id": 1, "created": True} + + def search(self, query): + return query + + def get(self, *args, **kwargs): + return {"_id": 1} + + +def test_valid_es_client(): + try: + validate_client(Client()) + except ClientError as e: + pytest.fail(e) + + +def test_raise_on_none_client(): + with pytest.raises(ClientError): + validate_client(None) + + +def test_raise_when_invalid_client(): + with pytest.raises(ClientError): + validate_client(InvalidClient()) + + +def test_client_invalid_interface(): + with pytest.raises(ClientError): + validate_client(InvalidInterfaceClient())