From 6b10c223e9745a4146cc03296f12b958bbff348c Mon Sep 17 00:00:00 2001 From: Tyler Hobbs Date: Fri, 10 Sep 2010 01:08:44 -0500 Subject: [PATCH] Initial documentation addition. --- .gitignore | 2 + doc/Makefile | 89 +++++ doc/__init__.py | 1 + doc/api/index.rst | 16 + doc/api/pycassa/batch.rst | 5 + doc/api/pycassa/columnfamily.rst | 5 + doc/api/pycassa/columnfamilymap.rst | 5 + doc/api/pycassa/connection.rst | 5 + doc/api/pycassa/index.rst | 5 + doc/api/pycassa/logger.rst | 5 + doc/api/pycassa/pool.rst | 5 + doc/api/pycassa/types.rst | 5 + doc/changelog.rst | 18 ++ doc/conf.py | 146 +++++++++ doc/example/index.rst | 10 + doc/index.rst | 75 +++++ doc/installation.rst | 22 ++ doc/tutorial.rst | 195 +++++++++++ ez_setup.py | 276 ++++++++++++++++ pycassa/batch.py | 41 ++- pycassa/columnfamily.py | 482 ++++++++++++++-------------- pycassa/columnfamilymap.py | 314 +++++++++--------- pycassa/connection.py | 93 +++--- pycassa/index.py | 38 ++- pycassa/logger.py | 13 + pycassa/pool.py | 286 +++++++++-------- pycassa/types.py | 11 + setup.py | 65 +++- 28 files changed, 1659 insertions(+), 574 deletions(-) create mode 100644 doc/Makefile create mode 100644 doc/__init__.py create mode 100644 doc/api/index.rst create mode 100644 doc/api/pycassa/batch.rst create mode 100644 doc/api/pycassa/columnfamily.rst create mode 100644 doc/api/pycassa/columnfamilymap.rst create mode 100644 doc/api/pycassa/connection.rst create mode 100644 doc/api/pycassa/index.rst create mode 100644 doc/api/pycassa/logger.rst create mode 100644 doc/api/pycassa/pool.rst create mode 100644 doc/api/pycassa/types.rst create mode 100644 doc/changelog.rst create mode 100644 doc/conf.py create mode 100644 doc/example/index.rst create mode 100644 doc/index.rst create mode 100644 doc/installation.rst create mode 100644 doc/tutorial.rst create mode 100644 ez_setup.py diff --git a/.gitignore b/.gitignore index 51348ffb..2f466ef5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.pyc *.swp +*.swo build/ 
+doc/_build diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 00000000..ff895bc5 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,89 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + -rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." 
+ +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pycassa.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pycassa.qhc" + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ + "run these through (pdf)latex." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." diff --git a/doc/__init__.py b/doc/__init__.py new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/doc/__init__.py @@ -0,0 +1 @@ + diff --git a/doc/api/index.rst b/doc/api/index.rst new file mode 100644 index 00000000..8f7ec6de --- /dev/null +++ b/doc/api/index.rst @@ -0,0 +1,16 @@ +API Documentation +================= + +Modules: + +.. 
toctree:: + :maxdepth: 2 + + pycassa/connection + pycassa/pool + pycassa/columnfamily + pycassa/columnfamilymap + pycassa/index + pycassa/batch + pycassa/types + pycassa/logger diff --git a/doc/api/pycassa/batch.rst b/doc/api/pycassa/batch.rst new file mode 100644 index 00000000..6b77b23e --- /dev/null +++ b/doc/api/pycassa/batch.rst @@ -0,0 +1,5 @@ +:mod:`batch` -- Batch Operations +================================ + +.. automodule:: pycassa.batch + :members: diff --git a/doc/api/pycassa/columnfamily.rst b/doc/api/pycassa/columnfamily.rst new file mode 100644 index 00000000..8cc648f9 --- /dev/null +++ b/doc/api/pycassa/columnfamily.rst @@ -0,0 +1,5 @@ +:mod:`columnfamily` -- Column Family +==================================== + +.. automodule:: pycassa.columnfamily + :members: diff --git a/doc/api/pycassa/columnfamilymap.rst b/doc/api/pycassa/columnfamilymap.rst new file mode 100644 index 00000000..1419307b --- /dev/null +++ b/doc/api/pycassa/columnfamilymap.rst @@ -0,0 +1,5 @@ +:mod:`columnfamilymap` -- Maps Classes to Column Families +========================================================= + +.. automodule:: pycassa.columnfamilymap + :members: diff --git a/doc/api/pycassa/connection.rst b/doc/api/pycassa/connection.rst new file mode 100644 index 00000000..e2282dd3 --- /dev/null +++ b/doc/api/pycassa/connection.rst @@ -0,0 +1,5 @@ +:mod:`connection` -- Non-pooled connections to Cassandra servers +================================================================ + +.. automodule:: pycassa.connection + :members: diff --git a/doc/api/pycassa/index.rst b/doc/api/pycassa/index.rst new file mode 100644 index 00000000..62d4e2e1 --- /dev/null +++ b/doc/api/pycassa/index.rst @@ -0,0 +1,5 @@ +:mod:`index` -- Index Tools for Pycassa +======================================= + +.. 
automodule:: pycassa.index + :members: diff --git a/doc/api/pycassa/logger.rst b/doc/api/pycassa/logger.rst new file mode 100644 index 00000000..dff1305b --- /dev/null +++ b/doc/api/pycassa/logger.rst @@ -0,0 +1,5 @@ +:mod:`logger` -- Pycassa Logging +================================ + +.. automodule:: pycassa.logger + :members: diff --git a/doc/api/pycassa/pool.rst b/doc/api/pycassa/pool.rst new file mode 100644 index 00000000..3038fb51 --- /dev/null +++ b/doc/api/pycassa/pool.rst @@ -0,0 +1,5 @@ +:mod:`pool` -- Connection Pooling +================================= + +.. automodule:: pycassa.pool + :members: diff --git a/doc/api/pycassa/types.rst b/doc/api/pycassa/types.rst new file mode 100644 index 00000000..990c3c9e --- /dev/null +++ b/doc/api/pycassa/types.rst @@ -0,0 +1,5 @@ +:mod:`types` -- Column Types for ColumnFamilyMaps +================================================= + +.. automodule:: pycassa.types + :members: diff --git a/doc/changelog.rst b/doc/changelog.rst new file mode 100644 index 00000000..f426d6d5 --- /dev/null +++ b/doc/changelog.rst @@ -0,0 +1,18 @@ +Changelog +========= + +Changes in Version 0.5.0 +------------------------ + +Changes in Version 0.4.3 +------------------------ + +Changes in Version 0.4.2 +------------------------ + +Changes in Version 0.4.1 +------------------------ + +Changes in Version 0.4.0 +------------------------ + diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 00000000..62f363ba --- /dev/null +++ b/doc/conf.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- +# +# PyMongo documentation build configuration file +# +# This file is execfile()d with the current directory set to its containing dir. + +import sys, os +sys.path.append(os.path.abspath('..')) + +import pycassa + +# -- General configuration ----------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.coverage', + 'sphinx.ext.todo'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'pycassa' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.5.0' +# The full version, including alpha/beta/rc tags. +release = '0.5.0' + +# List of documents that shouldn't be included in the build. +unused_docs = [] + +# List of directories, relative to source directory, that shouldn't be searched +# for source files. +exclude_trees = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +add_module_names = True + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# -- Options for extensions ---------------------------------------------------- +autoclass_content = 'init' + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. Major themes that come with +# Sphinx are currently 'default' and 'sphinxdoc'. +html_theme = 'default' + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. 
Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = '' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'pycassa' + release.replace('.', '_') + + +# -- Options for LaTeX output -------------------------------------------------- + +# The paper size ('letter' or 'a4'). +#latex_paper_size = 'letter' + +# The font size ('10pt', '11pt' or '12pt'). +#latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'pycassa.tex', u'pycassa Documentation', + u'Jonathan Hseu', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. 
+#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# Additional stuff for the LaTeX preamble. +#latex_preamble = '' + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_use_modindex = True diff --git a/doc/example/index.rst b/doc/example/index.rst new file mode 100644 index 00000000..32191e86 --- /dev/null +++ b/doc/example/index.rst @@ -0,0 +1,10 @@ +Twissandra Example +================== + +This example shows you how to work with Twissandra, a Twitter-like +example Cassandra application. + +Setup +----- + +To be completed... diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 00000000..4e87664c --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,75 @@ +pycassa |release| Documentation +=============================== + +Contents +-------- +**pycassa** is a Python client for +`Apache Cassandra `_. + + +:doc:`installation` + How to install **pycassa**. + +:doc:`tutorial` + A short overview of **pycassa** usage. + +:doc:`example/index` + An example of how to use **pycassa** with `Twissandra `_, an example project that uses Cassandra to provide functionality similar to Twitter. + +:doc:`api/index` + The **pycassa** API documentation. + + +Help +------------ +Mailing Lists + * User list: mail to `pycassa-discuss@googlegroups.com `_ or `view online `_. + * Developer list: mail to `pycassa-devel@googlegroups.com `_ or `view online `_. + +IRC + * Use #cassandra on `irc.freenode.net `_. If you don't have an IRC client, you can use `freenode's web based client `_. + +Issues +------ +Bugs and feature requests for **pycassa** are currently tracked through the `github issue tracker `_. + +Contributing +------------ +**pycassa** encourages you to offer any contributions or ideas you have. 
+Contributing to the documentation or examples, reporting bugs, requesting +features, and (of course) improving the code are all equally welcome. +To contribute, fork the project on +`github `_ and make a pull request. + +Changes +------- +The :doc:`changelog` lists the changes between versions of **pycassa**. + +About This Documentation +------------------------ +This documentation is generated using the `Sphinx +`_ documentation generator. The source files +for the documentation are located in the *doc/* directory of +**pycassa**. To generate the documentation, run the +following command from the root directory of **pycassa**: + +.. code-block:: bash + + $ python setup.py doc + +Indices and tables +------------------ + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + +.. toctree:: + :hidden: + + installation + tutorial + example/index + api/index + changelog + diff --git a/doc/installation.rst b/doc/installation.rst new file mode 100644 index 00000000..70927029 --- /dev/null +++ b/doc/installation.rst @@ -0,0 +1,22 @@ +Installing +========== + +Make sure that you have Thrift's python bindings installed: + +.. code-block:: bash + + $ easy_install thrift + +You can download a release from +`github `_ +or check out the latest source from github:: + + $ git clone git://github.com/pycassa/pycassa.git + +You can simply copy the pycassa directory into your project, or +you can install pycassa system-wide: + +.. code-block:: bash + + $ cd pycassa/ + $ sudo python setup.py install diff --git a/doc/tutorial.rst b/doc/tutorial.rst new file mode 100644 index 00000000..dff53b5d --- /dev/null +++ b/doc/tutorial.rst @@ -0,0 +1,195 @@ +Tutorial +======== + +This tutorial is intended as an introduction to working with +Cassandra and **pycassa**. + +Prerequisites +------------- +Before we start, make sure that you have **pycassa** +:doc:`installed `. In the Python shell, the following +should run without raising an exception: + +.. 
doctest:: + + >>> import pycassa + +This tutorial also assumes that a Cassandra instance is running on the +default host and port. Read the `instructions for getting started +with Cassandra `_ , +making sure that you choose a `version that is compatible with +pycassa `_. +You can start Cassandra like so: + +.. code-block:: bash + + $ pwd + ~/cassandra + $ bin/cassandra -f + +and import the included schema to start out: + +.. code-block:: bash + + $ bin/schematool localhost 8080 import + +Making a Connection +------------------- +The first step when working with **pycassa** is to create a +:class:`~pycassa.connection.Connection` to the running cassandra instance: + +.. code-block:: python + + >>> import pycassa + >>> connection = pycassa.connect('Keyspace1') + +The above code will connect on the default host and port. We can also +specify the host and port explicitly, as follows: + +.. code-block:: python + + >>> connection = pycassa.connect('Keyspace1', ['localhost:9160']) + +Getting a ColumnFamily +---------------------- +A column family is a collection of rows and columns in Cassandra, +and can be thought of as roughly the equivalent of a table in a +relational database. We'll use one of the column families that +were already included in the schema file: + +.. code-block:: python + + >>> col_fam = pycassa.ColumnFamily(connection, 'Standard1') + +Inserting Data +-------------- +To insert a row into a column family we can use the +:meth:`~pycassa.columnfamily.ColumnFamily.insert` method: + +.. code-block:: python + + >>> col_fam.insert('row_key', {'col_name': 'col_val'}) + 1354459123410932 + +We can also insert more than one column at a time: + +.. code-block:: python + + >>> col_fam.insert('row_key', {'name1':'val1', 'name2':'val2'}) + 1354459123410932 + +And we can insert more than one row at a time: + +.. code-block:: python + + >>> col_fam.batch_insert({'row1': {'name1':'val1'}, + ... 
'row2': {'foo':'bar'}}) + 1354491238721387 + +Getting Data +------------ +There are many more ways to get data out of Cassandra than there are +to insert data. + +The simplest way to get data is to use +:meth:`~pycassa.columnfamily.ColumnFamily.get()`: + +.. code-block:: python + + >>> col_fam.get('row_key') + {'col_name': 'col_val'} + +Without any other arguments, :meth:`~pycassa.columnfamily.ColumnFamily.get()` +returns every column in the row (up to `column_count`, which defaults to 100). +If you only want a few of the columns and you know them by name, you can +specify them using a `columns` argument: + +.. code-block:: python + + >>> col_fam.get('row_key', columns=['name1', 'name2']) + {'name1': 'foo', 'name2': 'bar'} + +We may also get a slice (or subrange) of the columns in a row. To do this, +use the `column_start` and `column_finish` parameters. Either of these may be +left empty to allow the slice to extend to the end of the row in one direction. +Also note that `column_finish` is inclusive. Assuming we've inserted several +columns with names '1' through '9', we could do the following: + +.. code-block:: python + + >>> col_fam.get('row_key', column_start='5', column_finish='7') + {'5':'foo', '6':'bar', '7':'baz'} + +There are also two ways to get multiple rows at the same time. +The first is to specify them by name using +:meth:`~pycassa.columnfamily.ColumnFamily.multiget()`: + +.. code-block:: python + + >>> col_fam.multiget(['row_key1', 'row_key2']) + {'row_key1': {'name':'val'}, 'row_key2': {'name':'val'}} + +The other way is to get a range of keys at once by using +:meth:`~pycassa.columnfamily.ColumnFamily.get_range()`. The parameter +`finish` is also inclusive here. Assuming we've inserted some rows +with keys 'row_key1' through 'row_key9', we could do this: + +.. 
code-block:: python + + >>> col_fam.get_range(start='row_key5', finish='row_key7') + {'row_key5': {'name':'val'}, 'row_key6': {'name':'val'}, 'row_key7': {'name':'val'}} + +It's also possible to specify a set of columns or a slice for +:meth:`~pycassa.columnfamily.ColumnFamily.multiget()` and +:meth:`~pycassa.columnfamily.ColumnFamily.get_range()` just like we did for +:meth:`~pycassa.columnfamily.ColumnFamily.get()`. + +Counting +-------- +If you just want to know how many columns are in a row, you can use +:meth:`~pycassa.columnfamily.ColumnFamily.get_count()`: + +.. code-block:: python + + >>> col_fam.get_count('row_key') + 3 + +Indexes +------- +Cassandra 0.7.0 adds support for secondary indexes, which allow you to +efficiently get only rows which match a certain expression. + +To use secondary indexes with Cassandra, you need to specify what columns +will be indexed. In a ``cassandra.yaml`` file, this might look like: + +:: + + - name: Indexed1 + column_type: Standard + column_metadata: + - name: birthdate + validator_class: LongType + index_type: KEYS + +In order to use :meth:`~pycassa.columnfamily.ColumnFamily.get_indexed_slices()` +to get data from Indexed1 using the indexed column, we need to create an +:class:`~pycassa.cassandra.ttypes.IndexClause` which contains +:class:`~pycassa.cassandra.ttypes.IndexExpression`. The module +:mod:`pycassa.index` is designed to make this easier. + +Suppose we are only interested in rows where birthdate is 1984. We might do +the following: + +.. 
code-block:: python + + >>> col_fam = pycassa.ColumnFamily(connection, 'Indexed1') + >>> from pycassa.index import * + >>> index_exp = create_index_expression('birthdate', 1984) + >>> index_clause = create_index_clause([index_exp]) + >>> col_fam.get_indexed_slices(index_clause) + {'winston smith': {'birthdate': 1984}} + +Although at least one +:class:`~pycassa.cassandra.ttypes.IndexExpression` in every clause +must be on an indexed column, you may also have other expressions which are +on non-indexed columns. diff --git a/ez_setup.py b/ez_setup.py new file mode 100644 index 00000000..d24e845e --- /dev/null +++ b/ez_setup.py @@ -0,0 +1,276 @@ +#!python +"""Bootstrap setuptools installation + +If you want to use setuptools in your package's setup.py, just include this +file in the same directory with it, and add this to the top of your setup.py:: + + from ez_setup import use_setuptools + use_setuptools() + +If you want to require a specific version of setuptools, set a download +mirror, or use an alternate download directory, you can do so by supplying +the appropriate options to ``use_setuptools()``. + +This file can also be run as a script to install or upgrade setuptools. 
+""" +import sys +DEFAULT_VERSION = "0.6c9" +DEFAULT_URL = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[:3] + +md5_data = { + 'setuptools-0.6b1-py2.3.egg': '8822caf901250d848b996b7f25c6e6ca', + 'setuptools-0.6b1-py2.4.egg': 'b79a8a403e4502fbb85ee3f1941735cb', + 'setuptools-0.6b2-py2.3.egg': '5657759d8a6d8fc44070a9d07272d99b', + 'setuptools-0.6b2-py2.4.egg': '4996a8d169d2be661fa32a6e52e4f82a', + 'setuptools-0.6b3-py2.3.egg': 'bb31c0fc7399a63579975cad9f5a0618', + 'setuptools-0.6b3-py2.4.egg': '38a8c6b3d6ecd22247f179f7da669fac', + 'setuptools-0.6b4-py2.3.egg': '62045a24ed4e1ebc77fe039aa4e6f7e5', + 'setuptools-0.6b4-py2.4.egg': '4cb2a185d228dacffb2d17f103b3b1c4', + 'setuptools-0.6c1-py2.3.egg': 'b3f2b5539d65cb7f74ad79127f1a908c', + 'setuptools-0.6c1-py2.4.egg': 'b45adeda0667d2d2ffe14009364f2a4b', + 'setuptools-0.6c2-py2.3.egg': 'f0064bf6aa2b7d0f3ba0b43f20817c27', + 'setuptools-0.6c2-py2.4.egg': '616192eec35f47e8ea16cd6a122b7277', + 'setuptools-0.6c3-py2.3.egg': 'f181fa125dfe85a259c9cd6f1d7b78fa', + 'setuptools-0.6c3-py2.4.egg': 'e0ed74682c998bfb73bf803a50e7b71e', + 'setuptools-0.6c3-py2.5.egg': 'abef16fdd61955514841c7c6bd98965e', + 'setuptools-0.6c4-py2.3.egg': 'b0b9131acab32022bfac7f44c5d7971f', + 'setuptools-0.6c4-py2.4.egg': '2a1f9656d4fbf3c97bf946c0a124e6e2', + 'setuptools-0.6c4-py2.5.egg': '8f5a052e32cdb9c72bcf4b5526f28afc', + 'setuptools-0.6c5-py2.3.egg': 'ee9fd80965da04f2f3e6b3576e9d8167', + 'setuptools-0.6c5-py2.4.egg': 'afe2adf1c01701ee841761f5bcd8aa64', + 'setuptools-0.6c5-py2.5.egg': 'a8d3f61494ccaa8714dfed37bccd3d5d', + 'setuptools-0.6c6-py2.3.egg': '35686b78116a668847237b69d549ec20', + 'setuptools-0.6c6-py2.4.egg': '3c56af57be3225019260a644430065ab', + 'setuptools-0.6c6-py2.5.egg': 'b2f8a7520709a5b34f80946de5f02f53', + 'setuptools-0.6c7-py2.3.egg': '209fdf9adc3a615e5115b725658e13e2', + 'setuptools-0.6c7-py2.4.egg': '5a8f954807d46a0fb67cf1f26c55a82e', + 'setuptools-0.6c7-py2.5.egg': '45d2ad28f9750e7434111fde831e8372', + 
'setuptools-0.6c8-py2.3.egg': '50759d29b349db8cfd807ba8303f1902', + 'setuptools-0.6c8-py2.4.egg': 'cba38d74f7d483c06e9daa6070cce6de', + 'setuptools-0.6c8-py2.5.egg': '1721747ee329dc150590a58b3e1ac95b', + 'setuptools-0.6c9-py2.3.egg': 'a83c4020414807b496e4cfbe08507c03', + 'setuptools-0.6c9-py2.4.egg': '260a2be2e5388d66bdaee06abec6342a', + 'setuptools-0.6c9-py2.5.egg': 'fe67c3e5a17b12c0e7c541b7ea43a8e6', + 'setuptools-0.6c9-py2.6.egg': 'ca37b1ff16fa2ede6e19383e7b59245a', +} + +import sys, os +try: from hashlib import md5 +except ImportError: from md5 import md5 + +def _validate_md5(egg_name, data): + if egg_name in md5_data: + digest = md5(data).hexdigest() + if digest != md5_data[egg_name]: + print >>sys.stderr, ( + "md5 validation of %s failed! (Possible download problem?)" + % egg_name + ) + sys.exit(2) + return data + +def use_setuptools( + version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, + download_delay=15 +): + """Automatically find/download setuptools and make it available on sys.path + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end with + a '/'). `to_dir` is the directory where setuptools will be downloaded, if + it is not already available. If `download_delay` is specified, it should + be the number of seconds that will be paused before initiating a download, + should one be required. If an older version of setuptools is installed, + this routine will print a message to ``sys.stderr`` and raise SystemExit in + an attempt to abort the calling script. 
+ """ + was_imported = 'pkg_resources' in sys.modules or 'setuptools' in sys.modules + def do_download(): + egg = download_setuptools(version, download_base, to_dir, download_delay) + sys.path.insert(0, egg) + import setuptools; setuptools.bootstrap_install_from = egg + try: + import pkg_resources + except ImportError: + return do_download() + try: + pkg_resources.require("setuptools>="+version); return + except pkg_resources.VersionConflict, e: + if was_imported: + print >>sys.stderr, ( + "The required version of setuptools (>=%s) is not available, and\n" + "can't be installed while this script is running. Please install\n" + " a more recent version first, using 'easy_install -U setuptools'." + "\n\n(Currently using %r)" + ) % (version, e.args[0]) + sys.exit(2) + else: + del pkg_resources, sys.modules['pkg_resources'] # reload ok + return do_download() + except pkg_resources.DistributionNotFound: + return do_download() + +def download_setuptools( + version=DEFAULT_VERSION, download_base=DEFAULT_URL, to_dir=os.curdir, + delay = 15 +): + """Download setuptools from a specified location and return its filename + + `version` should be a valid setuptools version number that is available + as an egg for download under the `download_base` URL (which should end + with a '/'). `to_dir` is the directory where the egg will be downloaded. + `delay` is the number of seconds to pause before an actual download attempt. + """ + import urllib2, shutil + egg_name = "setuptools-%s-py%s.egg" % (version,sys.version[:3]) + url = download_base + egg_name + saveto = os.path.join(to_dir, egg_name) + src = dst = None + if not os.path.exists(saveto): # Avoid repeated downloads + try: + from distutils import log + if delay: + log.warn(""" +--------------------------------------------------------------------------- +This script requires setuptools version %s to run (even to display +help). 
I will attempt to download it for you (from +%s), but +you may need to enable firewall access for this script first. +I will start the download in %d seconds. + +(Note: if this machine does not have network access, please obtain the file + + %s + +and place it in this directory before rerunning this script.) +---------------------------------------------------------------------------""", + version, download_base, delay, url + ); from time import sleep; sleep(delay) + log.warn("Downloading %s", url) + src = urllib2.urlopen(url) + # Read/write all in one block, so we don't create a corrupt file + # if the download is interrupted. + data = _validate_md5(egg_name, src.read()) + dst = open(saveto,"wb"); dst.write(data) + finally: + if src: src.close() + if dst: dst.close() + return os.path.realpath(saveto) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +def main(argv, version=DEFAULT_VERSION): + """Install or upgrade setuptools and EasyInstall""" + try: + import setuptools + except ImportError: + egg = None + try: + egg = download_setuptools(version, delay=0) + sys.path.insert(0,egg) + from setuptools.command.easy_install import main + return main(list(argv)+[egg]) # we're done here + finally: + if egg and os.path.exists(egg): + os.unlink(egg) + else: + if setuptools.__version__ == '0.0.1': + print >>sys.stderr, ( + "You have an obsolete version of setuptools installed. Please\n" + "remove it from your system entirely before rerunning this script." 
+ ) + sys.exit(2) + + req = "setuptools>="+version + import pkg_resources + try: + pkg_resources.require(req) + except pkg_resources.VersionConflict: + try: + from setuptools.command.easy_install import main + except ImportError: + from easy_install import main + main(list(argv)+[download_setuptools(delay=0)]) + sys.exit(0) # try to force an exit + else: + if argv: + from setuptools.command.easy_install import main + main(argv) + else: + print "Setuptools version",version,"or greater has been installed." + print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)' + +def update_md5(filenames): + """Update our built-in md5 registry""" + + import re + + for name in filenames: + base = os.path.basename(name) + f = open(name,'rb') + md5_data[base] = md5(f.read()).hexdigest() + f.close() + + data = [" %r: %r,\n" % it for it in md5_data.items()] + data.sort() + repl = "".join(data) + + import inspect + srcfile = inspect.getsourcefile(sys.modules[__name__]) + f = open(srcfile, 'rb'); src = f.read(); f.close() + + match = re.search("\nmd5_data = {\n([^}]+)}", src) + if not match: + print >>sys.stderr, "Internal error!" + sys.exit(2) + + src = src[:match.start(1)] + repl + src[match.end(1):] + f = open(srcfile,'w') + f.write(src) + f.close() + + +if __name__=='__main__': + if len(sys.argv)>2 and sys.argv[1]=='--md5update': + update_md5(sys.argv[2:]) + else: + main(sys.argv[1:]) + + + + + + diff --git a/pycassa/batch.py b/pycassa/batch.py index 7be32c5a..ba64d66f 100644 --- a/pycassa/batch.py +++ b/pycassa/batch.py @@ -1,16 +1,34 @@ +"""Tools to support batch operations.""" + import threading from pycassa.cassandra.ttypes import (Clock, Column, ColumnOrSuperColumn, ConsistencyLevel, Deletion, Mutation, SlicePredicate, SuperColumn) +__all__ = ['Mutator', 'CfMutator'] class Mutator(object): - """Batch update convenience mechanism. - Queues insert/update/remove operations and executes them when the queue - is filled up or explicitly using `send`. 
+ """ + Batch update convenience mechanism. + + Queues insert/update/remove operations and executes them when the queue + is full or `send` is called explicitly. + """ def __init__(self, client, queue_size=100, write_consistency_level=None): + """Creates a new Mutator object. + + :Parameters: + `client`: :class:`~pycassa.connection.Connection` + The connection that will be used. + `queue_size`: int + The number of operations to queue before they are executed + automatically. + `write_consistency_level`: :class:`~pycassa.cassandra.ttypes.ConsistencyLevel` + The Cassandra write consistency level. + + """ self._buffer = [] self._lock = threading.RLock() self.client = client @@ -92,7 +110,24 @@ def remove(self, column_family, key, columns=None, super_column=None, clock=None class CfMutator(Mutator): + """ + A :class:`~pycassa.batch.Mutator` that deals only with one column family. + + """ + def __init__(self, column_family, queue_size=100, write_consistency_level=None): + """Creates a new CfMutator object. + + :Parameters: + `column_family`: :class:`~pycassa.columnfamily.ColumnFamily` + The column family that all operations will be on. + `queue_size`: int + The number of operations to queue before they are executed + automatically. + `write_consistency_level`: :class:`~pycassa.cassandra.ttypes.ConsistencyLevel` + The Cassandra write consistency level. + + """ wcl = write_consistency_level or column_family.write_consistency_level super(CfMutator, self).__init__(column_family.client, queue_size=queue_size, write_consistency_level=wcl) diff --git a/pycassa/columnfamily.py b/pycassa/columnfamily.py index a239b5c4..2b2576c0 100644 --- a/pycassa/columnfamily.py +++ b/pycassa/columnfamily.py @@ -1,3 +1,10 @@ +""" +Provides an abstraction of Cassandra's data model to allow for easy +manipulation of data inside Cassandra. + +.. 
seealso:: :mod:`pycassa.columnfamilymap` +""" + from pycassa.cassandra.ttypes import Column, ColumnOrSuperColumn,\ ColumnParent, ColumnPath, ConsistencyLevel, NotFoundException,\ SlicePredicate, SliceRange, SuperColumn, Clock, KeyRange,\ @@ -11,6 +18,7 @@ from batch import CfMutator __all__ = ['gm_timestamp', 'ColumnFamily'] + _TYPES = ['BytesType', 'LongType', 'IntegerType', 'UTF8Type', 'AsciiType', 'LexicalUUIDType', 'TimeUUIDType'] _NON_SLICE = 0 @@ -19,9 +27,11 @@ def gm_timestamp(): """ - Returns - ------- - int : UNIX epoch time in GMT + Gets the current GMT timestamp + + :Returns: + integer UNIX epoch time in GMT + """ return int(time.time() * 1e6) @@ -33,6 +43,8 @@ def create_SlicePredicate(columns, column_start, column_finish, column_reversed, return SlicePredicate(slice_range=sr) class ColumnFamily(object): + """An abstraction of a Cassandra column family or super column family.""" + def __init__(self, client, column_family, buffer_size=1024, read_consistency_level=ConsistencyLevel.ONE, write_consistency_level=ConsistencyLevel.ONE, @@ -40,48 +52,53 @@ def __init__(self, client, column_family, buffer_size=1024, dict_class=dict, autopack_names=True, autopack_values=True): """ - Construct a ColumnFamily - - Parameters - ---------- - client : cassandra.Cassandra.Client - Cassandra client with thrift API - column_family : str - The name of this ColumnFamily - buffer_size : int - When calling get_range(), the intermediate results need to be - buffered if we are fetching many rows, otherwise the Cassandra - server will overallocate memory and fail. This is the size of - that buffer. 
- read_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any read operation - write_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any write operation - timestamp : function - The default timestamp function returns: - int(time.mktime(time.gmtime())) - Or the number of seconds since Unix epoch in GMT. - Set timestamp to replace the default timestamp function with your - own. - super : bool - Whether this ColumnFamily has SuperColumns - dict_class : class (must act like the dict type) - The default dict_class is dict. - If the order of columns matter to you, pass your own dictionary - class, or python 2.7's new collections.OrderedDict. All returned - rows and subcolumns are instances of this. - autopack_names : bool - Whether column and supercolumn names should be packed automatically - based on the comparator and subcomparator for the column - family. This does not typically work when used with - ColumnFamilyMaps. - autopack_values : bool - Whether column values should be packed automatically based on - the validator_class for a given column. This should probably - be set to False when used with a ColumnFamilyMap. + Constructs an abstraction of a Cassandra column family or super column family. + + Operations on this, such as `get` or `insert` will get data from or + insert data into the corresponding Cassandra column family. + + :Parameters: + `client`: :class:`cassandra.Cassandra.Client` + Cassandra client with thrift API + `column_family`: string + The name of this ColumnFamily + `buffer_size`: integer + When calling `get_range`, the intermediate results need to be + buffered if we are fetching many rows, otherwise the Cassandra + server will overallocate memory and fail. This is the size of + that buffer. 
+ `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any read operation + `write_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any write operation + `timestamp`: function + The default timestamp function returns: + int(time.mktime(time.gmtime())) + Or the number of seconds since Unix epoch in GMT. + Set timestamp to replace the default timestamp function with your + own. + `super`: bool + Whether this ColumnFamily has SuperColumns + `dict_class` : class (must act like the dict type) + The default dict_class is :class:`dict`. + If the order of columns matter to you, pass your own dictionary + class, or python 2.7's new :class:`collections.OrderedDict`. All returned + rows and subcolumns are instances of this. + `autopack_names`: bool + Whether column and supercolumn names should be packed automatically + based on the comparator and subcomparator for the column + family. This does not typically work when used with + :class:`~pycassa.columnfamilymap.ColumnFamilyMap`. + `autopack_values`: bool + Whether column values should be packed automatically based on + the validator_class for a given column. This should probably + be set to ``False`` when used with a + :class:`~pycassa.columnfamilymap.ColumnFamilyMap`. + """ + self.client = client self.column_family = column_family self.buffer_size = buffer_size @@ -242,14 +259,14 @@ def _convert_time_to_uuid(self, datetime, lowest_val): This is to assist with getting a time slice of columns when the column names are TimeUUID. - Parameters - ---------- - datetime: datetime - - The time to use for the timestamp portion of the UUID. - lowest_val: boolean - - Whether the UUID produced should be the lowest possible value - UUID with the same timestamp as datetime or the highest possible - value. 
+ :Parameters: + `datetime`: datetime + - The time to use for the timestamp portion of the UUID. + `lowest_val`: boolean + - Whether the UUID produced should be the lowest possible value + UUID with the same timestamp as datetime or the highest possible + value. + """ if isinstance(datetime, uuid.UUID): return datetime @@ -328,33 +345,32 @@ def get(self, key, columns=None, column_start="", column_finish="", """ Fetch a key from a Cassandra server - Parameters - ---------- - key : str - The key to fetch - columns : [str] - Limit the columns or super_columns fetched to the specified list - column_start : str - Only fetch when a column or super_column is >= column_start - column_finish : str - Only fetch when a column or super_column is <= column_finish - column_reversed : bool - Fetch the columns or super_columns in reverse order. This will do - nothing unless you passed a dict_class to the constructor. - column_count : int - Limit the number of columns or super_columns fetched per key - include_timestamp : bool - If true, return a (value, timestamp) tuple for each column - super_column : str - Return columns only in this super_column - read_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any read operation - - Returns - ------- - if include_timestamp == True: {'column': ('value', timestamp)} - else: {'column': 'value'} + :Parameters: + `key`: str + The key to fetch + `columns`: [str] + Limit the columns or super_columns fetched to the specified list + `column_start`: str + Only fetch when a column or super_column is >= column_start + `column_finish`: str + Only fetch when a column or super_column is <= column_finish + `column_reversed`: bool + Fetch the columns or super_columns in reverse order. This will do + nothing unless you passed a ``dict_class`` to the constructor. 
+ `column_count`: int + Limit the number of columns or super_columns fetched per key + `include_timestamp` : bool + If true, return a (value, timestamp) tuple for each column + `super_column`: str + Return columns only in this super_column + `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any read operation + + :Returns: + if include_timestamp == True: {'column': ('value', timestamp)} + else: {'column': 'value'} + """ super_column, column_start, column_finish = self._pack_slice_cols( @@ -383,36 +399,35 @@ def get_indexed_slices(self, index_clause, columns=None, column_start="", column """ Fetches a list of KeySlices from a Cassandra server based on an index clause - Parameters - ---------- - index_clause : IndexClause - Limits the keys that are returned based on expressions that compare - the value of a column to a given value. At least one of the - expressions in the IndexClause must be on an indexed column. - See index_clause.create_index_clause() and create_index_expression(). - columns : [str] - Limit the columns or super_columns fetched to the specified list - column_start : str - Only fetch when a column or super_column is >= column_start - column_finish : str - Only fetch when a column or super_column is <= column_finish - column_reversed : bool - Fetch the columns or super_columns in reverse order. This will do - nothing unless you passed a dict_class to the constructor. 
- column_count : int - Limit the number of columns or super_columns fetched per key - include_timestamp : bool - If true, return a (value, timestamp) tuple for each column - super_column : str - Return columns only in this super_column - read_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any read operation - - Returns - ------- - if include_timestamp == True: {key : {column : (value, timestamp)}} - else: {key : {column : value}} + :Parameters: + `index_clause`: :class:`~pycassa.cassandra.ttypes.IndexClause` + Limits the keys that are returned based on expressions that compare + the value of a column to a given value. At least one of the + expressions in the IndexClause must be on an indexed column. + .. seealso:: meth::pycassa.index.create_index_clause() and + meth::pycassa.index.create_index_expression(). + `columns`: [str] + Limit the columns or super_columns fetched to the specified list + `column_start`: str + Only fetch when a column or super_column is >= column_start + `column_finish`: str + Only fetch when a column or super_column is <= column_finish + `column_reversed`: bool + Fetch the columns or super_columns in reverse order. This will do + nothing unless you passed a dict_class to the constructor. 
+ `column_count`: int + Limit the number of columns or super_columns fetched per key + `include_timestamp`: bool + If true, return a (value, timestamp) tuple for each column + `super_column`: str + Return columns only in this super_column + `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any read operation + + :Returns: + if include_timestamp == True: {key : {column : (value, timestamp)}} + else: {key : {column : value}} """ (super_column, column_start, column_finish) = self._pack_slice_cols( @@ -446,35 +461,33 @@ def multiget(self, keys, columns=None, column_start="", column_finish="", column_reversed=False, column_count=100, include_timestamp=False, super_column=None, read_consistency_level = None): """ - Fetch multiple key from a Cassandra server - - Parameters - ---------- - keys : [str] - A list of keys to fetch - columns : [str] - Limit the columns or super_columns fetched to the specified list - column_start : str - Only fetch when a column or super_column is >= column_start - column_finish : str - Only fetch when a column or super_column is <= column_finish - column_reversed : bool - Fetch the columns or super_columns in reverse order. This will do - nothing unless you passed a dict_class to the constructor. 
- column_count : int - Limit the number of columns or super_columns fetched per key - include_timestamp : bool - If true, return a (value, timestamp) tuple for each column - super_column : str - Return columns only in this super_column - read_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any read operation - - Returns - ------- - if include_timestamp == True: {'key': {'column': ('value', timestamp)}} - else: {'key': {'column': 'value'}} + Fetch multiple keys from a Cassandra server + + :Parameters: + `keys`: [str] + A list of keys to fetch + `columns`: [str] + Limit the columns or super_columns fetched to the specified list + `column_start`: str + Only fetch when a column or super_column is >= column_start + `column_finish`: str + Only fetch when a column or super_column is <= column_finish + `column_reversed`: bool + Fetch the columns or super_columns in reverse order. This will do + nothing unless you passed a dict_class to the constructor. 
+ `column_count`: int + Limit the number of columns or super_columns fetched per key + `include_timestamp`: bool + If true, return a (value, timestamp) tuple for each column + `super_column`: str + Return columns only in this super_column + `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any read operation + + :Returns: + if include_timestamp == True: {'key': {'column': ('value', timestamp)}} + else: {'key': {'column': 'value'}} """ (super_column, column_start, column_finish) = self._pack_slice_cols( @@ -504,19 +517,17 @@ def get_count(self, key, super_column=None, read_consistency_level = None): """ Count the number of columns for a key - Parameters - ---------- - key : str - The key with which to count columns - super_column : str - Count the columns only in this super_column - read_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any read operation - - Returns - ------- - int Count of columns + :Parameters: + `key` : str + The key with which to count columns + `super_column` : str + Count the columns only in this super_column + `read_consistency_level` : :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any read operation + + :Returns: + int Count of columns """ if super_column != '': @@ -536,36 +547,34 @@ def get_range(self, start="", finish="", columns=None, column_start="", """ Get an iterator over keys in a specified range - Parameters - ---------- - start : str - Start from this key (inclusive) - finish : str - End at this key (inclusive) - columns : [str] - Limit the columns or super_columns fetched to the specified list - column_start : str - Only fetch when a column or super_column is >= column_start - column_finish : str - Only fetch when a column or super_column is <= column_finish - column_reversed : bool - Fetch the columns or 
super_columns in reverse order. This will do - nothing unless you passed a dict_class to the constructor. - column_count : int - Limit the number of columns or super_columns fetched per key - row_count : int - Limit the number of rows fetched - include_timestamp : bool - If true, return a (value, timestamp) tuple for each column - super_column : string - Return columns only in this super_column - read_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any read operation - - Returns - ------- - iterator over ('key', {'column': 'value'}) + :Parameters: + `start`: str + Start from this key (inclusive) + `finish`: str + End at this key (inclusive) + `columns`: [str] + Limit the columns or super_columns fetched to the specified list + `column_start`: str + Only fetch when a column or super_column is >= column_start + `column_finish`: str + Only fetch when a column or super_column is <= column_finish + `column_reversed`: bool + Fetch the columns or super_columns in reverse order. This will do + nothing unless you passed a dict_class to the constructor. 
+ `column_count`: int + Limit the number of columns or super_columns fetched per key + `row_count`: int + Limit the number of rows fetched + `include_timestamp`: bool + If true, return a (value, timestamp) tuple for each column + `super_column`: string + Return columns only in this super_column + `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any read operation + + :Returns: + iterator over ('key', {'column': 'value'}) """ (super_column, column_start, column_finish) = self._pack_slice_cols( @@ -616,21 +625,19 @@ def insert(self, key, columns, clock=None, ttl=None, """ Insert or update columns for a key - Parameters - ---------- - key : str - The key to insert or update the columns at - columns : dict - Column: {'column': 'value'} - SuperColumn: {'column': {'subcolumn': 'value'}} - The columns or supercolumns to insert or update - write_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any write operation - - Returns - ------- - int timestamp + :Parameters: + `key`: str + The key to insert or update the columns at + `columns`: dict + Column: {'column': 'value'} + SuperColumn: {'column': {'subcolumn': 'value'}} + The columns or supercolumns to insert or update + `write_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any write operation + + :Returns: + int timestamp """ return self.batch_insert({key: columns}, clock=clock, ttl=ttl, write_consistency_level=write_consistency_level) @@ -639,18 +646,16 @@ def batch_insert(self, rows, clock=None, ttl=None, write_consistency_level = Non """ Insert or update columns for multiple keys - Parameters - ---------- - rows : dict - Column: {'row': {'column': 'value'}} - SuperColumn: {'row': {'column': {'subcolumn': 'value'}}} - write_consistency_level : ConsistencyLevel - Affects the 
guaranteed replication factor before returning from - any write operation - - Returns - ------- - int timestamp + :Parameters: + `rows`: :class:`dict` + Column: {'row': {'column': 'value'}} + SuperColumn: {'row': {'column': {'subcolumn': 'value'}}} + `write_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any write operation + + :Returns: + int timestamp """ clock = Clock(timestamp=self.timestamp()) batch = self.batch(write_consistency_level=write_consistency_level) @@ -663,21 +668,19 @@ def remove(self, key, columns=None, super_column=None, write_consistency_level = """ Remove a specified key or columns - Parameters - ---------- - key : str - The key to remove. If columns is not set, remove all columns - columns : list - Delete the columns or super_columns in this list - super_column : str - Delete the columns from this super_column - write_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any write operation - - Returns - ------- - int timestamp + :Parameters: + `key`: str + The key to remove. If columns is not set, remove all columns + `columns`: list + Delete the columns or super_columns in this list + `super_column`: str + Delete the columns from this super_column + `write_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any write operation + + :Returns: + int timestamp """ clock = Clock(timestamp=self.timestamp()) batch = self.batch(write_consistency_level=write_consistency_level) @@ -687,19 +690,17 @@ def remove(self, key, columns=None, super_column=None, write_consistency_level = def batch(self, queue_size=100, write_consistency_level=None): """ - Create batch mutator for doing multiple insert,update,remove + Create batch mutator for doing multiple insert, update, and remove operations using as few roundtrips as possible. 
- Parameters - ---------- - queue_size : int - Max number of mutations per request - write_consistency_level: ConsistencyLevel - Consistency level used for mutations. + :Parameters: + `queue_size`: int + Max number of mutations per request + `write_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Consistency level used for mutations. - Returns - ------- - CfMutator mutator + :Returns: + :class:`pycassa.batch.CfMutator` """ if write_consistency_level is None: write_consistency_level = self.write_consistency_level @@ -709,10 +710,15 @@ def batch(self, queue_size=100, write_consistency_level=None): def truncate(self): """ Marks the entire ColumnFamily as deleted. - From the user's perspective a successful call to truncate will result complete data deletion from cfname. - Internally, however, disk space will not be immediatily released, as with all deletes in cassandra, this one - only marks the data as deleted. - The operation succeeds only if all hosts in the cluster at available and will throw an UnavailableException if - some hosts are down. + + From the user's perspective a successful call to truncate will result + complete data deletion from cfname. Internally, however, disk space + will not be immediatily released, as with all deletes in cassandra, + this one only marks the data as deleted. + + The operation succeeds only if all hosts in the cluster at available + and will throw an :exc:`.UnavailableException` if some hosts are + down. + """ self.client.truncate(self.column_family) diff --git a/pycassa/columnfamilymap.py b/pycassa/columnfamilymap.py index 56c0d754..b1a5fcc2 100644 --- a/pycassa/columnfamilymap.py +++ b/pycassa/columnfamilymap.py @@ -1,3 +1,8 @@ +""" +Provides a means for mapping an existing class to a column family. 
+ +""" + from pycassa.types import Column from pycassa.cassandra.ttypes import IndexExpression @@ -9,19 +14,25 @@ def create_instance(cls, **kwargs): return instance class ColumnFamilyMap(object): + """ + Maps an existing class to a column family. Class fields become columns, + and instances of that class can be represented as rows in the column + family. + + """ + def __init__(self, cls, column_family, columns=None, raw_columns=False): """ Construct a ObjectFamily - Parameters - ---------- - cls : class - Instances of cls are generated on get*() requests - column_family: ColumnFamily - The ColumnFamily to tie with cls - raw_columns: boolean - Whether all columns should be fetched into the raw_columns field in - requests + :Parameters: + `cls`: class + Instances of cls are generated on ``get*()`` requests + `column_family`: :class:`~pycassa.columnfamily.ColumnFamily` + The :class:`~pycassa.columnfamily.ColumnFamily` to tie with cls + `raw_columns`: boolean + Whether all columns should be fetched into the raw_columns field in + requests """ self.cls = cls self.column_family = column_family @@ -54,30 +65,28 @@ def get(self, key, *args, **kwargs): """ Fetch a key from a Cassandra server - Parameters - ---------- - key : str - The key to fetch - columns : [str] - Limit the columns or super_columns fetched to the specified list - column_start : str - Only fetch when a column or super_column is >= column_start - column_finish : str - Only fetch when a column or super_column is <= column_finish - column_reversed : bool - Fetch the columns or super_columns in reverse order. This will do - nothing unless you passed a dict_class to the constructor. 
- column_count : int - Limit the number of columns or super_columns fetched per key - super_column : str - Fetch only this super_column - read_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any read operation - - Returns - ------- - Class instance + :Parameters: + `key`: str + The key to fetch + `columns`: [str] + Limit the columns or super_columns fetched to the specified list + `column_start`: str + Only fetch when a column or super_column is >= column_start + `column_finish`: str + Only fetch when a column or super_column is <= column_finish + `column_reversed`: bool + Fetch the columns or super_columns in reverse order. This will do + nothing unless you passed a dict_class to the constructor. + `column_count`: int + Limit the number of columns or super_columns fetched per key + `super_column`: str + Fetch only this super_column + `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any read operation + + :Returns: + Class instance """ if 'columns' not in kwargs and not self.column_family.super and not self.raw_columns: kwargs['columns'] = self.columns.keys() @@ -89,12 +98,14 @@ def get(self, key, *args, **kwargs): vals = self.dict_class() for super_column, subcols in columns.iteritems(): combined = self.combine_columns(subcols) - vals[super_column] = create_instance(self.cls, key=key, super_column=super_column, **combined) - + vals[super_column] = create_instance(self.cls, key=key, + super_column=super_column, **combined) return vals combined = self.combine_columns(columns) - return create_instance(self.cls, key=key, super_column=kwargs['super_column'], **combined) + return create_instance(self.cls, key=key, + super_column=kwargs['super_column'], + **combined) combined = self.combine_columns(columns) return create_instance(self.cls, key=key, **combined) @@ -102,36 +113,37 @@ def get(self, key, *args, **kwargs): 
def get_indexed_slices(self, instance=None, *args, **kwargs): """ Fetches a list of KeySlices from a Cassandra server based on an index clause - - Parameters - ---------- - index_clause : IndexClause - Limits the keys that are returned based on expressions that compare - the value of a column to a given value. At least one of the - expressions in the IndexClause must be on an indexed column. - See index_clause.create_index_clause() and create_index_expression(). - columns : [str] - Limit the columns or super_columns fetched to the specified list - column_start : str - Only fetch when a column or super_column is >= column_start - column_finish : str - Only fetch when a column or super_column is <= column_finish - column_reversed : bool - Fetch the columns or super_columns in reverse order. This will do - nothing unless you passed a dict_class to the constructor. - column_count : int - Limit the number of columns or super_columns fetched per key - include_timestamp : bool - If true, return a (value, timestamp) tuple for each column - super_column : str - Return columns only in this super_column - read_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any read operation - - Returns - ------- - Class instance + + :Parameters: + `index_clause`: :class:`~pycassa.cassandra.ttypes.IndexClause` + Limits the keys that are returned based on expressions that compare + the value of a column to a given value. At least one of the + expressions in the IndexClause must be on an indexed column. + `columns`: [str] + Limit the columns or super_columns fetched to the specified list + `column_start`: str + Only fetch when a column or super_column is >= column_start + `column_finish`: str + Only fetch when a column or super_column is <= column_finish + `column_reversed`: bool + Fetch the columns or super_columns in reverse order. This will do + nothing unless you passed a dict_class to the constructor. 
+ `column_count`: int + Limit the number of columns or super_columns fetched per key + `include_timestamp`: bool + If true, return a (value, timestamp) tuple for each column + `super_column`: str + Return columns only in this super_column + `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any read operation + + :Returns: + Class instance + + .. seealso:: :meth:`pycassa.index.create_index_clause()` and + :meth:`pycassa.index.create_index_expression()`. + """ if 'columns' not in kwargs and not self.column_family.super and not self.raw_columns: @@ -167,32 +179,32 @@ def get_indexed_slices(self, instance=None, *args, **kwargs): def multiget(self, *args, **kwargs): """ - Fetch multiple key from a Cassandra server - - Parameters - ---------- - keys : [str] - A list of keys to fetch - columns : [str] - Limit the columns or super_columns fetched to the specified list - column_start : str - Only fetch when a column or super_column is >= column_start - column_finish : str - Only fetch when a column or super_column is <= column_finish - column_reversed : bool - Fetch the columns or super_columns in reverse order. This will do - nothing unless you passed a dict_class to the constructor. 
- column_count : int - Limit the number of columns or super_columns fetched per key - super_column : str - Fetch only this super_column - read_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any read operation - - Returns - ------- - {'key': Class instance} + Fetch multiple keys from a Cassandra server + + :Parameters: + `keys`: [str] + A list of keys to fetch + `columns`: [str] + Limit the columns or super_columns fetched to the specified list + `column_start`: str + Only fetch when a column or super_column is >= column_start + `column_finish`: str + Only fetch when a column or super_column is <= column_finish + `column_reversed`: bool + Fetch the columns or super_columns in reverse order. This will do + nothing unless you passed a dict_class to the constructor. + `column_count`: int + Limit the number of columns or super_columns fetched per key + `include_timestamp`: bool + If true, return a (value, timestamp) tuple for each column + `super_column`: str + Return columns only in this super_column + `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any read operation + + :Returns: + {'key': Class instance} """ if 'columns' not in kwargs and not self.column_family.super and not self.raw_columns: kwargs['columns'] = self.columns.keys() @@ -218,49 +230,52 @@ def get_count(self, *args, **kwargs): """ Count the number of columns for a key - Parameters - ---------- - key : str - The key with which to count columns - - Returns - ------- - int Count of columns + :Parameters: + `key`: str + The key with which to count columns + `super_column`: str + Count the columns only in this super_column + `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any read operation + + :Returns: + int Count of columns """ return 
self.column_family.get_count(*args, **kwargs) def get_range(self, *args, **kwargs): """ Get an iterator over keys in a specified range - - Parameters - ---------- - start : str - Start from this key (inclusive) - finish : str - End at this key (inclusive) - columns : [str] - Limit the columns or super_columns fetched to the specified list - column_start : str - Only fetch when a column or super_column is >= column_start - column_finish : str - Only fetch when a column or super_column is <= column_finish - column_reversed : bool - Fetch the columns or super_columns in reverse order. This will do - nothing unless you passed a dict_class to the constructor. - column_count : int - Limit the number of columns or super_columns fetched per key - row_count : int - Limit the number of rows fetched - super_column : str - Fetch only this super_column - read_consistency_level : ConsistencyLevel - Affects the guaranteed replication factor before returning from - any read operation - - Returns - ------- - iterator over Class instance + + :Parameters: + `start`: str + Start from this key (inclusive) + `finish`: str + End at this key (inclusive) + `columns`: [str] + Limit the columns or super_columns fetched to the specified list + `column_start`: str + Only fetch when a column or super_column is >= column_start + `column_finish`: str + Only fetch when a column or super_column is <= column_finish + `column_reversed`: bool + Fetch the columns or super_columns in reverse order. This will do + nothing unless you passed a dict_class to the constructor. 
+ `column_count`: int + Limit the number of columns or super_columns fetched per key + `row_count`: int + Limit the number of rows fetched + `include_timestamp`: bool + If true, return a (value, timestamp) tuple for each column + `super_column`: string + Return columns only in this super_column + `read_consistency_level`: :class:`pycassa.cassandra.ttypes.ConsistencyLevel` + Affects the guaranteed replication factor before returning from + any read operation + + :Returns: + iterator over Class instance """ if 'columns' not in kwargs and not self.column_family.super and not self.raw_columns: kwargs['columns'] = self.columns.keys() @@ -281,18 +296,19 @@ def get_range(self, *args, **kwargs): def insert(self, instance, columns=None): """ - Insert or update columns for a key - - Parameters - ---------- - instance : Class instance - The key to insert or update the columns at - columns : ['column'] - Limit the columns inserted to this list - - Returns - ------- - int timestamp + Insert or update columns + + :Parameters: + `instance`: Class instance + The class to insert or update the columns in + `columns`: dict + Column: {'column': 'value'} + SuperColumn: {'column': {'subcolumn': 'value'}} + The columns or supercolumns to limit the insertion + or update to. + + :Returns: + int timestamp """ insert_dict = {} if columns is None: @@ -311,16 +327,14 @@ def remove(self, instance, column=None): """ Remove this instance - Parameters - ---------- - instance : Class instance - Remove the instance where the key is instance.key - column : str - If set, remove only this Column. Doesn't do anything for SuperColumns + :Parameters: + `instance`: Class instance + Remove the instance where the key is instance.key + `column`: str + If set, remove only this Column. Doesn't do anything for SuperColumns - Returns - ------- - int timestamp + :Returns: + int timestamp """ # Hmm, should we only remove the columns specified on construction? # It's slower, so we'll leave it out. 
diff --git a/pycassa/connection.py b/pycassa/connection.py index f70cf310..e0a5a6d9 100644 --- a/pycassa/connection.py +++ b/pycassa/connection.py @@ -1,3 +1,17 @@ +"""Tools for connecting to a Cassandra cluster. + +.. seealso:: Module :mod:`~pycassa.pool` to see how connections + can be pooled. + +To get a connection object which you can use directly, with +:class:`~pycassa.columnfamily.ColumnFamily`, or with +:class:`~pycassa.columnfamilymap.ColumnFamilyMap`, you can do the +following: + + >>> import pycassa + >>> connection = pycassa.connect('Keyspace', ['hostname:9160']) +""" + from exceptions import Exception import logging import random @@ -15,7 +29,8 @@ from batch import Mutator -__all__ = ['connect', 'connect_thread_local', 'NoServerAvailable'] +__all__ = ['connect', 'connect_thread_local', 'NoServerAvailable', + 'Connection'] DEFAULT_SERVER = 'localhost:9160' API_VERSION = VERSION.split('.') @@ -23,6 +38,7 @@ log = logging.getLogger('pycassa') class NoServerAvailable(Exception): + """Raised if all servers are currently marked dead.""" pass class ClientTransport(object): @@ -71,44 +87,41 @@ def connect(keyspace, servers=None, framed_transport=True, timeout=None, If the connection fails, it will attempt to connect to each server on the list in turn until one succeeds. If it is unable to find an active server, - it will throw a NoServerAvailable exception. + it will throw a `NoServerAvailable` exception. Failing servers are kept on a separate list and eventually retried, no sooner than `retry_time` seconds after failure. - Parameters - ---------- - keyspace: string - The keyspace to associate this connection with. - servers : [server] - List of Cassandra servers with format: "hostname:port" - - Default: ['localhost:9160'] - framed_transport: bool - If True, use a TFramedTransport instead of a TBufferedTransport - timeout: float - Timeout in seconds (e.g. 0.5) + :Parameters: + `keyspace`: string + The keyspace to associate this connection with. 
+ `servers`: [server] + List of Cassandra servers with format: "hostname:port" - Default: None (it will stall forever) - retry_time: float - Minimum time in seconds until a failed server is reinstated. (e.g. 0.5) + Default: ['localhost:9160'] + `framed_transport`: bool + If True, use a TFramedTransport instead of a TBufferedTransport + `timeout`: float + Timeout in seconds (e.g. 0.5) - Default: 60 - credentials : dict - Dictionary of Credentials + Default: None (it will stall forever) + `retry_time`: float + Minimum time in seconds until a failed server is reinstated. (e.g. 0.5) - Example: {'username':'jsmith', 'password':'havebadpass'} - recycle: float - Max time in seconds before an open connection is closed and returned to the pool. + Default: 60 + `credentials`: dict + Dictionary of Credentials - Default: None (Never recycle) + Example: {'username':'jsmith', 'password':'havebadpass'} + `recycle`: float + Max time in seconds before an open connection is closed and returned to the pool. - round_robin: bool - *DEPRECATED* + Default: None (Never recycle) + `round_robin`: bool + *DEPRECATED* - Returns - ------- - Cassandra client + :Returns: + Cassandra client """ if servers is None: @@ -156,6 +169,7 @@ def mark_dead(self, server): self._lock.release() class Connection(object): + """A connection that gives access to raw Thrift calls.""" def __init__(self, keyspace, servers, framed_transport=True, timeout=None, retry_time=10, recycle=None, credentials=None, @@ -244,16 +258,13 @@ def get_keyspace_description(self, keyspace=None): """ Describes the given keyspace. - Parameters - ---------- - keyspace: str - Defaults to the current keyspace. - - Returns - ------- - {column_family_name: CfDef} - where a CfDef has many attributes describing the column family, including - the dictionary column_metadata = {column_name: ColumnDef} + :param keyspace: The keyspace to describe. Defaults to the current + keyspace. 
+ + :Returns: + ``{column_family_name: CfDef}`` + where a ``CfDef`` has many attributes describing the column family, including + the dictionary ``column_metadata = {column_name: ColumnDef}`` """ if keyspace is None: keyspace = self._keyspace @@ -270,4 +281,8 @@ def get_keyspace_description(self, keyspace=None): return cf_defs def batch(self, *args, **kwargs): + """ + Returns a mutator on this connection. + + """ return Mutator(self, *args, **kwargs) diff --git a/pycassa/index.py b/pycassa/index.py index 8b705730..7f3f302a 100644 --- a/pycassa/index.py +++ b/pycassa/index.py @@ -1,33 +1,41 @@ +""" +Tools for using Cassandra's secondary indexes. + +""" + from pycassa.cassandra.ttypes import IndexClause, IndexExpression,\ IndexOperator +__all__ = ['create_index_clause', 'create_index_expression'] + def create_index_clause(expr_list, start_key='', count=100): """ - Constructs an IndexClause for use with get_indexed_slices() + Constructs an :class:`~pycassa.cassandra.ttypes.IndexClause` for use with + :meth:`~pycassa.columnfamily.get_indexed_slices()` - Parameters - ---------- - expr_list : [IndexExpression] - A list of IndexExpressions to match - start_key : str + :param expr_list: [:class:`~pycassa.cassandra.ttypes.IndexExpression`] + A list of `IndexExpressions` to match + :param start_key: str The key to begin searching from - count : int + :param count: int The number of results to return + """ return IndexClause(expressions=expr_list, start_key=start_key, count=count) def create_index_expression(column_name, value, op=IndexOperator.EQ): """ - Constructs an IndexExpression to use with an IndexClause + Constructs an :class:`~pycassa.cassandra.ttypes.IndexExpression` to use + in an :class:`~pycassa.cassandra.ttypes.IndexClause` - Parameters - ---------- - column_name : str - Name of an indexed or non indexed column - value : str + :param column_name: string + Name of an indexed or non-indexed column + :param value: The value that will be compared to column values 
using op - op : IndexOperator - The binary operator to apply to column values and 'value' + :param op: :class:`~pycassa.cassandra.ttypes.IndexOperator` + The binary operator to apply to column values and `value`. Defaults + to `IndexOperator.EQ`, which tests for equality. + """ return IndexExpression(column_name=column_name, op=op, value=value) diff --git a/pycassa/logger.py b/pycassa/logger.py index c6642688..d8e94fc2 100644 --- a/pycassa/logger.py +++ b/pycassa/logger.py @@ -1,6 +1,9 @@ +"""Logging facilities for pycassa.""" + import logging class PycassaLogger: + """pycassa's logger.""" __shared_state = {} @@ -13,6 +16,16 @@ class PycassaLogger: 'critical': logging.CRITICAL} def __init__(self, level='info', logger_name='pycassa'): + """ + Creates a new :class:`PycassaLogger`. + + This class uses the `Borg design pattern `_ + to achieve singleton-like sharing. + + :param level: the logging level for the logger + :param logger_name: the name that will be used to call :meth:`logging.getLogger()` + + """ self.__dict__ = self.__shared_state level = PycassaLogger._levels[level] logging.basicConfig(level=level) diff --git a/pycassa/pool.py b/pycassa/pool.py index e2b64c8e..78b27d69 100644 --- a/pycassa/pool.py +++ b/pycassa/pool.py @@ -6,11 +6,15 @@ # the MIT License: http://www.opensource.org/licenses/mit-license.php -"""Connection pooling for Cassandra connections. +""" +Connection pooling for Cassandra connections. Provides a number of connection pool implementations for a variety of usage scenarios and thread behavior requirements imposed by the application. + +..
seealso:: :mod:`pycassa.connection` + """ import weakref, time, threading, random @@ -24,14 +28,23 @@ from thrift import Thrift +__all__ = ['Pool', 'QueuePool', 'SingletonThreadPool', 'StaticPool', + 'NullPool', 'AssertionPool', 'PoolListener', 'ConnectionWrapper', + 'ImmutableConnectionWrapper', 'MutableConnectionWrapper', + 'ReplaceableConnectionWrapper', 'AllServersUnavailable', + 'MaximumRetryException', 'NoConnectionAvailable', + 'InvalidRequestError'] + class Pool(object): - """Abstract base class for connection pools.""" + """An abstract base class for all other pools.""" def __init__(self, keyspace, server_list=['localhost:9160'], credentials=None, timeout=0.5, logging_name=None, use_threadlocal=True, listeners=[]): """ - Construct a Pool. + Construct an instance of the abstract base class :class:`Pool`. This + should not be called directly, only by subclass :meth:`__init__()` + methods. :param keyspace: The keyspace this connection pool will make all connections to. @@ -49,16 +62,16 @@ def __init__(self, keyspace, server_list=['localhost:9160'], :param logging_name: String identifier which will be used within the "name" field of logging records generated within the - "pycassa.pool" logger. Defaults to id(pool). + "pycassa.pool" logger. Defaults to ``id(pool)``. :param use_threadlocal: If set to True, repeated calls to - :meth:`get` within the same application thread will + :meth:`get()` within the same application thread will return the same ConnectionWrapper object, if one has already been retrieved from the pool and has not been returned yet. :param listeners: A list of - :class:`~PoolListener`-like objects or + :class:`PoolListener`-like objects or dictionaries of callables that receive events when Cassandra connections are created, checked out and checked in to the pool. @@ -160,22 +173,31 @@ def recreate(self): raise NotImplementedError() def dispose(self): - """Dispose of this pool. + """ + Dispose of this pool. 
This method leaves the possibility of checked-out connections - remaining open, It is advised to not reuse the pool once dispose() - is called, and to instead use a new pool constructed by the - recreate() method. - """ + remaining open. It is advised to not reuse the pool once + :meth:`dispose()` is called, and to instead use a new pool + constructed by :meth:`recreate()`. + """ raise NotImplementedError() def return_conn(self, record): - """Returns a ConnectionWrapper to the pool.""" + """ + Return a ConnectionWrapper to the pool. + + :param record: The :class:`ConnectionWrapper` to return to the pool. + + """ self._do_return_conn(record) def get(self): - """Gets a ConnectionWrapper from the pool.""" + """ + Get a :class:`ConnectionWrapper` from the pool. + + """ return self._do_get() def _do_get(self): @@ -188,11 +210,12 @@ def status(self): raise NotImplementedError() def add_listener(self, listener): - """Add a ``PoolListener``-like object to this pool. + """ + Add a :class:`PoolListener`-like object to this pool. - ``listener`` may be an object that implements some or all of - PoolListener, or a dictionary of callables containing implementations - of some or all of the named methods in PoolListener. + `listener` may be an object that implements some or all of + :class:`PoolListener`, or a dictionary of callables containing implementations + of some or all of the named methods in :class:`PoolListener`. """ @@ -322,12 +345,7 @@ def _get_dic(self): class ConnectionWrapper(connection.Connection): """ - Wraps a pycassa.connection.Connection object, adding pooling - related functionality while still allowing access to the - thrift API calls. - - These should not be created directly, only obtained through - Pool's get() method. + A wrapper class for :class:`Connection`s that adds pooling functionality.
""" @@ -339,6 +357,15 @@ class ConnectionWrapper(connection.Connection): _DISPOSED = 2 def __init__(self, pool, *args, **kwargs): + """ + Creates a wrapper for a :class:`pycassa.connection.Connection` + object, adding pooling related functionality while still allowing + access to the thrift API calls. + + These should not be created directly, only obtained through + Pool's :meth:`~pycassa.pool.Pool.get()` method. + + """ self._pool = pool self._lock = threading.Lock() self.info = {} @@ -350,8 +377,13 @@ def __init__(self, pool, *args, **kwargs): self._pool._notify_on_connect(self) def return_to_pool(self): - """Returns the Connection to the pool. This has the same - effect as calling Pool.return_conn() on the wrapper.""" + """ + Returns this to the pool. + + This has the same effect as calling :meth:`Pool.return_conn()` + on the wrapper. + + """ self._pool.return_conn(self) def _checkin(self): @@ -399,20 +431,21 @@ def __getattr__(self, attr): raise NotImplementedError() class ImmutableConnectionWrapper(ConnectionWrapper): - """ - A ConnectionWrapper that does not support retries through replacing - one wrapper with another or by swapping out the lower-level - pycassa.connection.Connection. + """A connection wrapper that may not be altered.""" - This is currently only used by a StaticPool. Here, the connection - is immutable because multiple threads may be using the same connection - at the same time. + def __init__(self, pool, *args, **kwargs): + """ + Create a ConnectionWrapper that does not support retries through replacing + one wrapper with another or by swapping out the lower-level + :class:`pycassa.connection.Connection`. - These should not be created directly. + This is currently only used by :class:`StaticPool`. Here, the connection + is immutable because multiple threads may be using the same connection + at the same time. - """ + These should not be created directly. 
- def __init__(self, pool, *args, **kwargs): + """ super(ImmutableConnectionWrapper, self).__init__(pool, *args, **kwargs) def __getattr__(self, attr): @@ -429,24 +462,28 @@ def _client_call(*args, **kwargs): return getattr(self, attr) class ReplaceableConnectionWrapper(ConnectionWrapper): - """ - A ConnectionWrapper that supports retries by obtaining another wrapper - from the pool and swapping all contents with it. + """A connection wrapper that may be replaced by another wrapper.""" - Caution should be used when this not used with use_threadlocal=True. + def __init__(self, pool, max_retries, *args, **kwargs): + """ + Create a ConnectionWrapper that supports retries by obtaining another + wrapper from the pool and swapping all contents with it. - These should not be created directly. + Caution should be used when this is used with ``use_threadlocal=False``. - """ + These should not be created directly. - def __init__(self, pool, max_retries, *args, **kwargs): + """ super(ReplaceableConnectionWrapper, self).__init__(pool, *args, **kwargs) self._retry_count = 0 self._max_retries = max_retries def _replace(self, new_conn_wrapper): - """Get another wrapper from the pool and replace our own contents - with its contents.""" + """ + Get another wrapper from the pool and replace our own contents + with its contents. + + """ super(ConnectionWrapper, self)._replace(new_conn_wrapper) self._lock = new_conn_wrapper._lock self._info = new_conn_wrapper.info @@ -483,25 +520,30 @@ def _client_call(*args, **kwargs): return getattr(self, attr) class MutableConnectionWrapper(ConnectionWrapper): - """A ConnectionWrapper that supports retries by opening a new - connection to the next server in Pool's list. + """A connection wrapper that may be altered.""" - Caution should be used when this is not used with use_threadlocal=True. 
+ def __init__(self, pool, max_retries, *args, **kwargs): + """ + Create a :class:`ConnectionWrapper` that supports retries by + opening a new connection to the next server in Pool's list. - These should not be created directly. + Caution should be used when this is used with ``use_threadlocal=False``. - """ + These should not be created directly. - def __init__(self, pool, max_retries, *args, **kwargs): + """ super(MutableConnectionWrapper, self).__init__(pool, *args, **kwargs) self._retry_count = 0 self._max_retries = max_retries def _replace_conn(self): - """Try getting servers from Pool's list and open connections to them + """ + Try getting servers from Pool's list and open connections to them until one succeeds or we have failed enough times; if we succeed, swap the contents of our pycassa.connection.Connection attributes with - that connection's.""" + that connection's. + + """ self.close() failure_count = 0 while failure_count < 2 * len(self._pool.server_list): @@ -539,22 +581,19 @@ def _client_call(*args, **kwargs): return getattr(self, attr) class QueuePool(Pool): - """ - A Pool that maintains a queue of open connections. - - This is typically what you want to use for connection pooling. - - Be careful when using a QueuePool with use_threadlocal=True, - especially with retries enabled. Synchronization may be required to - prevent the connection from changing while another thread is using it. - - """ + """A pool that maintains a queue of open connections.""" def __init__(self, pool_size=5, max_overflow=10, pool_timeout=30, recycle=10000, max_retries=5, prefill=True, *args, **kwargs): """ - Construct a QueuePool. + Construct a Pool that maintains a queue of open connections. + + This is typically what you want to use for connection pooling. + + Be careful when using a QueuePool with ``use_threadlocal=False``, + especially with retries enabled. Synchronization may be required to + prevent the connection from changing while another thread is using it. 
:param pool_size: The size of the pool to be maintained, defaults to 5. This is the largest number of connections that @@ -562,9 +601,9 @@ def __init__(self, pool_size=5, max_overflow=10, A good choice for this is usually a multiple of the number of servers passed to the Pool constructor. If a size less than this is chosen, - the last (len(server_list) - pool_size) servers may not be used until + the last ``(len(server_list) - pool_size)`` servers may not be used until either overflow occurs, a connection is recycled, or a connection - fails. Similarly, if a multiple of len(server_list) is not chosen, + fails. Similarly, if a multiple of ``len(server_list)`` is not chosen, those same servers would have a decreased load. :param max_overflow: The maximum overflow size of the @@ -775,16 +814,14 @@ def checkedout(self): return self._pool_size - self._q.qsize() + self._overflow class SingletonThreadPool(Pool): - """A Pool that maintains one connection per thread. - - Maintains one connection per each thread, never moving a connection to a - thread other than the one which it was created in. - - """ + """A Pool that maintains one connection per thread.""" def __init__(self, pool_size=5, max_retries=5, *args, **kwargs): """ - Creates a SingletonThreadPool. + Creates a Pool that maintains one connection per thread. + + Maintains one connection per each thread, never moving a connection to a + thread other than the one which it was created in. Options are the same as those of :class:`Pool`, as well as: @@ -817,8 +854,6 @@ def recreate(self): listeners=self.listeners) def dispose(self): - """Dispose of this pool.""" - for conn in self._all_conns: try: conn._dispose_wrapper() @@ -865,19 +900,19 @@ def _do_get(self): return c class NullPool(Pool): - """A Pool which does not pool connections. + """A Pool which does not pool connections.""" - Instead, it opens and closes the underlying Cassandra connection - per each get()/return(). NullPools do offer retry behavior. 
+ def __init__(self, max_retries=5, *args, **kwargs): + """ + Creates a Pool which does not pool connections. - Instead of using this with threadlocal storage, you should use a - SingletonThreadPool. + Instead, it opens and closes the underlying Cassandra connection + per each :meth:`~Pool.get()` and :meth:`~Pool.return_conn()`. - """ + NullPools support retry behavior. - def __init__(self, max_retries=5, *args, **kwargs): - """ - Creates a NullPool. + Instead of using this with threadlocal storage, you should use a + :class:`SingletonThreadPool`. Options are the same as those of :class:`Pool`, as well as: @@ -924,14 +959,16 @@ def dispose(self): class StaticPool(Pool): - """ - A Pool of exactly one connection, used for all requests. + """A Pool of exactly one connection, used for all requests.""" - StaticPools do not currently automatic retries. + def __init__(self, *args, **kwargs): + """ + Creates a pool with exactly one connection that is used + for all requests. - """ + Automatic retries are not currently supported. - def __init__(self, *args, **kwargs): + """ Pool.__init__(self, *args, **kwargs) self._conn = self._create_connection() @@ -975,19 +1012,18 @@ def _do_get(self): class AssertionPool(Pool): """A Pool that allows at most one checked out connection at any given - time. - - This will raise an AssertionError if more than one connection is checked - out at a time. Useful for debugging code that is using more connections - than desired. - - AssertionPools do support automatic retries. - - """ + time.""" def __init__(self, max_retries=5, *args, **kwargs): """ - Creates an AssertionPool. + Creates a Pool that allows at most one checked out connection at any given + time. + + This will raise an :exc:`AssertionError` if more than one connection is checked + out at a time. Useful for debugging code that is using more connections + than desired. + + AssertionPools support automatic retries. 
Options are the same as those of :class:`Pool`, as well as: @@ -1055,34 +1091,32 @@ def _do_get(self): class PoolListener(object): - """Hooks into the lifecycle of connections in a ``Pool``. + """Hooks into the lifecycle of connections in a :class:`Pool`. Usage:: class MyListener(PoolListener): - def connect(self, dic): + def connection_created(self, dic): '''perform connect operations''' # etc. # create a new pool with a listener p = QueuePool(..., listeners=[MyListener()]) - # add a listener after the fact + # or add a listener after the fact p.add_listener(MyListener()) - All of the standard connection :class:`~pycassa.pool.Pool` types can - accept event listeners for key connection lifecycle events: - creation, pool check-out and check-in. There are no events fired - when a connection closes. + All of the standard connection :class:`Pool` types can + accept event listeners for key connection lifecycle events. Listeners receive a dictionary that contains event information and is indexed by a string describing that piece of info. For example, all event dictionaries include 'level', so dic['level'] will return the prescribed logging level. - There is no need to subclass ``PoolListener`` to handle events. + There is no need to subclass :class:`PoolListener` to handle events. Any class that implements one or more of these methods can be used - as a pool listener. The ``Pool`` will inspect the methods + as a pool listener. The :class:`Pool` will inspect the methods provided by a listener object and add the listener to one or more internal event queues based on its capabilities. In terms of efficiency and function call overhead, you're much better off only @@ -1094,7 +1128,7 @@ def connection_created(self, dic): """Called once for each new Cassandra connection. 
dic['connection'] - The ``ConnectionWrapper`` that persistently manages the connection + The :class:`ConnectionWrapper` that persistently manages the connection dic['message'] A reason for closing the connection, if any. @@ -1103,7 +1137,7 @@ def connection_created(self, dic): An error that occured while closing the connection, if any. dic['pool_type'] - The type of pool the connection was created in; e.g. QueuePool + The type of pool the connection was created in; e.g. :class:`QueuePool` dic['pool_id'] The logging name of the connection's pool (defaults to id(pool)) @@ -1118,10 +1152,10 @@ def connection_checked_out(self, dic): """Called when a connection is retrieved from the Pool. dic['connection'] - The ``ConnectionWrapper`` that persistently manages the connection + The :class:`ConnectionWrapper` that persistently manages the connection dic['pool_type'] - The type of pool the connection was created in; e.g. QueuePool + The type of pool the connection was created in; e.g. :class:`QueuePool` dic['pool_id'] The logging name of the connection's pool (defaults to id(pool)) @@ -1138,10 +1172,10 @@ def connection_checked_in(self, dic): Note that the connection may be None if the connection has been closed. dic['connection'] - The ``ConnectionWrapper`` that persistently manages the connection + The :class:`ConnectionWrapper` that persistently manages the connection dic['pool_type'] - The type of pool the connection was created in; e.g. QueuePool + The type of pool the connection was created in; e.g. :class:`QueuePool` dic['pool_id'] The logging name of the connection's pool (defaults to id(pool)) @@ -1156,7 +1190,7 @@ def connection_disposed(self, dic): """Called when a connection is closed. dic['connection'] - The ``ConnectionWrapper`` that persistently manages the connection + The :class:`ConnectionWrapper` that persistently manages the connection dic['message'] A reason for closing the connection, if any. 
@@ -1165,7 +1199,7 @@ def connection_disposed(self, dic): An error that occured while closing the connection, if any. dic['pool_type'] - The type of pool the connection was created in; e.g. QueuePool + The type of pool the connection was created in; e.g. :class:`QueuePool` dic['pool_id'] The logging name of the connection's pool (defaults to id(pool)) @@ -1180,13 +1214,13 @@ def connection_recycled(self, dic): """Called when a connection is recycled. dic['old_conn'] - The ``ConnectionWrapper`` that is being recycled + The :class:`ConnectionWrapper` that is being recycled dic['new_conn'] - The ``ConnectionWrapper`` that is replacing it + The :class:`ConnectionWrapper` that is replacing it dic['pool_type'] - The type of pool the connection was created in; e.g. QueuePool + The type of pool the connection was created in; e.g. :class:`QueuePool` dic['pool_id'] The logging name of the connection's pool (defaults to id(pool)) @@ -1204,7 +1238,7 @@ def connection_failed(self, dic): The connection error (Exception). dic['pool_type'] - The type of pool the connection was created in; e.g. QueuePool + The type of pool the connection was created in; e.g. :class:`QueuePool` dic['pool_id'] The logging name of the connection's pool (defaults to id(pool)) @@ -1221,7 +1255,7 @@ def server_list_obtained(self, dic): The randomly permuted list of servers. dic['pool_type'] - The type of pool the connection was created in; e.g. QueuePool + The type of pool the connection was created in; e.g. :class:`QueuePool` dic['pool_id'] The logging name of the connection's pool (defaults to id(pool)) @@ -1236,7 +1270,7 @@ def pool_recreated(self, dic): """Called when a pool is recreated. dic['pool_type'] - The type of pool the connection was created in; e.g. QueuePool + The type of pool the connection was created in; e.g. 
:class:`QueuePool` dic['pool_id'] The logging name of the connection's pool (defaults to id(pool)) @@ -1251,7 +1285,7 @@ def pool_disposed(self, dic): """Called when a pool is recreated. dic['pool_type'] - The type of pool the connection was created in; e.g. QueuePool + The type of pool the connection was created in; e.g. :class:`QueuePool` dic['pool_id'] The logging name of the connection's pool (defaults to id(pool)) @@ -1268,7 +1302,7 @@ def pool_at_max(self, dic): pool, but the pool is already at its max size. dic['pool_type'] - The type of pool the connection was created in; e.g. QueuePool + The type of pool the connection was created in; e.g. :class:`QueuePool` dic['pool_id'] The logging name of the connection's pool (defaults to id(pool)) @@ -1287,14 +1321,16 @@ class NoConnectionAvailable(Exception): """Raised when there are no connections left in a pool.""" class MaximumRetryException(Exception): - """Raised when a connection wrapper has retried the maximum + """ + Raised when a :class:`ConnectionWrapper` has retried the maximum allowed times before being returned to the pool; note that all of the retries do not have to be on the same operation. """ class InvalidRequestError(Exception): - """Pycassa was asked to do something it can't do. + """ + Pycassa was asked to do something it can't do. This error generally corresponds to runtime state errors. 
diff --git a/pycassa/types.py b/pycassa/types.py index ecc1aa78..d495125d 100644 --- a/pycassa/types.py +++ b/pycassa/types.py @@ -6,10 +6,12 @@ 'Int64', 'IntString', 'String'] class Column(object): + """Base class for typed columns.""" def __init__(self, default=None): self.default = default class DateTime(Column): + """Column for :class:`datetime` objects stored as long timestamps.""" def __init__(self, *args, **kwargs): Column.__init__(self, *args, **kwargs) self.struct = struct.Struct('q') @@ -23,6 +25,10 @@ def unpack(self, val): return datetime.fromtimestamp(self.struct.unpack(val)[0]) class DateTimeString(Column): + """ + Column for :class:`datetime` objects stored as ``%Y-%m-%d %H:%M:%S`` + + """ format = '%Y-%m-%d %H:%M:%S' def pack(self, val): if not isinstance(val, datetime): @@ -33,6 +39,7 @@ def unpack(self, val): return datetime.strptime(val, self.format) class Float64(Column): + """Column for 64bit floats.""" def __init__(self, *args, **kwargs): Column.__init__(self, *args, **kwargs) self.struct = struct.Struct('d') @@ -46,6 +53,7 @@ def unpack(self, val): return self.struct.unpack(val)[0] class FloatString(Column): + """Column for floats stored as strings.""" def pack(self, val): if not isinstance(val, float): raise TypeError('expected float, %s found' % type(val).__name__) @@ -55,6 +63,7 @@ def unpack(self, val): return float(val) class Int64(Column): + """Column for 64bit ints.""" def __init__(self, *args, **kwargs): Column.__init__(self, *args, **kwargs) self.struct = struct.Struct('q') @@ -68,6 +77,7 @@ def unpack(self, val): return self.struct.unpack(val)[0] class IntString(Column): + """Column for ints stored as strings.""" def pack(self, val): if not isinstance(val, (int, long)): raise TypeError('expected int or long, %s found' % type(val).__name__) @@ -77,6 +87,7 @@ def unpack(self, val): return int(val) class String(Column): + """Column for :class:`str` or :class:`unicode` objects.""" def pack(self, val): if not isinstance(val, basestring): 
raise TypeError('expected str or unicode, %s found' % type(val).__name__) diff --git a/setup.py b/setup.py index 853cee2a..b33499bd 100644 --- a/setup.py +++ b/setup.py @@ -2,10 +2,24 @@ # -*- coding: utf-8 -*- # +import sys +import os + +try: + import subprocess + has_subprocess = True +except: + has_subprocess = False + +from ez_setup import use_setuptools +use_setuptools() +from setuptools import setup +from distutils.cmd import Command + __version_info__ = (0, 5, 0) __version__ = '.'.join([str(v) for v in __version_info__]) -"""pycassa is a Cassandra library with the following features: +long_description = """pycassa is a Cassandra library with the following features: 1. Auto-failover single or thread-local connections 2. A simplified version of the thrift interface @@ -13,8 +27,50 @@ 4. Support for SuperColumns """ -from distutils.core import setup -import sys +class doc(Command): + + description = "generate or test documentation" + + user_options = [("test", "t", + "run doctests instead of generating documentation")] + + boolean_options = ["test"] + + def initialize_options(self): + self.test = False + + def finalize_options(self): + pass + + def run(self): + if self.test: + path = "doc/_build/doctest" + mode = "doctest" + else: + path = "doc/_build/%s" % __version__ + mode = "html" + + try: + os.makedirs(path) + except: + pass + + if has_subprocess: + status = subprocess.call(["sphinx-build", "-b", mode, "doc", path]) + + if status: + raise RuntimeError("documentation step '%s' failed" % mode) + + print "" + print "Documentation step '%s' performed, results here:" % mode + print " %s/" % path + else: + print """ +`setup.py doc` is not supported for this version of Python. + +Please ask in the user forums for help. 
+""" + setup( name = 'pycassa', @@ -22,7 +78,7 @@ author = 'Jonathan Hseu', author_email = 'pycassa.maintainer@gmail.com', description = 'Simple python library for Cassandra', - long_description = __doc__, + long_description = long_description, url = 'http://github.com/pycassa/pycassa', download_url = 'http://github.com/pycassa/pycassa', license = 'MIT', @@ -31,4 +87,5 @@ platforms = 'any', install_requires = ['thrift'], scripts=['pycassaShell'], + cmdclass={"doc": doc} )