Skip to content

Commit

Permalink
docstring updates
Browse files Browse the repository at this point in the history
  • Loading branch information
svenkreiss committed Jul 19, 2016
1 parent a958bc7 commit ae190c1
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 142 deletions.
6 changes: 3 additions & 3 deletions docs/sphinx/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ The functionality provided by this module is used in ``Context.textFile()``
for reading and in ``RDD.saveAsTextFile()`` for writing. You can use this
submodule for writing files directly with ``File(filename).dump(some_data)``,
``File(filename).load()`` and ``File.exists(path)`` to read, write and check
for existence of a file. All methods transparently handle ``http://``, ``s3://``
and ``file://`` locations and compression/decompression of ``.gz`` and
``.bz2`` files.
for existence of a file. All methods transparently handle various schemes
(for example ``http://``, ``s3://`` and ``file://``) and
compression/decompression of ``.gz`` and ``.bz2`` files (among others).

Use environment variables ``AWS_SECRET_ACCESS_KEY`` and ``AWS_ACCESS_KEY_ID``
for auth and use file paths of the form ``s3://bucket_name/filename.txt``.
Expand Down
94 changes: 11 additions & 83 deletions docs/sphinx/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@
'sphinx.ext.viewcode',
]

primary_domain = 'py'
default_role = 'py:obj'

autodoc_member_order = 'bysource'
autoclass_content = 'both'

# workaround for 'viewcode' extension on readthedocs
autodoc_docstring_signature = False

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

Expand Down Expand Up @@ -160,7 +169,7 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
#html_static_path = ['_static']

# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
Expand Down Expand Up @@ -192,7 +201,7 @@
#html_split_index = False

# If true, links to the reST sources are added to the pages.
html_show_sourcelink = False
#html_show_sourcelink = False

# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
html_show_sphinx = False
Expand Down Expand Up @@ -225,87 +234,6 @@
# Output file base name for HTML help builder.
htmlhelp_basename = 'pysparklingdoc'

# -- Options for LaTeX output ---------------------------------------------

latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',

# Latex figure (float) alignment
#'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'pysparkling.tex', u'pysparkling Documentation',
u'Sven Kreiss', 'manual'),
]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None

# For "manual" documents, if this is true, then toplevel headings are parts,
# not chapters.
#latex_use_parts = False

# If true, show page references after internal links.
#latex_show_pagerefs = False

# If true, show URL addresses after external links.
#latex_show_urls = False

# Documents to append as an appendix to all manuals.
#latex_appendices = []

# If false, no module index is generated.
#latex_domain_indices = True


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'pysparkling', u'pysparkling Documentation',
[author], 1)
]

# If true, show URL addresses after external links.
#man_show_urls = False


# -- Options for Texinfo output -------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'pysparkling', u'pysparkling Documentation',
author, 'pysparkling', 'One line description of project.',
'Miscellaneous'),
]

# Documents to append as an appendix to all manuals.
#texinfo_appendices = []

# If false, no module index is generated.
#texinfo_domain_indices = True

# How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote'

# If true, do not generate a @detailmenu in the "Top" node's menu.
#texinfo_no_detailmenu = False


# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/': None}
39 changes: 12 additions & 27 deletions pysparkling/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def newRddId(self):
return Context.__last_rdd_id

def parallelize(self, x, numPartitions=None):
"""parallelize x
"""Parallelize x.
:param x:
An iterable (e.g. a list) that represents the data.
Expand All @@ -129,9 +129,7 @@ def parallelize(self, x, numPartitions=None):
A partition is a unit of data that is processed at a time.
Can be ``None``.
:returns:
New RDD.
:rtype: RDD
"""
if not numPartitions:
return RDD([Partition(x, 0)], self)
Expand All @@ -150,14 +148,10 @@ def partitioned():
return self._parallelize_partitions(partitioned())

def _parallelize_partitions(self, partitions):
"""helper to parallelize partitions
:param partitions:
An iterable over the partitioned data.
:returns:
New RDD.
"""Helper to parallelize partitions.
:param partitions: An iterable over the partitioned data.
:rtype: RDD
"""

return RDD(
Expand All @@ -166,7 +160,7 @@ def _parallelize_partitions(self, partitions):
)

def pickleFile(self, name, minPartitions=None):
"""read a pickle file
"""Read a pickle file.
Reads files created with :func:`RDD.saveAsPickleFile()` into an RDD.
Expand All @@ -179,8 +173,7 @@ def pickleFile(self, name, minPartitions=None):
By default, every file is a partition, but this option allows these to be
split further.
:returns:
New RDD.
:rtype: RDD
Example with a serialized list:
Expand Down Expand Up @@ -323,9 +316,7 @@ def textFile(self, filename, minPartitions=None, use_unicode=True):
:param use_unicode: (optional, default=True)
Use ``utf8`` if ``True`` and ``ascii`` if ``False``.
:returns:
New RDD.
:rtype: RDD
"""
resolved_names = TextFile.resolve_filenames(filename)
log.debug('textFile() resolved "{0}" to {1} files.'
Expand All @@ -346,14 +337,10 @@ def textFile(self, filename, minPartitions=None, use_unicode=True):
return rdd

def union(self, rdds):
"""union
:param rdds:
Iterable of RDDs.
:returns:
New RDD.
"""Create a union of rdds.
:param rdds: Iterable of RDDs.
:rtype: RDD
"""
return self.parallelize(
(x for rdd in rdds for x in rdd.collect())
Expand All @@ -374,9 +361,7 @@ def wholeTextFiles(self, path, minPartitions=None, use_unicode=True):
:param use_unicode: (optional, default=True)
Use ``utf8`` if ``True`` and ``ascii`` if ``False``.
:returns:
New RDD.
:rtype: RDD
"""
resolved_names = TextFile.resolve_filenames(path)
log.debug('wholeTextFiles() resolved "{0}" to {1} files.'
Expand Down
27 changes: 8 additions & 19 deletions pysparkling/fileio/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
class File(object):
"""file object
:param file_name:
Any file name. Supports the schemes ``http://``, ``s3://`` and
``file://``.
:param file_name: Any file name.
"""

def __init__(self, file_name):
Expand All @@ -34,9 +31,8 @@ def resolve_filenames(all_expr):
(i.e. ``my_data`` gets resolved to
``[my_data/part-00000, my_data/part-00001]``).
:returns:
A list of file names.
:returns: A list of file names.
:rtype: list
"""
files = []
for expr in all_expr.split(','):
Expand All @@ -48,18 +44,14 @@ def resolve_filenames(all_expr):
def exists(self):
"""Checks both for a file or directory at this location.
:returns:
True or false.
:returns: True or false.
"""
return self.fs.exists()

def load(self):
"""Load the data from a file.
:returns:
A ``io.BytesIO`` instance. Use ``getvalue()`` to get a string.
:rtype: io.BytesIO
"""
stream = self.fs.load()
stream = self.codec.decompress(stream)
Expand All @@ -72,9 +64,7 @@ def dump(self, stream=None):
A BytesIO instance. ``bytes`` are also possible and are converted
to BytesIO.
:returns:
self
:rtype: File
"""
if stream is None:
stream = BytesIO()
Expand All @@ -90,9 +80,8 @@ def dump(self, stream=None):
def make_public(self, recursive=False):
"""Makes the file public. Currently only supported on S3.
:param recursive:
Whether to apply this recursively.
:param recursive: Whether to apply this recursively.
:rtype: File
"""
self.fs.make_public(recursive)
return self
13 changes: 3 additions & 10 deletions pysparkling/fileio/textfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,7 @@
class TextFile(File):
"""Derived from :class:`pysparkling.fileio.File`.
:param file_name:
Any text file name. Supports the schemes ``http://``, ``s3://`` and
``file://``.
:param file_name: Any text file name.
"""

def __init__(self, file_name):
Expand All @@ -33,9 +30,7 @@ def load(self, encoding='utf8', encoding_errors='ignore'):
:param encoding: (optional)
The character encoding of the file.
:returns:
An ``io.StringIO`` instance. Use ``read()`` to get a string.
:rtype: io.StringIO
"""
if type(self.codec) == codec.Codec and \
hasattr(self.fs, 'load_text'):
Expand All @@ -56,9 +51,7 @@ def dump(self, stream=None, encoding='utf8', encoding_errors='ignore'):
:param encoding: (optional)
The character encoding of the file.
:returns:
self
:rtype: TextFile
"""
if stream is None:
stream = StringIO()
Expand Down

0 comments on commit ae190c1

Please sign in to comment.