Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/pandas-dev/pandas into intarrays4
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Mar 2, 2018
2 parents 39d8ee7 + a7a7f8c commit 8780466
Show file tree
Hide file tree
Showing 51 changed files with 1,404 additions and 215 deletions.
16 changes: 14 additions & 2 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
from .pandas_vb_common import setup # noqa


# Groupby methods that must be skipped for a given key dtype because they
# are not implemented (or not meaningful) for it.  Keyed by the ``dtype``
# benchmark parameter; consulted in ``GroupByMethods.setup`` below, which
# raises NotImplementedError to make asv skip the combination.
method_blacklist = {
    'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean',
               'max', 'skew', 'cumprod', 'cummax', 'rank', 'pct_change', 'min',
               'var', 'mad', 'describe', 'std'}
}


class ApplyDictReturn(object):
goal_time = 0.2

Expand Down Expand Up @@ -153,6 +160,7 @@ def time_frame_nth_any(self, df):
def time_frame_nth(self, df):
df.groupby(0).nth(0)


def time_series_nth_any(self, df):
df[1].groupby(df[0]).nth(0, dropna='any')

Expand Down Expand Up @@ -369,23 +377,27 @@ class GroupByMethods(object):
goal_time = 0.2

param_names = ['dtype', 'method']
params = [['int', 'float'],
params = [['int', 'float', 'object'],
['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin',
'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head',
'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique',
'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew',
'std', 'sum', 'tail', 'unique', 'value_counts', 'var']]

def setup(self, dtype, method):
    """Build the groupby object whose ``method`` is timed by the benchmark."""
    # asv treats NotImplementedError raised in setup as "skip this
    # parameter combination".
    if method in method_blacklist.get(dtype, {}):
        raise NotImplementedError  # skip benchmark
    ngroups = 1000
    size = ngroups * 2
    rng = np.arange(ngroups)
    # Each candidate value drawn with replacement, so groups have
    # uneven sizes.
    values = rng.take(np.random.randint(0, ngroups, size=size))
    if dtype == 'int':
        key = np.random.randint(0, size, size=size)
    elif dtype == 'float':
        key = np.concatenate([np.random.random(ngroups) * 0.1,
                              np.random.random(ngroups) * 10.0])
    elif dtype == 'object':
        key = ['foo'] * size
    # NOTE(review): no final ``else`` — an unrecognized dtype would leave
    # ``key`` unbound and fail below with NameError; the ``params`` list
    # restricts dtype to the three handled values.

    df = DataFrame({'values': values, 'key': key})
    self.df_groupby_method = getattr(df.groupby('key')['values'], method)
Expand Down
166 changes: 135 additions & 31 deletions doc/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,40 +14,21 @@
import sys
import os
import shutil
import subprocess
# import subprocess
import argparse
from contextlib import contextmanager
import webbrowser
import jinja2

import pandas


DOC_PATH = os.path.dirname(os.path.abspath(__file__))
SOURCE_PATH = os.path.join(DOC_PATH, 'source')
BUILD_PATH = os.path.join(DOC_PATH, 'build')
BUILD_DIRS = ['doctrees', 'html', 'latex', 'plots', '_static', '_templates']


def _generate_index(include_api, single_doc=None):
    """Create index.rst file with the specified sections.

    Renders ``source/index.rst.template`` with jinja2 and writes the
    result to ``source/index.rst``.

    Parameters
    ----------
    include_api : bool
        Whether API documentation will be built.  Forced to False when
        ``single_doc`` is given.
    single_doc : str or None
        If provided, this single documentation page will be generated.
        The directory part and file extension are stripped before
        rendering.
    """
    if single_doc is not None:
        # e.g. 'doc/source/contributing.rst' -> 'contributing'
        single_doc = os.path.splitext(os.path.basename(single_doc))[0]
        include_api = False

    with open(os.path.join(SOURCE_PATH, 'index.rst.template')) as f:
        t = jinja2.Template(f.read())

    # Write the concrete index.rst that sphinx will consume.
    with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as f:
        f.write(t.render(include_api=include_api,
                         single_doc=single_doc))


@contextmanager
def _maybe_exclude_notebooks():
"""Skip building the notebooks if pandoc is not installed.
Expand All @@ -58,6 +39,7 @@ def _maybe_exclude_notebooks():
1. nbconvert isn't installed, or
2. nbconvert is installed, but pandoc isn't
"""
# TODO move to exclude_pattern
base = os.path.dirname(__file__)
notebooks = [os.path.join(base, 'source', nb)
for nb in ['style.ipynb']]
Expand Down Expand Up @@ -96,8 +78,110 @@ class DocBuilder:
All public methods of this class can be called as parameters of the
script.
"""
def __init__(self, num_jobs=1):
def __init__(self, num_jobs=1, include_api=True, single_doc=None):
    """Set up builder state and generate the sphinx index page.

    Parameters
    ----------
    num_jobs : int, default 1
        Number of parallel sphinx jobs (passed as ``-j``).
    include_api : bool, default True
        Whether the API documentation is built.  Forced to False when
        ``single_doc`` is given (see ``_process_single_doc``).
    single_doc : str or None
        Single page to build: an rst/ipynb file name, 'api.rst', or a
        pandas object path (e.g. 'pandas.DataFrame.head').
    """
    self.num_jobs = num_jobs
    self.include_api = include_api
    self.single_doc = None
    self.single_doc_type = None
    if single_doc is not None:
        # Sets self.single_doc / self.single_doc_type and forces
        # self.include_api to False.
        self._process_single_doc(single_doc)
    self.exclude_patterns = self._exclude_patterns

    self._generate_index()
    if self.single_doc_type == 'docstring':
        # Pre-generate the autosummary stub for the single docstring
        # page before sphinx runs.
        self._run_os('sphinx-autogen', '-o',
                     'source/generated_single', 'source/index.rst')

@property
def _exclude_patterns(self):
    """Docs source files that will be excluded from building.

    Returns a comma-separated string of quoted glob patterns suitable
    for sphinx's ``-Dexclude_patterns`` option.
    """
    # TODO move maybe_exclude_notebooks here
    if self.single_doc is not None:
        # Building a single page: exclude every other rst/ipynb source,
        # keeping only index.rst and the requested page.
        wanted = ('index.rst', '{0}.rst'.format(self.single_doc))
        excluded = [fname for fname in os.listdir(SOURCE_PATH)
                    if (fname.endswith('.rst') or fname.endswith('.ipynb'))
                    and fname not in wanted]
        if self.single_doc_type != 'api':
            excluded.append('generated/*.rst')
    elif not self.include_api:
        excluded = ['api.rst', 'generated/*.rst']
    else:
        excluded = ['generated_single/*.rst']

    patterns = ['**.ipynb_checkpoints'] + excluded
    return ','.join(repr(pattern) for pattern in patterns)

def _process_single_doc(self, single_doc):
    """Extract ``self.single_doc`` (base name) and ``self.single_doc_type``
    from the passed ``single_doc`` kwarg.

    Parameters
    ----------
    single_doc : str
        Either the literal 'api.rst', the name of an rst/ipynb file in
        doc/source (with or without the extension), or the dotted path
        of a pandas object (e.g. 'pandas.DataFrame.head').

    Raises
    ------
    ValueError
        If ``single_doc`` is neither an existing source file nor an
        attribute reachable from the ``pandas`` namespace.
    """
    # Building a single page implies the full API docs are skipped.
    self.include_api = False

    if single_doc == 'api.rst':
        self.single_doc_type = 'api'
        self.single_doc = 'api'
    elif os.path.exists(os.path.join(SOURCE_PATH, single_doc)):
        # Full file name given (e.g. 'contributing.rst'): keep the base
        # name without the extension.
        self.single_doc_type = 'rst'
        self.single_doc = os.path.splitext(os.path.basename(single_doc))[0]
    elif os.path.exists(
            os.path.join(SOURCE_PATH, '{}.rst'.format(single_doc))):
        # Base name given (e.g. 'contributing').
        self.single_doc_type = 'rst'
        self.single_doc = single_doc
    elif single_doc is not None:
        # Assume a dotted pandas object path and resolve it attribute
        # by attribute.
        try:
            obj = pandas
            for name in single_doc.split('.'):
                obj = getattr(obj, name)
        except AttributeError:
            # Fixed: the original message left the parenthesis opened
            # before '"contributing.rst"' unbalanced.
            raise ValueError('Single document not understood, it should '
                             'be a file in doc/source/*.rst (e.g. '
                             '"contributing.rst") or a pandas function '
                             'or method (e.g. "pandas.DataFrame.head")')
        else:
            self.single_doc_type = 'docstring'
            if single_doc.startswith('pandas.'):
                self.single_doc = single_doc[len('pandas.'):]
            else:
                self.single_doc = single_doc

def _copy_generated_docstring(self):
    """Copy existing generated (from api.rst) docstring page because
    this is more correct in certain cases (where a custom autodoc
    template is used).
    """
    src = os.path.join(SOURCE_PATH, 'generated',
                       'pandas.{}.rst'.format(self.single_doc))
    dest_dir = os.path.join(SOURCE_PATH, 'generated_single')

    # Create the destination directory; tolerate it already existing.
    try:
        os.makedirs(dest_dir)
    except OSError:
        pass

    if os.path.exists(src):
        try:
            # copying to make sure sphinx always thinks it is new
            # and needs to be re-generated (to pick source code changes)
            shutil.copy(src, dest_dir)
        except:  # noqa
            # best-effort: a failed copy just means sphinx regenerates
            pass

def _generate_index(self):
    """Create index.rst file with the specified sections."""
    if self.single_doc_type == 'docstring':
        self._copy_generated_docstring()

    # Render the jinja2 template into the concrete index.rst that
    # sphinx consumes.
    template_path = os.path.join(SOURCE_PATH, 'index.rst.template')
    with open(template_path) as fh:
        template = jinja2.Template(fh.read())

    rendered = template.render(include_api=self.include_api,
                               single_doc=self.single_doc,
                               single_doc_type=self.single_doc_type)
    with open(os.path.join(SOURCE_PATH, 'index.rst'), 'w') as fh:
        fh.write(rendered)

@staticmethod
def _create_build_structure():
Expand All @@ -121,7 +205,10 @@ def _run_os(*args):
--------
>>> DocBuilder()._run_os('python', '--version')
"""
subprocess.check_call(args, stderr=subprocess.STDOUT)
# TODO check_call should be more safe, but it fails with
# exclude patterns, needs investigation
# subprocess.check_call(args, stderr=subprocess.STDOUT)
os.system(' '.join(args))

def _sphinx_build(self, kind):
"""Call sphinx to build documentation.
Expand All @@ -142,11 +229,21 @@ def _sphinx_build(self, kind):
self._run_os('sphinx-build',
'-j{}'.format(self.num_jobs),
'-b{}'.format(kind),
'-d{}'.format(os.path.join(BUILD_PATH,
'doctrees')),
'-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
'-Dexclude_patterns={}'.format(self.exclude_patterns),
SOURCE_PATH,
os.path.join(BUILD_PATH, kind))

def _open_browser(self):
    """Open the built html page for the single doc in a web browser."""
    html_root = os.path.join('file://', DOC_PATH, 'build', 'html')
    if self.single_doc_type == 'docstring':
        page = os.path.join('generated_single',
                            'pandas.{}.html'.format(self.single_doc))
    else:
        page = '{}.html'.format(self.single_doc)
    # new=2: open in a new browser tab where possible
    webbrowser.open(os.path.join(html_root, page), new=2)

def html(self):
"""Build HTML documentation."""
self._create_build_structure()
Expand All @@ -156,6 +253,11 @@ def html(self):
if os.path.exists(zip_fname):
os.remove(zip_fname)

if self.single_doc is not None:
self._open_browser()
shutil.rmtree(os.path.join(SOURCE_PATH, 'generated_single'),
ignore_errors=True)

def latex(self, force=False):
"""Build PDF documentation."""
self._create_build_structure()
Expand Down Expand Up @@ -222,8 +324,8 @@ def main():
metavar='FILENAME',
type=str,
default=None,
help=('filename of section to compile, '
'e.g. "indexing"'))
help=('filename of section or method name to '
'compile, e.g. "indexing", "DataFrame.join"'))
argparser.add_argument('--python-path',
type=str,
default=os.path.join(DOC_PATH, '..'),
Expand All @@ -235,8 +337,10 @@ def main():
args.command, ', '.join(cmds)))

os.environ['PYTHONPATH'] = args.python_path
_generate_index(not args.no_api, args.single)
getattr(DocBuilder(args.num_jobs), args.command)()

getattr(DocBuilder(args.num_jobs,
not args.no_api,
args.single), args.command)()


if __name__ == '__main__':
Expand Down
5 changes: 5 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2179,8 +2179,12 @@ Computations / Descriptive Stats
.. autosummary::
:toctree: generated/

GroupBy.all
GroupBy.any
GroupBy.bfill
GroupBy.count
GroupBy.cumcount
GroupBy.ffill
GroupBy.first
GroupBy.head
GroupBy.last
Expand All @@ -2192,6 +2196,7 @@ Computations / Descriptive Stats
GroupBy.nth
GroupBy.ohlc
GroupBy.prod
GroupBy.rank
GroupBy.size
GroupBy.sem
GroupBy.std
Expand Down
Loading

0 comments on commit 8780466

Please sign in to comment.