Skip to content

Buglet in groupby when passing a function like len with as_index=False #421

@wesm

Description

@wesm
import numpy as np
import pandas

def webuse(data, baseurl='http://www.stata-press.com/data/r11/'):
    """
    Download a ``.dta`` dataset and parse it with ``genfromdta``.

    Parameters
    ----------
    data : str
        Name of dataset to fetch (the ``.dta`` extension is appended).
    baseurl : str, optional
        URL that the dataset file name is joined onto.

    Returns
    -------
    The value returned by ``genfromdta`` for the downloaded file.

    Examples
    --------
    >>> dta = webuse('auto')

    Notes
    -----
    Make sure baseurl has trailing forward slash. Doesn't do any
    error checking in response URLs.
    """
    # lazy imports
    from contextlib import closing
    from scikits.statsmodels.iolib import genfromdta
    from urllib2 import urlopen
    from urlparse import urljoin
    from StringIO import StringIO

    url = urljoin(baseurl, data + '.dta')
    # closing() releases the HTTP response even if read() raises; urllib2
    # responses are not context managers themselves.
    with closing(urlopen(url)) as response:
        payload = response.read()
    # Wrap the downloaded content so genfromdta gets a truly file-like object.
    return genfromdta(StringIO(payload))

# Fetch the 'auto' dataset via webuse (requires network access).
dta = webuse('auto')
# Build a DataFrame from the records returned by webuse.
df = pandas.DataFrame.from_records(dta)

then

# Group on two keys with as_index=False, select 'mpg', and aggregate with len.
df.groupby(['foreign', 'rep78'], as_index=False)['mpg'].agg(len)

raises an exception

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions