Skip to content

Commit

Permalink
ENH: DataFrame.set_index function, and bugfix when setting DataFrame …
Browse files Browse the repository at this point in the history
…index, GH #266
  • Loading branch information
wesm committed Oct 24, 2011
1 parent e5193c0 commit 837b7dc
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 8 deletions.
5 changes: 4 additions & 1 deletion RELEASE.rst
Expand Up @@ -131,6 +131,8 @@ feedback on the library.
- Implemented `Panel.take`
- Add `set_eng_float_format` function for setting alternate DataFrame
floating point string formatting
- Add convenience `set_index` function for creating a DataFrame index from
its existing columns

**Improvements to existing features**

Expand Down Expand Up @@ -194,9 +196,10 @@ feedback on the library.
- Can pass level name to `DataFrame.stack`
- Support set operations between MultiIndex and Index
- Fix many corner cases in MultiIndex set operations
- Fix MultiIndex-handling bug with GroupBy.apply when returned groups are not
- Fix MultiIndex-handling bug with GroupBy.apply when returned groups are not
indexed the same
- Fix corner case bugs in DataFrame.apply
- Fix bug where setting the DataFrame index did not clear the Series cache

Thanks
------
Expand Down
53 changes: 51 additions & 2 deletions pandas/core/frame.py
Expand Up @@ -643,8 +643,13 @@ def _set_columns(self, value):
self._series_cache.clear()
columns = property(fset=_set_columns, fget=_get_columns)

# reference underlying BlockManager
index = AxisProperty(1)
def _get_index(self):
    """Return the index (row labels): axis 1 of the underlying ``_data``."""
    row_axis = self._data.axes[1]
    return row_axis

def _set_index(self, value):
    """Install ``value`` as axis 1 of ``_data`` and empty ``_series_cache``."""
    self._data.set_axis(1, value)
    # Entries cached before the axis swap must not outlive the old index.
    self._series_cache.clear()

index = property(fget=_get_index, fset=_set_index)

def as_matrix(self, columns=None):
"""
Expand Down Expand Up @@ -1099,6 +1104,50 @@ def reindex_like(self, other, method=None, copy=True):
return self.reindex(index=other.index, columns=other.columns,
method=method, copy=copy)

def set_index(self, col_or_cols, drop=True, inplace=False):
    """
    Set the DataFrame index (row labels) using one or more existing
    columns. By default yields a new object.

    Parameters
    ----------
    col_or_cols : column label or list of column labels
    drop : boolean, default True
        Delete columns to be used as the new index
    inplace : boolean, default False
        Modify the DataFrame in place (do not create a new object)

    Returns
    -------
    dataframe : DataFrame

    Raises
    ------
    Exception
        If the selected columns produce duplicate index keys. The check
        runs before any column is dropped, so a failed ``inplace=True``
        call does not remove columns from the calling frame.
    """
    cols = col_or_cols
    if not isinstance(col_or_cols, (list, tuple)):
        cols = [col_or_cols]

    if inplace:
        frame = self
    else:
        frame = self.copy()

    # Only read the key columns here; dropping them is deferred until the
    # new index has been validated, so an error cannot leave an in-place
    # frame half-modified.
    arrays = [frame[col] for col in cols]

    index = MultiIndex.from_arrays(arrays, names=cols)

    if not index._verify_integrity():
        duplicates = index._get_duplicates()
        raise Exception('Index has duplicate keys: %s' % duplicates)

    if drop:
        for col in cols:
            del frame[col]

    frame.index = index

    return frame

def take(self, indices, axis=0):
"""
Analogous to ndarray.take, return DataFrame corresponding to requested
Expand Down
62 changes: 57 additions & 5 deletions pandas/tests/test_frame.py
Expand Up @@ -2101,11 +2101,6 @@ def test_pivot_duplicates(self):
data = DataFrame({'a' : ['bar', 'bar', 'foo', 'foo', 'foo'],
'b' : ['one', 'two', 'one', 'one', 'two'],
'c' : [1., 2., 3., 3., 4.]})
# expected = DataFrame([[1., 2.], [3., 4.]], index=['bar', 'foo'],
# columns=['one', 'two'])
# result = data.pivot('a', 'b', 'c')
# assert_frame_equal(result, expected)

self.assertRaises(Exception, data.pivot, 'a', 'b', 'c')

def test_reindex(self):
Expand Down Expand Up @@ -2192,6 +2187,63 @@ def test_reindex_columns(self):
newFrame = self.frame.reindex(columns=[])
self.assert_(not newFrame)

def test_add_index(self):
    """
    Check DataFrame.set_index for a single key column and for a list of
    key columns, each with drop=True / drop=False and with inplace on / off.
    """
    frame = DataFrame({'A' : ['foo', 'foo', 'foo', 'bar', 'bar'],
                       'B' : ['one', 'two', 'three', 'one', 'two'],
                       'C' : ['a', 'b', 'c', 'd', 'e'],
                       'D' : np.random.randn(5),
                       'E' : np.random.randn(5)})

    def verify(keys, new_index, remaining, name_attr):
        # Expected frames: key columns removed vs. key columns retained
        want_dropped = frame.ix[:, remaining]
        want_dropped.index = new_index

        want_kept = frame.copy()
        want_kept.index = new_index

        # new object
        got_dropped = frame.set_index(keys)
        got_kept = frame.set_index(keys, drop=False)

        assert_frame_equal(got_dropped, want_dropped)
        assert_frame_equal(got_kept, want_kept)
        self.assertEqual(getattr(got_dropped.index, name_attr),
                         getattr(new_index, name_attr))

        # inplace
        for drop, wanted in ((True, want_dropped), (False, want_kept)):
            target = frame.copy()
            target.set_index(keys, drop=drop, inplace=True)
            assert_frame_equal(target, wanted)

    # single-column
    verify('C', Index(frame['C'], name='C'),
           ['A', 'B', 'D', 'E'], 'name')

    # multi-column
    verify(['A', 'B'],
           MultiIndex.from_arrays([frame['A'], frame['B']],
                                  names=['A', 'B']),
           ['C', 'D', 'E'], 'names')

def test_align(self):

af, bf = self.frame.align(self.frame)
Expand Down

0 comments on commit 837b7dc

Please sign in to comment.