In [15]:
%pylab inline
from pandas import Series,DataFrame
import pandas as pd

Populating the interactive namespace from numpy and matplotlib


## Index
* [sort vs. sort_index: no difference](#sort-and-sort_index:-no-difference)
* [sort by row or column index](#sort-by-row-or-column-index)
    * [example 1](#example-1)
    * [example 2](#example-2)
* [sort by values](#sort-by-values)

In [16]:
# Demo table with one row per (state, year) observation.
# 'debt' is requested in `columns` but is not a key of `data`, so that
# column comes out entirely NaN.
data = {
    'state': ['Ohio'] * 3 + ['Nevada'] * 2,
    'year': [2000, 2001, 2002, 2001, 2002],
    'population': [1.5, 1.7, 3.6, 2.4, 2.9],
}
frame = pd.DataFrame(
    data,
    index=['one', 'two', 'three', 'four', 'five'],
    columns=['year', 'state', 'population', 'debt'],
)
frame

Unnamed: 0,year,state,population,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,


In [17]:
# Sort by column names: the columns are reordered alphabetically by label.
# The result is only displayed; `frame` is not reassigned here.
frame.sort_index(axis='columns')

Unnamed: 0,debt,population,state,year
one,,1.5,Ohio,2000
two,,1.7,Ohio,2001
three,,3.6,Ohio,2002
four,,2.4,Nevada,2001
five,,2.9,Nevada,2002


### sort and sort_index: no difference
[See this Stack Overflow question](http://stackoverflow.com/questions/19332171/difference-between-sort-and-sort-index):
* They are the same. `sort` is an older version that accepts a slightly different calling convention. Use `sort_index`.
* The difference is entirely in the way it is called: the source code for `sort` is literally a one-line call to `sort_index`.

### sort by row or column index
To sort lexicographically **by row or column index, use the sort_index method**, which returns a new, sorted object:

#### example 1

In [18]:
# 2x4 table holding the integers 0..7. Row and column labels are given out of
# alphabetical order so that the effect of sorting by index is visible.
df = pd.DataFrame(
    np.arange(8).reshape(2, 4),
    index=['three', 'one'],
    columns=list('dabc'),
)
df

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [19]:
# Sort by row index: rows are ordered by their labels ('one' before 'three').
df.sort_index(axis=0)

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [20]:
# Sort by column index: columns are ordered by their labels (a, b, c, d).
df.sort_index(axis='columns')

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [21]:
# Sort by column index in descending order (d, c, b, a).
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


#### example 2

In [22]:
# Sort the rows by their string labels, i.e. alphabetically:
# 'five', 'four', 'one', 'three', 'two'.
frame.sort_index(axis=0)

Unnamed: 0,year,state,population,debt
five,2002,Nevada,2.9,
four,2001,Nevada,2.4,
one,2000,Ohio,1.5,
three,2002,Ohio,3.6,
two,2001,Ohio,1.7,


In [30]:
# With inplace=True the alphabetical column reordering is applied to `frame`
# itself, so the frame is displayed on a separate line afterwards.
frame.sort_index(axis='columns', inplace=True)
frame

Unnamed: 0,debt,population,state,year
one,,1.5,Ohio,2000
two,,1.7,Ohio,2001
three,,3.6,Ohio,2002
four,,2.4,Nevada,2001
five,,2.9,Nevada,2002


### sort by values

In [24]:
# Small frame for the value-sorting examples; rows are labelled r1..r4.
# `columns` is passed explicitly so the column order is always a, b:
# without it the order depends on the pandas/Python version (older releases
# sort the dict keys, newer ones keep the dict's insertion order, i.e. b, a).
# `range` is used instead of the Python-2-only `xrange` so the cell also runs
# on Python 3.
df = pd.DataFrame(
    {'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]},
    index=["r%d" % i for i in range(1, 5)],
    columns=['a', 'b'],
)
df

Unnamed: 0,a,b
r1,0,4
r2,1,7
r3,0,-3
r4,1,2


In [25]:
# Sort the rows by the values of a single column ("b"), smallest first.
df.sort_values("b")

Unnamed: 0,a,b
r3,0,-3
r4,1,2
r1,0,4
r2,1,7


In [26]:
# Sort on several columns at once: "a" first, ties broken by "b", both in
# descending order. inplace=True changes `df` itself, so the frame is
# displayed on a separate line.
df.sort_values(
    by=["a", "b"],
    ascending=False,
    inplace=True,
)
df

Unnamed: 0,a,b
r2,1,7
r4,1,2
r1,0,4
r3,0,-3


In [27]:
# Four random floats wrapped in a Series with the default integer index.
# No seed is set, so the values differ from run to run.
s = pd.Series(np.random.rand(4))
s

0    0.470994
1    0.684108
2    0.921847
3    0.851270
dtype: float64

In [28]:
# By default inplace=False, so sort_values() returns a sorted copy and the
# original Series is unchanged; pass inplace=True to sort in place instead.
# print(...) is called with a single argument so the cell produces the same
# text on Python 2 and Python 3 (the bare print statement is a SyntaxError on
# Python 3).
sorted_copy = s.sort_values()
print("------------ sorted copy")
print(sorted_copy)
print("------------ original is unchanged")
print(s)

------------ sorted copy
0    0.470994
1    0.684108
3    0.851270
2    0.921847
dtype: float64
------------ original is unchanged
0    0.470994
1    0.684108
2    0.921847
3    0.851270
dtype: float64
