## Task
Explore sorting in pandas

## Notebook Summary
* Series
    * `sort_values`
    * `sort_index`
    * `sortlevel` (deprecated since pandas 0.20; use `sort_index(level=...)`)
    * `argsort`
    * `searchsorted`
    * `rank`

## References
* *Python for Data Analysis*, Wes McKinney, O'Reilly, 2012
* *Numerical Python*, Robert Johansson, Apress, 2015
* *Python Data Science Handbook*, Jake VanderPlas, O'Reilly, 2016


In [2]:
# Display the value of every top-level expression in a cell (like the plain
# Python shell), not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import platform

import IPython
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Record the environment the notebook runs in.  Every print receives a single
# pre-formatted string, so the cell produces the same text on Python 2 and
# Python 3 (a bare `print a, b` statement is a SyntaxError on Python 3).
print('python.version =  {}'.format(platform.python_version()))
print('ipython.version = {}'.format(IPython.version_info))
print('numpy.version = {}'.format(np.__version__))
print('pandas.version = {}'.format(pd.__version__))


python.version =  2.7.10
ipython.version = (5, 1, 0, '')
numpy.version = 1.11.3
pandas.version = 0.19.2


In [None]:

# sort_index / sort_values / rank - DataFrame
#
# Small frame with an unsorted index and out-of-order values so that every
# call below visibly changes something.  Defined here so the cell runs on a
# fresh kernel instead of relying on a `df` left over from elsewhere.
df = pd.DataFrame(
    {'Col1': [3, 1, 2], 'Col2': [1, 7, 2]},
    index=['b', 'c', 'a'],
    columns=['Col1', 'Col2'],
)
df
df.sort_index(ascending=False)              # rows ordered by index label, descending
df.sort_index(axis=1, ascending=False)      # columns ordered by column label, descending
df.sort_values(by='Col1', ascending=False)  # rows ordered by the values in Col1, descending

df.rank()        # rank each value within its column
df.rank(axis=1)  # rank each value within its row (row 'a' is a tie -> 1.5 each)


In [22]:
# sort_values - Series

# Private, seeded generator: the cell draws the same ten integers from
# [0, 50) on every run and leaves NumPy's global random state untouched.
# (The bounds are spelled out; `randint(low=50)` alone means "0 up to 50".)
rng = np.random.RandomState(22)
s = pd.Series(rng.randint(0, 50, size=10), index=['E','H','D','B','J','C','I','G','A','F'])
s

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print('Sort values asc')
s.sort_values()

print('Sort values desc')
s.sort_values(ascending=False)

# inplace=True reorders `s` itself and returns None, so `s` is displayed
# explicitly afterwards.
print('Sort values asc in-place')
s.sort_values(inplace=True)
s


E    43
H    43
D    22
B    35
J    26
C    26
I    16
G    40
A    20
F     5
dtype: int64

Sort values asc


F     5
I    16
A    20
D    22
J    26
C    26
B    35
G    40
E    43
H    43
dtype: int64

Sort values desc


H    43
E    43
G    40
B    35
C    26
J    26
D    22
A    20
I    16
F     5
dtype: int64

Sort values asc in-place


F     5
I    16
A    20
D    22
J    26
C    26
B    35
G    40
E    43
H    43
dtype: int64

In [61]:
# sort_index - Series

# Two parallel lists: labels for the outer level and labels for the inner level.
arr = [[3]*3 + [1]*3 + [4] + [2]*3, ['E','H','D','B','J','C','I','G','A','F']]
arr
arr[0]
list(zip(arr[0], arr[1])) # same as...
list(zip(*arr))

idx = pd.MultiIndex.from_tuples(list(zip(*arr)), names=['outer', 'inner'])
idx

# Private, seeded generator: same ten integers from [0, 50) on every run,
# without touching NumPy's global random state.
rng = np.random.RandomState(61)
s = pd.Series(rng.randint(0, 50, size=10), index=idx)
s

print('---')

# With no level given, sorting starts at the first (outer) level.
s.sort_index() # same as...
s.sort_index(level=0) # same as...
s.sort_index(level='outer')

print('---')
s.sort_index(level='inner', ascending=False)
# The value is formatted into the string so that print(...) receives a single
# argument and emits the same text on Python 2 and Python 3.
print('Before sorting in-place: \n{}'.format(s))
s.sort_index(level='inner', ascending=True, inplace=True)
print('After sorting in-place: \n{}'.format(s))


[[3, 3, 3, 1, 1, 1, 4, 2, 2, 2],
 ['E', 'H', 'D', 'B', 'J', 'C', 'I', 'G', 'A', 'F']]

[3, 3, 3, 1, 1, 1, 4, 2, 2, 2]

[(3, 'E'),
 (3, 'H'),
 (3, 'D'),
 (1, 'B'),
 (1, 'J'),
 (1, 'C'),
 (4, 'I'),
 (2, 'G'),
 (2, 'A'),
 (2, 'F')]

[(3, 'E'),
 (3, 'H'),
 (3, 'D'),
 (1, 'B'),
 (1, 'J'),
 (1, 'C'),
 (4, 'I'),
 (2, 'G'),
 (2, 'A'),
 (2, 'F')]

MultiIndex(levels=[[1, 2, 3, 4], [u'A', u'B', u'C', u'D', u'E', u'F', u'G', u'H', u'I', u'J']],
           labels=[[2, 2, 2, 0, 0, 0, 3, 1, 1, 1], [4, 7, 3, 1, 9, 2, 8, 6, 0, 5]],
           names=[u'outer', u'inner'])

outer  inner
3      E        33
       H        36
       D        21
1      B        35
       J        25
       C        33
4      I        28
2      G        24
       A        31
       F        15
dtype: int64

---


outer  inner
1      B        35
       C        33
       J        25
2      A        31
       F        15
       G        24
3      D        21
       E        33
       H        36
4      I        28
dtype: int64

outer  inner
1      B        35
       C        33
       J        25
2      A        31
       F        15
       G        24
3      D        21
       E        33
       H        36
4      I        28
dtype: int64

outer  inner
1      B        35
       C        33
       J        25
2      A        31
       F        15
       G        24
3      D        21
       E        33
       H        36
4      I        28
dtype: int64

---


outer  inner
1      J        25
4      I        28
3      H        36
2      G        24
       F        15
3      E        33
       D        21
1      C        33
       B        35
2      A        31
dtype: int64

Before sorting in-place: 
outer  inner
3      E        33
       H        36
       D        21
1      B        35
       J        25
       C        33
4      I        28
2      G        24
       A        31
       F        15
dtype: int64
After sorting in-place: 
outer  inner
2      A        31
1      B        35
       C        33
3      D        21
       E        33
2      F        15
       G        24
3      H        36
4      I        28
1      J        25
dtype: int64


In [65]:
# sortlevel - Series
#
# `Series.sortlevel` was deprecated in pandas 0.20 and removed in 1.0.
# `sort_index(level=...)` is the equivalent call and is used below: it sorts
# by the given level first and then, lexicographically, by the other levels.

arr = [[3]*3 + [1]*3 + [4] + [2]*3, ['E','H','D','B','J','C','I','G','A','F']]
idx = pd.MultiIndex.from_tuples(list(zip(*arr)), names=['outer', 'inner'])
# Private, seeded generator: same ten integers from [0, 50) on every run.
rng = np.random.RandomState(65)
s = pd.Series(rng.randint(0, 50, size=10), index=idx)
s

by_outer = s.sort_index(level=0)  # was s.sortlevel(): level 0 is the default
by_outer
by_inner = s.sort_index(level=1)  # was s.sortlevel(level=1)
by_inner


outer  inner
3      E        25
       H        35
       D        17
1      B         0
       J        10
       C         6
4      I        17
2      G        20
       A        45
       F        15
dtype: int64

outer  inner
1      B         0
       C         6
       J        10
2      A        45
       F        15
       G        20
3      D        17
       E        25
       H        35
4      I        17
dtype: int64

outer  inner
2      A        45
1      B         0
       C         6
3      D        17
       E        25
2      F        15
       G        20
3      H        35
4      I        17
1      J        10
dtype: int64

In [75]:
# argsort - Series

s = pd.Series(np.arange(10)[::-1], index=['E','H','D','B','J','C','I','G','A','F'])
s

print('---')

# argsort returns integer positions: entry k is the position in `s` of the
# element that belongs at slot k of the sorted result.  The result is itself
# a Series, so take `.values` and feed the positions to `.iloc` to get the
# sorted data.  (Sorting `idx` itself would only order the positions.)
idx = s.argsort()
type(idx)
s_sorted = s.iloc[idx.values]
s_sorted

# `s` was built in descending order, so simply reversing it gives the same
# ascending result as the argsort-based reordering above.
s[::-1]


E    9
H    8
D    7
B    6
J    5
C    4
I    3
G    2
A    1
F    0
dtype: int64

---


pandas.core.series.Series

F    0
A    1
G    2
I    3
C    4
J    5
B    6
D    7
H    8
E    9
dtype: int64

F    0
A    1
G    2
I    3
C    4
J    5
B    6
D    7
H    8
E    9
dtype: int64

In [83]:
# searchsorted - Series

# Build the Series and order it ascending in one expression; the insertion
# positions looked up below are only meaningful for sorted values.
s = Series(np.arange(10)[::-1], index=list('EHDBJCIGAF')).sort_values()
s

# Each call reports the position at which the value would be inserted to keep
# `s` sorted.
s.searchsorted(0)
s.searchsorted(1)
s.searchsorted(2)
s.searchsorted(11)  # larger than every element -> len(s)
s.searchsorted(5)


F    0
A    1
G    2
I    3
C    4
J    5
B    6
D    7
H    8
E    9
dtype: int64

array([0])

array([1])

array([2])

array([10])

array([5])

In [7]:
# rank - Series

# Values with several ties, so the tie-breaking methods give different results.
s = pd.Series([1,1,2,4,6,4,4,3,3,3], index=['E','H','D','B','J','C','I','G','A','F'])
s

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print('---')

s.rank() # default method = average: tied values share the mean of their ranks
s.rank(method='first') # ties ranked in the order they appear
s.rank(method='max') # tied values all get the highest rank of their group
s.rank(method='min') # tied values all get the lowest rank of their group
s.rank(method='dense') # like min, but rank increases by exactly 1 between groups


E    1
H    1
D    2
B    4
J    6
C    4
I    4
G    3
A    3
F    3
dtype: int64

---


E     1.5
H     1.5
D     3.0
B     8.0
J    10.0
C     8.0
I     8.0
G     5.0
A     5.0
F     5.0
dtype: float64

E     1.0
H     2.0
D     3.0
B     7.0
J    10.0
C     8.0
I     9.0
G     4.0
A     5.0
F     6.0
dtype: float64

E     2.0
H     2.0
D     3.0
B     9.0
J    10.0
C     9.0
I     9.0
G     6.0
A     6.0
F     6.0
dtype: float64

E     1.0
H     1.0
D     3.0
B     7.0
J    10.0
C     7.0
I     7.0
G     4.0
A     4.0
F     4.0
dtype: float64

E    1.0
H    1.0
D    2.0
B    4.0
J    5.0
C    4.0
I    4.0
G    3.0
A    3.0
F    3.0
dtype: float64