# Agenda: Sorting

1. Series
    - Sorting by index
    - Sorting by values
2. Data frames
    - Sorting by index
    - Sorting by one column
    - Sorting by multiple columns

In [1]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame

The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.


In [2]:
# build a series of 10 random integers drawn from -50 up to (but not including) 50;
# seeding the generator first makes the values identical on every run

np.random.seed(0)
s = Series(data=np.random.randint(low=-50, high=50, size=10),
           index=list('acegihfjdb'))
s

a    -6
c    -3
e    14
g    17
i    17
h   -41
f    33
j   -29
d   -14
b    37
dtype: int64

In [3]:
# retrieve a single value by its index label with .loc
# (one label that appears once in the index gives back a scalar)
s.loc['e']

14

In [4]:
# fancy indexing: pass a list of labels to .loc and get back a series
# containing those rows, in the order the labels were requested
s.loc[['e', 'a']]

e    14
a    -6
dtype: int64

In [5]:
# slice by label: the result follows the index's current (unsorted) order,
# so we get every row from 'e' through 'h' as they appear in s
s.loc['e':'h']   # up to and including with .loc

e    14
g    17
i    17
h   -41
dtype: int64

In [6]:
# if we want to sort the index, we can use the .sort_index method
# this method returns a new series holding the same label/value pairs as before,
# but with the rows reordered so that the index labels are in ascending order

(
    s
    .sort_index()
    .head()
)

a    -6
b    37
c    -3
d   -14
e    14
dtype: int64

In [7]:
# get specific values from this series: .loc is applied to the result of
# .head(), so only the labels that survived the sort + head are available
(
    s
    .sort_index()
    .head()
    .loc[['b', 'd']]
)

b    37
d   -14
dtype: int64

In [8]:
# sort + head is very common: it shows the first rows in sorted order
# (here, the rows with the lowest index labels)
(
    s
    .sort_index()
    .head()
)

a    -6
b    37
c    -3
d   -14
e    14
dtype: int64

In [9]:
# sort + tail is very common: it shows the last rows in sorted order
# (here, the rows with the highest index labels)
(
    s
    .sort_index()
    .tail()
)

f    33
g    17
h   -41
i    17
j   -29
dtype: int64

In [10]:
# if you want (but don't!) you can pass inplace=True to sort_index (and other sorting methods).
# If you do that, the call returns None (so nothing is displayed and it cannot be chained),
# and the data structure itself is modified in place

s.sort_index(inplace=True)

In [11]:
# s itself is now sorted by index, because the in-place sort modified it
s

a    -6
b    37
c    -3
d   -14
e    14
f    33
g    17
h   -41
i    17
j   -29
dtype: int64

In [12]:
# rebuild the series with the same seeded random values, but this time with
# an index in which the labels 'a' and 'b' each appear twice

np.random.seed(0)
s = Series(data=np.random.randint(low=-50, high=50, size=10),
           index=list('aceaihbjdb'))
s

a    -6
c    -3
e    14
a    17
i    17
h   -41
b    33
j   -29
d   -14
b    37
dtype: int64

In [13]:
# this is totally fine! Looking up a repeated label returns a series
# containing every row that carries that label
s.loc['a']

a    -6
a    17
dtype: int64

In [14]:
# same for 'b': both matching rows come back as a series
s.loc['b']

b    33
b    37
dtype: int64

In [15]:
# a label that appears only once still gives back a scalar
s.loc['c']

-3

In [16]:
# a label slice needs an unambiguous start and end position; 'a' appears twice
# in this unsorted index, so pandas cannot pick a left bound and raises KeyError.
# The error is the point of this cell, so catch it and show the message --
# that way the notebook still runs top to bottom with "Restart & Run All".
try:
    s.loc['a':'c']
except KeyError as err:
    print(f'KeyError: {err}')

KeyError: "Cannot get left slice bound for non-unique label: 'a'"