# Indexing DataFrames

A bit of a discussion around how use of index devates away from tidy data format. But it can make slicing dataframes more easily readable. 

In [1]:
import seaborn as sns
import pandas as pd

In [4]:
penguins = sns.load_dataset("penguins")
penguins.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female


In [5]:
# set species as index
penguins.set_index("species")

Unnamed: 0_level_0,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
Adelie,Torgersen,,,,,
Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female
...,...,...,...,...,...,...
Gentoo,Biscoe,,,,,
Gentoo,Biscoe,46.8,14.3,215.0,4850.0,Female
Gentoo,Biscoe,50.4,15.7,222.0,5750.0,Male
Gentoo,Biscoe,45.2,14.8,212.0,5200.0,Female


In [8]:
# set a multiindex
peng_multi = penguins.set_index(["island", "species"])
peng_multi

Unnamed: 0_level_0,Unnamed: 1_level_0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
island,species,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Torgersen,Adelie,39.1,18.7,181.0,3750.0,Male
Torgersen,Adelie,39.5,17.4,186.0,3800.0,Female
Torgersen,Adelie,40.3,18.0,195.0,3250.0,Female
Torgersen,Adelie,,,,,
Torgersen,Adelie,36.7,19.3,193.0,3450.0,Female
...,...,...,...,...,...,...
Biscoe,Gentoo,,,,,
Biscoe,Gentoo,46.8,14.3,215.0,4850.0,Female
Biscoe,Gentoo,50.4,15.7,222.0,5750.0,Male
Biscoe,Gentoo,45.2,14.8,212.0,5200.0,Female


## Subsetting Comparison

In [17]:
aoi = ["Dream", "Torgersen"]

In [18]:
# subsetting with isin
penguins[penguins["island"].isin(aoi)]

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female
...,...,...,...,...,...,...,...
215,Chinstrap,Dream,55.8,19.8,207.0,4000.0,Male
216,Chinstrap,Dream,43.5,18.1,202.0,3400.0,Female
217,Chinstrap,Dream,49.6,18.2,193.0,3775.0,Male
218,Chinstrap,Dream,50.8,19.0,210.0,4100.0,Male


In [19]:
# using .loc with single argument returns all cols for specified index
peng_multi.loc[aoi]
# reads neater, by default outer hierarchy is targetted, but you can be more
# precise

Unnamed: 0_level_0,Unnamed: 1_level_0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
island,species,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dream,Adelie,39.5,16.7,178.0,3250.0,Female
Dream,Adelie,37.2,18.1,178.0,3900.0,Male
Dream,Adelie,39.5,17.8,188.0,3300.0,Female
Dream,Adelie,40.9,18.9,184.0,3900.0,Male
Dream,Adelie,36.4,17.0,195.0,3325.0,Female
...,...,...,...,...,...,...
Torgersen,Adelie,41.5,18.3,195.0,4300.0,Male
Torgersen,Adelie,39.0,17.1,191.0,3050.0,Female
Torgersen,Adelie,44.1,18.0,210.0,4000.0,Male
Torgersen,Adelie,38.5,17.9,190.0,3325.0,Female


In [20]:
# To get more specific with your indexing, pass in tuples, several can be
# passed
peng_multi.loc[("Dream", "Chinstrap")]

  peng_multi.loc[("Dream", "Chinstrap")]


Unnamed: 0_level_0,Unnamed: 1_level_0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
island,species,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dream,Chinstrap,46.5,17.9,192.0,3500.0,Female
Dream,Chinstrap,50.0,19.5,196.0,3900.0,Male
Dream,Chinstrap,51.3,19.2,193.0,3650.0,Male
Dream,Chinstrap,45.4,18.7,188.0,3525.0,Female
Dream,Chinstrap,52.7,19.8,197.0,3725.0,Male
Dream,...,...,...,...,...,...
Dream,Chinstrap,55.8,19.8,207.0,4000.0,Male
Dream,Chinstrap,43.5,18.1,202.0,3400.0,Female
Dream,Chinstrap,49.6,18.2,193.0,3775.0,Male
Dream,Chinstrap,50.8,19.0,210.0,4100.0,Male


## Sorting

In [22]:
peng_multi.sort_index(level="species", ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
island,species,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Biscoe,Gentoo,49.9,16.1,213.0,5400.0,Male
Biscoe,Gentoo,45.2,14.8,212.0,5200.0,Female
Biscoe,Gentoo,50.4,15.7,222.0,5750.0,Male
Biscoe,Gentoo,46.8,14.3,215.0,4850.0,Female
Biscoe,Gentoo,,,,,
Biscoe,...,...,...,...,...,...
Biscoe,Adelie,38.8,17.2,180.0,3800.0,Male
Biscoe,Adelie,38.2,18.1,185.0,3950.0,Male
Biscoe,Adelie,35.9,19.2,189.0,3800.0,Female
Biscoe,Adelie,37.7,18.7,180.0,3600.0,Male


In [23]:
# You can sort on several levels
peng_multi.sort_index(level=["island", "species"], ascending=[False, True])

Unnamed: 0_level_0,Unnamed: 1_level_0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
island,species,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Torgersen,Adelie,39.1,18.7,181.0,3750.0,Male
Torgersen,Adelie,39.5,17.4,186.0,3800.0,Female
Torgersen,Adelie,40.3,18.0,195.0,3250.0,Female
Torgersen,Adelie,,,,,
Torgersen,Adelie,36.7,19.3,193.0,3450.0,Female
...,...,...,...,...,...,...
Biscoe,Gentoo,,,,,
Biscoe,Gentoo,46.8,14.3,215.0,4850.0,Female
Biscoe,Gentoo,50.4,15.7,222.0,5750.0,Male
Biscoe,Gentoo,45.2,14.8,212.0,5200.0,Female
