# Explicit Indexes

In [2]:
import pandas as pd

dogs_dict = {
    'breed': ['Labrador', 'Poodle', 'Chow Chow', 'Schnauzer', 'Labrador', 'Chihuahua', 'Poodle', 'Chihuahua', 'Labrador', 'Labrador'],
    'color': ['Chocolate', 'White', 'Brown', 'Gray', 'Black', 'Brown', 'White', 'Black', 'Yellow', 'Black'],
    'name': ['Buddy', 'Lucy', 'Cooper', 'Riley', 'Bear', 'Bella', 'Daisy', 'Lola', 'Max', 'Stella'],
    'height_cm': [56, 43, 46, 49, 56, 18, 43, 18, 59, 56],
    'weight_kg': [25, 6, 23, 17, 29, 2, 7, 2, 29, 29],
    'date_of_birth': ['2013-07-01', '2016-10-11', '2011-10-12', '2014-09-01', '2017-01-20', '2015-04-20', '2017-01-20', '2015-08-25', '2017-01-20', '2017-10-05']
}

dogs = pd.DataFrame(dogs_dict)

In [3]:
dogs.head()

Unnamed: 0,breed,color,name,height_cm,weight_kg,date_of_birth
0,Labrador,Chocolate,Buddy,56,25,2013-07-01
1,Poodle,White,Lucy,43,6,2016-10-11
2,Chow Chow,Brown,Cooper,46,23,2011-10-12
3,Schnauzer,Gray,Riley,49,17,2014-09-01
4,Labrador,Black,Bear,56,29,2017-01-20


In [4]:
dogs.columns

Index(['breed', 'color', 'name', 'height_cm', 'weight_kg', 'date_of_birth'], dtype='object')

In [5]:
dogs.index

RangeIndex(start=0, stop=10, step=1)

In [6]:
dogs_ind = dogs.set_index('name')
print(dogs_ind.head())

            breed      color  height_cm  weight_kg date_of_birth
name                                                            
Buddy    Labrador  Chocolate         56         25    2013-07-01
Lucy       Poodle      White         43          6    2016-10-11
Cooper  Chow Chow      Brown         46         23    2011-10-12
Riley   Schnauzer       Gray         49         17    2014-09-01
Bear     Labrador      Black         56         29    2017-01-20


In [7]:
dogs_ind.reset_index(drop=False, inplace=False).head()

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Buddy,Labrador,Chocolate,56,25,2013-07-01
1,Lucy,Poodle,White,43,6,2016-10-11
2,Cooper,Chow Chow,Brown,46,23,2011-10-12
3,Riley,Schnauzer,Gray,49,17,2014-09-01
4,Bear,Labrador,Black,56,29,2017-01-20


## Indexes make subsetting simpler

In [8]:
dogs[dogs["name"].isin(["Buddy", "Bear"])]

Unnamed: 0,breed,color,name,height_cm,weight_kg,date_of_birth
0,Labrador,Chocolate,Buddy,56,25,2013-07-01
4,Labrador,Black,Bear,56,29,2017-01-20


In [9]:
dogs_ind.loc[["Buddy", "Bear"]]

Unnamed: 0_level_0,breed,color,height_cm,weight_kg,date_of_birth
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Buddy,Labrador,Chocolate,56,25,2013-07-01
Bear,Labrador,Black,56,29,2017-01-20


In [10]:
dogs_ind2 = dogs.set_index("breed")
print(dogs_ind2.head())

               color    name  height_cm  weight_kg date_of_birth
breed                                                           
Labrador   Chocolate   Buddy         56         25    2013-07-01
Poodle         White    Lucy         43          6    2016-10-11
Chow Chow      Brown  Cooper         46         23    2011-10-12
Schnauzer       Gray   Riley         49         17    2014-09-01
Labrador       Black    Bear         56         29    2017-01-20


In [11]:
dogs_ind2.loc["Labrador"]

Unnamed: 0_level_0,color,name,height_cm,weight_kg,date_of_birth
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Chocolate,Buddy,56,25,2013-07-01
Labrador,Black,Bear,56,29,2017-01-20
Labrador,Yellow,Max,59,29,2017-01-20
Labrador,Black,Stella,56,29,2017-10-05


## Multi-level indexes a.k.a. hierarchical indexes

In [12]:
dogs_ind3 = dogs.set_index(["breed", "color"])
print(dogs_ind3.head())

                       name  height_cm  weight_kg date_of_birth
breed     color                                                
Labrador  Chocolate   Buddy         56         25    2013-07-01
Poodle    White        Lucy         43          6    2016-10-11
Chow Chow Brown      Cooper         46         23    2011-10-12
Schnauzer Gray        Riley         49         17    2014-09-01
Labrador  Black        Bear         56         29    2017-01-20


### Subset outer levels

In [13]:
dogs_ind3.loc[["Labrador", "Chihuahua"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Chocolate,Buddy,56,25,2013-07-01
Labrador,Black,Bear,56,29,2017-01-20
Labrador,Yellow,Max,59,29,2017-01-20
Labrador,Black,Stella,56,29,2017-10-05
Chihuahua,Brown,Bella,18,2,2015-04-20
Chihuahua,Black,Lola,18,2,2015-08-25


### Subset inner levels with a list of tuples

In [14]:
dogs_ind3.loc[[("Labrador", "Chocolate"), ("Chihuahua", "Brown")]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Chocolate,Buddy,56,25,2013-07-01
Chihuahua,Brown,Bella,18,2,2015-04-20


## Sorting by index values

In [15]:
dogs_ind3.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,Black,Lola,18,2,2015-08-25
Chihuahua,Brown,Bella,18,2,2015-04-20
Chow Chow,Brown,Cooper,46,23,2011-10-12
Labrador,Black,Bear,56,29,2017-01-20
Labrador,Black,Stella,56,29,2017-10-05
Labrador,Chocolate,Buddy,56,25,2013-07-01
Labrador,Yellow,Max,59,29,2017-01-20
Poodle,White,Lucy,43,6,2016-10-11
Poodle,White,Daisy,43,7,2017-01-20
Schnauzer,Gray,Riley,49,17,2014-09-01


In [16]:
dogs_ind3.sort_index(level=["breed", "color"], ascending=[True, False])

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,Brown,Bella,18,2,2015-04-20
Chihuahua,Black,Lola,18,2,2015-08-25
Chow Chow,Brown,Cooper,46,23,2011-10-12
Labrador,Yellow,Max,59,29,2017-01-20
Labrador,Chocolate,Buddy,56,25,2013-07-01
Labrador,Black,Bear,56,29,2017-01-20
Labrador,Black,Stella,56,29,2017-10-05
Poodle,White,Lucy,43,6,2016-10-11
Poodle,White,Daisy,43,7,2017-01-20
Schnauzer,Gray,Riley,49,17,2014-09-01


# Slicing

In [17]:
breeds = ["Labrador", "Poodel", "Chow Chow", "Schnauzer", "Labrador", "Chihuahua", "St. Bernard"]

In [18]:
breeds[2:5]

['Chow Chow', 'Schnauzer', 'Labrador']

In [19]:
breeds[:3]

['Labrador', 'Poodel', 'Chow Chow']

In [20]:
breeds[:]

['Labrador',
 'Poodel',
 'Chow Chow',
 'Schnauzer',
 'Labrador',
 'Chihuahua',
 'St. Bernard']

## Slicing the outer index level

In [21]:
dogs_srt = dogs.set_index(["breed", "color"]).sort_index()
print(dogs_srt)

                       name  height_cm  weight_kg date_of_birth
breed     color                                                
Chihuahua Black        Lola         18          2    2015-08-25
          Brown       Bella         18          2    2015-04-20
Chow Chow Brown      Cooper         46         23    2011-10-12
Labrador  Black        Bear         56         29    2017-01-20
          Black      Stella         56         29    2017-10-05
          Chocolate   Buddy         56         25    2013-07-01
          Yellow        Max         59         29    2017-01-20
Poodle    White        Lucy         43          6    2016-10-11
          White       Daisy         43          7    2017-01-20
Schnauzer Gray        Riley         49         17    2014-09-01


In [22]:
dogs_srt.loc["Chow Chow":"Schnauzer"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chow Chow,Brown,Cooper,46,23,2011-10-12
Labrador,Black,Bear,56,29,2017-01-20
Labrador,Black,Stella,56,29,2017-10-05
Labrador,Chocolate,Buddy,56,25,2013-07-01
Labrador,Yellow,Max,59,29,2017-01-20
Poodle,White,Lucy,43,6,2016-10-11
Poodle,White,Daisy,43,7,2017-01-20
Schnauzer,Gray,Riley,49,17,2014-09-01


## Slicing the inner index level

In [23]:
dogs_srt.loc[("Chow Chow", "Brown"):("Poodle", "White")]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chow Chow,Brown,Cooper,46,23,2011-10-12
Labrador,Black,Bear,56,29,2017-01-20
Labrador,Black,Stella,56,29,2017-10-05
Labrador,Chocolate,Buddy,56,25,2013-07-01
Labrador,Yellow,Max,59,29,2017-01-20
Poodle,White,Lucy,43,6,2016-10-11
Poodle,White,Daisy,43,7,2017-01-20


## Slicing columns

In [24]:
dogs_srt.loc[:, "name":"height_cm"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1
Chihuahua,Black,Lola,18
Chihuahua,Brown,Bella,18
Chow Chow,Brown,Cooper,46
Labrador,Black,Bear,56
Labrador,Black,Stella,56
Labrador,Chocolate,Buddy,56
Labrador,Yellow,Max,59
Poodle,White,Lucy,43
Poodle,White,Daisy,43
Schnauzer,Gray,Riley,49


## Slicing All

In [25]:
dogs_srt.loc[("Chow Chow", "Brown"):("Poodle", "White"), "name":"height_cm"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1
Chow Chow,Brown,Cooper,46
Labrador,Black,Bear,56
Labrador,Black,Stella,56
Labrador,Chocolate,Buddy,56
Labrador,Yellow,Max,59
Poodle,White,Lucy,43
Poodle,White,Daisy,43


## Slicing by Dates

In [26]:
dogs = dogs.set_index("date_of_birth").sort_index()
print(dogs.head())

                   breed      color    name  height_cm  weight_kg
date_of_birth                                                    
2011-10-12     Chow Chow      Brown  Cooper         46         23
2013-07-01      Labrador  Chocolate   Buddy         56         25
2014-09-01     Schnauzer       Gray   Riley         49         17
2015-04-20     Chihuahua      Brown   Bella         18          2
2015-08-25     Chihuahua      Black    Lola         18          2


In [27]:
dogs.loc["2014-09-01":"2016-10-11"]

Unnamed: 0_level_0,breed,color,name,height_cm,weight_kg
date_of_birth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-09-01,Schnauzer,Gray,Riley,49,17
2015-04-20,Chihuahua,Brown,Bella,18,2
2015-08-25,Chihuahua,Black,Lola,18,2
2016-10-11,Poodle,White,Lucy,43,6


In [28]:
dogs.loc["2014":"2016"]

Unnamed: 0_level_0,breed,color,name,height_cm,weight_kg
date_of_birth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-09-01,Schnauzer,Gray,Riley,49,17
2015-04-20,Chihuahua,Brown,Bella,18,2
2015-08-25,Chihuahua,Black,Lola,18,2


## Subsetting by row/column number

In [29]:
print(dogs.iloc[2:5, 1:4]) 

               color   name  height_cm
date_of_birth                         
2014-09-01      Gray  Riley         49
2015-04-20     Brown  Bella         18
2015-08-25     Black   Lola         18


# Pivot Tables

In [30]:
dogs_height_by_breed_vs_color = dogs.pivot_table("height_cm", index="breed", columns="color")
print(dogs_height_by_breed_vs_color)

color      Black  Brown  Chocolate  Gray  White  Yellow
breed                                                  
Chihuahua   18.0   18.0        NaN   NaN    NaN     NaN
Chow Chow    NaN   46.0        NaN   NaN    NaN     NaN
Labrador    56.0    NaN       56.0   NaN    NaN    59.0
Poodle       NaN    NaN        NaN   NaN   43.0     NaN
Schnauzer    NaN    NaN        NaN  49.0    NaN     NaN
