## Slicing and Indexing
### Explicit indexes

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the dogs dataset; the relative path is resolved against the
# current working directory
dogs = pd.read_csv('dogs.csv')

In [7]:
# Column labels of the DataFrame; they are stored in an Index object
dogs.columns

Index(['name', 'breed', 'color', 'height_cm', 'weight_kg'], dtype='object')

In [8]:
# Row labels: no index column was chosen when reading the CSV, so the
# rows carry a default RangeIndex
dogs.index

RangeIndex(start=0, stop=7, step=1)

### Setting a column as the index

In [9]:
# Show the frame before any index is set: rows are labelled 0-6 and
# "name" is an ordinary column
dogs

Unnamed: 0,name,breed,color,height_cm,weight_kg
0,Bella,Labrador,Brown,56,25
1,Charlie,Poodle,Black,43,23
2,Lucy,Chow Chow,Brown,46,22
3,Cooper,Schnauzer,Grey,49,17
4,Max,Labrador,Black,59,29
5,Stella,Chihuahua,Tan,18,2
6,Bernie,St. Bernard,White,77,74


In [11]:
# set_index returns a new DataFrame in which "name" has moved from the
# columns into the index; `dogs` itself is left unchanged
dogs_ind = dogs.set_index("name")
dogs_ind

Unnamed: 0_level_0,breed,color,height_cm,weight_kg
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bella,Labrador,Brown,56,25
Charlie,Poodle,Black,43,23
Lucy,Chow Chow,Brown,46,22
Cooper,Schnauzer,Grey,49,17
Max,Labrador,Black,59,29
Stella,Chihuahua,Tan,18,2
Bernie,St. Bernard,White,77,74


### Removing an index

In [12]:
# reset_index undoes set_index: "name" becomes a regular column again and
# the rows get the default 0-6 integer labels back.
# The result is only displayed, not stored, so dogs_ind is unchanged.
dogs_ind.reset_index()

Unnamed: 0,name,breed,color,height_cm,weight_kg
0,Bella,Labrador,Brown,56,25
1,Charlie,Poodle,Black,43,23
2,Lucy,Chow Chow,Brown,46,22
3,Cooper,Schnauzer,Grey,49,17
4,Max,Labrador,Black,59,29
5,Stella,Chihuahua,Tan,18,2
6,Bernie,St. Bernard,White,77,74


### Dropping/discarding an index

In [16]:
# drop=True discards the index instead of turning it back into a column
dogs_ind.reset_index(drop = True) # Entirely removes the dogs' names

Unnamed: 0,breed,color,height_cm,weight_kg
0,Labrador,Brown,56,25
1,Poodle,Black,43,23
2,Chow Chow,Brown,46,22
3,Schnauzer,Grey,49,17
4,Labrador,Black,59,29
5,Chihuahua,Tan,18,2
6,St. Bernard,White,77,74


### Indexes make subsetting simpler

In [17]:
# Without an index: build a boolean mask with isin() and keep only the
# rows where the dog is called Bella or Stella
dogs[dogs["name"].isin(["Bella", "Stella"])]

Unnamed: 0,name,breed,color,height_cm,weight_kg
0,Bella,Labrador,Brown,56,25
5,Stella,Chihuahua,Tan,18,2


In [27]:
# Equivalent code when the names are in the index.
# DataFrames have a subsetting method called "loc", which filters on
# index values; passing the dog names to loc as a list selects the rows
# with those labels.
dogs_ind.loc[["Bella", "Stella"]]

Unnamed: 0_level_0,breed,color,height_cm,weight_kg
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bella,Labrador,Brown,56,25
Stella,Chihuahua,Tan,18,2


### Index values don't need to be unique

In [30]:
# Index values don't have to be unique: with "breed" as the index,
# "Labrador" appears twice (Bella and Max)
dogs_ind2 = dogs.set_index("breed")
dogs_ind2

Unnamed: 0_level_0,name,color,height_cm,weight_kg
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Labrador,Bella,Brown,56,25
Poodle,Charlie,Black,43,23
Chow Chow,Lucy,Brown,46,22
Schnauzer,Cooper,Grey,49,17
Labrador,Max,Black,59,29
Chihuahua,Stella,Tan,18,2
St. Bernard,Bernie,White,77,74


### Subsetting on duplicated index values


In [31]:
# Subsetting on a duplicated label with loc returns every row carrying
# that label, so both Labradors come back
dogs_ind2.loc["Labrador"]

Unnamed: 0_level_0,name,color,height_cm,weight_kg
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Labrador,Bella,Brown,56,25
Labrador,Max,Black,59,29


### Multi-level indexes a.k.a. hierarchical indexes

In [34]:
# Passing a list of column names to set_index puts several columns in
# the index, giving a multi-level (hierarchical) index.
# Here "breed" is the outer level and "color" is the inner level.
dogs_ind3 = dogs.set_index(["breed", "color"])
dogs_ind3

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Labrador,Brown,Bella,56,25
Poodle,Black,Charlie,43,23
Chow Chow,Brown,Lucy,46,22
Schnauzer,Grey,Cooper,49,17
Labrador,Black,Max,59,29
Chihuahua,Tan,Stella,18,2
St. Bernard,White,Bernie,77,74


### Subset the outer level (i.e., breed) with a list

In [38]:
# To take a subset of rows at the outer index level, pass a list of
# outer-level values to loc.
# Here the list contains Labrador and Chihuahua, so the result contains
# all dogs of both breeds, whatever their color.
dogs_ind3.loc[["Labrador", "Chihuahua"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Labrador,Brown,Bella,56,25
Labrador,Black,Max,59,29
Chihuahua,Tan,Stella,18,2


### Subset inner levels with a list of tuples

In [40]:
# To subset on inner levels, pass a list of tuples, one tuple per
# wanted (outer, inner) combination.
# The first tuple asks for Labrador at the outer level and Brown at the
# inner level. A row must match every element of a tuple to be returned;
# e.g. the black Labrador is not returned because it fails the Brown
# condition.
dogs_ind3.loc[[("Labrador", "Brown"), ("Chihuahua", "Tan")]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Labrador,Brown,Bella,56,25
Chihuahua,Tan,Stella,18,2


### Sorting by index values

In [42]:
# sort_index sorts the rows by their index values.
# By default it sorts all index levels, from outer to inner, in
# ascending order: breeds alphabetically, then colors within each breed.
dogs_ind3.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chihuahua,Tan,Stella,18,2
Chow Chow,Brown,Lucy,46,22
Labrador,Black,Max,59,29
Labrador,Brown,Bella,56,25
Poodle,Black,Charlie,43,23
Schnauzer,Grey,Cooper,49,17
St. Bernard,White,Bernie,77,74


In [43]:
# The sorted result above was not assigned, so dogs_ind3 still has its
# original row order
dogs_ind3

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Labrador,Brown,Bella,56,25
Poodle,Black,Charlie,43,23
Chow Chow,Brown,Lucy,46,22
Schnauzer,Grey,Cooper,49,17
Labrador,Black,Max,59,29
Chihuahua,Tan,Stella,18,2
St. Bernard,White,Bernie,77,74


### Controlling sort_index

In [44]:
# `level` chooses which index levels to sort by and in what priority;
# `ascending` takes one flag per listed level.
# Here: color ascending (A-Z) first, then breed descending (Z-A) within
# each color; the displayed level order (breed, color) is not changed.
dogs_ind3.sort_index(level = ["color", "breed"], ascending = [True, False])

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Poodle,Black,Charlie,43,23
Labrador,Black,Max,59,29
Labrador,Brown,Bella,56,25
Chow Chow,Brown,Lucy,46,22
Schnauzer,Grey,Cooper,49,17
Chihuahua,Tan,Stella,18,2
St. Bernard,White,Bernie,77,74
