# Advanced indexing

In [19]:
import pandas as pd
import numpy as np

### Changing index of a DataFrame

In [9]:
# Load the sales table and use its 'month' column as the row index.
sales = pd.read_csv(
    'sales/sales.csv',
    index_col='month',
)
# Preview the first five rows.
sales.head(n=5)

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
May,132,,52


In [11]:
# Build upper-cased copies of the month labels, then install them as the
# new row index. Assigning a plain list replaces the index wholesale, so the
# previous index name is not carried over.
new_idx = list(map(str.upper, sales.index))
sales.index = new_idx
sales

Unnamed: 0,eggs,salt,spam
JAN,47,12.0,17
FEB,110,50.0,31
MAR,221,89.0,72
APR,77,87.0,20
MAY,132,,52
JUN,205,60.0,55


### Changing index name labels

In [12]:
# Label both axes: the column axis holds the products, the row axis the months.
# The names appear in the header of the rendered table.
sales.columns.name = 'PRODUCTS'
sales.index.name = 'MONTHS'
sales

PRODUCTS,eggs,salt,spam
MONTHS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
JAN,47,12.0,17
FEB,110,50.0,31
MAR,221,89.0,72
APR,77,87.0,20
MAY,132,,52
JUN,205,60.0,55


### Building an index, then a DataFrame

In [13]:
# Re-read the CSV without index_col, so 'month' stays an ordinary column and
# the rows get the default integer index.
sales = pd.read_csv(
    'sales/sales.csv',
)
sales.head(n=5)

Unnamed: 0,month,eggs,salt,spam
0,Jan,47,12.0,17
1,Feb,110,50.0,31
2,Mar,221,89.0,72
3,Apr,77,87.0,20
4,May,132,,52


In [14]:
# One label per row of the table; the list length must match the row count.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']
# Replace the default integer index with the month labels. The 'month'
# column itself is left in place, so the labels appear twice in the output.
sales.index = pd.Index(months)
sales

Unnamed: 0,month,eggs,salt,spam
Jan,Jan,47,12.0,17
Feb,Feb,110,50.0,31
Mar,Mar,221,89.0,72
Apr,Apr,77,87.0,20
May,May,132,,52
Jun,Jun,205,60.0,55


### Extracting data with a MultiIndex, setting the index

In [45]:
# Start again from the raw CSV (default integer index) before building
# the two-level index below.
sales = pd.read_csv(
    'sales/sales.csv',
)
sales.head(n=5)

Unnamed: 0,month,eggs,salt,spam
0,Jan,47,12.0,17
1,Feb,110,50.0,31
2,Mar,221,89.0,72
3,Apr,77,87.0,20
4,May,132,,52


In [54]:
# Build a two-level (state, month) row index in one chain:
#   1. overwrite 'month' with numeric values and add a 'state' column,
#   2. move both columns into the index, outer level 'state', inner 'month',
#   3. sort the index so label-based slicing on it works.
sales = (
    sales
    .assign(
        month=[1, 2, 1, 2, 1, 2],
        state=['CA', 'CA', 'NY', 'NY', 'TX', 'TX'],
    )
    .set_index(['state', 'month'])
    .sort_index()
)
sales

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,1,47,12.0,17
CA,2,110,50.0,31
NY,1,221,89.0,72
NY,2,77,87.0,20
TX,1,132,,52
TX,2,205,60.0,55


In [52]:
# Pick the rows of several states by listing their outer-level labels;
# the trailing ':' keeps every column.
sales.loc[['CA', 'TX'], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,1,47,12.0,17
CA,2,110,50.0,31
TX,1,132,,52
TX,2,205,60.0,55


In [53]:
# Label-based slice over the outer 'state' level, from 'CA' through 'TX'
# with both endpoints included. Written with .loc so the row selection is
# explicit; plain [] is otherwise the column-selection operator.
# The index was sorted with sort_index() when it was built.
sales.loc['CA':'TX']

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,1,47,12.0,17
CA,2,110,50.0,31
NY,1,221,89.0,72
NY,2,77,87.0,20
TX,1,132,,52
TX,2,205,60.0,55


In [56]:
# Select all rows whose outer 'state' level is 'CA'; the result is indexed
# by the remaining 'month' level only.
sales.loc['CA']

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,47,12.0,17
2,110,50.0,31


### Indexing multiple levels of a MultiIndex

In [60]:
# Select the single row for state 'NY', month 1. The row key is written as
# one tuple and the columns as an explicit ':' so the pair cannot be read
# as a (row label, column label) lookup. The result is a Series of that
# row's column values.
sales.loc[('NY', 1), :]

eggs    221.0
salt     89.0
spam     72.0
Name: (NY, 1), dtype: float64

In [63]:
# Month 2 for the states 'CA' and 'TX': a list of labels on the outer level
# and a single label on the inner level; ':' keeps all columns.
sales.loc[pd.IndexSlice[['CA', 'TX'], 2], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2,110,50.0,31
TX,2,205,60.0,55


In [66]:
# Month 2 for every state: ':' on the outer level means "all states",
# 2 selects the inner 'month' level, and the final ':' keeps all columns.
sales.loc[pd.IndexSlice[:, 2], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2,110,50.0,31
NY,2,77,87.0,20
TX,2,205,60.0,55
