In [1]:
import pandas as pd

In [2]:
bigmac = pd.read_csv('pandas/bigmac.csv', parse_dates=['Date'])
bigmac['Country'] = bigmac['Country'].astype('category')
bigmac.head()

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35
3,2016-01-01,Britain,4.22
4,2016-01-01,Canada,4.14


In [3]:
bigmac.dtypes

Date                   datetime64[ns]
Country                      category
Price in US Dollars           float64
dtype: object

In [4]:
bigmac.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 652 entries, 0 to 651
Data columns (total 3 columns):
Date                   652 non-null datetime64[ns]
Country                652 non-null category
Price in US Dollars    652 non-null float64
dtypes: category(1), datetime64[ns](1), float64(1)
memory usage: 12.6 KB


# Creating a multi-index

In [5]:
bigmac = pd.read_csv('pandas/bigmac.csv', parse_dates=['Date'])
bigmac['Country'] = bigmac['Country'].astype('category')
bigmac.head(2)

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74


## Setting a single column as index using `.set_index( )` method

In [6]:
result = bigmac.set_index(keys='Date')
result.head(2)

Unnamed: 0_level_0,Country,Price in US Dollars
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74


In [7]:
result = bigmac.set_index(keys='Country')
result.head(2)

Unnamed: 0_level_0,Date,Price in US Dollars
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Argentina,2016-01-01,2.39
Australia,2016-01-01,3.74


## Multiple indices

In [8]:
bigmac.set_index(['Date', 'Country'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14
2016-01-01,Chile,2.94
2016-01-01,China,2.68
2016-01-01,Colombia,2.43
2016-01-01,Costa Rica,4.02
2016-01-01,Czech Republic,2.98


**It is a general practice to make the column with the least number of unique values as the outermost index of a DataFrame**

In [9]:
bigmac.set_index(['Date', 'Country'],inplace=True)

bigmac.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97
2010-01-01,Chile,3.18
2010-01-01,China,1.83
2010-01-01,Colombia,3.91
2010-01-01,Costa Rica,3.52
2010-01-01,Czech Republic,3.71


**First sorted by the outermost index and subsequently by the inner indices**

In [10]:
bigmac.index

MultiIndex(levels=[[2010-01-01 00:00:00, 2010-07-01 00:00:00, 2011-07-01 00:00:00, 2012-01-01 00:00:00, 2012-07-01 00:00:00, 2013-01-01 00:00:00, 2013-07-01 00:00:00, 2014-01-01 00:00:00, 2014-07-01 00:00:00, 2015-01-01 00:00:00, 2015-07-01 00:00:00, 2016-01-01 00:00:00], ['Argentina', 'Australia', 'Austria', 'Belgium', 'Brazil', 'Britain', 'Canada', 'Chile', 'China', 'Colombia', 'Costa Rica', 'Czech Republic', 'Denmark', 'Egypt', 'Estonia', 'Euro area', 'Finland', 'France', 'Germany', 'Greece', 'Hong Kong', 'Hungary', 'India', 'Indonesia', 'Ireland', 'Israel', 'Italy', 'Japan', 'Latvia', 'Lithuania', 'Malaysia', 'Mexico', 'Netherlands', 'New Zealand', 'Norway', 'Pakistan', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Russia', 'Saudi Arabia', 'Singapore', 'South Africa', 'South Korea', 'Spain', 'Sri Lanka', 'Sweden', 'Switzerland', 'Taiwan', 'Thailand', 'Turkey', 'UAE', 'Ukraine', 'United States', 'Uruguay', 'Venezuela', 'Vietnam']],
           labels=[[11, 11, 11, 11, 11, 11, 11, 11,

In [11]:
bigmac.index.names

FrozenList(['Date', 'Country'])

In [12]:
type(bigmac.index)

pandas.core.indexes.multi.MultiIndex

In [13]:
bigmac.index[0]

(Timestamp('2016-01-01 00:00:00'), 'Argentina')

---
# The `.get_level_values( )` method

In [14]:
bigmac = pd.read_csv('pandas/bigmac.csv', parse_dates=['Date'], index_col=['Date','Country'])
bigmac.sort_index(inplace=True) # For faster operations
bigmac.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98


In [15]:
bigmac.index

MultiIndex(levels=[[2010-01-01 00:00:00, 2010-07-01 00:00:00, 2011-07-01 00:00:00, 2012-01-01 00:00:00, 2012-07-01 00:00:00, 2013-01-01 00:00:00, 2013-07-01 00:00:00, 2014-01-01 00:00:00, 2014-07-01 00:00:00, 2015-01-01 00:00:00, 2015-07-01 00:00:00, 2016-01-01 00:00:00], ['Argentina', 'Australia', 'Austria', 'Belgium', 'Brazil', 'Britain', 'Canada', 'Chile', 'China', 'Colombia', 'Costa Rica', 'Czech Republic', 'Denmark', 'Egypt', 'Estonia', 'Euro area', 'Finland', 'France', 'Germany', 'Greece', 'Hong Kong', 'Hungary', 'India', 'Indonesia', 'Ireland', 'Israel', 'Italy', 'Japan', 'Latvia', 'Lithuania', 'Malaysia', 'Mexico', 'Netherlands', 'New Zealand', 'Norway', 'Pakistan', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Russia', 'Saudi Arabia', 'Singapore', 'South Africa', 'South Korea', 'Spain', 'Sri Lanka', 'Sweden', 'Switzerland', 'Taiwan', 'Thailand', 'Turkey', 'UAE', 'Ukraine', 'United States', 'Uruguay', 'Venezuela', 'Vietnam']],
           labels=[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [16]:
bigmac.index.get_level_values(0) # Date is at the 0th index of the DataFrame

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01'],
              dtype='datetime64[ns]', name='Date', length=652, freq=None)

In [17]:
bigmac.index.get_level_values(1) # Country is at the 1st index of the DataFrame

Index(['Argentina', 'Australia', 'Brazil', 'Britain', 'Canada', 'Chile',
       'China', 'Colombia', 'Costa Rica', 'Czech Republic',
       ...
       'Switzerland', 'Taiwan', 'Thailand', 'Turkey', 'UAE', 'Ukraine',
       'United States', 'Uruguay', 'Venezuela', 'Vietnam'],
      dtype='object', name='Country', length=652)

**We can also provide the name of the index instead of the index position.**

In [18]:
bigmac.index.get_level_values('Date')

DatetimeIndex(['2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01',
               ...
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01'],
              dtype='datetime64[ns]', name='Date', length=652, freq=None)

In [19]:
bigmac.index.get_level_values('Country')

Index(['Argentina', 'Australia', 'Brazil', 'Britain', 'Canada', 'Chile',
       'China', 'Colombia', 'Costa Rica', 'Czech Republic',
       ...
       'Switzerland', 'Taiwan', 'Thailand', 'Turkey', 'UAE', 'Ukraine',
       'United States', 'Uruguay', 'Venezuela', 'Vietnam'],
      dtype='object', name='Country', length=652)

---
# The `.set_names( )` method on `MultiIndex`
This method is called on the `index` attribute of the DataFrame which returns the MultiIndex object storing those combinations of indices/levels/layers that make up the MultiIndex

In [20]:
bigmac = pd.read_csv('pandas/bigmac.csv',parse_dates=['Date'], index_col=['Date','Country'])
bigmac.sort_index(inplace=True)
bigmac.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98


**Let's say we want to change the names of the indices from `Date` and `Country` to `Day` and `Location` respectively**

In [21]:
bigmac.index.set_names(['Day','Location'], inplace=True)
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Day,Location,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


---
# The `.sort_index( )` method on MultiIndex
The `ascending =  True` parameter to the `.sort_index( )` method sorts ALL the indices of the MultiIndex in ascending order

In [22]:
bigmac = pd.read_csv('pandas/bigmac.csv',parse_dates=['Date'], index_col=['Date','Country'])
bigmac.sort_index(ascending=True)
bigmac.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74


**The `ascending =  False` parameter to the `.sort_index( )` method sorts ALL the indices of the MultiIndex in descending order**

In [23]:
bigmac.sort_index(ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Vietnam,2.67
2016-01-01,Venezuela,0.66
2016-01-01,Uruguay,3.74
2016-01-01,United States,4.93
2016-01-01,Ukraine,1.54
2016-01-01,UAE,3.54
2016-01-01,Turkey,3.41
2016-01-01,Thailand,3.09
2016-01-01,Taiwan,2.08
2016-01-01,Switzerland,6.44


**If we want to sort different indices in different fashion, that is onr or more indices to be sorted in ascending order and the other(s) in descending order, we can provide a list of booleans to the `ascending` parameter.**

In [24]:
bigmac.sort_index(ascending=[True,False]) # Date in ascending order, Country in descending order

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Uruguay,3.32
2010-01-01,United States,3.58
2010-01-01,Ukraine,1.83
2010-01-01,UAE,2.99
2010-01-01,Turkey,3.83
2010-01-01,Thailand,2.11
2010-01-01,Taiwan,2.36
2010-01-01,Switzerland,6.30
2010-01-01,Sweden,5.51
2010-01-01,Sri Lanka,1.83


---
# Extracting rows from a MultiIndex DataFrame

In [25]:
bigmac = pd.read_csv('pandas/bigmac.csv', parse_dates=['Date'],index_col=['Date','Country'])
bigmac.sort_index(inplace=True)
bigmac.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76


## Using the `.loc[ ]` method
To provide a multiple indices, a tuple of the index columns has to be passed. NOTE: Passing a list instead of a tuple would result into an error.

In [26]:
bigmac.loc[('2010-01-01')]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97
2010-01-01,Chile,3.18
2010-01-01,China,1.83
2010-01-01,Colombia,3.91
2010-01-01,Costa Rica,3.52
2010-01-01,Czech Republic,3.71


In [27]:
bigmac.loc[('2010-01-01','Brazil')]

Price in US Dollars    4.76
Name: (2010-01-01 00:00:00, Brazil), dtype: float64

In [28]:
bigmac.loc[('2010-01-01','Brazil'), 'Price in US Dollars']

Date        Country
2010-01-01  Brazil     4.76
Name: Price in US Dollars, dtype: float64