In [1]:
from datetime import date

import pandas as pd

# Import Data & Optimization

In [2]:
# load the raw Big Mac data; 'Date' is parsed to datetime on read
bigmac = pd.read_csv('data/bigmac.csv', parse_dates=['Date'])

# same file again, this time with the (Date, Country) MultiIndex built at read time
# (kept for comparison only -- the cells below build the index from `bigmac`)
bigmac_indexed_start = pd.read_csv(
    'data/bigmac.csv',
    parse_dates=['Date'],
    index_col=['Date', 'Country'],
)

bigmac.head()

Unnamed: 0,Date,Country,Price in US Dollars
0,2016-01-01,Argentina,2.39
1,2016-01-01,Australia,3.74
2,2016-01-01,Brazil,3.35
3,2016-01-01,Britain,4.22
4,2016-01-01,Canada,4.14


In [3]:
# display the dtype of every column (confirms 'Date' was parsed as datetime64)
bigmac.dtypes

Date                   datetime64[ns]
Country                        object
Price in US Dollars           float64
dtype: object

In [4]:
# display a summary of the frame: index range, per-column non-null counts
# and dtypes, and approximate memory usage
bigmac.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 652 entries, 0 to 651
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Date                 652 non-null    datetime64[ns]
 1   Country              652 non-null    object        
 2   Price in US Dollars  652 non-null    float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 15.4+ KB


# MultiIndex DataFrames

In [5]:
# build a (Date, Country) MultiIndex from existing columns with set_index:
# - a list of column names yields one index level per name, in list order
# - best practice is to list the column with the fewest distinct values first
bigmac_indexed = bigmac.set_index(
    keys=['Date', 'Country'],
    drop=True,  # the default: the key columns are removed from the frame's columns
)

bigmac_indexed.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14


In [6]:
# sorting multi-indexed dataframes
# NOTE: the sorted frame is only displayed here, not assigned -- bigmac_indexed keeps its original order
bigmac_indexed.sort_index() # sorts by the index levels from left to right (Date, then Country)

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Country,Unnamed: 2_level_1
2010-01-01,Argentina,1.84
2010-01-01,Australia,3.98
2010-01-01,Brazil,4.76
2010-01-01,Britain,3.67
2010-01-01,Canada,3.97
...,...,...
2016-01-01,Ukraine,1.54
2016-01-01,United States,4.93
2016-01-01,Uruguay,3.74
2016-01-01,Venezuela,0.66


In [7]:
# extract the values of each index level, addressing the level by its label;
# each result has one entry per row of the frame
date_level = bigmac_indexed.index.get_level_values('Date')
country_level = bigmac_indexed.index.get_level_values('Country')

print(date_level)
print('\n')
print(country_level)

DatetimeIndex(['2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01',
               ...
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01'],
              dtype='datetime64[ns]', name='Date', length=652, freq=None)


Index(['Argentina', 'Australia', 'Brazil', 'Britain', 'Canada', 'Chile',
       'China', 'Colombia', 'Costa Rica', 'Czech Republic',
       ...
       'Sri Lanka', 'Sweden', 'Switzerland', 'Taiwan', 'Thailand', 'Turkey',
       'UAE', 'Ukraine', 'United States', 'Uruguay'],
      dtype='object', name='Country', length=652)


In [8]:
# extract the values of each index level, addressing the level by its
# integer position (0 = outermost level, here Date; 1 = Country)
date_level = bigmac_indexed.index.get_level_values(0)
country_level = bigmac_indexed.index.get_level_values(1)

print(date_level)
print('\n')
print(country_level)

DatetimeIndex(['2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01', '2016-01-01', '2016-01-01',
               '2016-01-01', '2016-01-01',
               ...
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01', '2010-01-01', '2010-01-01',
               '2010-01-01', '2010-01-01'],
              dtype='datetime64[ns]', name='Date', length=652, freq=None)


Index(['Argentina', 'Australia', 'Brazil', 'Britain', 'Canada', 'Chile',
       'China', 'Colombia', 'Costa Rica', 'Czech Republic',
       ...
       'Sri Lanka', 'Sweden', 'Switzerland', 'Taiwan', 'Thailand', 'Turkey',
       'UAE', 'Ukraine', 'United States', 'Uruguay'],
      dtype='object', name='Country', length=652)


In [9]:
# checking for inclusion: `in` tests whether the label occurs among the level's values
'Argentina' in country_level

True

In [12]:
# rename every index level at once; the new names are matched to the levels
# in position order.
# The renamed index is assigned back instead of passing inplace=True, so the
# existing Index object (which other frames may share) is never mutated.
bigmac_indexed.index = bigmac_indexed.index.set_names(names=['Day', 'Location'])
bigmac_indexed.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Day,Location,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14


In [13]:
# rename a single index level, identified explicitly by its current label
# (`level` also accepts the level's integer position).
# The renamed index is assigned back instead of passing inplace=True, so the
# existing Index object (which other frames may share) is never mutated.
# NOTE: not re-runnable on its own -- once 'Day' has been renamed, a second
# run cannot find that level; re-run the preceding rename cell first.
bigmac_indexed.index = bigmac_indexed.index.set_names(names='Date', level='Day')
bigmac_indexed.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Location,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14


In [23]:
# sorting each level in its own direction: Date descending (newest first),
# Location ascending (A-Z); the sorted frame replaces bigmac_indexed
bigmac_indexed = bigmac_indexed.sort_index(ascending = [False, True]) # one flag per index level, in level order
bigmac_indexed.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Location,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Austria,3.76
2016-01-01,Belgium,4.25
2016-01-01,Brazil,3.35


In [24]:
# sort on one named level (an integer position is accepted for `level` too);
# as the output shows, the remaining Location level ends up descending as well
bigmac_indexed = bigmac_indexed.sort_index(level='Date', ascending=False)

bigmac_indexed.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Location,Unnamed: 2_level_1
2016-01-01,Vietnam,2.67
2016-01-01,Venezuela,0.66
2016-01-01,Uruguay,3.74
2016-01-01,United States,4.93
2016-01-01,Ukraine,1.54


In [26]:
# name the levels explicitly and give each its own direction:
# Date newest-first, Location A-Z (`level` accepts integer positions as well)
bigmac_indexed = bigmac_indexed.sort_index(
    level=['Date', 'Location'], ascending=[False, True]
)

bigmac_indexed.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Location,Unnamed: 2_level_1
2016-01-01,Argentina,2.39
2016-01-01,Australia,3.74
2016-01-01,Austria,3.76
2016-01-01,Belgium,4.25
2016-01-01,Brazil,3.35


# Extracting Rows from MultiIndex DataFrames

In [33]:
# extracting specified grouping: a label from the outer (Date) level selects
# every row in that group; the result is indexed by Location only
bigmac_indexed.loc['2010-01-01'].head()

Unnamed: 0_level_0,Price in US Dollars
Location,Unnamed: 1_level_1
Argentina,1.84
Australia,3.98
Brazil,4.76
Britain,3.67
Canada,3.97


In [32]:
# pull out a single value: the row is addressed by a (Date, Location) tuple,
# the column by its name
bigmac_indexed.loc[('2010-01-01', 'Argentina'), 'Price in US Dollars']

1.84

In [38]:
# extracting row with iloc
# regardless of multi-index labels, each row has a specific integer position;
# the row comes back as a Series named after its (Date, Location) index tuple
bigmac_indexed.iloc[0]

Price in US Dollars    2.39
Name: (2016-01-01 00:00:00, Argentina), dtype: float64

In [46]:
# extracting multiple rows with iloc: a list of integer positions returns
# those rows, in the order listed
bigmac_indexed.iloc[[10, 20, 100, 120, 200, 220]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Location,Unnamed: 2_level_1
2016-01-01,Costa Rica,4.02
2016-01-01,Hong Kong,2.48
2015-07-01,Sri Lanka,2.61
2015-01-01,China,2.77
2014-07-01,New Zealand,4.94
2014-07-01,Ukraine,1.63


In [48]:
# extracting multiple rows with iloc (slicing): the stop position is
# exclusive, so 1:10 returns the nine rows at positions 1 through 9
bigmac_indexed.iloc[1:10]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price in US Dollars
Date,Location,Unnamed: 2_level_1
2016-01-01,Australia,3.74
2016-01-01,Austria,3.76
2016-01-01,Belgium,4.25
2016-01-01,Brazil,3.35
2016-01-01,Britain,4.22
2016-01-01,Canada,4.14
2016-01-01,Chile,2.94
2016-01-01,China,2.68
2016-01-01,Colombia,2.43


# Swapping Levels, Stacking, Unstacking, and Pivoting