# The `MultiIndex` object

__Methods Covered__

* `.get_level_values()`
* `.set_names()` 
* `.sort_index()` 
* `.loc[]` (an indexer used with square brackets, not a method call)
* `.transpose()` 
* `.stack()` 
* `.unstack()` 
* `.pivot()` 
* `.pivot_table()` 
* `.melt()` 


In [None]:
import pandas as pd

In [None]:
pd.read_csv('data/bigmac.csv').head()

In [None]:
pd.read_csv('data/bigmac.csv', parse_dates = ['Date']).head()

In [None]:
bigmac = pd.read_csv('data/bigmac.csv', parse_dates = ['Date'])
bigmac.head()

In [None]:
bigmac.set_index(keys = 'Date').head()

In [None]:
bigmac.set_index(keys = 'Country').head()

In [None]:
bigmac.set_index(keys = ['Date', 'Country'])
# see how each date has all countries

In [None]:
bigmac.set_index(keys = ['Date', 'Country'], inplace = True)
bigmac.head()

In [None]:
# Sorts by both index levels in ascending order. Without inplace=True or an
# assignment this only displays a sorted copy; bigmac itself is left unchanged.
bigmac.sort_index()

In [None]:
bigmac.index

In [None]:
bigmac.index.names

In [None]:
type(bigmac)

In [None]:
bigmac.index[0]

## The `.get_level_values()` Method

In [None]:
bigmac = pd.read_csv('data/bigmac.csv', parse_dates = ['Date'])
bigmac.head(3)

In [None]:
# alternate method for setting multiIndex
bigmac = pd.read_csv('data/bigmac.csv', parse_dates = ['Date'], index_col = ['Date','Country'])
bigmac.sort_index(inplace = True) # sorting greatly speeds up large datasets
bigmac.head(3)

In [None]:
# A level can be selected either by position or by name. The index was built
# with index_col = ['Date','Country'], so level 0 is 'Date' and level 1 is
# 'Country'; the commented-out calls are equivalent alternatives to try.
# bigmac.index.get_level_values(0)
# bigmac.index.get_level_values('Date')

# bigmac.index.get_level_values(1)
bigmac.index.get_level_values('Country')

## The `.set_names()` Method on MultiIndex

In [None]:
bigmac = pd.read_csv('data/bigmac.csv', parse_dates = ['Date'], index_col = ['Date','Country'])
bigmac.sort_index(inplace = True)
bigmac.head(3)

In [None]:
bigmac.index.names

In [None]:
bigmac.index.set_names(['Day','Location'], inplace = True)
bigmac.index.names

In [None]:
bigmac.head()

## The `.sort_index()` on MultiIndex dataframe

In [None]:
bigmac = pd.read_csv('data/bigmac.csv', parse_dates = ['Date'], index_col = ['Date','Country'])
bigmac.sort_index(inplace = True)
bigmac.head(3)

In [None]:
# One flag per index level, in level order: 'Date' descending (False),
# 'Country' ascending (True). inplace = True re-sorts bigmac itself.
bigmac.sort_index(ascending = [False, True], inplace = True)

In [None]:
bigmac.head()

## Extract rows with the `.loc[]` Accessor from a MultiIndex DataFrame

In [None]:
bigmac = pd.read_csv('data/bigmac.csv', parse_dates = ['Date'], index_col = ['Date','Country'])
bigmac.sort_index(inplace = True)
bigmac.head(3)

In [None]:
# Need to use a tuple instead of a list here. A tuple is immutable and is
# written with parentheses instead of square brackets; it supplies one label
# per index level (Date, Country) to address a single row.
bigmac.loc[('2010-01-01','Brazil')]

In [None]:
bigmac.loc[('2010-01-01','Brazil'),'Price in US Dollars']

In [None]:
bigmac.loc[('2015-07-01','Chile'), 'Price in US Dollars']

In [None]:
# `.ix` was deprecated in pandas 0.20 and removed in pandas 1.0, so it raises
# AttributeError on current versions. Select the row by its (Date, Country)
# label with `.loc`, and turn the positional column 0 into its label via
# `bigmac.columns[0]` so the lookup stays purely label-based.
bigmac.loc[('2016-01-01', 'Argentina'), bigmac.columns[0]]

## The `.transpose()` Method 

In [None]:
bigmac = pd.read_csv('data/bigmac.csv', parse_dates = ['Date'], index_col = ['Date','Country'])
bigmac.sort_index(inplace = True)
bigmac.head(3)

In [None]:
bigmac = bigmac.transpose()
bigmac

In [None]:
bigmac = bigmac.transpose()
bigmac.head()

In [None]:
bigmac = bigmac.swaplevel()
bigmac.head()

### reverse the fields

In [None]:
bigmac = pd.read_csv('data/bigmac.csv', parse_dates = ['Date'], index_col = ['Country','Date'])
bigmac.sort_index(inplace = True)
bigmac.head(20)

## The `.stack()` Method

In [None]:
world = pd.read_csv('data/worldstats.csv', index_col=['country','year'])
world.head()

In [None]:
# .stack() moves the column labels into a new innermost index level, so each
# (country, year) row fans out into up to one row per column: more rows, no
# columns. The result is a Series (not a DataFrame) indexed by three levels:
# country, year, and the former column label.
world.stack()

In [None]:
world.stack().to_frame()

## The `.unstack()` Method 

In [None]:
world = pd.read_csv('data/worldstats.csv', index_col=['country','year'])
world.head()

In [None]:
s = world.stack()
s.head()

In [None]:
s.unstack()

In [None]:
s.unstack().unstack()

In [None]:
s.unstack().unstack().unstack()

In [None]:
world = pd.read_csv('data/worldstats.csv', index_col=['country','year'])
world.head()

In [None]:
s = world.stack()

In [None]:
s.head()

In [None]:
s.unstack()

In [None]:
s.unstack(0)

In [None]:
s.unstack(1)

In [None]:
s.unstack(2)

In [None]:
s.unstack(-1) # same as 2 

In [None]:
s.unstack('year')

In [None]:
s.unstack('country')

### Unstack by multiple levels

In [None]:
world = pd.read_csv('data/worldstats.csv', index_col=['country','year'])
s = world.stack()
s.head()

In [None]:
s.unstack(level = [1,0])

In [None]:
s.unstack(level = [0,1])

In [None]:
s.unstack(['country','year'])

In [None]:
# fill_value parameter
s.unstack('year', fill_value = 0)

## The `.pivot()` Method

In [None]:
sales = pd.read_csv('data/salesmen.csv', parse_dates=['Date'])
sales.head()

In [None]:
# Note: the 'Salesman' column has many repeated values (see the counts this
# cell displays), so it is probably a good candidate for the 'category' dtype.
sales['Salesman'].value_counts()

In [None]:
sales['Salesman'] = sales['Salesman'].astype('category')

In [None]:
sales.head()

In [None]:
sales.pivot(index = 'Date', columns='Salesman', values = 'Revenue')

In [None]:
sales.pivot(index = 'Date', columns='Salesman', values = 'Revenue').describe()

## The `.pivot_table()` Method

In [None]:
foods = pd.read_csv('data/foods.csv')
foods.head()

In [None]:
foods.describe()

In [None]:
foods.pivot_table(values = "Spend", index='Gender', aggfunc = 'mean')

In [None]:
foods.pivot_table(values = "Spend", index='Gender', aggfunc = 'sum')

In [None]:
foods.pivot_table(values = "Spend", index='Item', aggfunc = 'sum')

In [None]:
foods.pivot_table(values = "Spend", index=['Gender',"Item"], aggfunc = 'sum')

In [None]:
foods.pivot_table(values = "Spend", index=['Gender',"Item"], aggfunc = 'sum', columns=['Frequency','City'])

In [None]:
foods.pivot_table(values = "Spend", index=['Gender',"Item"], aggfunc = 'sum', columns='City')

In [None]:
foods.pivot_table(values = "Spend", index=['Gender',"Item"], aggfunc = 'max', columns='City',)

In [None]:
# optionally can call pivot_table() on pandas library directly
pd.pivot_table(data = foods, values = 'Spend', index=['Gender',"Item"], aggfunc = 'sum', columns='City')


## The `.melt()` Method

In [None]:
sales = pd.read_csv('data/quarters.csv')
sales

In [None]:
pd.melt(sales, id_vars = 'Salesman')

In [None]:
pd.melt(sales, id_vars = 'Salesman', var_name='Quarter')

In [None]:
pd.melt(sales, id_vars = 'Salesman', var_name='Quarter', value_name = 'Revenue')

### Code below was used to break out a table from a column of Crunchbase search results


In [None]:
import pandas as pd

In [None]:
supco = pd.read_csv('top-competing-brands.csv', index_col='No')

In [None]:
# Left disabled: 'No' is already consumed as the index by index_col='No' when
# the CSV is read, so there is no 'No' column left to drop.
# supco.drop('No', axis=1, inplace=True)

In [None]:
supco.head(12)

In [None]:
supco = supco.pivot(index='row', columns='col', values='value')

In [None]:
supco.tail()

In [None]:
supco.head()

In [None]:
# Write the pivoted table out to supco.csv (path is relative to the current
# working directory).
supco.to_csv(path_or_buf='supco.csv')