<center><font size=6 color="#00416d">Multi Index</font></center>

In [None]:
import pandas as pd

### Performing initial data processing

In [None]:
# Read the Big Mac CSV; the "Date" column is parsed into datetimes on load.
bigmac = pd.read_csv(
    "bigmac.csv",
    parse_dates=["Date"],
)
# Preview the first five rows.
bigmac.head(n=5)

In [None]:
# Data type of each column (one dtype entry per column label)
bigmac.dtypes

In [None]:
# Basic info about the DataFrame: index range, columns, non-null counts,
# dtypes and memory usage
bigmac.info()

In [None]:
# Convert the 'Country' column to the pandas categorical dtype and store it
# back under the same column label.
country_as_category = bigmac['Country'].astype('category')
bigmac['Country'] = country_as_category

In [None]:
# Show the summary again now that 'Country' has been converted to category
bigmac.info()

### Creating multi index on DataFrame

In [None]:
# Reload the CSV, this time declaring the dtypes up front:
# 'Date' is parsed as datetime and 'Country' is read directly as a category.
bigmac = pd.read_csv(
    "bigmac.csv",
    parse_dates=['Date'],
    dtype={'Country': 'category'},
)
bigmac.head(n=5)

In [None]:
# Number of distinct values in each column
bigmac.nunique()

In [None]:
# Build a multi-level index with set_index by passing a list of column names.
# Good practice: put the column with the fewest unique values at the
# outermost level; in this notebook that column is 'Date'.
# Rebinding the name gives the same result as inplace=True.
bigmac = bigmac.set_index(['Date', 'Country'])

In [None]:
# Sort the rows by index label: the outer 'Date' level is ordered first,
# then the inner 'Country' level within each date.
# The sorted copy is only displayed; `bigmac` itself is not modified.
bigmac.sort_index(axis=0, ascending=True)

In [None]:
# Each entry of a MultiIndex is a tuple with one value per level;
# pick the entries at positions 0 and 1 to see this.
first_two_positions = [0, 1]
bigmac.index[first_two_positions]

In [None]:
# Inspect the class of the index object built from the two columns
type(bigmac.index)

### df.index.get_level_values()

To get index column values

In [None]:
# Load the CSV with 'Date' and 'Country' used as the row index straight away.
bigmac = pd.read_csv(
    "bigmac.csv",
    parse_dates=['Date'],
    dtype={'Country': 'category'},
    index_col=['Date', 'Country'],
)
bigmac.head(n=5)

In [None]:
# A level can be addressed by its name ...
bigmac.index.get_level_values(level='Date')
# ... or by its position (0 is the outermost level).
# Only this last expression is displayed by the cell.
bigmac.index.get_level_values(level=0)

In [None]:
# Same lookup for the second level: by name ...
bigmac.index.get_level_values(level='Country')
# ... or by position 1. Only this last expression is displayed by the cell.
bigmac.index.get_level_values(level=1)

### df.index.set_names()
To change name of the index

In [None]:
# Fresh load so the index levels carry their original names
# ('Date', 'Country') before they are renamed.
bigmac = pd.read_csv(
    "bigmac.csv",
    parse_dates=['Date'],
    dtype={'Country': 'category'},
    index_col=['Date', 'Country'],
)
bigmac.head(n=5)

In [None]:
# Rename both index levels at once: one new name per level, outermost first.
bigmac.index.names = ['Day', 'Location']
bigmac.head(n=5)

In [None]:
# To rename only one level, pass `level` as a position ...
bigmac.index.set_names(names='Country', level=1)
# ... or, equivalently, as the level's current name.
# Without inplace=True these calls return a renamed index and leave
# `bigmac` unchanged; only the last result is displayed.
bigmac.index.set_names(names='Country', level="Location")

### sort_index() method on MultiIndex DataFrame

In [None]:
# Load the raw CSV, then move 'Date' (outer) and 'Country' (inner)
# from the columns into the row index.
df = pd.read_csv("bigmac.csv", parse_dates=['Date'])
df = df.set_index(['Date', 'Country'])
df

In [None]:
# Sort by every index level, in ascending order (the default).
df.sort_index()
# To sort in descending order, ascending must be False.
# (The previous version passed ascending=True, which just repeated the
# default ascending sort.) Only this last result is displayed by the cell.
df.sort_index(ascending=False)

In [None]:
# Give one direction per index level, outermost first:
# the first level ascending, the second level descending.
df.sort_index(axis=0, ascending=[True, False])

In [None]:
# To target a single level, name it with `level` (0 is the outermost level).
df.sort_index(axis=0, level=0)

In [None]:
# Sort on the second level (position 1) in descending order.
df.sort_index(level=1, ascending=False)

### Extracting in multi index DataFrame

we can use loc and iloc methods to extract data <br>
<b>NOTE:</b> <br>
Make sure your data is sorted first (for example with <code>sort_index()</code>), or you will get the error below: <br>
UnsortedIndexError: 'MultiIndex slicing requires the index to be lexsorted: slicing on levels [0], lexsort depth 0'

In [None]:
# Load with a (Date, Country) row index, then sort that index so that
# label-based slicing on the MultiIndex works.
bigmac = pd.read_csv(
    "bigmac.csv",
    parse_dates=['Date'],
    index_col=['Date', 'Country'],
)
bigmac = bigmac.sort_index()
bigmac.head(n=5)

In [None]:
# Row lookup written as two separate items: a date label, then a country label.
# NOTE(review): in this form the second item could be read as either an
# inner index label or a column label; a tuple key removes that ambiguity.
bigmac.loc["2016-01-01", "Argentina"]

In [None]:
# The problem with the approach above: the second argument to .loc is
# usually a column label, as it is here.
bigmac.loc["2016-01-01", "Price in US Dollars"]

In [None]:
# To remove the ambiguity, pass the full row key (one label per index level)
# as a tuple, and the wanted columns as a separate list.
# The column list previously named "Price in US Dollars" twice, which returned
# the same value twice; it is listed once here.
bigmac.loc[("2016-01-01", "Argentina"), ["Price in US Dollars"]]

In [None]:
# Position-based selection ignores the index labels entirely:
# rows at positions 1, 2 and 3, all columns.
bigmac.iloc[1:4]

### .transpose()
Interchanges the column and rows

In [None]:
# Load with a (Date, Country) row index and sort it before transposing.
df = pd.read_csv(
    "bigmac.csv",
    parse_dates=['Date'],
    index_col=['Date', 'Country'],
)
df = df.sort_index()
df.head(n=5)

In [None]:
# Swap rows and columns: the (Date, Country) MultiIndex moves to the columns.
# Running this cell a second time transposes the frame back again.
df = df.T
df

In [None]:
# Column access on the transposed frame:
# an outer-level label alone ...
df["2016-01-01"]
# ... or the full (outer, inner) key. Only this last result is displayed.
df["2016-01-01", "Australia"]

In [None]:
# Select one row label together with one outer-level column label.
df.loc[("Price in US Dollars", "2016-01-01")]

In [None]:
# One row, with the columns sliced by a range of outer-level (date) labels.
df.loc["Price in US Dollars", slice("2010-01-01", "2016-01-01")]

In [None]:
# One row, with the columns sliced between two full (date, country) keys.
df.loc[
    "Price in US Dollars",
    slice(("2010-01-01", "Brazil"), ("2010-01-01", "Canada")),
]

### .swaplevel() method
It is used to swap the positions of two index levels

In [None]:
# Build a three-level row index; drop=False keeps the three source columns
# in the frame as regular columns as well.
df = pd.read_csv("bigmac.csv", parse_dates=['Date'])
df = df.set_index(['Date', 'Country', "Price in US Dollars"], drop=False)
df.head(n=5)

In [None]:
# Exchange the positions of two index levels, identified by name.
# The swapped frame is displayed; `df` itself is not modified.
df.swaplevel(i="Price in US Dollars", j="Date")

### .stack() method
<strong>Documentation:</strong><a href="https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.stack.html">df.stack()</a>
<p><strong>Definition:</strong> Stack the prescribed level(s) from columns to index.</p>

In [None]:
# Load the world statistics CSV with a (country, year) row index.
world_stats = pd.read_csv(
    "worldstats.csv",
    index_col=["country", "year"],
)
world_stats.head(n=5)

In [None]:
# Move the column labels into a new innermost level of the row index
world_stats.stack()

In [None]:
# Stack the columns into the row index, then wrap the stacked result
# in a DataFrame.
(
    world_stats
    .stack()
    .to_frame()
)

### .unstack() method
It is just opposite to stack() method

In [None]:
# Reload the world statistics with a (country, year) row index.
world_stats = pd.read_csv(
    "worldstats.csv",
    index_col=["country", "year"],
)
world_stats.head(n=5)

In [None]:
# Stacked version of the data, kept as a DataFrame for the unstack examples.
s = (
    world_stats
    .stack()
    .to_frame()
)
s.head(n=3)

In [None]:
# With the default level (-1), the innermost index level becomes
# the column labels.
s.unstack(level=-1)

In [None]:
# unstack returns a DataFrame, so it can be chained: each call moves the
# innermost remaining index level into the columns.
(
    s
    .unstack(level=-1)
    .unstack(level=-1)
)

In [None]:
# To unstack one specific level, pass its name or its position:
# s.unstack("<level_name>") or s.unstack(<position_number>)
s.unstack(level="year")
# the statement above is equivalent to
s.unstack(level=1)
# or, counting back from the innermost level,
s.unstack(level=-2)

In [None]:
# Several levels can be unstacked in one call by passing a list of them.
s.unstack(level=["country", "year"])

In [None]:
# Combinations missing from the data would show up as NaN after unstacking;
# fill_value substitutes the given value for them instead.
s.unstack(level="year", fill_value=0)

###  .pivot() method
Return reshaped DataFrame organized by given index / column values.

In [None]:
# Load the salesmen data: 'Date' parsed as datetime, 'Salesman' as category.
sales_men = pd.read_csv(
    "salesmen.csv",
    parse_dates=['Date'],
    dtype={'Salesman': 'category'},
)
sales_men.head(n=5)

In [None]:
# Check the column dtypes and non-null counts after loading
sales_men.info()

In [None]:
# Reshape from long to wide: one row per Date, one column per Salesman,
# with the cells holding Revenue.
sales_men.pivot(
    index="Date",
    columns="Salesman",
    values="Revenue",
)

### .pivot_table()

<strong>Syntax:</strong>df.pivot_table(values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All', observed=False, sort=True) <br>
<strong>Definition:</strong>Create a spreadsheet-style pivot table as a DataFrame.
The levels in the pivot table will be stored in MultiIndex objects (hierarchical indexes) on the index and columns of the result DataFrame.

In [None]:
# Load the foods data with default settings and preview it.
foods = pd.read_csv(filepath_or_buffer="foods.csv")
foods.head(n=5)

In [None]:
# Average "Spend" per "Gender" (pivot_table aggregates with the mean
# when no aggfunc is given).
foods.pivot_table(
    values="Spend",
    index="Gender",
)

In [None]:
# Total "Spend" for each (Gender, Item) combination.
foods.pivot_table(
    values="Spend",
    index=["Gender", "Item"],
    aggfunc="sum",
)

In [None]:
# Total "Spend" for each (Gender, Item) row, split into one column per City.
foods.pivot_table(
    values="Spend",
    index=["Gender", "Item"],
    columns="City",
    aggfunc="sum",
)

In [None]:
# Total "Spend" per (Gender, Item) row with two column levels
# (Frequency, City); then keep only the "Weekly" and "Monthly"
# groups of the outer column level.
(
    foods
    .pivot_table(
        values="Spend",
        index=["Gender", "Item"],
        columns=["Frequency", "City"],
        aggfunc="sum",
    )
    [["Weekly", "Monthly"]]
)

#### Commonly passed values to the aggfunc parameter
count, max, min, mean, sum, etc.

### pd.melt()

In [None]:
# Load the quarterly sales data with default settings and preview it.
sales = pd.read_csv(filepath_or_buffer="quarters.csv")
sales.head(n=5)

In [None]:
# Wide to long: keep "Salesman" as the identifier and fold every other column
# into (Quarter, Revenue) pairs. The variable column was previously
# misspelled "Quater".
pd.melt(
    sales,
    id_vars="Salesman",
    var_name="Quarter",
    value_name="Revenue",
)

In [None]:
# Same reshape, but only the columns listed in value_vars are folded into
# rows. The variable column was previously misspelled "Quater".
pd.melt(
    sales,
    id_vars="Salesman",
    value_vars=['Q1', 'Q2'],
    var_name="Quarter",
    value_name="Revenue",
)

### Synopsis

<table style="margin-left: 0;">
  <tr style="text-align:center;">
    <th>Implementation</th>
    <th>Description</th>
  </tr>
  <tr>
      <td>DataFrame.set_index(["column1", "column2",..,]) or <br>
          pd.read_csv(index_col=["column1", "column2", ..])</td>
      <td>To create multi index DataFrame</td>
  </tr>
  <tr>
      <td>df.index.get_level_values("index_column_name")</td>
      <td>Returns all the values of the given index level</td>
  </tr>
  <tr>
      <td>df.index.set_names('new_index_col_name', level="old_index_col_name")</td>
      <td>To change the name of index column</td>
  </tr>
  <tr>
      <td>df.sort_index(level=value, ascending=[boolean, boolean])</td>
      <td>If you don't pass any parameter then it will sort the all level index's, if you want to sort specific level then pass value to the level parameter</td>
  </tr>
  <tr>
      <td>df.loc[] and df.iloc[]</td>
      <td>are used to extract data from multi index table</td>
  </tr>
  <tr>
      <td>df.swaplevel("Column1", "Column2")</td>
      <td>Swaps the positions of the two index levels Column1 and Column2</td>
  </tr>
  <tr>
      <td>df.stack()</td>
      <td>Moves the column labels into the row index (refer to the example)</td>
  </tr>
  <tr>
      <td>df.unstack()</td>
      <td>Moves a row index level into the columns (refer to the example)</td>
  </tr>
  <tr>
      <td>df.pivot() and df.pivot_table()</td>
      <td>Check example for better understanding</td>
  </tr>
  <tr>
      <td>df.melt()</td>
      <td>Converts columns into rows</td>
  </tr>
</table>