## Multi Index DataFrame

In [1]:
import numpy as np
import pandas as pd

**Series are 1D** data and need only an index label to fetch a value, while **DataFrames are 2D** and need an index label along with a column name to fetch a value.

In [2]:
# Each label is a (code, year) tuple; this list is reused for every index built in the notebook.
index_val = [("BCT", 2021), ("BCE", 2021), ("BCT", 2020), ("BEI", 2022)]

In [3]:
# Label four values directly with the raw list of tuples.
a = pd.Series(
    data=[10, 20, 30, 40],
    index=index_val,
    name="batch_students",
)

In [4]:
# Display the Series; each index label is shown as one whole tuple.
a

(BCT, 2021)    10
(BCE, 2021)    20
(BCT, 2020)    30
(BEI, 2022)    40
Name: batch_students, dtype: int64

In [5]:
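# Trying to fetch every "BCT" entry using only the first element of the tuple label:
# a["BCT"]  # raises KeyError on a plain tuple index; the solution is hierarchical indexing (MultiIndex)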

In [6]:
# Turn the list of tuples into a two-level MultiIndex and display it.
# NOTE(review): this rebinds `a`, which held the Series until now; the
# following cells that read `a.levels` depend on this rebinding.
a = pd.MultiIndex.from_tuples(tuples=index_val)
a

MultiIndex([('BCT', 2021),
            ('BCE', 2021),
            ('BCT', 2020),
            ('BEI', 2022)],
           )

In [11]:
# Distinct labels held by each level (one list per level), not the per-row labels.
a.levels

FrozenList([['BCE', 'BCT', 'BEI'], [2020, 2021, 2022]])

In [12]:
# Level 0 is the outer level: the distinct string codes.
a.levels[0]

Index(['BCE', 'BCT', 'BEI'], dtype='object')

In [13]:
# Cartesian product of the two label lists: every code paired with every year (3 x 3 = 9 labels).
b = pd.MultiIndex.from_product(
    iterables=[["BCE", "BCT", "BEI"], [2020, 2021, 2022]],
)

In [14]:
# Creating a multiIndex series
multi_index = pd.MultiIndex.from_tuples(index_val)
m_series = pd.Series([10, 20, 30, 40], index=multi_index)

In [15]:
# Display the Series; the two index levels are now shown as separate columns of labels.
m_series

BCT  2021    10
BCE  2021    20
BCT  2020    30
BEI  2022    40
dtype: int64

In [16]:
# Partial key on the outer level: returns every "BCT" entry, indexed by the remaining (year) level.
m_series["BCT"]

2021    10
2020    30
dtype: int64

In [17]:
# Full key (one label per level) selects a single value.
m_series[("BCE", 2021)]

20

#### Unstacking and stacking
`unstack()` converts a MultiIndex Series into a DataFrame by moving an index level into the columns; `stack()` performs the reverse conversion.

In [18]:
# Move the innermost index level (the years) into columns.
# (code, year) pairs that have no value show up as NaN.
m_series.unstack(level=-1)

Unnamed: 0,2020,2021,2022
BCE,,20.0,
BCT,30.0,10.0,
BEI,,,40.0


#### Multi Index Dataframe

In [20]:
# Re-display the MultiIndex that will label the DataFrame rows.
multi_index

MultiIndex([('BCT', 2021),
            ('BCE', 2021),
            ('BCT', 2020),
            ('BEI', 2022)],
           )

In [22]:
# One row per (code, year) label, with two value columns.
df_m = pd.DataFrame(
    data=[[1, 10], [2, 20], [1, 30], [6, 90]],
    index=multi_index,
    columns=["avg_package", "students"],
)

In [23]:
# Display the frame; the two leading unnamed columns are the index levels.
df_m

Unnamed: 0,Unnamed: 1,avg_package,students
BCT,2021,1,10
BCE,2021,2,20
BCT,2020,1,30
BEI,2022,6,90


In [24]:
# (rows, columns): the index levels are not counted as columns.
df_m.shape

(4, 2)

In [25]:
# Partial key on the outer level: rows for "BCE", indexed by the remaining (year) level.
df_m.loc["BCE"]

Unnamed: 0,avg_package,students
2021,2,20


In [26]:
# Same partial-key lookup for "BCT", which matches two rows.
df_m.loc["BCT"]

Unnamed: 0,avg_package,students
2021,1,10
2020,1,30


In [27]:
# Full key (one label per level) selects a single row, returned as a Series of its column values.
df_m.loc[("BEI", 2022)]

avg_package     6
students       90
Name: (BEI, 2022), dtype: int64