In [1]:
import numpy as np
import pandas as pd

In [2]:
# can we have multiple index? Let's try
index_val = [('cse',2023),('cse',2024),('ece',2023),('ece',2024)]
a = pd.Series([1,2,3,4],index=index_val)
a

(cse, 2023)    1
(cse, 2024)    2
(ece, 2023)    3
(ece, 2024)    4
dtype: int64

In [3]:
a[('cse',2024)]

2

In [4]:
# problem
a['cse']

KeyError: 'cse'

In [None]:
# The solution -> multiindex series(also known as Hierarchical Indexing)
# multiple index levels within a single index

In [5]:
# how to create multiindex object
# 1. pd.multipleIndex.from_tuPle()
index_val = [('cse',2023),('cse',2024),('ece',2023),('ece',2024)]
multiindex = pd.MultiIndex.from_tuples(index_val)
multiindex.levels[1]
# 2. pd.MultiIndex.from_product()
pd.MultiIndex.from_product([['cse','ece'],[2023,2023]])

MultiIndex([('cse', 2023),
            ('cse', 2023),
            ('ece', 2023),
            ('ece', 2023)],
           )

In [None]:
# level inside multiindex object

In [6]:
# creating a series with multiindex object
s = pd.Series([1,2,3,4],index=multiindex)
s

cse  2023    1
     2024    2
ece  2023    3
     2024    4
dtype: int64

In [7]:
# how to fetch items from such a series
s[('cse',2023)]

1

In [8]:
s['cse']

2023    1
2024    2
dtype: int64

In [None]:
# a logical question to ask

In [9]:
# unstack
temp = s.unstack() # converted in DataFrame
temp

Unnamed: 0,2023,2024
cse,1,2
ece,3,4


In [10]:
# stack
temp.stack()

cse  2023    1
     2024    2
ece  2023    3
     2024    4
dtype: int64

In [None]:
# Then what was the point of multiindex series?

In [None]:
# multiindex dataframe

In [11]:
branch_df1 = pd.DataFrame(
    [
        [1,2],
        [3,4],
        [5,6],
        [7,8]
    ],
    index = multiindex,
    columns = ['avg_package','students']
)


branch_df1

Unnamed: 0,Unnamed: 1,avg_package,students
cse,2023,1,2
cse,2024,3,4
ece,2023,5,6
ece,2024,7,8


In [12]:
branch_df1['avg_package']

cse  2023    1
     2024    3
ece  2023    5
     2024    7
Name: avg_package, dtype: int64

In [None]:
# Are columns really different from index?

In [13]:
# multiple df from columns prespective
branch_df2 = pd.DataFrame(
    [
        [1,2,0,0],
        [3,4,0,0]
    ],
    index = [2023,2024],
    columns = pd.MultiIndex.from_product([['delhi','mumbai'],['avg_package','students']])
)

branch_df2

Unnamed: 0_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,avg_package,students,avg_package,students
2023,1,2,0,0
2024,3,4,0,0


In [14]:
branch_df2['delhi']

Unnamed: 0,avg_package,students
2023,1,2
2024,3,4


In [15]:
branch_df2['delhi']['students']

2023    2
2024    4
Name: students, dtype: int64

Stacking and Unstaking

In [16]:
branch_df1

Unnamed: 0,Unnamed: 1,avg_package,students
cse,2023,1,2
cse,2024,3,4
ece,2023,5,6
ece,2024,7,8


In [20]:
branch_df1.unstack()

Unnamed: 0_level_0,avg_package,avg_package,students,students
Unnamed: 0_level_1,2023,2024,2023,2024
cse,1,3,2,4
ece,5,7,6,8


In [19]:
branch_df1.unstack().unstack()

avg_package  2023  cse    1
                   ece    5
             2024  cse    3
                   ece    7
students     2023  cse    2
                   ece    6
             2024  cse    4
                   ece    8
dtype: int64

In [21]:
branch_df1.unstack().stack()

Unnamed: 0,Unnamed: 1,avg_package,students
cse,2023,1,2
cse,2024,3,4
ece,2023,5,6
ece,2024,7,8


In [23]:
branch_df1.unstack().stack().stack()

cse  2023  avg_package    1
           students       2
     2024  avg_package    3
           students       4
ece  2023  avg_package    5
           students       6
     2024  avg_package    7
           students       8
dtype: int64

In [26]:
branch_df3 = pd.DataFrame(
    [
        [1,2,0,0],
        [3,4,0,0],
        [5,6,0,0],
        [7,8,0,0],

    ],
    index = multiindex,
    columns = pd.MultiIndex.from_product([['delhi','mumbai'],['avg_package','students']])
)

branch_df3

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,avg_package,students,avg_package,students
cse,2023,1,2,0,0
cse,2024,3,4,0,0
ece,2023,5,6,0,0
ece,2024,7,8,0,0


Working With Multiple Index DataFrame

In [27]:
# head and tail
branch_df3.head()
# shape
branch_df3.shape
# info
branch_df3.info()
# duplicate -> isnull
branch_df3.duplicated()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4 entries, ('cse', 2023) to ('ece', 2024)
Data columns (total 4 columns):
 #   Column                 Non-Null Count  Dtype
---  ------                 --------------  -----
 0   (delhi, avg_package)   4 non-null      int64
 1   (delhi, students)      4 non-null      int64
 2   (mumbai, avg_package)  4 non-null      int64
 3   (mumbai, students)     4 non-null      int64
dtypes: int64(4)
memory usage: 588.0+ bytes


cse  2023    False
     2024    False
ece  2023    False
     2024    False
dtype: bool

In [29]:
# Extracting rows single
branch_df3.loc[('cse',2023)]

delhi   avg_package    1
        students       2
mumbai  avg_package    0
        students       0
Name: (cse, 2023), dtype: int64

In [30]:
# multiple
branch_df3.loc[('cse',2023):('ece',2024):2]

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,avg_package,students,avg_package,students
cse,2023,1,2,0,0
ece,2023,5,6,0,0


In [33]:
# Extracting cols
branch_df3['delhi']['students']

cse  2023    2
     2024    4
ece  2023    6
     2024    8
Name: students, dtype: int64

In [34]:
branch_df3.iloc[:,1:3]

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,students,avg_package
cse,2023,2,0
cse,2024,4,0
ece,2023,6,0
ece,2024,8,0


In [35]:
branch_df3.iloc[[1,2],[1,2]]

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,students,avg_package
cse,2024,4,0
ece,2023,6,0


In [36]:
# sort index
# both -> decending -> diff order
# based on one level
branch_df3.sort_index(ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,avg_package,students,avg_package,students
ece,2024,7,8,0,0
ece,2023,5,6,0,0
cse,2024,3,4,0,0
cse,2023,1,2,0,0


In [37]:
branch_df3.sort_index(ascending=[False,True])

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,avg_package,students,avg_package,students
ece,2023,5,6,0,0
ece,2024,7,8,0,0
cse,2023,1,2,0,0
cse,2024,3,4,0,0


In [38]:
# sort year
branch_df3.sort_index(level=1,ascending=[False])

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,avg_package,students,avg_package,students
cse,2024,3,4,0,0
ece,2024,7,8,0,0
cse,2023,1,2,0,0
ece,2023,5,6,0,0


In [39]:
# multiple dataframe(col) -> transpose
branch_df3.transpose()

Unnamed: 0_level_0,Unnamed: 1_level_0,cse,cse,ece,ece
Unnamed: 0_level_1,Unnamed: 1_level_1,2023,2024,2023,2024
delhi,avg_package,1,3,5,7
delhi,students,2,4,6,8
mumbai,avg_package,0,0,0,0
mumbai,students,0,0,0,0


In [40]:
# swaplevel
branch_df3.swaplevel()

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,avg_package,students,avg_package,students
2023,cse,1,2,0,0
2024,cse,3,4,0,0
2023,ece,5,6,0,0
2024,ece,7,8,0,0


In [41]:
branch_df3.swaplevel(axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,avg_package,students,avg_package,students
Unnamed: 0_level_1,Unnamed: 1_level_1,delhi,delhi,mumbai,mumbai
cse,2023,1,2,0,0
cse,2024,3,4,0,0
ece,2023,5,6,0,0
ece,2024,7,8,0,0
