In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load two CSV files from the working directory (judging by the file names,
# global COVID-19 confirmed-case and death time series).
# NOTE(review): neither df1 nor df2 is referenced in the cells visible here --
# confirm they are used later in the notebook, otherwise this cell can go.
df1 = pd.read_csv("time_series_covid19_confirmed_global.csv")
df2 = pd.read_csv("time_series_covid19_deaths_global.csv")

In [3]:
# Build every (branch, year) pair and hand the plain list of tuples
# directly to the Series constructor as its index.
index_value = [
    (branch, year)
    for branch in ("cse", "ece")
    for year in (2019, 2020, 2021, 2022)
]
a = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, 8], index=index_value)

This is not a good way to create a MultiIndex.

### Proper way to create a multiindex

In [4]:
# Explicit construction: generate the (branch, year) tuples, then turn them
# into a MultiIndex object with from_tuples.
index_value = [
    (branch, year)
    for branch in ("cse", "ece")
    for year in (2019, 2020, 2021, 2022)
]
multiindex = pd.MultiIndex.from_tuples(index_value)
# .levels lists the unique labels of each level.
multiindex.levels

FrozenList([['cse', 'ece'], [2019, 2020, 2021, 2022]])

In [5]:
# Level 0: the unique labels of the outer level.
multiindex.levels[0]

Index(['cse', 'ece'], dtype='object')

In [6]:
# Level 1: the unique labels of the inner level.
multiindex.levels[1]

Index([2019, 2020, 2021, 2022], dtype='int64')

In [7]:
# pd.MultiIndex.from_product builds the index from the Cartesian product of
# the given iterables, i.e. every (branch, year) combination.
pd.MultiIndex.from_product([['cse', 'ece'], [2019, 2020, 2021, 2022]])

MultiIndex([('cse', 2019),
            ('cse', 2020),
            ('cse', 2021),
            ('cse', 2022),
            ('ece', 2019),
            ('ece', 2020),
            ('ece', 2021),
            ('ece', 2022)],
           )

In [8]:
# Series with a hierarchical (MultiIndex) index: one value per
# (branch, year) pair.
s = pd.Series(
    data=[1, 2, 3, 4, 5, 6, 7, 8],
    index=multiindex,
)
s

cse  2019    1
     2020    2
     2021    3
     2022    4
ece  2019    5
     2020    6
     2021    7
     2022    8
dtype: int64

In [9]:
# Selecting by an outer-level label returns the sub-series for that label,
# indexed by the inner level.
s["cse"]

2019    1
2020    2
2021    3
2022    4
dtype: int64

In [10]:
# unstack() converts the MultiIndex Series into a DataFrame:
# the inner index level becomes the columns.
temp_df = s.unstack()
temp_df

Unnamed: 0,2019,2020,2021,2022
cse,1,2,3,4
ece,5,6,7,8


In [11]:
# stack() turns the DataFrame back into a MultiIndex Series.
temp_df.stack()

cse  2019    1
     2020    2
     2021    3
     2022    4
ece  2019    5
     2020    6
     2021    7
     2022    8
dtype: int64

### Multiindex DataFrame

#### 3d data ---> 2d Data

In [12]:
# Two-column frame whose rows are labelled by the (branch, year) MultiIndex.
# Written column-by-column; the column order matches the original layout.
branch_Df1 = pd.DataFrame(
    {
        "Avg package": [1, 3, 5, 7, 9, 11, 13, 15],
        "Students": [2, 4, 6, 8, 10, 12, 14, 16],
    },
    index=multiindex,
)
branch_Df1

Unnamed: 0,Unnamed: 1,Avg package,Students
cse,2019,1,2
cse,2020,3,4
cse,2021,5,6
cse,2022,7,8
ece,2019,9,10
ece,2020,11,12
ece,2021,13,14
ece,2022,15,16


In [13]:
# .loc with an outer-level label returns all rows for that label,
# indexed by the remaining level.
branch_Df1.loc["cse"]

Unnamed: 0,Avg package,Students
2019,1,2
2020,3,4
2021,5,6
2022,7,8


In [14]:
# Same lookup for the other outer-level label.
branch_Df1.loc["ece"]

Unnamed: 0,Avg package,Students
2019,9,10
2020,11,12
2021,13,14
2022,15,16


In [15]:
# Column selection works as usual; the resulting Series keeps the MultiIndex rows.
branch_Df1["Avg package"]

cse  2019     1
     2020     3
     2021     5
     2022     7
ece  2019     9
     2020    11
     2021    13
     2022    15
Name: Avg package, dtype: int64

In [16]:
# Transposing moves the row MultiIndex onto the columns.
branch_Df1.T

Unnamed: 0_level_0,cse,cse,cse,cse,ece,ece,ece,ece
Unnamed: 0_level_1,2019,2020,2021,2022,2019,2020,2021,2022
Avg package,1,3,5,7,9,11,13,15
Students,2,4,6,8,10,12,14,16


### Multiindex based on columns

In [17]:
# Same idea on the column axis: the columns are the Cartesian product of
# city x metric, while the rows use a plain year index.
city_metric_columns = pd.MultiIndex.from_product(
    [["delhi", "mumbai"], ["Avg Package", "Students"]]
)
branch_Df1 = pd.DataFrame(
    data=[
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
        [13, 14, 15, 16],
    ],
    index=[2019, 2020, 2021, 2022],
    columns=city_metric_columns,
)
branch_Df1

Unnamed: 0_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Avg Package,Students,Avg Package,Students
2019,1,2,3,4
2020,5,6,7,8
2021,9,10,11,12
2022,13,14,15,16


In [18]:
# Read one scalar with a single .loc lookup -- row label first, then the full
# (city, metric) column tuple -- instead of chaining three [] selections,
# each of which materialises an intermediate object.
branch_Df1.loc[2022, ("delhi", "Avg Package")]

13

In [19]:
# Passing a list of labels ([2019]) returns a one-row DataFrame.
branch_Df1.loc[[2019]]

Unnamed: 0_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Avg Package,Students,Avg Package,Students
2019,1,2,3,4


In [20]:
# Rebuild the (branch, year) row index, this time via the Cartesian product.
multiindex = pd.MultiIndex.from_product(
    [
        ["cse", "ece"],
        [2019, 2020, 2021, 2022],
    ]
)
multiindex

MultiIndex([('cse', 2019),
            ('cse', 2020),
            ('cse', 2021),
            ('cse', 2022),
            ('ece', 2019),
            ('ece', 2020),
            ('ece', 2021),
            ('ece', 2022)],
           )

In [21]:
# Frame with a MultiIndex on both axes: rows are (branch, year),
# columns are (city, metric).
branch_Df1 = pd.DataFrame(
    data=[
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
        [13, 14, 15, 16],
        [17, 18, 19, 20],
        [21, 22, 23, 24],
        [26, 27, 28, 29],
        [30, 31, 32, 33],
    ],
    index=multiindex,
    columns=pd.MultiIndex.from_product(
        [["delhi", "mumbai"], ["Avg Package", "Students"]]
    ),
)
branch_Df1

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Avg Package,Students,Avg Package,Students
cse,2019,1,2,3,4
cse,2020,5,6,7,8
cse,2021,9,10,11,12
cse,2022,13,14,15,16
ece,2019,17,18,19,20
ece,2020,21,22,23,24
ece,2021,26,27,28,29
ece,2022,30,31,32,33


### Stacking and Unstacking

In [22]:
# Redisplay the frame as the starting point for the reshaping examples.
branch_Df1

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Avg Package,Students,Avg Package,Students
cse,2019,1,2,3,4
cse,2020,5,6,7,8
cse,2021,9,10,11,12
cse,2022,13,14,15,16
ece,2019,17,18,19,20
ece,2020,21,22,23,24
ece,2021,26,27,28,29
ece,2022,30,31,32,33


##### `unstack` moves the innermost level of the row index into the innermost level of the columns. Here the columns had two levels, so applying `unstack` moves the year level from the rows to a third column level.

In [23]:
# unstack() moves the innermost row level (year) to a new innermost column level.
branch_Df1.unstack()

Unnamed: 0_level_0,delhi,delhi,delhi,delhi,delhi,delhi,delhi,delhi,mumbai,mumbai,mumbai,mumbai,mumbai,mumbai,mumbai,mumbai
Unnamed: 0_level_1,Avg Package,Avg Package,Avg Package,Avg Package,Students,Students,Students,Students,Avg Package,Avg Package,Avg Package,Avg Package,Students,Students,Students,Students
Unnamed: 0_level_2,2019,2020,2021,2022,2019,2020,2021,2022,2019,2020,2021,2022,2019,2020,2021,2022
cse,1,5,9,13,2,6,10,14,3,7,11,15,4,8,12,16
ece,17,21,26,30,18,22,27,31,19,23,28,32,20,24,29,33


In [24]:
# After the first unstack() only one row level is left; unstacking again
# yields a Series indexed by (city, metric, year, branch), which is wrapped
# in a DataFrame for display (single column named 0).
pd.DataFrame(branch_Df1.unstack().unstack())

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,0
delhi,Avg Package,2019,cse,1
delhi,Avg Package,2019,ece,17
delhi,Avg Package,2020,cse,5
delhi,Avg Package,2020,ece,21
delhi,Avg Package,2021,cse,9
delhi,Avg Package,2021,ece,26
delhi,Avg Package,2022,cse,13
delhi,Avg Package,2022,ece,30
delhi,Students,2019,cse,2
delhi,Students,2019,ece,18


In [25]:
# unstack() followed by stack() round-trips back to the original layout.
pd.DataFrame(branch_Df1.unstack().stack())

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Avg Package,Students,Avg Package,Students
cse,2019,1,2,3,4
cse,2020,5,6,7,8
cse,2021,9,10,11,12
cse,2022,13,14,15,16
ece,2019,17,18,19,20
ece,2020,21,22,23,24
ece,2021,26,27,28,29
ece,2022,30,31,32,33


##### `stack` moves the innermost level of the columns into the innermost level of the rows. Here it turns the third column level (year) back into the second row level.

In [26]:
# A second stack() moves the next column level (metric) into the rows,
# leaving only the city level on the columns.
pd.DataFrame(branch_Df1.unstack().stack().stack())

Unnamed: 0,Unnamed: 1,Unnamed: 2,delhi,mumbai
cse,2019,Avg Package,1,3
cse,2019,Students,2,4
cse,2020,Avg Package,5,7
cse,2020,Students,6,8
cse,2021,Avg Package,9,11
cse,2021,Students,10,12
cse,2022,Avg Package,13,15
cse,2022,Students,14,16
ece,2019,Avg Package,17,19
ece,2019,Students,18,20


#### 1. **Stack** --> Higher Dimension to Lower Dimension
#### 2. **Unstack** --> Lower Dimension to Higher Dimension

In [27]:
# info() on the unstacked frame lists one column per (city, metric, year) combination.
branch_Df1.unstack().info()

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, cse to ece
Data columns (total 16 columns):
 #   Column                       Non-Null Count  Dtype
---  ------                       --------------  -----
 0   (delhi, Avg Package, 2019)   2 non-null      int64
 1   (delhi, Avg Package, 2020)   2 non-null      int64
 2   (delhi, Avg Package, 2021)   2 non-null      int64
 3   (delhi, Avg Package, 2022)   2 non-null      int64
 4   (delhi, Students, 2019)      2 non-null      int64
 5   (delhi, Students, 2020)      2 non-null      int64
 6   (delhi, Students, 2021)      2 non-null      int64
 7   (delhi, Students, 2022)      2 non-null      int64
 8   (mumbai, Avg Package, 2019)  2 non-null      int64
 9   (mumbai, Avg Package, 2020)  2 non-null      int64
 10  (mumbai, Avg Package, 2021)  2 non-null      int64
 11  (mumbai, Avg Package, 2022)  2 non-null      int64
 12  (mumbai, Students, 2019)     2 non-null      int64
 13  (mumbai, Students, 2020)     2 non-null      int64
 14 

In [28]:
# Element-wise missing-value check; every cell of this frame is False.
branch_Df1.isnull()

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Avg Package,Students,Avg Package,Students
cse,2019,False,False,False,False
cse,2020,False,False,False,False
cse,2021,False,False,False,False
cse,2022,False,False,False,False
ece,2019,False,False,False,False
ece,2020,False,False,False,False
ece,2021,False,False,False,False
ece,2022,False,False,False,False


### Extracting a single row

In [29]:
# A full (branch, year) tuple selects exactly one row, returned as a Series.
branch_Df1.loc[("cse",2022)]

delhi   Avg Package    13
        Students       14
mumbai  Avg Package    15
        Students       16
Name: (cse, 2022), dtype: int64

In [34]:
branch_Df1.loc[("cse",2019):("ece",2019):2]  # label slice with step 2: every second row; both endpoints are included

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Avg Package,Students,Avg Package,Students
cse,2019,1,2,3,4
cse,2021,9,10,11,12
ece,2019,17,18,19,20


In [38]:
# Positional row selection with a list of integer positions.
branch_Df1.iloc[[1,2,3]]

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Avg Package,Students,Avg Package,Students
cse,2020,5,6,7,8
cse,2021,9,10,11,12
cse,2022,13,14,15,16


In [40]:
# Positional slice with step 2: rows 0, 2 and 4 (the stop, 5, is exclusive).
branch_Df1.iloc[0:5:2]

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Avg Package,Students,Avg Package,Students
cse,2019,1,2,3,4
cse,2021,9,10,11,12
ece,2019,17,18,19,20


In [41]:
# Selecting a top-level column label returns the sub-columns under it.
branch_Df1['delhi']

Unnamed: 0,Unnamed: 1,Avg Package,Students
cse,2019,1,2
cse,2020,5,6
cse,2021,9,10
cse,2022,13,14
ece,2019,17,18
ece,2020,21,22
ece,2021,26,27
ece,2022,30,31


In [45]:
# Redisplay the full frame for reference.
branch_Df1

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Avg Package,Students,Avg Package,Students
cse,2019,1,2,3,4
cse,2020,5,6,7,8
cse,2021,9,10,11,12
cse,2022,13,14,15,16
ece,2019,17,18,19,20
ece,2020,21,22,23,24
ece,2021,26,27,28,29
ece,2022,30,31,32,33


In [48]:
# All rows, columns at positions 1 through 3 (the stop, 4, is exclusive).
branch_Df1.iloc[:,1:4]

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Students,Avg Package,Students
cse,2019,2,3,4
cse,2020,6,7,8
cse,2021,10,11,12
cse,2022,14,15,16
ece,2019,18,19,20
ece,2020,22,23,24
ece,2021,27,28,29
ece,2022,31,32,33


In [61]:
# Select rows and columns together by position: rows 0 and 4, columns 1 and 2.
branch_Df1.iloc[[0,4],[1,2]]

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Students,Avg Package
cse,2019,2,3
ece,2019,18,19


### Sorting Index

In [64]:
# Sort the row index in descending order on both levels.
branch_Df1.sort_index(ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Avg Package,Students,Avg Package,Students
ece,2022,30,31,32,33
ece,2021,26,27,28,29
ece,2020,21,22,23,24
ece,2019,17,18,19,20
cse,2022,13,14,15,16
cse,2021,9,10,11,12
cse,2020,5,6,7,8
cse,2019,1,2,3,4


In [66]:
branch_Df1.sort_index(ascending=[False,True])  # one flag per level: level 0 (branch) descending, level 1 (year) ascending

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Avg Package,Students,Avg Package,Students
ece,2019,17,18,19,20
ece,2020,21,22,23,24
ece,2021,26,27,28,29
ece,2022,30,31,32,33
cse,2019,1,2,3,4
cse,2020,5,6,7,8
cse,2021,9,10,11,12
cse,2022,13,14,15,16


In [68]:
# Sort primarily by level 1 (year), descending. As the output shows, rows with
# the same year are then ordered by branch, also descending.
branch_Df1.sort_index(level=1,ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Avg Package,Students,Avg Package,Students
ece,2022,30,31,32,33
cse,2022,13,14,15,16
ece,2021,26,27,28,29
cse,2021,9,10,11,12
ece,2020,21,22,23,24
cse,2020,5,6,7,8
ece,2019,17,18,19,20
cse,2019,1,2,3,4


In [69]:
# Transpose: the row and column MultiIndexes trade places.
branch_Df1.T

Unnamed: 0_level_0,Unnamed: 1_level_0,cse,cse,cse,cse,ece,ece,ece,ece
Unnamed: 0_level_1,Unnamed: 1_level_1,2019,2020,2021,2022,2019,2020,2021,2022
delhi,Avg Package,1,5,9,13,17,21,26,30
delhi,Students,2,6,10,14,18,22,27,31
mumbai,Avg Package,3,7,11,15,19,23,28,32
mumbai,Students,4,8,12,16,20,24,29,33


## swaplevel

In [72]:

branch_Df1.swaplevel()  # exchanges the two row-index levels (branch <-> year); the row order itself is unchanged

Unnamed: 0_level_0,Unnamed: 1_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,Unnamed: 1_level_1,Avg Package,Students,Avg Package,Students
2019,cse,1,2,3,4
2020,cse,5,6,7,8
2021,cse,9,10,11,12
2022,cse,13,14,15,16
2019,ece,17,18,19,20
2020,ece,21,22,23,24
2021,ece,26,27,28,29
2022,ece,30,31,32,33


In [74]:
branch_Df1.swaplevel(axis=1)  # exchanges the two column levels (city <-> metric); the column order itself is unchanged

Unnamed: 0_level_0,Unnamed: 1_level_0,Avg Package,Students,Avg Package,Students
Unnamed: 0_level_1,Unnamed: 1_level_1,delhi,delhi,mumbai,mumbai
cse,2019,1,2,3,4
cse,2020,5,6,7,8
cse,2021,9,10,11,12
cse,2022,13,14,15,16
ece,2019,17,18,19,20
ece,2020,21,22,23,24
ece,2021,26,27,28,29
ece,2022,30,31,32,33
