In [1]:
import pandas as pd
import numpy as np

In [2]:
# Fix the seed of NumPy's global random state so the values below are reproducible.
np.random.seed(101)

# 5x4 frame of random normal draws: rows labelled A-E, columns Col1-Col4.
df = pd.DataFrame(
    np.random.randn(5, 4),
    index=list('ABCDE'),
    columns=[f'Col{n}' for n in range(1, 5)],
)
df

Unnamed: 0,Col1,Col2,Col3,Col4
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


## `.index` -- `.reset_index()` -- `.set_index()`


In [6]:
# Inspect the row labels of the DataFrame (here the letters 'A' through 'E').
df.index

Index(['A', 'B', 'C', 'D', 'E'], dtype='object')

In [8]:
# reset_index() returns a new DataFrame in which the old row labels become an
# ordinary column named 'index' and the rows are numbered 0..n-1.
# The result is not assigned here, so `df` itself is left unchanged.
df.reset_index()

Unnamed: 0,index,Col1,Col2,Col3,Col4
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


In [10]:
# Signature, for reference:
# DataFrame.reset_index(level=None, *, drop=False, inplace=False, col_level=0, col_fill='', allow_duplicates=<no_default>, names=None)
# If you are not interested in keeping the old index as a column, set drop=True.
# The result is assigned back to `df` instead of using inplace=True, so the
# rebinding is explicit and the call can be chained with other methods.

df = df.reset_index(drop=True)
df

Unnamed: 0,Col1,Col2,Col3,Col4
0,2.70685,0.628133,0.907969,0.503826
1,0.651118,-0.319318,-0.848077,0.605965
2,-2.018168,0.740122,0.528813,-0.589001
3,0.188695,-0.758872,-0.933237,0.955057
4,0.190794,1.978757,2.605967,0.683509


In [12]:
# Append a fifth column of string labels, one label per row.
part_labels = ['Part1', 'Part2', 'Part3', 'Item1', 'Item2']
df['Col5'] = part_labels
df

Unnamed: 0,Col1,Col2,Col3,Col4,Col5
0,2.70685,0.628133,0.907969,0.503826,Part1
1,0.651118,-0.319318,-0.848077,0.605965,Part2
2,-2.018168,0.740122,0.528813,-0.589001,Part3
3,0.188695,-0.758872,-0.933237,0.955057,Item1
4,0.190794,1.978757,2.605967,0.683509,Item2


In [14]:
# Promote the 'Col5' column to be the row index; it then no longer appears
# among the regular columns. Assigned back to `df` rather than mutated in place.
df = df.set_index('Col5')
df

Unnamed: 0_level_0,Col1,Col2,Col3,Col4
Col5,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Part1,2.70685,0.628133,0.907969,0.503826
Part2,0.651118,-0.319318,-0.848077,0.605965
Part3,-2.018168,0.740122,0.528813,-0.589001
Item1,0.188695,-0.758872,-0.933237,0.955057
Item2,0.190794,1.978757,2.605967,0.683509


In [24]:
# Move the 'Col5' index back into an ordinary column and restore the default
# integer index. Run this cell only once after the set_index cell: running it a
# second time would also turn the integer index into an extra column named 'index'.
df = df.reset_index()
df

Unnamed: 0,index,Col5,Col1,Col2,Col3,Col4
0,0,Part1,2.70685,0.628133,0.907969,0.503826
1,1,Part2,0.651118,-0.319318,-0.848077,0.605965
2,2,Part3,-2.018168,0.740122,0.528813,-0.589001
3,3,Item1,0.188695,-0.758872,-0.933237,0.955057
4,4,Item2,0.190794,1.978757,2.605967,0.683509


In [32]:
# Column rearranging: keep only Col1..Col5, in that order (Col5 ends up last).

column_order = [f'Col{n}' for n in range(1, 6)]
df = df.loc[:, column_order]
df

Unnamed: 0,Col1,Col2,Col3,Col4,Col5
0,2.70685,0.628133,0.907969,0.503826,Part1
1,0.651118,-0.319318,-0.848077,0.605965,Part2
2,-2.018168,0.740122,0.528813,-0.589001,Part3
3,0.188695,-0.758872,-0.933237,0.955057,Item1
4,0.190794,1.978757,2.605967,0.683509,Item2


## MultiIndex

In [18]:
# Rebuild the original 5x4 random frame from the same seed.
# The very next cell rebinds `df`, so this frame is only displayed here.
np.random.seed(101)
row_labels = ['A', 'B', 'C', 'D', 'E']
col_labels = ['Col1', 'Col2', 'Col3', 'Col4']
df = pd.DataFrame(
    np.random.randn(len(row_labels), len(col_labels)),
    index=row_labels,
    columns=col_labels,
)
df

Unnamed: 0,Col1,Col2,Col3,Col4
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [20]:
# Seed the global random state, then build a 10-row frame with two integer
# columns and two categorical (string) columns. The four draws below must stay
# in this order, because each one advances the shared random state.
np.random.seed(13)
n_rows = 10
df = pd.DataFrame({
    'Col1': np.random.randint(10, 20, size=n_rows),        # integers in [10, 20)
    'Col2': np.random.randint(-10, 0, size=n_rows),        # integers in [-10, 0)
    'Category1': np.random.choice(['X', 'Y', 'Z'], size=n_rows),
    'Category2': np.random.choice(list('abcdefg'), size=n_rows),
})
df

Unnamed: 0,Col1,Col2,Category1,Category2
0,12,-4,Y,e
1,10,-5,Y,f
2,10,-1,Z,d
3,16,-6,X,f
4,12,-8,X,a
5,14,-10,X,d
6,19,-7,X,c
7,13,-5,Z,a
8,14,-7,Z,f
9,12,-4,Z,b


In [22]:
# Use both category columns as the row index, which produces a two-level
# (hierarchical) MultiIndex. Assigned back to `df` rather than mutated in place.
df = df.set_index(['Category1', 'Category2'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Col1,Col2
Category1,Category2,Unnamed: 2_level_1,Unnamed: 3_level_1
Y,e,12,-4
Y,f,10,-5
Z,d,10,-1
X,f,16,-6
X,a,12,-8
X,d,14,-10
X,c,19,-7
Z,a,13,-5
Z,f,14,-7
Z,b,12,-4


In [24]:
# With two index levels, the index is a MultiIndex whose entries are
# (Category1, Category2) tuples.
df.index

MultiIndex([('Y', 'e'),
            ('Y', 'f'),
            ('Z', 'd'),
            ('X', 'f'),
            ('X', 'a'),
            ('X', 'd'),
            ('X', 'c'),
            ('Z', 'a'),
            ('Z', 'f'),
            ('Z', 'b')],
           names=['Category1', 'Category2'])

In [26]:
# The names of the index levels, in level order.
df.index.names

FrozenList(['Category1', 'Category2'])

In [28]:
# NOTE: The MultiIndex format will likely appear over your career in the workplace.
# Typically, we tend to prefer a DataFrame that is not MultiIndexed, since the
# majority of the data science packages have functions/methods more suitable for
# non-MultiIndex DataFrames. In this case, you can remove the hierarchical index
# by calling .reset_index(), which turns every index level back into a column.

df = df.reset_index()
df

Unnamed: 0,Category1,Category2,Col1,Col2
0,Y,e,12,-4
1,Y,f,10,-5
2,Z,d,10,-1
3,X,f,16,-6
4,X,a,12,-8
5,X,d,14,-10
6,X,c,19,-7
7,Z,a,13,-5
8,Z,f,14,-7
9,Z,b,12,-4


In [30]:
# Display the flattened DataFrame again; it is unchanged since the previous cell.
df

Unnamed: 0,Category1,Category2,Col1,Col2
0,Y,e,12,-4
1,Y,f,10,-5
2,Z,d,10,-1
3,X,f,16,-6
4,X,a,12,-8
5,X,d,14,-10
6,X,c,19,-7
7,Z,a,13,-5
8,Z,f,14,-7
9,Z,b,12,-4
