# Resetting and Setting Index

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Fix the legacy global NumPy seed so the random integers below are
# the same on every Restart & Run All.
np.random.seed(100)

# 5x5 frame of integers drawn from [0, 20), rows labelled R1..R5 and
# columns labelled C1..C5.
df = pd.DataFrame(
    data=np.random.randint(0, 20, size=(5, 5)),
    index=[f'R{i}' for i in range(1, 6)],
    columns=[f'C{i}' for i in range(1, 6)],
)
df

Unnamed: 0,C1,C2,C3,C4,C5
R1,8,3,7,15,16
R2,10,2,2,2,14
R3,2,17,16,15,4
R4,11,16,9,2,12
R5,4,1,13,19,4


### Using .reset_index()
* The `.reset_index()` method replaces the index of a DataFrame with the default integer index (0, 1, 2, ...). By default the old index is kept as a new column; pass `drop=True` to discard it instead. The method returns a new DataFrame and leaves the original unchanged.

In [None]:
# reset_index() returns a new frame: the old row labels move into a
# column named 'index' and the rows get a default 0..n-1 integer index.
# drop=False is the default; it is spelled out here to make that explicit.

df_reset = df.reset_index(drop=False)
df_reset

Unnamed: 0,index,C1,C2,C3,C4,C5
0,R1,8,3,7,15,16
1,R2,10,2,2,2,14
2,R3,2,17,16,15,4
3,R4,11,16,9,2,12
4,R5,4,1,13,19,4


In [None]:
# With drop=True the old row labels are discarded instead of being kept
# as a column; only the default integer index remains.

df_reset_d = df.reset_index(drop=True)
df_reset_d

Unnamed: 0,C1,C2,C3,C4,C5
0,8,3,7,15,16
1,10,2,2,2,14
2,2,17,16,15,4
3,11,16,9,2,12
4,4,1,13,19,4


### Using .set_index()
* The `.set_index()` method sets one or more columns of a DataFrame as its index. By default, it returns a new DataFrame in which those columns have been moved into the index; the original DataFrame is left unchanged.

In [None]:
# Add a helper column of unique labels (A..E) so there is a column
# available to promote to the index with set_index().

df['new1'] = list('ABCDE')
df

Unnamed: 0,C1,C2,C3,C4,C5,new1
R1,8,3,7,15,16,A
R2,10,2,2,2,14,B
R3,2,17,16,15,4,C
R4,11,16,9,2,12,D
R5,4,1,13,19,4,E


In [None]:
# set_index() moves the 'new1' column into the row index and returns a
# NEW DataFrame; `df` itself is left untouched.
# Keep the result by assigning it to a name rather than passing
# inplace=True: an in-place call would remove 'new1' from `df`, so
# re-running the cell would fail with a KeyError.

df_new1 = df.set_index('new1')
df_new1

Unnamed: 0_level_0,C1,C2,C3,C4,C5
new1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,8,3,7,15,16
B,10,2,2,2,14
C,2,17,16,15,4
D,11,16,9,2,12
E,4,1,13,19,4


In [None]:
# Two label columns this time, so they can be combined into a two-level
# index. 'new1' already exists from the earlier cell; re-assigning the
# same values is harmless.

df['new1'] = list('ABCDE')
df['new2'] = list('FGHIJ')
df

Unnamed: 0,C1,C2,C3,C4,C5,new1,new2
R1,8,3,7,15,16,A,F
R2,10,2,2,2,14,B,G
R3,2,17,16,15,4,C,H
R4,11,16,9,2,12,D,I
R5,4,1,13,19,4,E,J


# Multiple Index DataFrame

In [None]:
# Passing a list of column names to set_index() builds a multiple-level
# index: 'new1' becomes the outer level and 'new2' the inner level.
# The result is a new frame; `df` is not modified.

multi_index = df.set_index(keys=['new1', 'new2'])
multi_index

Unnamed: 0_level_0,Unnamed: 1_level_0,C1,C2,C3,C4,C5
new1,new2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A,F,8,3,7,15,16
B,G,10,2,2,2,14
C,H,2,17,16,15,4
D,I,11,16,9,2,12
E,J,4,1,13,19,4


In [None]:
# Accessing subsets
# A full (new1, new2) tuple identifies exactly one row, so .loc returns
# that row as a Series whose index is the column names C1..C5.

multi_index.loc[('A', 'F')]

Unnamed: 0_level_0,A
Unnamed: 0_level_1,F
C1,8
C2,3
C3,7
C4,15
C5,16


In [None]:
# .loc with a list of full index tuples for the rows and a label slice
# for the columns; 'C3': runs from column C3 through the last column.

multi_index.loc[
    [('C', 'H'), ('D', 'I')],
    'C3':
]

Unnamed: 0_level_0,Unnamed: 1_level_0,C3,C4,C5
new1,new2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C,H,16,15,4
D,I,9,2,12


In [None]:
# .loc with explicit lists on both axes: two rows by their (new1, new2)
# key and two non-adjacent columns by name.

multi_index.loc[
    [('A', 'F'), ('B', 'G')],
    ['C1', 'C3']
]

Unnamed: 0_level_0,Unnamed: 1_level_0,C1,C3
new1,new2,Unnamed: 2_level_1,Unnamed: 3_level_1
A,F,8,7
B,G,10,2


In [None]:
# Accessing subsets .iloc
# .iloc is purely positional and ignores the index labels:
# first two rows, first three columns (stop positions are exclusive).

multi_index.iloc[:2, :3]

Unnamed: 0_level_0,Unnamed: 1_level_0,C1,C2,C3
new1,new2,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,F,8,3,7
B,G,10,2,2


In [None]:
# Accessing subsets .iloc, non-contiguous
# Lists of 0-based integer positions: rows 1 and 3, columns 2 and 4
# (here the rows (B, G) and (D, I) and the columns C3 and C5).

multi_index.iloc[[1, 3], [2, 4]]

Unnamed: 0_level_0,Unnamed: 1_level_0,C3,C5
new1,new2,Unnamed: 2_level_1,Unnamed: 3_level_1
B,G,2,14
D,I,9,12


In [None]:
# Another way to get a multi-level index: build it directly from
# parallel arrays. Each inner list is one index level, and position i
# across the lists forms the i-th index tuple.

arr = [
    ['R1'] * 3 + ['R2'] * 3,  # level 0: R1, R1, R1, R2, R2, R2
    [1, 2, 3] * 2,            # level 1: 1, 2, 3, 1, 2, 3
]
indices = pd.MultiIndex.from_arrays(arr)
indices

MultiIndex([('R1', 1),
            ('R1', 2),
            ('R1', 3),
            ('R2', 1),
            ('R2', 2),
            ('R2', 3)],
           )

In [None]:
# Re-seed so the 6x2 block of standard-normal draws is reproducible,
# then attach the two-level index built in the previous cell.
np.random.seed(100)

df_multi = pd.DataFrame(
    np.random.randn(6, 2),
    columns=['A', 'B'],
    index=indices,
)
df_multi

Unnamed: 0,Unnamed: 1,A,B
R1,1,-1.749765,0.34268
R1,2,1.153036,-0.252436
R1,3,0.981321,0.514219
R2,1,0.22118,-1.070043
R2,2,-0.189496,0.255001
R2,3,-0.458027,0.435163


In [None]:
# One row by its full (level 0, level 1) key, two columns by name.
# Column labels are given as a list: inside .loc a tuple is the notation
# for a single MultiIndex key, so a list keeps the row key and the
# column selection unambiguous. The result is a Series indexed by A and B.

df_multi.loc[('R1', 2), ['A', 'B']]

Unnamed: 0_level_0,R1
Unnamed: 0_level_1,2
A,1.153036
B,-0.252436


In [None]:
# Within group 'R1', pick inner labels 1 and 3. The row key is a tuple
# with one selector per index level; a list selects several labels on
# that level. Columns are given as a list of names, because a tuple in
# .loc is the notation for a single MultiIndex key.

df_multi.loc[('R1', [1, 3]), ['A', 'B']]

Unnamed: 0,Unnamed: 1,A,B
R1,1,-1.749765,0.34268
R1,3,0.981321,0.514219


In [None]:
# Lists on both index levels: groups R1 and R2 on the outer level,
# labels 1 and 3 on the inner level, giving the four matching rows.
# Columns are given as a list of names, because a tuple in .loc is the
# notation for a single MultiIndex key.

df_multi.loc[(['R1', 'R2'], [1, 3]), ['A', 'B']]

Unnamed: 0,Unnamed: 1,A,B
R1,1,-1.749765,0.34268
R1,3,0.981321,0.514219
R2,1,0.22118,-1.070043
R2,3,-0.458027,0.435163


In [None]:
# Accessing subsets .iloc
# Row positions 1 and 2 (the stop position 3 is excluded) of the column
# at position 1, i.e. 'B'. A scalar column position returns a Series.

df_multi.iloc[1:3, 1]

Unnamed: 0,Unnamed: 1,B
R1,2,-0.252436
R1,3,0.514219


In [None]:
# Display the full frame again as a reference for the slices below.
df_multi

Unnamed: 0,Unnamed: 1,A,B
R1,1,-1.749765,0.34268
R1,2,1.153036,-0.252436
R1,3,0.981321,0.514219
R2,1,0.22118,-1.070043
R2,2,-0.189496,0.255001
R2,3,-0.458027,0.435163


In [None]:
# Accessing subsets .iloc
# From row position 2 to the end. The column slice 1: keeps the result
# a DataFrame (only column 'B' here) rather than collapsing to a Series.

df_multi.iloc[2:, 1:]

Unnamed: 0,Unnamed: 1,B
R1,3,0.514219
R2,1,-1.070043
R2,2,0.255001
R2,3,0.435163


In [None]:
# Accessing subsets .iloc
# Arbitrary row positions via a list; the bare ':' keeps every column.

df_multi.iloc[[0, 2, 3, 4], :]

Unnamed: 0,Unnamed: 1,A,B
R1,1,-1.749765,0.34268
R1,3,0.981321,0.514219
R2,1,0.22118,-1.070043
R2,2,-0.189496,0.255001


In [None]:
# Using .xs
# Cross-section on the outer level: returns the 'R1' rows with that
# level dropped, leaving the inner labels 1..3 as the index.

df_multi.xs('R1')

Unnamed: 0,A,B
1,-1.749765,0.34268
2,1.153036,-0.252436
3,0.981321,0.514219


In [None]:
# Using .xs
# After .xs('R1') the index is the single inner level, so .loc[2:] is an
# ordinary label slice starting at label 2 (inclusive).

df_multi.xs('R1').loc[2:]

Unnamed: 0,A,B
2,1.153036,-0.252436
3,0.981321,0.514219


In [None]:
# Using .xs
# Same pattern for group 'R2', additionally narrowing to column 'A';
# a single column label yields a Series.

df_multi.xs('R2').loc[2:, 'A']

Unnamed: 0,A
2,-0.189496
3,-0.458027


In [None]:
# Using .xs with level
# level = 1 targets the inner index level by position: keep the rows
# whose inner label is 1. That level is dropped from the result, which
# is therefore indexed by R1 / R2 only.

df_multi.xs(1, level = 1)

Unnamed: 0,A,B
R1,-1.749765,0.34268
R2,0.22118,-1.070043


In [None]:
# Using .xs with level
# Same cross-section as above, then column 'A' selected as a Series.

df_multi.xs(1, level = 1)['A']

Unnamed: 0,A
R1,-1.749765
R2,0.22118


In [None]:
# Give the index levels names so they are easier to refer to:
# the outer level becomes 'Group' and the inner level becomes 'ID'.
# This assigns onto the existing index, so df_multi is modified in place.

df_multi.index.names = ['Group', 'ID']
df_multi

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,ID,Unnamed: 2_level_1,Unnamed: 3_level_1
R1,1,-1.749765,0.34268
R1,2,1.153036,-0.252436
R1,3,0.981321,0.514219
R2,1,0.22118,-1.070043
R2,2,-0.189496,0.255001
R2,3,-0.458027,0.435163


In [None]:
# Using .xs with level
# Same cross-section as xs(1, level = 1), but the level is addressed by
# its name 'ID' instead of by position.

df_multi.xs(1, level = 'ID')

Unnamed: 0_level_0,A,B
Group,Unnamed: 1_level_1,Unnamed: 2_level_1
R1,-1.749765,0.34268
R2,0.22118,-1.070043


In [461]:
# Boolean-mask selection on one index level: take the 'ID' level's values,
# test membership in [1, 3], and keep the rows where the mask is True.

df_multi.loc[
    df_multi.index
    .get_level_values('ID')
    .isin([1, 3])
]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,ID,Unnamed: 2_level_1,Unnamed: 3_level_1
R1,1,-1.749765,0.34268
R1,3,0.981321,0.514219
R2,1,0.22118,-1.070043
R2,3,-0.458027,0.435163


In [462]:
# query() can refer to a named index level as if it were a column:
# keep the rows whose 'ID' level is 1 or 3.

df_multi.query("ID == 1 or ID == 3")

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,ID,Unnamed: 2_level_1,Unnamed: 3_level_1
R1,1,-1.749765,0.34268
R1,3,0.981321,0.514219
R2,1,0.22118,-1.070043
R2,3,-0.458027,0.435163


In [481]:
# Conditions on both index levels in one query expression:
# Group is R1 or R2, and ID is 1 or 3.

df_multi.query(
    "(Group == 'R1' or Group == 'R2') "
    "and (ID == 1 or ID == 3)"
)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,ID,Unnamed: 2_level_1,Unnamed: 3_level_1
R1,1,-1.749765,0.34268
R1,3,0.981321,0.514219
R2,1,0.22118,-1.070043
R2,3,-0.458027,0.435163


In [487]:
# Index-level conditions combined with a condition on a data column:
# Group is R1 or R2, ID is 1 or 3, and column A is at least 0.9.

df_multi.query(
    "(Group == 'R1' or Group == 'R2') "
    "and (ID == 1 or ID == 3) "
    "and (A >= 0.9)"
)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Group,ID,Unnamed: 2_level_1,Unnamed: 3_level_1
R1,3,0.981321,0.514219
