## Resetting and Setting Index

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Seed NumPy's legacy global RNG so the random integers below are reproducible
np.random.seed(100)

# 5x5 frame of integers drawn from [0, 20), with rows R1..R5 and columns C1..C5
df = pd.DataFrame(
    data=np.random.randint(low=0, high=20, size=(5, 5)),
    index=[f'R{i}' for i in range(1, 6)],
    columns=[f'C{i}' for i in range(1, 6)],
)
df

Unnamed: 0,C1,C2,C3,C4,C5
R1,8,3,7,15,16
R2,10,2,2,2,14
R3,2,17,16,15,4
R4,11,16,9,2,12
R5,4,1,13,19,4


### Using .reset_index()

   
    - resets the index to the default integer index (0, 1, 2, ...), moving the old index into a column

In [3]:
# reset_index() moves the current index (R1..R5) into a regular column named 'index'
# and replaces it with the default integer index (0, 1, 2, ...)

df.reset_index()

Unnamed: 0,index,C1,C2,C3,C4,C5
0,R1,8,3,7,15,16
1,R2,10,2,2,2,14
2,R3,2,17,16,15,4
3,R4,11,16,9,2,12
4,R5,4,1,13,19,4


In [4]:
# reset_index() returned a new DataFrame rather than changing df in place, so df still has its R1..R5 index

df

Unnamed: 0,C1,C2,C3,C4,C5
R1,8,3,7,15,16
R2,10,2,2,2,14
R3,2,17,16,15,4
R4,11,16,9,2,12
R5,4,1,13,19,4


In [5]:
# reset_index() returns a new DataFrame; pass inplace=True to modify df itself

# df.reset_index(inplace=True) --> use this one to reset index permanently

# Do this when you want to replace the current index with the default integer index (0, 1, 2, ...)

### Using set_index()

    - a method to set one or more existing columns (or an array such as a Series, Index or NumPy array) as the index of a DataFrame.

In [6]:
# Add a helper column of single-letter labels (one per row) to use as the index below

df['new'] = list('ABCDE')
df

Unnamed: 0,C1,C2,C3,C4,C5,new
R1,8,3,7,15,16,A
R2,10,2,2,2,14,B
R3,2,17,16,15,4,C
R4,11,16,9,2,12,D
R5,4,1,13,19,4,E


In [7]:
# set_index() returns a new DataFrame that uses the given column as its index;
# the column no longer appears among the regular columns of the result

df.set_index('new')

Unnamed: 0_level_0,C1,C2,C3,C4,C5
new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,8,3,7,15,16
B,10,2,2,2,14
C,2,17,16,15,4
D,11,16,9,2,12
E,4,1,13,19,4


In [8]:
# As with reset_index(), this is not a permanent change: df itself still has its
# original index and the 'new' column (pass inplace=True to modify df directly)

df

Unnamed: 0,C1,C2,C3,C4,C5,new
R1,8,3,7,15,16,A
R2,10,2,2,2,14,B
R3,2,17,16,15,4,C
R4,11,16,9,2,12,D
R5,4,1,13,19,4,E


In [9]:
# With inplace=True, set_index() modifies df itself (and returns None) instead of
# handing back a new DataFrame. Re-running this cell raises a KeyError, because
# 'new' is no longer a column once it has become the index.
df.set_index(keys='new', inplace=True)
df

Unnamed: 0_level_0,C1,C2,C3,C4,C5
new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,8,3,7,15,16
B,10,2,2,2,14
C,2,17,16,15,4
D,11,16,9,2,12
E,4,1,13,19,4


In [11]:
# The new index is now permanent because set_index() was called with inplace=True
# This overrides the original index labels 'R1 R2 ...'

df

Unnamed: 0_level_0,C1,C2,C3,C4,C5
new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,8,3,7,15,16
B,10,2,2,2,14
C,2,17,16,15,4
D,11,16,9,2,12
E,4,1,13,19,4


## DataFrames with a MultiIndex (Hierarchical Index)

In [19]:
# Build a two-level index from parallel arrays:
# the first array supplies the outer labels, the second the inner labels

arr = [
    ['R1'] * 3 + ['R2'] * 3,  # level 0 (outer): R1, R1, R1, R2, R2, R2
    [1, 2, 3] * 2,            # level 1 (inner): 1, 2, 3, 1, 2, 3
]
h_index = pd.MultiIndex.from_arrays(arr)

In [20]:
# Each entry of the MultiIndex is an (outer label, inner label) tuple
h_index

MultiIndex([('R1', 1),
            ('R1', 2),
            ('R1', 3),
            ('R2', 1),
            ('R2', 2),
            ('R2', 3)],
           )

In [21]:
# 6x2 frame of standard-normal draws, indexed by the two-level h_index.
# Note: this rebinds df, replacing the frame used in the earlier cells.
df = pd.DataFrame(
    data=np.random.randn(6, 2),
    index=h_index,
    columns=['A', 'B'],
)

In [22]:
# The outer level (R1, R2) and the inner level (1, 2, 3) together form the row index
df

Unnamed: 0,Unnamed: 1,A,B
R1,1,1.430188,0.949711
R1,2,0.65692,0.222034
R1,3,0.598291,-1.750374
R2,1,0.133306,-1.31521
R2,2,-0.937495,0.384211
R2,3,-0.66168,2.587856
