In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a 10x10 frame of random integers in [100, 200) with columns labelled A-J.
# No seed is set in the visible cells, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randint(low=100, high=200, size=(10, 10)),
    columns=list('ABCDEFGHIJ'),
)
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,159,132,157,145,157,114,153,181,137,143
1,179,181,121,131,122,141,138,197,138,109
2,108,139,107,140,185,181,148,158,178,125
3,112,137,119,146,197,174,118,110,153,125
4,189,156,153,194,118,140,134,153,146,177
5,175,126,141,189,190,112,135,140,142,125
6,165,117,185,135,124,121,127,102,169,153
7,147,150,185,121,171,149,145,167,118,139
8,114,121,188,174,126,180,132,129,133,148
9,142,130,131,107,136,165,136,181,168,179


In [3]:
# A newly created DataFrame gets a default RangeIndex when no index is specified.
df.index

RangeIndex(start=0, stop=10, step=1)

# Basic Index Manipulation

In [4]:
# Promote column 'A' to be the row index. set_index returns a new frame, which is
# rebound to df here; afterwards 'A' is the index name and no longer a regular column.
df = df.set_index('A')
# Print the index first (plain-text repr), then let the frame render as the cell output.
print(df.index)
df

Int64Index([159, 179, 108, 112, 189, 175, 165, 147, 114, 142], dtype='int64', name='A')


Unnamed: 0_level_0,B,C,D,E,F,G,H,I,J
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
159,132,157,145,157,114,153,181,137,143
179,181,121,131,122,141,138,197,138,109
108,139,107,140,185,181,148,158,178,125
112,137,119,146,197,174,118,110,153,125
189,156,153,194,118,140,134,153,146,177
175,126,141,189,190,112,135,140,142,125
165,117,185,135,124,121,127,102,169,153
147,150,185,121,171,149,145,167,118,139
114,121,188,174,126,180,132,129,133,148
142,130,131,107,136,165,136,181,168,179


In [5]:
# Reset the index back to the default RangeIndex.
# The previous index ('A') is moved back into the frame as a regular column.
df = df.reset_index()
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
0,159,132,157,145,157,114,153,181,137,143
1,179,181,121,131,122,141,138,197,138,109
2,108,139,107,140,185,181,148,158,178,125
3,112,137,119,146,197,174,118,110,153,125
4,189,156,153,194,118,140,134,153,146,177
5,175,126,141,189,190,112,135,140,142,125
6,165,117,185,135,124,121,127,102,169,153
7,147,150,185,121,171,149,145,167,118,139
8,114,121,188,174,126,180,132,129,133,148
9,142,130,131,107,136,165,136,181,168,179


In [6]:
# Overwrite the row labels in place: rows keep their positions, only the labels change.
df.index = pd.Index(list('ABCDEFGHIJ'))
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
A,159,132,157,145,157,114,153,181,137,143
B,179,181,121,131,122,141,138,197,138,109
C,108,139,107,140,185,181,148,158,178,125
D,112,137,119,146,197,174,118,110,153,125
E,189,156,153,194,118,140,134,153,146,177
F,175,126,141,189,190,112,135,140,142,125
G,165,117,185,135,124,121,127,102,169,153
H,147,150,185,121,171,149,145,167,118,139
I,114,121,188,174,126,180,132,129,133,148
J,142,130,131,107,136,165,136,181,168,179


In [7]:
# Use reindex to conform the existing df to a new index.
# This is different from simply overwriting the index values: each row follows its index label to its new position.
# It can be used to reorder rows, exclude rows, or add new rows with fill logic. It returns a new object.
# Here the existing labels E..A are kept in the requested order, F..J are dropped,
# and the new labels Z..V become rows filled with 0.
df.reindex(list('EDCBAZYXWV'), fill_value=0)

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
E,189,156,153,194,118,140,134,153,146,177
D,112,137,119,146,197,174,118,110,153,125
C,108,139,107,140,185,181,148,158,178,125
B,179,181,121,131,122,141,138,197,138,109
A,159,132,157,145,157,114,153,181,137,143
Z,0,0,0,0,0,0,0,0,0,0
Y,0,0,0,0,0,0,0,0,0,0
X,0,0,0,0,0,0,0,0,0,0
W,0,0,0,0,0,0,0,0,0,0
V,0,0,0,0,0,0,0,0,0,0


Index objects are very similar to Series objects in their attributes and methods, e.g. `name`, `is_unique`, `value_counts()`, `sort_values()`, ...

# Interval Index

In [8]:
# Replace the row labels with an IntervalIndex of ten unit-width intervals
# spanning 0 to 10; the output shows them as right-closed, e.g. (0, 1].
df.index = pd.interval_range(start=0, end=10)
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
"(0, 1]",159,132,157,145,157,114,153,181,137,143
"(1, 2]",179,181,121,131,122,141,138,197,138,109
"(2, 3]",108,139,107,140,185,181,148,158,178,125
"(3, 4]",112,137,119,146,197,174,118,110,153,125
"(4, 5]",189,156,153,194,118,140,134,153,146,177
"(5, 6]",175,126,141,189,190,112,135,140,142,125
"(6, 7]",165,117,185,135,124,121,127,102,169,153
"(7, 8]",147,150,185,121,171,149,145,167,118,139
"(8, 9]",114,121,188,174,126,180,132,129,133,148
"(9, 10]",142,130,131,107,136,165,136,181,168,179


# Categorical Index

In [9]:
# Categorical index: as with a categorical Series, use it to enforce a fixed set of
# categories and/or to specify their order/rank (here 'cat2' ranks before 'cat1').
df.index = pd.CategoricalIndex(
    ['cat1', 'cat2'] * 5,
    categories=['cat2', 'cat1'],
    ordered=True,
)
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J
cat1,159,132,157,145,157,114,153,181,137,143
cat2,179,181,121,131,122,141,138,197,138,109
cat1,108,139,107,140,185,181,148,158,178,125
cat2,112,137,119,146,197,174,118,110,153,125
cat1,189,156,153,194,118,140,134,153,146,177
cat2,175,126,141,189,190,112,135,140,142,125
cat1,165,117,185,135,124,121,127,102,169,153
cat2,147,150,185,121,171,149,145,167,118,139
cat1,114,121,188,174,126,180,132,129,133,148
cat2,142,130,131,107,136,165,136,181,168,179


In [10]:
# Inspect the resulting index: the output shows a CategoricalIndex that is ordered,
# with categories listed as ['cat2', 'cat1'].
df.index

CategoricalIndex(['cat1', 'cat2', 'cat1', 'cat2', 'cat1', 'cat2', 'cat1',
                  'cat2', 'cat1', 'cat2'],
                 categories=['cat2', 'cat1'], ordered=True, dtype='category')