In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

from numpy.random import randn

In [2]:
# Base Series used throughout: the integers 1..4 labelled 'A'..'D'
ser1 = Series(data=[1, 2, 3, 4], index=list('ABCD'))
ser1

A    1
B    2
C    3
D    4
dtype: int64

In [4]:
# Reindexing: conform ser1 to a new set of labels.
# Labels that ser1 does not have ('E', 'F') are filled with NaN,
# which is why the result is float64 instead of int64.
ser2 = ser1.reindex(index=list('ABCDEF'))
ser2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

In [5]:
# fill_value: the value given to labels that the reindex newly introduces.
# Labels that already exist in ser2 keep whatever they hold -- including the
# NaN stored under 'E' and 'F' -- so only the new 'G' and 'H' become 0.
ser2.reindex(index=list('ABCDEFGH'), fill_value=0)

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    0.0
H    0.0
dtype: float64

In [6]:
# Sparse integer index (0, 5, 10): the gaps are filled by the reindex that follows
ser3 = Series({0: 'USA', 5: 'Mexico', 10: 'Canada'})
ser3

0        USA
5     Mexico
10    Canada
dtype: object

In [7]:
# Use range() to supply the new index (0..14).
# method='ffill' (forward fill): each label missing from ser3 takes the value
# of the nearest preceding label that does exist, e.g. 1-4 -> 'USA'.
ser3.reindex(index=range(15), method='ffill')

0        USA
1        USA
2        USA
3        USA
4        USA
5     Mexico
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10    Canada
11    Canada
12    Canada
13    Canada
14    Canada
dtype: object

In [9]:
# 5x5 frame of standard-normal random numbers.
# Note that the row labels skip 'C'.
dataFrame = DataFrame(
    randn(5, 5),
    index=list('ABDEF'),
    columns=['col1', 'col2', 'col3', 'col4', 'col5'],
)
dataFrame

Unnamed: 0,col1,col2,col3,col4,col5
A,1.073821,-0.028703,-0.996531,-0.034518,-0.579667
B,-0.864156,-0.992529,0.248712,1.131031,0.560323
D,1.842582,0.458928,1.49016,0.914676,-1.073665
E,0.327743,0.653081,-0.464785,-0.178951,0.430896
F,1.907952,0.437114,-0.034518,1.20166,0.743071


In [10]:
# Reindex along the rows: 'C' is not a row of dataFrame,
# so it appears in the result as a row of NaN
dataF2 = dataFrame.reindex(index=list('ABCDEF'))
dataF2

Unnamed: 0,col1,col2,col3,col4,col5
A,1.073821,-0.028703,-0.996531,-0.034518,-0.579667
B,-0.864156,-0.992529,0.248712,1.131031,0.560323
C,,,,,
D,1.842582,0.458928,1.49016,0.914676,-1.073665
E,0.327743,0.653081,-0.464785,-0.178951,0.430896
F,1.907952,0.437114,-0.034518,1.20166,0.743071


In [11]:
# Reindex along the columns: the five existing columns plus a new 'col6',
# which comes back as a column of NaN
new_columns = ['col1', 'col2', 'col3', 'col4', 'col5'] + ['col6']
dataF2.reindex(columns=new_columns)

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,1.073821,-0.028703,-0.996531,-0.034518,-0.579667,
B,-0.864156,-0.992529,0.248712,1.131031,0.560323,
C,,,,,,
D,1.842582,0.458928,1.49016,0.914676,-1.073665,
E,0.327743,0.653081,-0.464785,-0.178951,0.430896,
F,1.907952,0.437114,-0.034518,1.20166,0.743071,


In [14]:
# .loc[] -> label-based indexing (an indexer used with square brackets, not a call).
# Every label passed to .loc must already exist: list-based selection with a
# missing label was deprecated in pandas 0.21 and raises KeyError since pandas 1.0.
# new_columns contains 'col6', which dataF2 does not have, so the columns are
# reindexed first (adding 'col6' as NaN) and .loc then selects by label.
dataF2.reindex(columns=new_columns).loc[['A', 'B', 'C', 'D', 'E', 'F'], new_columns]

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,1.073821,-0.028703,-0.996531,-0.034518,-0.579667,
B,-0.864156,-0.992529,0.248712,1.131031,0.560323,
C,,,,,,
D,1.842582,0.458928,1.49016,0.914676,-1.073665,
E,0.327743,0.653081,-0.464785,-0.178951,0.430896,
F,1.907952,0.437114,-0.034518,1.20166,0.743071,
