In [19]:
import numpy as np
import pandas as pd

In [20]:
# reindex() is the fundamental data alignment method in pandas: it conforms
# the data to a given set of labels along a particular axis, and nearly all
# other label-alignment features are implemented on top of it.
s = pd.Series(data=np.random.randn(5), index=list("abcde"))

s

a    0.662072
b    1.015907
c   -0.334998
d   -0.403394
e    0.601626
dtype: float64

In [21]:
# "b" and "e" keep their values; "f" is not in s's index, so it comes back as NaN.
s.reindex(["b","e","f"])

b    1.015907
e    0.601626
f         NaN
dtype: float64

In [22]:
# 4x3 frame holding 0..11, rows labelled a-d, used by the reindexing examples.
df = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=list("abcd"),
    columns=["one", "two", "three"],
)


In [23]:
# With a DataFrame, you can simultaneously reindex the index and columns.
# Row "f" does not exist in df, so it comes back as an all-NaN row.

df.reindex(index=["c", "f", "b"], columns=["three", "two", "one"])

Unnamed: 0,three,two,one
c,8.0,7.0,6.0
f,,,
b,5.0,4.0,3.0


In [24]:
# You may also use reindex with an axis keyword: axis="index" applies the
# labels to the rows and leaves the columns as they are.
df.reindex(["c", "f", "b"], axis="index")

Unnamed: 0,one,two,three
c,6.0,7.0,8.0
f,,,
b,3.0,4.0,5.0


In [25]:
# Smaller 3x2 frame of uniform random values with rows a-c and columns one/two.
df2 = pd.DataFrame(
    np.random.rand(3, 2),
    index=list("abc"),
    columns=["one", "two"],
)
df2

Unnamed: 0,one,two
a,0.471359,0.816915
b,0.988817,0.027468
c,0.981843,0.294163


##### Reindexing to align with another object
- You may wish to take an object and reindex its axes to be labeled the same as another object. While the syntax for this is straightforward albeit verbose, it is a common enough operation that the reindex_like() method is available to make this simpler:

In [28]:
# Conform df to df2's labels: only the rows and columns that df2 has
# (rows a-c, columns one/two) are kept.
df.reindex_like(df2)

Unnamed: 0,one,two
a,0,1
b,3,4
c,6,7


##### Aligning objects with each other with align
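- The `align()` method is the fastest way to simultaneously align two objects. It supports a `join` argument (related to joining and merging): `join='outer'` takes the union of the indexes (the default), `join='left'` uses the calling object's index, `join='right'` uses the passed object's index, and `join='inner'` takes the intersection of the indexes.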

In [30]:
# NOTE: `df` is rebound here to a Series of random integers in [1, 5),
# labelled a-e.
df = pd.Series(np.random.randint(1, 5, size=5), index=list("abcde"))
df


a    2
b    4
c    1
d    4
e    2
dtype: int32

In [33]:
# Two overlapping slices of the integer Series: df1 drops the last label,
# df2 drops the first one.
df1 = df[:4]

df2 = df[1:]

# The align() examples below also use `s2`, which was never defined in this
# notebook (NameError on a fresh kernel). Their recorded outputs show the
# values of `s` without its first label, i.e. this slice.
s2 = s[1:]

In [35]:
# align() returns a tuple with both of the reindexed Series. With the default
# join ("outer") the result uses the union of the labels, and a label missing
# on one side is NaN there.
df1.align(s2)

(a    2.0
 b    4.0
 c    1.0
 d    4.0
 e    NaN
 dtype: float64,
 a         NaN
 b    1.015907
 c   -0.334998
 d   -0.403394
 e    0.601626
 dtype: float64)

In [36]:
# join="inner" keeps only the labels that both Series have in common.
df1.align(s2, join="inner")

(b    4
 c    1
 d    4
 dtype: int32,
 b    1.015907
 c   -0.334998
 d   -0.403394
 dtype: float64)

In [40]:
# join="right" uses df2's labels; df1 has no "e", so it gets NaN there.
df1.align(df2, join="right")

(b    4.0
 c    1.0
 d    4.0
 e    NaN
 dtype: float64,
 b    4
 c    1
 d    4
 e    2
 dtype: int32)

In [41]:
# align() returned new objects; the original Series is unchanged.
df

a    2
b    4
c    1
d    4
e    2
dtype: int32

##### Filling while reindexing
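- `reindex()` takes an optional parameter `method`, a filling method that is one of: `pad` / `ffill` (fill values forward), `bfill` / `backfill` (fill values backward), or `nearest` (fill from the nearest index value).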

In [49]:
# "f" is missing from df, so method="ffill" fills it from the last existing
# label that sorts before it ("e").
df.reindex(["d","b","f","c", "e", "d"], method="ffill")

d    4
b    4
f    2
c    1
e    2
d    4
dtype: int32

##### Limits on filling while reindexing
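- The `limit` and `tolerance` arguments provide additional control over filling while reindexing. `limit` specifies the maximum count of consecutive matches, and `tolerance` specifies the maximum distance between the index and indexer values. Note that `limit` is only well-defined when both the index and the target labels are monotonic: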

In [51]:
# `limit` is only well-defined when both the index and the target labels are
# monotonic. This target is unsorted, so pandas raises ValueError; catch and
# print it so the notebook still runs top to bottom.
# A sorted target such as ["b", "c", "d", "e", "f"] works with limit=1.
try:
    df.reindex(["d", "b", "f", "c", "e", "d"], method="ffill", limit=1)
except ValueError as exc:
    print(f"ValueError: {exc}")

ValueError: limit argument for 'pad' method only well-defined if index and target are monotonic

##### Dropping labels from an axis
- A method closely related to reindex is the drop() function. It removes a set of labels from an axis:

In [52]:
# The Series before any labels are dropped.
df

a    2
b    4
c    1
d    4
e    2
dtype: int32

In [53]:
# drop() returns a new Series without the label "d".
df.drop(["d"])

a    2
b    4
c    1
e    2
dtype: int32

##### Renaming / mapping labels
- The rename() method allows you to relabel an axis based on some mapping (a dict or Series) or an arbitrary function.

In [55]:
# 3x2 frame of standard-normal values used by the rename() examples.
df = pd.DataFrame(
    np.random.randn(3, 2),
    index=list("abc"),
    columns=["one", "two"],
)
df

Unnamed: 0,one,two
a,-0.402068,0.792774
b,0.77432,-0.408227
c,0.123294,0.158959


In [56]:
# A callable passed positionally is applied to each index label; the column
# labels are left unchanged.
df.rename(str.upper)

Unnamed: 0,one,two
A,-0.402068,0.792774
B,0.77432,-0.408227
C,0.123294,0.158959


In [66]:
def func(x: str) -> str:
    """Return the label ``x`` with the suffix ``" 1"`` appended."""
    return x + " 1"
    
# Relabel the columns through a dict and the index labels through func().
df.rename(index=func, columns={"one": "1", "two": "2"})

Unnamed: 0,1,2
a 1,-0.402068,0.792774
b 1,0.77432,-0.408227
c 1,0.123294,0.158959


In [67]:
# DataFrame.rename() also supports an "axis-style" calling convention:
# pass a single mapper together with the axis that mapping applies to.
df.rename({"one": "foo", "two": "bar"}, axis="columns")

Unnamed: 0,foo,bar
a,-0.402068,0.792774
b,0.77432,-0.408227
c,0.123294,0.158959


In [69]:
# Labels missing from the mapping ("c") are left as they are, and extra keys
# in the mapping ("d") are ignored rather than raising an error.
df.rename({"a": "apple", "b": "banana", "d": "durian"}, axis="index")

Unnamed: 0,one,two
apple,-0.402068,0.792774
banana,0.77432,-0.408227
c,0.123294,0.158959


In [70]:
# DataFrame.rename_axis() and Series.rename_axis() change the *names* of the
# levels of a MultiIndex (as opposed to the labels themselves).
# Build a small frame with a two-level ("let", "num") index to show this.
df = pd.DataFrame(
    data={
        "x": [1, 2, 3, 4, 5, 6],
        "y": [10, 20, 30, 40, 50, 60],
    },
    index=pd.MultiIndex.from_product(
        iterables=[list("abc"), [1, 2]],
        names=["let", "num"],
    ),
)


df

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y
let,num,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,1,10
a,2,2,20
b,1,3,30
b,2,4,40
c,1,5,50
c,2,6,60


In [71]:
# Rename only the "let" level of the index to "abc"; the "num" level name and
# all of the labels stay the same.
df.rename_axis(index={"let": "abc"})

Unnamed: 0_level_0,Unnamed: 1_level_0,x,y
abc,num,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,1,10
a,2,2,20
b,1,3,30
b,2,4,40
c,1,5,50
c,2,6,60
