In [1]:
import pandas as pd
import numpy as np

In [2]:
# Reindexing -> changes the row labels and column labels of a DataFrame
# Multiple operations can be accomplished through reindexing:
# 1_Reorder the existing data to match a new set of labels
# 2_Insert missing value (NA) markers in label locations where no data for the label existed

In [5]:
# Build a small demo frame with one column per kind of data
# (dates, an evenly spaced ramp, uniform noise, categorical labels, Gaussian noise).
N = 20
_dates = pd.date_range('2016-01-01', periods=N, freq='D')
_ramp = np.linspace(0, N - 1, N)
# The random draws are taken in column order (y, then C, then D).
_uniform = np.random.rand(N)
_levels = np.random.choice(['Low', 'Medium', 'High'], N).tolist()
_gaussian = np.random.normal(loc=100, scale=10, size=N).tolist()

df1 = pd.DataFrame({
    'A': _dates,
    'x': _ramp,
    'y': _uniform,
    'C': _levels,
    'D': _gaussian,
})
print(df1)

# Keep only row labels 0, 2 and 5 and the columns A, C, B.
# 'B' is not a column of df1, so reindexing inserts it as an all-NaN column.
print("After Reindexing:")
_wanted_rows = [0, 2, 5]
_wanted_cols = ['A', 'C', 'B']
df_reindexed = df1.reindex(index=_wanted_rows, columns=_wanted_cols)
print(df_reindexed)

            A       C           D     x         y
0  2016-01-01    High   94.640349   0.0  0.052462
1  2016-01-02    High  104.911483   1.0  0.284479
2  2016-01-03     Low  103.198645   2.0  0.052899
3  2016-01-04  Medium   97.144645   3.0  0.402451
4  2016-01-05    High   90.303014   4.0  0.960994
5  2016-01-06  Medium   78.646523   5.0  0.492044
6  2016-01-07     Low   94.981504   6.0  0.966430
7  2016-01-08  Medium  101.024901   7.0  0.172293
8  2016-01-09     Low  105.727394   8.0  0.582610
9  2016-01-10  Medium  100.179746   9.0  0.393036
10 2016-01-11    High   74.268765  10.0  0.311980
11 2016-01-12     Low   90.993757  11.0  0.450440
12 2016-01-13     Low   91.279079  12.0  0.810744
13 2016-01-14    High   89.052444  13.0  0.552947
14 2016-01-15     Low   98.295599  14.0  0.892109
15 2016-01-16     Low  115.241176  15.0  0.745025
16 2016-01-17     Low  115.456091  16.0  0.170712
17 2016-01-18    High  113.447923  17.0  0.700563
18 2016-01-19    High   84.939612  18.0  0.098314


In [8]:
# Align one DataFrame to the row/column labels of another.
_shared_columns = ['col1', 'col2', 'col3']
df2 = pd.DataFrame(np.random.randn(10, 3), columns=_shared_columns)
df3 = pd.DataFrame(np.random.randn(7, 3), columns=_shared_columns)

print("Original df2:", df2, sep="\n")
print("Original df3:", df3, sep="\n")

# df3 has row labels 0..6 only, so the aligned df2 keeps just those rows
# (labels 7, 8 and 9 are dropped); the column labels already match.
df2 = df2.reindex_like(df3)
print("After Reindexed:", df2, sep="\n")

Original df2:
       col1      col2      col3
0  1.964990 -0.057632 -0.078842
1 -0.526876 -0.192742 -0.146227
2 -1.002838 -0.504668  0.001324
3  0.811995  0.709293 -0.579602
4 -0.022654 -0.173177 -1.079529
5  1.282915  1.478444 -0.053956
6  0.148620 -1.173505 -0.352255
7 -0.342799  0.086630  0.862082
8 -1.090806  0.048876 -0.370273
9  2.619551 -2.235643  1.070530
Original df3:
       col1      col2      col3
0  0.877055 -0.409363  0.104958
1  0.353667  1.270210  0.636517
2  0.336914  0.791477 -0.220322
3 -0.441847  0.254475 -0.855233
4  1.505466  1.471862  0.739350
5 -0.110604  0.720587  0.851968
6 -1.274651  0.641726 -0.250442
After Reindexed:
       col1      col2      col3
0  1.964990 -0.057632 -0.078842
1 -0.526876 -0.192742 -0.146227
2 -1.002838 -0.504668  0.001324
3  0.811995  0.709293 -0.579602
4 -0.022654 -0.173177 -1.079529
5  1.282915  1.478444 -0.053956
6  0.148620 -1.173505 -0.352255


In [9]:
# FILLING WHILE REINDEXING
# reindex() takes an optional parameter, `method`, that controls how missing labels are filled
# Accepted values for `method`:
# 1_pad/ffill -> fill values forward
# 2_bfill/backfill -> fill values backward
# 3_nearest -> fill from the nearest index values

In [12]:
_fill_columns = ['col1', 'col2', 'col3']
df4 = pd.DataFrame(np.random.randn(6, 3), columns=_fill_columns)
df5 = pd.DataFrame(np.random.randn(2, 3), columns=_fill_columns)

# Without a fill method, the row labels that df5 lacks (2..5) come back as NaN.
_nan_padded = df5.reindex_like(df4)
# With method='ffill', each missing row takes the values of the preceding row.
_forward_filled = df5.reindex_like(df4, method='ffill')

print(_nan_padded)
print("DataFrame with forward fill:")
print(_forward_filled)
# NOTE: the last 4 rows are padded with the values of row 1

       col1      col2      col3
0 -1.330485 -0.311242  1.409099
1  0.643975 -1.396882 -0.377404
2       NaN       NaN       NaN
3       NaN       NaN       NaN
4       NaN       NaN       NaN
5       NaN       NaN       NaN
DataFrame with forward fill:
       col1      col2      col3
0 -1.330485 -0.311242  1.409099
1  0.643975 -1.396882 -0.377404
2  0.643975 -1.396882 -0.377404
3  0.643975 -1.396882 -0.377404
4  0.643975 -1.396882 -0.377404
5  0.643975 -1.396882 -0.377404


In [14]:
# Cap the number of consecutive missing rows that a forward fill may populate.
print("DataFrame with forward fill limiting to 1:")
# With limit=1, only row 2 is filled (from the preceding row 1); rows 3..5 stay NaN.
_limited_fill = df5.reindex_like(df4, method='ffill', limit=1)
print(_limited_fill)

DataFrame with forward fill limiting to 1:
       col1      col2      col3
0 -1.330485 -0.311242  1.409099
1  0.643975 -1.396882 -0.377404
2  0.643975 -1.396882 -0.377404
3       NaN       NaN       NaN
4       NaN       NaN       NaN
5       NaN       NaN       NaN


In [15]:
# Renaming: relabel selected rows and columns through label -> new-label mappings.
df6 = pd.DataFrame(np.random.randn(6, 3), columns=['col1', 'col2', 'col3'])
print(df6)

# Labels that are not keys of a mapping (column 'col3', rows 3..5) keep their names.
_column_aliases = {'col1': 'c1', 'col2': 'c2'}
_row_aliases = {0: 'apple', 1: 'banana', 2: 'durian'}

print("After renaming the rows and columns:")
# rename() returns a new frame here; df6 itself is left untouched.
_renamed = df6.rename(columns=_column_aliases, index=_row_aliases)
print(_renamed)

       col1      col2      col3
0 -0.031457 -0.779161  0.296261
1  0.226637 -1.440682 -0.517699
2  0.969934 -0.291249 -1.081098
3  2.926702  1.715237 -0.199818
4 -0.018139  0.721724  1.304796
5  0.194436  0.722109 -0.588682
After renaming the rows and columns:
              c1        c2      col3
apple  -0.031457 -0.779161  0.296261
banana  0.226637 -1.440682 -0.517699
durian  0.969934 -0.291249 -1.081098
3       2.926702  1.715237 -0.199818
4      -0.018139  0.721724  1.304796
5       0.194436  0.722109 -0.588682
