# PIPE example
source: https://sinyi-chou.github.io/python-pandas-pipe/

In [3]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so the random scores are identical on every run
np.random.seed(520)

# Example table: one row per (student, subject) pair, three subjects per
# student, with a random integer score for each row
df = pd.DataFrame({
    'name': [student for student in ('Ted', 'Lisa', 'Sam') for _ in range(3)],
    'subject': ['math', 'physics', 'history'] * 3,
    'score': np.random.randint(60, 100, size=9),
})

df

Unnamed: 0,name,subject,score
0,Ted,math,87
1,Ted,physics,80
2,Ted,history,75
3,Lisa,math,79
4,Lisa,physics,78
5,Lisa,history,77
6,Sam,math,85
7,Sam,physics,61
8,Sam,history,88


In [17]:
# Rank every score within its own subject group; ascending=False gives
# rank 1 to the highest score in each subject
(
    df
    .groupby(['subject'])['score']
    .rank(ascending=False)
)

0    1.0
1    1.0
2    3.0
3    3.0
4    2.0
5    2.0
6    2.0
7    3.0
8    1.0
Name: score, dtype: float64

In [18]:
def get_subject_rank(input_df):
    """Return a copy of ``input_df`` with a per-subject score ranking added.

    The new 'subject_rank' column holds the rank of each row's 'score'
    within its 'subject' group, with the highest score ranked first.
    The dataframe passed in is left unmodified.
    """
    ranks = input_df.groupby(['subject'])['score'].rank(ascending=False)
    # Work on a copy so the caller's dataframe is not overwritten
    ranked_df = input_df.copy()
    ranked_df['subject_rank'] = ranks
    return ranked_df

# pipe method: passes df as the first argument to get_subject_rank and
# returns whatever the function returns (here, the dataframe with the
# added 'subject_rank' column)
df.pipe(get_subject_rank)

Unnamed: 0,name,subject,score,subject_rank
0,Ted,math,87,1.0
1,Ted,physics,80,1.0
2,Ted,history,75,3.0
3,Lisa,math,79,3.0
4,Lisa,physics,78,2.0
5,Lisa,history,77,2.0
6,Sam,math,85,2.0
7,Sam,physics,61,3.0
8,Sam,history,88,1.0


In [19]:
def get_subject_rank(input_df, df_or_not=True):
    """Rank each score within its subject, highest score first.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Must contain a 'subject' column (grouping key) and a 'score'
        column (values to rank). It is never modified.
    df_or_not : bool, default True
        If truthy, return a copy of ``input_df`` with the ranks added as
        a 'subject_rank' column. If falsy, return only the ranks.

    Returns
    -------
    pandas.DataFrame or pandas.Series
        The augmented dataframe, or the rank Series (named 'score' and
        indexed like ``input_df``), depending on ``df_or_not``.
    """
    # Computed once and shared by both return paths
    subject_rank = (input_df
                    .groupby(['subject'])['score']
                    .rank(ascending=False))

    # Plain truthiness instead of `is True`, so boolean-like flags such as
    # numpy.bool_(True) select the dataframe output as expected
    if df_or_not:
        # Copy only on this path, to avoid overwriting the original dataframe
        output_df = input_df.copy()
        output_df['subject_rank'] = subject_rank
        return output_df

    return subject_rank

# pipe method - the piped function may return arbitrary output; extra
# keyword arguments given to pipe are forwarded to the function
df.pipe(get_subject_rank, df_or_not=False)

0    1.0
1    1.0
2    3.0
3    3.0
4    2.0
5    2.0
6    2.0
7    3.0
8    1.0
Name: score, dtype: float64