In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample view counts used as the input series for the pct_change examples.
youtube_views = [125, 800, 335, 400, 1500]
df_YouTube = pd.DataFrame({'YouTube Views': youtube_views})
df_YouTube

Unnamed: 0,YouTube Views
0,125
1,800
2,335
3,400
4,1500


In [3]:
# Example 1a - Default periods
# periods=1 is the default: each row is compared with the row directly above
# it, so the first row has nothing to compare against and comes out as NaN.
view_counts = df_YouTube['YouTube Views']
df_YouTube['Views_Change'] = view_counts.pct_change(periods=1)
df_YouTube

Unnamed: 0,YouTube Views,Views_Change
0,125,
1,800,5.4
2,335,-0.58125
3,400,0.19403
4,1500,2.75


In [4]:
# Example 1b - Express the change as a percentage number instead of a fraction
# (5.4 -> 540.0). The column stays numeric; no '%' sign is added.
fractional_change = df_YouTube['Views_Change']
df_YouTube['Views_Change_100'] = 100 * fractional_change
df_YouTube

Unnamed: 0,YouTube Views,Views_Change,Views_Change_100
0,125,,
1,800,5.4,540.0
2,335,-0.58125,-58.125
3,400,0.19403,19.402985
4,1500,2.75,275.0


In [5]:
# Example 2 - Periods 2
# Compare each row with the value two rows earlier; the first two rows are NaN.
two_period_change = df_YouTube['YouTube Views'].pct_change(periods=2)
df_YouTube['Views_Change_2_Periods'] = two_period_change
df_YouTube

Unnamed: 0,YouTube Views,Views_Change,Views_Change_100,Views_Change_2_Periods
0,125,,,
1,800,5.4,540.0,
2,335,-0.58125,-58.125,1.68
3,400,0.19403,19.402985,-0.5
4,1500,2.75,275.0,3.477612


In [6]:
# Example 3 - Absolute value of the percentage change
# Keeps the size of each change and drops its sign (-0.58125 -> 0.58125).
signed_change = df_YouTube['YouTube Views'].pct_change()
df_YouTube['Views_Absolute_Difference'] = signed_change.abs()
df_YouTube

Unnamed: 0,YouTube Views,Views_Change,Views_Change_100,Views_Change_2_Periods,Views_Absolute_Difference
0,125,,,,
1,800,5.4,540.0,,5.4
2,335,-0.58125,-58.125,1.68,0.58125
3,400,0.19403,19.402985,-0.5,0.19403
4,1500,2.75,275.0,3.477612,2.75


In [7]:
# Example 4 - Percentage change on a whole DataFrame
# Called on a DataFrame, pct_change works down the rows of every column.
card_sales_by_player = {
    'Donald Bradman': [28, 46, 33],
    'Nolan Ryan': [511, 702, 611],
}
monthly_card_sales = pd.DataFrame(card_sales_by_player)
monthly_card_sales.pct_change()

Unnamed: 0,Donald Bradman,Nolan Ryan
0,,
1,0.642857,0.373777
2,-0.282609,-0.12963


In [11]:
# Example 5 - Percentage change across columns
# One row per year, one column per quarter; the row index is labelled 'Year'.
years = [2023, 2024, 2025]
quarterly_values = {
    'Q1': [182, 270, 330],
    'Q2': [211, 220, 380],
    'Q3': [250, 230, 390],
}
df_merchants = pd.DataFrame(quarterly_values, index=years)
df_merchants.index.name = 'Year'

df_merchants

Unnamed: 0_level_0,Q1,Q2,Q3
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023,182,211,250
2024,270,220,230
2025,330,380,390


In [12]:
# Work across the columns (Q1 -> Q2 -> Q3) within each row instead of down
# the rows; the first column has no predecessor and is NaN.
df_merchants.pct_change(axis='columns')

Unnamed: 0_level_0,Q1,Q2,Q3
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023,,0.159341,0.184834
2024,,-0.185185,0.045455
2025,,0.151515,0.026316


In [13]:
# Example 6 - Time series analysis with date index
dates = pd.date_range(start='2025-04-19', periods=7, freq='D')
temps = [30, 32, 31, 35, 36, 34, 33]

# Build the frame, move 'Date' into the index and keep the sorted result.
# sort_index() returns a new frame, so its result must be assigned: calling it
# only for display would leave df_temps itself in its original order.
# pct_change compares each row with the one before it, so row order matters.
df_temps = (
    pd.DataFrame({'Date': dates, 'Temperature': temps})
    .set_index('Date')
    .sort_index()
)
df_temps

Unnamed: 0_level_0,Temperature
Date,Unnamed: 1_level_1
2025-04-19,30
2025-04-20,32
2025-04-21,31
2025-04-22,35
2025-04-23,36
2025-04-24,34
2025-04-25,33


In [14]:
# Row-over-row percentage change in temperature; the first date is NaN
# because there is no earlier row to compare with.
temperature = df_temps['Temperature']
df_temps['Temp_Change'] = temperature.pct_change()
df_temps

Unnamed: 0_level_0,Temperature,Temp_Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2025-04-19,30,
2025-04-20,32,0.066667
2025-04-21,31,-0.03125
2025-04-22,35,0.129032
2025-04-23,36,0.028571
2025-04-24,34,-0.055556
2025-04-25,33,-0.029412


In [15]:
# Example 7 - Dealing with null values prior to the percentage change
data = dict(
    Time=['08:00', '08:15', '08:30', '08:45', '09:00', '09:15'],
    Passengers=[120, 125, np.nan, 130, 128, np.nan],
)
nan_df = pd.DataFrame(data)

# Replace the missing counts with a fixed placeholder (100) first, so every
# row has a value to compare against. The 'Passengers' column itself is left
# untouched; only the derived 'diff' column uses the filled values.
passengers_filled = nan_df['Passengers'].fillna(100)
nan_df['diff'] = passengers_filled.pct_change()

nan_df

Unnamed: 0,Time,Passengers,diff
0,08:00,120.0,
1,08:15,125.0,0.041667
2,08:30,,-0.2
3,08:45,130.0,0.3
4,09:00,128.0,-0.015385
5,09:15,,-0.21875


In [17]:
# Example 8 - Different ways to fill in the first value

# Backward fill copies the next non-null value upward into each gap, so the
# leading NaN takes on the value of the row directly below it.
temp_change = df_temps['Temp_Change']
df_temps['Filled_bfill'] = temp_change.bfill()
df_temps

Unnamed: 0_level_0,Temperature,Temp_Change,Filled_bfill
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-04-19,30,,0.066667
2025-04-20,32,0.066667,0.066667
2025-04-21,31,-0.03125,-0.03125
2025-04-22,35,0.129032,0.129032
2025-04-23,36,0.028571,0.028571
2025-04-24,34,-0.055556,-0.055556
2025-04-25,33,-0.029412,-0.029412


In [18]:
# Alternative: treat the missing first change as "no change" by filling with 0.
zero_filled = df_temps['Temp_Change'].fillna(value=0)
df_temps['Filled_zero'] = zero_filled
df_temps

Unnamed: 0_level_0,Temperature,Temp_Change,Filled_bfill,Filled_zero
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-04-19,30,,0.066667,0.0
2025-04-20,32,0.066667,0.066667,0.066667
2025-04-21,31,-0.03125,-0.03125,-0.03125
2025-04-22,35,0.129032,0.129032,0.129032
2025-04-23,36,0.028571,0.028571,0.028571
2025-04-24,34,-0.055556,-0.055556,-0.055556
2025-04-25,33,-0.029412,-0.029412,-0.029412


In [19]:
# Alternative: fill the missing first change with the average of the others.
# Series.mean() skips NaN by default, so the gap does not affect the average.
temp_change = df_temps['Temp_Change']
mean_change = temp_change.mean()

df_temps['Filled_mean'] = temp_change.fillna(mean_change)

df_temps

Unnamed: 0_level_0,Temperature,Temp_Change,Filled_bfill,Filled_zero,Filled_mean
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-04-19,30,,0.066667,0.0,0.018009
2025-04-20,32,0.066667,0.066667,0.066667,0.066667
2025-04-21,31,-0.03125,-0.03125,-0.03125,-0.03125
2025-04-22,35,0.129032,0.129032,0.129032,0.129032
2025-04-23,36,0.028571,0.028571,0.028571,0.028571
2025-04-24,34,-0.055556,-0.055556,-0.055556,-0.055556
2025-04-25,33,-0.029412,-0.029412,-0.029412,-0.029412


In [20]:
# Example 9 - Groupby
data = {
    'Date': pd.date_range(start='2025-01-01', periods=12, freq='ME'),
    'Event': ['5K', '10K', 'Half', 'Marathon'] * 3,
    'Time': [25, 55, 110, 240, 24, 54, 108, 238, 23, 52, 107, 237]
}
df_running = pd.DataFrame(data)
df_running.sort_values(by=['Event', 'Date'], inplace=True)
df_running.reset_index(drop=True, inplace=True)

In [21]:
# Compute the percentage change separately within each event, so the first
# row of every event is NaN instead of being compared with the previous event.
time_by_event = df_running.groupby('Event')['Time']
df_running['Time_Change'] = time_by_event.pct_change()
df_running

Unnamed: 0,Date,Event,Time,Time_Change
0,2025-02-28,10K,55,
1,2025-06-30,10K,54,-0.018182
2,2025-10-31,10K,52,-0.037037
3,2025-01-31,5K,25,
4,2025-05-31,5K,24,-0.04
5,2025-09-30,5K,23,-0.041667
6,2025-03-31,Half,110,
7,2025-07-31,Half,108,-0.018182
8,2025-11-30,Half,107,-0.009259
9,2025-04-30,Marathon,240,
