In [1]:
# https://dplyr.tidyverse.org/reference/lead-lag.html
%run nb_helpers.py

from datar.all import *

nb_header(lead, lag, book='lead-lag')

### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ lead</div>

##### Shift a vector by `n` positions.

The original API:  
https://dplyr.tidyverse.org/reference/lead.html  

##### Args:
&emsp;&emsp;`x`: A vector  
&emsp;&emsp;`n`: The number of positions to shift.  
&emsp;&emsp;`default`: The default value to use for positions that don't exist.  
&emsp;&emsp;`order_by`: A vector of column names to order by.  

##### Returns:
&emsp;&emsp;A vector  


### <div style="background-color: #EEE; padding: 5px 0 8px 0">★ lag</div>

##### Shift a vector by `n` positions.

The original API:  
https://dplyr.tidyverse.org/reference/lag.html  

##### Args:
&emsp;&emsp;`x`: A vector  
&emsp;&emsp;`n`: The number of positions to shift.  
&emsp;&emsp;`default`: The default value to use for positions that don't exist.  
&emsp;&emsp;`order_by`: A vector of column names to order by.  

##### Returns:
&emsp;&emsp;A vector  


In [2]:
x = [1,2,3,4,5]

lag(x)

0    NaN
1    1.0
2    2.0
3    3.0
4    4.0
dtype: float64

In [3]:
lead(x)

0    2.0
1    3.0
2    4.0
3    5.0
4    NaN
dtype: float64

In [4]:
tibble(behind=lag(x), x=x, ahead=lead(x))

Unnamed: 0,behind,x,ahead
,<float64>,<int64>,<float64>
0.0,,1,2.0
1.0,1.0,2,3.0
2.0,2.0,3,4.0
3.0,3.0,4,5.0
4.0,4.0,5,


In [5]:
lag(x, n=1)

0    NaN
1    1.0
2    2.0
3    3.0
4    4.0
dtype: float64

In [6]:
lag(x, n=2)

0    NaN
1    NaN
2    1.0
3    2.0
4    3.0
dtype: float64

In [7]:
lead(x, n=1)

0    2.0
1    3.0
2    4.0
3    5.0
4    NaN
dtype: float64

In [8]:
lead(x, n=2)

0    3.0
1    4.0
2    5.0
3    NaN
4    NaN
dtype: float64

In [9]:
lag(x, default=0)

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [10]:
lead(x, default=6)

0    2
1    3
2    4
3    5
4    6
dtype: int64

In [11]:
scrambled = slice_sample(
    tibble(year=[2000, 2001, 2002, 2003, 2004, 2005], 
           value=[a**2 for a in range(6)]), 
    prop=1
) 

scrambled >> mutate(previous_year_value = lag(f.value)) >> arrange(f.year)

Unnamed: 0,year,value,previous_year_value
,<int64>,<int64>,<float64>
0.0,2000,0,1.0
1.0,2001,1,9.0
2.0,2002,4,
3.0,2003,9,4.0
4.0,2004,16,0.0
5.0,2005,25,16.0


In [12]:
# Use this for lag(value, order_by = year) instead
scrambled >> mutate(previous_year_value = lag(f.value, order_by=f.year)) >> arrange(f.year)


Unnamed: 0,year,value,previous_year_value
,<int64>,<int64>,<float64>
0.0,2000,0,0.0
1.0,2001,1,
2.0,2002,4,4.0
3.0,2003,9,1.0
4.0,2004,16,9.0
5.0,2005,25,16.0
