In [1]:
# https://dplyr.tidyverse.org/reference/ranking.html
import numpy
from pandas.core.series import Series

from datar.datasets import mtcars
from datar.all import *

%run nb_helpers.py
nb_header(row_number, min_rank, dense_rank, percent_rank, cume_dist, ntile)



### # row_number  

##### Gives the row number

See https://dplyr.tidyverse.org/reference/ranking.html  

##### Args:
&emsp;&emsp;`x`: a vector of values to rank.  
&emsp;&emsp;&emsp;&emsp;Otherwise it'll depend on `_data`  

##### Returns:
&emsp;&emsp;The row number of `x` or the data frame (0-based)  


### # min_rank  

##### Rank the data using min method


### # dense_rank  

##### Rank the data using dense method


### # percent_rank  

##### Rank the data using percent_rank method


### # cume_dist  

##### Rank the data using percent_rank method


### # ntile  

##### A rough rank, which breaks the input vector into `n` buckets.

##### Note:
&emsp;&emsp;The output tiles are 0-based.  
&emsp;&emsp;The result is slightly different from dplyr's ntile.  
&emsp;&emsp;>>> ntile(c(1,2,NA,1,0,NA), 2) # dplyr  
&emsp;&emsp;>>> # 1 2 NA 2 1 NA  
&emsp;&emsp;>>> ntile([1,2,NA,1,0,NA], n=2) # datar  
&emsp;&emsp;>>> # [0, 1, NA, 0, 0, NA]  
&emsp;&emsp;>>> # Categories (2, int64): [0 < 1]  


In [2]:
df = tibble(x=c(5, 1, 3, 2, 2, numpy.nan))
df >> mutate(
    row_number=row_number(),
    min_rank=min_rank(f.x), 
    dense_rank=dense_rank(f.x),
    percent_rank=percent_rank(f.x),
    cume_dist=cume_dist(f.x),
    ntile=ntile(f.x, 2)
)

Unnamed: 0,x,row_number,min_rank,dense_rank,percent_rank,cume_dist,ntile
,<float64>,<float64>,<float64>,<float64>,<float64>,<float64>,<category>
0.0,5.0,1.0,5.0,4.0,1.00,1.0,1
1.0,1.0,2.0,1.0,1.0,0.00,0.2,0
2.0,3.0,3.0,4.0,3.0,0.75,0.8,0
3.0,2.0,4.0,2.0,2.0,0.25,0.6,0
4.0,2.0,5.0,2.0,2.0,0.25,0.6,0
5.0,,6.0,,,,,


In [3]:
tibble(x=range(8)) >> mutate(ntile=ntile(f.x, 3))

Unnamed: 0,x,ntile
,<int64>,<category>
0.0,0,0
1.0,1,0
2.0,2,0
3.0,3,1
4.0,4,1
5.0,5,2
6.0,6,2
7.0,7,2


In [4]:
mtcars >> mutate(n=row_number() == 0)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb,n
,<float64>,<int64>,<float64>,<int64>,<float64>,<float64>,<float64>,<int64>,<int64>,<int64>,<int64>,<bool>
Mazda RX4,21.0,6,160.0,110,3.90,2.620,16.46,0,1,4,4,False
Mazda RX4 Wag,21.0,6,160.0,110,3.90,2.875,17.02,0,1,4,4,False
Datsun 710,22.8,4,108.0,93,3.85,2.320,18.61,1,1,4,1,False
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1,False
Hornet Sportabout,18.7,8,360.0,175,3.15,3.440,17.02,0,0,3,2,False
Valiant,18.1,6,225.0,105,2.76,3.460,20.22,1,0,3,1,False
Duster 360,14.3,8,360.0,245,3.21,3.570,15.84,0,0,3,4,False
Merc 240D,24.4,4,146.7,62,3.69,3.190,20.00,1,0,4,2,False
Merc 230,22.8,4,140.8,95,3.92,3.150,22.90,1,0,4,2,False


In [5]:
mtcars >> filter(0 <= row_number() < 10)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
,<float64>,<int64>,<float64>,<int64>,<float64>,<float64>,<float64>,<int64>,<int64>,<int64>,<int64>
Mazda RX4,21.0,6,160.0,110,3.90,2.620,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160.0,110,3.90,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108.0,93,3.85,2.320,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.440,17.02,0,0,3,2
Valiant,18.1,6,225.0,105,2.76,3.460,20.22,1,0,3,1
Duster 360,14.3,8,360.0,245,3.21,3.570,15.84,0,0,3,4
Merc 240D,24.4,4,146.7,62,3.69,3.190,20.00,1,0,4,2
Merc 230,22.8,4,140.8,95,3.92,3.150,22.90,1,0,4,2
