In [1]:
# https://dplyr.tidyverse.org/reference/slice.html

from datar.datasets import mtcars
from datar.all import *

# Run the helper script in this kernel (presumably where nb_header is defined -- confirm).
%run nb_helpers.py
# Show the reference documentation for every verb demonstrated in this notebook.
nb_header(slice, slice_head, slice_max, slice_min, slice_sample, slice_tail)

### # slice  

##### Index rows by their (integer) locations

Original APIs https://dplyr.tidyverse.org/reference/slice.html  

##### Args:
&emsp;&emsp;`_data`: The dataframe  
&emsp;&emsp;`rows`: The indexes  
&emsp;&emsp;&emsp;&emsp;Ranges can be specified as `f[1:3]`  
&emsp;&emsp;&emsp;&emsp;Note that negative numbers behave differently than in dplyr.  
&emsp;&emsp;&emsp;&emsp;In dplyr, negative numbers mean exclusion, but here negative  
&emsp;&emsp;&emsp;&emsp;numbers are negative indexes, as in Python indexing.  
&emsp;&emsp;&emsp;&emsp;For exclusive indexes, you need to use inversion. For example:  
&emsp;&emsp;&emsp;&emsp;`slice(df, ~f[:3])` excludes first 3 rows. You can also do:  
&emsp;&emsp;&emsp;&emsp;`slice(df, ~c(f[:3], 6))` to exclude multiple sets of rows.  
&emsp;&emsp;&emsp;&emsp;To exclude a single row, you can't do this directly: `slice(df, ~1)`  
&emsp;&emsp;&emsp;&emsp;since Python evaluates `~1` to a plain number (`-2`) before `slice` sees it.  
&emsp;&emsp;&emsp;&emsp;Do this instead: `slice(df, ~c(1))`  

&emsp;&emsp;&emsp;&emsp;Exclusive and inclusive expressions are allowed to be mixed, unlike  
&emsp;&emsp;&emsp;&emsp;in `dplyr`. They are expanded in the order they are passed in.  

&emsp;&emsp;`_preserve`: Relevant when the `_data` input is grouped.  
&emsp;&emsp;&emsp;&emsp;If `_preserve=False` (the default), the grouping structure is  
&emsp;&emsp;&emsp;&emsp;recalculated based on the resulting data;  
&emsp;&emsp;&emsp;&emsp;otherwise the grouping is kept as is.  

&emsp;&emsp;`_base0`: If rows are selected by indexes, whether they are 0-based.  
&emsp;&emsp;&emsp;&emsp;If not provided, `datar.base.get_option('index.base.0')` is used.  

##### Returns:
&emsp;&emsp;The sliced dataframe  


### # slice_head  

##### Select first rows

##### Args:
&emsp;&emsp;`_data`: The dataframe.  
&emsp;&emsp;`n`, `prop`: Provide either `n`, the number of rows, or `prop`, the  
&emsp;&emsp;&emsp;&emsp;proportion of rows to select.  
&emsp;&emsp;&emsp;&emsp;If neither is supplied, `n = 1` will be used.  
&emsp;&emsp;&emsp;&emsp;If n is greater than the number of rows in the group (or prop > 1),  
&emsp;&emsp;&emsp;&emsp;the result will be silently truncated to the group size.  
&emsp;&emsp;&emsp;&emsp;If the proportion of a group size is not an integer,  
&emsp;&emsp;&emsp;&emsp;it is rounded down.  

##### Returns:
&emsp;&emsp;The sliced dataframe  


### # slice_max  

##### Select rows with highest values of a variable.

See Also:  
&emsp;&emsp;[`slice_head()`](datar.dplyr.slice.slice_head)  


### # slice_min  

##### Select rows with lowest values of a variable.

See Also:  
&emsp;&emsp;[`slice_head()`](datar.dplyr.slice.slice_head)  


### # slice_sample  

##### Randomly selects rows.

See Also:  
&emsp;&emsp;[`slice_head()`](datar.dplyr.slice.slice_head)  


### # slice_tail  

##### Select last rows

See Also:  
&emsp;&emsp;[`slice_head()`](datar.dplyr.slice.slice_head)  


In [2]:
mtcars >> slice(1) # 1-based by default

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4


In [3]:
mtcars >> slice(n()) # last row

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Volvo 142E,21.4,4,121.0,109,4.11,2.78,18.6,1,1,4,2


In [4]:
# or
mtcars >> slice(-1) # for both 0-based and 1-based

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Volvo 142E,21.4,4,121.0,109,4.11,2.78,18.6,1,1,4,2


In [5]:
# f[5:n()] is a row range starting at the 5th row (1-based); n() is the row count,
# the same value that selected the last row in the slice(n()) cell above.
mtcars >> slice(f[5:n()])

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4
Merc 280C,17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4
Merc 450SE,16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3
Merc 450SL,17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3
Merc 450SLC,15.2,8,275.8,180,3.07,3.78,18.0,0,0,3,3


In [6]:
# Alternative spelling: ~f[:4] inverts the selection, dropping the first 4 rows
# and keeping everything after them.
mtcars >> slice(~f[:4])

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4
Merc 280C,17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4
Merc 450SE,16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3
Merc 450SL,17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3
Merc 450SLC,15.2,8,275.8,180,3.07,3.78,18.0,0,0,3,3


In [7]:
# Unlike ~f[:4] (exclusion), a negated range is read as negative indexes:
# this picks the last 4 rows, as the output below shows.
mtcars >> slice(-f[:4]) 

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Ford Pantera L,15.8,8,351.0,264,4.22,3.17,14.5,0,1,5,4
Ferrari Dino,19.7,6,145.0,175,3.62,2.77,15.5,0,1,5,6
Maserati Bora,15.0,8,301.0,335,3.54,3.57,14.6,0,1,5,8
Volvo 142E,21.4,4,121.0,109,4.11,2.78,18.6,1,1,4,2


In [8]:
mtcars >> slice_head(n=5)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [9]:
mtcars >> slice_tail(n=5)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Lotus Europa,30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2
Ford Pantera L,15.8,8,351.0,264,4.22,3.17,14.5,0,1,5,4
Ferrari Dino,19.7,6,145.0,175,3.62,2.77,15.5,0,1,5,6
Maserati Bora,15.0,8,301.0,335,3.54,3.57,14.6,0,1,5,8
Volvo 142E,21.4,4,121.0,109,4.11,2.78,18.6,1,1,4,2


In [10]:
mtcars >> slice_min(f.mpg, n=5)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Cadillac Fleetwood,10.4,8,472.0,205,2.93,5.25,17.98,0,0,3,4
Lincoln Continental,10.4,8,460.0,215,3.0,5.424,17.82,0,0,3,4
Camaro Z28,13.3,8,350.0,245,3.73,3.84,15.41,0,0,3,4
Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
Chrysler Imperial,14.7,8,440.0,230,3.23,5.345,17.42,0,0,3,4


In [11]:
mtcars >> slice_max(f.mpg, n=5)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Toyota Corolla,33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1
Fiat 128,32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1
Honda Civic,30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2
Lotus Europa,30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2
Fiat X1-9,27.3,4,79.0,66,4.08,1.935,18.9,1,1,4,1


In [12]:
# Ties are kept when with_ties is not given: n=1 still returns every row that
# shares the minimum cyl (4), not a single row.
mtcars >> slice_min(f.cyl, n=1)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
Fiat 128,32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1
Honda Civic,30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2
Toyota Corolla,33.9,4,71.1,65,4.22,1.835,19.9,1,1,4,1
Toyota Corona,21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1
Fiat X1-9,27.3,4,79.0,66,4.08,1.935,18.9,1,1,4,1
Porsche 914-2,26.0,4,120.3,91,4.43,2.14,16.7,0,1,5,2
Lotus Europa,30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2


In [13]:
# with_ties=False drops the tied duplicates; only the first tied row (Datsun 710) remains.
mtcars >> slice_min(f.cyl, n=1, with_ties=False)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1


In [14]:
# with_ties='last' keeps the last of the tied rows instead of the first
# (Volvo 142E, which is also the final row of mtcars).
mtcars >> slice_min(f.cyl, n=1, with_ties='last')

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Volvo 142E,21.4,4,121.0,109,4.11,2.78,18.6,1,1,4,2


In [15]:
mtcars >> slice_sample(n=5)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Fiat X1-9,27.3,4,79.0,66,4.08,1.935,18.9,1,1,4,1
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
Merc 450SL,17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3
Lincoln Continental,10.4,8,460.0,215,3.0,5.424,17.82,0,0,3,4
Ford Pantera L,15.8,8,351.0,264,4.22,3.17,14.5,0,1,5,4


In [16]:
# A fixed random_state makes the draw repeatable; the next cell reuses the same
# seed and returns the same five rows.
mtcars >> slice_sample(n=5, random_state=8525)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Lotus Europa,30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2
Merc 450SL,17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3
Porsche 914-2,26.0,4,120.3,91,4.43,2.14,16.7,0,1,5,2
Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2


In [17]:
# Same random_state as the previous cell, so the same five rows come back.
mtcars >> slice_sample(n=5, random_state=8525)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Lotus Europa,30.4,4,95.1,113,3.77,1.513,16.9,1,1,5,2
Merc 450SL,17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3
Porsche 914-2,26.0,4,120.3,91,4.43,2.14,16.7,0,1,5,2
Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2


In [18]:
# replace=True samples with replacement, so a row may be drawn more than once
# (Dodge Challenger appears twice in the output below).
mtcars >> slice_sample(n=5, random_state=8525, replace=True)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Dodge Challenger,15.5,8,318.0,150,2.76,3.52,16.87,0,0,3,2
Merc 280C,17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4
Dodge Challenger,15.5,8,318.0,150,2.76,3.52,16.87,0,0,3,2
Chrysler Imperial,14.7,8,440.0,230,3.23,5.345,17.42,0,0,3,4
Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4


In [19]:
# weight_by=f.wt supplies per-row sampling weights from the wt column.
# No random_state is given, so expect different rows on a re-run.
mtcars >> slice_sample(weight_by=f.wt, n=5)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
Fiat 128,32.4,4,78.7,66,4.08,2.2,19.47,1,1,4,1
Honda Civic,30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2


In [20]:
# Small grouped example: group 'a' has 1 row, 'b' has 2 and 'c' has 4.
# x comes from runif(7); no seed is set in this cell, so the values presumably
# differ between runs.
df = tibble(
  group=['a'] + ['b']*2 + ['c']*4,
  x=runif(7)
)
# n=2 is applied per group; 'a' has a single row, so it contributes just that row.
df >> group_by(f.group) >> slice_head(n=2) 

Unnamed: 0,group,x
0,a,0.381945
1,b,0.316842
2,b,0.413982
3,c,0.391872
4,c,0.905792


In [21]:
# prop is applied per group and rounded down: 'a' (1 row) yields no rows,
# 'b' (2 rows) yields 1 and 'c' (4 rows) yields 2.
df >> group_by(f.group) >> slice_head(prop = 0.5)

Unnamed: 0,group,x
0,b,0.316842
1,c,0.391872
2,c,0.905792


In [22]:
# row_number() is 1-based by default
mtcars >> filter(row_number() == 1) 

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4


In [23]:
mtcars >> filter(row_number() == n() )

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Volvo 142E,21.4,4,121.0,109,4.11,2.78,18.6,1,1,4,2


In [24]:
# Pitfall: Python expands `5 <= x < n()` into `(5 <= x) and (x < n())`, and `and`
# cannot be overloaded to work element-wise, so the range test is not applied as
# written -- the output below still contains rows 1-4. Use between() (next cell).
mtcars >> filter(5 <= row_number() < n()) # chained comparison does not filter as intended

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4


In [25]:
# between() expresses the range test that a chained comparison cannot:
# the result starts at the 5th row (Hornet Sportabout).
mtcars >> filter(between(row_number(), 5, n()))

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
Merc 230,22.8,4,140.8,95,3.92,3.15,22.9,1,0,4,2
Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4
Merc 280C,17.8,6,167.6,123,3.92,3.44,18.9,1,0,4,4
Merc 450SE,16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3
Merc 450SL,17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3
Merc 450SLC,15.2,8,275.8,180,3.07,3.78,18.0,0,0,3,3


In [26]:
# Larger grouped example: 10 'a' rows, 20 'b' rows and 40 'c' rows with random x.
# Note: this rebinds `df`, replacing the 7-row frame built in the earlier cell.
df = tibble(
  group=['a']*10 + ['b']*20 + ['c']*40,
  x=runif(70)
)
# The second positional argument (2) is the number of rows kept per group:
# the two smallest x values in each group.
df >> group_by(f.group) >> slice_min(f.x, 2)

Unnamed: 0,group,x
0,a,0.024409
1,a,0.165794
2,b,0.09144
3,b,0.108035
4,c,0.00367
5,c,0.02351


In [27]:
# The two largest x values in each group, listed largest first within the group.
df >> group_by(f.group) >> slice_max(f.x, 2)

Unnamed: 0,group,x
0,a,0.712412
1,a,0.572901
2,b,0.981173
3,b,0.951627
4,c,0.985669
5,c,0.950341


In [28]:
# Two randomly drawn rows per group; no random_state is given, so the rows
# presumably vary between runs.
df >> group_by(f.group) >> slice_sample(2)

Unnamed: 0,group,x
0,a,0.572901
1,a,0.185484
2,b,0.108035
3,b,0.356293
4,c,0.570077
5,c,0.514756
