In [1]:
# https://dplyr.tidyverse.org/reference/distinct.html
from datar.datasets import starwars
from datar.all import *

# Execute the helper script inside this notebook's namespace (IPython %run magic).
# nb_header is presumably defined by nb_helpers.py — it is not imported anywhere else
# in the visible cells. The API summary rendered below this cell covers the two
# functions passed here (distinct, n_distinct).
%run nb_helpers.py
nb_header(distinct, n_distinct, book='distinct')



### # distinct  

##### Select only unique/distinct rows from a data frame.

The original API:  
https://dplyr.tidyverse.org/reference/distinct.html  

##### Args:
&emsp;&emsp;`_data`: The dataframe  
&emsp;&emsp;`*args`, `**kwargs`: Optional variables to use when determining  
&emsp;&emsp;&emsp;&emsp;uniqueness. If omitted, all variables are used.  

&emsp;&emsp;`_keep_all`: If `True`, keep all variables in `_data`; when a combination is not distinct, the first row of values is kept.  

##### Returns:
&emsp;&emsp;A dataframe without duplicated rows in _data  


### # n_distinct  

##### Get the number of distinct elements


In [2]:
# Build a 100-row frame with two integer columns, each drawn with replacement
# from range(10), so repeated (x, y) pairs are expected.
# NOTE(review): no random seed is set in the visible cells, so the counts and
# tables recorded below will presumably differ on every run — consider seeding
# the RNG before sampling.
df = tibble(
  x=sample(range(10), 100, replace=True),
  y=sample(range(10), 100, replace=True)
)
# Confirm the row count before any de-duplication.
nrow(df)

100

In [3]:
# Plain (non-piped) call with no columns given: rows are de-duplicated using
# all columns of df (x and y), then counted.
nrow(distinct(df))

60

In [4]:
# Same check in pipe form with both columns named explicitly; the recorded
# count equals the one from the un-piped call with no columns.
df >> distinct(f.x, f.y) >> nrow()

60

In [5]:
# De-duplicate on x alone; only the x column appears in the result.
df >> distinct(f.x)

Unnamed: 0,x
,<int64>
0.0,0
2.0,2
3.0,3
4.0,4
7.0,1
8.0,6
11.0,5
13.0,8
15.0,9


In [6]:
# De-duplicate on y alone; only the y column appears in the result.
df >> distinct(f.y)

Unnamed: 0,y
,<int64>
0.0,1
1.0,5
4.0,2
5.0,7
6.0,0
9.0,8
12.0,6
13.0,3
15.0,9


In [7]:
# _keep_all=True keeps the remaining columns (here y) next to each distinct x.
# The retained index labels match the plain distinct(f.x) output, i.e. the row
# kept for each x looks like its first occurrence.
df >> distinct(f.x, _keep_all=True)

Unnamed: 0,x,y
,<int64>,<int64>
0.0,0,1
2.0,2,1
3.0,3,5
4.0,4,2
7.0,1,7
8.0,6,7
11.0,5,0
13.0,8,3
15.0,9,9


In [8]:
# Same as the previous example but keyed on y: each distinct y is shown with
# the x value from the row that was kept.
df >> distinct(f.y, _keep_all=True)

Unnamed: 0,x,y
,<int64>,<int64>
0.0,0,1
1.0,0,5
4.0,4,2
5.0,0,7
6.0,0,0
9.0,6,8
12.0,2,6
13.0,8,3
15.0,9,9


In [9]:
# Uniqueness can be evaluated on a computed expression passed as a keyword;
# the result contains only the new column (diff = |x - y|).
df >> distinct(diff=abs(f.x-f.y))

Unnamed: 0,diff
,<int64>
0.0,1
1.0,5
3.0,2
5.0,7
6.0,0
7.0,6
10.0,4
23.0,3
26.0,9


In [10]:
# Use across() with a selection helper to key uniqueness on every column whose
# name contains "color" (hair_color, skin_color, eye_color in starwars).
starwars >> distinct(across(contains("color")))

Unnamed: 0,hair_color,skin_color,eye_color
,<object>,<object>,<object>
0,blond,fair,blue
1,,gold,yellow
2,,"white, blue",red
3,none,white,yellow
...,...,...,...
4,brown,light,brown
79,none,pale,white
81,black,dark,dark
82,brown,light,hazel


In [11]:
# Grouped example. Note: this rebinds `df`, replacing the random frame used by
# the earlier cells.
df = tibble(
  g=[1, 1, 2, 2],
  x=[1, 1, 2, 1]
) >> group_by(f.g)

# On grouped data the grouping column g is kept in the result, so rows are
# distinct per (g, x) combination rather than per x alone.
df >> distinct(f.x) 

Unnamed: 0,g,x
,<int64>,<int64>
0.0,1,1
1.0,2,2
2.0,2,1
