In [1]:
# Examples in this notebook follow the dplyr mutating-joins reference:
# https://dplyr.tidyverse.org/reference/mutate-joins.html
from datar.datasets import band_members, band_instruments, band_instruments2
# Star import: presumably the source of the join verbs, `f`, `tibble` and `NA`
# used in the cells below -- no other import in view provides them.
from datar.all import *

# Execute the helper script in this kernel, then display the documentation of
# the four join verbs (nb_header is presumably defined by nb_helpers.py -- confirm).
%run nb_helpers.py
nb_header(inner_join, left_join, right_join, full_join)

### # inner_join  

##### Mutating joins including only the rows with matching keys in both x and y.

##### Args:
&emsp;&emsp;x, y: A pair of data frames  
&emsp;&emsp;`by`: A character vector of variables to join by.  
&emsp;&emsp;`copy`: If x and y are not from the same data source, and copy is  
&emsp;&emsp;&emsp;&emsp;`True`, then y will be copied into the same src as x.  
&emsp;&emsp;&emsp;&emsp;This allows you to join tables across srcs, but it is a  
&emsp;&emsp;&emsp;&emsp;potentially expensive operation so you must opt into it.  

&emsp;&emsp;`suffix`: If there are non-joined duplicate variables in x and y,  
&emsp;&emsp;&emsp;&emsp;these suffixes will be added to the output to disambiguate them.  
&emsp;&emsp;&emsp;&emsp;Should be a character vector of length 2.  

&emsp;&emsp;`keep`: Should the join keys from both x and y be preserved in the output?  

##### Returns:
&emsp;&emsp;The joined dataframe  


### # left_join  

##### Mutating joins including all rows in x.

See inner_join()  


### # right_join  

##### Mutating joins including all rows in y.

See inner_join()  


### # full_join  

##### Mutating joins including all rows in x or y.

See inner_join()  


In [2]:
# Inner join with no `by` argument (presumably joins on the shared `name`
# column -- the result equals the explicit by=f.name call further down).
# Only John and Paul are kept in the output.
band_members >> inner_join(band_instruments)

Unnamed: 0,name,band,plays
0,John,Beatles,guitar
1,Paul,Beatles,bass


In [3]:
# Left join: every row of band_members is kept; Mick has no match in
# band_instruments, so his `plays` value is empty in the output.
band_members >> left_join(band_instruments)

Unnamed: 0,name,band,plays
0,Mick,Stones,
1,John,Beatles,guitar
2,Paul,Beatles,bass


In [4]:
# Right join: every row of band_instruments is kept; Keith has no match in
# band_members, so his `band` value is empty in the output.
band_members >> right_join(band_instruments)

Unnamed: 0,name,band,plays
0,John,Beatles,guitar
1,Paul,Beatles,bass
2,Keith,,guitar


In [5]:
# Full join: rows from either table are kept, so both Mick (empty `plays`)
# and Keith (empty `band`) appear in the output.
band_members >> full_join(band_instruments)

Unnamed: 0,name,band,plays
0,Mick,Stones,
1,John,Beatles,guitar
2,Paul,Beatles,bass
3,Keith,,guitar


In [6]:
# Same inner join, but with the join column passed explicitly through `by`
# as the column reference f.name; the output is identical to the call without `by`.
band_members >> inner_join(band_instruments, by=f.name)

Unnamed: 0,name,band,plays
0,John,Beatles,guitar
1,Paul,Beatles,bass


In [7]:
# Join on differently named key columns: the dict maps `name` (left table) to
# `artist` (band_instruments2). Only the `name` key column appears in the output.
# NOTE(review): the unmatched right-hand row shows an empty `name` here, while the
# keep=True variant below shows its `artist` is Keith. dplyr coalesces the key in
# this case -- confirm whether the empty value is intended.
band_members >> full_join(band_instruments2, by={'name': 'artist'})

Unnamed: 0,name,band,plays
0,Mick,Stones,
1,John,Beatles,guitar
2,Paul,Beatles,bass
3,,,guitar


In [8]:
# keep=True preserves the join keys from both inputs, so the output carries
# both the `name` and the `artist` columns.
band_members >> full_join(band_instruments2, by={'name': 'artist'}, keep=True)

Unnamed: 0,name,band,artist,plays
0,Mick,Stones,,
1,John,Beatles,John,guitar
2,Paul,Beatles,Paul,bass
3,,,Keith,guitar


In [9]:
# Duplicate keys on the right-hand side: x == 1 occurs twice in df2, so the
# joined output contains one row per match ("first" and "second").
# x == 3 has no match in df2, so its `y` value is empty.
df1 = tibble(x=[1,2,3])
df2 = tibble(x=[1,1,2], y=["first", "second", "third"])
df1 >> left_join(df2)

Unnamed: 0,x,y
0,1,first
1,1,second
2,2,third
3,3,


In [10]:
# Missing values in the key column: both frames have an NA in `x`
# (this cell rebinds df1/df2 from the previous example).
# In the output the NA row of df1 receives z == 3, i.e. the NA keys are
# treated as matching each other.
df1 = tibble(x=[1, NA], y=2)
df2 = tibble(x=[1, NA], z=3)
# Verb called directly with the data frame as first argument instead of via `>>`.
left_join(df1, df2) # na_matches not supported yet

Unnamed: 0,x,y,z
0,1.0,2,3
1,,2,3
