# Setup

In [1]:
from piper.defaults import *

piper v0.1.0: Monday, 29 March 2021 19:08:53


# Import data

In [2]:
salesreps = 'inputs/006-MSPTDA-dSalesRepTable.xlsx'
salesreps = pd.read_excel(salesreps)
%piper salesreps >> head() >> display()

territories = 'inputs/006-MSPTDA-dTerritoryTable.txt'
territories = pd.read_csv(territories, sep='\t')
%piper territories >> head()

134 rows, 4 columns


Unnamed: 0,SalesRepID,SalesRepName,HireDate,TerritoryID
0,872-44-12,Sol Marroquin,1994-02-03,NLD
1,872-44-22,Kiera Mcfall,1994-09-10,MEX
2,872-44-32,Raven Beatty,1995-03-19,POL
3,872-44-42,Elinore Dees,1995-06-08,HND


126 rows, 2 columns


Unnamed: 0,TerritoryID,Territory
0,AFG,Afghanistan
1,ALB,Albania
2,APR,Aprine
3,ARE,United Arab Emirates


# Examples

## Example 1: Sales/Territories

In [3]:
%%piper
sales_with_territories <- salesreps
>> inner_join(territories)

In [4]:
# Preview the joined frame by calling the `head` verb as an ordinary
# function, without the %piper magic.
head(sales_with_territories)

134 rows, 5 columns


Unnamed: 0,SalesRepID,SalesRepName,HireDate,TerritoryID,Territory
0,872-44-12,Sol Marroquin,1994-02-03,NLD,Netherlands
1,872-49-42,Sharlene Melendez,2007-01-26,NLD,Netherlands
2,872-44-22,Kiera Mcfall,1994-09-10,MEX,Mexico
3,872-44-32,Raven Beatty,1995-03-19,POL,Poland


In [5]:
%%piper
sales_with_territories
>> count('Territory', percent=True) 
>> head()

82 rows, 3 columns


Unnamed: 0_level_0,n,%,cum %
Territory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Kazakhstan,5,3.73,3.73
Argentina,4,2.99,6.72
Mongolia,4,2.99,9.7
Philippines,3,2.24,11.94


In [6]:
%%piper
sales_with_territories 
>> count('HireDate')
>> where("n > 1")

Unnamed: 0_level_0,n,%,cum %
HireDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2008-12-19,2,1.49,1.49


## Example 2: Joining the `left` and `right` frames

In [7]:
# Two five-row frames with the same columns and the same `id` values, but
# different names and only partly overlapping `subject_id` values.
left = pd.DataFrame(
    [
        (1, 'Alex', 'sub1'),
        (2, 'Amy', 'sub2'),
        (3, 'Allen', 'sub4'),
        (4, 'Alice', 'sub6'),
        (5, 'Ayoung', 'sub5'),
    ],
    columns=['id', 'Name', 'subject_id'],
)
display(left)

right = pd.DataFrame(
    [
        (1, 'Billy', 'sub2'),
        (2, 'Brian', 'sub4'),
        (3, 'Bran', 'sub3'),
        (4, 'Bryce', 'sub6'),
        (5, 'Betty', 'sub5'),
    ],
    columns=['id', 'Name', 'subject_id'],
)
display(right)

Unnamed: 0,id,Name,subject_id
0,1,Alex,sub1
1,2,Amy,sub2
2,3,Allen,sub4
3,4,Alice,sub6
4,5,Ayoung,sub5


Unnamed: 0,id,Name,subject_id
0,1,Billy,sub2
1,2,Brian,sub4
2,3,Bran,sub3
3,4,Bryce,sub6
4,5,Betty,sub5


### Outer join

In [8]:
%%piper
left 
>> outer_join(right, left_index=True, right_index=True,
           how='outer').style.highlight_null(null_color='yellow')

Unnamed: 0,id_x,Name_x,subject_id_x,id_y,Name_y,subject_id_y
0,1,Alex,sub1,1,Billy,sub2
1,2,Amy,sub2,2,Brian,sub4
2,3,Allen,sub4,3,Bran,sub3
3,4,Alice,sub6,4,Bryce,sub6
4,5,Ayoung,sub5,5,Betty,sub5


In [9]:
%%piper
left 
>> outer_join(right, on='id').style.highlight_null(null_color='yellow')

Unnamed: 0,id,Name_x,subject_id_x,Name_y,subject_id_y
0,1,Alex,sub1,Billy,sub2
1,2,Amy,sub2,Brian,sub4
2,3,Allen,sub4,Bran,sub3
3,4,Alice,sub6,Bryce,sub6
4,5,Ayoung,sub5,Betty,sub5


In [10]:
%%piper
left 
>> outer_join(right, on='subject_id').style.highlight_null(null_color='yellow')

Unnamed: 0,id_x,Name_x,subject_id,id_y,Name_y
0,1.0,Alex,sub1,,
1,2.0,Amy,sub2,1.0,Billy
2,3.0,Allen,sub4,2.0,Brian
3,4.0,Alice,sub6,4.0,Bryce
4,5.0,Ayoung,sub5,5.0,Betty
5,,,sub3,3.0,Bran


### Left join

In [11]:
%%piper
left 
>> left_join(right, on='id').style.highlight_null(null_color='yellow')

Unnamed: 0,id,Name_x,subject_id_x,Name_y,subject_id_y
0,1,Alex,sub1,Billy,sub2
1,2,Amy,sub2,Brian,sub4
2,3,Allen,sub4,Bran,sub3
3,4,Alice,sub6,Bryce,sub6
4,5,Ayoung,sub5,Betty,sub5


In [12]:
%%piper
left 
>> left_join(right, on='subject_id').style.highlight_null(null_color='yellow')

Unnamed: 0,id_x,Name_x,subject_id,id_y,Name_y
0,1,Alex,sub1,,
1,2,Amy,sub2,1.0,Billy
2,3,Allen,sub4,2.0,Brian
3,4,Alice,sub6,4.0,Bryce
4,5,Ayoung,sub5,5.0,Betty


### Right join

In [13]:
%%piper
left 
>> right_join(right, on=['id', 'subject_id'], how='right').style.highlight_null(null_color='yellow')

Unnamed: 0,id,Name_x,subject_id,Name_y
0,1,,sub2,Billy
1,2,,sub4,Brian
2,3,,sub3,Bran
3,4,Alice,sub6,Bryce
4,5,Ayoung,sub5,Betty
