# Setup

In [1]:
from piper import piper
from piper.defaults import *
from piper.verbs import info, where, count, order_by, assign, counts, adorn, head, flatten_cols, add_group_calc

piper version 0.0.7, last run: Friday, 26 February 2021 14:50:10


# Import data

In [2]:
# Sample sales workbook hosted on GitHub (loading it requires network access).
url = 'https://github.com/datagy/pivot_table_pandas/raw/master/sample_pivot.xlsx'

# Read the workbook into `df`, parsing the 'Date' column as datetimes.
%piper df <- pd.read_excel(url, parse_dates=['Date'])
# Show the per-column summary of `df` (data type, unique and missing counts).
%piper df >> info()

Dataframe with (rows, cols) (1000, 5) consumes 0.15 Mb


Unnamed: 0,columns,data_type,unique,isna,isnull,total_count
0,Date,datetime64[ns],347,0,0,1000
1,Region,object,4,0,0,1000
2,Type,object,3,0,0,1000
3,Units,float64,33,89,89,1000
4,Sales,int64,329,0,0,1000


## Region (ordered categorical)

In [3]:
%%piper
df >> count('Region') >> ['Region'] >> list()

['East', 'North', 'South', 'West']

In [4]:
# Ordered categorical so Region sorts and groups as North, South, East, West
# rather than alphabetically.
region_categories = pd.CategoricalDtype(['North', 'South', 'East', 'West'], ordered=True)
# Bracket assignment is used instead of `df.Region = ...`: attribute-style
# setting only updates a column that already exists (otherwise it silently
# creates a plain Python attribute), so `df[...] = ...` is the safe idiom.
df['Region'] = df['Region'].astype(region_categories)

### Region/Type => counts() >> adorn() - ignore_row_index=False

In [5]:
%%piper 
df >> counts(['Region', 'Type'], sort_values=None)
   >> adorn(axis=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,n
Region,Type,Unnamed: 2_level_1
North,Children's Clothing,85
North,Men's Clothing,89
North,Women's Clothing,142
South,Children's Clothing,45
South,Men's Clothing,39
South,Women's Clothing,53
East,Children's Clothing,113
East,Men's Clothing,122
East,Women's Clothing,176
West,Children's Clothing,42


### Region/Type => counts() >> adorn() - ignore_row_index=True

In [6]:
%%piper 
df  >> counts(['Region', 'Type'], sort_values=None).reset_index()
    >> adorn(axis='both', ignore_row_index=True).fillna('')

Unnamed: 0,Region,Type,n,All
0,North,Children's Clothing,85,85
1,North,Men's Clothing,89,89
2,North,Women's Clothing,142,142
3,South,Children's Clothing,45,45
4,South,Men's Clothing,39,39
5,South,Women's Clothing,53,53
6,East,Children's Clothing,113,113
7,East,Men's Clothing,122,122
8,East,Women's Clothing,176,176
9,West,Children's Clothing,42,42


### Region => count() >> adorn() - ignore_row_index=True

In [7]:
%%piper 
df >> count('Region', add_total=False) >> adorn(ignore_row_index=True)

Unnamed: 0,Region,n
0,East,411
1,North,316
2,South,137
3,West,136
4,All,1000


## Units (missing data)

In [8]:
%piper count(df, 'Units') >> head(5)

34 rows, 2 columns


Unnamed: 0,Units,n
0,,89
1,18.0,42
2,26.0,37
3,13.0,36
4,25.0,34


In [9]:
# Preview the first 4 rows whose Units value is missing.
%piper df.query('Units.isna()') >> head(4)

89 rows, 5 columns


Unnamed: 0,Date,Region,Type,Units,Sales
11,2020-08-09,North,Men's Clothing,,270
19,2020-03-08,North,Men's Clothing,,644
25,2020-05-23,North,Men's Clothing,,240
37,2020-04-24,North,Men's Clothing,,900


### Units --> replace missing values with 0 so that count-based aggregations include every row

In [10]:
# Replace missing Units with 0 by assigning the filled column back to `df`.
# The previous `df.Units.fillna(0, inplace=True)` was an inplace call on an
# intermediate Series (chained assignment): pandas 2.x emits a FutureWarning
# for it, and under Copy-on-Write (the pandas 3.0 default) `df` is left
# unchanged. Plain assignment works on every pandas version.
df['Units'] = df['Units'].fillna(0)

## Region (summary/groupby) with totals

### Region => groupby() >> adorn() - ignore_row_index=True

In [11]:
%%piper 
df.groupby(['Region']).agg(TotalSales=('Sales', 'sum'))
>> pd.DataFrame.reset_index()
>> adorn(ignore_row_index=True)

Unnamed: 0,Region,TotalSales
0,North,138700
1,South,59315
2,East,167763
3,West,61476
4,All,427254


### Region/Type => groupby() >> adorn() - ignore_row_index=False

In [12]:
# Total sales per Region/Type; adorn(axis='both') then appends the 'All'
# totals on both axes and the result is cast back to integers.
g1 = (
    df.groupby(['Region', 'Type'])
      .agg(TotalSales=('Sales', 'sum'))
      .pipe(adorn, axis='both')
      .astype(int)
)
# Display every row: the row limit passed to head() is the frame length.
head(g1, len(g1))

13 rows, 2 columns


Unnamed: 0_level_0,Unnamed: 1_level_0,TotalSales,All
Region,Type,Unnamed: 2_level_1,Unnamed: 3_level_1
North,Children's Clothing,37306,37306
North,Men's Clothing,39975,39975
North,Women's Clothing,61419,61419
South,Children's Clothing,18570,18570
South,Men's Clothing,18542,18542
South,Women's Clothing,22203,22203
East,Children's Clothing,45849,45849
East,Men's Clothing,51685,51685
East,Women's Clothing,70229,70229
West,Children's Clothing,20182,20182


In [13]:
%%piper 
df.groupby(['Type', 'Region']).agg(TotalSales=('Sales', 'sum')).unstack()
>> adorn(axis='both', ignore_row_index=False)
>> flatten_cols(remove_prefix='TotalSales')
>> adorn(axis='column', col_row_name='total_north_south', 
         columns=['North', 'South'], ignore_row_index=False)
>> adorn(axis='column', col_row_name='total_east_west', 
         columns=['East', 'West'], ignore_row_index=False)
>> adorn(axis='column', col_row_name='Total', 
         columns=['total_north_south', 'total_east_west'], ignore_row_index=False)
>> head(4)

4 rows, 8 columns


Unnamed: 0,North,South,East,West,All,total_north_south,total_east_west,Total
Children's Clothing,37306,18570,45849,20182,121907,55876,66031,121907
Men's Clothing,39975,18542,51685,19077,129279,58517,70762,129279
Women's Clothing,61419,22203,70229,22217,176068,83622,92446,176068
All,138700,59315,167763,61476,427254,198015,229239,427254


### Region/Type => groupby() >> adorn() >> flatten_cols()

In [14]:
%%piper 
df.groupby(['Type', 'Region']).agg(TotalUnits=('Units', 'count')).unstack()
>> adorn(axis='both', ignore_row_index=False).astype(int)
>> flatten_cols(remove_prefix='TotalUnits')
>> head(4)

4 rows, 5 columns


Unnamed: 0,North,South,East,West,All
Children's Clothing,85,45,113,42,285
Men's Clothing,89,39,122,41,291
Women's Clothing,142,53,176,53,424
All,316,137,411,136,1000


In [15]:
%%piper 
df.groupby(['Type', 'Region']).agg(TotalUnits=('Units', 'count'))
>> add_group_calc(index=['Type'], function='rank')
>> assign(rank=lambda x: x['rank'].astype(int))
>> order_by(['Type', 'rank'], ascending=[True, True])
# >> adorn(axis='row', columns='TotalUnits', ignore_row_index=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,TotalUnits,rank
Type,Region,Unnamed: 2_level_1,Unnamed: 3_level_1
Children's Clothing,East,113,1
Children's Clothing,North,85,2
Children's Clothing,South,45,3
Children's Clothing,West,42,4
Men's Clothing,East,122,1
Men's Clothing,North,89,2
Men's Clothing,West,41,3
Men's Clothing,South,39,4
Women's Clothing,East,176,1
Women's Clothing,North,142,2


In [16]:
%%piper 
df.groupby(['Type', 'Region']).agg(TotalUnits=('Units', 'count'))
>> where("Region.isin(['East', 'North'])")
>> assign(calc_field=lambda x: x.TotalUnits * 100)
>> add_group_calc(index=['Type'], column='rank', value='TotalUnits', function='rank')
>> assign(rank=lambda x: x['rank'].astype(int))
>> order_by(['Type', 'rank'], ascending=[True, True])
>> adorn(axis='row', columns=['TotalUnits', 'calc_field'], ignore_row_index=False)
>> pd.DataFrame.reset_index()

Unnamed: 0,Type,Region,TotalUnits,calc_field,rank
0,Children's Clothing,East,113,11300,1.0
1,Children's Clothing,North,85,8500,2.0
2,Men's Clothing,East,122,12200,1.0
3,Men's Clothing,North,89,8900,2.0
4,Women's Clothing,East,176,17600,1.0
5,Women's Clothing,North,142,14200,2.0
6,,All,727,72700,


### Region/Type => pivot_table() with margins=True (totals without adorn())

In [17]:
%%piper

df.pivot_table(index='Type',
               columns='Region',
               values='Date', 
               margins=True,
               aggfunc='count')
# >> adorn()

Region,North,South,East,West,All
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Children's Clothing,85,45,113,42,285
Men's Clothing,89,39,122,41,291
Women's Clothing,142,53,176,53,424
All,316,137,411,136,1000
