# Setup

In [1]:
from piper import piper
from piper.defaults import *
from piper.verbs import *

piper version 0.0.9, last run: Tuesday, 09 March 2021 20:04:00


# Import data

In [2]:
# Location of the sample Excel workbook used throughout this notebook.
url = 'https://github.com/datagy/pivot_table_pandas/raw/master/sample_pivot.xlsx'

# Read the workbook with pandas, parsing the 'Date' column as datetimes.
# NOTE(review): piper's `<-` form appears to bind the result to `df` - confirm
# against the piper documentation.
%piper df <- pd.read_excel(url, parse_dates=['Date'])
# Pipe df into info() to get a per-column summary (type, n, missing, unique).
%piper df >> info()

Dataframe consumes 0.15 Mb


Unnamed: 0,columns,type,n,isna,isnull,unique
0,Date,datetime64[ns],1000,0,0,347
1,Region,object,1000,0,0,4
2,Type,object,1000,0,0,3
3,Units,float64,1000,89,89,33
4,Sales,int64,1000,0,0,329


## Region (ordered categorical)

In [3]:
%%piper
# Count the rows per Region, then pass the resulting table to list().
# NOTE(review): the recorded output is ['n', '%', 'cum %'], i.e. the column
# labels of the count table rather than its rows - confirm this is intended.
df 
>> count('Region')
>> list()

['n', '%', 'cum %']

In [4]:
# Give Region an explicit ordering (North, South, East, West) by converting
# the column to an ordered categorical dtype.
region_categories = pd.CategoricalDtype(
    categories=['North', 'South', 'East', 'West'],
    ordered=True,
)
df['Region'] = df['Region'].astype(region_categories)

### Region/Type => count() >> adorn() - ignore_row_index=False

In [5]:
%%piper 
# Count the rows for every Region/Type pair. With sort_values=None the
# recorded output keeps the rows in Region category order.
# totals=True requests a totals row (presumably cut off in the truncated output).
df >> count(['Region', 'Type'], sort_values=None, totals=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,n,%,cum %
Region,Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
North,Children's Clothing,85,8.5,8.5
North,Men's Clothing,89,8.9,17.4
North,Women's Clothing,142,14.2,31.6
South,Children's Clothing,45,4.5,36.1
South,Men's Clothing,39,3.9,40.0
South,Women's Clothing,53,5.3,45.3
East,Children's Clothing,113,11.3,56.6
East,Men's Clothing,122,12.2,68.8
East,Women's Clothing,176,17.6,86.4
West,Children's Clothing,42,4.2,90.6


### Region/Type => count() >> adorn() - ignore_row_index=True

In [6]:
%%piper 
# Count the rows for every Region/Type pair, unsorted, with totals requested.
# NOTE(review): this call and its recorded output are identical to the
# previous Region/Type cell; the heading mentions ignore_row_index=True but no
# such argument is passed here - confirm whether one was intended.
df  >> count(['Region', 'Type'], sort_values=None, totals=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,n,%,cum %
Region,Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
North,Children's Clothing,85,8.5,8.5
North,Men's Clothing,89,8.9,17.4
North,Women's Clothing,142,14.2,31.6
South,Children's Clothing,45,4.5,36.1
South,Men's Clothing,39,3.9,40.0
South,Women's Clothing,53,5.3,45.3
East,Children's Clothing,113,11.3,56.6
East,Men's Clothing,122,12.2,68.8
East,Women's Clothing,176,17.6,86.4
West,Children's Clothing,42,4.2,90.6


### Region => count() >> adorn() - ignore_row_index=True

In [7]:
%%piper 
# Count the rows per Region with a totals row. No sort_values is given and the
# recorded output is ordered by n, largest first, ending with a 'Total' row.
df >> count('Region', totals=True)

Unnamed: 0,n,%,cum %
East,411,41.1,41.1
North,316,31.6,72.7
South,137,13.7,86.4
West,136,13.6,100.0
Total,1000,100.0,


## Units (missing data)

In [8]:
# Frequency table of Units, showing the five most common values.
# NOTE(review): the percentages in the recorded output match a base of 911
# rows (42 / 911 = 4.61%), so the 89 missing Units appear to be excluded.
%piper count(df, 'Units') >> head(5)

33 rows, 3 columns


Unnamed: 0_level_0,n,%,cum %
Units,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
18.0,42,4.61,4.61
26.0,37,4.06,8.67
13.0,36,3.95,12.62
34.0,34,3.73,16.36
7.0,34,3.73,20.09


In [9]:
# Select the rows whose Units value is missing (89 rows in the recorded
# output) and preview the first four of them.
%piper df.query('Units.isna()') >> head(4)

89 rows, 5 columns


Unnamed: 0,Date,Region,Type,Units,Sales
11,2020-08-09,North,Men's Clothing,,270
19,2020-03-08,North,Men's Clothing,,644
25,2020-05-23,North,Men's Clothing,,240
37,2020-04-24,North,Men's Clothing,,900


### Units --> fillna to ensure pandas aggregation functions work correctly

In [10]:
# Replace missing Units with 0 and assign the result back to the column.
# Calling fillna(inplace=True) on the Series obtained from the frame is a
# chained in-place update: pandas 2.x warns about it and, with Copy-on-Write
# enabled, the frame itself is left unchanged. Explicit assignment always
# updates df and is safe to re-run.
df['Units'] = df['Units'].fillna(0)

## Region (summary/groupby) with totals

### Region => groupby() >> adorn() - ignore_row_index=True

In [11]:
%%piper 
# Sum Sales per Region, move Region out of the index into an ordinary column,
# then let adorn() append a totals row (labelled 'All' in the recorded output).
df.groupby(['Region']).agg(TotalSales=('Sales', 'sum'))
>> pd.DataFrame.reset_index()
>> adorn(ignore_row_index=True)

Unnamed: 0,Region,TotalSales
0,North,138700
1,South,59315
2,East,167763
3,West,61476
4,All,427254


### Region/Type => groupby() >> adorn() - ignore_row_index=False

In [12]:
# Total Sales for every Region/Type pair, with adorn() adding totals on both
# axes, cast to whole numbers afterwards.
g1 = (
    df.groupby(['Region', 'Type'])
      .agg(TotalSales=('Sales', 'sum'))
      .pipe(adorn, axis='both')
      .astype(int)
)

# Ask head() for as many rows as the frame holds so nothing is cut off.
head(g1, len(g1))

13 rows, 2 columns


Unnamed: 0_level_0,Unnamed: 1_level_0,TotalSales,All
Region,Type,Unnamed: 2_level_1,Unnamed: 3_level_1
North,Children's Clothing,37306,37306
North,Men's Clothing,39975,39975
North,Women's Clothing,61419,61419
South,Children's Clothing,18570,18570
South,Men's Clothing,18542,18542
South,Women's Clothing,22203,22203
East,Children's Clothing,45849,45849
East,Men's Clothing,51685,51685
East,Women's Clothing,70229,70229
West,Children's Clothing,20182,20182


In [13]:
%%piper 
# Total Sales with Type as rows and Region as columns, plus subtotal columns:
#   1. unstack() pivots Region into the columns;
#   2. adorn(axis='both') adds 'All' totals on both axes;
#   3. flatten_cols() removes the 'TotalSales' prefix from the column labels;
#   4. three column-wise adorn() calls add North+South, East+West and their
#      sum ('Total', which matches 'All' in the recorded output).
df.groupby(['Type', 'Region']).agg(TotalSales=('Sales', 'sum')).unstack()
>> adorn(axis='both', ignore_row_index=False)
>> flatten_cols(remove_prefix='TotalSales')
>> adorn(axis='column', col_row_name='total_north_south', 
         columns=['North', 'South'], ignore_row_index=False)
>> adorn(axis='column', col_row_name='total_east_west', 
         columns=['East', 'West'], ignore_row_index=False)
>> adorn(axis='column', col_row_name='Total', 
         columns=['total_north_south', 'total_east_west'], ignore_row_index=False)
>> head(4)

4 rows, 8 columns


Unnamed: 0,North,South,East,West,All,total_north_south,total_east_west,Total
Children's Clothing,37306,18570,45849,20182,121907,55876,66031,121907
Men's Clothing,39975,18542,51685,19077,129279,58517,70762,129279
Women's Clothing,61419,22203,70229,22217,176068,83622,92446,176068
All,138700,59315,167763,61476,427254,198015,229239,427254


### Region/Type => groupby() >> adorn() >> flatten_cols()

In [14]:
%%piper 
# Number of Units entries per Type (rows) and Region (columns), with 'All'
# totals on both axes and the 'TotalUnits' prefix removed from the columns.
# NOTE(review): the grand total is 1000 in the recorded output, so the rows
# whose Units were previously missing are included in these counts.
df.groupby(['Type', 'Region']).agg(TotalUnits=('Units', 'count')).unstack()
>> adorn(axis='both', ignore_row_index=False).astype(int)
>> flatten_cols(remove_prefix='TotalUnits')
>> head(4)

4 rows, 5 columns


Unnamed: 0,North,South,East,West,All
Children's Clothing,85,45,113,42,285
Men's Clothing,89,39,122,41,291
Women's Clothing,142,53,176,53,424
All,316,137,411,136,1000


In [15]:
%%piper 
# Count Units entries per Type/Region, rank the counts within each Type,
# order by Type then rank, and add a totals row for TotalUnits.
# NOTE(review): rank is cast to int, yet the recorded output shows floats
# (1.0, 2.0, ...), presumably because the totals row added afterwards has no
# rank. The tied Women's Clothing counts (53 and 53) both show rank 1; if the
# ranking yields fractional ranks for ties, the int cast truncates them. Confirm.
df.groupby(['Type', 'Region']).agg(TotalUnits=('Units', 'count'))
>> transform(index='Type', rank=('TotalUnits', 'rank'))
>> assign(rank=lambda x: x['rank'].astype(int))
>> order_by(['Type', 'rank'])
>> adorn(axis='row', columns='TotalUnits', ignore_row_index=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,TotalUnits,rank
Type,Region,Unnamed: 2_level_1,Unnamed: 3_level_1
Children's Clothing,West,42,1.0
Children's Clothing,South,45,2.0
Children's Clothing,North,85,3.0
Children's Clothing,East,113,4.0
Men's Clothing,South,39,1.0
Men's Clothing,West,41,2.0
Men's Clothing,North,89,3.0
Men's Clothing,East,122,4.0
Women's Clothing,South,53,1.0
Women's Clothing,West,53,1.0


In [16]:
%%piper 
# Same Type/Region counts, restricted to the East and North regions:
#   - calc_field is TotalUnits multiplied by 100;
#   - rank orders the counts within each Type;
#   - adorn() appends a totals row for TotalUnits and calc_field only;
#   - reset_index() turns Type and Region back into ordinary columns.
# NOTE(review): in the recorded output the totals row has an empty Type and
# no rank, and rank is shown as float despite the int cast.
df.groupby(['Type', 'Region']).agg(TotalUnits=('Units', 'count'))
>> where("Region.isin(['East', 'North'])")
>> assign(calc_field=lambda x: x.TotalUnits * 100)
>> transform(index='Type', rank=('TotalUnits', 'rank'))
>> assign(rank=lambda x: x['rank'].astype(int))
>> order_by(['Type', 'rank'], ascending=[True, True])
>> adorn(axis='row', columns=['TotalUnits', 'calc_field'], ignore_row_index=False)
>> reset_index()

Unnamed: 0,Type,Region,TotalUnits,calc_field,rank
0,Children's Clothing,North,85,8500,1.0
1,Children's Clothing,East,113,11300,2.0
2,Men's Clothing,North,89,8900,1.0
3,Men's Clothing,East,122,12200,2.0
4,Women's Clothing,North,142,14200,1.0
5,Women's Clothing,East,176,17600,2.0
6,,All,727,72700,


### Region/Type => pivot_table()

In [17]:
%%piper

df.pivot_table(index='Type',
               columns='Region',
               values='Date', 
               margins=True,
               aggfunc='count')
# margins=True already adds the 'All' row and column, so adorn() is not applied here.

Region,North,South,East,West,All
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Children's Clothing,85,45,113,42,285
Men's Clothing,89,39,122,41,291
Women's Clothing,142,53,176,53,424
All,316,137,411,136,1000
