# Sidetable Gives You the Pandas Methods You Didn’t Know You Needed

Source code from the article: ["Sidetable Gives You the Pandas Methods You Didn’t Know You Needed"](https://towardsdatascience.com/sidetable-gives-you-the-pandas-methods-you-didnt-know-you-needed-92be825ff512)

## Setup

In [1]:
import sys
import pandas as pd
import sidetable

# Record the interpreter and library versions this notebook was executed with,
# one per line, so readers can reproduce the environment.
print(
    f"Python version: {sys.version}",
    f"Pandas version: {pd.__version__}",
    f"Sidetable version: {sidetable.__version__}",
    sep="\n",
)

Python version: 3.8.5 (default, Aug  5 2020, 09:44:06) [MSC v.1916 64 bit (AMD64)]
Pandas version: 1.1.1
Sidetable version: 0.7.0


## Penguin data

In [2]:
# Load the penguins dataset directly from the seaborn-data GitHub repository
# (requires network access when the cell is run).
df_penguins = pd.read_csv("https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv")

# Preview the first rows to check the columns and spot missing values early.
df_penguins.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE


In [3]:
# Column dtypes and non-null counts: the quickest built-in view of where data is missing.
df_penguins.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   species            344 non-null    object 
 1   island             344 non-null    object 
 2   bill_length_mm     342 non-null    float64
 3   bill_depth_mm      342 non-null    float64
 4   flipper_length_mm  342 non-null    float64
 5   body_mass_g        342 non-null    float64
 6   sex                333 non-null    object 
dtypes: float64(4), object(3)
memory usage: 18.9+ KB


## Exploring sidetable

### stb.missing()

In [4]:
# sidetable's missing-value summary: one row per column with the number of
# missing values, the total row count, and the percent missing.
df_penguins.stb.missing()

Unnamed: 0,missing,total,percent
sex,11,344,3.197674
bill_length_mm,2,344,0.581395
bill_depth_mm,2,344,0.581395
flipper_length_mm,2,344,0.581395
body_mass_g,2,344,0.581395
species,0,344,0.0
island,0,344,0.0


In [5]:
# Plain-pandas counterpart for comparison: missing-value count per column only
# (no totals or percentages).
df_penguins.isna().sum()

species               0
island                0
bill_length_mm        2
bill_depth_mm         2
flipper_length_mm     2
body_mass_g           2
sex                  11
dtype: int64

In [6]:
# style=True renders the percent column as a formatted percentage (e.g. 3.20%).
df_penguins.stb.missing(style=True)

Unnamed: 0,missing,total,percent
sex,11,344,3.20%
bill_length_mm,2,344,0.58%
bill_depth_mm,2,344,0.58%
flipper_length_mm,2,344,0.58%
body_mass_g,2,344,0.58%
species,0,344,0.00%
island,0,344,0.00%


### stb.freq()

In [7]:
# Frequency table for a single column: count and percent per species, plus
# running (cumulative) count and percent. Note the column is passed as a list.
df_penguins.stb.freq(["species"])

Unnamed: 0,species,count,percent,cumulative_count,cumulative_percent
0,Adelie,152,44.186047,152,44.186047
1,Gentoo,124,36.046512,276,80.232558
2,Chinstrap,68,19.767442,344,100.0


In [8]:
# Same table with style=True, which formats the percent columns for display.
df_penguins.stb.freq(["species"], style=True)

Unnamed: 0,species,count,percent,cumulative_count,cumulative_percent
0,Adelie,152,44.19%,152,44.19%
1,Gentoo,124,36.05%,276,80.23%
2,Chinstrap,68,19.77%,344,100.00%


In [10]:
# Without style=True, freq() hands back an ordinary pandas DataFrame that can be
# processed further like any other frame.
type(df_penguins.stb.freq(["species"]))

pandas.core.frame.DataFrame

In [11]:
# cum_cols=False leaves out the cumulative_count and cumulative_percent columns.
df_penguins.stb.freq(["species"], style=True, cum_cols=False)

Unnamed: 0,species,count,percent
0,Adelie,152,44.19%
1,Gentoo,124,36.05%
2,Chinstrap,68,19.77%


In [12]:
# Passing several columns counts each observed combination of their values.
# NOTE: the cumulative count ends at 333 rather than 344, matching the 333
# non-null `sex` values, so rows with a missing sex appear to be left out.
df_penguins.stb.freq(["species", "island", "sex"])

Unnamed: 0,species,island,sex,count,percent,cumulative_count,cumulative_percent
0,Gentoo,Biscoe,MALE,61,18.318318,61,18.318318
1,Gentoo,Biscoe,FEMALE,58,17.417417,119,35.735736
2,Chinstrap,Dream,MALE,34,10.21021,153,45.945946
3,Chinstrap,Dream,FEMALE,34,10.21021,187,56.156156
4,Adelie,Dream,MALE,28,8.408408,215,64.564565
5,Adelie,Dream,FEMALE,27,8.108108,242,72.672673
6,Adelie,Torgersen,FEMALE,24,7.207207,266,79.87988
7,Adelie,Torgersen,MALE,23,6.906907,289,86.786787
8,Adelie,Biscoe,MALE,22,6.606607,311,93.393393
9,Adelie,Biscoe,FEMALE,22,6.606607,333,100.0


In [13]:
# Styled frequency table by island; used as the baseline for the thresh examples.
df_penguins.stb.freq(["island"], style=True)

Unnamed: 0,island,count,percent,cumulative_count,cumulative_percent
0,Biscoe,168,48.84%,168,48.84%
1,Dream,124,36.05%,292,84.88%
2,Torgersen,52,15.12%,344,100.00%


In [15]:
df_penguins.stb.freq(["island"], style=True, thresh=.5)

Unnamed: 0,island,count,percent,cumulative_count,cumulative_percent
0,others,344,100.00%,344,100.00%


In [16]:
df_penguins.stb.freq(["island"], style=True, thresh=.9)

Unnamed: 0,island,count,percent,cumulative_count,cumulative_percent
0,others,344,100.00%,344,100.00%


In [17]:
df_penguins.stb.freq(["island"], style=True, thresh=.9, other_label="Other Islands")

Unnamed: 0,island,count,percent,cumulative_count,cumulative_percent
0,Other Islands,344,100.00%,344,100.00%


In [18]:
# value= sums the named numeric column per group instead of counting rows
# (here: total flipper length per island and its share of the overall total).
df_penguins.stb.freq(["island"], value="flipper_length_mm")

Unnamed: 0,island,flipper_length_mm,percent,cumulative_flipper_length_mm,cumulative_percent
0,Biscoe,35021.0,50.967066,35021.0,50.967066
1,Dream,23941.0,34.842024,58962.0,85.80909
2,Torgersen,9751.0,14.19091,68713.0,100.0


In [19]:
# Build the island frequency table and Title-Case its column headers in one
# step, producing presentation-friendly names such as "Cumulative_Count".
freq_table = df_penguins.stb.freq(["island"]).rename(columns=str.title)

freq_table

Unnamed: 0,Island,Count,Percent,Cumulative_Count,Cumulative_Percent
0,Biscoe,168,48.837209,168,48.837209
1,Dream,124,36.046512,292,84.883721
2,Torgersen,52,15.116279,344,100.0


### Other EDA options

In [20]:
# Built-in summary statistics; by default only the numeric columns are included.
df_penguins.describe()

Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g
count,342.0,342.0,342.0,342.0
mean,43.92193,17.15117,200.915205,4201.754386
std,5.459584,1.974793,14.061714,801.954536
min,32.1,13.1,172.0,2700.0
25%,39.225,15.6,190.0,3550.0
50%,44.45,17.3,197.0,4050.0
75%,48.5,18.7,213.0,4750.0
max,59.6,21.5,231.0,6300.0


In [21]:
# include="all" also covers the object columns, reporting unique/top/freq for
# them and leaving the numeric statistics empty (and vice versa).
df_penguins.describe(include="all")

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
count,344,344,342.0,342.0,342.0,342.0,333
unique,3,3,,,,,2
top,Adelie,Biscoe,,,,,MALE
freq,152,168,,,,,168
mean,,,43.92193,17.15117,200.915205,4201.754386,
std,,,5.459584,1.974793,14.061714,801.954536,
min,,,32.1,13.1,172.0,2700.0,
25%,,,39.225,15.6,190.0,3550.0,
50%,,,44.45,17.3,197.0,4050.0,
75%,,,48.5,18.7,213.0,4750.0,
max,,,59.6,21.5,231.0,6300.0,


### stb.subtotal()

In [22]:
# On a flat DataFrame, subtotal() appends a grand_total row that sums the
# numeric columns; tail() shows the last rows so that new row is visible.
df_penguins.stb.subtotal().tail()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
340,Gentoo,Biscoe,46.8,14.3,215.0,4850.0,FEMALE
341,Gentoo,Biscoe,50.4,15.7,222.0,5750.0,MALE
342,Gentoo,Biscoe,45.2,14.8,212.0,5200.0,FEMALE
343,Gentoo,Biscoe,49.9,16.1,213.0,5400.0,MALE
grand_total,,,15021.3,5865.7,68713.0,1437000.0,


In [23]:
# Count rows for each (species, sex) pair, using the always-populated `island`
# column as the thing being counted.
(
    df_penguins
    .groupby(["species", "sex"])[["island"]]
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,island
species,sex,Unnamed: 2_level_1
Adelie,FEMALE,73
Adelie,MALE,73
Chinstrap,FEMALE,34
Chinstrap,MALE,34
Gentoo,FEMALE,58
Gentoo,MALE,61


In [24]:
# Same per-(species, sex) counts, with sidetable's subtotal() adding a subtotal
# row for each species and a grand_total row at the bottom.
(
    df_penguins
    .groupby(["species", "sex"])[["island"]]
    .count()
    .stb.subtotal()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,island
species,sex,Unnamed: 2_level_1
Adelie,FEMALE,73
Adelie,MALE,73
Adelie,Adelie - subtotal,146
Chinstrap,FEMALE,34
Chinstrap,MALE,34
Chinstrap,Chinstrap - subtotal,68
Gentoo,FEMALE,58
Gentoo,MALE,61
Gentoo,Gentoo - subtotal,119
grand_total,,333
