# Group and Aggregate Data
Quickly gain insights into the main numerical summary statistics of your data by choosing appropriate aggregation functions or defining custom functions.

In [2]:
# Load packages
import pandas as pd


In [3]:
# Read the uploaded CSV into a data frame: the first column becomes the
# index and the "date" column is parsed into datetimes
df = pd.read_csv(
    "data.csv",
    index_col=0,
    parse_dates=["date"],
)
df.head()


Unnamed: 0,date,uid,sku,price,device,gender,country,age
0,2017-07-10,41195147,sku_three_499,499,android,M,BRA,17
1,2017-07-15,41195147,sku_three_499,499,android,M,BRA,17
2,2017-11-12,41195147,sku_four_599,599,android,M,BRA,17
3,2017-09-26,91591874,sku_two_299,299,android,M,TUR,17
4,2017-12-01,91591874,sku_four_599,599,android,M,TUR,17


## Choose the aggregation function

| Function | Description                         |
|----------|-------------------------------------|
| count    | Number of non-null observations     |
| sum      | Sum of values                       |
| mean     | Mean of values                      |
| mad      | Mean absolute deviation (removed in pandas 2.0) |
| median   | Arithmetic median of values         |
| min      | Minimum                             |
| max      | Maximum                             |
| mode     | Mode                                |
| abs      | Absolute Value                      |
| prod     | Product of values                   |
| std      | Unbiased standard deviation         |
| var      | Unbiased variance                   |
| sem      | Unbiased standard error of the mean |
| skew     | Unbiased skewness (3rd moment)      |
| kurt     | Unbiased kurtosis (4th moment)      |
| quantile | Sample quantile (value at %)        |
| cumsum   | Cumulative sum                      |
| cumprod  | Cumulative product                  |
| cummax   | Cumulative maximum                  |
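| cummin   | Cumulative minimum                  |

Note: `abs` and the `cum*` functions return one value per row rather than one value per group, so they act as transformations rather than aggregations.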

In [4]:
# Define a custom function
def custom(x):
    """Return the difference between the mean and the median of ``x``.

    ``x`` must provide ``.mean()`` and ``.median()``; when used inside
    ``agg`` below it is called once per group. A positive result means
    the mean lies above the median, a negative result means it lies below.
    """
    average = x.mean()
    midpoint = x.median()
    return average - midpoint


# Group the data: one group per (device, gender) combination
grouped = df.groupby(by=["device", "gender"])  # Choose column(s) to groupby

# Aggregate the data: each entry in the list produces one result column
# under the "price" header, so the result has two-level column labels
aggregation = grouped.agg(
    {
        "price": [  # Column to aggregate over
            "mean",
            "median",
            "std",  # Use standard functions
            custom,  # Or use custom functions (column is labelled "custom")
        ]
    }
)

# Examine the results: a bare last expression lets the notebook render the
# frame as a rich table (same pattern as the `df.head()` cell), whereas
# print() would force a plain-text dump
aggregation


                     price                               
                      mean median         std      custom
device  gender                                           
android F       400.747504    299  179.984378  101.747504
        M       416.237308    499  195.001520  -82.762692
iOS     F       404.435330    299  181.524952  105.435330
        M       405.272401    299  196.843197  106.272401
