# Chapter 7: Aggregate Methods

Aggregations allow us to take detailed data and collapse it to a single value.

In [None]:
import pandas as pd
import numpy as np

# Fuel-economy dataset (zipped CSV); fetched over HTTPS rather than plain HTTP.
url = "https://github.com/mattharrison/datasets/raw/master/data/vehicles.csv.zip"

# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids the mixed-type DtypeWarning that
# read_csv otherwise emits for this file.
df = pd.read_csv(url, low_memory=False)

# The two mileage columns used throughout this chapter.
city_mpg = df["city08"]
highway_mpg = df["highway08"]

  df = pd.read_csv(url)


## 7.1 Aggregations

In [None]:
# Collapse the whole series to its arithmetic mean.
(city_mpg
 .mean())

18.369045304297103

In [None]:
# Aggregate *properties* (attributes, not method calls), one per output line:
# whether every value is distinct, and whether values never decrease.
print(city_mpg.is_unique, city_mpg.is_monotonic_increasing, sep="\n")

False
False


In [None]:
# .quantile() defaults to q=0.5 (the 50% quantile); spelled out here.
city_mpg.quantile(q=0.5)


17.0

In [None]:
# 90% quantile of city mileage.
city_mpg.quantile(q=0.9)

24.0

In [None]:
# Passing a list of levels returns a Series indexed by those levels.
city_mpg.quantile(q=[0.1, 0.5, 0.9])

0.1    13.0
0.5    17.0
0.9    24.0
Name: city08, dtype: float64

## 7.2 Count and Mean of an Attribute

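- If we want to count the values that meet some criterion, we can build a boolean series with a comparison and call the ``.sum`` method on it: each ``True`` counts as 1. Calling ``.mean`` instead gives the fraction of values that match.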

In [None]:
# Number of cars whose city mileage exceeds 20:
# the comparison yields booleans, and summing them counts the True entries.
city_mpg.gt(20).sum()

10272

In [None]:
# Percentage of cars whose city mileage exceeds 20:
# scale the boolean mask to 0/100, then average it.
city_mpg.gt(20).mul(100).mean()

24.965973167412017

In [None]:
# The comparison on its own: a boolean Series, True where city mileage exceeds 20.
city_mpg.gt(20)

0        False
1        False
2         True
3        False
4        False
         ...  
41139    False
41140    False
41141    False
41142    False
41143    False
Name: city08, Length: 41144, dtype: bool

In [None]:
# Summing the boolean mask adds 1 for every True, giving the matching-row count.
city_mpg.gt(20).sum()

10272

## 7.3 .agg and Aggregation Strings

- ``.agg`` shines when we need several aggregations at once: pass it a list of aggregation names and/or functions, and it returns one result per entry.

In [None]:
def second_to_last(s):
    """Return the penultimate value of a Series, selected by position.

    ``s`` is indexed with ``.iloc[-2]``, so the lookup ignores index labels.
    The result is whatever ``.iloc`` returns for that position; ``.iloc``
    raises ``IndexError`` when the position is out of bounds.
    """
    penultimate = s.iloc[-2]
    return penultimate

In [None]:
# Several aggregations in one call; the result is a Series labelled by
# aggregation name (custom functions are labelled by their __name__).
#
# Built-in reductions are passed by *name*: pandas resolves 'var' and 'max'
# to Series.var / Series.max. Passing np.var or the builtin max instead is
# deprecated (pandas >= 2.1 emits a FutureWarning); once pandas calls them
# directly, np.var would compute the population variance (ddof=0) rather
# than the sample variance that Series.var reports.
city_mpg.agg(['mean', 'var', 'max', second_to_last])

mean               18.369045
var                62.503036
max               150.000000
second_to_last     18.000000
Name: city08, dtype: float64