In [5]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# tweak_autos.py has to live in the same folder as this notebook.
from tweak_autos import tweak_autos

# Load the raw vehicles table and run it through the cleaning function.
# tweak_autos bundles every line of code used to clean the dataset; it is
# explained step by step in the notebook "Effective Pandas 01".
data = pd.read_csv('vehicles.csv')
autos2 = tweak_autos(data)

## `agg()`
El método `agg()` (viene de *aggregate*) ofrece una versatilidad tremenda a las operaciones con Pandas.
Permite pasar como argumentos varias cosas:
- métodos de agregación (sum, mean, count, etc.)
- funciones de NumPy
- ¡funciones propias!

In [8]:
# List the column labels available in the cleaned frame.
autos2.columns

Index(['city08', 'comb08', 'highway08', 'cylinders', 'displ', 'drive',
       'fuelCost08', 'make', 'model', 'range', 'createdOn', 'year',
       'automatic', 'speeds', 'ffs'],
      dtype='object')

In [9]:
# Select the city08 column as a standalone Series and echo it.
city_mpg = autos2['city08']
city_mpg

0        19
1         9
2        23
3        10
4        17
         ..
41139    19
41140    20
41141    18
41142    18
41143    16
Name: city08, Length: 41144, dtype: int16

In [2]:
def second_to_last(s):
    """Return the penultimate element of ``s``, looked up by position.

    ``s`` must support positional ``.iloc`` indexing (for example a pandas
    Series). The lookup is delegated entirely to ``.iloc[-2]``, so an input
    with fewer than two elements raises whatever that lookup raises.
    """
    penultimate = s.iloc[-2]
    return penultimate

In [16]:
import numpy as np
# One agg() call can mix reductions given by name, a user-defined function
# (second_to_last) and a plain Series attribute ('hasnans').
# The variance is requested as 'var' rather than np.var: pandas has been
# translating np.var into Series.var (sample variance, ddof=1) and deprecated
# that translation in 2.1. The resulting FutureWarning is hidden by the
# warnings.filterwarnings('ignore') call at the top of this notebook, so the
# string alias is the explicit way to keep the value shown below.
city_mpg.agg(['mean', 'var', 'max', second_to_last, 'hasnans', 'nunique'])

mean              18.369045
var               62.503036
max                     150
second_to_last           18
hasnans               False
nunique                 105
Name: city08, dtype: object

## Exercises

### 1. Find the count of non-missing values of a series

`count()`
Returns the number of non-NA/null values in the Series.

In [22]:
# count() skips missing values, so this is the number of non-NA entries.
city_mpg.count()

41144

### 2. Find the number of entries of a series
`shape` returns a tuple with the shape of the underlying data; `size` returns the number of elements as a single integer.

In [25]:
# shape gives the total number of values the series holds, as a one-element tuple
city_mpg.shape

(41144,)

In [43]:
# size gives the same total as shape, but as a plain integer instead of a tuple.
city_mpg.size

41144

### 3. Find the number of unique entries of a series

In [30]:
# Take the make column as a Series, then show its type followed by its contents.
brand = autos2['make']
display(type(brand), brand)

pandas.core.series.Series

0        Alfa Romeo
1           Ferrari
2             Dodge
3             Dodge
4            Subaru
            ...    
41139        Subaru
41140        Subaru
41141        Subaru
41142        Subaru
41143        Subaru
Name: make, Length: 41144, dtype: category
Categories (136, object): ['AM General', 'ASC Incorporated', 'Acura', 'Alfa Romeo', ..., 'Volvo', 'Wallace Environmental', 'Yugo', 'smart']

In [31]:
# Number of distinct values in the make column.
brand.nunique()

136

### 4. Find the mean value of a series
`mean()`

In [33]:
# Arithmetic mean of the city08 values.
city_mpg.mean()

18.369045304297103

### 5. Find the maximum value of a series
`max()`

In [34]:
# Largest city08 value in the series.
city_mpg.max()

150

### 6. Use the `agg()` method to find all the above

In [44]:
# Gather the reductions from the previous exercises into a single agg() call.
# The combined result is reported as float64, so the integer counts are shown
# with decimals next to the mean.
city_mpg.agg(['count', 'size', 'nunique', 'mean', 'max'])

count      41144.000000
size       41144.000000
nunique      105.000000
mean          18.369045
max          150.000000
Name: city08, dtype: float64