In [2]:
import pandas as pd
import altair as alt

In [3]:
from vega_datasets import data
cars = data.cars()
cars.head()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


In [6]:
dados = (cars
    .assign(
         Miles_per_Gallon = cars.Miles_per_Gallon * 1.609 / 3.785, # milhas por galão -> km por litro
         Displacement = cars.Displacement * 1.609,                 # milhas -> km
         Weight_in_lbs = cars.Weight_in_lbs * 0.454                # libras -> kg
    )
    .rename(columns={
        'Name':'Nome',
        'Miles_per_Gallon':'Quilometros_por_litro',
        'Cylinders':'Cilindros',
        'Displacement':'Deslocamento_kg',
        'Horsepower':'Potencia_cv',
        'Weight_in_lbs':'Peso_kg',
        'Acceleration':'Aceleracao_s',
        'Year':'Ano',
        'Origin':'Origem'
    })     
)

dados.head()

Unnamed: 0,Nome,Quilometros_por_litro,Cilindros,Deslocamento_kg,Potencia_cv,Peso_kg,Aceleracao_s,Ano,Origem
0,chevrolet chevelle malibu,7.651783,8,493.963,130.0,1590.816,12.0,1970-01-01,USA
1,buick skylark 320,6.376486,8,563.15,165.0,1676.622,11.5,1970-01-01,USA
2,plymouth satellite,7.651783,8,511.662,150.0,1559.944,11.0,1970-01-01,USA
3,amc rebel sst,6.801585,8,489.136,150.0,1558.582,12.0,1970-01-01,USA
4,ford torino,7.226684,8,485.918,140.0,1565.846,10.5,1970-01-01,USA


Uma visão simples dos dados

In [11]:
alt.Chart(dados).mark_point().encode(
    x = 'Potencia_cv',
    y = 'Quilometros_por_litro',
    color = 'Origem'
)

In [14]:
alt.Chart(dados).mark_point().encode(
    alt.X('Potencia_cv'),
    alt.Y('Quilometros_por_litro'),
    alt.Color('Origem')
)

The two styles of specifying encodings can be interleaved: `x='precip', alt.Y('city')` is also a valid input to the `encode` function.

In the examples above, the data type for each field is inferred automatically based on its type within the Pandas data frame. We can also explicitly indicate the data type to Altair by annotating the field name:

- `'b:N'` indicates a *nominal* type (unordered, categorical data),
- `'b:O'` indicates an *ordinal* type (rank-ordered data),
- `'b:Q'` indicates a *quantitative* type (numerical data with meaningful magnitudes), and
- `'b:T'` indicates a *temporal* type (date/time data)

For example, `alt.X('precip:N')`.