# 🏋 ex8 Python altair

Import altair

In [None]:
import altair as alt

Let's use datasets from [vega_datasets](https://github.com/vega/vega-datasets)

In [None]:
from vega_datasets import data

# Load the `cars` dataset and preview its first five rows
cars = data.cars()
cars.head(5)

# Scatterplot

Minimal scatterplot using `cars` dataset:

In [None]:
# One point per row: Miles_per_Gallon on x, Horsepower on y
alt.Chart(cars).mark_point().encode(
    x=alt.X('Miles_per_Gallon'),
    y=alt.Y('Horsepower'),
)

With `circle` marks:

In [None]:
# Circle marks drawn at 50% opacity (semi-transparent)
alt.Chart(cars).mark_circle(opacity=0.5).encode(
    x=alt.X('Miles_per_Gallon'),
    y=alt.Y('Horsepower'),
)

# Bar chart
Bar chart of car model counts per `Origin`:

In [None]:
# The `count(Origin)` shorthand makes Altair aggregate the rows per origin
alt.Chart(cars).mark_bar().encode(
    x=alt.X('Origin'),
    y=alt.Y('count(Origin)'),
)

The same chart, built from a precomputed dataframe of counts:

In [None]:
# Create a dataframe of counts: one row per `Origin` with its number of rows
origin_counts = cars.groupby('Origin').size().reset_index(name='counts')

# Only the last expression of a cell is rendered automatically, so the
# intermediate table has to be displayed explicitly before the chart
display(origin_counts)

# Plot the precomputed counts directly (no aggregation needed in the encoding)
alt.Chart(origin_counts).mark_bar().encode(x='Origin', y='counts')

With the x-axis labels rotated to horizontal and the chart width set:

In [None]:
# labelAngle=0 sets the x-axis label angle to 0 degrees; the chart width is 300
alt.Chart(cars).mark_bar().encode(
    x=alt.X('Origin', axis=alt.Axis(labelAngle=0)),
    y=alt.Y('count(Origin)'),
).properties(width=300)

# Boxplot

In [None]:
# One box per origin summarising its Miles_per_Gallon values
alt.Chart(cars).mark_boxplot().encode(
    alt.X('Origin'),
    alt.Y('Miles_per_Gallon'),
)

# Histogram

In [None]:
# Bin Miles_per_Gallon on x and count the rows falling into each bin on y
alt.Chart(cars).mark_bar().encode(
    x=alt.X('Miles_per_Gallon', bin=True),
    y=alt.Y('count()'),
)

# Line chart

In [None]:
import numpy as np  #import numpy
import pandas as pd  #import pandas

# Sample y = x^2 at 5 evenly spaced points between 0 and 100
x = np.linspace(0, 100, 5)
y = np.square(x)

df = pd.DataFrame({'x': x, 'y': y})

# Connect the sampled points with a line
alt.Chart(df).mark_line().encode(x=alt.X('x'), y=alt.Y('y'))

# Customizations
- Axis labels
- Transparency
- Title
- Tooltips
- Axis range
- Interactive

In [None]:
alt.Chart(cars).mark_circle(opacity=0.5).encode(
    # Custom axis titles; zero=False means the scales need not include zero
    alt.X(
        'Miles_per_Gallon',
        scale=alt.Scale(zero=False),
        axis=alt.Axis(title='Miles per gallon'),
    ),
    alt.Y(
        'Horsepower',
        scale=alt.Scale(zero=False),
        axis=alt.Axis(title='Horsepower'),
    ),
    # Colour by origin, with an explicit legend title
    alt.Color('Origin', legend=alt.Legend(title='Origin')),
    # Fields listed in the tooltip
    tooltip=['Miles_per_Gallon', 'Horsepower'],
).properties(
    title='Cars Data',
    width=300,
    height=180,
).interactive()

---

# Exercises

## 😜 Exercise 1

Create a scatterplot of the `iris` dataset:

 - map `x` to `sepalLength`
 - map `y` to `sepalWidth`
 - map `color` to `species`
 - set x axis label to 'Sepal length (cm)'
 - set y axis label to 'Sepal width (cm)'
 - set title label to 'Iris dataset'

In [None]:
import altair as alt
from vega_datasets import data

# Iris dataset used as the data source for this exercise
df = data.iris()


## 😜 Exercise 2

Create a scatterplot of the `iris` dataset:

 - map `x` to `sepalLength`
 - map `y` to `sepalWidth`
 - map `color` to `species`
 - map `size` to `petalLength`
 - set `color` to `species`
 - set Color legend `title` to `Species by color`
 - set x axis label to 'Sepal length (cm)'
 - set y axis label to 'Sepal width (cm)'
 - set title label to 'Iris dataset'

In [None]:
import altair as alt
from vega_datasets import data

# Load the iris dataset; the exercise chart is built from `df`
df = data.iris()


## 🤔 Exercise 3

Create a scatterplot of the `SMO-VOR-2015.csv` dataset:

 - load `SMO-VOR-2015.csv` in a dataframe
 - map `x` to `ptime`
 - map `y` to `alt`
 - set the mark `opacity` to `0.1`
 - set x axis label to 'Penetration time'
 - set y axis label to 'Altitude (feet)'
 - set title label to 'SMO VOR 2015 dataset'
 - add tooltips for 'ptime', 'flight', 'icao', 'ops'
 - make the plot interactive

In [None]:
import altair as alt
import pandas as pd

df = pd.read_csv('data/SMO-VOR-2015.csv')

# Convert the `ptime` column to pandas datetimes
df['ptime'] = pd.to_datetime(df['ptime'])

# Lift Altair's row limit for this chart's data
alt.data_transformers.disable_max_rows()


## 😜 Exercise 4

Create a boxplot of the `SMO-VOR-2015.csv` dataset:

 - load `SMO-VOR-2015.csv` in a dataframe
 - map `x` to `month`
 - map `y` to `alt`
 - map `color` to `month`
 - set x axis `title` to None
 - set y axis `title` to 'Altitude (feet)'
 - set title label to 'SMO VOR 2015 dataset'

In [None]:
import altair as alt
import pandas as pd

df = pd.read_csv('data/SMO-VOR-2015.csv')

# Month labels in calendar order
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
# Make `month` an ordered categorical that follows the order of `months`
df['month'] = pd.Categorical(df['month'], categories=months, ordered=True)

# Lift Altair's row limit for this chart's data
alt.data_transformers.disable_max_rows()


## 🤔 Exercise 5

Create a histogram of the `SMO-VOR-2015.csv` dataset:

 - load `SMO-VOR-2015.csv` in a dataframe
 - map `x` to `alt`
 - set x axis label to 'Altitude (feet)'
 - set y axis label to 'Counts'
 - set title label to 'SMO VOR 2015 dataset'

In [None]:
import altair as alt
import pandas as pd

# Read the dataset for the histogram
df = pd.read_csv('data/SMO-VOR-2015.csv')

# Lift Altair's row limit
alt.data_transformers.disable_max_rows()  #needed because of dataset size


## 😜 Exercise 6

Create histograms of the `SMO-VOR-2015.csv` dataset faceted by month:

 - load `SMO-VOR-2015.csv` in a dataframe
 - map `x` to `alt`
 - set x axis label to 'Altitude (feet)'
 - set y axis label to 'Counts'
 - set title label to 'SMO VOR 2015 dataset'
 - add faceting with `facet=alt.Facet('month', sort=months)` encoding


In [None]:
import pandas as pd
import altair as alt

df = pd.read_csv('data/SMO-VOR-2015.csv')

# Month labels in calendar order. Defined in this cell so that
# `alt.Facet('month', sort=months)` works on a fresh kernel instead of
# relying on a variable left over from another cell.
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

alt.data_transformers.disable_max_rows()  #needed because of dataset size
