# I am importing the Altair package using the code below.

In [2]:
import altair as alt
from vega_datasets import data

# I am loading one of the built-in datasets made available by the vega_datasets import above.

In [3]:
# The vega_datasets package ships sample datasets to practice with;
# load the cars dataset and preview its first three rows.
cars = data.cars()
cars.head(n=3)

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA


# Below is a bar chart with the Miles_per_Gallon column (binned) on the x-axis and the number of records on the y-axis. The bars are stacked because I am coloring them by the values of the Origin column.

In [4]:
# Stacked histogram of fuel economy, built in four steps:
#   1. Wrap the dataset in alt.Chart.
#   2. Pick the mark type (bars, slightly transparent).
#   3. Map columns to visual channels inside encode():
#        x     -> Miles_per_Gallon, grouped into bins
#        y     -> count(), an aggregate Altair evaluates from its shorthand string
#        color -> Origin
#   4. Set the chart size.
alt.Chart(cars).mark_bar(opacity=0.8).encode(
    x=alt.X('Miles_per_Gallon', bin=True),
    y='count()',
    color='Origin',
).properties(width=800, height=500)

# The chart below works like a heat map: for each origin, the color shows the number of records that fall within each miles-per-gallon range.

In [5]:
# Heat-map-style chart drawn with bar marks:
#   x     -> Miles_per_Gallon, grouped into bins
#   y     -> Origin
#   color -> count() of records, which produces the color scale
alt.Chart(cars).mark_bar().encode(
    x=alt.X('Miles_per_Gallon', bin=True),
    y='Origin',
    color='count()',
).properties(width=800, height=500)

# Here are three charts: two scatterplots and one bar chart. All of them pull from the same data source.

In [19]:
# Brush selection: click and drag on a scatterplot to select a rectangular
# region along both the x and y axes.
interval = alt.selection_interval(encodings=['x', 'y'])

# Scatterplot of Horsepower (y) against Miles_per_Gallon (x).
#   - Points inside the brush keep their Origin color; all others turn light gray.
#   - The tooltip shows the listed columns for the point under the cursor.
#   - .interactive() is deliberately not chained here: pan/zoom and the brush
#     both respond to dragging, so combining them on one chart makes the two
#     gestures conflict.
circle = alt.Chart(cars).mark_circle().encode(
    y='Horsepower',
    x='Miles_per_Gallon',
    color=alt.condition(interval, 'Origin', alt.value('lightgray')),
    tooltip=['Name', 'Origin', 'Horsepower', 'Miles_per_Gallon']
).properties(
    selection=interval
)

# Red vertical rules marking a column mean. Layered charts share one x scale,
# so each rule has to aggregate the same column its scatterplot puts on x;
# otherwise the line marks the mean of a different quantity on that axis.
rule_mpg = alt.Chart(cars).mark_rule(color='red').encode(
    x='mean(Miles_per_Gallon):Q'
)
rule = alt.Chart(cars).mark_rule(color='red').encode(
    x='mean(Acceleration):Q'
)

# Horizontal bar chart: Origin on the y-axis, number of records on the x-axis.
# transform_filter(interval) restricts the count to the points currently
# brushed in a scatterplot.
hist = alt.Chart(cars).mark_bar().encode(
    y='Origin',
    x='count()',
    color='Origin'
).transform_filter(
    interval
)

# Composition operators: "+" layers charts, "|" places them side by side,
# "&" stacks them vertically.
# Left: Miles_per_Gallon scatterplot with its mean line.
# Right: the same scatterplot re-encoded with Acceleration on x, with its mean line.
# Bottom: the filtered bar chart.
(((circle + rule_mpg) | (circle.encode(x='Acceleration') + rule)) & hist)

# Here are three charts: two scatterplots and one bar chart. All of them pull from the same data source. However, they use different visual mechanics for viewing.

In [21]:
# Multi-selection on the color channel: clicking a bar selects the records
# that share its color value.
interval = alt.selection_multi(encodings=['color'])

# Horizontal bar chart: Origin on the y-axis, number of records on the x-axis.
# This chart owns the selection; bars that are not selected are drawn light
# gray, and the color legend is hidden.
hist = alt.Chart(cars).mark_bar().encode(
    x='count()',
    y='Origin',
    color=alt.condition(interval, 'Origin', alt.value('lightgray'), legend=None)
).properties(
    selection=interval
)

# Scatterplot of Horsepower (y) against Miles_per_Gallon (x), colored by
# Origin with the legend hidden. transform_filter(interval) keeps only the
# records matching the bars selected above.
circle = alt.Chart(cars).mark_circle().encode(
    y='Horsepower:Q',
    x='Miles_per_Gallon:Q',
    color=alt.Color('Origin:N', legend=None),
    tooltip=['Name', 'Origin', 'Horsepower', 'Miles_per_Gallon']
).interactive().transform_filter(
    interval
)

# Red vertical rules marking a column mean. Layered charts share one x scale,
# so each rule has to aggregate the same column its scatterplot puts on x.
# The rules carry no filter, so they show the mean over the whole dataset.
rule_mpg = alt.Chart(cars).mark_rule(color='red').encode(
    x='mean(Miles_per_Gallon):Q'
)
rule = alt.Chart(cars).mark_rule(color='red').encode(
    x='mean(Acceleration):Q'
)

# Top: selector bar chart. Bottom-left: Miles_per_Gallon scatterplot with its
# mean line. Bottom-right: Acceleration scatterplot with its mean line.
(hist & ((circle + rule_mpg) | (circle.encode(x='Acceleration') + rule)))

In [18]:
# Point chart of mean Miles_per_Gallon per Year, colored by Origin,
# with pan/zoom enabled.
alt.Chart(cars).mark_point().encode(
    alt.X('Year:T'),
    alt.Y('mean(Miles_per_Gallon)'),
    alt.Color('Origin'),
).interactive().properties(width=800, height=500)

# I am creating two line graphs and two area graphs.

In [16]:
# Yearly trend per Origin for two measures, layered on one shared y-axis:
#   - a line for the mean of the measure
#   - a translucent band between the ci0 and ci1 aggregates
#     (lower/upper bound of the confidence interval of the mean)
mpg_line = alt.Chart(cars).mark_line().encode(
    x='Year:T',
    color='Origin',
    y='mean(Miles_per_Gallon)'
)
mpg_band = alt.Chart(cars).mark_area(opacity=0.3).encode(
    x='Year:T',
    color='Origin',
    y='ci0(Miles_per_Gallon)',
    y2='ci1(Miles_per_Gallon)'
)
hp_band = alt.Chart(cars).mark_area(opacity=0.3).encode(
    x='Year:T',
    color='Origin',
    y='ci0(Horsepower)',
    y2='ci1(Horsepower)'
)
hp_line = alt.Chart(cars).mark_line().encode(
    x='Year:T',
    color='Origin',
    y='mean(Horsepower)'
)

# The layers are parenthesized so that .interactive() and .properties() apply
# to the whole layered chart. A trailing method call binds tighter than "+",
# so without the parentheses it would only be applied to the last layer.
(mpg_line + mpg_band + hp_band + hp_line).interactive().properties(
    width=800,
    height=500
)

# I am using a new dataset from the practice datasets available through the vega_datasets package.

In [10]:
# Lift Altair's default row limit (5,000 rows) so larger datasets can be charted.
alt.data_transformers.enable('default', max_rows=None)

# Load the Seattle temperature dataset into `temps` and preview its first
# rows to check the column names and values.
temps = data.seattle_temps()
temps.head(5)

Unnamed: 0,date,temp
0,2010-01-01 00:00:00,39.4
1,2010-01-01 01:00:00,39.2
2,2010-01-01 02:00:00,39.0
3,2010-01-01 03:00:00,38.9
4,2010-01-01 04:00:00,38.8


# Below is a point chart depicting the temperature over the course of the months.

In [11]:
# Every temperature reading plotted against the month of its timestamp
# (temporal x-axis), with pan/zoom enabled.
alt.Chart(temps).mark_point().encode(
    x='month(date):T',
    y='temp:Q',
).properties(width=800, height=500).interactive()

In [12]:
# Bar chart of the mean temperature for each month (month as an ordinal x-axis).
alt.Chart(temps).mark_bar().encode(
    x='month(date):O',
    y='mean(temp):Q',
).properties(width=800, height=500).interactive()

In [13]:
# Heat map of mean temperature: hour of day on the x-axis, month on the
# y-axis, cell color = mean(temp).
# .interactive() is omitted on purpose: it binds pan/zoom to the chart's
# scales, which Vega-Lite supports only for continuous scales, and both axes
# here are ordinal, so it would have no effect.
alt.Chart(temps).mark_rect().encode(
    x=alt.X('date:O', timeUnit='hours'),
    y=alt.Y('date:O', timeUnit='month'),
    color='mean(temp):Q'
).properties(
    width=800,
    height=500
)

In [14]:
# Load the Seattle weather dataset and preview its first two rows.
weather = data.seattle_weather()
weather.head(n=2)

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain


In [15]:
# Multi-selection on the color channel: clicking a bar selects the records
# that share its color value.
interval = alt.selection_multi(encodings=['color'])

# Horizontal bar chart of record counts per weather type. It owns the
# selection; bars that are not selected are drawn light gray and the legend
# is hidden.
hist = alt.Chart(weather).mark_bar().encode(
    alt.X('count()'),
    alt.Y('weather'),
    color=alt.condition(interval, 'weather', alt.value('lightgray'), legend=None),
).properties(selection=interval)

# Rule marks spanning temp_min to temp_max at each date, colored by weather
# type (legend hidden), with pan/zoom enabled and filtered down to the
# weather types selected in the bar chart.
base = alt.Chart(weather).mark_rule(size=2).encode(
    alt.X('date:T'),
    alt.Y('temp_min:Q'),
    y2='temp_max:Q',
    color=alt.Color('weather:N', legend=None),
).properties(width=800, height=500).interactive().transform_filter(interval)

# Stack the temperature-range chart above the selector bar chart.
base & hist