# Get data

In [1]:
import pandas as pd
import altair as alt

In [2]:
# Load the student performance data from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="StudentsPerformance.csv")
# Preview the first five rows to check which columns were read.
df.head(n=5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [3]:
# Swap every space in the column labels for an underscore
# (e.g. "math score" becomes "math_score"); other characters are left as they are.
df.columns = df.columns.str.replace(" ", "_", regex=False)

# Visualize

## Scatter Plot and Horizontal Bar Plot

In [4]:
# Interval selection that the reader creates by dragging over the chart.
brush = alt.selection(type="interval")

# Points inside the brush are colored by gender; all other points are light gray.
gender_or_gray = alt.condition(brush, "gender:N", alt.value("lightgray"))

# Reading score against math score, with the brush attached to the chart.
points = (
    alt.Chart(df)
    .mark_point()
    .encode(
        alt.X("reading_score:Q"),
        alt.Y("math_score:Q"),
        tooltip=["gender", "reading_score", "math_score"],
        color=gender_or_gray,
    )
    .add_selection(brush)
    .properties(title="Click and drag to create a selection region")
)
points

In [6]:
# Interval selection drawn on the scatter plot; it also filters the bar chart below.
brush = alt.selection(type="interval")

# Selected points keep their gender color; unselected points fade to light gray.
gender_or_gray = alt.condition(brush, "gender:N", alt.value("lightgray"))

# Top panel: reading score vs. math score, carrying the brush.
points = (
    alt.Chart(df)
    .mark_point()
    .encode(
        alt.X("reading_score:Q"),
        alt.Y("math_score:Q"),
        tooltip=["gender", "reading_score", "math_score"],
        color=gender_or_gray,
    )
    .add_selection(brush)
)

# Bottom panel: number of rows per parental education level,
# counted only over the rows that fall inside the brush.
bars = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X("count(parental_level_of_education):Q"),
        alt.Y("parental_level_of_education:N"),
        alt.Color("parental_level_of_education:N"),
    )
    .transform_filter(brush)
)

# Stack the two panels vertically under one shared title.
(points & bars).properties(
    title="Click and drag in the scatter plot to create a selection region"
)


## Bar charts

In [7]:
# Brush owned by this cell, so it no longer depends on a `brush` variable left
# over from an earlier cell. It is limited to the x channel because it drives a
# single axis: the bin extent and the x scale domain of the top histogram.
zoom_brush = alt.selection(type="interval", encodings=["x"])

# Top histogram: its bins and x axis follow whatever range is brushed below.
bars1 = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X(
            "math_score:Q",
            bin=alt.Bin(maxbins=30, extent=zoom_brush),
            scale=alt.Scale(domain=zoom_brush),
        ),
        y="count(math_score):Q",
    )
)

# Bottom histogram: full math score distribution; this is where the brush is drawn.
bars2 = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X("math_score:Q", bin=alt.Bin(maxbins=30)),
        y="count(math_score):Q",
    )
    .add_selection(zoom_brush)
)

alt.vconcat(bars1, bars2).properties(
    title="Click and drag the bottom bars to zoom in the top bars"
)

## Scatter plot and bar plot

In [8]:
# Brush owned by this cell, so it no longer depends on a `brush` variable left
# over from an earlier cell. It is limited to the x channel so that the scatter
# plot is filtered by the brushed math score range only, not by the count axis.
filter_brush = alt.selection(type="interval", encodings=["x"])

# Scatter plot showing only the rows that fall inside the brushed range.
points = (
    alt.Chart(df)
    .mark_point()
    .encode(
        x="reading_score:Q",
        y="math_score:Q",
        tooltip=["gender", "reading_score", "math_score"],
    )
    .transform_filter(filter_brush)
)

# Histogram of math scores; this is where the brush is drawn.
bars = (
    alt.Chart(df)
    .mark_bar()
    .encode(alt.X("math_score:Q", bin=alt.Bin(maxbins=30)), y="count(math_score):Q")
    .properties(width=500)
    .add_selection(filter_brush)
)

alt.vconcat(points, bars).properties(
    title="Click and drag the bottom bars to filter the scatter plot"
)


## Multiple Interactions

In [9]:
# --- Dropdown: pick one parental education level ---
# The dropdown options are the distinct values found in the column.
parental_educations = df["parental_level_of_education"].unique()
parental_education_dropdown = alt.binding_select(options=parental_educations)
parental_education_select = alt.selection_single(
    name="Parental Level of Education",
    fields=["parental_level_of_education"],
    bind=parental_education_dropdown,
)

# --- Radio buttons: pick one race/ethnicity group ---
# This column name had no spaces, so the earlier renaming left it unchanged.
ethinicities = df["race/ethnicity"].unique()
ethinicity_radio = alt.binding_radio(options=ethinicities)
ethinicity_select = alt.selection_single(
    name="Race/Ethnicity",
    fields=["race/ethnicity"],
    bind=ethinicity_radio,
)

# Points matching the radio selection are colored by group (no legend);
# the remaining points are drawn in light gray.
ethinicity_color_condition = alt.condition(
    ethinicity_select,
    alt.Color("race/ethnicity:N", legend=None),
    alt.value("lightgray"),
)

# Scatter plot: the dropdown filters which rows are drawn, the radio buttons color them.
chart = (
    alt.Chart(df)
    .mark_point(filled=True)
    .encode(
        x="reading_score:Q",
        y="math_score:Q",
        color=ethinicity_color_condition,
    )
    .add_selection(parental_education_select)
    .add_selection(ethinicity_select)
    .transform_filter(parental_education_select)
    .properties(
        title="Use the dropdown or the radio button to filter the scatter plot"
    )
)
chart