In [1]:
import altair as alt
import pandas as pd
import numpy as np

# Income 
## setup

In [23]:
income_access = pd.read_csv("data/income-internet-access.csv") # 2010, 2013, 2015, 2017
income_reasons = pd.read_csv("data/income-reasons.csv") # 2010, 2013, 2015, 2017
reasons_doc = pd.read_csv("data/reasons_doc.csv")
reasons_doc

Unnamed: 0,reason_id,reason
0,1,Don’t need it (not interested)
1,2,Too expensive
2,3,Can use it somewhere else
3,4,Not available in area
4,5,No computer or computer inadequate
5,6,Privacy or security concerns
6,7,Other reasons


## Pre-process

In [24]:
df1 = income_access[["income", "dial_up_2010", "high_speed_int_2010", "dial_up_2013", "high_speed_int_2013", "dial_up_2015", "high_speed_int_2015", "dial_up_2017", "high_speed_int_2017"]]
df1 = pd.melt(df1, id_vars=["income"])
df1["year"] = df1["variable"].map(lambda x: x[-4:])
df1["label"] = df1["variable"].map(lambda x: "Dial up service" if x[0]=="d" else "High-speed internet service")
df1["type"] = "access"
df1.head()

Unnamed: 0,income,variable,value,year,label,type
0,Less than 10k,dial_up_2010,5.474397,2010,Dial up service,access
1,10-19k,dial_up_2010,6.385029,2010,Dial up service,access
2,20-29k,dial_up_2010,5.295396,2010,Dial up service,access
3,30-39k,dial_up_2010,4.754989,2010,Dial up service,access
4,40-49k,dial_up_2010,4.426353,2010,Dial up service,access


In [25]:
df1[df1["year"]=="2010"]

Unnamed: 0,income,variable,value,year,label,type
0,Less than 10k,dial_up_2010,5.474397,2010,Dial up service,access
1,10-19k,dial_up_2010,6.385029,2010,Dial up service,access
2,20-29k,dial_up_2010,5.295396,2010,Dial up service,access
3,30-39k,dial_up_2010,4.754989,2010,Dial up service,access
4,40-49k,dial_up_2010,4.426353,2010,Dial up service,access
5,50-74k,dial_up_2010,3.258702,2010,Dial up service,access
6,75-99k,dial_up_2010,2.599761,2010,Dial up service,access
7,100k or more,dial_up_2010,1.670988,2010,Dial up service,access
8,Less than 10k,high_speed_int_2010,84.768608,2010,High-speed internet service,access
9,10-19k,high_speed_int_2010,83.2612,2010,High-speed internet service,access


In [26]:
def annotate_reason(text):
    reason_id = int(text[-1])
    reason = reasons_doc[reasons_doc["reason_id"]==reason_id]["reason"].values[0]
    return reason

df2 = pd.melt(income_reasons, id_vars=["income"])
df2["year"] = df2["variable"].map(lambda x: x[:4])
df2["label"] = df2["variable"].map(lambda x: annotate_reason(x))
df2["type"] = "reasons"
df2[df2["year"]=="2010"]

Unnamed: 0,income,variable,value,year,label,type
0,Less than 10k,2010_1,30.862084,2010,Don’t need it (not interested),reasons
1,10-19k,2010_1,41.329476,2010,Don’t need it (not interested),reasons
2,20-29k,2010_1,40.457514,2010,Don’t need it (not interested),reasons
3,30-39k,2010_1,42.66852,2010,Don’t need it (not interested),reasons
4,40-49k,2010_1,41.510531,2010,Don’t need it (not interested),reasons
5,50-74k,2010_1,42.708921,2010,Don’t need it (not interested),reasons
6,75-99k,2010_1,43.704048,2010,Don’t need it (not interested),reasons
7,100k or more,2010_1,44.098169,2010,Don’t need it (not interested),reasons
8,Less than 10k,2010_2,39.890187,2010,Too expensive,reasons
9,10-19k,2010_2,31.307251,2010,Too expensive,reasons


In [44]:
income_df = pd.concat([df1, df2])
income_df[income_df["year"]=="2010"]

income_order = {
    "Less than 10k": 1,
    "10-19k": 2,
    "20-29k": 3,
    "30-39k": 4,
    "40-49k": 5,
    "50-74k": 6,
    "75-99k": 7,
    "100k or more": 8
}

income_df["order"] = income_df["income"].map(lambda x: income_order[x])
income_df

Unnamed: 0,income,variable,value,year,label,type,order
0,Less than 10k,dial_up_2010,5.474397,2010,Dial up service,access,1
1,10-19k,dial_up_2010,6.385029,2010,Dial up service,access,2
2,20-29k,dial_up_2010,5.295396,2010,Dial up service,access,3
3,30-39k,dial_up_2010,4.754989,2010,Dial up service,access,4
4,40-49k,dial_up_2010,4.426353,2010,Dial up service,access,5
...,...,...,...,...,...,...,...
203,30-39k,2017_7,9.122306,2017,Other reasons,reasons,4
204,40-49k,2017_7,8.006254,2017,Other reasons,reasons,5
205,50-74k,2017_7,8.766039,2017,Other reasons,reasons,6
206,75-99k,2017_7,10.812828,2017,Other reasons,reasons,7


## Charts

In [132]:
radio_year = alt.binding_radio(options=["2010","2015","2017"], name="year: ")

select_year = alt.selection_single(name="select",
                                   fields=["year"],
                                   bind=radio_year,
                                   init={"year": "2010"})

select_bar1 = alt.selection_single(
    on='mouseover',
    nearest=True,
    clear="click"
)

select_bar2 = alt.selection_single(
    on='mouseover',
    clear="click"
)

select_reason = alt.selection_single(
    fields=["label"],
    bind="legend"
)


OpacityCondition1 = alt.condition(select_bar1, alt.value(1.0), alt.value(0.8))
OpacityCondition2 = alt.condition(select_bar2, alt.value(1.0), alt.value(0.8))
ColorCondition = alt.condition(select_reason, 
                               alt.Color("label", 
                                         title="reasons", 
                                         type="nominal",
                                         scale=alt.Scale(scheme="tableau20"), 
                                         sort=alt.EncodingSortField("value", order='ascending')),
                               alt.value("lightgrey")
                              )

# palette = alt.Scale(range=['lightgreen', 'darkgreen', 'olive'])

base = alt.Chart(income_df).add_selection(
    select_year
).transform_filter(
    select_year
).properties(
    width=250, 
)

left = base.transform_filter(
    (alt.datum.type=="access") & (alt.datum.label=="High-speed internet service")
).encode(
    x=alt.X("value:Q",
            title=None,
           sort=alt.SortOrder("descending")),
    y=alt.Y("income:N", 
            title=None, 
            axis=None,
            sort=alt.EncodingSortField(field="order", order='ascending')),
    opacity=OpacityCondition1,
    tooltip="value"
).mark_bar().properties(
    title="% having high-speed internet service at home"
).add_selection(
    select_bar1
)

middle = base.encode(
    y=alt.Y("income:N", 
            sort=alt.EncodingSortField(field="order", order='ascending'),
            axis=None),
    text=alt.Text("income:N")
).mark_text().properties(width=20)

right = base.encode(
    x=alt.X("value:Q",
           title=None),
    y=alt.Y("income:N",
            title=None,
            axis=None,
            sort=alt.EncodingSortField(field="order", order='ascending')),
    color=ColorCondition,
#     order=alt.Order("label", fields="value", sort="ascending"),
    opacity=OpacityCondition2,
    tooltip=["label","value"]
    
).transform_filter(
    (alt.datum.type=="reasons") & (alt.datum.reason!="Other reasons")
# ).transform_filter(
#     select_reason
).mark_bar().properties(
    title="% reasons for not going online at home"
).add_selection(
    select_bar2
).add_selection(
    select_reason
)


alt.concat(left, middle, right, spacing=5).configure_legend(
    orient='bottom'
).resolve_scale(
    color="independent", 
#     y="shared"
).configure_axis(
    grid=False
).configure_view(
    strokeOpacity=0
).configure_legend(
    orient="right"
)


back-to-back bar chart: https://altair-viz.github.io/gallery/us_population_pyramid_over_time.html

In [133]:
## TO DO: 
# 1. back-to-back [done]
# 2. color map [done]
# 3. add base layer [done]
# 4. axis sorting [done]
# 5. interactive legend [done]
# 6. more dimensions
# 7. sort stacked bars