In [4]:
import altair as alt
import pandas as pd
import numpy as np

# Income 
## Setup

In [27]:
# Both income tables cover the survey years 2010, 2013, 2015 and 2017.
income_access = pd.read_csv(
    "data/income-internet-access.csv"
)
income_reasons = pd.read_csv(
    "data/income-reasons.csv"
)

# Lookup table pairing each numeric reason_id with its reason text.
reasons_doc = pd.read_csv(
    "data/reasons_doc.csv"
)
reasons_doc

Unnamed: 0,reason_id,reason
0,1,Don’t need it (not interested)
1,2,Too expensive
2,3,Can use it somewhere else
3,4,Not available in area
4,5,No computer or computer inadequate
5,6,Privacy or security concerns
6,7,Other reasons


## Pre-process

In [26]:
# Reshape the access table to long form: one row per (income bracket, source column),
# then derive the survey year and the connection type from the source column name.
df1 = pd.melt(
    income_access,
    id_vars=["income"],
    value_vars=[
        "dial_up_2010", "high_speed_int_2010",
        "dial_up_2013", "high_speed_int_2013",
        "dial_up_2015", "high_speed_int_2015",
        "dial_up_2017", "high_speed_int_2017",
    ],
).assign(
    # The last four characters of the column name are the year.
    year=lambda frame: frame["variable"].map(lambda label: label[-4:]),
    # Column names starting with "d" are the dial-up series; everything else is high-speed.
    type=lambda frame: frame["variable"].map(
        lambda label: "Dial up service" if label[0] == "d" else "High-speed internet service"
    ),
)
df1.head()


Unnamed: 0,income,variable,value,year,type
0,Less than 10k,dial_up_2010,5.474397,2010,Dial up service
1,10-19k,dial_up_2010,6.385029,2010,Dial up service
2,20-29k,dial_up_2010,5.295396,2010,Dial up service
3,30-39k,dial_up_2010,4.754989,2010,Dial up service
4,40-49k,dial_up_2010,4.426353,2010,Dial up service


In [35]:
def annotate_reason(text, reasons=None):
    """Translate a melted column label such as ``"2010_1"`` into its reason text.

    Parameters
    ----------
    text : str
        Column label ending in a numeric reason id. When the label contains
        an underscore, everything after the last underscore is parsed as the
        id, so multi-digit ids work. Labels without an underscore fall back
        to their last character.
    reasons : pandas.DataFrame, optional
        Lookup table with ``reason_id`` and ``reason`` columns. Defaults to
        the notebook-level ``reasons_doc`` frame.

    Returns
    -------
    str
        The ``reason`` value of the first row whose ``reason_id`` matches.

    Raises
    ------
    ValueError
        If the id portion of ``text`` is not an integer.
    IndexError
        If no row of the lookup table has the parsed ``reason_id``.
    """
    if reasons is None:
        reasons = reasons_doc

    # Parse the full suffix rather than a single character so that ids >= 10
    # (e.g. "2017_12") are not truncated to their last digit.
    _, separator, suffix = text.rpartition("_")
    reason_id = int(suffix if separator else text[-1])

    matches = reasons.loc[reasons["reason_id"] == reason_id, "reason"]
    if matches.empty:
        # Still an IndexError, as before, but with a message that names the cause.
        raise IndexError(
            f"no reason with reason_id={reason_id} (parsed from {text!r}) in the lookup table"
        )
    return matches.iloc[0]

# Reshape the reasons table to long form, then split each melted column label
# into its year prefix and the reason text that annotate_reason resolves for it.
df2 = pd.melt(income_reasons, id_vars=["income"]).assign(
    # The first four characters of the label are the year.
    year=lambda frame: frame["variable"].map(lambda label: label[:4]),
    reason=lambda frame: frame["variable"].map(annotate_reason),
)
df2.head()

Unnamed: 0,income,variable,value,year,reason
0,Less than 10k,2010_1,55.901513,2010,Don’t need it (not interested)
1,10-19k,2010_1,49.661223,2010,Don’t need it (not interested)
2,20-29k,2010_1,39.724911,2010,Don’t need it (not interested)
3,30-39k,2010_1,28.927634,2010,Don’t need it (not interested)
4,40-49k,2010_1,18.764369,2010,Don’t need it (not interested)


## Charts

In [36]:
# Preview the first five rows of the long-format access table used by the first chart.
df1.head(5)

Unnamed: 0,income,variable,value,year,type
0,Less than 10k,dial_up_2010,5.474397,2010,Dial up service
1,10-19k,dial_up_2010,6.385029,2010,Dial up service
2,20-29k,dial_up_2010,5.295396,2010,Dial up service
3,30-39k,dial_up_2010,4.754989,2010,Dial up service
4,40-49k,dial_up_2010,4.426353,2010,Dial up service


In [56]:
# Dropdown widget bound to a single-value selection on the "year" field.
input_dropdown = alt.binding_select(
    options=["2010", "2013", "2015", "2017"]
)
selection = alt.selection_single(
    name="select ",
    fields=["year"],
    bind=input_dropdown,
)

# Horizontal bars of value per income bracket, coloured by connection type and
# filtered to the year picked in the dropdown.
bar1 = (
    alt.Chart(df1)
    .mark_bar()
    .encode(
        x="value:Q",
        y=alt.Y("income:N", sort="-x"),
        color="type",
    )
    .add_selection(selection)
    .transform_filter(selection)
)

bar1

In [45]:
# Preview the first five rows of the long-format reasons table used by the second chart.
df2.head(5)

Unnamed: 0,income,variable,value,year,reason
0,Less than 10k,2010_1,55.901513,2010,Don’t need it (not interested)
1,10-19k,2010_1,49.661223,2010,Don’t need it (not interested)
2,20-29k,2010_1,39.724911,2010,Don’t need it (not interested)
3,30-39k,2010_1,28.927634,2010,Don’t need it (not interested)
4,40-49k,2010_1,18.764369,2010,Don’t need it (not interested)


In [59]:
# List the distinct reason labels present in the long-format reasons table.
df2["reason"].unique()

array(['Don’t need it (not interested)', 'Too expensive',
       'Can use it somewhere else', 'Not available in area',
       'No computer or computer inadequate',
       'Privacy or security concerns', 'Other reasons'], dtype=object)

In [65]:
# Dropdown widget bound to a single-value selection on the "year" field.
# Note: this rebinds the notebook-level `input_dropdown` and `selection` names.
input_dropdown = alt.binding_select(
    options=["2010", "2013", "2015", "2017"]
)
selection = alt.selection_single(
    name="select ",
    fields=["year"],
    bind=input_dropdown,
)

# Horizontal bars of value per income bracket, coloured by reason, filtered to
# the year picked in the dropdown and excluding the "Other reasons" category.
bar2 = (
    alt.Chart(df2)
    .mark_bar()
    .encode(
        x="value:Q",
        y=alt.Y("income:N", sort="-x"),
        color="reason",
    )
    .add_selection(selection)
    .transform_filter(selection)
    .transform_filter(alt.datum.reason != "Other reasons")
)

bar2

In [64]:
# Show the two charts side by side: shared y scale, separate colour scales,
# legends placed underneath.
alt.hconcat(bar1, bar2).resolve_scale(
    y="shared",
    color="independent",
).configure_legend(orient="bottom")

Reference example for a back-to-back bar chart (Altair gallery): https://altair-viz.github.io/gallery/us_population_pyramid_over_time.html

In [None]:
## TODO:
# 1. back-to-back layout
# 2. color map
# 3. add base layer
# 4. axis sorting