In [1]:
import altair as alt
import pandas as pd
import numpy as np

# Income 
## setup

In [9]:
# Read the income-related CSV inputs; the targets unpack in the same order as
# the paths listed below.
#   income_access, income_reasons - author's note: 2010, 2013, 2015, 2017
#   reasons_doc                   - lookup table used to turn reason ids into text
(income_access, income_no_access, income_reasons, reasons_doc) = map(
    pd.read_csv,
    [
        "data/income-internet-access.csv",
        "data/income-no-access.csv",
        "data/income-reasons.csv",
        "data/reasons_doc.csv",
    ],
)
income_no_access

Unnamed: 0,income,2010_no_access,2015_no_access,2017_no_access
0,Less than 10k,55.901513,46.192595,37.072659
1,10-19k,49.661223,45.282512,38.71626
2,20-29k,39.724911,35.94716,28.275212
3,30-39k,28.927634,29.249005,24.932965
4,40-49k,18.764369,22.193286,16.122799
5,50-74k,14.451187,15.739641,13.895489
6,75-99k,8.73186,12.372852,10.18507
7,100k or more,4.761172,10.030907,9.532456


## Pre-process

In [51]:
# Long-format "access" table: one row per (income band, year).
# An earlier variant built this frame from income_access (dial-up / high-speed
# columns per year); it was replaced by the no-access table used here.
df1 = pd.melt(income_no_access, id_vars=["income"]).assign(
    # Column names look like "2010_no_access": the first four characters are the year.
    year=lambda frame: frame["variable"].map(lambda column: column[:4]),
    # The source holds the "no access" percentage; flip it to "has access".
    value=lambda frame: 100 - frame["value"],
    label="with access at home",
    type="access",
)
df1.head()

Unnamed: 0,income,variable,value,year,label,type
0,Less than 10k,2010_no_access,44.098487,2010,with access at home,access
1,10-19k,2010_no_access,50.338777,2010,with access at home,access
2,20-29k,2010_no_access,60.275089,2010,with access at home,access
3,30-39k,2010_no_access,71.072366,2010,with access at home,access
4,40-49k,2010_no_access,81.235631,2010,with access at home,access


In [34]:
def annotate_reason(text, reasons=None):
    """Translate a melted column name such as ``"2010_1"`` into its reason text.

    The reason id is the integer that follows the last underscore in ``text``,
    so multi-digit ids (``"2017_12"``) are handled as well as single-digit ones.

    Parameters
    ----------
    text : str
        Column name whose suffix after the last ``_`` is a reason id.
    reasons : pandas.DataFrame, optional
        Lookup table with ``reason_id`` and ``reason`` columns. Defaults to the
        notebook-level ``reasons_doc`` frame.

    Returns
    -------
    str
        The ``reason`` value of the first row whose ``reason_id`` matches.

    Raises
    ------
    ValueError
        If the suffix of ``text`` is not an integer.
    IndexError
        If no row of the lookup table carries the parsed id.
    """
    if reasons is None:
        reasons = reasons_doc

    # Parse everything after the last underscore instead of only the last character.
    reason_id = int(text.rsplit("_", 1)[-1])

    matches = reasons.loc[reasons["reason_id"] == reason_id, "reason"]
    if matches.empty:
        # Same exception type an empty ``.values[0]`` lookup raises, with context added.
        raise IndexError(f"no reason with id {reason_id} (from {text!r})")
    return matches.iloc[0]

# Long-format "reasons" table: one row per (income band, year, reason).
# Column names look like "2010_1": the first four characters are the year and
# annotate_reason resolves the trailing id to readable text.
df2 = pd.melt(income_reasons, id_vars=["income"]).assign(
    year=lambda frame: frame["variable"].map(lambda column: column[:4]),
    label=lambda frame: frame["variable"].map(annotate_reason),
    type="reasons",
)
df2.head()

Unnamed: 0,income,variable,value,year,label,type
0,Less than 10k,2010_1,30.862084,2010,Don’t need it (not interested),reasons
1,10-19k,2010_1,41.329476,2010,Don’t need it (not interested),reasons
2,20-29k,2010_1,40.457514,2010,Don’t need it (not interested),reasons
3,30-39k,2010_1,42.66852,2010,Don’t need it (not interested),reasons
4,40-49k,2010_1,41.510531,2010,Don’t need it (not interested),reasons


In [52]:
# Stack the access and reasons tables into the single long frame the charts read.
# Row labels of df1 and df2 are kept as-is, so index values repeat across the two parts.
income_df = pd.concat([df1, df2])

# Display rank of each income band, lowest income first; the charts sort their
# y axis on the derived "order" column.
income_order = {
    "Less than 10k": 1,
    "10-19k": 2,
    "20-29k": 3,
    "30-39k": 4,
    "40-49k": 5,
    "50-74k": 6,
    "75-99k": 7,
    "100k or more": 8
}

# Indexing the dict makes an unexpected income band raise KeyError rather than
# silently producing a missing rank.
income_df["order"] = income_df["income"].map(lambda band: income_order[band])
income_df

Unnamed: 0,income,variable,value,year,label,type,order
0,Less than 10k,2010_no_access,44.098487,2010,with access at home,access,1
1,10-19k,2010_no_access,50.338777,2010,with access at home,access,2
2,20-29k,2010_no_access,60.275089,2010,with access at home,access,3
3,30-39k,2010_no_access,71.072366,2010,with access at home,access,4
4,40-49k,2010_no_access,81.235631,2010,with access at home,access,5
...,...,...,...,...,...,...,...
203,30-39k,2017_7,9.122306,2017,Other reasons,reasons,4
204,40-49k,2017_7,8.006254,2017,Other reasons,reasons,5
205,50-74k,2017_7,8.766039,2017,Other reasons,reasons,6
206,75-99k,2017_7,10.812828,2017,Other reasons,reasons,7


## charts

In [62]:
# Back-to-back bar chart for income bands:
#   left   - % with internet access at home, x axis reversed
#   middle - income band labels
#   right  - stacked % per reason for not going online at home
# A radio button selects the survey year shown in all three panels.

radio_year = alt.binding_radio(options=["2010", "2015", "2017"], name="year: ")

select_year = alt.selection_single(
    name="select",
    fields=["year"],
    bind=radio_year,
    init={"year": "2010"},
)

# Hover selections (the left panel also snaps to the nearest bar); a click clears them.
select_bar1 = alt.selection_single(on="mouseover", nearest=True, clear="click")
select_bar2 = alt.selection_single(on="mouseover", clear="click")

# Selection on the "label" field, driven by the legend.
select_reason = alt.selection_single(fields=["label"], bind="legend")

# Selected bars are fully opaque, the others slightly faded.
OpacityCondition1 = alt.condition(select_bar1, alt.value(1.0), alt.value(0.8))
OpacityCondition2 = alt.condition(select_bar2, alt.value(1.0), alt.value(0.8))

# Reasons keep their scheme colour while selected; unselected ones turn light grey.
ColorCondition = alt.condition(
    select_reason,
    alt.Color(
        "label",
        title="reasons",
        type="nominal",
        scale=alt.Scale(scheme="tableau20"),
        sort=alt.EncodingSortField("value", order="ascending"),
    ),
    alt.value("lightgrey"),
)

# Shared base: every panel reads income_df restricted to the selected year.
base = (
    alt.Chart(income_df)
    .add_selection(select_year)
    .transform_filter(select_year)
    .properties(width=250)
)

left = (
    base.transform_filter(alt.datum.type == "access")
    .encode(
        # TODO: the x domain is left to Altair's default; pinning it with
        # alt.Scale(domain=...) was tried and is still unresolved.
        x=alt.X("value:Q", title=None, sort=alt.SortOrder("descending")),
        y=alt.Y(
            "income:N",
            title=None,
            axis=None,
            sort=alt.EncodingSortField(field="order", order="ascending"),
        ),
        opacity=OpacityCondition1,
        tooltip="value",
    )
    .mark_bar()
    .properties(title="% having access to Internet at home")
    .add_selection(select_bar1)
)

# Restrict the label panel to the "access" rows so each income band is drawn
# once instead of once per row of every type (which overplots the text).
middle = (
    base.transform_filter(alt.datum.type == "access")
    .encode(
        y=alt.Y(
            "income:N",
            sort=alt.EncodingSortField(field="order", order="ascending"),
            axis=None,
        ),
        text=alt.Text("income:N"),
    )
    .mark_text()
    .properties(width=20)
)

# The reason text lives in the "label" column; filtering on a non-existent
# "reason" field would let "Other reasons" through.
# TODO: sort the stacked segments (e.g. via an alt.Order encoding).
right = (
    base.transform_filter(
        (alt.datum.type == "reasons") & (alt.datum.label != "Other reasons")
    )
    .encode(
        x=alt.X("value:Q", title=None),
        y=alt.Y(
            "income:N",
            title=None,
            axis=None,
            sort=alt.EncodingSortField(field="order", order="ascending"),
        ),
        color=ColorCondition,
        opacity=OpacityCondition2,
        tooltip=["label", "value"],
    )
    .mark_bar()
    .properties(title="% reasons for not going online at home")
    .add_selection(select_bar2)
    .add_selection(select_reason)
)

# configure_legend is applied once: a second call replaces the first, so only
# the effective orient="right" setting is kept.
alt.concat(
    left, middle, right, spacing=5, title="Digital Divides Across Income Levels"
).resolve_scale(
    color="independent"
).configure_axis(
    grid=False
).configure_view(
    strokeOpacity=0
).configure_legend(
    orient="right"
).configure_title(
    fontSize=15
)


Reference for the back-to-back bar chart layout (Altair gallery): https://altair-viz.github.io/gallery/us_population_pyramid_over_time.html

In [8]:
## TO DO: 
# 1. back-to-back [done]
# 2. color map [done]
# 3. add base layer [done]
# 4. axis sorting [done]
# 5. interactive legend [done]
# 6. more dimensions
# 7. sort stacked bars

# Age

## setup

In [69]:
# Read the age-group inputs. Despite its name, age_access is loaded from the
# *no-access* file; the conversion to "has access" happens downstream.
# age_reasons - author's note: 2010, 2015, 2017
age_access, age_reasons = (
    pd.read_csv(path)
    for path in ("data/age-no-access.csv", "data/age-reasons.csv")
)

In [70]:
# Long-format "access" table: one row per (age group, year).
df3 = pd.melt(age_access, id_vars=["age"]).assign(
    # Column names look like "2010_no_access": the first four characters are the year.
    year=lambda frame: frame["variable"].map(lambda column: column[:4]),
    # The source holds the "no access" percentage; flip it to "has access".
    value=lambda frame: 100 - frame["value"],
    label="with access at home",
    type="access",
)
df3.head()

Unnamed: 0,age,variable,value,year,label,type
0,3 and 4,2010_no_access,73.704342,2010,with access at home,access
1,5 to 10,2010_no_access,77.703952,2010,with access at home,access
2,11 to 14,2010_no_access,81.883263,2010,with access at home,access
3,15 to 18,2010_no_access,81.79306,2010,with access at home,access
4,19 to 24,2010_no_access,78.553644,2010,with access at home,access


In [71]:
# Long-format "reasons" table: one row per (age group, year, reason).
# Column names look like "2010_1": the first four characters are the year and
# annotate_reason resolves the trailing id to readable text.
df4 = pd.melt(age_reasons, id_vars=["age"]).assign(
    year=lambda frame: frame["variable"].map(lambda column: column[:4]),
    label=lambda frame: frame["variable"].map(annotate_reason),
    type="reasons",
)
df4.head(11)

Unnamed: 0,age,variable,value,year,label,type
0,3 and 4,2010_1,20.968277,2010,Don’t need it (not interested),reasons
1,5 to 10,2010_1,21.491974,2010,Don’t need it (not interested),reasons
2,11 to 14,2010_1,21.841247,2010,Don’t need it (not interested),reasons
3,15 to 18,2010_1,22.080982,2010,Don’t need it (not interested),reasons
4,19 to 24,2010_1,24.242601,2010,Don’t need it (not interested),reasons
5,25 to 29,2010_1,26.606125,2010,Don’t need it (not interested),reasons
6,30 to 39,2010_1,27.110663,2010,Don’t need it (not interested),reasons
7,40 to 49,2010_1,31.928397,2010,Don’t need it (not interested),reasons
8,50 to 59,2010_1,44.525043,2010,Don’t need it (not interested),reasons
9,60 to 69,2010_1,54.348231,2010,Don’t need it (not interested),reasons


In [72]:
# Stack the access and reasons tables into the single long frame the charts read.
age_df = pd.concat([df3, df4])

# Display rank of each age group, youngest first (1-based); the charts sort
# their y axis on the derived "order" column.
age_order = {
    group: rank
    for rank, group in enumerate(
        (
            "3 and 4",
            "5 to 10",
            "11 to 14",
            "15 to 18",
            "19 to 24",
            "25 to 29",
            "30 to 39",
            "40 to 49",
            "50 to 59",
            "60 to 69",
            "70 or older",
        ),
        start=1,
    )
}

# Dict indexing raises KeyError for an age group missing from the ranking.
age_df["order"] = age_df["age"].apply(lambda group: age_order[group])
age_df.head()

Unnamed: 0,age,variable,value,year,label,type,order
0,3 and 4,2010_no_access,73.704342,2010,with access at home,access,1
1,5 to 10,2010_no_access,77.703952,2010,with access at home,access,2
2,11 to 14,2010_no_access,81.883263,2010,with access at home,access,3
3,15 to 18,2010_no_access,81.79306,2010,with access at home,access,4
4,19 to 24,2010_no_access,78.553644,2010,with access at home,access,5


## charts

In [99]:
# Back-to-back bar chart for age groups:
#   left   - % with internet access at home, x axis reversed
#   middle - age group labels
#   right  - stacked % per reason for not going online at home
# A radio button selects the survey year shown in all three panels.

radio_year = alt.binding_radio(options=["2010", "2015", "2017"], name="year: ")

select_year = alt.selection_single(
    name="select",
    fields=["year"],
    bind=radio_year,
    init={"year": "2010"},
)

# Hover selections (the left panel also snaps to the nearest bar); a click clears them.
select_bar1 = alt.selection_single(on="mouseover", nearest=True, clear="click")
select_bar2 = alt.selection_single(on="mouseover", clear="click")

# Selection on the "label" field, driven by the legend.
select_reason = alt.selection_single(fields=["label"], bind="legend")

# Selected bars are fully opaque, the others slightly faded.
OpacityCondition1 = alt.condition(select_bar1, alt.value(1.0), alt.value(0.8))
OpacityCondition2 = alt.condition(select_bar2, alt.value(1.0), alt.value(0.8))

# Reasons keep their scheme colour while selected; unselected ones turn light grey.
ColorCondition = alt.condition(
    select_reason,
    alt.Color(
        "label",
        title="reasons",
        type="nominal",
        scale=alt.Scale(scheme="category20c"),
        sort=alt.EncodingSortField("value", order="ascending"),
    ),
    alt.value("lightgrey"),
)

# Shared base: every panel reads age_df restricted to the selected year.
base = (
    alt.Chart(age_df)
    .add_selection(select_year)
    .transform_filter(select_year)
    .properties(width=250)
)

left = (
    base.transform_filter(alt.datum.type == "access")
    .encode(
        x=alt.X("value:Q", title=None, sort=alt.SortOrder("descending")),
        y=alt.Y(
            "age:N",
            title=None,
            axis=None,
            sort=alt.EncodingSortField(field="order", order="ascending"),
        ),
        opacity=OpacityCondition1,
        tooltip="value",
    )
    .mark_bar()
    .properties(title="% having access to Internet at home")
    .add_selection(select_bar1)
)

# Restrict the label panel to the "access" rows so each age group is drawn
# once instead of once per row of every type (which overplots the text).
middle = (
    base.transform_filter(alt.datum.type == "access")
    .encode(
        y=alt.Y(
            "age:N",
            sort=alt.EncodingSortField(field="order", order="ascending"),
            axis=None,
        ),
        text=alt.Text("age:N"),
    )
    .mark_text()
    .properties(width=20)
)

# The reason text lives in the "label" column; filtering on a non-existent
# "reason" field would let "Other reasons" through.
# TODO: sort the stacked segments (e.g. via an alt.Order encoding).
right = (
    base.transform_filter(
        (alt.datum.type == "reasons") & (alt.datum.label != "Other reasons")
    )
    .encode(
        x=alt.X("value:Q", title=None),
        y=alt.Y(
            "age:N",
            title=None,
            axis=None,
            sort=alt.EncodingSortField(field="order", order="ascending"),
        ),
        color=ColorCondition,
        opacity=OpacityCondition2,
        tooltip=["label", "value"],
    )
    .mark_bar()
    .properties(title="% reasons for not going online at home")
    .add_selection(select_bar2)
    .add_selection(select_reason)
)

# configure_legend is applied once: a second call replaces the first, so only
# the effective orient="right" setting is kept.
alt.concat(
    left, middle, right, spacing=5, title="Digital Divides Across Age Groups"
).resolve_scale(
    color="independent"
).configure_axis(
    grid=False
).configure_view(
    strokeOpacity=0
).configure_legend(
    orient="right"
).configure_title(
    fontSize=15
)

# Race

## setup

In [67]:
# Read the race inputs. Despite its name, race_access is loaded from the
# *no-access* file; the conversion to "has access" happens downstream.
# race_reasons - author's note: 2010, 2015, 2017
race_access, race_reasons = (
    pd.read_csv(path)
    for path in ("data/race-no-access.csv", "data/race-reasons.csv")
)


In [88]:
# Long-format "access" table: one row per (race, year).
df5 = pd.melt(race_access, id_vars=["race"]).assign(
    # Column names look like "2010_no_access": the first four characters are the year.
    year=lambda frame: frame["variable"].map(lambda column: column[:4]),
    # The source holds the "no access" percentage; flip it to "has access".
    value=lambda frame: 100 - frame["value"],
    label="with access at home",
    type="access",
)
df5.head(20)

Unnamed: 0,race,variable,value,year,label,type
0,White,2010_no_access,80.641545,2010,with access at home,access
1,Black,2010_no_access,62.891871,2010,with access at home,access
2,Hispanic,2010_no_access,63.487807,2010,with access at home,access
3,Asian,2010_no_access,87.038843,2010,with access at home,access
4,Pacific Islander,2010_no_access,70.269607,2010,with access at home,access
5,American Indian/Alaska Native,2010_no_access,61.250278,2010,with access at home,access
6,White,2015_no_access,80.747093,2015,with access at home,access
7,Black,2015_no_access,67.407393,2015,with access at home,access
8,Hispanic,2015_no_access,70.128123,2015,with access at home,access
9,Asian,2015_no_access,84.249162,2015,with access at home,access


In [80]:
# Long-format "reasons" table: one row per (race, year, reason).
# Column names look like "2010_1": the first four characters are the year and
# annotate_reason resolves the trailing id to readable text.
df6 = pd.melt(race_reasons, id_vars=["race"]).assign(
    year=lambda frame: frame["variable"].map(lambda column: column[:4]),
    label=lambda frame: frame["variable"].map(annotate_reason),
    type="reasons",
)
df6.head()

Unnamed: 0,race,variable,value,year,label,type
0,White,2010_1,48.218758,2010,Don’t need it (not interested),reasons
1,Black,2010_1,32.318015,2010,Don’t need it (not interested),reasons
2,Hispanic,2010_1,28.776042,2010,Don’t need it (not interested),reasons
3,Asian,2010_1,41.576101,2010,Don’t need it (not interested),reasons
4,Pacific Islander,2010_1,22.969788,2010,Don’t need it (not interested),reasons


In [89]:
# Stack the access and reasons tables into the single long frame the charts read.
race_df = pd.concat([df5, df6])

# Display rank of each race category (1-based). Per the author's note the
# ranking follows the internet access data.
race_order = {
    group: rank
    for rank, group in enumerate(
        (
            "Asian",
            "Pacific Islander",
            "White",
            "Hispanic",
            "Black",
            "American Indian/Alaska Native",
        ),
        start=1,
    )
}

# Dict indexing raises KeyError for a category missing from the ranking.
race_df["order"] = race_df["race"].apply(lambda group: race_order[group])
race_df.head()

Unnamed: 0,race,variable,value,year,label,type,order
0,White,2010_no_access,80.641545,2010,with access at home,access,3
1,Black,2010_no_access,62.891871,2010,with access at home,access,5
2,Hispanic,2010_no_access,63.487807,2010,with access at home,access,4
3,Asian,2010_no_access,87.038843,2010,with access at home,access,1
4,Pacific Islander,2010_no_access,70.269607,2010,with access at home,access,2


In [95]:
# Back-to-back bar chart for race categories:
#   left   - % with internet access at home, x axis reversed, bars coloured by value
#   middle - race labels
#   right  - stacked % per reason for not going online at home
# A radio button selects the survey year shown in all three panels.

radio_year = alt.binding_radio(options=["2010", "2015", "2017"], name="year: ")

select_year = alt.selection_single(
    name="select",
    fields=["year"],
    bind=radio_year,
    init={"year": "2010"},
)

# Hover selections (the left panel also snaps to the nearest bar); a click clears them.
select_bar1 = alt.selection_single(on="mouseover", nearest=True, clear="click")
select_bar2 = alt.selection_single(on="mouseover", clear="click")

# Selection on the "label" field, driven by the legend.
select_reason = alt.selection_single(fields=["label"], bind="legend")

# Selected bars are fully opaque, the others slightly faded.
OpacityCondition1 = alt.condition(select_bar1, alt.value(1.0), alt.value(0.8))
OpacityCondition2 = alt.condition(select_bar2, alt.value(1.0), alt.value(0.8))

# Reasons keep their scheme colour while selected; unselected ones turn light grey.
ColorCondition = alt.condition(
    select_reason,
    alt.Color(
        "label",
        title="reasons",
        type="nominal",
        scale=alt.Scale(scheme="accent"),
        sort=alt.EncodingSortField("value", order="ascending"),
    ),
    alt.value("lightgrey"),
)

# Shared base: every panel reads race_df restricted to the selected year.
base = (
    alt.Chart(race_df)
    .add_selection(select_year)
    .transform_filter(select_year)
    .properties(width=250)
)

left = (
    base.transform_filter(alt.datum.type == "access")
    .encode(
        x=alt.X("value:Q", title=None, sort=alt.SortOrder("descending")),
        y=alt.Y(
            "race:N",
            title=None,
            axis=None,
            sort=alt.EncodingSortField(field="order", order="ascending"),
        ),
        opacity=OpacityCondition1,
        color=alt.Color("value:Q", legend=None),
        tooltip="value",
    )
    .mark_bar()
    .properties(title="% having access to Internet at home")
    .add_selection(select_bar1)
)

# Restrict the label panel to the "access" rows so each race label is drawn
# once instead of once per row of every type (which overplots the text).
middle = (
    base.transform_filter(alt.datum.type == "access")
    .encode(
        y=alt.Y(
            "race:N",
            sort=alt.EncodingSortField(field="order", order="ascending"),
            axis=None,
        ),
        text=alt.Text("race:N"),
    )
    .mark_text()
    .properties(width=20)
)

# The reason text lives in the "label" column; filtering on a non-existent
# "reason" field would let "Other reasons" through.
# TODO: sort the stacked segments (e.g. via an alt.Order encoding).
right = (
    base.transform_filter(
        (alt.datum.type == "reasons") & (alt.datum.label != "Other reasons")
    )
    .encode(
        x=alt.X("value:Q", title=None),
        y=alt.Y(
            "race:N",
            title=None,
            axis=None,
            sort=alt.EncodingSortField(field="order", order="ascending"),
        ),
        color=ColorCondition,
        opacity=OpacityCondition2,
        tooltip=["label", "value"],
    )
    .mark_bar()
    .properties(title="% reasons for not going online at home")
    .add_selection(select_bar2)
    .add_selection(select_reason)
)

# configure_legend is applied once: a second call replaces the first, so only
# the effective orient="right" setting is kept.
alt.concat(
    left, middle, right, spacing=5, title="Digital Divides Across Races"
).resolve_scale(
    color="independent"
).configure_axis(
    grid=False
).configure_view(
    strokeOpacity=0
).configure_legend(
    orient="right"
).configure_title(
    fontSize=15
)

Available color schemes (Vega documentation): https://vega.github.io/vega/docs/schemes/