In [8]:
import pandas as pd
import altair as alt
import numpy as np
import us
%matplotlib notebook
import plotly.express as px
import seaborn as sns

In [9]:
## state-level household counts: totals and households with a computer (cell phones included)
nces_household = pd.read_csv(filepath_or_buffer="data/nces-household.csv")
# preview the first ten rows
nces_household.head(n=10)

Unnamed: 0,state,total_households_2015,with_computer_incl_cell_2015,total_households_2016,with_computer_incl_cell_2016,total_households_2017,with_computer_incl_cell_2017
0,Alabama,1846400,1490700,1852500,1554400,1841700,1583300
1,Alaska,250200,230000,248500,234600,250700,236700
2,Arizona,2463000,2158600,2519100,2293500,2553000,2356300
3,Arkansas,1144700,935500,1142700,965400,1153100,992700
4,California,12896300,11577000,12944200,11924200,13005100,12160500
5,Colorado,2074700,1893200,2109000,1970700,2139200,2016400
6,Connecticut,1343700,1187500,1357300,1214300,1356800,1237500
7,Delaware,352600,308500,351100,319800,357900,334600
8,District of Columbia,281800,251800,281200,253600,281500,259200
9,Florida,7463200,6529200,7573500,6827300,7690000,7073900


In [10]:
## share of households with a computer (incl. cell phones): one pct_<year> column per year
for yr in ("2015", "2016", "2017"):
    nces_household[f"pct_{yr}"] = (
        nces_household[f"with_computer_incl_cell_{yr}"]
        / nces_household[f"total_households_{yr}"]
    )

## reshape wide -> long so that each row is one state in one year
nces_household_long = nces_household.melt(
    id_vars=["state"],
    value_vars=["pct_2015", "pct_2016", "pct_2017"],
)
# "pct_2015" -> "2015": keep characters 4..7 of the melted column name
nces_household_long["year"] = nces_household_long["variable"].str[4:8]


In [11]:
## per-year mean of the state-level computer-access rates, broadcast back onto every row
# Select the numeric 'value' column before transforming: averaging the whole grouped
# frame also tries to average the string 'state' column, which raises a TypeError on
# pandas >= 2.0 (older releases dropped that column instead).
# NOTE(review): this is the unweighted mean across rows (one per state), not a
# household-weighted national rate -- confirm that is what "national average" should mean.
nces_household_long['nat_mean'] = (
    nces_household_long.groupby(['year', 'variable'])['value'].transform('mean')
)
nces_household_long.head(10)

Unnamed: 0,state,variable,value,year,nat_mean
0,Alabama,pct_2015,0.807355,2015,0.866208
1,Alaska,pct_2015,0.919265,2015,0.866208
2,Arizona,pct_2015,0.876411,2015,0.866208
3,Arkansas,pct_2015,0.817245,2015,0.866208
4,California,pct_2015,0.897699,2015,0.866208
5,Colorado,pct_2015,0.912517,2015,0.866208
6,Connecticut,pct_2015,0.883754,2015,0.866208
7,Delaware,pct_2015,0.874929,2015,0.866208
8,District of Columbia,pct_2015,0.893542,2015,0.866208
9,Florida,pct_2015,0.874853,2015,0.866208


In [17]:
## gap between each state's rate and the per-year mean, in percentage points (2 decimals)
nces_household_long['pct_diff_from_mean'] = (
    (nces_household_long['value'] - nces_household_long['nat_mean']).mul(100).round(2)
)

## map states to their abbreviations
# us.states.lookup returns None for a name it cannot match, so resolve every distinct
# name once and fail with the offending names rather than an opaque
# "'NoneType' object has no attribute 'abbr'".
state_matches = {
    name: us.states.lookup(name)
    for name in nces_household_long['state'].unique()
}
unmatched_states = [name for name, match in state_matches.items() if match is None]
if unmatched_states:
    raise ValueError(f"No state abbreviation found for: {unmatched_states}")

nces_household_long['state_abbr'] = nces_household_long['state'].map(
    {name: match.abbr for name, match in state_matches.items()}
)


In [30]:
## choropleth of each state's gap from the per-year mean, one year at a time
# The long table holds one row per state AND year, so plotting it whole puts every
# state on the map several times. Split it by year and let the dropdown swap one
# year's values into the single choropleth trace.
years = sorted(nces_household_long["year"].unique())
rows_by_year = {
    yr: nces_household_long[nces_household_long["year"] == yr]
    for yr in years
}

# One symmetric colour range shared by all years: colours stay comparable when the
# selected year changes, and 0 (= the mean) stays at the centre of the diverging scale.
max_abs_diff = float(nces_household_long["pct_diff_from_mean"].abs().max())

fig = px.choropleth(rows_by_year[years[0]],  # start on the earliest year
                    locations="state_abbr",  # DataFrame column with locations
                    color="pct_diff_from_mean",  # DataFrame column with color values
                    color_continuous_scale=px.colors.diverging.BrBG,
                    color_continuous_midpoint=0,
                    range_color=[-max_abs_diff, max_abs_diff],
                    locationmode='USA-states')  # Set to plot as US States

# Dropdown: "restyle" expects {trace_attribute: [value_for_each_trace]}. "year" is not
# a choropleth attribute, so each button replaces the trace's locations and z instead.
year_buttons = [
    dict(
        label=str(yr),
        method="restyle",
        args=[{
            "locations": [rows_by_year[yr]["state_abbr"].tolist()],
            "z": [rows_by_year[yr]["pct_diff_from_mean"].tolist()],
        }],
    )
    for yr in years
]

fig.update_layout(
    updatemenus=[
        dict(
            buttons=year_buttons,
            showactive=True,
            x=0.1,
            xanchor="left",
            y=1.1,
            yanchor="top"
        ),
    ]
)

fig.update_layout(
    title_text="% of Households w/ Computer Access (incl. smartphones)",  # Create a Title
    # a continuous colour axis draws a colorbar, not a legend, so title the colorbar
    coloraxis_colorbar_title_text="% Difference from National Average",
    geo_scope='usa',  # Plot only the USA instead of globe
    annotations=[
        dict(text="Selected Year:", showarrow=False,
             x=0, y=1)
    ]
)

fig.show()

In [None]:
## TODO: small multiples comparing computer access and internet access

In [39]:
## one small scatter panel per state abbreviation, wrapped ten panels to a row
sm_multiples = px.scatter(
    nces_household_long,
    x="year",
    y="value",
    color="variable",
    facet_col="state_abbr",
    facet_col_wrap=10,
)

# Title, then cosmetic tweaks: clear the y-axis `matches` setting on every panel and
# shorten each facet label to the text after its last "=" (abbreviation only).
(
    sm_multiples
    .update_layout(title_text="State by State Estimates of Computer Access")
    .update_yaxes(matches=None)
    .for_each_annotation(
        lambda label: label.update(text=label.text.rsplit("=", 1)[-1])
    )
)

sm_multiples.show()