# Internet access in CT, U.S.

Dataset: B28002 PRESENCE AND TYPES OF INTERNET SUBSCRIPTIONS IN HOUSEHOLD, via U.S. Census Bureau 2014 ACS 1-year estimates

http://factfinder.census.gov/faces/tableservices/jsf/pages/productview.xhtml?pid=ACS_14_1YR_B28002&prodType=table

Here's the form on which people are asked about Internet access.

http://www2.census.gov/programs-surveys/acs/methodology/questionnaires/2014/quest14.pdf

2010 OLR report on forcing cable companies to provide access to rural locations:

https://www.cga.ct.gov/2010/rpt/2010-R-0063.htm

https://www.cga.ct.gov/2013/rpt/2013-R-0254.htm

http://pellcenter.org/wp-content/uploads/2015/09/State-Level-Broadband-Policy-FINAL.pdf



In [327]:
import pandas as pd

## Column-code -> description lookup tables, one per geography set.
heds_us = pd.read_csv("data/us_states/ACS_14_1YR_B28002_metadata.csv")
heds_ct = pd.read_csv("data/ct_counties/ACS_14_1YR_B28002_metadata.csv")

## Sanity check: the US and CT sets should carry the same column heads
## (uncomment to test)
#heds_us == heds_ct

## Preview the first five metadata rows
heds_us.head(n=5)

Unnamed: 0,GEO.id,Id
0,GEO.id2,Id2
1,GEO.display-label,Geography
2,HD01_VD01,Estimate; Total:
3,HD02_VD01,Margin of Error; Total:
4,HD01_VD02,Estimate; With an Internet subscription:


In [328]:
## Row 1 of each file holds the column codes; skipping it makes the
## descriptive headings on row 2 the column names.
us_states = pd.read_csv("data/us_states/ACS_14_1YR_B28002.csv", skiprows=1)
ct_counties = pd.read_csv("data/ct_counties/ACS_14_1YR_B28002.csv", skiprows=1)

##ct_counties
## Sanity check: both frames should have identical columns
## (uncomment to test)
#ct_counties.columns == us_states.columns

In [329]:

### IGNORE ALL OF THIS: No need to convert col heds from meta data. Spreadsheets already have
### descriptive headings in second row.

## Get descriptive name from column code
#def get_col_name(col_code):
#    try:
#        return heds_us[heds_us["GEO.id"].str.lower() == col_code.lower()].iloc[0]["Id"]
#    except:
#        return col_code
#get_col_name("HD01_VD01")

#ct_counties.columns = map(lambda x: get_col_name(x), ct_counties.columns)
#us_states.columns = map(lambda x: get_col_name(x), us_states.columns)

#ct_counties


In [330]:
## ex: clean_ct_county("Hartford County, Connecticut") -> "Hartford"
def clean_ct_county(geography):
    """Strip the " County, <state>" suffix from a geography label.

    Parameters
    ----------
    geography : str
        Label such as "Hartford County, Connecticut".

    Returns
    -------
    str
        Everything before " County, " (e.g. "Hartford"). Labels that do not
        contain the marker, or that start with it, are returned unchanged.
    """
    cutoff = geography.find(" County, ")
    ## Leave differently formatted data alone. str.find signals "not found"
    ## with -1 (not 0); slicing with -1 would silently drop the last character.
    if cutoff <= 0:
        return geography
    return geography[:cutoff]

## Uncomment to test function 
##"'" + clean_ct_county("Hartford County, Connecticut") + "'"

## Series.map applies the cleaner element-wise and returns a Series on both
## Python 2 and 3; the builtin map() is only list-returning on Python 2.
ct_counties["Geography"] = ct_counties["Geography"].map(clean_ct_county)

## About 20% of households in New Haven County have no Internet access at home

In [355]:
def pct_no_access (df):
    """Build a per-geography report on households without Internet access.

    The input frame is left untouched; all derived columns live on the
    returned frame only.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "Geography", "Estimate; Total:",
        "Estimate; No Internet access" and
        "Margin of Error; No Internet access".

    Returns
    -------
    pandas.DataFrame
        Columns, in order: geography, pct_no_internet, pct_internet,
        no_internet, total, moe, moe_pct. Sorted by pct_internet ascending.
        The three derived columns are NaN on rows whose geography is null.
    """
    ## Select by name and rename by name, so column order cannot mislabel data.
    report = df[["Geography",
                 "Estimate; No Internet access",
                 "Estimate; Total:",
                 "Margin of Error; No Internet access"]].rename(columns={
                     "Geography": "geography",
                     "Estimate; No Internet access": "no_internet",
                     "Estimate; Total:": "total",
                     "Margin of Error; No Internet access": "moe"})

    has_geography = report["geography"].notnull()
    share_without = report["no_internet"] * 100 / report["total"]

    ## .where() blanks the value (NaN) on rows that have no geography label
    report["pct_no_internet"] = share_without.where(has_geography)
    report["pct_internet"] = (100 - share_without).where(has_geography)
    ## Margin of error expressed as a percentage of the estimate itself
    report["moe_pct"] = (report["moe"] * 100 / report["no_internet"]).where(has_geography)

    report = report[["geography",
                     "pct_no_internet",
                     "pct_internet",
                     "no_internet",
                     "total",
                     "moe",
                     "moe_pct"]]
    return report.sort_values(by="pct_internet")
## `skipcols` is not a documented DataFrame.to_csv argument, so it is dropped
## here. The row index is still written as the first column; pass index=False
## if that column is not wanted.
pct_no_access(ct_counties).to_csv("ct_counties_internet_at_home.csv")

## Utah has the highest Internet access rate

In [356]:
## `skipcols` is not a documented DataFrame.to_csv argument, so it is dropped
## here. The row index is still written as the first column; pass index=False
## if that column is not wanted.
pct_no_access(us_states).to_csv("us_states_internet_at_home.csv")

In [357]:
## Median share of households without Internet across the states. Select the
## column first: the report also holds the non-numeric geography column, which
## a frame-wide median cannot reduce.
pct_no_access(us_states)["pct_no_internet"].median()

19.92996847572855

In [298]:
## function for making JSON object for map, using Geography as index
import json
def to_json_with_index(df, index_col):
    """Serialise a frame as a JSON object keyed by one of its columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to serialise; one JSON entry is produced per row.
    index_col : str
        Column whose value becomes the key of each row's entry. It is left
        out of the per-row object.

    Returns
    -------
    str
        JSON text of the form {index value: {column: value, ...}, ...}.
        Float values are rounded to one decimal place. Rows sharing an index
        value overwrite each other (last row wins).
    """
    def _native(val):
        ## numpy scalars (e.g. int64 from an all-numeric frame) are not
        ## accepted by json.dumps as values or keys; .item() yields the
        ## equivalent plain Python scalar.
        return val.item() if hasattr(val, "item") else val

    ret_obj = {}
    for _, row in df.iterrows():
        row_obj = {}
        for col in df.columns:
            if col == index_col:
                continue
            val = _native(row[col])
            if isinstance(val, float):
                val = round(val, 1)
            row_obj[col] = val
        ret_obj[_native(row[index_col])] = row_obj
    return json.dumps(ret_obj)

## JSON for the map: Connecticut counties, keyed by county name
to_json_with_index(df=pct_no_access(ct_counties), index_col="geography")

#pct_no_access(ct_counties)

'{"New London": {"pct_no_internet": 16.4, "moe_pct": 12.0, "total": 105504, "no_internet": 17260, "moe": 2074}, "New Haven": {"pct_no_internet": 19.5, "moe_pct": 5.1, "total": 326050, "no_internet": 63619, "moe": 3236}, "Middlesex": {"pct_no_internet": 14.1, "moe_pct": 17.3, "total": 67106, "no_internet": 9463, "moe": 1637}, "Litchfield": {"pct_no_internet": 15.4, "moe_pct": 13.2, "total": 73572, "no_internet": 11329, "moe": 1491}, "Fairfield": {"pct_no_internet": 12.8, "moe_pct": 6.2, "total": 338421, "no_internet": 43152, "moe": 2689}, "Tolland": {"pct_no_internet": 11.7, "moe_pct": 16.3, "total": 53984, "no_internet": 6293, "moe": 1025}, "Windham": {"pct_no_internet": 17.7, "moe_pct": 13.7, "total": 44655, "no_internet": 7911, "moe": 1085}, "Hartford": {"pct_no_internet": 16.5, "moe_pct": 7.0, "total": 346525, "no_internet": 57035, "moe": 3991}}'

In [299]:
## JSON for the map: U.S. states, keyed by state name
to_json_with_index(df=pct_no_access(us_states), index_col="geography")


'{"Mississippi": {"pct_no_internet": 33.3, "moe_pct": 2.3, "total": 1095823, "no_internet": 365183, "moe": 8562}, "Oklahoma": {"pct_no_internet": 24.4, "moe_pct": 1.6, "total": 1459759, "no_internet": 355920, "moe": 5801}, "Delaware": {"pct_no_internet": 19.3, "moe_pct": 5.5, "total": 349743, "no_internet": 67569, "moe": 3692}, "Minnesota": {"pct_no_internet": 16.8, "moe_pct": 2.2, "total": 2129195, "no_internet": 357927, "moe": 7720}, "Illinois": {"pct_no_internet": 19.7, "moe_pct": 1.2, "total": 4772421, "no_internet": 938264, "moe": 11664}, "Georgia": {"pct_no_internet": 21.4, "moe_pct": 1.6, "total": 3587521, "no_internet": 768817, "moe": 12083}, "Arkansas": {"pct_no_internet": 28.9, "moe_pct": 2.8, "total": 1131288, "no_internet": 327261, "moe": 9020}, "New Mexico": {"pct_no_internet": 28.2, "moe_pct": 2.7, "total": 760916, "no_internet": 214441, "moe": 5791}, "Ohio": {"pct_no_internet": 21.2, "moe_pct": 1.1, "total": 4593172, "no_internet": 975407, "moe": 10943}, "Indiana": {"pct

## Broadband access

In [300]:
## list() displays the column names on both Python 2 and 3; an identity
## map() only shows a lazy map object on Python 3.
list(ct_counties.columns)

['Id',
 'Id2',
 'Geography',
 'Estimate; Total:',
 'Margin of Error; Total:',
 'Estimate; With an Internet subscription:',
 'Margin of Error; With an Internet subscription:',
 'Estimate; With an Internet subscription: - Dial-up alone',
 'Margin of Error; With an Internet subscription: - Dial-up alone',
 'Estimate; With an Internet subscription: - DSL:',
 'Margin of Error; With an Internet subscription: - DSL:',
 'Estimate; With an Internet subscription: - DSL: - With mobile broadband',
 'Margin of Error; With an Internet subscription: - DSL: - With mobile broadband',
 'Estimate; With an Internet subscription: - DSL: - Without mobile broadband',
 'Margin of Error; With an Internet subscription: - DSL: - Without mobile broadband',
 'Estimate; With an Internet subscription: - Cable modem:',
 'Margin of Error; With an Internet subscription: - Cable modem:',
 'Estimate; With an Internet subscription: - Cable modem: - With mobile broadband',
 'Margin of Error; With an Internet subscription: 

In [301]:
#ct_counties[['Geography','Estimate; With an Internet subscription: - Fiber-optic:',
# 'Margin of Error; With an Internet subscription: - Fiber-optic:',
# 'Estimate; With an Internet subscription: - Fiber-optic: - With mobile broadband',
# 'Margin of Error; With an Internet subscription: - Fiber-optic: - With mobile broadband',
# 'Estimate; With an Internet subscription: - Fiber-optic: - Without mobile broadband',
# 'Margin of Error; With an Internet subscription: - Fiber-optic: - Without mobile broadband',
#]]

def fiber_report(df):
    """Build a per-geography report on fiber-optic Internet subscriptions.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "Geography", "Estimate; Total:",
        "Estimate; With an Internet subscription:" and the fiber-optic
        estimate and margin-of-error columns.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: geography, with_internet, total, with_fiber_subs,
        moe, moe_fiber, fiber_pct. Sorted by fiber_pct descending.
        moe is the fiber margin of error as given; moe_fiber is that margin
        as a percentage of the fiber estimate; fiber_pct is the fiber
        estimate as a percentage of with_internet. The two derived columns
        are NaN on rows whose geography is null.
    """
    fiber_est = "Estimate; With an Internet subscription: - Fiber-optic:"
    fiber_moe = "Margin of Error; With an Internet subscription: - Fiber-optic:"

    ## Rename by name (not by position) and work on a new frame, so the
    ## assignments below never target a slice of the caller's data.
    report = df[["Geography",
                 "Estimate; With an Internet subscription:",
                 "Estimate; Total:",
                 fiber_est,
                 fiber_moe]].rename(columns={
                     "Geography": "geography",
                     "Estimate; With an Internet subscription:": "with_internet",
                     "Estimate; Total:": "total",
                     fiber_est: "with_fiber_subs",
                     fiber_moe: "moe"})

    has_geography = report["geography"].notnull()
    ## .where() blanks the value (NaN) on rows that have no geography label
    report["moe_fiber"] = (report["moe"] * 100 / report["with_fiber_subs"]).where(has_geography)
    report["fiber_pct"] = (report["with_fiber_subs"] * 100 / report["with_internet"]).where(has_geography)

    return report.sort_values(by="fiber_pct", ascending=False)

## Fiber-optic subscription report for the Connecticut counties
fiber_report(df=ct_counties)

Unnamed: 0,geography,with_internet,total,with_fiber_subs,moe,moe_fiber,fiber_pct
0,Fairfield,286626,338421,13593,1763,12.969911,4.742417
1,Hartford,278623,346525,12236,1622,13.255966,4.391597
5,New London,85993,105504,3240,900,27.777778,3.767749
4,New Haven,250775,326050,9280,1219,13.135776,3.700528
6,Tolland,46006,53984,1526,489,32.044561,3.316959
3,Middlesex,56230,67106,1573,419,26.636999,2.797439
2,Litchfield,60609,73572,1659,546,32.911392,2.737217
7,Windham,33950,44655,217,238,109.677419,0.639175


In [303]:
## Fiber-optic subscription report for the U.S. states
fiber_report(df=us_states)

Unnamed: 0,geography,with_internet,total,with_fiber_subs,moe,moe_fiber,fiber_pct
20,Maryland,1747997,2165438,424677,8858,2.085821,24.295065
39,Rhode Island,316499,409654,69179,3776,5.458304,21.857573
7,Delaware,266404,349743,55996,2847,5.084292,21.019204
46,Virginia,2404990,3083820,467564,8800,1.882095,19.441411
30,New Jersey,2600646,3194844,480357,10045,2.091153,18.47068
32,New York,5607687,7282398,734888,13119,1.78517,13.105011
21,Massachusetts,2065260,2549336,268375,6557,2.443223,12.994732
38,Pennsylvania,3701070,4945972,451938,8013,1.773031,12.211009
34,North Dakota,230082,305431,23704,1508,6.361795,10.302414
43,Texas,6829438,9277197,636058,10436,1.640731,9.313475


In [315]:
def highspeed_report (df):
    """Build a per-geography report on dial-up-only Internet subscriptions.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "Geography" plus the estimate and margin-of-error
        columns for "With an Internet subscription:" and for
        "With an Internet subscription: - Dial-up alone".

    Returns
    -------
    pandas.DataFrame
        The five selected columns under their original names, plus
        moe_dialup_pct: the dial-up margin of error as a percentage of the
        dial-up estimate (NaN on rows whose Geography is null). Row order is
        unchanged.
    """
    dialup_est = "Estimate; With an Internet subscription: - Dial-up alone"
    dialup_moe = "Margin of Error; With an Internet subscription: - Dial-up alone"

    ## Explicit copy, so the new column is never written to a slice of df
    report = df[["Geography",
                 "Estimate; With an Internet subscription:",
                 "Margin of Error; With an Internet subscription:",
                 dialup_est,
                 dialup_moe]].copy()

    has_geography = report["Geography"].notnull()
    ## .where() blanks the value (NaN) on rows that have no geography label
    report["moe_dialup_pct"] = (report[dialup_moe] * 100 / report[dialup_est]).where(has_geography)
    return report
## Dial-up-only subscription report for the Connecticut counties
highspeed_report(df=ct_counties)

#map(lambda x : x,ct_counties.columns)


Unnamed: 0,Geography,Estimate; With an Internet subscription:,Margin of Error; With an Internet subscription:,Estimate; With an Internet subscription: - Dial-up alone,Margin of Error; With an Internet subscription: - Dial-up alone,moe_dialup_pct
0,Fairfield,286626,3702,1466,519,35.402456
1,Hartford,278623,4213,2795,797,28.515206
2,Litchfield,60609,1858,766,383,50.0
3,Middlesex,56230,2163,278,218,78.417266
4,New Haven,250775,4301,1314,457,34.7793
5,New London,85993,2720,565,299,52.920354
6,Tolland,46006,1645,191,160,83.769634
7,Windham,33950,1399,191,157,82.198953
