In [1]:
# Dependencies and Setup
import pandas as pd
import os

In [2]:
# Location of the FCC area-table extract (December 2019); read it into a DataFrame
fcc_data = '../resources/fcc/fcc_area_table_december_2019_extract.csv'
fcc_df = pd.read_csv(filepath_or_buffer=fcc_data)
# Preview the first five rows
fcc_df.head(n=5)

Unnamed: 0,type,id,tech,urban_rural,tribal_non,speed,has_0,has_1,has_2,has_3more
0,county,6001,acfosw,R,N,0.2,0,3,0,13621
1,county,6001,acfosw,U,N,0.2,0,4,3,1657698
2,county,6003,acfosw,R,N,0.2,0,0,0,962
3,county,6003,acfosw,R,T,0.2,0,0,0,167
4,county,6005,acfosw,R,N,0.2,0,1,0,24969


In [3]:
# Check the data type of every column in fcc_df.
# Used here to see how `id` is stored before it is concatenated into a GEOID string.
fcc_df.dtypes

type            object
id               int64
tech            object
urban_rural     object
tribal_non      object
speed          float64
has_0            int64
has_1            int64
has_2            int64
has_3more        int64
dtype: object

In [4]:
# Cast the county id column to text so it can be concatenated into a GEOID,
# then re-display the dtypes to confirm the change
fcc_df = fcc_df.astype({"id": str})
fcc_df.dtypes

type            object
id              object
tech            object
urban_rural     object
tribal_non      object
speed          float64
has_0            int64
has_1            int64
has_2            int64
has_3more        int64
dtype: object

In [5]:
# Add full GEOID to join w/ census tables.
# The county code is zero-padded to 5 digits rather than prefixed with a
# hard-coded "0": the fixed prefix is only correct for 4-digit codes such as
# 6001 and would produce a 6-digit suffix for 5-digit county codes.
# astype(str) is a no-op when id is already text and keeps this cell safe to
# run on an integer id column as well.
fcc_df["geo_id"] = "0500000US" + fcc_df["id"].astype(str).str.zfill(5)
fcc_df.head()

Unnamed: 0,type,id,tech,urban_rural,tribal_non,speed,has_0,has_1,has_2,has_3more,geo_id
0,county,6001,acfosw,R,N,0.2,0,3,0,13621,0500000US06001
1,county,6001,acfosw,U,N,0.2,0,4,3,1657698,0500000US06001
2,county,6003,acfosw,R,N,0.2,0,0,0,962,0500000US06003
3,county,6003,acfosw,R,T,0.2,0,0,0,167,0500000US06003
4,county,6005,acfosw,R,N,0.2,0,1,0,24969,0500000US06005


In [6]:
# Drop the original id and type columns in a single call
fcc_df = fcc_df.drop(columns=["id", "type"])
fcc_df.head()

Unnamed: 0,tech,urban_rural,tribal_non,speed,has_0,has_1,has_2,has_3more,geo_id
0,acfosw,R,N,0.2,0,3,0,13621,0500000US06001
1,acfosw,U,N,0.2,0,4,3,1657698,0500000US06001
2,acfosw,R,N,0.2,0,0,0,962,0500000US06003
3,acfosw,R,T,0.2,0,0,0,167,0500000US06003
4,acfosw,R,N,0.2,0,1,0,24969,0500000US06005


In [7]:
# Reorganize columns so geo_id comes first.
# .copy() makes the reordered frame independent of the frame it was selected
# from, so adding columns to fcc_df afterwards does not raise
# SettingWithCopyWarning.
fcc_df = fcc_df[
    [
        "geo_id",
        "tech",
        "urban_rural",
        "tribal_non",
        "speed",
        "has_0",
        "has_1",
        "has_2",
        "has_3more",
    ]
].copy()
fcc_df.head()

Unnamed: 0,geo_id,tech,urban_rural,tribal_non,speed,has_0,has_1,has_2,has_3more
0,0500000US06001,acfosw,R,N,0.2,0,3,0,13621
1,0500000US06001,acfosw,U,N,0.2,0,4,3,1657698
2,0500000US06003,acfosw,R,N,0.2,0,0,0,962
3,0500000US06003,acfosw,R,T,0.2,0,0,0,167
4,0500000US06005,acfosw,R,N,0.2,0,1,0,24969


In [8]:
# Add two derived population columns:
#   has_access - population with at least one provider (has_1 + has_2 + has_3more)
#   total_pop  - has_0 plus the population with access
fcc_df = fcc_df.assign(
    has_access=lambda frame: frame["has_1"] + frame["has_2"] + frame["has_3more"],
    total_pop=lambda frame: frame["has_0"] + frame["has_1"] + frame["has_2"] + frame["has_3more"],
)
fcc_df.head()

Unnamed: 0,geo_id,tech,urban_rural,tribal_non,speed,has_0,has_1,has_2,has_3more,has_access,total_pop
0,0500000US06001,acfosw,R,N,0.2,0,3,0,13621,13624,13624
1,0500000US06001,acfosw,U,N,0.2,0,4,3,1657698,1657705,1657705
2,0500000US06003,acfosw,R,N,0.2,0,0,0,962,962,962
3,0500000US06003,acfosw,R,T,0.2,0,0,0,167,167,167
4,0500000US06005,acfosw,R,N,0.2,0,1,0,24969,24970,24970


In [9]:
# Give the tech and speed columns more descriptive names
fcc_df = fcc_df.rename(
    columns={
        "tech": "broadband_tech",
        "speed": "dl_speed",
    }
)
fcc_df.head()

Unnamed: 0,geo_id,broadband_tech,urban_rural,tribal_non,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop
0,0500000US06001,acfosw,R,N,0.2,0,3,0,13621,13624,13624
1,0500000US06001,acfosw,U,N,0.2,0,4,3,1657698,1657705,1657705
2,0500000US06003,acfosw,R,N,0.2,0,0,0,962,962,962
3,0500000US06003,acfosw,R,T,0.2,0,0,0,167,167,167
4,0500000US06005,acfosw,R,N,0.2,0,1,0,24969,24970,24970


In [10]:
# Replace the technology codes in broadband_tech with readable labels.
broadband_dict = {
    "acfosw": "All Broadband Connections",
    "a": "ADSL",
    "c": "Cable",
    "f": "Fiber",
    "o": "Other",
    "s": "Satellite",
    "w": "Wireless"
}
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: the chained in-place form operates on a temporary Series
# and leaves fcc_df unchanged when pandas Copy-on-Write is active.
fcc_df["broadband_tech"] = fcc_df["broadband_tech"].replace(broadband_dict)
fcc_df.head()

Unnamed: 0,geo_id,broadband_tech,urban_rural,tribal_non,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop
0,0500000US06001,All Broadband Connections,R,N,0.2,0,3,0,13621,13624,13624
1,0500000US06001,All Broadband Connections,U,N,0.2,0,4,3,1657698,1657705,1657705
2,0500000US06003,All Broadband Connections,R,N,0.2,0,0,0,962,962,962
3,0500000US06003,All Broadband Connections,R,T,0.2,0,0,0,167,167,167
4,0500000US06005,All Broadband Connections,R,N,0.2,0,1,0,24969,24970,24970


In [11]:
# Double check the relabelling: count how many rows carry each broadband_tech value
fcc_df["broadband_tech"].value_counts()

Wireless                     1120
Other                        1120
Cable                        1120
Fiber                        1120
ADSL                         1120
All Broadband Connections    1120
Satellite                    1120
Name: broadband_tech, dtype: int64

In [12]:
# Condensed table: drop the tribal flag and keep only the
# "All Broadband Connections" rows
fcc_condensed = (
    fcc_df
    .drop(columns=["tribal_non"])
    .loc[lambda frame: frame["broadband_tech"] == "All Broadband Connections"]
)
fcc_condensed.head()

Unnamed: 0,geo_id,broadband_tech,urban_rural,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop
0,0500000US06001,All Broadband Connections,R,0.2,0,3,0,13621,13624,13624
1,0500000US06001,All Broadband Connections,U,0.2,0,4,3,1657698,1657705,1657705
2,0500000US06003,All Broadband Connections,R,0.2,0,0,0,962,962,962
3,0500000US06003,All Broadband Connections,R,0.2,0,0,0,167,167,167
4,0500000US06005,All Broadband Connections,R,0.2,0,1,0,24969,24970,24970


In [13]:
# Consolidate rows that share the same county, urban/rural flag and download
# speed by summing their population counts.
# numeric_only=True keeps the text column broadband_tech out of the aggregation;
# with the pandas 2.x default (numeric_only=False) its strings would be
# concatenated into the result instead of being dropped.
fcc_condensed = fcc_condensed.groupby(["geo_id", "urban_rural", "dl_speed"]).sum(numeric_only=True)
fcc_condensed.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,has_0,has_1,has_2,has_3more,has_access,total_pop
geo_id,urban_rural,dl_speed,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0500000US06001,R,0.2,0,3,0,13621,13624,13624
0500000US06001,R,4.0,0,3,648,12973,13624,13624
0500000US06001,R,10.0,0,3,648,12973,13624,13624
0500000US06001,R,25.0,0,3,937,12684,13624,13624
0500000US06001,R,100.0,4987,2611,3362,2664,8637,13624


In [14]:
# Move the group keys (geo_id, urban_rural, dl_speed) out of the index and
# back into ordinary columns
fcc_condensed = pd.DataFrame(fcc_condensed).reset_index()
fcc_condensed.head()

Unnamed: 0,geo_id,urban_rural,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop
0,0500000US06001,R,0.2,0,3,0,13621,13624,13624
1,0500000US06001,R,4.0,0,3,648,12973,13624,13624
2,0500000US06001,R,10.0,0,3,648,12973,13624,13624
3,0500000US06001,R,25.0,0,3,937,12684,13624,13624
4,0500000US06001,R,100.0,4987,2611,3362,2664,8637,13624


In [15]:
# Share of the population with access: has_access divided by total_pop
# (stored as a fraction, so 1.0 means everyone has access)
fcc_condensed["access_pop_percent"] = fcc_condensed["has_access"].div(fcc_condensed["total_pop"])
fcc_condensed.head()

Unnamed: 0,geo_id,urban_rural,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop,access_pop_percent
0,0500000US06001,R,0.2,0,3,0,13621,13624,13624,1.0
1,0500000US06001,R,4.0,0,3,648,12973,13624,13624,1.0
2,0500000US06001,R,10.0,0,3,648,12973,13624,13624,1.0
3,0500000US06001,R,25.0,0,3,937,12684,13624,13624,1.0
4,0500000US06001,R,100.0,4987,2611,3362,2664,8637,13624,0.633955


In [16]:
# For every county / urban-rural group, find the highest download speed at
# which the whole population still has access (access_pop_percent == 1)
fcc_100ua_one = fcc_condensed[fcc_condensed["access_pop_percent"] == 1]
fcc_100ua_two = fcc_100ua_one.groupby(["geo_id", "urban_rural"])["dl_speed"].max()
# Convert the grouped Series into a flat DataFrame with the keys as columns
fcc_100ua_three = fcc_100ua_two.to_frame().reset_index()
fcc_100ua_three

Unnamed: 0,geo_id,urban_rural,dl_speed
0,0500000US06001,R,25.0
1,0500000US06001,U,25.0
2,0500000US06003,R,25.0
3,0500000US06005,R,25.0
4,0500000US06005,U,25.0
...,...,...,...
108,0500000US06111,U,25.0
109,0500000US06113,R,25.0
110,0500000US06113,U,25.0
111,0500000US06115,R,25.0


In [17]:
# Rows where less than the full population has access at that speed
fcc_not100ua_one = fcc_condensed[fcc_condensed["access_pop_percent"].lt(1)]
fcc_not100ua_one

Unnamed: 0,geo_id,urban_rural,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop,access_pop_percent
4,0500000US06001,R,100.0,4987,2611,3362,2664,8637,13624,0.633955
5,0500000US06001,R,250.0,7174,3845,2605,0,6450,13624,0.473429
6,0500000US06001,R,1000.0,11003,2621,0,0,2621,13624,0.192381
11,0500000US06001,U,100.0,11860,104419,411197,1130229,1645845,1657705,0.992846
12,0500000US06001,U,250.0,61933,688352,738690,168730,1595772,1657705,0.962639
...,...,...,...,...,...,...,...,...,...,...
782,0500000US06115,R,250.0,18832,4404,364,0,4768,23600,0.202034
783,0500000US06115,R,1000.0,22933,667,0,0,667,23600,0.028263
788,0500000US06115,U,100.0,336,27940,26220,572,54732,55068,0.993898
789,0500000US06115,U,250.0,541,52470,2057,0,54527,55068,0.990176


In [19]:
# Look up the full fcc_condensed row for each (county, urban/rural, top
# full-access speed) combination
fcc_dropped_ur_one = fcc_100ua_three.merge(
    fcc_condensed,
    how="left",
    on=["geo_id", "urban_rural", "dl_speed"],
)

# Stack the partial-access rows on top of the matched full-access rows.
# Index labels from both inputs are kept as-is (ignore_index is not set).
fcc_dropped_ur_two = [fcc_not100ua_one, fcc_dropped_ur_one]
fcc_dropped_ur_three = pd.concat(fcc_dropped_ur_two)
fcc_dropped_ur_three

Unnamed: 0,geo_id,urban_rural,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop,access_pop_percent
4,0500000US06001,R,100.0,4987,2611,3362,2664,8637,13624,0.633955
5,0500000US06001,R,250.0,7174,3845,2605,0,6450,13624,0.473429
6,0500000US06001,R,1000.0,11003,2621,0,0,2621,13624,0.192381
11,0500000US06001,U,100.0,11860,104419,411197,1130229,1645845,1657705,0.992846
12,0500000US06001,U,250.0,61933,688352,738690,168730,1595772,1657705,0.962639
...,...,...,...,...,...,...,...,...,...,...
108,0500000US06111,U,25.0,0,1,1866,811160,813027,813027,1.000000
109,0500000US06113,R,25.0,0,1,7281,13110,20392,20392,1.000000
110,0500000US06113,U,25.0,0,0,5812,194296,200108,200108,1.000000
111,0500000US06115,R,25.0,0,0,343,23257,23600,23600,1.000000


In [20]:
# Order by county (ascending), then urban_rural (descending, so U precedes R),
# then by share of population with access (highest first)
fcc_dropped_ur_final = fcc_dropped_ur_three.sort_values(
    by=["geo_id", "urban_rural", "access_pop_percent"],
    ascending=[True, False, False],
)
fcc_dropped_ur_final

Unnamed: 0,geo_id,urban_rural,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop,access_pop_percent
1,0500000US06001,U,25.0,0,5,169,1657531,1657705,1657705,1.000000
11,0500000US06001,U,100.0,11860,104419,411197,1130229,1645845,1657705,0.992846
12,0500000US06001,U,250.0,61933,688352,738690,168730,1595772,1657705,0.962639
13,0500000US06001,U,1000.0,767327,804471,84097,1810,890378,1657705,0.537115
0,0500000US06001,R,25.0,0,3,937,12684,13624,13624,1.000000
...,...,...,...,...,...,...,...,...,...,...
790,0500000US06115,U,1000.0,53054,2014,0,0,2014,55068,0.036573
111,0500000US06115,R,25.0,0,0,343,23257,23600,23600,1.000000
781,0500000US06115,R,100.0,18644,4446,510,0,4956,23600,0.210000
782,0500000US06115,R,250.0,18832,4404,364,0,4768,23600,0.202034


In [17]:
# NOTE(review): this cell originally wrote `df`, which is never defined in the
# notebook and raises NameError on a fresh Restart & Run All. Presumably the
# unsorted combined table was intended - confirm.
fcc_dropped_ur_three.to_csv('../resources/fcc/test2.csv', index=False)

In [None]:
# NOTE(review): this cell originally wrote `df_sorted`, which is never defined
# in the notebook and raises NameError on a fresh Restart & Run All. Presumably
# the sorted combined table was intended - confirm.
fcc_dropped_ur_final.to_csv('../resources/fcc/test3.csv', index=False)

In [18]:
# Urban vs rural split by county: drop the tribal flag, then keep only the
# rows with dl_speed == 0.2 for "All Broadband Connections"
urban_rural_by_county = (
    fcc_df
    .drop(columns=["tribal_non"])
    .loc[lambda frame: frame["dl_speed"] == 0.2]
    .loc[lambda frame: frame["broadband_tech"] == "All Broadband Connections"]
)
urban_rural_by_county.head()

Unnamed: 0,geo_id,broadband_tech,urban_rural,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop
0,0500000US06001,All Broadband Connections,R,0.2,0,3,0,13621,13624,13624
1,0500000US06001,All Broadband Connections,U,0.2,0,4,3,1657698,1657705,1657705
2,0500000US06003,All Broadband Connections,R,0.2,0,0,0,962,962,962
3,0500000US06003,All Broadband Connections,R,0.2,0,0,0,167,167,167
4,0500000US06005,All Broadband Connections,R,0.2,0,1,0,24969,24970,24970


In [19]:
# Double check the speed filter: count the rows per dl_speed value
# (only 0.2 should remain after the filter above)
urban_rural_by_county["dl_speed"].value_counts()

0.2    160
Name: dl_speed, dtype: int64

In [20]:
# Total population per county and urban/rural flag
urban_rural_by_county = (
    urban_rural_by_county
    .groupby(["geo_id", "urban_rural"])["total_pop"]
    .sum()
)
urban_rural_by_county.head()

geo_id          urban_rural
0500000US06001  R                13624
                U              1657705
0500000US06003  R                 1129
0500000US06005  R                24983
                U                14768
Name: total_pop, dtype: int64

In [21]:
# Convert the grouped Series to a DataFrame and turn the group keys back
# into regular columns
urban_rural_by_county = urban_rural_by_county.to_frame().reset_index()
urban_rural_by_county.head()

Unnamed: 0,geo_id,urban_rural,total_pop
0,0500000US06001,R,13624
1,0500000US06001,U,1657705
2,0500000US06003,R,1129
3,0500000US06005,R,24983
4,0500000US06005,U,14768


In [22]:
# Write the urban/rural summary and the cleaned FCC table to CSV,
# leaving the row index out of both files
for frame, csv_path in (
    (urban_rural_by_county, '../resources/fcc/urban_rural_by_county.csv'),
    (fcc_df, '../resources/fcc/fcc_area_dec_2019.csv'),
):
    frame.to_csv(csv_path, index=False)