In [1]:
# Dependencies and Setup
import pandas as pd
import os

In [2]:
# Relative path to the FCC area-table extract (December 2019) read by this notebook
fcc_data = "../resources/fcc/fcc_area_table_december_2019_extract.csv"

In [3]:
# Load the FCC extract into a DataFrame and preview the first five rows
fcc_df = pd.read_csv(filepath_or_buffer=fcc_data)
fcc_df.head(n=5)

Unnamed: 0,type,id,tech,urban_rural,tribal_non,speed,has_0,has_1,has_2,has_3more
0,county,6001,acfosw,R,N,0.2,0,3,0,13621
1,county,6001,acfosw,U,N,0.2,0,4,3,1657698
2,county,6003,acfosw,R,N,0.2,0,0,0,962
3,county,6003,acfosw,R,T,0.2,0,0,0,167
4,county,6005,acfosw,R,N,0.2,0,1,0,24969


In [4]:
# Inspect the column dtypes as loaded; `id` comes in as an integer and is
# converted to a string in the next cell so it can be concatenated into a GEOID.
fcc_df.dtypes

type            object
id               int64
tech            object
urban_rural     object
tribal_non      object
speed          float64
has_0            int64
has_1            int64
has_2            int64
has_3more        int64
dtype: object

In [5]:
# Cast the numeric county id to text so it can be string-concatenated later
fcc_df = fcc_df.assign(id=fcc_df["id"].astype(str))
fcc_df.dtypes

type            object
id              object
tech            object
urban_rural     object
tribal_non      object
speed          float64
has_0            int64
has_1            int64
has_2            int64
has_3more        int64
dtype: object

In [6]:
# Add full GEOID to join w/ census tables.
# A county GEOID is the "0500000US" prefix followed by the 5-digit state+county
# FIPS code. The id was parsed as an integer, so any leading zero is lost
# (6001 instead of 06001). Zero-pad to 5 digits rather than hard-coding a single
# "0": this yields the same value for 4-digit ids and stays correct for ids that
# already have 5 digits (state FIPS >= 10), which the fixed "0" would corrupt.
fcc_df["geo_id"] = "0500000US" + fcc_df["id"].astype(str).str.zfill(5)
fcc_df.head()

Unnamed: 0,type,id,tech,urban_rural,tribal_non,speed,has_0,has_1,has_2,has_3more,geo_id
0,county,6001,acfosw,R,N,0.2,0,3,0,13621,0500000US06001
1,county,6001,acfosw,U,N,0.2,0,4,3,1657698,0500000US06001
2,county,6003,acfosw,R,N,0.2,0,0,0,962,0500000US06003
3,county,6003,acfosw,R,T,0.2,0,0,0,167,0500000US06003
4,county,6005,acfosw,R,N,0.2,0,1,0,24969,0500000US06005


In [7]:
# Drop the original id and type columns now that geo_id carries the join key
fcc_df = fcc_df.drop(columns=['id', 'type'])
fcc_df.head()

Unnamed: 0,tech,urban_rural,tribal_non,speed,has_0,has_1,has_2,has_3more,geo_id
0,acfosw,R,N,0.2,0,3,0,13621,0500000US06001
1,acfosw,U,N,0.2,0,4,3,1657698,0500000US06001
2,acfosw,R,N,0.2,0,0,0,962,0500000US06003
3,acfosw,R,T,0.2,0,0,0,167,0500000US06003
4,acfosw,R,N,0.2,0,1,0,24969,0500000US06005


In [8]:
# Reorder columns so the join key (geo_id) comes first
column_order = [
    "geo_id",
    "tech",
    "urban_rural",
    "tribal_non",
    "speed",
    "has_0",
    "has_1",
    "has_2",
    "has_3more",
]
fcc_df = fcc_df.loc[:, column_order]
fcc_df.head()

Unnamed: 0,geo_id,tech,urban_rural,tribal_non,speed,has_0,has_1,has_2,has_3more
0,0500000US06001,acfosw,R,N,0.2,0,3,0,13621
1,0500000US06001,acfosw,U,N,0.2,0,4,3,1657698
2,0500000US06003,acfosw,R,N,0.2,0,0,0,962
3,0500000US06003,acfosw,R,T,0.2,0,0,0,167
4,0500000US06005,acfosw,R,N,0.2,0,1,0,24969


In [9]:
# Derive two population totals per row:
#   has_access - population with at least one provider (has_1 + has_2 + has_3more)
#   total_pop  - population across every provider-count bucket, including has_0
served_pop = fcc_df["has_1"] + fcc_df["has_2"] + fcc_df["has_3more"]
fcc_df = fcc_df.assign(
    has_access=served_pop,
    total_pop=fcc_df["has_0"] + served_pop,
)
fcc_df.head()

Unnamed: 0,geo_id,tech,urban_rural,tribal_non,speed,has_0,has_1,has_2,has_3more,has_access,total_pop
0,0500000US06001,acfosw,R,N,0.2,0,3,0,13621,13624,13624
1,0500000US06001,acfosw,U,N,0.2,0,4,3,1657698,1657705,1657705
2,0500000US06003,acfosw,R,N,0.2,0,0,0,962,962,962
3,0500000US06003,acfosw,R,T,0.2,0,0,0,167,167,167
4,0500000US06005,acfosw,R,N,0.2,0,1,0,24969,24970,24970


In [10]:
# Give the technology and speed columns more descriptive names
column_renames = {
    'tech': 'broadband_tech',
    'speed': 'dl_speed',
}
fcc_df = fcc_df.rename(columns=column_renames)
fcc_df.head()

Unnamed: 0,geo_id,broadband_tech,urban_rural,tribal_non,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop
0,0500000US06001,acfosw,R,N,0.2,0,3,0,13621,13624,13624
1,0500000US06001,acfosw,U,N,0.2,0,4,3,1657698,1657705,1657705
2,0500000US06003,acfosw,R,N,0.2,0,0,0,962,962,962
3,0500000US06003,acfosw,R,T,0.2,0,0,0,167,167,167
4,0500000US06005,acfosw,R,N,0.2,0,1,0,24969,24970,24970


In [11]:
# Translate the FCC technology codes in `broadband_tech` into readable labels
broadband_dict = {
    "acfosw": "All Broadband Connections",
    "a": "ADSL",
    "c": "Cable",
    "f": "Fiber",
    "o": "Other",
    "s": "Satellite",
    "w": "Wireless"
}
# Assign the result back instead of calling replace(..., inplace=True) on the
# selected column: the chained in-place form operates on an intermediate object,
# raises a warning on recent pandas, and leaves fcc_df unchanged under Copy-on-Write.
fcc_df['broadband_tech'] = fcc_df['broadband_tech'].replace(broadband_dict)
fcc_df.head()

Unnamed: 0,geo_id,broadband_tech,urban_rural,tribal_non,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop
0,0500000US06001,All Broadband Connections,R,N,0.2,0,3,0,13621,13624,13624
1,0500000US06001,All Broadband Connections,U,N,0.2,0,4,3,1657698,1657705,1657705
2,0500000US06003,All Broadband Connections,R,N,0.2,0,0,0,962,962,962
3,0500000US06003,All Broadband Connections,R,T,0.2,0,0,0,167,167,167
4,0500000US06005,All Broadband Connections,R,N,0.2,0,1,0,24969,24970,24970


In [12]:
# Sanity check: row count per technology label after the code -> label mapping
tech_counts = fcc_df["broadband_tech"].value_counts()
tech_counts

All Broadband Connections    1120
Fiber                        1120
Satellite                    1120
Wireless                     1120
ADSL                         1120
Other                        1120
Cable                        1120
Name: broadband_tech, dtype: int64

In [13]:
# Build a working table: keep only the combined "all technologies" rows and
# discard the tribal / non-tribal flag
is_all_tech = fcc_df["broadband_tech"] == "All Broadband Connections"
df = fcc_df.loc[is_all_tech].drop(columns=["tribal_non"])
df.head()

Unnamed: 0,geo_id,broadband_tech,urban_rural,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop
0,0500000US06001,All Broadband Connections,R,0.2,0,3,0,13621,13624,13624
1,0500000US06001,All Broadband Connections,U,0.2,0,4,3,1657698,1657705,1657705
2,0500000US06003,All Broadband Connections,R,0.2,0,0,0,962,962,962
3,0500000US06003,All Broadband Connections,R,0.2,0,0,0,167,167,167
4,0500000US06005,All Broadband Connections,R,0.2,0,1,0,24969,24970,24970


In [14]:
# Collapse rows that share county, urban/rural class and speed tier into a
# single row by summing their population counts. These duplicates are the rows
# that previously differed only by the (now dropped) tribal / non-tribal flag.
# numeric_only=True keeps the string column `broadband_tech` out of the sum;
# on pandas >= 2.0 the default would otherwise concatenate its labels into the
# result instead of dropping the column.
df = df.groupby(["geo_id", "urban_rural", "dl_speed"]).sum(numeric_only=True)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,has_0,has_1,has_2,has_3more,has_access,total_pop
geo_id,urban_rural,dl_speed,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0500000US06001,R,0.2,0,3,0,13621,13624,13624
0500000US06001,R,4.0,0,3,648,12973,13624,13624
0500000US06001,R,10.0,0,3,648,12973,13624,13624
0500000US06001,R,25.0,0,3,937,12684,13624,13624
0500000US06001,R,100.0,4987,2611,3362,2664,8637,13624


In [15]:
# Move the group keys (geo_id, urban_rural, dl_speed) out of the index and back
# into ordinary columns
df = df.reset_index()
df.head()

Unnamed: 0,geo_id,urban_rural,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop
0,0500000US06001,R,0.2,0,3,0,13621,13624,13624
1,0500000US06001,R,4.0,0,3,648,12973,13624,13624
2,0500000US06001,R,10.0,0,3,648,12973,13624,13624
3,0500000US06001,R,25.0,0,3,937,12684,13624,13624
4,0500000US06001,R,100.0,4987,2611,3362,2664,8637,13624


In [16]:
# Share of the population with at least one provider. Despite the column name
# this is a fraction in [0, 1], not a 0-100 percentage; rows whose total_pop is
# zero come out as NaN.
df["access_pop_percent"] = df["has_access"].div(df["total_pop"])
df.head()

Unnamed: 0,geo_id,urban_rural,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop,access_pop_percent
0,0500000US06001,R,0.2,0,3,0,13621,13624,13624,1.0
1,0500000US06001,R,4.0,0,3,648,12973,13624,13624,1.0
2,0500000US06001,R,10.0,0,3,648,12973,13624,13624,1.0
3,0500000US06001,R,25.0,0,3,937,12684,13624,13624,1.0
4,0500000US06001,R,100.0,4987,2611,3362,2664,8637,13624,0.633955


In [37]:
# For each county and urban/rural class, find the highest download-speed tier
# at which the whole population still has at least one provider
df_test_one = df[df["access_pop_percent"] == 1]
df_test_2 = df_test_one.groupby(["geo_id", "urban_rural"])["dl_speed"].max()
df_test_3 = df_test_2.reset_index()
df_test_3

Unnamed: 0,geo_id,urban_rural,dl_speed
0,0500000US06001,R,25.0
1,0500000US06001,U,25.0
2,0500000US06003,R,25.0
3,0500000US06005,R,25.0
4,0500000US06005,U,25.0
...,...,...,...
108,0500000US06111,U,25.0
109,0500000US06113,R,25.0
110,0500000US06113,U,25.0
111,0500000US06115,R,25.0


In [17]:
# Write the per-county / urban-rural / speed-tier access table to disk
test_output_path = '../resources/fcc/test2.csv'
df.to_csv(test_output_path, index=False)

In [18]:
# Urban vs rural split by county.
# Keep a single speed tier (0.2) of the combined "all technologies" rows so each
# population group is represented once, and drop the tribal / non-tribal flag.
is_lowest_tier = fcc_df["dl_speed"] == 0.2
is_all_tech = fcc_df["broadband_tech"] == "All Broadband Connections"
urban_rural_by_county = fcc_df.loc[is_lowest_tier & is_all_tech].drop(columns=["tribal_non"])
urban_rural_by_county.head()

Unnamed: 0,geo_id,broadband_tech,urban_rural,dl_speed,has_0,has_1,has_2,has_3more,has_access,total_pop
0,0500000US06001,All Broadband Connections,R,0.2,0,3,0,13621,13624,13624
1,0500000US06001,All Broadband Connections,U,0.2,0,4,3,1657698,1657705,1657705
2,0500000US06003,All Broadband Connections,R,0.2,0,0,0,962,962,962
3,0500000US06003,All Broadband Connections,R,0.2,0,0,0,167,167,167
4,0500000US06005,All Broadband Connections,R,0.2,0,1,0,24969,24970,24970


In [19]:
# Sanity check: only the 0.2 download-speed tier should remain after filtering
speed_counts = urban_rural_by_county["dl_speed"].value_counts()
speed_counts

0.2    160
Name: dl_speed, dtype: int64

In [20]:
# Total population per county and urban/rural class; rows that differed only by
# the tribal / non-tribal flag are added together
county_class_groups = urban_rural_by_county.groupby(["geo_id", "urban_rural"])
urban_rural_by_county = county_class_groups["total_pop"].sum()
urban_rural_by_county.head()

geo_id          urban_rural
0500000US06001  R                13624
                U              1657705
0500000US06003  R                 1129
0500000US06005  R                24983
                U                14768
Name: total_pop, dtype: int64

In [21]:
# Turn the grouped result back into a flat table with geo_id and urban_rural as
# regular columns
urban_rural_by_county = urban_rural_by_county.reset_index()
urban_rural_by_county.head()

Unnamed: 0,geo_id,urban_rural,total_pop
0,0500000US06001,R,13624
1,0500000US06001,U,1657705
2,0500000US06003,R,1129
3,0500000US06005,R,24983
4,0500000US06005,U,14768


In [22]:
# Write the final tables to CSV (row index omitted), in the order listed
csv_exports = {
    '../resources/fcc/urban_rural_by_county.csv': urban_rural_by_county,
    '../resources/fcc/fcc_area_dec_2019.csv': fcc_df,
}
for csv_path, frame in csv_exports.items():
    frame.to_csv(csv_path, index=False)