# EDA of FCC broadband map data

In [1]:
import pandas as pd
import numpy as np
from pprint import pprint

Downloaded from [here](http://transition.fcc.gov/form477/BroadbandData/Fixed/Dec16/Version%201/US-Fixed-with-Satellite-Dec2016.zip\ "Fixed Broadband Deployment Data from FCC Form 477") plus the Data [dictionary](https://www.fcc.gov/general/explanation-broadband-deployment-data) and mobile [addendum](https://transition.fcc.gov/form477/MBD/formatting_mbd.pdf).

In [2]:
# datafile = 'fbd_us_with_satellite_dec2016_v1.csv' # ~10 Gb
datafile = 'fbd_us_without_satellite_dec2016_v1.csv' # ~4 Gb
df = pd.read_csv(datafile)

### Number of entries

In [3]:
print("{:,} Entries".format(len(df)))

25,738,665 Entries


### Column names

In [59]:
pprint(df.dtypes[sorted(df.dtypes.to_dict())])

BlockCode               int64
Business                int64
Consumer                int64
DBAName                object
FRN                     int64
HocoFinal              object
HocoNum                 int64
HoldingCompanyName     object
LogRecNo                int64
MaxAdDown             float64
MaxAdUp               float64
MaxCIRDown            float64
MaxCIRUp              float64
ProviderName           object
Provider_Id             int64
StateAbbr              object
TechCode                int64
dtype: object


### Representative sample of data

In [5]:
df.sample(10)

Unnamed: 0,LogRecNo,Provider_Id,FRN,ProviderName,DBAName,HoldingCompanyName,HocoNum,HocoFinal,StateAbbr,BlockCode,TechCode,Consumer,MaxAdDown,MaxAdUp,Business,MaxCIRDown,MaxCIRUp
10199312,20734511,26471,18589226,"Lightower Fiber Networks I, LLC (fka Light Tow...",Lightower,LTS Group Holdings LLC,131095,LTS Group Holdings LLC,CT,90035204004012,50,0,0.0,0.0,1,1000.0,1000.0
11640254,22175453,26913,11562717,The Montana Internet Corporation,Montana Internet Corporation - H,Montana Internet,240109,Montana Internet,MT,300439622011003,10,1,7.0,1.0,1,0.0,0.0
22996891,65665801,28708,3737467,New Paris Telephones Quality Cablevision,New Paris Telephone's Quality Cable,"New Paris Telephone Company, Inc",130905,"New Paris Telephone Company, Inc.",IN,180390009002005,42,1,14.0,3.0,1,14.0,3.0
446646,446647,25566,3737673,"Service Electric Cablevision, Inc.","Service Electric Cablevision, Inc.","Service Electric Television, Inc.",330002,Service Electric Television Inc.,PA,420930502001062,41,1,200.0,15.0,0,0.0,0.0
25643835,68320711,29424,15457351,Kellin Communications,Kellin Communications,Kellin Communications,300078,Kellin Communications,CO,80350139041016,70,1,15.0,4.0,1,100.0,100.0
13805443,24340642,27085,3768165,"COMCAST CABLE COMMUNICATIONS, LLC",Comcast,Comcast Corporation,130317,Comcast Corporation,VA,515708303002019,42,1,200.0,10.0,1,0.0,0.0
24503791,67180667,29050,17169327,"King Street Wireless, L.P.",King Street Wireless L.P.,"King Street Wireless, L.P.",300079,"King Street Wireless, L.P.",MI,260270017002062,70,1,1.0,0.2,0,0.0,0.0
10398690,20933889,26500,18515536,"CSS, Inc.",CSSInc,"Computer Sales & Services, Inc.",130331,"Computer Sales & Services, Inc.",LA,220570217003017,70,1,10.0,10.0,1,0.0,0.0
18330664,28865863,27656,25646373,"Charter Communications, Inc.",Charter Communications Inc,Charter Communications,130235,Charter Communications,OH,390110406003061,42,1,50.0,5.0,1,0.0,0.0
12664635,23199834,27085,3768165,"COMCAST CABLE COMMUNICATIONS, LLC",Comcast,Comcast Corporation,130317,Comcast Corporation,IL,170313301004054,42,1,987.0,35.0,1,0.0,0.0


### Percent of consumer vs business broadband providers

In [6]:
num_consumer = len(df[df['Consumer'] == 1])
num_business = len(df[df['Consumer'] == 0])

print("Consumer facing provider percent: {:.2f}%".format(100.0 * num_consumer / len(df)))
print("Business facing provider percent: {:.2f}%".format(100.0 * num_business / len(df)))

Consumer facing provider percent: 91.08%
Business facing provider percent: 8.92%


### Highest number of providers per block
Curious results here. There's a census block in Iowa with 29 providers?🤔 I expected that the most populous states would be represented here but I think it is necessary to account for the population size of each block individually.

In [60]:
df_state_and_block = df.groupby(['StateAbbr', 'BlockCode'])

In [61]:
df_providers = df_state_and_block['Provider_Id']
df_providers.nunique().sort_values(ascending=False)

StateAbbr  BlockCode      
IA         191530111141017    29
VA         511076110061011    24
IL         170313301004040    24
GA         131210119002049    24
CA         60372077101037     23
TX         481130100001102    23
DC         110010101001010    21
TX         481130136163033    20
IL         170438446022007    20
           170318391002001    20
           170438446012015    20
NY         360610096001005    19
           360610092001015    19
DC         110010107001019    19
NY         360610033003021    18
WA         530330072001058    18
IL         170317705002070    18
DC         110010101001015    18
           110010107001030    18
IL         170318391002021    18
CA         60750233001003     18
PA         421010004022016    18
NY         360610109001008    18
PA         421010376002033    18
NY         360610080002000    18
           360610109001001    18
           360610119001004    18
TX         481130137221015    18
IL         170318391001095    18
NY         36061

LogRecNo                int64
Provider_Id             int64
FRN                     int64
ProviderName           object
DBAName                object
HoldingCompanyName     object
HocoNum                 int64
HocoFinal              object
StateAbbr              object
BlockCode               int64
TechCode                int64
Consumer                int64
MaxAdDown             float64
MaxAdUp               float64
Business                int64
MaxCIRDown            float64
MaxCIRUp              float64
dtype: object

### Number of blockcodes per state
No surprises here, the data is more or less sorted by state / territory population. Though it should be noted that it isn't exact, eg. CA has a higher population than TX and FL should be 3rd. This is probably because of how census tracts are defined rather than anything wierd in the data

In [37]:
df_states = df.groupby('StateAbbr')['BlockCode'].count()

In [38]:
df_states.sort_values(ascending=False)

StateAbbr
TX    2457999
CA    1883962
IL    1497218
OH    1027982
PA     948335
FL     883904
IN     879092
MI     868378
NY     795356
MO     775587
WI     707543
KS     677036
MN     660231
WA     643924
NJ     612360
NC     600011
NE     584816
OK     525402
CO     488886
IA     487728
TN     475339
GA     470800
AL     455383
VA     437609
AZ     434195
OR     381340
LA     359752
PR     355781
KY     345970
SC     328419
AR     299062
WV     298764
ID     292829
MA     279229
MD     255308
UT     247661
MS     238762
MT     225679
NM     219008
CT     207384
ND     175258
SD     150056
NV     140477
WY     130200
ME     114922
NH      87381
VT      70192
HI      55027
RI      52364
DE      42155
AK      38898
VI      32053
DC      25581
GU       4308
MP       4092
AS       1677
Name: BlockCode, dtype: int64