In [1]:
import numpy as np
import pandas as pd

In [55]:
# Raw data was downloaded from the FCC Form 477 broadband deployment data page:
# https://www.fcc.gov/general/broadband-deployment-data-fcc-form-477

In [36]:
# County codes (first five characters of a census block FIPS code) that the
# rest of the notebook keeps; the trailing notes give the borough each maps to.
county_codes = [
    '36005',  # bronx
    '36047',  # brooklyn
    '36061',  # manhattan
    '36081',  # queens
    '36085',  # staten island
]

def county_to_boro(x):
    """Translate a five-character county code string into a borough name.

    Returns 'bronx', 'brooklyn', 'manhattan', 'queens' or 'staten island'
    for the matching code; every other value yields 'UNKNOWN'.
    """
    boro_by_code = (
        ('36005', 'bronx'),
        ('36047', 'brooklyn'),
        ('36061', 'manhattan'),
        ('36081', 'queens'),
        ('36085', 'staten island'),
    )
    # Plain equality checks (no hashing), so any input type is accepted and
    # simply falls through to 'UNKNOWN' when nothing matches.
    for code, boro in boro_by_code:
        if x == code:
            return boro
    return 'UNKNOWN'

# 2-digit code indicating the Technology of Transmission used to offer broadband service. 
def tech_code_translator(x):
    """Collapse a 2-digit technology code into a coarse technology label.

    Parameters
    ----------
    x : str or int
        The technology code (e.g. '10', '42', 50). Surrounding whitespace is
        ignored. A value that is already one of the labels produced by this
        function is returned unchanged, so applying the translator to a
        column that was translated earlier does not turn everything into
        'Other'.

    Returns
    -------
    str
        One of 'ADSL', 'Cable', 'Fiber', 'Satellite', 'Fixed Wireless' or
        'Other' (the fallback for any unrecognised value).
    """
    label_by_code = {
        '10': 'ADSL', '11': 'ADSL', '12': 'ADSL', '20': 'ADSL', '30': 'ADSL',
        '40': 'Cable', '41': 'Cable', '42': 'Cable', '43': 'Cable',
        '50': 'Fiber',
        '60': 'Satellite',
        '70': 'Fixed Wireless',
    }

    # Normalise so integer codes and padded strings behave like the plain
    # string codes the lookup table is keyed on.
    code = str(x).strip()

    # Idempotence: an already-translated label passes straight through.
    if code in label_by_code.values():
        return code
    return label_by_code.get(code, 'Other')

In [11]:
# Read the identifier-like columns as text so their leading zeros are kept
# (a block code such as 360050001000001 has tract '000100', which would
# otherwise be parsed as the integer 100).
#
# The previous literal was missing ": <converter>," after 'BlockCode', so
# Python joined the adjacent strings into the single key 'BlockCodetract',
# which matches no column. The keys below use the column names of the
# processed file ('BlockCode' is stored as 'Census Block FIPS Code').
#
# NOTE(review): if this CSV's 'Technology Code' column already contains
# translated labels rather than raw 2-digit codes, translating it again may
# map every row to 'Other' -- confirm what the file actually contains.
converters = {
    'Census Block FIPS Code': str,
    'tract': str,
    'block': str,
    'Technology Code': tech_code_translator,
}

data2019 = pd.read_csv("NY-Fixed-Jun2019-v1.csv", converters=converters, encoding="ISO-8859-1")
data2019.head()

Unnamed: 0,Logical Record Number,Provider ID,FRN,Provider Name,DBA Name,Holding Company Name,Holding Company Number,Holding Company Final,State,Census Block FIPS Code,...,Consumer,Max Advertised Downstream Speed (mbps),Max Advertised Upstream Speed (mbps),Business,Max CIR Downstream Speed (mbps),Max CIR Upstream Speed (mbps),county_code,tract,block,boro
0,19376907,50820,4963088,"ViaSat, Inc.",Viasat Inc,"ViaSat, Inc.",290111,"ViaSat, Inc.",NY,360050001000001,...,1,35.0,3.0,1,0.0,0.0,36005,100,1,bronx
1,19376908,50820,4963088,"ViaSat, Inc.",Viasat Inc,"ViaSat, Inc.",290111,"ViaSat, Inc.",NY,360050001000002,...,1,35.0,3.0,1,0.0,0.0,36005,100,2,bronx
2,19376909,50820,4963088,"ViaSat, Inc.",Viasat Inc,"ViaSat, Inc.",290111,"ViaSat, Inc.",NY,360050001001000,...,1,35.0,3.0,1,0.0,0.0,36005,100,1000,bronx
3,59544342,52979,1568880,GCI Communication Corp.,GCI Communication Corp.,GCI Holdings LLC,130534,GCI Holdings LLC,NY,360050001001000,...,0,0.0,0.0,1,0.0,0.0,36005,100,1000,bronx
4,59881129,53153,12369286,"HNS License Sub, LLC",HughesNet,"Hughes Network Systems, LLC",130627,"Hughes Network Systems, LLC",NY,360050001001000,...,1,25.0,3.0,1,0.0,0.0,36005,100,1000,bronx


In [33]:
# Show each distinct (county_code, boro) combination present in the data.
data2019.loc[:, ['county_code', 'boro']].drop_duplicates()

Unnamed: 0,county_code,boro
0,36005,bronx
40944,36047,brooklyn
121125,36061,manhattan
169424,36081,queens
278968,36085,staten island


In [51]:
def process_raw_file(file, newfile):
    """Filter a raw CSV down to the counties in ``county_codes`` and save it.

    The raw file is read with 'BlockCode' kept as text and 'TechCode' passed
    through ``tech_code_translator``. Rows whose 'BlockCode' does not start
    with one of ``county_codes`` are dropped. Four columns are then derived
    from 'BlockCode' -- 'county_code' (characters 0-4), 'tract' (5-10),
    'block' (11 onward) and 'boro' (via ``county_to_boro``) -- the raw column
    names are replaced with descriptive ones, and the result is written to
    ``newfile`` without the index.

    Parameters
    ----------
    file : str
        Path of the raw CSV to read (ISO-8859-1 encoded).
    newfile : str
        Path of the processed CSV to write.

    Returns
    -------
    None
    """
    converters = {
        'BlockCode': str,
        'TechCode': tech_code_translator,
    }

    raw = pd.read_csv(file, converters=converters, encoding="ISO-8859-1")

    # Take an explicit copy of the filtered rows: assigning new columns to a
    # boolean-mask slice otherwise triggers pandas' SettingWithCopyWarning.
    wanted = raw['BlockCode'].str.startswith(tuple(county_codes))
    data_file = raw[wanted].copy()

    block_code = data_file['BlockCode']
    data_file['county_code'] = block_code.str[:5]
    data_file['tract'] = block_code.str[5:11]
    data_file['block'] = block_code.str[11:]
    data_file['boro'] = data_file['county_code'].apply(county_to_boro)

    data_file = data_file.rename(columns={
        'LogRecNo': 'Logical Record Number',
        'Provider_Id': 'Provider ID',
        'ProviderName': 'Provider Name',
        'DBAName': 'DBA Name',
        'HoldingCompanyName': 'Holding Company Name',
        'HocoNum': 'Holding Company Number',
        'HocoFinal': 'Holding Company Final',
        'StateAbbr': 'State',
        'BlockCode': 'Census Block FIPS Code',
        'TechCode': 'Technology Code',
        'MaxAdDown': 'Max Advertised Downstream Speed (mbps)',
        'MaxAdUp': 'Max Advertised Upstream Speed (mbps)',
        'MaxCIRDown': 'Max CIR Downstream Speed (mbps)',
        'MaxCIRUp': 'Max CIR Upstream Speed (mbps)',
    })

    data_file.to_csv(newfile, index=False)

In [52]:
# Build the processed Jun2018 file from its raw counterpart.
process_raw_file(
    file="NY-Fixed-Jun2018-v1_raw.csv",
    newfile="NY-Fixed-Jun2018-v1.csv",
)

In [54]:
# Build the processed Jun2017 file from its raw counterpart.
process_raw_file(
    file="NY-Fixed-Jun2017-v3_raw.csv",
    newfile="NY-Fixed-Jun2017-v3.csv",
)