In [58]:
# Standard library
import datetime as dt
import os
import warnings

# Third-party
import numpy as np
import pandas as pd
# display/HTML are imported from the public IPython.display module; importing
# them through IPython.core.display is deprecated.
from IPython.display import HTML, display

# NOTE(review): this silences *every* warning for the whole notebook, including
# pandas' SettingWithCopyWarning. Consider narrowing the filter once the
# notebook runs cleanly.
warnings.filterwarnings('ignore')

# Let DataFrame previews show up to 200 rows before pandas truncates them.
pd.options.display.max_rows = 200

# Inject CSS so the notebook container uses the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [7]:
# Reading in FCC wisp data
# Loads ../data/bb_data.csv into wisp_df. Later cells filter this frame on its
# 'State' and 'Census Block FIPS Code' columns.
# NOTE(review): "wisp" presumably means wireless internet service provider — confirm.
filepath = "../data/bb_data.csv"
wisp_df = pd.read_csv(filepath)

In [13]:
# Reading in population densities
# Loads ../data/us2016.csv into pop_df. Later cells use its 'stateabbr',
# 'block_fips', 'hu2016', 'hh2016' and 'pop2016' columns.
# Note: `filepath` is re-bound here, so after this cell it no longer points at
# the WISP file.
filepath = "../data/us2016.csv"
pop_df = pd.read_csv(filepath)

In [21]:
# Pennsylvania population rows.
# .copy() makes the subset an independent frame, so later cells can add or
# convert columns on it without chained-assignment (SettingWithCopy) ambiguity.
pa_pop_df = pop_df.loc[pop_df['stateabbr'] == 'PA'].copy()

# Pennsylvania WISP records (same reasoning for the explicit copy).
pa_wisp_df = wisp_df.loc[wisp_df['State'] == 'PA'].copy()

In [218]:
# Create Montgomery county subsets.
#
# The first five characters of a block FIPS code are matched against
# state code + county code:
#   PA state FIPS code          42
#   Montgomery county FIPS code 091
MONT_COUNTY_FIPS_PREFIX = '42091'

# .assign() returns new frames instead of writing columns into frames that were
# produced by boolean filtering, so no chained assignment takes place and
# re-running the cell gives the same result.
pa_pop_df = pa_pop_df.assign(block_fips=pa_pop_df['block_fips'].astype(str))
pa_pop_df = pa_pop_df.assign(
    mont_county=pa_pop_df['block_fips'].str.startswith(MONT_COUNTY_FIPS_PREFIX)
)
mont_pop_df = pa_pop_df[pa_pop_df['mont_county']].copy()

# The WISP column name contains spaces, so it is passed to assign() via a dict.
pa_wisp_df = pa_wisp_df.assign(
    **{'Census Block FIPS Code': pa_wisp_df['Census Block FIPS Code'].astype(str)}
)
pa_wisp_df = pa_wisp_df.assign(
    mont_county=pa_wisp_df['Census Block FIPS Code'].str.startswith(MONT_COUNTY_FIPS_PREFIX)
)
mont_wisp_df = pa_wisp_df[pa_wisp_df['mont_county']].copy()

In [219]:
# Population information kept for the analysis:
#   hu  = housing units
#   hh  = households
#   pop = population
cols = ['stateabbr', 'block_fips', 'hu2016', 'hh2016', 'pop2016']

# Keep only the identifying columns and the 2016 counts listed above.
mont_pop_df = mont_pop_df.loc[:, cols]

In [234]:
# Quick sanity check, one value per line:
#   1. number of distinct census blocks present in the Montgomery WISP records
#   2. (rows, columns) of the Montgomery population frame
#   3. column names available on the WISP frame
print(
    mont_wisp_df['Census Block FIPS Code'].nunique(dropna=False),
    mont_pop_df.shape,
    mont_wisp_df.columns.values,
    sep='\n',
)

13769
(13769, 5)
['Logical Record Number' 'Provider ID' 'FRN' 'Provider Name' 'DBA Name'
 'Holding Company Name' 'Holding Company Number' 'Holding Company Final'
 'State' 'Census Block FIPS Code' 'Technology Code' 'Consumer'
 'Max Advertised Downstream Speed (mbps)'
 'Max Advertised Upstream Speed (mbps)' 'Business'
 'Max CIR Downstream Speed (mbps)' 'Max CIR Upstream Speed (mbps)'
 'mont_county']


In [221]:
# Per-census-block metrics computed with groupby: record count, consumer/business
# counts and mean up/down speeds.
def get_metrics(raw_df):
    """Summarise provider records into one row of metrics per census block.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        One row per provider record. Must contain the columns
        'Census Block FIPS Code', 'Consumer', 'Business',
        'Max Advertised Downstream Speed (mbps)',
        'Max Advertised Upstream Speed (mbps)',
        'Max CIR Upstream Speed (mbps)' and
        'Max CIR Downstream Speed (mbps)'.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Census Block FIPS Code'. Only blocks that occur in
        ``raw_df`` are present; a block missing from the result has no
        records at all. Columns, in order:

        wisp_count    number of records (rows) for the block; this is a row
                      count, not a count of distinct providers
        consumer_num  number of records with Consumer == 1 (NaN if none)
        business_num  number of records with Business == 1 (NaN if none)
        adv_max_down  mean of 'Max Advertised Downstream Speed (mbps)'
        adv_max_up    mean of 'Max Advertised Upstream Speed (mbps)'
        cir_max_up    mean of 'Max CIR Upstream Speed (mbps)'
        cir_max_down  mean of 'Max CIR Downstream Speed (mbps)'
    """
    block_col = 'Census Block FIPS Code'
    # (output column, source column) pairs, in the order they appear in the result.
    speed_cols = (
        ('adv_max_down', 'Max Advertised Downstream Speed (mbps)'),
        ('adv_max_up', 'Max Advertised Upstream Speed (mbps)'),
        ('cir_max_up', 'Max CIR Upstream Speed (mbps)'),
        ('cir_max_down', 'Max CIR Downstream Speed (mbps)'),
    )

    def count_flagged(flag_col):
        # Records per block whose flag equals 1. Blocks without such a record
        # are absent from the returned Series and become NaN once aligned.
        return raw_df[raw_df[flag_col] == 1].groupby(block_col).size()

    by_block = raw_df.groupby(block_col)

    print('Merging dataframes...')
    # Every block in raw_df appears in the size() result, so it defines the
    # index; each further column is aligned to that index on assignment.
    final_df = pd.DataFrame({'wisp_count': by_block.size()})
    final_df['consumer_num'] = count_flagged('Consumer')
    final_df['business_num'] = count_flagged('Business')
    for metric_name, source_col in speed_cols:
        final_df[metric_name] = by_block[source_col].mean()
    print('Done merging dataframes!')
    return final_df

In [226]:
# Aggregate the Montgomery County WISP records into one metrics row per census
# block (result is indexed by 'Census Block FIPS Code').
mont_metrics_df = get_metrics(mont_wisp_df)

Merging dataframes...
Done merging dataframes!


In [235]:
# Merging population data and WISP metrics
def merge_pop_wisp(pop_df, wisp_df):
    """Left-join per-block WISP metrics onto population rows by block FIPS code.

    Parameters
    ----------
    pop_df : pandas.DataFrame
        Population rows, either with a 'block_fips' column or already indexed
        by 'block_fips'. The caller's frame is not modified.
    wisp_df : pandas.DataFrame
        Metrics indexed by census block FIPS code. Its index values must be
        comparable with the 'block_fips' values (same dtype).

    Returns
    -------
    pandas.DataFrame
        One row per row of ``pop_df``, indexed by block FIPS code. Metric
        columns are NaN for blocks that do not appear in ``wisp_df``.

    Raises
    ------
    KeyError
        If ``pop_df`` is not indexed by 'block_fips' and has no such column.
    """
    if pop_df.index.name != 'block_fips':
        # set_index without inplace returns a new frame, so the caller's
        # DataFrame keeps its 'block_fips' column and original index.
        pop_df = pop_df.set_index('block_fips')
    return pd.merge(pop_df, wisp_df, how='left', left_index=True, right_index=True)

In [268]:
# Join the per-block WISP metrics onto the Montgomery population rows, add
# density = wisp_count / (pop2016 + 1), and turn the block FIPS index back
# into an ordinary column.
# NOTE(review): the +1 presumably guards against division by zero in
# zero-population blocks — confirm that this is the intended definition.
final_df = (
    merge_pop_wisp(mont_pop_df, mont_metrics_df)
    .assign(density=lambda merged: merged['wisp_count'] / (merged['pop2016'] + 1))
    .reset_index()
)
final_df.head()

Unnamed: 0,block_fips,stateabbr,hu2016,hh2016,pop2016,wisp_count,consumer_num,business_num,adv_max_down,adv_max_up,cir_max_up,cir_max_down,density
0,420912001031000,PA,6,6.0,18,6,5,6,197.0,149.383333,0.55,2.833333,0.315789
1,420912001031001,PA,0,0.0,0,4,3,4,10.5,1.575,0.825,4.25,4.0
2,420912001031002,PA,3,3.0,12,5,4,5,48.4,3.26,0.66,3.4,0.384615
3,420912001031003,PA,1,0.9448,1,8,5,8,123.125,110.8835,0.6,2.3125,4.0
4,420912001031004,PA,0,0.0,1,9,4,9,5.0,0.785333,1.033333,2.555556,4.5


In [267]:
# Write final_df to csv
# The output is comma-separated even though the file name ends in .txt.
# to_csv is called with its defaults, so the frame's index is written as an
# unnamed first column (it comes back as 'Unnamed: 0' when the file is re-read).
# NOTE(review): pass index=False if downstream readers do not need that column.
final_df.to_csv('../data/montgomery_wisp_metrics.txt')

## Baseline statistics

What are the important measures we want to see?