# Verizon Spotcheck
We re-collected Boston addresses and looked at differences in cost and speed.

In [1]:
import glob
from tqdm import tqdm
from multiprocess import Pool

import pandas as pd

from parsers import verizon_workflow

In [2]:
# inputs
fn_verizon = '../data/output/speed_price_verizon.csv.gz'
pattern_spotcheck = '../data/intermediary/isp_spotcheck/verizon/*/*.geojson.gz'

In [3]:
data_verizon = []
files = glob.glob(pattern_spotcheck)
with Pool(20) as pool:
    for record in tqdm(pool.imap_unordered(verizon_workflow, files), 
                       total=len(files)):
        data_verizon.extend(record)
verizon_spot = pd.DataFrame(data_verizon)
del data_verizon

  0%|          | 0/549 [00:00<?, ?it/s]

  5%|▍         | 27/549 [00:00<00:02, 257.79it/s]

 10%|▉         | 53/549 [00:00<00:02, 235.27it/s]

 14%|█▍        | 77/549 [00:00<00:02, 197.92it/s]

 18%|█▊        | 98/549 [00:00<00:02, 196.27it/s]

 21%|██▏       | 118/549 [00:00<00:02, 185.09it/s]

 25%|██▌       | 138/549 [00:00<00:02, 178.39it/s]

 30%|██▉       | 163/549 [00:00<00:01, 195.28it/s]

 33%|███▎      | 183/549 [00:00<00:02, 173.12it/s]

 37%|███▋      | 204/549 [00:01<00:01, 180.42it/s]

 42%|████▏     | 229/549 [00:01<00:01, 195.65it/s]

 45%|████▌     | 249/549 [00:01<00:01, 191.72it/s]

 49%|████▉     | 269/549 [00:01<00:01, 193.84it/s]

 53%|█████▎    | 293/549 [00:01<00:01, 201.82it/s]

 58%|█████▊    | 318/549 [00:01<00:01, 215.25it/s]

 62%|██████▏   | 340/549 [00:01<00:01, 204.79it/s]

 66%|██████▌   | 362/549 [00:01<00:00, 205.84it/s]

 70%|██████▉   | 383/549 [00:01<00:00, 179.74it/s]

 73%|███████▎  | 402/549 [00:02<00:00, 176.56it/s]

 77%|███████▋  | 421/549 [00:02<00:00, 174.69it/s]

 82%|████████▏ | 451/549 [00:02<00:00, 197.68it/s]

 86%|████████▌ | 472/549 [00:02<00:00, 192.54it/s]

 90%|████████▉ | 492/549 [00:02<00:00, 184.20it/s]

 94%|█████████▍| 517/549 [00:02<00:00, 194.36it/s]

 99%|█████████▉| 546/549 [00:02<00:00, 190.63it/s]

100%|██████████| 549/549 [00:02<00:00, 190.52it/s]




In [4]:
verizon = pd.read_csv(fn_verizon, compression='gzip')

In [5]:
verizon.head(2)

Unnamed: 0,address_full,incorporated_place,major_city,state,lat,lon,block_group,collection_datetime,in_service,provider,...,address_full_closest_fiber,lat_closest_fiber,lon_closest_fiber,race_perc_non_white,income_lmi,ppl_per_sq_mile,n_providers,income_dollars_below_median,internet_perc_broadband,median_household_income
0,"4879 POTOMAC AVE NW, WASHINGTON, DC, 20007-153...",Washington city,washington,DC,38.918017,-77.098932,110010008014,1650442663,True,Verizon,...,"4879 POTOMAC AVE NW, WASHINGTON, DC, 20007-153...",38.918017,-77.098932,0.264579,1.621585,1569.712349,3.0,-56466.0,1.0,147308
1,"4836 RESERVOIR RD NW, WASHINGTON, DC, 20007-15...",Washington city,washington,DC,38.915848,-77.09608,110010008014,1650442656,False,Verizon,...,"4840 RESERVOIR RD NW, WASHINGTON, DC, 20007-15...",38.915914,-77.096268,0.264579,1.621585,1569.712349,3.0,-56466.0,1.0,147308


In [6]:
cols = [
    'price_new', 'price_old', 'speed_down_new', 'speed_down_old'
]

In [7]:
verizon_spot = verizon_spot.merge(verizon[['lat', 'lon', 'price', 'speed_down', 'race_perc_non_white', 'income_dollars_below_median']],
                                  how='inner',
                                  on=['lat', 'lon'], suffixes=["_new", "_old"])

In [8]:
verizon_spot[verizon_spot['price_new'] != verizon_spot['price_old']][cols]

Unnamed: 0,price_new,price_old,speed_down_new,speed_down_old
0,,40.00,0.0,10.0
2,,,0.0,0.0
3,,,0.0,0.0
4,,,0.0,0.0
5,,,0.0,0.0
...,...,...,...,...
13437,39.99,49.99,300.0,300.0
13444,,,0.0,0.0
13445,,,0.0,0.0
13446,39.99,49.99,300.0,300.0


In [9]:
verizon_spot.price_new.replace({40: 39.99}, inplace=True)
verizon_spot.price_old.replace({40: 39.99}, inplace=True)

In [10]:
verizon_spot.price_new.describe(), verizon_spot.price_old.describe()

(count    9838.000000
 mean       40.507382
 std         2.514905
 min        39.990000
 25%        39.990000
 50%        39.990000
 75%        39.990000
 max        84.990000
 Name: price_new, dtype: float64,
 count    9823.000000
 mean       42.345187
 std         4.389388
 min        39.990000
 25%        39.990000
 50%        39.990000
 75%        39.990000
 max        64.990000
 Name: price_old, dtype: float64)

In [11]:
price_changed = verizon_spot[(verizon_spot['price_new'] != verizon_spot['price_old']) &
             (~verizon_spot.price_new.isnull()) &
             (~verizon_spot.price_old.isnull())]

In [12]:
price_changed[cols]

Unnamed: 0,price_new,price_old,speed_down_new,speed_down_old
16,49.99,39.99,300.0,300.0
25,39.99,49.99,300.0,300.0
27,39.99,49.99,300.0,300.0
33,49.99,39.99,300.0,300.0
35,39.99,49.99,300.0,300.0
...,...,...,...,...
13426,39.99,49.99,300.0,300.0
13431,49.99,39.99,300.0,300.0
13437,39.99,49.99,300.0,300.0
13446,39.99,49.99,300.0,300.0


In [13]:
len(price_changed[
    price_changed['price_new'] > price_changed['price_old']
]) / len(price_changed)

0.12774957698815567

In [14]:
len(price_changed[
    price_changed['price_new'] < price_changed['price_old']
]) / len(price_changed)

0.8722504230118443

In [15]:
# note none of the speeds change.
speed_changed = verizon_spot[
    (verizon_spot['speed_down_new'] != verizon_spot['speed_down_old']) &
    (~verizon_spot['price_new'].isnull()) &
    (~verizon_spot['price_old'].isnull())
]

In [16]:
len(verizon_spot[verizon_spot.speed_down_old <= 25])

4030

In [17]:
len(speed_changed)

340

In [18]:
speed_changed[speed_changed['speed_down_new'] == 300][cols]

Unnamed: 0,price_new,price_old,speed_down_new,speed_down_old
5034,39.99,39.99,300.0,10.0


In [19]:
speed_changed[speed_changed['speed_down_old'] == 300][cols]

Unnamed: 0,price_new,price_old,speed_down_new,speed_down_old
1028,39.99,39.99,50.0,300.0


In [20]:
speed_changed['speed_down_old'].value_counts()

10.0     329
7.1        9
300.0      1
15.0       1
Name: speed_down_old, dtype: int64

all but one instance of a household being offered different speeds was for Verizon's "High Speed Internet"

In [21]:
verizon_spot[verizon_spot['price_new'] > 39.99].income_dollars_below_median.describe()

count       441.000000
mean       3131.877551
std       46188.593798
min     -108990.000000
25%      -32720.000000
50%       13459.000000
75%       47704.000000
max       67113.000000
Name: income_dollars_below_median, dtype: float64

In [22]:
verizon_spot[verizon_spot['price_old'] > 39.99].income_dollars_below_median.describe()

count      2151.000000
mean      -7185.992562
std       46976.896763
min     -119952.000000
25%      -49483.000000
50%       -6410.000000
75%       35794.500000
max       67113.000000
Name: income_dollars_below_median, dtype: float64

In [23]:
verizon_spot[verizon_spot['price_new'] > 39.99].race_perc_non_white.describe()

count    456.000000
mean       0.514393
std        0.310580
min        0.000000
25%        0.237150
50%        0.521895
75%        0.831577
max        1.000000
Name: race_perc_non_white, dtype: float64

In [24]:
verizon_spot[verizon_spot['price_old'] > 39.99].race_perc_non_white.describe()

count    2264.000000
mean        0.467853
std         0.322709
min         0.000000
25%         0.175947
50%         0.447368
75%         0.762159
max         1.000000
Name: race_perc_non_white, dtype: float64

In [25]:
# median income for everything
verizon_spot.income_dollars_below_median.describe()

count     12534.000000
mean      -2769.074358
std       46325.610972
min     -138247.000000
25%      -39119.000000
50%        2965.000000
75%       39965.000000
max       67113.000000
Name: income_dollars_below_median, dtype: float64

In [26]:
verizon_spot.race_perc_non_white.describe()

count    13452.000000
mean         0.491652
std          0.313279
min          0.000000
25%          0.220965
50%          0.447368
75%          0.822259
max          1.000000
Name: race_perc_non_white, dtype: float64

The median income was lower in higher cost plans from the first sample, but not the second sample.

The racial demographics were less white for more expensive plans

In [27]:
verizon_spot[verizon_spot.price_old != '39.99'].price_new.value_counts(normalize=True)

39.99    0.953649
49.99    0.042895
64.99    0.003354
84.99    0.000102
Name: price_new, dtype: float64

In [28]:
verizon_spot[verizon_spot.price_new != '39.99'].price_old.value_counts(normalize=True)

39.99    0.769521
49.99    0.227120
64.99    0.003359
Name: price_old, dtype: float64

95 percent of offers that were over 39.99 were switched back to 39.99 when we checked again. Similarly, 77 percent of offers that are now over 39.99 were originally 39.99.