# Testing the distance calculation method

In [2]:
!pip install haversine



In [3]:
import pandas as pd
import numpy as np
from haversine import haversine, Unit

We can test the method by calculating the distance between Times Square and the White House.

In [5]:
# Reference points, each stored as a (lat, lon) tuple.

# New York
timesquare = (40.75773, -73.985708)
ny_cityhall = (40.712772, -74.006058)

# Boston
bostoncommon = (42.355, -71.0656)
bos_cityhall = (42.3562, -71.0548)

# Washington DC
whitehouse = (38.897957, -77.036560)
dc_cityhall = (38.895278, -77.017778)

# Sanity check: Times Square -> White House, in kilometres (haversine's default unit).
haversine(timesquare, whitehouse, unit=Unit.KILOMETERS)

332.58751900742413

# Calculating C-distance using ZIP codes

In [29]:
# Load the three per-zipcode city tables and the centroid lookup table
# (the file name suggests 2016 Gazetteer ZCTA centroids -- confirm provenance).
ny, bos, dc, centroid = map(
    pd.read_csv,
    (
        'ny_data_by_zipcode.csv',
        'bos_data_by_zipcode.csv',
        'dc_data_by_zipcode.csv',
        'gaz2016zcta5centroid.csv',
    ),
)

In [30]:
# Give the centroid table short column names; 'zipcode' is the key that
# get_distance() filters on.
centroid = centroid.rename(
    {'zcta5': 'zipcode', 'intptlat': 'lat', 'intptlong': 'long'},
    axis='columns',
)

In [31]:
def get_distance(zipcode, center):
    """Return the haversine distance from a zipcode's centroid to `center`.

    Parameters
    ----------
    zipcode : int
        Zipcode to look up in the notebook-level `centroid` table (matched
        against its 'zipcode' column).
    center : tuple
        Reference point as (lat, lon).

    Returns
    -------
    float
        Distance rounded to 3 decimal places, in haversine's default unit
        (kilometres).

    Raises
    ------
    IndexError
        If `zipcode` has no row in `centroid`.
    """
    # Select the coordinates by column name rather than by position so the
    # lookup keeps working if the centroid file's column order changes.
    match = centroid.loc[centroid['zipcode'] == zipcode, ['lat', 'long']]
    if match.empty:
        # Same exception type the positional lookup raised, but with a
        # message that says which zipcode is missing.
        raise IndexError(f"zipcode {zipcode!r} not found in the centroid table")
    lat, lon = match.iloc[0]  # first matching row, as before
    return round(haversine((lat, lon), center), 3)

In [32]:
# Spot check: distance from the centroid of zipcode 10025 to Times Square.
get_distance(zipcode=10025, center=timesquare)

4.82

# NYC C-distance

In [34]:
# Distance from each NYC zipcode centroid to Times Square (_ts) and to
# City Hall (_ch).
for column, landmark in (('C-distance_ts', timesquare), ('C-distance_ch', ny_cityhall)):
    ny[column] = ny['zipcode'].apply(get_distance, center=landmark)

# NOTE(review): to_csv also writes the index as an unnamed column; the
# 'Unnamed: 0' / 'Unnamed: 0.1' columns in the preview look like the result of
# earlier round-trips -- consider index=False once downstream readers are checked.
ny.to_csv('ny_data_by_zipcode_with_C-distance.csv')
ny.head()

Unnamed: 0.1,Unnamed: 0,zipcode,num_of_airbnb,num_of_beds,avg_price,total_reviews,avg_location_review_score,avg_review_score,num_of_bus,num_of_sub,...,"$25,000-$34,999","$35,000-$49,999","$50,000-$64,999","$65,000-$74,999","$75,000-$99,999","$100,000_or_more",median_household_income,mean_household_income,C-distance_ts,C-distance_ch
0,0,7093,1,3.0,288.0,77,9.0,9.0,,,...,13.1,8.7,13.9,18.2,10.4,42.5,47611,68956,4.012,8.388
1,1,10001,513,466.0,250.08577,11336,9.754144,8.950276,17.0,5.0,...,9.3,5.1,9.2,8.5,9.8,27.5,86801,158183,1.247,4.276
2,2,10002,1213,1377.0,185.379225,33405,9.690816,9.318367,33.0,6.0,...,14.0,8.8,10.0,14.2,7.1,31.3,33726,61946,4.665,1.706
3,3,10003,1032,1156.0,238.921512,22102,9.908616,9.35248,18.0,6.0,...,7.0,4.9,6.4,13.0,11.3,30.7,98151,161692,2.895,2.552
4,4,10004,68,62.0,201.426471,853,9.925,9.45,3.0,3.0,...,1.7,6.5,5.1,7.1,14.8,27.0,119691,177262,8.132,2.848


# Boston C-distance

In [35]:
# Distance from each Boston zipcode centroid to Boston Common (_bc) and to
# City Hall (_ch).
for column, landmark in (('C-distance_bc', bostoncommon), ('C-distance_ch', bos_cityhall)):
    bos[column] = bos['zipcode'].apply(get_distance, center=landmark)

# NOTE(review): to_csv also writes the index as an unnamed column; the
# 'Unnamed: 0' / 'Unnamed: 0.1' columns in the preview look like the result of
# earlier round-trips -- consider index=False once downstream readers are checked.
bos.to_csv('bos_data_by_zipcode_with_C-distance.csv')
bos.head()

Unnamed: 0.1,Unnamed: 0,zipcode,num_of_airbnb,num_of_beds,avg_price,total_reviews,avg_location_review_score,avg_review_score,num_of_bus,num_of_sub,...,"$25,000-$34,999","$35,000-$49,999","$50,000-$64,999","$65,000-$74,999","$75,000-$99,999","$100,000_or_more",median_household_income,mean_household_income,C-distance_bc,C-distance_ch
0,0,2026,1,1.0,50.0,1,4.0,8.0,31.0,,...,6.9,5.8,10.0,15.7,13.3,39.0,83438,109399,15.24,15.903
1,1,2108,111,154.0,256.279279,6217,9.90566,9.235849,24.0,6.0,...,3.6,4.5,8.4,6.8,12.9,28.1,103973,176335,0.312,0.848
2,2,2109,88,121.0,240.136364,4185,9.985075,9.179104,13.0,1.0,...,3.8,3.6,7.8,9.8,8.9,26.5,111313,166627,1.825,1.255
3,3,2110,39,37.0,249.0,1184,9.514286,9.028571,23.0,,...,7.0,6.2,7.7,12.1,8.9,28.7,128210,240767,1.652,0.859
4,4,2111,213,234.0,255.187793,5426,9.641618,8.936416,8.0,6.0,...,10.7,6.7,6.8,12.9,6.2,25.9,51480,121824,0.732,0.723


# Washington DC C-distance

In [37]:
# Distance from each DC zipcode centroid to the White House (_wh) and to
# City Hall (_ch).
for column, landmark in (('C-distance_wh', whitehouse), ('C-distance_ch', dc_cityhall)):
    dc[column] = dc['zipcode'].apply(get_distance, center=landmark)

# NOTE(review): the preview shows zipcode 2939 roughly 625 km from both
# landmarks with almost every other field empty -- probably a mis-entered
# zipcode; confirm and consider dropping it.
# NOTE(review): to_csv also writes the index as an unnamed column; the
# 'Unnamed: 0' / 'Unnamed: 0.1' columns in the preview look like the result of
# earlier round-trips -- consider index=False once downstream readers are checked.
dc.to_csv('dc_data_by_zipcode_with_C-distance.csv')
dc.head()

Unnamed: 0.1,Unnamed: 0,zipcode,num_of_airbnb,num_of_beds,avg_price,total_reviews,avg_location_review_score,avg_review_score,num_of_bus,num_of_sub,...,"$25,000-$34,999","$35,000-$49,999","$50,000-$64,999","$65,000-$74,999","$75,000-$99,999","$100,000_or_more",median_household_income,mean_household_income,C-distance_wh,C-distance_ch
0,0,2939,1,1.0,100.0,1,10.0,10.0,,,...,,,,,,,,,625.272,626.236
1,1,20001,1290,1947.0,210.800775,62223,9.695446,9.47723,177.0,4.0,...,7.5,5.5,7.1,10.1,9.3,26.5,85976.0,106670.0,2.134,1.676
2,2,20002,1593,2359.0,196.335844,73416,9.554883,9.596212,258.0,2.0,...,6.9,6.0,9.2,12.0,11.5,32.7,74303.0,98792.0,4.649,3.15
3,3,20003,742,1095.0,219.907008,35561,9.812601,9.623586,89.0,5.0,...,3.7,3.5,6.0,12.2,11.6,29.8,112083.0,140336.0,4.336,2.762
4,4,20004,18,24.0,482.833333,103,10.0,9.666667,40.0,2.0,...,0.6,2.2,5.9,9.2,7.8,22.9,135859.0,173373.0,0.766,0.941
