In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import seaborn as sns
import requests
import json
import csv

%matplotlib inline
pd.set_option('display.max_colwidth', -1)

In [2]:
def clean_homeaway_row(row):
    row_price_clean = re.sub(r'[\$,]', '', row.price)
    row.price = int(row_price_clean)
    row.rating = np.nan if np.isnan(row.rating) else float(row.rating)
    row.number_reviews = 0 if np.isnan(row.number_reviews) else int(row.number_reviews)
    row.bathrooms = int(row.bathrooms)
    row.bedrooms = 0 if (row.bedrooms == 'Studio') else int(row.bedrooms)
    row.geo_lat = float(row.geo_lat)
    row.geo_long = float(row.geo_long)
    
    # Clean minimum stay string
    if isinstance(row.min_stay, str):
        nights = row.min_stay.replace('nights', '').strip()
        nights = [int(n) for n in nights.split('-')]
        min_stay = np.floor(np.average(nights))
    elif(row.min_stay):
        min_stay = 1
    row.min_stay = min_stay
    return row
    
def clean_homeaway_data(df):
    df = df.drop_duplicates(subset=['url'])
    df = df[df.price.notnull()]
    df = df.apply(clean_homeaway_row, axis=1)
    df = df.drop_duplicates(subset=['title', 'price', 'sleeps'])
    return df
    
data_file = 'data/homeaway_rentals_nyc_ALL.txt'
headers = ['url', 'price', 'title', 'geo_lat', 'geo_long', 'rating',
           'number_reviews', 'sleeps', 'bedrooms', 'bathrooms',
           'min_stay']

df = pd.read_csv(data_file, sep='\t', header=None, names=headers)

df = clean_homeaway_data(df)
df.head()

Unnamed: 0,url,price,title,geo_lat,geo_long,rating,number_reviews,sleeps,bedrooms,bathrooms,min_stay
0,https://www.homeaway.com/vacation-rental/p353201,100,Modern Studio Haven in Quiet Cul-de-sac... - HomeAway Bedford-Stuyvesant,40.680474,-73.935103,4.6,38,3,0,1,4.0
1,https://www.homeaway.com/vacation-rental/p240675,271,Charming 3 Bedroom/2Bath Duplex Apartment... - HomeAway Bedford-Stuyvesant,40.680691,-73.935513,4.7,59,9,3,2,4.0
2,https://www.homeaway.com/vacation-rental/p3926833,904,"5 Bedroom 3 Bathroom Residence - 2,500... - HomeAway Financial District",40.711948,-74.00674,4.9,50,12,5,3,4.0
3,https://www.homeaway.com/vacation-rental/p3478776,590,LUXURY TWO BED/TW0 BATH CHELSEA HIGH... - HomeAway Chelsea,40.745583,-73.991396,4.9,73,5,2,2,5.0
4,https://www.homeaway.com/vacation-rental/p4227371,142,Sydni's Patch Near Manhattan - HomeAway Bedford-Stuyvesant,40.684658,-73.955302,4.6,23,6,2,1,5.0


In [3]:
df.describe()

Unnamed: 0,price,geo_lat,geo_long,rating,number_reviews,sleeps,bedrooms,bathrooms,min_stay
count,4000.0,4000.0,4000.0,1630.0,4000.0,4000.0,4000.0,4000.0,4000.0
mean,36928.13,40.739385,-73.97273,4.563742,6.6005,5.078,1.8325,1.3025,9.1675
std,2313178.0,0.050599,0.060157,0.611546,16.193192,2.826467,1.154676,0.75242,12.31399
min,0.0,40.461098,-74.357016,1.0,0.0,0.0,0.0,0.0,1.0
25%,161.0,40.714727,-73.99444,4.4,0.0,3.0,1.0,1.0,2.0
50%,250.0,40.743607,-73.979966,4.8,0.0,4.0,2.0,1.0,3.0
75%,400.0,40.766266,-73.951813,5.0,4.0,6.0,2.0,1.0,7.0
max,146298500.0,40.974511,-73.673384,5.0,175.0,38.0,15.0,23.0,180.0


In [4]:
# Find number of outliers
print('# Rentals with nightly price >= $3000:', (df.loc[df.price >= 3000].shape[0]))
print('# Rentals with minimum # night stay >= 100:', (df.loc[df.min_stay >= 100].shape[0]))
print('# Rentals with # bedrooms >= 10:', (df.loc[df.bedrooms >= 10].shape[0]))
print('# Rentals with # bathrooms >= 10:', (df.loc[df.bathrooms >= 10].shape[0]))
print('# Rentals with sleeps >= 10:', (df.loc[df.bathrooms >= 10].shape[0]))
df.loc[df.bathrooms >= 10].url

# Rentals with nightly price >= $3000: 13
# Rentals with minimum # night stay >= 100: 2
# Rentals with # bedrooms >= 10: 2
# Rentals with # bathrooms >= 10: 1
# Rentals with sleeps >= 10: 1


748    https://www.homeaway.com/vacation-rental/p787841vb
Name: url, dtype: object

In [5]:
# Remove these outliers
df = df[df.price < 3000]
df = df[df.bedrooms < 10]
df = df[df.min_stay < 100]
df = df[df.bathrooms < 10]
df = df[df.sleeps < 10]

In [10]:
# Add new features using latitude, longitude and Google's Places API

data_file_TS_distances = 'data/homeaway_urls_times_square.csv'
data_file_subway_counts = 'data/homeaway_urls_subway.csv'

def nearby_subway_count(lat, long):
    """
    Count # subway stations within a 1 mile radius
    https://developers.google.com/places/web-service/search
    """
    api_key = 'AIzaSyDp5YP40O02jkGRN2hQ3uVwnPckSDSxjTU' # nana - last used 07/15 5pm
    # api_key = 'AIzaSyDgm8KI2voT4pNCA-AakvqzwRr_oE_PDXI' # li - last used 07/15 9pm
    api_url = ('https://maps.googleapis.com/maps/api/place/nearbysearch/json?' 
    + 'location=' + repr(lat) + ',' + repr(long)
    + '&radius=800'
    + '&type=subway_station'
    + '&key=' + api_key)
    response = requests.get(api_url).text
    resp_obj = json.loads(response)
    subway_stations = resp_obj['results']
    return len(subway_stations)


def write_subway_counts():
    """
    One time collection of data from Google API for subway counts
    """
    with open(data_file_subway_counts, 'a+') as homeaway_file:
        writer = csv.writer(homeaway_file, delimiter='\t')
        for i, row in enumerate(df.as_matrix()[2810:3810]):
            url = row[0]
            lat = row[3]
            long = row[4]
            subway_count = nearby_subway_count(lat, long)
            data = [i, url, lat, long, subway_count]
            print(data)
            writer.writerow(data)
            homeaway_file.flush()
    homeaway_file.close()
    
    
def distance_to_times_square(lat, long):
    """
    Calculate distance (in meter) to drive to Times Square
    using public transportation with Google API
    https://developers.google.com/maps/documentation/distance-matrix/intro
    """
    times_square_lat = '40.759171'
    times_square_long = '-73.985517'
    # api_key = 'AIzaSyBO_xmtgW62tEXaDNaofO9LZ0GOVGLtmDw' # nana - last used 07/15 5pm
    api_key = 'AIzaSyB4ZjNy5zx8Ed32i3I7sGVBgKeR3s20dD4' # li - last used 07/14 9:30pm
    api_url = ('https://maps.googleapis.com/maps/api/distancematrix/json?units=imperial'
               + '&origins=' + repr(lat) + ',' + repr(long)
               + '&destinations=' + times_square_lat + ',' + times_square_long
               + '&key=' + api_key)
    response = requests.get(api_url).text
    resp_obj = json.loads(response)
    distance = resp_obj['rows'][0]['elements'][0]['distance']['value']
    return distance
  
    
def write_driving_distance():
    """
    One time collection of data from Google API for driving distance to Times Square
    """
    with open(data_file_TS_distances, 'a+') as homeaway_file:
        writer = csv.writer(homeaway_file, delimiter='\t')
        for i, row in enumerate(df.as_matrix()):
            url = row[0]
            lat = row[3]
            long = row[4]
            distance = distance_to_times_square(lat, long)
            data = [i, url, lat, long, distance]
            print(data)
            writer.writerow(data)
            homeaway_file.flush()
    homeaway_file.close()

# Store data collected from Google about each rental in a separate file because of API quotas
# Run these functions once

# write_subway_counts()
# write_driving_distance() #DONE

[0, 'https://www.homeaway.com/vacation-rental/p4624536', 40.762887, -73.98692199999999, 10]
[1, 'https://www.homeaway.com/vacation-rental/p4292617', 40.7779763, -73.9508573, 3]
[2, 'https://www.homeaway.com/vacation-rental/p4543442', 40.77363356, -73.95628673, 4]
[3, 'https://www.homeaway.com/vacation-rental/p4137965', 40.731435600000005, -73.9874733, 5]
[4, 'https://www.homeaway.com/vacation-rental/p4362396', 40.78299759, -73.94926963, 4]
[5, 'https://www.homeaway.com/vacation-rental/p4624602', 40.708735, -74.01358499999999, 17]
[6, 'https://www.homeaway.com/vacation-rental/p4251417', 40.774024600000004, -73.95456300000001, 4]
[7, 'https://www.homeaway.com/vacation-rental/p4422937', 40.7570792, -73.96983879999999, 5]
[8, 'https://www.homeaway.com/vacation-rental/p4606645', 40.73063586, -73.98666567, 5]
[9, 'https://www.homeaway.com/vacation-rental/p4210923', 40.753814899999995, -73.96742579999999, 2]
[10, 'https://www.homeaway.com/vacation-rental/p4543445', 40.76790876, -73.99459306, 

[92, 'https://www.homeaway.com/vacation-rental/p4207455', 40.76582016, -73.98759643, 7]
[93, 'https://www.homeaway.com/vacation-rental/p4422971', 40.7419747, -73.9808689, 5]
[94, 'https://www.homeaway.com/vacation-rental/p4422972', 40.771107799999996, -73.9617145, 4]
[95, 'https://www.homeaway.com/vacation-rental/p4630093', 40.760411, -73.98745749999999, 11]
[96, 'https://www.homeaway.com/vacation-rental/p6835419', 40.757715000000005, -73.9694197, 5]
[97, 'https://www.homeaway.com/vacation-rental/p4434533', 40.692731, -73.98598299999999, 10]
[98, 'https://www.homeaway.com/vacation-rental/p4439031', 40.7064807, -74.0076486, 17]
[99, 'https://www.homeaway.com/vacation-rental/p4616403', 40.7520075, -73.99879820000001, 6]
[100, 'https://www.homeaway.com/vacation-rental/p6835399', 40.7742581, -73.95267940000001, 4]
[101, 'https://www.homeaway.com/vacation-rental/p4543606', 40.74702533, -73.98780558, 12]
[102, 'https://www.homeaway.com/vacation-rental/p4422949', 40.7555055, -73.968065, 2]
[1

[183, 'https://www.homeaway.com/vacation-rental/p4624590', 40.763451, -73.960036, 6]
[184, 'https://www.homeaway.com/vacation-rental/p4422931', 40.7570792, -73.96983879999999, 5]
[185, 'https://www.homeaway.com/vacation-rental/p4266407', 40.775963399999995, -73.9533614, 3]
[186, 'https://www.homeaway.com/vacation-rental/p4305822', 40.7045906, -74.0089036, 13]
[187, 'https://www.homeaway.com/vacation-rental/p4489486', 40.742005999999996, -73.994889, 11]
[188, 'https://www.homeaway.com/vacation-rental/p4558671', 40.634878, -73.96204200000001, 3]
[189, 'https://www.homeaway.com/vacation-rental/p4317749', 40.7569704, -73.9682768, 5]
[190, 'https://www.homeaway.com/vacation-rental/p4587339', 40.755128899999995, -73.9652429, 2]
[191, 'https://www.homeaway.com/vacation-rental/p4422969', 40.728421399999995, -73.99969250000001, 10]
[192, 'https://www.homeaway.com/vacation-rental/p4624670', 40.756057, -73.993948, 9]
[193, 'https://www.homeaway.com/vacation-rental/p4422939', 40.7628658, -73.98941

[274, 'https://www.homeaway.com/vacation-rental/p8243420', 40.7824979, -73.946405, 3]
[275, 'https://www.homeaway.com/vacation-rental/p4422958', 40.760411, -73.98745749999999, 11]
[276, 'https://www.homeaway.com/vacation-rental/p4351185', 40.759791, -73.987927, 11]
[277, 'https://www.homeaway.com/vacation-rental/p4438962', 40.688257, -73.985584, 11]
[278, 'https://www.homeaway.com/vacation-rental/p8122282', 40.729022, -73.981526, 3]
[279, 'https://www.homeaway.com/vacation-rental/p6923779', 40.7628658, -73.98941090000001, 6]
[280, 'https://www.homeaway.com/vacation-rental/p8079464', 40.7586532, -73.9687051, 6]
[281, 'https://www.homeaway.com/vacation-rental/p8079705', 40.7824979, -73.946405, 3]
[282, 'https://www.homeaway.com/vacation-rental/p8242548', 40.7641329, -73.9884998, 8]
[283, 'https://www.homeaway.com/vacation-rental/p8242503', 40.74257720000001, -73.98040959999999, 5]
[284, 'https://www.homeaway.com/vacation-rental/p4483788', 40.76341729999999, -73.9628523, 5]
[285, 'https:/

[364, 'https://www.homeaway.com/vacation-rental/p6923824', 40.7570792, -73.96983879999999, 5]
[365, 'https://www.homeaway.com/vacation-rental/p4348044', 40.7565614, -73.9691535, 5]
[366, 'https://www.homeaway.com/vacation-rental/p4439287', 40.7127837, -74.00594129999999, 18]
[367, 'https://www.homeaway.com/vacation-rental/p4458158', 40.7657968, -73.9875562, 7]
[368, 'https://www.homeaway.com/vacation-rental/p8244929', 40.760411, -73.98745749999999, 11]
[369, 'https://www.homeaway.com/vacation-rental/p6841572', 40.7979091, -73.9322662, 1]
[370, 'https://www.homeaway.com/vacation-rental/p4324630', 40.7453037, -73.98162020000001, 6]
[371, 'https://www.homeaway.com/vacation-rental/p3895262', 40.746913, -73.99201500000001, 11]
[372, 'https://www.homeaway.com/vacation-rental/p3952122', 40.753558399999996, -73.971712, 3]
[373, 'https://www.homeaway.com/vacation-rental/p4271186', 40.74360720000001, -73.97248499999999, 0]
[374, 'https://www.homeaway.com/vacation-rental/p4207436', 40.71873579, -

[455, 'https://www.homeaway.com/vacation-rental/p4435515', 40.691037200000004, -73.98260440000001, 10]
[456, 'https://www.homeaway.com/vacation-rental/p4440092', 40.688257, -73.985584, 11]
[457, 'https://www.homeaway.com/vacation-rental/p4432153', 40.766038200000004, -73.9918274, 1]
[458, 'https://www.homeaway.com/vacation-rental/p4267678', 40.779808200000005, -73.95033719999999, 4]
[459, 'https://www.homeaway.com/vacation-rental/p2619854', 40.70643, -74.00766, 17]
[460, 'https://www.homeaway.com/vacation-rental/p4441302', 40.7657968, -73.9875562, 7]
[461, 'https://www.homeaway.com/vacation-rental/p4315496', 40.7779763, -73.9508573, 3]
[462, 'https://www.homeaway.com/vacation-rental/p2619855', 40.70643, -74.00766, 17]
[463, 'https://www.homeaway.com/vacation-rental/p4329854', 40.7509877, -73.9703579, 3]
[464, 'https://www.homeaway.com/vacation-rental/p4281521', 40.74360720000001, -73.97248499999999, 0]
[465, 'https://www.homeaway.com/vacation-rental/p4269116', 40.756356, -73.9944858, 9

[546, 'https://www.homeaway.com/vacation-rental/p4216111', 40.7279838, -73.9830634, 3]
[547, 'https://www.homeaway.com/vacation-rental/p4249714', 40.8198857, -73.9529809, 4]
[548, 'https://www.homeaway.com/vacation-rental/p4460658', 40.754090999999995, -73.992079, 12]
[549, 'https://www.homeaway.com/vacation-rental/p3931827', 40.776516, -73.9080169, 1]
[550, 'https://www.homeaway.com/vacation-rental/p2583292', 40.75078, -73.99746999999999, 8]
[551, 'https://www.homeaway.com/vacation-rental/p4479278', 40.7573593, -73.96495340000001, 4]
[552, 'https://www.homeaway.com/vacation-rental/p4324621', 40.756501, -73.994438, 9]
[553, 'https://www.homeaway.com/vacation-rental/p4435508', 40.688257, -73.985584, 11]
[554, 'https://www.homeaway.com/vacation-rental/p4207230', 40.73294992, -74.00725901, 4]
[555, 'https://www.homeaway.com/vacation-rental/p2605213', 40.73774, -73.98311, 6]
[556, 'https://www.homeaway.com/vacation-rental/p4276156', 40.74360720000001, -73.97248499999999, 0]
[557, 'https://

[637, 'https://www.homeaway.com/vacation-rental/p8244443', 40.74010915, -73.97916093, 3]
[638, 'https://www.homeaway.com/vacation-rental/p8244426', 40.72351067, -73.99514732, 11]
[639, 'https://www.homeaway.com/vacation-rental/p8244429', 40.78190042, -73.95463141, 4]
[640, 'https://www.homeaway.com/vacation-rental/p8244431', 40.75975129, -73.96213417, 5]
[641, 'https://www.homeaway.com/vacation-rental/p8298119', 40.78296815, -73.97636877, 6]
[642, 'https://www.homeaway.com/vacation-rental/p8298201', 40.74391272, -73.97691519, 2]
[643, 'https://www.homeaway.com/vacation-rental/p8244435', 40.74260634, -73.97992965, 5]
[644, 'https://www.homeaway.com/vacation-rental/p8298133', 40.75634551, -73.99632455, 7]
[645, 'https://www.homeaway.com/vacation-rental/p8298160', 40.76369185, -73.98272943, 11]
[646, 'https://www.homeaway.com/vacation-rental/p8298095', 40.76413413, -73.96808587, 8]
[647, 'https://www.homeaway.com/vacation-rental/p8244472', 40.76279832, -73.98368189, 12]
[648, 'https://www

[729, 'https://www.homeaway.com/vacation-rental/p3850585', 40.68492379999999, -73.9662225, 5]
[730, 'https://www.homeaway.com/vacation-rental/p3870123', 40.84557, -73.81432868, 0]
[731, 'https://www.homeaway.com/vacation-rental/p3811287', 40.685093, -73.913792, 4]
[732, 'https://www.homeaway.com/vacation-rental/p3894360', 40.626982, -73.925475, 0]
[733, 'https://www.homeaway.com/vacation-rental/p710626vb', 40.76783471, -73.89541911, 0]
[734, 'https://www.homeaway.com/vacation-rental/p3960265', 40.7019198, -73.9085484, 5]
[735, 'https://www.homeaway.com/vacation-rental/p1851486', 40.823277000000004, -73.949814, 4]
[736, 'https://www.homeaway.com/vacation-rental/p745416vb', 40.7773054, -73.9221573, 0]
[737, 'https://www.homeaway.com/vacation-rental/p747825vb', 40.74940201, -73.99650775, 9]
[738, 'https://www.homeaway.com/vacation-rental/p750993vb', 40.693622999999995, -73.823566, 0]
[739, 'https://www.homeaway.com/vacation-rental/p3999130', 40.7720616, -73.9564595, 4]
[740, 'https://www.

[821, 'https://www.homeaway.com/vacation-rental/p3709255', 40.785455, -74.025077, 0]
[822, 'https://www.homeaway.com/vacation-rental/p278905', 40.7315523, -74.0709363, 0]
[823, 'https://www.homeaway.com/vacation-rental/p3945724', 40.757338, -74.04006690000001, 0]
[824, 'https://www.homeaway.com/vacation-rental/p3709273', 40.772206, -74.03269300000001, 0]
[825, 'https://www.homeaway.com/vacation-rental/p442424vb', 40.787698999999996, -74.004094, 0]
[826, 'https://www.homeaway.com/vacation-rental/p561856vb', 40.748295299999995, -74.0283108, 0]
[827, 'https://www.homeaway.com/vacation-rental/p3677417', 40.772206, -74.03269300000001, 0]
[828, 'https://www.homeaway.com/vacation-rental/p3787974', 40.733453000000004, -74.061736, 0]
[829, 'https://www.homeaway.com/vacation-rental/p3992966', 40.5846649, -73.6960603, 0]
[830, 'https://www.homeaway.com/vacation-rental/p3871484', 40.762329, -74.029462, 0]
[831, 'https://www.homeaway.com/vacation-rental/p3549732', 40.7184403, -74.0466912, 0]
[832, 

[914, 'https://www.homeaway.com/vacation-rental/p3952959', 40.742297, -74.219239, 0]
[915, 'https://www.homeaway.com/vacation-rental/p892350vb', 40.7594275, -74.1700869, 0]
[916, 'https://www.homeaway.com/vacation-rental/p1103129vb', 40.76730072, -74.02657886, 0]
[917, 'https://www.homeaway.com/vacation-rental/p902846vb', 40.651162, -74.349691, 0]
[918, 'https://www.homeaway.com/vacation-rental/p1005142vb', 40.7674564, -74.03357740000001, 0]
[919, 'https://www.homeaway.com/vacation-rental/p4613144', 40.80918732, -73.73981528, 0]
[920, 'https://www.homeaway.com/vacation-rental/p632646vb', 40.704453, -74.09651099999999, 0]
[921, 'https://www.homeaway.com/vacation-rental/p3956051', 40.935842, -73.900338, 0]
[922, 'https://www.homeaway.com/vacation-rental/p744755vb', 40.74461516, -74.03732665, 0]
[923, 'https://www.homeaway.com/vacation-rental/p4432900', 40.766303, -74.024458, 0]
[924, 'https://www.homeaway.com/vacation-rental/p4585338', 40.73275170000001, -74.2253346, 0]
[925, 'https://ww

In [7]:
# Merge Homeaway rental data with location data collected from Google Places API 

def url_to_TS_distance():
    url_to_distance = {}
    with open(data_file_TS_distances, 'r') as f:
        for TS_line in csv.reader(f, delimiter='\t'):
            url_to_distance[TS_line[1]] = TS_line[4]
    return url_to_distance


def add_geo_data(row, url_to_distance):
    url = row.url
    distance = url_to_distance[url]
    row.distance_to_TS = int(distance)
    return row


def merge_data(df):
    #df['subway_counts'] = None
    df['distance_to_TS'] = None
    df = df.apply(lambda row: add_geo_data(row, url_to_TS_distance()), axis=1)
    return df
    
df = merge_data(df)
df.head()

Unnamed: 0,url,price,title,geo_lat,geo_long,rating,number_reviews,sleeps,bedrooms,bathrooms,min_stay,distance_to_TS
0,https://www.homeaway.com/vacation-rental/p353201,100,Modern Studio Haven in Quiet Cul-de-sac... - HomeAway Bedford-Stuyvesant,40.680474,-73.935103,4.6,38,3,0,1,4.0,17124
1,https://www.homeaway.com/vacation-rental/p240675,271,Charming 3 Bedroom/2Bath Duplex Apartment... - HomeAway Bedford-Stuyvesant,40.680691,-73.935513,4.7,59,9,3,2,4.0,17152
3,https://www.homeaway.com/vacation-rental/p3478776,590,LUXURY TWO BED/TW0 BATH CHELSEA HIGH... - HomeAway Chelsea,40.745583,-73.991396,4.9,73,5,2,2,5.0,2605
4,https://www.homeaway.com/vacation-rental/p4227371,142,Sydni's Patch Near Manhattan - HomeAway Bedford-Stuyvesant,40.684658,-73.955302,4.6,23,6,2,1,5.0,15102
5,https://www.homeaway.com/vacation-rental/p324686,120,Charming 1 Bedroom Apartment in Quiet... - HomeAway Bedford-Stuyvesant,40.680474,-73.935103,4.6,27,4,1,1,4.0,17124


In [8]:
# Final stats after cleaning and feature engineering:
df.describe()

Unnamed: 0,price,geo_lat,geo_long,rating,number_reviews,sleeps,bedrooms,bathrooms,min_stay,distance_to_TS
count,3717.0,3717.0,3717.0,1475.0,3717.0,3717.0,3717.0,3717.0,3717.0,3717.0
mean,318.503632,40.74004,-73.972358,4.564339,6.46516,4.546946,1.670702,1.224913,9.472962,8777.994888
std,271.132441,0.049464,0.057537,0.614956,16.149953,1.828422,0.939067,0.547028,12.076079,8171.555161
min,0.0,40.551015,-74.349691,1.0,0.0,0.0,0.0,0.0,1.0,51.0
25%,157.0,40.716084,-73.993742,4.4,0.0,3.0,1.0,1.0,2.0,2845.0
50%,243.0,40.743869,-73.979954,4.8,0.0,4.0,2.0,1.0,3.0,5881.0
75%,386.0,40.766514,-73.952469,5.0,4.0,6.0,2.0,1.0,9.0,12669.0
max,2688.0,40.974511,-73.679948,5.0,175.0,9.0,6.0,6.0,90.0,47405.0


In [11]:
df = df[df.distance_to_TS < 60000]

In [12]:
# Write to file to be used for modeling
filename = 'data/homeaway_rentals_nyc_PARSED.csv' 
print('Writing to ', filename)
# df.to_csv(filename, sep='\t')

Writing to  data/homeaway_rentals_nyc_PARSED.csv
