In [1]:
import csv
import datetime
import json
import os
import re

#import dateutil.parser
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns

%matplotlib inline
#pd.set_option('display.max_colwidth', -1)

In [2]:
def clean_homeaway_row(row):
    """
    Normalise the scraped fields of a single HomeAway listing.

    Parameters
    ----------
    row : pandas.Series (or any object exposing the same attributes)
        One listing with the attributes ``price``, ``rating``,
        ``number_reviews``, ``bathrooms``, ``bedrooms``, ``geo_lat``,
        ``geo_long`` and ``min_stay``.

    Returns
    -------
    The same ``row`` object, updated in place:

    * ``price``          -- int, with ``$`` and ``,`` stripped from the string
    * ``rating``         -- float, NaN when the listing has no rating
    * ``number_reviews`` -- int, 0 when missing
    * ``bathrooms``      -- int
    * ``bedrooms``       -- int, with ``'Studio'`` mapped to 0
    * ``geo_lat`` / ``geo_long`` -- float
    * ``min_stay``       -- floor of the mean of the numbers in a string such
      as ``'3 - 6 nights'``; 1 for every non-string value

    Raises
    ------
    TypeError
        If ``price`` is not a string.
    ValueError
        If a numeric field or the ``min_stay`` string cannot be parsed.
    """
    # '$1,250' -> 1250
    row.price = int(re.sub(r'[\$,]', '', row.price))
    row.rating = np.nan if np.isnan(row.rating) else float(row.rating)
    row.number_reviews = 0 if np.isnan(row.number_reviews) else int(row.number_reviews)
    row.bathrooms = int(row.bathrooms)
    row.bedrooms = 0 if (row.bedrooms == 'Studio') else int(row.bedrooms)
    row.geo_lat = float(row.geo_lat)
    row.geo_long = float(row.geo_long)

    # Clean minimum stay string
    if isinstance(row.min_stay, str):
        # '3 - 6 nights' -> [3, 6] -> floor(4.5) -> 4.0
        bounds = row.min_stay.replace('nights', '').strip().split('-')
        row.min_stay = np.floor(np.average([int(bound) for bound in bounds]))
    else:
        # Any non-string value (NaN, None, 0, ...) means no usable minimum was
        # scraped. The previous `elif row.min_stay:` left the result unassigned
        # for falsy values and crashed with UnboundLocalError.
        row.min_stay = 1
    return row
    
def clean_homeaway_data(df):
    """
    Deduplicate and clean a frame of scraped HomeAway listings.

    Steps, in order: drop rows repeating a ``url``, drop rows without a
    ``price``, normalise every remaining row with ``clean_homeaway_row``,
    then drop rows that share the same ``title``, ``price`` and ``sleeps``.
    Returns the cleaned frame; the input frame is not modified.
    """
    unique_urls = df.drop_duplicates(subset=['url'])
    priced = unique_urls[unique_urls.price.notnull()]
    normalised = priced.apply(clean_homeaway_row, axis=1)
    return normalised.drop_duplicates(subset=['title', 'price', 'sleeps'])
    
# Raw scrape: tab-separated, no header row, so column names are supplied here.
data_file = 'data/homeaway_rentals_nyc_ALL.txt'
headers = [
    'url', 'price', 'title', 'geo_lat', 'geo_long', 'rating',
    'number_reviews', 'sleeps', 'bedrooms', 'bathrooms',
    'min_stay',
]

# Load and clean in one step; `df` is the cleaned frame used by later cells.
df = clean_homeaway_data(
    pd.read_csv(data_file, sep='\t', header=None, names=headers)
)
df.head()

Unnamed: 0,url,price,title,geo_lat,geo_long,rating,number_reviews,sleeps,bedrooms,bathrooms,min_stay
0,https://www.homeaway.com/vacation-rental/p353201,100,Modern Studio Haven in Quiet Cul-de-sac... - H...,40.680474,-73.935103,4.6,38,3,0,1,4.0
1,https://www.homeaway.com/vacation-rental/p240675,271,Charming 3 Bedroom/2Bath Duplex Apartment... -...,40.680691,-73.935513,4.7,59,9,3,2,4.0
2,https://www.homeaway.com/vacation-rental/p3926833,904,"5 Bedroom 3 Bathroom Residence - 2,500... - Ho...",40.711948,-74.00674,4.9,50,12,5,3,4.0
3,https://www.homeaway.com/vacation-rental/p3478776,590,LUXURY TWO BED/TW0 BATH CHELSEA HIGH... - Home...,40.745583,-73.991396,4.9,73,5,2,2,5.0
4,https://www.homeaway.com/vacation-rental/p4227371,142,Sydni's Patch Near Manhattan - HomeAway Bedfor...,40.684658,-73.955302,4.6,23,6,2,1,5.0


In [3]:
# Summary statistics of the cleaned listings, before any outlier filtering.
# NOTE(review): the recorded output shows a maximum price of 146298500.0 and a
# mean far above the 75th percentile, so at least one price was parsed into an
# implausible value -- worth checking the raw `price` strings for such rows.
df.describe()

Unnamed: 0,price,geo_lat,geo_long,rating,number_reviews,sleeps,bedrooms,bathrooms,min_stay
count,4000.0,4000.0,4000.0,1630.0,4000.0,4000.0,4000.0,4000.0,4000.0
mean,36928.13,40.739385,-73.97273,4.563742,6.6005,5.078,1.8325,1.3025,9.1675
std,2313178.0,0.050599,0.060157,0.611546,16.193192,2.826467,1.154676,0.75242,12.31399
min,0.0,40.461098,-74.357016,1.0,0.0,0.0,0.0,0.0,1.0
25%,161.0,40.714727,-73.99444,4.4,0.0,3.0,1.0,1.0,2.0
50%,250.0,40.743607,-73.979966,4.8,0.0,4.0,2.0,1.0,3.0
75%,400.0,40.766266,-73.951813,5.0,4.0,6.0,2.0,1.0,7.0
max,146298500.0,40.974511,-73.673384,5.0,175.0,38.0,15.0,23.0,180.0


In [4]:
# Report how many listings sit at or beyond each outlier threshold
for label, is_outlier in [
    ('# Rentals with nightly price >= $3000:', df.price >= 3000),
    ('# Rentals with minimum # night stay >= 100:', df.min_stay >= 100),
    ('# Rentals with # bedrooms >= 10:', df.bedrooms >= 10),
]:
    print(label, len(df[is_outlier]))

# Rentals with nightly price >= $3000: 13
# Rentals with minimum # night stay >= 100: 2
# Rentals with # bedrooms >= 10: 2


In [5]:
# Keep only listings that are below every outlier threshold
within_limits = (df.price < 3000) & (df.bedrooms < 10) & (df.min_stay < 100)
df = df[within_limits]

In [6]:
# Summary statistics after cleaning and outlier removal.
# NOTE(review): no engineered feature (subway count, distance to Times Square)
# is a column of `df` in the code visible here; the collection helpers write
# their results to separate CSV files instead.
df.describe()

Unnamed: 0,price,geo_lat,geo_long,rating,number_reviews,sleeps,bedrooms,bathrooms,min_stay
count,3983.0,3983.0,3983.0,1628.0,3983.0,3983.0,3983.0,3983.0,3983.0
mean,339.644991,40.739395,-73.972663,4.563943,6.616621,5.057494,1.823249,1.296008,9.081346
std,305.013191,0.05057,0.06024,0.611734,16.217721,2.745794,1.124537,0.737529,11.855628
min,0.0,40.461098,-74.357016,1.0,0.0,0.0,0.0,0.0,1.0
25%,161.0,40.714782,-73.994389,4.4,0.0,3.0,1.0,1.0,2.0
50%,250.0,40.743607,-73.979947,4.8,0.0,4.0,2.0,1.0,3.0
75%,400.0,40.766315,-73.951797,5.0,4.0,6.0,2.0,1.0,7.0
max,2875.0,40.974511,-73.673384,5.0,175.0,32.0,9.0,23.0,90.0


In [15]:
def distance_to_times_square(lat, long, api_key=None):
    """
    Return the travel distance (in metres) from a coordinate to Times Square
    using the Google Distance Matrix API
    https://developers.google.com/maps/documentation/distance-matrix/intro

    No ``mode`` parameter is sent, so Google applies its default travel mode
    (driving, per the linked documentation) -- not public transportation.
    ``distance.value`` is documented as metres regardless of ``units``, which
    only affects the human-readable ``text`` field.

    Parameters
    ----------
    lat, long : float
        Origin coordinates in decimal degrees.
    api_key : str, optional
        Google Maps API key. Defaults to the ``GOOGLE_MAPS_API_KEY``
        environment variable, so no credential is stored in the notebook.

    Returns
    -------
    int
        ``distance.value`` of the first element of the first result row.

    Raises
    ------
    ValueError
        If no API key was passed and the environment variable is unset.
    requests.HTTPError
        If the HTTP request itself fails.
    RuntimeError
        If Google reports a non-OK status for the request or for the route.
    """
    api_key = api_key or os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise ValueError(
            'No Google Maps API key: pass api_key=... or set the '
            'GOOGLE_MAPS_API_KEY environment variable')

    times_square_lat = '40.759171'
    times_square_long = '-73.985517'
    response = requests.get(
        'https://maps.googleapis.com/maps/api/distancematrix/json',
        params={
            'units': 'imperial',
            # float() first so numpy scalars are formatted as plain numbers
            'origins': '{},{}'.format(float(lat), float(long)),
            'destinations': times_square_lat + ',' + times_square_long,
            'key': api_key,
        },
        timeout=30,
    )
    response.raise_for_status()
    resp_obj = response.json()

    # The API answers HTTP 200 even for denied or over-quota requests; the
    # real outcome is in the top-level and per-element `status` fields.
    if resp_obj.get('status') != 'OK':
        raise RuntimeError('Distance Matrix request failed: {} {}'.format(
            resp_obj.get('status'), resp_obj.get('error_message', '')))
    element = resp_obj['rows'][0]['elements'][0]
    if element.get('status') != 'OK':
        raise RuntimeError('No route from ({}, {}) to Times Square: {}'.format(
            lat, long, element.get('status')))
    return element['distance']['value']
  
    
def nearby_subway_count(lat, long, api_key=None):
    """
    Count subway stations within a 1600 m (about 1 mile) radius using the
    Google Places Nearby Search API
    https://developers.google.com/places/web-service/search

    Only the first page of results is counted. Nearby Search responses are
    paginated (see the linked documentation), so the returned count is capped
    at the page size for very dense areas.

    Parameters
    ----------
    lat, long : float
        Centre of the search in decimal degrees.
    api_key : str, optional
        Google Maps API key. Defaults to the ``GOOGLE_MAPS_API_KEY``
        environment variable, so no credential is stored in the notebook.

    Returns
    -------
    int
        Number of entries in the ``results`` list of the response.

    Raises
    ------
    ValueError
        If no API key was passed and the environment variable is unset.
    requests.HTTPError
        If the HTTP request itself fails.
    RuntimeError
        If Google reports a status other than ``OK`` or ``ZERO_RESULTS``.
    """
    api_key = api_key or os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise ValueError(
            'No Google Maps API key: pass api_key=... or set the '
            'GOOGLE_MAPS_API_KEY environment variable')

    # The request URL is deliberately not printed: it contains the API key.
    response = requests.get(
        'https://maps.googleapis.com/maps/api/place/nearbysearch/json',
        params={
            # float() first so numpy scalars are formatted as plain numbers
            'location': '{},{}'.format(float(lat), float(long)),
            'radius': '1600',
            'type': 'subway_station',
            'key': api_key,
        },
        timeout=30,
    )
    response.raise_for_status()
    resp_obj = response.json()

    # A denied or over-quota request comes back with an empty `results` list;
    # without this check it would be silently recorded as "0 stations".
    status = resp_obj.get('status')
    if status not in ('OK', 'ZERO_RESULTS'):
        raise RuntimeError('Places Nearby Search request failed: {} {}'.format(
            status, resp_obj.get('error_message', '')))
    return len(resp_obj['results'])


def write_subway_counts():
    """
    One time collection of data from Google API for subway counts.

    Walks the module-level ``df`` and appends one tab-separated row
    ``[i, url, lat, long, subway_count]`` per listing to
    ``data/homeaway_urls_subway.csv``. The file is flushed after every row so
    an interrupted run keeps what it already fetched. Because the file is
    opened in append mode, running this again adds duplicate rows.
    """
    # Select columns by name rather than by position, and avoid
    # DataFrame.as_matrix(), which was removed in pandas 1.0.
    listings = df[['url', 'geo_lat', 'geo_long']].itertuples(index=False, name=None)
    # newline='' is what the csv module expects of files it writes to.
    with open('data/homeaway_urls_subway.csv', 'a+', newline='') as homeaway_file:
        writer = csv.writer(homeaway_file, delimiter='\t')
        for i, (url, lat, long) in enumerate(listings):
            subway_count = nearby_subway_count(lat, long)
            data = [i, url, lat, long, subway_count]
            print(data)
            writer.writerow(data)
            homeaway_file.flush()
    
    
def write_driving_distance(max_rows=1300):
    """
    One time collection of data from Google API for driving distance to Times Square.

    Walks the first ``max_rows`` rows of the module-level ``df`` and appends
    one tab-separated row ``[i, url, lat, long, distance]`` per listing to
    ``data/homeaway_urls_times_square.csv``. The file is flushed after every
    row so an interrupted run keeps what it already fetched. Because the file
    is opened in append mode, running this again adds duplicate rows.

    Parameters
    ----------
    max_rows : int or None, optional
        Number of leading rows of ``df`` to process (default 1300, the slice
        that was previously hard-coded). ``None`` processes every row.
    """
    # Select columns by name rather than by position, and avoid
    # DataFrame.as_matrix(), which was removed in pandas 1.0.
    listings = (df[['url', 'geo_lat', 'geo_long']]
                .iloc[:max_rows]
                .itertuples(index=False, name=None))
    # newline='' is what the csv module expects of files it writes to.
    with open('data/homeaway_urls_times_square.csv', 'a+', newline='') as homeaway_file:
        writer = csv.writer(homeaway_file, delimiter='\t')
        for i, (url, lat, long) in enumerate(listings):
            distance = distance_to_times_square(lat, long)
            data = [i, url, lat, long, distance]
            print(data)
            writer.writerow(data)
            homeaway_file.flush()
        
# Manual spot checks of the two API helpers:
# nearby_subway_count(40.680474, -73.935103)
# distance_to_times_square(40.680474, -73.935103)

# One-time data collection. The writers open their output files in append
# mode, so every re-run of this cell repeats the API requests and appends
# duplicate rows to the CSV.
# write_subway_counts()
write_driving_distance()

[0, 'https://www.homeaway.com/vacation-rental/p353201', 40.68047429999999, -73.9351033, 17124]
[1, 'https://www.homeaway.com/vacation-rental/p240675', 40.680690999999996, -73.935513, 17152]
[2, 'https://www.homeaway.com/vacation-rental/p3926833', 40.71194835, -74.00674018, 9560]
[3, 'https://www.homeaway.com/vacation-rental/p3478776', 40.745583, -73.991396, 2605]
[4, 'https://www.homeaway.com/vacation-rental/p4227371', 40.684658, -73.955302, 15102]
[5, 'https://www.homeaway.com/vacation-rental/p324686', 40.68047429999999, -73.9351033, 17124]
[6, 'https://www.homeaway.com/vacation-rental/p750325vb', 40.77526714, -73.98715641, 2097]
[7, 'https://www.homeaway.com/vacation-rental/p4249189', 40.74987035, -73.9903988, 1973]
[8, 'https://www.homeaway.com/vacation-rental/p3002032', 40.828998999999996, -73.9419023, 11666]
[9, 'https://www.homeaway.com/vacation-rental/p337061vb', 40.757075, -73.919071, 7486]
[10, 'https://www.homeaway.com/vacation-rental/p864025vb', 40.74548198, -73.9875423, 202

[91, 'https://www.homeaway.com/vacation-rental/p811837vb', 40.685263899999995, -73.979168, 13365]
[92, 'https://www.homeaway.com/vacation-rental/p573531vb', 40.7529407, -73.9742537, 1325]
[93, 'https://www.homeaway.com/vacation-rental/p843231vb', 40.7949704, -73.96561240000001, 4811]
[94, 'https://www.homeaway.com/vacation-rental/p851550vb', 40.778024, -73.98099350000001, 2758]
[95, 'https://www.homeaway.com/vacation-rental/p321774vb', 40.76397464, -73.99341695, 991]
[96, 'https://www.homeaway.com/vacation-rental/p488877vb', 40.787551, -73.970042, 4330]
[97, 'https://www.homeaway.com/vacation-rental/p651432vb', 40.67100353, -73.94778425, 16737]
[98, 'https://www.homeaway.com/vacation-rental/p1691113', 40.718277, -73.98954, 7455]
[99, 'https://www.homeaway.com/vacation-rental/p4078456', 40.71632953, -73.99990536, 9632]
[100, 'https://www.homeaway.com/vacation-rental/p299459', 40.739437, -73.992512, 2775]
[101, 'https://www.homeaway.com/vacation-rental/p3480587', 40.76378023, -73.9279364

[179, 'https://www.homeaway.com/vacation-rental/p1704134?uni_id=3464101', 40.7015543, -73.94049694, 14672]
[180, 'https://www.homeaway.com/vacation-rental/p4226411', 40.724716, -73.9511058, 7115]
[181, 'https://www.homeaway.com/vacation-rental/p1704134?uni_id=3283837', 40.7015543, -73.94049694, 14672]
[182, 'https://www.homeaway.com/vacation-rental/p743144vb', 40.669574, -73.973641, 20126]
[183, 'https://www.homeaway.com/vacation-rental/p837296vb', 40.8028837, -73.94634920000001, 10502]
[184, 'https://www.homeaway.com/vacation-rental/p332976vb', 40.67973709, -73.97291565, 14375]
[185, 'https://www.homeaway.com/vacation-rental/p3752754', 40.680302000000005, -73.957414, 15168]
[186, 'https://www.homeaway.com/vacation-rental/p4071006', 40.7612432, -73.9918662, 572]
[187, 'https://www.homeaway.com/vacation-rental/p364804', 40.7601778, -73.9843051, 172]
[188, 'https://www.homeaway.com/vacation-rental/p3833887', 40.78537591, -73.9464395, 6536]
[189, 'https://www.homeaway.com/vacation-rental/

[267, 'https://www.homeaway.com/vacation-rental/p919247', 40.78318, -73.97173599999999, 3725]
[268, 'https://www.homeaway.com/vacation-rental/p1187112', 40.80435, -73.946662, 6773]
[269, 'https://www.homeaway.com/vacation-rental/p4261389', 40.753240000000005, -73.96658000000001, 1873]
[270, 'https://www.homeaway.com/vacation-rental/p4304484', 40.74638196, -73.98394022, 2020]
[271, 'https://www.homeaway.com/vacation-rental/p4241163', 40.695563, -73.969581, 12852]
[272, 'https://www.homeaway.com/vacation-rental/p573914vb', 40.6839745, -73.99929209999999, 13553]
[273, 'https://www.homeaway.com/vacation-rental/p3835371', 40.806793, -73.948543, 10451]
[274, 'https://www.homeaway.com/vacation-rental/p4262961', 40.7368501, -73.99094740000001, 3478]
[275, 'https://www.homeaway.com/vacation-rental/p3961666', 40.78316701, -73.95193817, 6511]
[276, 'https://www.homeaway.com/vacation-rental/p919101', 40.71833, -73.956028, 8315]
[277, 'https://www.homeaway.com/vacation-rental/p214934vb', 40.7765604

[356, 'https://www.homeaway.com/vacation-rental/p4325781', 40.80287088, -73.95267936, 10258]
[357, 'https://www.homeaway.com/vacation-rental/p993020', 40.804507, -73.948995, 10220]
[358, 'https://www.homeaway.com/vacation-rental/p3845424', 40.652153999999996, -73.974579, 18530]
[359, 'https://www.homeaway.com/vacation-rental/p688808vb', 40.70950581, -73.96863965, 9285]
[360, 'https://www.homeaway.com/vacation-rental/p4315211', 40.754537, -73.772785, 22102]
[361, 'https://www.homeaway.com/vacation-rental/p3903424', 40.77526487, -73.95116953, 5722]
[362, 'https://www.homeaway.com/vacation-rental/p333327', 40.726675, -73.978189, 5225]
[363, 'https://www.homeaway.com/vacation-rental/p3522485', 40.744021999999994, -73.982493, 2782]
[364, 'https://www.homeaway.com/vacation-rental/p901067vb', 40.78807443, -73.97547215, 4045]
[365, 'https://www.homeaway.com/vacation-rental/p994618vb', 40.674981, -73.940651, 16871]
[366, 'https://www.homeaway.com/vacation-rental/p157019', 40.6788319, -73.950677

[444, 'https://www.homeaway.com/vacation-rental/p4206309', 40.79976603, -73.96148717, 5497]
[445, 'https://www.homeaway.com/vacation-rental/p3795240', 40.687909999999995, -73.947334, 15148]
[446, 'https://www.homeaway.com/vacation-rental/p3967128', 40.68415012, -73.93175714, 17042]
[447, 'https://www.homeaway.com/vacation-rental/p4291906', 40.775110999999995, -73.9886025, 2520]
[448, 'https://www.homeaway.com/vacation-rental/p4486162', 40.61180545, -73.92558985, 39406]
[449, 'https://www.homeaway.com/vacation-rental/p248004vb', 40.74893529999999, -73.9765458, 1921]
[450, 'https://www.homeaway.com/vacation-rental/p266219vb', 40.76210395, -73.97932179, 779]
[451, 'https://www.homeaway.com/vacation-rental/p270136', 40.760584, -73.984565, 153]
[452, 'https://www.homeaway.com/vacation-rental/p368228', 40.581643, -73.96392, 32470]
[453, 'https://www.homeaway.com/vacation-rental/p357979vb', 40.709199, -73.805142, 20463]
[454, 'https://www.homeaway.com/vacation-rental/p3496092', 40.68081, -73.

[533, 'https://www.homeaway.com/vacation-rental/p795508vb', 40.689656, -73.931554, 16492]
[534, 'https://www.homeaway.com/vacation-rental/p6546956', 40.719099, -73.99060531, 8890]
[535, 'https://www.homeaway.com/vacation-rental/p986883vb', 40.79355848, -73.96824101, 4617]
[536, 'https://www.homeaway.com/vacation-rental/p978991', 40.752263, -73.986315, 1084]
[537, 'https://www.homeaway.com/vacation-rental/p3587458', 40.646801, -74.080134, 32805]
[538, 'https://www.homeaway.com/vacation-rental/p706320vb', 40.717647899999996, -73.983187, 6720]
[539, 'https://www.homeaway.com/vacation-rental/p840233vb', 40.81385224, -73.94672508, 10409]
[540, 'https://www.homeaway.com/vacation-rental/p839712vb', 40.73079768, -74.00902962, 4790]
[541, 'https://www.homeaway.com/vacation-rental/p4284881', 40.761847700000004, -73.98584659999999, 407]
[542, 'https://www.homeaway.com/vacation-rental/p4497321', 40.769785799999994, -73.9582108, 3537]
[543, 'https://www.homeaway.com/vacation-rental/p473582vb', 40.7

[622, 'https://www.homeaway.com/vacation-rental/p4463471', 40.78312003, -73.97288395, 3806]
[623, 'https://www.homeaway.com/vacation-rental/p4567317', 40.71215547, -73.95934964, 9805]
[624, 'https://www.homeaway.com/vacation-rental/p242032', 40.682615999999996, -73.9492822, 15474]
[625, 'https://www.homeaway.com/vacation-rental/p452674vb', 40.778207, -73.983402, 3194]
[626, 'https://www.homeaway.com/vacation-rental/p676253vb', 40.62782703, -74.15856457, 35550]
[627, 'https://www.homeaway.com/vacation-rental/p778598vb', 40.66183578, -73.98978119, 16540]
[628, 'https://www.homeaway.com/vacation-rental/p4117266', 40.769017, -73.770619, 24447]
[629, 'https://www.homeaway.com/vacation-rental/p4223498', 40.711257, -73.961742, 9632]
[630, 'https://www.homeaway.com/vacation-rental/p4487099', 40.76457146, -73.92487957, 7005]
[631, 'https://www.homeaway.com/vacation-rental/p503902vb', 40.7545865, -73.99834399999999, 2337]
[632, 'https://www.homeaway.com/vacation-rental/p3780592', 40.739047, -73.

[711, 'https://www.homeaway.com/vacation-rental/p4075847', 40.75429904, -73.93424385, 5750]
[712, 'https://www.homeaway.com/vacation-rental/p4108097', 40.68571546, -73.94422519999999, 15881]
[713, 'https://www.homeaway.com/vacation-rental/p4152938', 40.8416662, -73.94005859999999, 12982]
[714, 'https://www.homeaway.com/vacation-rental/p865610vb', 40.67403764, -73.97670098, 14377]
[715, 'https://www.homeaway.com/vacation-rental/p4431318', 40.775110999999995, -73.9886025, 2520]
[716, 'https://www.homeaway.com/vacation-rental/p4479601', 40.61637226, -74.00374251, 24599]
[717, 'https://www.homeaway.com/vacation-rental/p236744vb', 40.72569262, -73.98385574, 6971]
[718, 'https://www.homeaway.com/vacation-rental/p940567', 40.786381, -73.973435, 4266]
[719, 'https://www.homeaway.com/vacation-rental/p3677726', 40.655475, -73.958775, 20428]
[720, 'https://www.homeaway.com/vacation-rental/p3775364', 40.746913, -73.99201500000001, 2014]
[721, 'https://www.homeaway.com/vacation-rental/p3893551', 40

[800, 'https://www.homeaway.com/vacation-rental/p3932644', 40.78222599, -73.98434234, 4583]
[801, 'https://www.homeaway.com/vacation-rental/p3932628', 40.7612205, -73.96085694, 3621]
[802, 'https://www.homeaway.com/vacation-rental/p707389vb', 40.72069883, -74.00282859, 5133]
[803, 'https://www.homeaway.com/vacation-rental/p778629vb', 40.661966, -73.990382, 16492]
[804, 'https://www.homeaway.com/vacation-rental/p4091122', 40.726460499999995, -74.0033087, 4296]
[805, 'https://www.homeaway.com/vacation-rental/p4108924', 40.70027395, -73.98625928, 11018]
[806, 'https://www.homeaway.com/vacation-rental/p4130872', 40.7224005, -73.9899138, 7168]
[807, 'https://www.homeaway.com/vacation-rental/p4152934', 40.84173113, -73.93988694, 12991]
[808, 'https://www.homeaway.com/vacation-rental/p863266vb', 40.689656, -73.931554, 16492]
[809, 'https://www.homeaway.com/vacation-rental/p4211817', 40.795146, -73.94468020000001, 9267]
[810, 'https://www.homeaway.com/vacation-rental/p4236818', 40.73437004, -7

[890, 'https://www.homeaway.com/vacation-rental/p4534889', 40.78981013, -73.9455583, 7293]
[891, 'https://www.homeaway.com/vacation-rental/p241908vb', 40.74315643, -74.00157928, 3340]
[892, 'https://www.homeaway.com/vacation-rental/p452269vb', 40.687544, -73.96288, 14315]
[893, 'https://www.homeaway.com/vacation-rental/p478287vb', 40.7471609, -73.9819567, 1952]
[894, 'https://www.homeaway.com/vacation-rental/p756740vb', 40.77692886, -73.98141823, 2564]
[895, 'https://www.homeaway.com/vacation-rental/p700277vb', 40.71509075, -73.94627844, 8673]
[896, 'https://www.homeaway.com/vacation-rental/p4211205', 40.666883899999995, -73.94486628, 17319]
[897, 'https://www.homeaway.com/vacation-rental/p4220990', 40.7715836, -73.92371458, 7597]
[898, 'https://www.homeaway.com/vacation-rental/p4247811', 40.63385, -73.905575, 37020]
[899, 'https://www.homeaway.com/vacation-rental/p4370378', 40.7450282, -73.9817657, 2391]
[900, 'https://www.homeaway.com/vacation-rental/p995907vb', 40.6725919, -73.96092

[978, 'https://www.homeaway.com/vacation-rental/p797156vb', 40.8152657, -73.9420753, 11439]
[979, 'https://www.homeaway.com/vacation-rental/p803672vb', 40.675558, -73.964812, 15185]
[980, 'https://www.homeaway.com/vacation-rental/p724487vb', 40.68824363, -73.92566165, 14340]
[981, 'https://www.homeaway.com/vacation-rental/p885337vb', 40.6777453, -73.98024609999999, 14104]
[982, 'https://www.homeaway.com/vacation-rental/p4278805', 40.68655067, -73.95549103, 14685]
[983, 'https://www.homeaway.com/vacation-rental/p4316255', 40.760117, -73.990055, 817]
[984, 'https://www.homeaway.com/vacation-rental/p4348923', 40.72931434, -74.00358364, 4097]
[985, 'https://www.homeaway.com/vacation-rental/p4366596', 40.78706272, -73.95337861, 4839]
[986, 'https://www.homeaway.com/vacation-rental/p4427563', 40.67908931, -73.99888081, 15426]
[987, 'https://www.homeaway.com/vacation-rental/p4463665', 40.74632789, -73.98933975, 1849]
[988, 'https://www.homeaway.com/vacation-rental/p4482530', 40.7443999, -73.9

[1066, 'https://www.homeaway.com/vacation-rental/p870662vb', 40.724410999999996, -73.979075, 6986]
[1067, 'https://www.homeaway.com/vacation-rental/p893717vb', 40.7126919, -73.9438412, 8470]
[1068, 'https://www.homeaway.com/vacation-rental/p4266330', 40.68048502, -73.95739791, 15164]
[1069, 'https://www.homeaway.com/vacation-rental/p4318676', 40.71717228, -73.99442368, 9837]
[1070, 'https://www.homeaway.com/vacation-rental/p4359204', 40.72877855, -74.00430823, 5326]
[1071, 'https://www.homeaway.com/vacation-rental/p4372109', 40.807496, -73.950227, 10062]
[1072, 'https://www.homeaway.com/vacation-rental/p4385656', 40.69085262, -73.91210448, 14497]
[1073, 'https://www.homeaway.com/vacation-rental/p4435291', 40.66926377, -73.9579943, 15724]
[1074, 'https://www.homeaway.com/vacation-rental/p991058vb', 40.67516824, -73.96348823, 15105]
[1075, 'https://www.homeaway.com/vacation-rental/p4460834', 40.892274, -73.898663, 18716]
[1076, 'https://www.homeaway.com/vacation-rental/p1024289vb', 40.78

[1154, 'https://www.homeaway.com/vacation-rental/p4078454', 40.728333899999996, -73.8156209, 18495]
[1155, 'https://www.homeaway.com/vacation-rental/p4138073', 40.72229894, -74.00412083, 4938]
[1156, 'https://www.homeaway.com/vacation-rental/p4157897', 40.76450473, -73.98784711, 850]
[1157, 'https://www.homeaway.com/vacation-rental/p4190825', 40.83954893, -73.86216234, 20086]
[1158, 'https://www.homeaway.com/vacation-rental/p4211171', 40.771612, -73.92384717, 7608]
[1159, 'https://www.homeaway.com/vacation-rental/p887039vb', 40.7575339, -73.9655896, 2193]
[1160, 'https://www.homeaway.com/vacation-rental/p4249628', 40.7692123, -73.98477240000001, 1523]
[1161, 'https://www.homeaway.com/vacation-rental/p4260907', 40.753474, -73.99252800000001, 1006]
[1162, 'https://www.homeaway.com/vacation-rental/p901508vb', 40.744075, -73.9810679, 2744]
[1163, 'https://www.homeaway.com/vacation-rental/p921309vb', 40.744075, -73.9810679, 2744]
[1164, 'https://www.homeaway.com/vacation-rental/p4322151', 4

[1241, 'https://www.homeaway.com/vacation-rental/p726626vb', 40.7429739, -73.99446800000001, 2469]
[1242, 'https://www.homeaway.com/vacation-rental/p776578vb', 40.789745, -73.9497545, 5525]
[1243, 'https://www.homeaway.com/vacation-rental/p6037022', 40.65547, -73.95877, 20427]
[1244, 'https://www.homeaway.com/vacation-rental/p4131943', 40.607896999999994, -74.067588, 27493]
[1245, 'https://www.homeaway.com/vacation-rental/p4137818', 40.770773999999996, -73.95288215, 5215]
[1246, 'https://www.homeaway.com/vacation-rental/p4058748', 40.76248326, -73.97971112, 1142]
[1247, 'https://www.homeaway.com/vacation-rental/p4150114', 40.755454, -73.966607, 2387]
[1248, 'https://www.homeaway.com/vacation-rental/p4199701', 40.730667600000004, -74.0092442, 4773]
[1249, 'https://www.homeaway.com/vacation-rental/p877593vb', 40.682701, -73.93726590000001, 17020]
[1250, 'https://www.homeaway.com/vacation-rental/p4261919', 40.760411, -73.98745749999999, 383]
[1251, 'https://www.homeaway.com/vacation-renta

In [14]:
# Spot check of the subway lookup for a single coordinate pair.
# NOTE(review): the recorded result is 0 stations within 1600 m -- confirm
# this is a genuine count and not an empty response from a rejected request.
nearby_subway_count(40.68047429999999, -73.9351033)

https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=40.68047429999999,-73.9351033&radius=1600&type=subway_station&key=<REDACTED>


0

In [None]:
# Not all rentals have reviews, so the rating histogram uses only rated ones
df_with_reviews = df[df.rating.notnull()]

# Plot some basic graphs to understand the data: a 2x2 grid of histograms
fig_overview, ax_overview = plt.subplots(2, 2, figsize=(15, 15))

# `.values` replaces DataFrame/Series.as_matrix(), removed in pandas 1.0
ax_overview[0][0].hist(df.bedrooms.values)
ax_overview[0][0].set_title('Distribution of Bedroom Count')
ax_overview[0][0].set_xlabel('# Bedrooms')
ax_overview[0][0].set_ylabel('Frequency')

ax_overview[0][1].hist(df_with_reviews.rating.values)
ax_overview[0][1].set_title('Distribution of Ratings')
ax_overview[0][1].set_xlabel('Rating')
ax_overview[0][1].set_ylabel('Frequency')
# Ratings run from 1 to 5; np.arange excludes its stop value, so stop at 6
# to get a tick at 5 as well (the previous arange(1, 5, 1) ended at 4).
ax_overview[0][1].xaxis.set_ticks(np.arange(1, 6, 1))

ax_overview[1][0].hist(df.price.values)
ax_overview[1][0].set_title('Distribution of Nightly Rental Prices')
ax_overview[1][0].set_xlabel('Rental Price per Night')
ax_overview[1][0].set_ylabel('Frequency')

ax_overview[1][1].hist(df.min_stay.values)
ax_overview[1][1].set_title('Distribution of Minimum # Nights Required')
ax_overview[1][1].set_xlabel('Min # Nights')
ax_overview[1][1].set_ylabel('Frequency')

In [None]:
# Build a timestamped output path so each export gets its own file name.
# NOTE(review): the ISO timestamp contains ':' characters, which some
# filesystems do not accept in file names.
export_stamp = datetime.datetime.now().isoformat()
filename = ''.join(['data/homeaway_rentals_nyc_PARSED', export_stamp, '.csv'])
print('Writing to ', filename)
# df.to_csv(filename, sep='\t')

In [None]:
# 30 - 61 nights = 30 nights
# 3 - 30 nights = 30 nights
# 5 - 8 nights = 5 nights
# 30 - 180 nights = 180 nights, all booked!
# 7 - 14 nights = 14 nights
# 5 - 90 = 5 nights
# 1 - 15 nights = 1?
# 6 - 30 nights = both
# 4 - 21 nights = 4
#print(df.loc[df.min_stay == '4 - 18 nights'].url)
