In [1]:
import pandas as pd
import numpy as np
from math import radians, cos, sin, asin, sqrt

def read_data():
    listings_data = pd.read_csv('listings.csv.gz')
    amenities_data = pd.read_json('amenities-vancouver.json.gz', lines=True)
    return listings_data, amenities_data


def clean_amenities_data(amenities_data, amenities_required):

    #find unique amenities and the number of them to choose which are important for a traveller
    # print(amenities_data['amenity'].value_counts())

    #adapted from : https://www.kite.com/python/answers/how-to-filter-a-pandas-dataframe-with-a-list-by-%60in%60-or-%60not-in%60-in-python
    bool_series = amenities_data.amenity.isin(amenities_required)
    filtered_amenities_df = amenities_data[bool_series]
    filtered_amenities_df = filtered_amenities_df.drop(['timestamp','tags'], axis=1).dropna() # dropping unnecessary columns, and filter out NA values
    filtered_amenities_df.reset_index(inplace=True, drop=True)

    return filtered_amenities_df


#reference: https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points
def haversine_distance(lon1, lat1, lon2, lat2):
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r * 1000

def haversine_distance2(df, lon2, lat2):
    # convert decimal degrees to radians 
    lon1=np.radians(df['lon'])
    lat1=np.radians(df['lat'])
    lon2=np.radians(lon2)
    lat2=np.radians(lat2)
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = (dlat/2).apply(sin)**2 + (lat1).apply(sin) * cos(lat2) * (dlon/2).apply(sin)**2
    c = 2 * ((a).apply(sqrt).apply(asin)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r * 1000

def clean_listings_data(listings_data):
    #keep only the columns we need
    columns_needed = ['id', 'listing_url', 'name', 'description', 'picture_url', 'latitude', 'longitude', 'property_type', 'accommodates', 'bedrooms', 'beds', 'amenities', 'price']
    listings_data = listings_data[columns_needed]
    return listings_data


# #TODO: return a dictionary with number of amenities in a 1km radius of this lat and lon
def num_amenities(lat, lon, amenities_data_clean):
    distance = haversine_distance2(amenities_data_clean, lon, lat)
    amenities_data_clean['distance'] = distance
    data_withinR = amenities_data_clean.loc[amenities_data_clean['distance'] < 1000].reset_index(drop=True)
    amenities_series = data_withinR.pivot_table(columns = ['amenity'], aggfunc='size')  # Counts # of amenities, type=pd.series
    amenities_dict = amenities_series.to_dict()# converts series to dict
    return amenities_dict

In [2]:
#Read Data
listings_data, amenities_data = read_data()

# Change amenities here (updated the "restaurant" typo)
amenities_required = ['restaurant', 'fast_food', 'cafe','bank','atm','pharmacy','bicycle_rental','fuel','pub','bar','car_sharing','car_rental','clinic','doctors','hospital','ice_cream','fountain','theatre','police','bus_station']

#Data Cleaning
amenities_data_clean = clean_amenities_data(amenities_data, amenities_required)
listings_data_clean = clean_listings_data(listings_data)


#Return a dict of amenities:
lat_input = 49.225164  # sample lat input
lon_input = -123.003742  # sample lon input
amen_dict = num_amenities(lat_input, lon_input, amenities_data_clean)

In [3]:
amen_dict

{'bank': 16,
 'bar': 16,
 'bus_station': 1,
 'cafe': 27,
 'car_rental': 1,
 'clinic': 8,
 'fast_food': 61,
 'fountain': 1,
 'fuel': 1,
 'ice_cream': 1,
 'pharmacy': 9,
 'police': 1,
 'pub': 1,
 'restaurant': 56}

In [4]:
listings_data_clean['accommodates'].max()

16

In [13]:
listings_data_clean.loc[listings_data_clean['accommodates'] == 17]

Unnamed: 0,id,listing_url,name,description,picture_url,latitude,longitude,property_type,accommodates,bedrooms,beds,amenities,price,num_amenities_nearby


In [14]:
listings_data_clean.empty()

TypeError: 'bool' object is not callable

In [6]:
# listings_data_clean = listings_data_clean.head(100)
# listings_data_clean

In [7]:
listings_data_clean['num_amenities_nearby'] = listings_data_clean.apply(lambda x: num_amenities(x['latitude'], x['longitude'], amenities_data_clean), axis = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_data_clean['num_amenities_nearby'] = listings_data_clean.apply(lambda x: num_amenities(x['latitude'], x['longitude'], amenities_data_clean), axis = 1)


In [8]:
listings_data_clean['num_amenities_nearby']

0       {'atm': 1, 'bank': 21, 'bar': 9, 'bicycle_rent...
1       {'atm': 1, 'bank': 30, 'bar': 27, 'bicycle_ren...
2       {'bank': 4, 'bicycle_rental': 7, 'cafe': 7, 'c...
3       {'bank': 3, 'bicycle_rental': 2, 'cafe': 7, 'c...
4       {'bank': 4, 'cafe': 6, 'doctors': 4, 'fast_foo...
                              ...                        
4294    {'atm': 1, 'bank': 11, 'bar': 22, 'bicycle_ren...
4295    {'atm': 1, 'bank': 4, 'bar': 5, 'bicycle_renta...
4296    {'atm': 1, 'bank': 24, 'bar': 18, 'bicycle_ren...
4297    {'bank': 18, 'bar': 12, 'bicycle_rental': 31, ...
4298    {'atm': 1, 'bank': 22, 'bar': 26, 'bicycle_ren...
Name: num_amenities_nearby, Length: 4299, dtype: object

In [9]:
listings_data_clean

Unnamed: 0,id,listing_url,name,description,picture_url,latitude,longitude,property_type,accommodates,bedrooms,beds,amenities,price,num_amenities_nearby
0,10080,https://www.airbnb.com/rooms/10080,D1 - Million Dollar View 2 BR,"Stunning two bedroom, two bathroom apartment. ...",https://a0.muscache.com/pictures/55778229/c2f7...,49.28872,-123.12046,Entire condominium,5,2.0,2.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$150.00,"{'atm': 1, 'bank': 21, 'bar': 9, 'bicycle_rent..."
1,13358,https://www.airbnb.com/rooms/13358,Monthly (or Longer ) Designer One Bedroom Down...,<b>The space</b><br />This suites central loca...,https://a0.muscache.com/pictures/c23bb7ef-e300...,49.28201,-123.12669,Entire condominium,2,1.0,1.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$85.00,"{'atm': 1, 'bank': 30, 'bar': 27, 'bicycle_ren..."
2,13490,https://www.airbnb.com/rooms/13490,Vancouver's best kept secret,This apartment rents for one month blocks of t...,https://a0.muscache.com/pictures/73394727/79d5...,49.25622,-123.06607,Entire apartment,2,1.0,1.0,"[""Iron"", ""Outdoor dining area"", ""Ethernet conn...",$145.00,"{'bank': 4, 'bicycle_rental': 7, 'cafe': 7, 'c..."
3,14267,https://www.airbnb.com/rooms/14267,EcoLoft Vancouver,"The Ecoloft is located in the lovely, family r...",https://a0.muscache.com/pictures/3646de9b-934e...,49.24922,-123.08139,Entire house,4,1.0,2.0,"[""Iron"", ""Dryer"", ""Cooking basics"", ""Hot water...",$140.00,"{'bank': 3, 'bicycle_rental': 2, 'cafe': 7, 'c..."
4,16254,https://www.airbnb.com/rooms/16254,Close to PNE/Hastings Park and East Village,2 Bedroom garden level guest suite.(lower leve...,https://a0.muscache.com/pictures/90623667/583c...,49.27721,-123.04086,Entire guest suite,4,2.0,3.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$195.00,"{'bank': 4, 'cafe': 6, 'doctors': 4, 'fast_foo..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4294,49132295,https://www.airbnb.com/rooms/49132295,Luxurious 3 Bedroom Unit With English Bay Views!,"Stunning large, modern, brand-new 3 bedroom an...",https://a0.muscache.com/pictures/d5f23f5b-f10b...,49.27391,-123.13012,Entire apartment,7,3.0,5.0,"[""Heating"", ""Essentials"", ""Kitchen"", ""Iron"", ""...",$500.00,"{'atm': 1, 'bank': 11, 'bar': 22, 'bicycle_ren..."
4295,49138797,https://www.airbnb.com/rooms/49138797,19-9-10 Private room,Private room in a new house with shared bathro...,https://a0.muscache.com/pictures/02048308-7b1f...,49.25275,-123.09184,Private room in house,2,1.0,1.0,"[""Heating"", ""Kitchen"", ""Fire extinguisher"", ""D...",$38.00,"{'atm': 1, 'bank': 4, 'bar': 5, 'bicycle_renta..."
4296,49146266,https://www.airbnb.com/rooms/49146266,Prime Coal harbour | Luxury Finishings,Welcome to this beautiful fully furnished Prim...,https://a0.muscache.com/pictures/ee697899-6185...,49.28631,-123.12455,Entire apartment,2,1.0,,"[""Kitchen"", ""Body soap"", ""Shampoo"", ""Hot tub"",...",$160.00,"{'atm': 1, 'bank': 24, 'bar': 18, 'bicycle_ren..."
4297,49148812,https://www.airbnb.com/rooms/49148812,DT- Ocean View 1BD w/ Private Bathroom Kitchen...,Private one bedroom suite apartment in heart o...,https://a0.muscache.com/pictures/02a2a9da-68e0...,49.28822,-123.13061,Entire apartment,1,1.0,1.0,"[""Dryer"", ""Bed linens"", ""Elevator"", ""Paid park...",$66.00,"{'bank': 18, 'bar': 12, 'bicycle_rental': 31, ..."


In [10]:
listings_data_clean['price'] = listings_data_clean['price'].apply(lambda x: float(x.replace('$','').replace(',','')))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_data_clean['price'] = listings_data_clean['price'].apply(lambda x: float(x.replace('$','').replace(',','')))


In [11]:
listings_data_clean = listings_data_clean.loc[(listings_data_clean['price'] <= 150) & (listings_data_clean['accommodates'] >= 3) & (listings_data_clean['bedrooms'] >= 2)]

In [12]:
listings_data_clean

Unnamed: 0,id,listing_url,name,description,picture_url,latitude,longitude,property_type,accommodates,bedrooms,beds,amenities,price,num_amenities_nearby
0,10080,https://www.airbnb.com/rooms/10080,D1 - Million Dollar View 2 BR,"Stunning two bedroom, two bathroom apartment. ...",https://a0.muscache.com/pictures/55778229/c2f7...,49.28872,-123.12046,Entire condominium,5,2.0,2.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",150.0,"{'atm': 1, 'bank': 21, 'bar': 9, 'bicycle_rent..."
5,16611,https://www.airbnb.com/rooms/16611,"1 block to skytrain station, shops,restaurant,...","My place is close to bank, coffee shops, groce...",https://a0.muscache.com/pictures/82101/7127b63...,49.26339,-123.07145,Entire house,6,3.0,4.0,"[""Heating"", ""Essentials"", ""Kitchen"", ""Iron"", ""...",100.0,"{'bank': 5, 'bar': 1, 'bicycle_rental': 17, 'c..."
6,17158,https://www.airbnb.com/rooms/17158,Vancouver 4br 3ba house 20min to DT,A newly renovated comfy and cozy family home i...,https://a0.muscache.com/pictures/5ddc28c0-484f...,49.25277,-123.03774,Entire house,8,4.0,5.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",130.0,"{'bank': 2, 'cafe': 4, 'fast_food': 2, 'fuel':..."
13,20085,https://www.airbnb.com/rooms/20085,Bright & Cheerful Uptown Garden Suite,"Welcome to our bright, cozy suite in the heart...",https://a0.muscache.com/pictures/miso/Hosting-...,49.25636,-123.08636,Entire guest suite,3,2.0,2.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",69.0,"{'bank': 1, 'bar': 1, 'bicycle_rental': 12, 'c..."
21,35102,https://www.airbnb.com/rooms/35102,Downtown Yaletown Trendy 2 Bd/2Bth,Large and bright corner unit of a brick concre...,https://a0.muscache.com/pictures/784805/089b1a...,49.27743,-123.11869,Entire condominium,4,2.0,2.0,"[""Iron"", ""Hot tub"", ""Dryer"", ""Bed linens"", ""El...",132.0,"{'atm': 1, 'bank': 26, 'bar': 28, 'bicycle_ren..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4247,48846867,https://www.airbnb.com/rooms/48846867,Beautiful Victorian Home - Central Vancouver!,This home is the owners’ winter residence and ...,https://a0.muscache.com/pictures/miso/Hosting-...,49.26148,-123.08115,Entire house,10,3.0,6.0,"[""Heating"", ""Essentials"", ""Kitchen"", ""Iron"", ""...",144.0,"{'bank': 4, 'bicycle_rental': 20, 'cafe': 11, ..."
4273,49030214,https://www.airbnb.com/rooms/49030214,Entire beautiful condo with stunning views!,This 2 bedroom suite has amazing views of the ...,https://a0.muscache.com/pictures/c8a3e847-d2ce...,49.27964,-123.10886,Entire condominium,4,2.0,2.0,"[""Hot tub"", ""Dryer"", ""Bed linens"", ""Cooking ba...",132.0,"{'bank': 17, 'bar': 24, 'bicycle_rental': 32, ..."
4280,49063578,https://www.airbnb.com/rooms/49063578,"Luxury 2 bedrooms,ocean and mountain view.","Close to sky train station,water front of Down...",https://a0.muscache.com/pictures/b0e15bc2-7796...,49.28717,-123.12182,Entire condominium,4,2.0,2.0,"[""Hot tub"", ""Dryer"", ""Bed linens"", ""Cooking ba...",128.0,"{'atm': 1, 'bank': 22, 'bar': 13, 'bicycle_ren..."
4282,49082089,https://www.airbnb.com/rooms/49082089,New 2 Bedroom at Coal Harbour Downtown - Water...,Vancouver's finest!! You'll love coming home t...,https://a0.muscache.com/pictures/d8c8f5b2-b828...,49.29013,-123.12886,Entire condominium,4,2.0,2.0,"[""EV charger"", ""Ethernet connection"", ""Central...",131.0,"{'bank': 19, 'bar': 5, 'bicycle_rental': 24, '..."
