In [1]:
import pandas as pd
import numpy as np
from math import radians, cos, sin, asin, sqrt

def read_data():
    listings_data = pd.read_csv('listings.csv.gz')
    amenities_data = pd.read_json('amenities-vancouver.json.gz', lines=True)
    return listings_data, amenities_data


def clean_amenities_data(amenities_data, amenities_required):

    #find unique amenities and the number of them to choose which are important for a traveller
    # print(amenities_data['amenity'].value_counts())

    #adapted from : https://www.kite.com/python/answers/how-to-filter-a-pandas-dataframe-with-a-list-by-%60in%60-or-%60not-in%60-in-python
    bool_series = amenities_data.amenity.isin(amenities_required)
    filtered_amenities_df = amenities_data[bool_series]
    filtered_amenities_df = filtered_amenities_df.drop(['timestamp','tags'], axis=1).dropna() # dropping unnecessary columns, and filter out NA values
    filtered_amenities_df.reset_index(inplace=True, drop=True)

    return filtered_amenities_df


#reference: https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points
def haversine_distance(lon1, lat1, lon2, lat2):
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r * 1000

def haversine_distance2(df, lon2, lat2):
    # convert decimal degrees to radians 
    lon1=np.radians(df['lon'])
    lat1=np.radians(df['lat'])
    lon2=np.radians(lon2)
    lat2=np.radians(lat2)
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = (dlat/2).apply(sin)**2 + (lat1).apply(sin) * cos(lat2) * (dlon/2).apply(sin)**2
    c = 2 * ((a).apply(sqrt).apply(asin)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r * 1000

def clean_listings_data(listings_data):
    #keep only the columns we need
    columns_needed = ['id', 'listing_url', 'name', 'description', 'picture_url', 'latitude', 'longitude', 'property_type', 'accommodates', 'bedrooms', 'beds', 'amenities', 'price']
    listings_data = listings_data[columns_needed]
    return listings_data


# #TODO: return a dictionary with number of amenities in a 1km radius of this lat and lon
def num_amenities(lat, lon, amenities_data_clean):
    distance = haversine_distance2(amenities_data_clean, lon, lat)
    amenities_data_clean['distance'] = distance
    data_withinR = amenities_data_clean.loc[amenities_data_clean['distance'] < 1000].reset_index(drop=True)
    amenities_series = data_withinR.pivot_table(columns = ['amenity'], aggfunc='size')  # Counts # of amenities, type=pd.series
    amenities_dict = amenities_series.to_dict()# converts series to dict
    return amenities_dict

In [2]:
#Read Data
listings_data, amenities_data = read_data()

# Change amenities here (updated the "restaurant" typo)
amenities_required = ['restaurant', 'fast_food', 'cafe','bank','atm','pharmacy','bicycle_rental','fuel','pub','bar','car_sharing','car_rental','clinic','doctors','hospital','ice_cream','fountain','theatre','police','bus_station']

#Data Cleaning
amenities_data_clean = clean_amenities_data(amenities_data, amenities_required)
listings_data_clean = clean_listings_data(listings_data)


#Return a dict of amenities:
lat_input = 49.225164  # sample lat input
lon_input = -123.003742  # sample lon input
amen_dict = num_amenities(lat_input, lon_input, amenities_data_clean)

In [3]:
amen_dict

{'bank': 16,
 'bar': 16,
 'bus_station': 1,
 'cafe': 27,
 'car_rental': 1,
 'clinic': 8,
 'fast_food': 61,
 'fountain': 1,
 'fuel': 1,
 'ice_cream': 1,
 'pharmacy': 9,
 'police': 1,
 'pub': 1,
 'restaurant': 56}

In [4]:
listings_data_clean['accommodates'].max()

16

In [5]:
listings_data_clean.loc[listings_data_clean['accommodates'] == 17]

Unnamed: 0,id,listing_url,name,description,picture_url,latitude,longitude,property_type,accommodates,bedrooms,beds,amenities,price


In [6]:
listings_data_clean.empty

False

In [7]:
listings_data_clean = listings_data_clean.head(100)
listings_data_clean

Unnamed: 0,id,listing_url,name,description,picture_url,latitude,longitude,property_type,accommodates,bedrooms,beds,amenities,price
0,10080,https://www.airbnb.com/rooms/10080,D1 - Million Dollar View 2 BR,"Stunning two bedroom, two bathroom apartment. ...",https://a0.muscache.com/pictures/55778229/c2f7...,49.28872,-123.12046,Entire condominium,5,2.0,2.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$150.00
1,13358,https://www.airbnb.com/rooms/13358,Monthly (or Longer ) Designer One Bedroom Down...,<b>The space</b><br />This suites central loca...,https://a0.muscache.com/pictures/c23bb7ef-e300...,49.28201,-123.12669,Entire condominium,2,1.0,1.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$85.00
2,13490,https://www.airbnb.com/rooms/13490,Vancouver's best kept secret,This apartment rents for one month blocks of t...,https://a0.muscache.com/pictures/73394727/79d5...,49.25622,-123.06607,Entire apartment,2,1.0,1.0,"[""Iron"", ""Outdoor dining area"", ""Ethernet conn...",$145.00
3,14267,https://www.airbnb.com/rooms/14267,EcoLoft Vancouver,"The Ecoloft is located in the lovely, family r...",https://a0.muscache.com/pictures/3646de9b-934e...,49.24922,-123.08139,Entire house,4,1.0,2.0,"[""Iron"", ""Dryer"", ""Cooking basics"", ""Hot water...",$140.00
4,16254,https://www.airbnb.com/rooms/16254,Close to PNE/Hastings Park and East Village,2 Bedroom garden level guest suite.(lower leve...,https://a0.muscache.com/pictures/90623667/583c...,49.27721,-123.04086,Entire guest suite,4,2.0,3.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$195.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,944021,https://www.airbnb.com/rooms/944021,Modern Serenity - Heart of Yaletown/Downtown,"Whether you're in town for business, or lookin...",https://a0.muscache.com/pictures/miso/Hosting-...,49.27805,-123.11794,Entire condominium,2,1.0,1.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$130.00
96,958090,https://www.airbnb.com/rooms/958090,Private apartment-Yaletown Downtown,<b>The space</b><br />I have been an airbnb ho...,https://a0.muscache.com/pictures/14252294/5842...,49.27744,-123.11524,Entire apartment,4,,2.0,"[""Heating"", ""Kitchen"", ""Gym"", ""Hot tub"", ""Drye...",$499.00
97,961006,https://www.airbnb.com/rooms/961006,Oakridge Canada Line completely renovated home,Bright 2 BR garden suite in a Newly renovated ...,https://a0.muscache.com/pictures/d4f28caa-bb4d...,49.23569,-123.10987,Entire guest suite,4,2.0,2.0,"[""Iron"", ""Dryer"", ""Hot water"", ""Essentials"", ""...",$219.00
98,961831,https://www.airbnb.com/rooms/961831,Spacious Room in Character Home,Large bedroom in a very spacious heritage hous...,https://a0.muscache.com/pictures/miso/Hosting-...,49.26648,-123.15939,Private room in house,2,1.0,1.0,"[""Kitchen"", ""Essentials"", ""Iron"", ""Fire exting...",$55.00


In [8]:
listings_data_clean['num_amenities_nearby'] = listings_data_clean.apply(lambda x: num_amenities(x['latitude'], x['longitude'], amenities_data_clean), axis = 1)

In [9]:
listings_data_clean

Unnamed: 0,id,listing_url,name,description,picture_url,latitude,longitude,property_type,accommodates,bedrooms,beds,amenities,price,num_amenities_nearby
0,10080,https://www.airbnb.com/rooms/10080,D1 - Million Dollar View 2 BR,"Stunning two bedroom, two bathroom apartment. ...",https://a0.muscache.com/pictures/55778229/c2f7...,49.28872,-123.12046,Entire condominium,5,2.0,2.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$150.00,"{'atm': 1, 'bank': 21, 'bar': 9, 'bicycle_rent..."
1,13358,https://www.airbnb.com/rooms/13358,Monthly (or Longer ) Designer One Bedroom Down...,<b>The space</b><br />This suites central loca...,https://a0.muscache.com/pictures/c23bb7ef-e300...,49.28201,-123.12669,Entire condominium,2,1.0,1.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$85.00,"{'atm': 1, 'bank': 30, 'bar': 27, 'bicycle_ren..."
2,13490,https://www.airbnb.com/rooms/13490,Vancouver's best kept secret,This apartment rents for one month blocks of t...,https://a0.muscache.com/pictures/73394727/79d5...,49.25622,-123.06607,Entire apartment,2,1.0,1.0,"[""Iron"", ""Outdoor dining area"", ""Ethernet conn...",$145.00,"{'bank': 4, 'bicycle_rental': 7, 'cafe': 7, 'c..."
3,14267,https://www.airbnb.com/rooms/14267,EcoLoft Vancouver,"The Ecoloft is located in the lovely, family r...",https://a0.muscache.com/pictures/3646de9b-934e...,49.24922,-123.08139,Entire house,4,1.0,2.0,"[""Iron"", ""Dryer"", ""Cooking basics"", ""Hot water...",$140.00,"{'bank': 3, 'bicycle_rental': 2, 'cafe': 7, 'c..."
4,16254,https://www.airbnb.com/rooms/16254,Close to PNE/Hastings Park and East Village,2 Bedroom garden level guest suite.(lower leve...,https://a0.muscache.com/pictures/90623667/583c...,49.27721,-123.04086,Entire guest suite,4,2.0,3.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$195.00,"{'bank': 4, 'cafe': 6, 'doctors': 4, 'fast_foo..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,944021,https://www.airbnb.com/rooms/944021,Modern Serenity - Heart of Yaletown/Downtown,"Whether you're in town for business, or lookin...",https://a0.muscache.com/pictures/miso/Hosting-...,49.27805,-123.11794,Entire condominium,2,1.0,1.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$130.00,"{'atm': 1, 'bank': 29, 'bar': 28, 'bicycle_ren..."
96,958090,https://www.airbnb.com/rooms/958090,Private apartment-Yaletown Downtown,<b>The space</b><br />I have been an airbnb ho...,https://a0.muscache.com/pictures/14252294/5842...,49.27744,-123.11524,Entire apartment,4,,2.0,"[""Heating"", ""Kitchen"", ""Gym"", ""Hot tub"", ""Drye...",$499.00,"{'atm': 1, 'bank': 23, 'bar': 28, 'bicycle_ren..."
97,961006,https://www.airbnb.com/rooms/961006,Oakridge Canada Line completely renovated home,Bright 2 BR garden suite in a Newly renovated ...,https://a0.muscache.com/pictures/d4f28caa-bb4d...,49.23569,-123.10987,Entire guest suite,4,2.0,2.0,"[""Iron"", ""Dryer"", ""Hot water"", ""Essentials"", ""...",$219.00,"{'bank': 7, 'cafe': 5, 'fast_food': 1, 'fuel':..."
98,961831,https://www.airbnb.com/rooms/961831,Spacious Room in Character Home,Large bedroom in a very spacious heritage hous...,https://a0.muscache.com/pictures/miso/Hosting-...,49.26648,-123.15939,Private room in house,2,1.0,1.0,"[""Kitchen"", ""Essentials"", ""Iron"", ""Fire exting...",$55.00,"{'bank': 8, 'bicycle_rental': 9, 'cafe': 28, '..."


In [10]:
listings_data_clean['price'] = listings_data_clean['price'].apply(lambda x: float(x.replace('$','').replace(',','')))

In [11]:
listings_data_clean = listings_data_clean.loc[(listings_data_clean['price'] <= 150) & (listings_data_clean['accommodates'] >= 3) & (listings_data_clean['bedrooms'] >= 2)]

In [18]:
listings_data_clean.reset_index(drop = True, inplace = True)

In [38]:
listings_data_clean['len_dict'] = listings_data_clean['num_amenities_nearby'].apply(lambda x : len(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_data_clean['len_dict'] = listings_data_clean['num_amenities_nearby'].apply(lambda x : len(x))


In [39]:
listings_data_clean

Unnamed: 0,id,listing_url,name,description,picture_url,latitude,longitude,property_type,accommodates,bedrooms,beds,amenities,price,num_amenities_nearby,len_dict
0,10080,https://www.airbnb.com/rooms/10080,D1 - Million Dollar View 2 BR,"Stunning two bedroom, two bathroom apartment. ...",https://a0.muscache.com/pictures/55778229/c2f7...,49.28872,-123.12046,Entire condominium,5,2.0,2.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",150.0,"{'atm': 1, 'bank': 21, 'bar': 9, 'bicycle_rent...",15
1,16611,https://www.airbnb.com/rooms/16611,"1 block to skytrain station, shops,restaurant,...","My place is close to bank, coffee shops, groce...",https://a0.muscache.com/pictures/82101/7127b63...,49.26339,-123.07145,Entire house,6,3.0,4.0,"[""Heating"", ""Essentials"", ""Kitchen"", ""Iron"", ""...",100.0,"{'bank': 5, 'bar': 1, 'bicycle_rental': 17, 'c...",11
2,17158,https://www.airbnb.com/rooms/17158,Vancouver 4br 3ba house 20min to DT,A newly renovated comfy and cozy family home i...,https://a0.muscache.com/pictures/5ddc28c0-484f...,49.25277,-123.03774,Entire house,8,4.0,5.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",130.0,"{'bank': 2, 'cafe': 4, 'fast_food': 2, 'fuel':...",5
3,20085,https://www.airbnb.com/rooms/20085,Bright & Cheerful Uptown Garden Suite,"Welcome to our bright, cozy suite in the heart...",https://a0.muscache.com/pictures/miso/Hosting-...,49.25636,-123.08636,Entire guest suite,3,2.0,2.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",69.0,"{'bank': 1, 'bar': 1, 'bicycle_rental': 12, 'c...",10
4,35102,https://www.airbnb.com/rooms/35102,Downtown Yaletown Trendy 2 Bd/2Bth,Large and bright corner unit of a brick concre...,https://a0.muscache.com/pictures/784805/089b1a...,49.27743,-123.11869,Entire condominium,4,2.0,2.0,"[""Iron"", ""Hot tub"", ""Dryer"", ""Bed linens"", ""El...",132.0,"{'atm': 1, 'bank': 26, 'bar': 28, 'bicycle_ren...",16
5,111077,https://www.airbnb.com/rooms/111077,"Clean and Comfy, 2 Bd, Walk to the Drive",This suite is close to vibrant Commercial Driv...,https://a0.muscache.com/pictures/731699/0abd40...,49.26758,-123.06016,Entire guest suite,3,2.0,2.0,"[""Iron"", ""Dryer"", ""Cooking basics"", ""Hot water...",125.0,"{'bank': 6, 'bar': 2, 'bicycle_rental': 8, 'ca...",11
6,170137,https://www.airbnb.com/rooms/170137,Family friendly Point Grey Home,Family home suitable for short or long term st...,https://a0.muscache.com/pictures/1192106/f3f7b...,49.25757,-123.18966,Entire house,6,3.0,3.0,"[""Iron"", ""Dryer"", ""Baby safety gates"", ""Bed li...",150.0,"{'cafe': 5, 'fast_food': 2, 'pharmacy': 1, 'pu...",5
7,219261,https://www.airbnb.com/rooms/219261,A Real Gem in the Heart of Kits,"Modern looking, clean and comfortable renovate...",https://a0.muscache.com/pictures/37e3994b-4511...,49.2719,-123.15043,Entire apartment,4,2.0,2.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",135.0,"{'bank': 6, 'bicycle_rental': 16, 'cafe': 28, ...",12
8,248014,https://www.airbnb.com/rooms/248014,"Stunning views, comfort, location!!",Gorgeous views and fabulous location. Availabl...,https://a0.muscache.com/pictures/21342517/ad2c...,49.29103,-123.05782,Entire house,3,2.0,0.0,"[""Iron"", ""Dryer"", ""Cooking basics"", ""Hot water...",100.0,"{'bar': 1, 'bicycle_rental': 1, 'cafe': 3, 'fa...",7
9,311354,https://www.airbnb.com/rooms/311354,Close to downtown/shops/restaurants/skytrain,Prices depend on the length of the lease. 6 be...,https://a0.muscache.com/pictures/c5dd03a6-7835...,49.26328,-123.07086,Entire house,7,3.0,3.0,"[""Iron"", ""Dryer"", ""Cooking basics"", ""Hot water...",100.0,"{'bank': 5, 'bar': 1, 'bicycle_rental': 17, 'c...",11


In [37]:
len(listings_data_clean['num_amenities_nearby'][0])

15

In [23]:
my_dict =  listings_data_clean['num_amenities_nearby'][4]
my_dict

{'atm': 1,
 'bank': 26,
 'bar': 28,
 'bicycle_rental': 33,
 'cafe': 124,
 'car_rental': 5,
 'car_sharing': 4,
 'clinic': 5,
 'fast_food': 133,
 'fountain': 1,
 'ice_cream': 4,
 'pharmacy': 14,
 'police': 1,
 'pub': 26,
 'restaurant': 186,
 'theatre': 7}

In [34]:
my_dict['restaurant'] 

196

In [35]:
len(my_dict)

16

In [30]:
for key in my_dict:
    print(key)
    

atm
bank
bar
bicycle_rental
cafe
car_rental
car_sharing
clinic
fast_food
fountain
ice_cream
pharmacy
police
pub
restaurant
theatre


In [29]:
restaurant, cafe, atm,car_rental, ice_cream, bus_station, pub, pharmacy

KeyError: 'lol'

In [44]:
def ameneties_score(my_dict):
    
    num_different_amenities = len(my_dict)
    score = num_different_amenities * 10 
        
    for key in my_dict:
        if (my_dict[key] > 30):
            score+=30
        else:
            score+= my_dict[key]
    return score

In [45]:
ameneties_score(my_dict)

402