In [5]:
import numpy as np
import pandas as pd

In [15]:
# Load the two pre-processed Cancun tables from CSV in a single pass:
# first the per-listing feature table, then the filter-attribute table.
features, filters = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/processed/cancun_filtered_features.csv',
        '../data/processed/cancun_filters.csv',
    )
)

In [16]:
# Peek at the first row of `filters` to see which columns it carries.
filters.iloc[:1]

Unnamed: 0,bathrooms,bedrooms,num_beds,can_instant_book,is_fully_refundable,is_superhost,is_new_listing,room_type_category,person_capacity,lat,lng,price,url,listing_key,price_in_usd
0,1.0,1.0,1,True,True,True,False,private_room,2,21.13566,-86.76741,22,https://www.airbnb.com/rooms/20776319,20776319,16.541353


In [17]:
# Peek at the first row of `features` to see which columns it carries.
features.iloc[:1]

Unnamed: 0,guest_satisfication,host_reviews,monthly_price_factor,picture_count,price,response_rate,reviews_count,weekly_price_factor,listing_key
0,95,311,0.88,19,22,100,50,0.88,20776319


In [27]:
# Step 1: apply the hard filters.
# A listing survives only if its USD price lies strictly between 150 and 200,
# it has at least 4 beds, and its person_capacity is at least 4.
filtered_listings = filters.loc[
    lambda df: (
        (df['price_in_usd'] > 150)
        & (df['price_in_usd'] < 200)
        & (df['num_beds'] >= 4)
        & (df['person_capacity'] >= 4)
    )
]

In [28]:
# Sanity check: (rows, columns) left after the hard filters.
len(filtered_listings.index), len(filtered_listings.columns)

(172, 15)

In [24]:
# Second get preferences from user
# Relative importance of each ranking feature. Only the proportions between
# the values matter; all 1s means every feature counts equally.
DEFAULT_WEIGHTINGS = {
    'guest_satisfication': 1,
    'host_reviews': 1,
    'reviews_count': 1,
    'response_rate': 1
}


def normalize_weights(weightings):
    """Scale raw preference weights so that they sum to 1.

    Parameters
    ----------
    weightings : dict
        Maps a feature name to its relative importance.

    Returns
    -------
    dict
        The same keys, each value divided by the total of all values.
        An empty input yields an empty dict.

    Raises
    ------
    ValueError
        If the weights are non-empty but do not add up to a positive total,
        since no meaningful proportions exist in that case.
    """
    if not weightings:
        return {}
    total = sum(weightings.values())
    if total <= 0:
        raise ValueError('weightings must sum to a positive number')
    return {name: weight / total for name, weight in weightings.items()}


# Divide by the total weight rather than by the number of features, so the
# result still sums to 1 once the preferences are no longer all equal.
FINAL_WEIGHTS = normalize_weights(DEFAULT_WEIGHTINGS)
FINAL_WEIGHTS

{'guest_satisfication': 0.25,
 'host_reviews': 0.25,
 'response_rate': 0.25,
 'reviews_count': 0.25}

In [29]:
# Attach the feature columns to the surviving listings via an inner join on
# `listing_key`, so only listings present in both tables remain.
# NOTE: this rebinds `filtered_listings`; re-running this cell without first
# re-running the filter cell merges the feature columns in a second time.
filtered_listings = filtered_listings.merge(features, how='inner', on='listing_key')
len(filtered_listings.index), len(filtered_listings.columns)

(172, 23)

In [34]:
# Create weightings (assume equal)
FINAL_WEIGHTS = {
    'guest_satisfication': 0.25,
    'host_reviews': 0.25,
    'reviews_count': 0.25,
    'response_rate': 0.25
}


def score_listings(listings, weights):
    """Score each listing by a weighted sum of percentile ranks.

    The set of features is taken from ``weights`` itself, so adding, removing
    or re-weighting a feature only requires editing the weights dict.

    Parameters
    ----------
    listings : pandas.DataFrame
        Must contain a ``listing_key`` column and one column per key of
        ``weights``.
    weights : dict
        Maps a feature column name to its weight in the final score.

    Returns
    -------
    pandas.DataFrame
        One ``<feature>_rank`` column per weighted feature (its percentile
        rank within ``listings``), followed by ``listing_key`` and
        ``cumulative_score``. Rows keep the index of ``listings``.

    Raises
    ------
    ValueError
        If ``weights`` is empty.
    KeyError
        If ``listing_key`` or a weighted feature is not a column of
        ``listings``.
    """
    if not weights:
        raise ValueError('weights must name at least one feature')
    required = list(weights) + ['listing_key']
    missing = [col for col in required if col not in listings.columns]
    if missing:
        raise KeyError('listings is missing columns: {}'.format(missing))

    # Compute Percentile Rank
    scored = pd.DataFrame(
        {feature + '_rank': listings[feature].rank(pct=True) for feature in weights}
    )
    scored['listing_key'] = listings['listing_key']

    # Add up Percentile Rank * Column Weightings.
    # Accumulated term by term in dict order so NaNs propagate exactly as
    # they would with a hand-written `a * w_a + b * w_b + ...` expression.
    cumulative_score = sum(
        scored[feature + '_rank'] * weight for feature, weight in weights.items()
    )
    return scored.assign(cumulative_score=cumulative_score)


ranked_features = score_listings(filtered_listings, FINAL_WEIGHTS)

# Grab the listing_key and cumulative_score columns, best score first
rankings = (
    ranked_features
    .sort_values(by='cumulative_score', ascending=False)
    .loc[:, ['cumulative_score', 'listing_key']]
)

In [37]:
# Renumber the rows 0..n-1 in ranked order. Because the old index is not
# dropped, the previous row labels are kept in a new `index` column.
# NOTE: this rebinds `rankings`, so re-running the cell on its own stacks
# another saved-index column on top.
rankings = rankings.reset_index(drop=False)

In [39]:
# Show the five best-scoring listings.
rankings.iloc[:5]

Unnamed: 0,index,cumulative_score,listing_key
0,140,0.837936,18977822
1,139,0.820494,17855607
2,102,0.802326,10823831
3,122,0.789244,29510479
4,71,0.763081,3575602


In [38]:
# Look up where one specific listing landed in the ranking.
rankings.loc[rankings['listing_key'] == 10823831]

Unnamed: 0,index,cumulative_score,listing_key
2,102,0.802326,10823831
