In [1]:
import numpy as np
import pandas as pd

#### Read [Airbnb listings dataset for Washington DC](http://data.insideairbnb.com/united-states/dc/washington-dc/2015-10-03/data/listings.csv.gz) into a Pandas DataFrame

In [2]:
# Load the listings from the local CSV file (path is relative to the notebook's working directory).
dc_listings = pd.read_csv(filepath_or_buffer="dc_airbnb.csv")

#### Compute the `distance` for a single feature, the number of people accommodated by a listing, compared against a hypothetical listing which accommodates 3 people.

In [3]:
# Absolute difference between each listing's capacity and the 3-person reference listing,
# computed on the whole column at once.
dc_listings["distance"] = (3 - dc_listings["accommodates"]).abs()

#### Shuffle the rows into a random order, then sort by `distance`

In [4]:
# Seed NumPy's global RNG so the shuffle below gives the same order on every run.
np.random.seed(1)

# Re-select the rows using a permuted copy of the index labels, then order by distance.
shuffled_labels = np.random.permutation(dc_listings.index)
dc_listings = dc_listings.loc[shuffled_labels].sort_values("distance")

#### Clean the `price` column, convert its type from string to float, and find the mean price of the top five rows with `distance == 0`

In [8]:
def clean_price(price):
    """
    Strip thousands separators and dollar signs from a price string.

    :param price a price as text, e.g. "$1,200.00"; values that are not strings
                 (numbers that were already converted, NaN for a missing price)
                 are accepted as well
    :return the text with every ',' and '$' removed (still a string, ready to be
            converted to float); non-string input is returned unchanged
    """
    # Non-string values have nothing to strip. Passing them through means the
    # cleaning step can be applied again to an already-converted column without
    # raising AttributeError.
    if not isinstance(price, str):
        return price

    return price.replace(',', '').replace('$', '')

# Convert `price` from text such as "$1,200.00" to float. The dtype guard keeps this
# cell safe to run more than once: when the column is already numeric the conversion
# is skipped instead of being applied to values that are no longer strings.
if not pd.api.types.is_numeric_dtype(dc_listings["price"]):
    dc_listings["price"] = (
        dc_listings["price"]
        .map(clean_price, na_action="ignore")  # missing prices stay NaN
        .astype(float)
    )

# dc_listings is sorted by `distance`, so the first five rows are the closest matches.
mean_price = np.mean(dc_listings["price"].iloc[:5])
print("Average price for listings with same accomodation: {price}".format(price=mean_price))

Average price for listings with same accomodation: 156.6


#### Using the above approach, we can create a general function for predicting the best price for a listing based on the number of people it accommodates:

In [9]:
def predict_price(sleepers, listings, k=5):
    """
    Predict a listing price based on the number of people the listing accommodates (the feature),
    using a dataset of existing listings as the training dataset.

    The prediction is the mean price of the `k` listings whose `accommodates` value is
    closest to `sleepers`. This function only sorts by distance; it does not shuffle, so
    shuffle `listings` beforehand if the order of equally distant rows matters.
    `listings` itself is not modified.

    :param sleepers the number of people the listing accommodates
    :param listings DataFrame containing listings for the same market; it must have
                    `accommodates` and `price` columns
    :param k how many of the closest listings to average (default 5)
    :return mean price of the `k` closest listings
    :raises ValueError if `k` is less than 1
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    # get the "distance" based on number of people the listing accommodates,
    # computed for the whole column at once
    distance = (listings["accommodates"] - sleepers).abs()

    # assign() returns a new frame, so the caller's DataFrame is left untouched;
    # sort into ascending order (lowest distances first)
    nearest_first = listings.assign(distance=distance).sort_values(by="distance")

    # return the average price of the first k rows
    return np.mean(nearest_first["price"].iloc[:k])


In [12]:
# Report the predicted price for every guest capacity from 1 through 6.
price_line = "Optimal price for listing which accommodates {peeps} people:  ${price:.2f}"
for capacity in range(1, 7):
    predicted = predict_price(capacity, dc_listings)
    print(price_line.format(peeps=capacity, price=predicted))

Optimal price for listing which accommodates 1 people:  $78.80
Optimal price for listing which accommodates 2 people:  $126.00
Optimal price for listing which accommodates 3 people:  $194.80
Optimal price for listing which accommodates 4 people:  $197.60
Optimal price for listing which accommodates 5 people:  $183.20
Optimal price for listing which accommodates 6 people:  $186.00
