# AirBNB Price Analysis

1. Webscrape AirBNB page to find absolute location of stay
2. Convert absolute location to address
3. Find estimated rent of the AirBNB
4. Infer actual cost of living in the AirBNB unit with expenses
5. Compare AirBNB price with estimated price, analyze the AirBNB premium

### Imports

In [235]:
import json
import os
import time

import numpy as np
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.options import Options

### Step 1: Webscraping AirBNB Rental

In [237]:
def getNumbers(w):
    """Extract the decimal digits of ``w`` and return them as a single int.

    All decimal digits in the string are concatenated in order, so
    ``"$1,250 per night"`` becomes ``1250``. Separators such as ``,`` or ``.``
    are simply dropped (``"1.5"`` becomes ``15``).

    Parameters
    ----------
    w : str
        Text to scan.

    Returns
    -------
    int or str
        The concatenated digits as an int, or ``w`` unchanged when it
        contains no decimal digit (e.g. ``"Studio"``).
    """
    # isdecimal() keeps only characters int() can parse. isnumeric() also
    # accepts characters such as "½" or "²", which make int() raise ValueError.
    nums = "".join(c for c in w if c.isdecimal())
    return int(nums) if nums else w
# Per-state rent reference table; the rent-estimate cell falls back to it
# when no address-level estimate can be obtained.
state_rent_data = pd.read_csv(filepath_or_buffer="state_rent_data.csv")
state_rent_data.head(n=5)

Unnamed: 0,state,MedianRent,averageRentZillow
0,Hawaii,1651,2850.0
1,District of Columbia,1607,
2,California,1586,3000.0
3,Maryland,1415,1844.0
4,New Jersey,1368,2200.0


In [300]:
# Launch the Firefox WebDriver session that the scraping cells below drive.
options = Options()
# Headless mode is left disabled, so a browser window is opened;
# uncomment the next line to run Firefox without a visible window.
#options.add_argument("-headless")
driver = webdriver.Firefox(options=options)

In [304]:
listing_url = "https://www.airbnb.com/rooms/633490307738150521"
driver.get(listing_url)
# Fixed pause before reading the page (presumably to let it finish rendering).
time.sleep(5)

# Summary line of the listing, split on "·"; the first four fields are read
# below as guests / bedrooms / beds / baths.
info = [x.strip() for x in driver.find_element("class name", "lgx66tx").text.split("·")]
price = getNumbers(driver.find_elements("class name", "_tyxjp1")[1].text.strip())

# Absolute XPath of the map link; brittle, it breaks whenever the page layout changes.
map_link_xpath = "/html/body/div[5]/div/div/div[1]/div/div[2]/div/div/div/div[1]/main/div/div[1]/div[5]/div/div/div/div[2]/section/div[3]/div[4]/div[2]/div/div/div[16]/div/div[5]/div[2]/a"
# Extra pixels to keep trying past the bottom of the page before giving up.
scroll_grace_px = 2000

# Scroll down in 20px steps until the map link can be read.
map_link, scrollDownHeight, last_error = None, 0, None
while not map_link:
    try:
        map_link = driver.find_element("xpath", map_link_xpath).get_attribute("href")
    except Exception as err:  # not a bare except: Ctrl-C must still interrupt the loop
        last_error = err
    if map_link:
        break
    # The page height is re-read on every pass because it can grow while scrolling.
    page_height = driver.execute_script("return document.documentElement.scrollHeight")
    if scrollDownHeight > page_height + scroll_grace_px:
        # Previously this loop spun forever when the link never appeared.
        raise RuntimeError("Map link not found on " + listing_url + " after scrolling the whole page") from last_error
    scrollDownHeight += 20
    driver.execute_script("window.scroll(0, " + str(scrollDownHeight) + ");")

numGuests, numBr, numBed = (getNumbers(x) for x in info[0:3])
# Fractional bath counts (e.g. "1.5 baths") are rounded up to a whole number.
numBath = int(np.ceil(float(info[3].split()[0])))

# The link is split on "@" and ","; the two fields after the first separator
# are taken as latitude and longitude. Padding keeps both values "" when the
# link has fewer separators, matching the previous character-by-character scan.
link_fields = (map_link.replace("@", ",").split(",") + ["", ""])[:3]
lat, lng = link_fields[1], link_fields[2]

print("$" + str(price), "per night")
print(str(numBr) + (" Bedroom, " if numBr == 1 else (" Bedrooms, " if isinstance(numBr,int) else ", ")) + str(numBath) + (" Bathroom" if numBath == 1 else " Bathrooms"))
print(lat,lng,sep = ", ")

$125 per night
1 Bedroom, 1 Bathroom
38.95735, -77.41617


### Step 2. Getting AirBNB Address

In [305]:
# The ArcGIS key is read from the environment so that no credential is stored
# in the notebook. The key that used to be hard-coded here has been exposed
# and should be revoked.
api_token = os.environ.get("ARCGIS_API_TOKEN", "")
if not api_token:
    raise RuntimeError("Set the ARCGIS_API_TOKEN environment variable to an ArcGIS API key")

loc_url = "https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/reverseGeocode"
# Query parameters are passed via `params` so requests URL-encodes them;
# the service expects the location as "longitude,latitude".
response = requests.get(
    loc_url,
    params={"f": "pjson", "location": lng + "," + lat, "token": api_token},
    timeout=30,  # requests has no default timeout; avoid hanging the kernel
)
response.raise_for_status()
address_json = response.json()
if "address" not in address_json:
    # Fail with the service's own payload rather than an opaque KeyError below.
    raise RuntimeError("Reverse geocoding failed: " + json.dumps(address_json.get("error", address_json)))

addNum = address_json["address"]["AddNum"]
# "Address" is assumed to start with the house number followed by a space;
# strip that prefix so `street` holds the street name only.
street = (address_json["address"]["Address"] if addNum == "" else address_json["address"]["Address"][(len(addNum) + 1):])
city = address_json["address"]["City"]
state = address_json["address"]["RegionAbbr"]
zipcode = address_json["address"]["Postal"]
street_address = ((street + ", ") if street != "" else "") + city + ", " + state + " " + zipcode
print((street_address if addNum == "" else addNum + " " + street_address))

2323 Dulles Station Blvd, Herndon, VA 20171


### Step 3. Getting Associated Estimated Rent

In [306]:
def _read_rent_estimate():
    """Return the rent estimate shown on the page as an int, or None if none is displayed."""
    try:
        shown = getNumbers(driver.find_element("class name", "display-3").get_attribute("textContent"))
    except Exception:
        return None
    # getNumbers returns the original text when it contains no digits.
    return shown if isinstance(shown, int) else None


driver.get("https://app.rentcast.io/app")

# Full address as printed by the geocoding cell. `address` was previously
# never defined in the notebook, so this cell failed on a fresh kernel.
address = (street_address if addNum == "" else addNum + " " + street_address)

# Map each dropdown option's text to its element so options can be clicked by label.
elems = {}
for elem in driver.find_elements("class name", "dropdown-item"):
    elems[elem.get_attribute("textContent")] = elem

driver.find_element("class name", "form-control").send_keys(address)
# Dropdown toggles 3, 4 and 5 are used for property type, bedrooms and bathrooms.
driver.find_elements("class name", "dropdown-toggle")[3].click()
elems[" Apartment "].click()
driver.find_elements("class name", "dropdown-toggle")[4].click()
if isinstance(numBr, int):
    bed_label = (str(numBr) if numBr < 6 else "6+") + (" Bed" if numBr == 1 else " Beds")
else:
    # Non-numeric bedroom text (e.g. a studio) is used as the option label as-is.
    bed_label = numBr
elems[" " + bed_label + " "].click()
driver.find_elements("class name", "dropdown-toggle")[5].click()
elems[" " + (str(numBath) if numBath < 4 else "4+") + (" Bath" if numBath == 1 else " Baths") + " "].click()
driver.find_element("class name", "btn-primary").click()
time.sleep(3)

# Fallback addresses, most specific first: neighbouring house numbers
# (+1, -1, +2, -2), then the street without a number, then city/state/zip only.
testAddresses = []
if addNum.isdecimal():  # skip neighbours for numbers int() cannot parse, e.g. "12A"
    for offset in (1, -1, 2, -2):
        new_addNum = int(addNum) + offset
        if new_addNum > 0:
            testAddresses.append(str(new_addNum) + " " + street_address)
testAddresses.append(street_address)
testAddresses.append(city + ", " + state + " " + zipcode)

# Read the result of the initial search, then of each retry. The result is read
# AFTER every submission, so the last fallback address is no longer ignored.
rentEst = _read_rent_estimate()
for test_address in testAddresses:
    if rentEst:
        break
    driver.find_element("class name", "form-control").clear()
    driver.find_element("class name", "form-control").send_keys(test_address)
    driver.find_element("class name", "btn-primary").click()
    time.sleep(len(testAddresses) + 2)
    rentEst = _read_rent_estimate()

# Last resort: the state-level median rent from the reference table.
remoteLocation = False
if (not rentEst):
    # Select the rent column by name; positional column 1 of this table is `state`.
    state_rents = state_rent_data.loc[state_rent_data["state"] == address_json["address"]["Region"], "MedianRent"]
    if state_rents.empty:
        raise LookupError("No state rent data for " + str(address_json["address"]["Region"]))
    rentEst = int(state_rents.iloc[0])
    remoteLocation = True
print(rentEst)

2190


### Step 4. Calculate Stay Premium and Host Profits


In [307]:
# Utility cost added to the monthly rent estimate; figure taken from
# https://www.forbes.com/home-improvement/living/monthly-utility-costs-by-state/
utl_avg = 429.33

# Monthly rent + utilities, annualised and spread over 365.25 days.
dayRentEst = round((rentEst + utl_avg) * 12 / 365.25, 2)
# Nightly price as a percentage of the estimated daily cost (100 = break-even).
premiumPerc = round(price / dayRentEst * 100)
hostProfit = round(price - dayRentEst, 2)

report = [
    "Your daily price: $" + str(price),
    "Estimated daily host costs: $" + str(dayRentEst),
    "Estimated stay premium: " + str(premiumPerc) + "%",
    "Estimated daily host profit: $" + str(hostProfit),
]
if remoteLocation:
    # The rent came from the state-level fallback rather than an address-level estimate.
    report.append("Remote location detected: estimates likely to be inaccurate")
print("\n".join(report))

Your daily price: $125
Estimated daily host costs: $86.06
Estimated stay premium: 145%
Estimated daily host profit: $38.94
