# Craigslist Rent Statistics Generator

Written by Kivalu Ramanlal

In [1]:
from bs4 import BeautifulSoup
from requests import get
import pandas as pd
import itertools, re
import sys
from fake_useragent import UserAgent

In [2]:
# Patterns applied to a listing's free-text "housing" span. Compiled once at
# module level so they are not rebuilt for every listing.
_BEDROOMS_RE = re.compile(r"([0-9])\s*(?:br|bed\s*room)")
_AREA_RE = re.compile(r"([0-9]+[0-9,]*(?:\.[0-9]{2})?)\s*(?:ft|feet|(?:sq|square)\s*(?:ft|feet)?)")
_STAT_NAMES = ["min", "median", "mean", "max", "std"]
_NUMERIC_COLUMNS = ["price", "bedrooms", "area"]


def _parse_price(apartment):
    """Return the listing's price as a float, or None when no usable price is present."""
    price_tag = apartment.find("span", class_="result-price")
    if price_tag is None:
        return None
    digits = re.sub(r"[^0-9]", "", price_tag.text)
    return float(digits) if digits else None


def _parse_housing(apartment):
    """Return (bedrooms, area) parsed from the housing span; each is None when not found."""
    housing_tag = apartment.find("span", class_="housing")
    if housing_tag is None:
        return None, None
    housing_text = housing_tag.text
    bedrooms_match = _BEDROOMS_RE.search(housing_text)
    area_match = _AREA_RE.search(housing_text)
    bedrooms = int(bedrooms_match.group(1)) if bedrooms_match else None
    area = float(area_match.group(1).replace(",", "")) if area_match else None
    return bedrooms, area


def get_apartment_stats(zipcode, header, num_bedrooms=0):
    """
    Fetch the Craigslist apartment search results for a zipcode and summarize them.

    @param zipcode : (int) zipcode for location of interest
    @param header : (dict) HTTP headers to send with the request
    @param num_bedrooms: (int) filter stats by desired number of bedrooms
                         the value is 0 if you want apartments with any number of bedrooms

    @return : (pd.DataFrame) with summary stats (min, median, mean, max, std)
              for price, area, bedrooms

    @raise requests.HTTPError : if the search page responds with an error status

    Listings without a price, or priced at 200 or less, are ignored.
    Prints the number of listings that remain after the bedroom filter.
    """
    url = "https://honolulu.craigslist.org/search/apa?postal={}&availabilityMode=0&sale_date=all+dates".format(zipcode)
    # `headers=` must be passed by keyword: the second positional argument of
    # requests.get is `params`, which would put the dict in the query string.
    response = get(url, headers=header, timeout=30)
    response.raise_for_status()
    html_soup = BeautifulSoup(response.text, 'html.parser')

    rows = []
    for apartment in html_soup.find_all('div', class_='result-info'):
        price = _parse_price(apartment)
        if price is None or price <= 200:
            continue

        heading_tag = apartment.find("h3", class_="result-heading")
        name = heading_tag.text.strip() if heading_tag is not None else None

        # Parsed fresh for every listing so values never leak from the previous one.
        bedrooms, area = _parse_housing(apartment)

        hood_tag = apartment.find("span", class_="result-hood")
        location = None
        if hood_tag is not None:
            location = re.sub("[^a-zA-Z0-9 ]", "", hood_tag.text).strip()

        rows.append({'name': name, 'price': price, 'bedrooms': bedrooms,
                     'area': area, 'location': location.upper() if location else None})

    # Build the frame in one step; DataFrame.append was removed in pandas 2.0.
    df = pd.DataFrame(rows, columns=['name', 'price', 'bedrooms', 'area', 'location'])
    # Force float dtype so missing values become NaN and are skipped by the stats.
    df = df.astype({column: float for column in _NUMERIC_COLUMNS})

    filtered = df if num_bedrooms == 0 else df.loc[df['bedrooms'] == num_bedrooms]
    print("# Houses: {}".format(len(filtered)))
    return filtered.agg({column: _STAT_NAMES for column in _NUMERIC_COLUMNS})

In [3]:
ua = UserAgent()
# The HTTP header name is hyphenated ("User-Agent"); a key with a space is not
# recognized as the user-agent header by servers.
header = {'User-Agent': str(ua.chrome)}

## Kailua Apartment Statistics
(2-bedroom apartments only)

In [4]:
# Summary statistics for 2-bedroom listings in zipcode 96734.
get_apartment_stats(zipcode=96734, header=header, num_bedrooms=2)

# Houses: 17


Unnamed: 0,price,bedrooms,area
min,1500.0,2.0,700.0
median,2800.0,2.0,1000.0
mean,3029.411765,2.0,1071.384615
max,4950.0,2.0,1800.0
std,1007.803742,0.0,340.510044


## Kaneohe Apartment Statistics
(2-bedroom apartments only)

In [5]:
# Summary statistics for 2-bedroom listings in zipcode 96744.
get_apartment_stats(zipcode=96744, header=header, num_bedrooms=2)

# Houses: 13


Unnamed: 0,price,bedrooms,area
min,1400.0,2.0,532.0
median,2085.0,2.0,784.0
mean,2067.307692,2.0,791.727273
max,2500.0,2.0,1000.0
std,321.666335,0.0,158.213837


## Check Any Zipcode

Run the cell below and enter a zipcode, along with the number of bedrooms you want to filter homes by, to check prices for any zipcode.

Note: To run the cell below, click on it and press "shift+enter"

In [None]:
# Prompt for the search parameters. int() raises ValueError on non-numeric input.
# NOTE(review): converting the zipcode to int drops any leading zeros.
new_zipcode = int(input("Enter zipcode: "))
# 0 means "do not filter by bedroom count" (see num_bedrooms in get_apartment_stats).
bedrooms = int(input("Enter desired # of bedrooms (type 0 to search for any number): "))
get_apartment_stats(zipcode=new_zipcode, header=header, num_bedrooms=bedrooms)