# Download

In [96]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from dateutil.parser import parse as dateparse

In [181]:
buildings_dict = {
    28: {"name": "Promenade West", "region": "Bunker Hill"},
    27: {"name": "Promenade", "region": "Bunker Hill"},
    14: {"name": "Bunker Hill Tower", "region": "Bunker Hill"},
    36: {"name": "Douglas", "region": "Historic Core"},
    38: {"name": "El Dorado", "region": "Historic Core"},
    39: {"name": "Higgins", "region": "Historic Core"},
    47: {"name": "Pan American", "region": "Historic Core"},
    59: {"name": "Shy Barry Grand", "region": "Historic Core"},
    57: {"name": "The Rowan", "region": "Historic Core"},
    34: {"name": "655 Hope", "region": "Financial District"},
    35: {"name": "Library Court", "region": "Financial District"},    
    87: {"name": "Roosevelt", "region": "Financial District"},
    29: {"name": "110 Wilshire", "region": "City West"},
    30: {"name": "Vero", "region": "City West"},
    60: {"name": "Bartlett", "region": "Jewelry District"},
    62: {"name": "Brockman", "region": "Jewelry District"},
}

In [182]:
def safe_price(value):
    return int(value.replace("$", "").replace(",", ""))

def safe_beds(value):
    return int(value.split("/")[0].replace("BR", ""))

def safe_baths(value):
    return int(value.split("/")[1].replace("BATHS", "").replace(",", ""))

def safe_sqft(value):
    return int(value.replace(",", ""))

def parse_li(li):
    parts = li.a.text.split()
    return dict(
        url=li.a['href'],
        unit=parts[1].strip(),
        price=safe_price(parts[3]),
        date=dateparse(parts[4]),
        bedrooms=safe_beds(parts[5]),
        bathrooms=safe_baths(parts[5]),
        square_feet=safe_sqft(parts[6])
    )

In [183]:
def request_building(number, listing_type="Listing", months=240):
    print("Requesting {}".format(buildings_dict[number]['name']))
    url = "https://www.dlxco.com/property/getproperty/name/R-{}/value/{}/building/{}".format(
        listing_type,
        months,
        number
    )
    payload = dict(building_url="the-promenade-west-lofts-condos-for-sale-lease-downtown-losangeles")
    r = requests.post(url, data=payload)
    soup = BeautifulSoup(r.text, "html5lib")
    row_list = []
    for li in soup.find_all("li", {"class": "leased-prop"}):
        try:
            row_list.append(parse_li(li))
        except Exception:
            print("Count not parse\n {}".format(li))
    df = pd.DataFrame(row_list)
    df['building_number'] = number
    df['building_name'] = buildings_dict[number]['name']
    df['region'] = buildings_dict[number]['region']
    return df

In [184]:
df = pd.concat([request_building(number) for number in buildings_dict.keys()])

Requesting 655 Hope
Requesting Shy Barry Grand
Requesting Douglas
Count not parse
 <li class="leased-prop 715000"><a href="https://www.dlxco.com/the-promenade-west-lofts-condos-for-sale-lease-downtown-losangeles?mlsno=22180120IT">Unit PH C - $715,000, 04/04/2014, 1BR/3BATHS, 1,387 SQFT                                <span style="color: red; font-size: 10px; padding-left: 9px; font-weight: 700;"></span>

                            </a></li>
Count not parse
 <li class="leased-prop 592000"><a href="https://www.dlxco.com/the-promenade-west-lofts-condos-for-sale-lease-downtown-losangeles?mlsno=12-631137">Unit PH B - $592,000, 12/14/2012, 2BR/2BATHS, 1,370 SQFT                                <span style="color: red; font-size: 10px; padding-left: 9px; font-weight: 700;"></span>

                            </a></li>
Requesting El Dorado
Requesting Higgins
Requesting Bartlett
Requesting Bunker Hill Tower
Requesting Pan American
Requesting Library Court
Requesting Brockman
Requesting Roosevel

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [185]:
df['price_per_square_foot'] = df['price'] / df['square_feet']

In [180]:
df.sort_values(["building_name", "date"], ascending=[True, False]).to_csv("./_data/sales.csv", index=False)