# Download

In [1]:
import time
import requests
import pandas as pd
from bs4 import BeautifulSoup
from dateutil.parser import parse as dateparse

ModuleNotFoundError: No module named 'requests'

In [3]:
buildings_dict = {
    28: {"name": "Promenade West", "region": "Bunker Hill"},
    27: {"name": "Promenade", "region": "Bunker Hill"},
    14: {"name": "Bunker Hill Tower", "region": "Bunker Hill"},
    36: {"name": "Douglas", "region": "Historic Core"},
    38: {"name": "El Dorado", "region": "Historic Core"},
    39: {"name": "Higgins", "region": "Historic Core"},
    47: {"name": "Pan American", "region": "Historic Core"},
    59: {"name": "Shy Barry Grand", "region": "Historic Core"},
    57: {"name": "The Rowan", "region": "Historic Core"},
    34: {"name": "655 Hope", "region": "Financial District"},
    35: {"name": "Library Court", "region": "Financial District"},    
    87: {"name": "Roosevelt", "region": "Financial District"},
    29: {"name": "110 Wilshire", "region": "City West"},
    30: {"name": "Vero", "region": "City West"},
    60: {"name": "Bartlett", "region": "Jewelry District"},
    62: {"name": "Brockman", "region": "Jewelry District"},
    76: {"name": "Little Tokyo Lofts", "region": "Little Tokyo"},
    80: {"name": "Teramachi", "region": "Little Tokyo"},
    128: {"name": "Tokyo Villa", "region": "Little Tokyo"},
    17: {"name": "940 E. 2nd Street", "region": "Arts District"},
    20: {"name": "Barker Block Warehouse", "region": "Arts District"},
    18: {"name": "Barker Block Lofts", "region": "Arts District"},
    21: {"name": "Beacon Lofts", "region": "Arts District"},
    22: {"name": "Biscuit Company Lofts", "region": "Arts District"},
    63: {"name": "Gallery Lofts", "region": "Arts District"},
    50: {"name": "Molino Street Lofts", "region": "Arts District"},
    127: {"name": "Mura", "region": "Arts District"},
    24: {"name": "Savoy", "region": "Arts District"},
    25: {"name": "Toy Factory", "region": "Arts District"},
    26: {"name": "Toy Warehouse", "region": "Arts District"},
    31: {"name": "Santee Village: Cornell", "region": "Fashion District"},
    64: {"name": "Santee Village: Eckardt", "region": "Fashion District"},
    32: {"name": "Textile Building", "region": "Fashion District"},
    33: {"name": "Tomahawk Lofts", "region": "Fashion District"},
    92: {"name": "Atelier", "region": "L.A. Live"},
    94: {"name": "Circa", "region": "L.A. Live"},
    65: {"name": "Concerto Lofts", "region": "L.A. Live"},
    37: {"name": "Eastern Columbia", "region": "L.A. Live"},
    66: {"name": "Elleven Lofts", "region": "L.A. Live"},
    67: {"name": "Evo", "region": "L.A. Live"},
    68: {"name": "Flower Street Lofts", "region": "L.A. Live"},
    69: {"name": "Grand Lofts", "region": "L.A. Live"},
    82: {"name": "Level", "region": "L.A. Live"},
    70: {"name": "Luma Lofts", "region": "L.A. Live"},
    71: {"name": "Market Lofts", "region": "L.A. Live"},
    72: {"name": "Metropolis Tower I", "region": "L.A. Live"},
    83: {"name": "Metropolis Tower II", "region": "L.A. Live"},
    93: {"name": "Park Hyatt", "region": "L.A. Live"},
    74: {"name": "Ritz Carlton", "region": "L.A. Live"},
    73: {"name": "Sky Lofts", "region": "L.A. Live"},
    85: {"name": "Ten50", "region": "L.A. Live"},
    75: {"name": "Skyline", "region": "L.A. Live"},
    81: {"name": "WaterMarke", "region": "L.A. Live"},
}

In [11]:
building_list = [
    dict(model="buildings.Building", pk=k, fields=dict(name=v['name'], region=v['region']))
    for k, v in buildings_dict.items()
]

In [14]:
import json

In [16]:
print(json.dumps(building_list, indent=4))

[
    {
        "model": "buildings.Building",
        "pk": 28,
        "fields": {
            "name": "Promenade West",
            "region": "Bunker Hill"
        }
    },
    {
        "model": "buildings.Building",
        "pk": 27,
        "fields": {
            "name": "Promenade",
            "region": "Bunker Hill"
        }
    },
    {
        "model": "buildings.Building",
        "pk": 14,
        "fields": {
            "name": "Bunker Hill Tower",
            "region": "Bunker Hill"
        }
    },
    {
        "model": "buildings.Building",
        "pk": 36,
        "fields": {
            "name": "Douglas",
            "region": "Historic Core"
        }
    },
    {
        "model": "buildings.Building",
        "pk": 38,
        "fields": {
            "name": "El Dorado",
            "region": "Historic Core"
        }
    },
    {
        "model": "buildings.Building",
        "pk": 39,
        "fields": {
            "name": "Higgins",
            "region": "H

In [3]:
def safe_price(value):
    return int(value.replace("$", "").replace(",", ""))

def safe_beds(value):
    return int(value.split("/")[0].replace("BR", ""))

def safe_baths(value):
    return int(value.split("/")[1].replace("BATHS", "").replace(",", ""))

def safe_sqft(value):
    return int(value.replace(",", ""))

def parse_li(li):
    parts = li.a.text.split()
    return dict(
        url=li.a['href'],
        unit=parts[1].strip(),
        price=safe_price(parts[3]),
        date=dateparse(parts[4]),
        bedrooms=safe_beds(parts[5]),
        bathrooms=safe_baths(parts[5]),
        square_feet=safe_sqft(parts[6])
    )

In [4]:
def request_building(number, listing_type="Listing", months=240):
    print("Requesting {}".format(buildings_dict[number]['name']))
    url = "https://www.dlxco.com/property/getproperty/name/R-{}/value/{}/building/{}".format(
        listing_type,
        months,
        number
    )
    payload = dict(building_url="the-promenade-west-lofts-condos-for-sale-lease-downtown-losangeles")
    r = requests.post(url, data=payload)
    soup = BeautifulSoup(r.text, "html5lib")
    row_list = []
    for li in soup.find_all("li", {"class": "leased-prop"}):
        try:
            row_list.append(parse_li(li))
        except Exception:
            print("Count not parse\n {}".format(li))
    df = pd.DataFrame(row_list)
    df['building_number'] = number
    df['building_name'] = buildings_dict[number]['name']
    df['region'] = buildings_dict[number]['region']
    time.sleep(1)
    return df

In [5]:
df = pd.concat([request_building(number) for number in buildings_dict.keys()])

Requesting Bunker Hill Tower
Requesting 940 E. 2nd Street
Requesting Barker Block Lofts
Requesting Barker Block Warehouse
Requesting Beacon Lofts
Requesting Biscuit Company Lofts
Requesting Savoy
Requesting Toy Factory
Requesting Toy Warehouse
Requesting Promenade
Count not parse
 <li class="leased-prop 430000"><a href="https://www.dlxco.com/the-promenade-west-lofts-condos-for-sale-lease-downtown-losangeles?mlsno=316009980IT">Unit  - $430,000, 02/28/2017, 1BR/1BATHS, 793 SQFT                                <span style="color: red; font-size: 10px; padding-left: 9px; font-weight: 700;"></span>

                            </a></li>
Count not parse
 <li class="leased-prop 450000"><a href="https://www.dlxco.com/the-promenade-west-lofts-condos-for-sale-lease-downtown-losangeles?mlsno=315004139IT">Unit  - $450,000, 12/07/2015, 1BR/1BATHS, 793 SQFT                                <span style="color: red; font-size: 10px; padding-left: 9px; font-weight: 700;"></span>

                         

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [6]:
df['price_per_square_foot'] = df['price'] / df['square_feet']

In [7]:
df['year'] = df['date'].dt.year

In [8]:
df = df[df.year >= 2011]

In [9]:
df.sort_values(["building_name", "date"], ascending=[True, False]).to_csv("./_data/sales.csv", index=False)