## Parsing New York City Apartment Listing

In [1]:
import requests

In [2]:
from bs4 import BeautifulSoup

In [3]:
r = requests.get("https://newyork.craigslist.org/search/aap")

In [4]:
r.raise_for_status()

In [6]:
soup = BeautifulSoup(r.text, 'html.parser')

In [7]:
price_spans = soup.select('span .result-price')

In [8]:
prices = [int(span.text[1:]) for span in price_spans]

In [9]:
print(prices)

[2950, 1799, 1845, 1950, 1050, 3002, 3400, 2300, 3667, 2550, 1900, 2300, 3300, 4498, 2411, 2100, 3595, 2690, 2567, 1500, 1800, 4925, 3949, 4100, 2690, 2800, 2350, 2700, 4349, 3299, 2097, 1350, 1850, 1800, 2300, 3950, 1450, 2890, 1050, 3500, 1990, 2850, 2429, 3350, 2360, 950, 2138, 2299, 3050, 2400, 2145, 2699, 4079, 3799, 3000, 3200, 3667, 4200, 2585, 1050, 1725, 2300, 3800, 2250, 1750, 3388, 1800, 1504, 2862, 2659, 3400, 2954, 3000, 2700, 2700, 2395, 2500, 2700, 2860, 2040, 1849, 4200, 2694, 1300, 3500, 1500, 1000, 2550, 3900, 1000, 3290, 2350, 1700, 3300, 800, 2801, 2699, 800, 2900, 4399, 1600, 1600, 2850, 2900, 1600, 2400, 1900, 2400, 2300, 1700, 1900, 2000, 2450, 1600, 2100, 1900, 2450, 1800, 3077, 1800]


In [12]:
print("Highest price: ${}".format(max(prices)))

Highest price: $4925


In [13]:
print("Minimum price: ${}".format(min(prices)))

Minimum price: $800


In [14]:
print("Average price: ${}".format(sum(prices)/len(prices)))

Average price: $2524.6583333333333


## Parsing Canadian Cities Apartment Listings

In [59]:
r = requests.get("https://www.craigslist.org/about/sites")

In [60]:
r.raise_for_status()

In [61]:
soup = BeautifulSoup(r.text, 'html.parser')

In [62]:
ca_heading = soup.find('a', attrs={'name' : 'CA'} ).find_parent('h1')
ca_content = ca_heading.find_next_sibling('div')

In [63]:
pr_headings = ca_content.findAll('h4')
cities = []

In [64]:
for heading in pr_headings:
    ul = heading.find_next_sibling('ul')
    links = ul.findChildren('a')
    
    for link in links:
        cities.append({
            
            'province' : heading.text,
            'city' : link.text,
            'url' : link.attrs['href']
            
        })

In [65]:
print(urls[0:1])

[{'province': 'Alberta', 'city': 'calgary', 'url': 'https://calgary.craigslist.ca/'}]


In [66]:
for city in cities:
    r = requests.get("{}/search/apa".format(city['url']))
    r.raise_for_status
    soup = BeautifulSoup(r.text, 'html.parser')
    price_spans = soup.select('span.result-price')
    prices = [int(span.text[1:]) for span in price_spans]
    city['prices'] = filter(lambda x: 100 < x < 10000, prices)
    

In [67]:
print(cities[1])

{'province': 'Alberta', 'city': 'edmonton', 'url': 'https://edmonton.craigslist.ca/', 'prices': <filter object at 0x000000ADB6C6C278>}


In [68]:
absmax = max(cities, key=lambda x: max(x['prices']))

In [69]:
print(absmax)

{'province': 'British Columbia', 'city': 'cariboo', 'url': 'https://cariboo.craigslist.ca/', 'prices': <filter object at 0x000000ADB5E06320>}


In [70]:
print("Highest price in Canda: ${} found in {}, {} ".format(max(absmax['prices']), absmax['city'], absmax['province']))

ValueError: max() arg is an empty sequence

In [33]:
absmin = min(cities, key=lambda x: min(x['prices']))

In [37]:
print("Lowest price in Canda: ${} found in {}, {} ".format(max(absmin['prices']), absmin['city'], absmin['province']))

Lowest price in Canda: $3200 found in peace river country, Alberta 


In [40]:
maxavg = max(cities, key=lambda x: sum(x['prices'])/len(x['prices']))

In [43]:
print("Highest average price: ${}, found in {}, {}".format(sum(maxavg['prices'])/len(['prices']), maxavg['city'], maxavg['province']))

Highest average price: $248232443.0, found in comox valley, British Columbia


In [44]:
minavg = min(cities, key=lambda x: sum(x['prices'])/len(x['prices']))

In [45]:
print("Lowest average price: ${}, found in {}, {}".format(sum(minavg['prices'])/len(['prices']), minavg['city'], minavg['province']))

Lowest average price: $700.0, found in whitehorse, Yukon Territory
