In [None]:
from typing import List

from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
from bs4.element import Tag

def create_url(zipcode: int, sort_by: str=None, verbose: bool=False) -> str:
    """
    Generates a URL to the Zillow properties listed
    for the given zipcode.
    
    Parameters
    ---------
    zipcode : int
        The zipcode to generate the URL for. Integer values are
        zero-padded to five digits, since an int cannot carry the
        leading zeros some zipcodes have (e.g. 2134 -> '02134').
        A string is also accepted and is used as given.
        
    sort_by : str, optional; default = None; options: {
    'high-low', 'low-high', 'newest', 'bedrooms', 'bathrooms',
    'house-size', 'lot-size', 'zest-high-low', 'zest-low-high'
    }
        Sort the listings by the given filter. Any other value
        (including None) falls back to the default relevance sort.
        
    verbose : bool, optional; default = False
        Print the generated URL before returning it.
        
    Returns
    -------
    url : str
    """
    sort_options = {
        'high-low': 'priced_sort',
        'low-high': 'pricea_sort',
        'newest': 'days_sort',
        'bedrooms': 'beds_sort',
        'bathrooms': 'baths_sort',
        'house-size': 'size_sort',
        'lot-size': 'lot_sort',
        'zest-high-low': 'zest_sort',
        'zest-low-high': 'zesta_sort'
    }
    
    # Restore leading zeros lost when the zipcode is held as an int;
    # five-digit values are unaffected by the padding.
    if isinstance(zipcode, int):
        zip_segment = f'{zipcode:05d}'
    else:
        zip_segment = str(zipcode)

    option = sort_options.get(sort_by, 'globalrelevanceex_sort')
    url = f'https://www.zillow.com/homes/for_sale/{zip_segment}/0_singlestory/{option}'

    if verbose:
        print(url)

    return url

def get_zpids(zipcode: int, sort_by: str=None, verbose: bool=False) -> List[int]:
    """
    Extracts all Zillow Property ID's (zpid) for each listing
    in the given zipcode.
    
    Parameters
    ---------
    zipcode : int
        The zipcode to retrieve the ZPID's for
        
    sort_by : str, optional; default = None; options: {
        'high-low', 'low-high', 'newest', 'bedrooms', 'bathrooms',
        'house-size', 'lot-size', 'zest-high-low', 'zest-low-high'
    }
        Sort the listings by the given filter.
        
    verbose : bool, optional; default = False
        Forwarded to create_url, which prints the URL when True.
        
    Returns
    -------
    zpids : List[int]
        The IDs in page order. Empty when the page does not contain
        the expected listings container.
    
    Raises
    ------
    urllib.error.URLError
        If the page cannot be fetched.
    ValueError
        If a listing card's id is not of the form 'zpid_<digits>'.
    """
    url = create_url(zipcode, sort_by, verbose)
    # TODO replace with get_headers()
    headers = {'User-Agent': 'Mozilla/5.0'}

    req = Request(url, headers=headers)
    # Use the response as a context manager so the connection is
    # closed even if reading fails.
    with urlopen(req) as response:
        webpage = response.read()
    # parse the HTML into a tree; the parser is named explicitly so the
    # result does not depend on which optional parsers are installed
    soup = BeautifulSoup(webpage, 'html.parser')
    
    listings = soup.find(
        'ul', 
        {'class': 'photo-cards photo-cards_wow photo-cards_short'}
    )
    # No results container on the page: nothing to extract.
    if listings is None:
        return []

    zpids = []
    for li in listings.find_all('li'):
        article = li.find(
            'article',
            {'class': 'list-card list-card-short list-card_not-saved'}
        )
        # Not every <li> holds a listing card; skip the ones that don't.
        if article is None:
            continue

        element_id = article.get('id')
        # A card without an id attribute carries no zpid.
        if element_id is None:
            continue

        zpids.append(int(element_id.replace('zpid_', '')))
        
    return zpids
    

def get_headers() -> dict:
    """
    Builds browser-like HTTP request headers.
    
    The 'accept-encoding' header asks for an uncompressed ('identity')
    body. Advertising gzip/deflate/sdch/br lets the server reply with a
    compressed payload, and urllib's urlopen hands that payload back
    without decompressing it, so the HTML would arrive as unreadable
    bytes.
    
    Returns
    -------
    headers : dict
        A new dict on every call, mapping header names to values.
    """
    headers = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        # Only request encodings the caller can read without extra decoding.
        'accept-encoding': 'identity',
        'accept-language': 'en-GB,en;q=0.8,en-US;q=0.6,ml;q=0.4',
        'cache-control': 'max-age=0',
        'upgrade-insecure-requests': '1',
        # Alternative full browser user agent, kept for reference:
        #'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36',
        'user-agent': 'Mozilla/5.0'
    }
    return headers