<a href="https://colab.research.google.com/github/kandluis/premium-property-finder/blob/master/notebooks/analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [38]:
#@title Install Requirements { display-mode: "form" }
!pip install requests-async aiohttp mypy

# Simple mypy cell magic for Colab
from IPython.core.magic import register_cell_magic
from IPython import get_ipython
from mypy import api

# Presumably switches pandas DataFrame output in this notebook to Colab's
# interactive table rendering -- confirm against the google.colab docs.
from google.colab import data_table
data_table.enable_dataframe_formatter()

@register_cell_magic
def mypy(line, cell):
  """Type-checks the cell with mypy and only runs it when the check passes.

  Args:
    line - Extra mypy command line flags, taken from the rest of the magic line.
    cell - The source code of the cell to check and then execute.

  Raises:
    TypeError - If mypy reports anything other than success.
  """
  # api.run returns (stdout report, stderr report, exit status). Only the two
  # reports are text, so the integer status is handled separately instead of
  # being treated like a string.
  report, errors, status = api.run(['-c', '\n' + cell] + line.split())
  for output in (report, errors):
    if output and not output.startswith('Success'):
      raise TypeError(output)
  if status:
    raise TypeError(f'mypy exited with status {status}')
  get_ipython().run_cell(cell)

In [39]:
#@title Setup { display-mode: "form" }
%%mypy

import aiohttp
import asyncio
import dataclasses
import decimal
import enum
import json
import pandas as pd #type: ignore
import random
import re
from urllib import parse

from geographiclib import geodesic #type: ignore

from typing import Any, cast, Dict, Iterable, List, Mapping, Optional, Union
from typing_extensions import TypedDict

@dataclasses.dataclass
class Secrets:
  """API credentials used by the notebook, one field per entry in the secrets file."""
  MAPQUEST_API_KEY: str
  ZILLOW_API_KEY: str
  CUTTLY: str
  SECRET: str

  @staticmethod
  def default():
    """Builds a Secrets instance from the JSON secrets file on the mounted drive.

    The top-level JSON object is expanded into keyword arguments, so its keys
    have to match the field names of this class.
    """
    with open('/content/drive/MyDrive/.secrets', 'r') as handle:
      payload = json.loads(handle.read())
    return Secrets(**payload)

@dataclasses.dataclass
class Globals:
  """Notebook-wide configuration: service endpoints, API keys and the HTTP session.

  The code in this notebook reads and writes these as class attributes
  (e.g. Globals.proxyUrl, Globals._session) rather than through an instance.
  """
  # Base URLs of the external services.
  geocodingBaseUrl: str = 'https://www.mapquestapi.com/geocoding/v1/address'
  proxyUrl: str = 'https://premium-property-finder-server.fly.dev/proxy'
  dbEndpoint: str = 'https://premium-property-finder-server.fly.dev/api'
  zillowBaseUrl: str = 'https://www.zillow.com/search/GetSearchPageState.htm'
  zillowApiBaseUrl: str = 'https://www.zillow.com/webservice'
  rentBitsApiBaseUrl: str = 'https://service.rentbits.com/api/v1/search'

  # Not annotated, so this is a plain class attribute and not a dataclass
  # field. It is evaluated when the class body runs, i.e. the secrets file is
  # read as soon as this cell executes.
  keys = Secrets.default()

  # Shared aiohttp session; main() assigns it before any request is made and
  # fetch() asserts that it is set.
  _session: Optional[aiohttp.ClientSession] = None

@dataclasses.dataclass
class Location:
  """A coordinate pair, built by getLatLong from the geocoder's 'latLng' entry."""
  # Latitude and longitude; boundingBox consumes them as decimal degrees.
  lat: float
  lng: float

async def fetch(url: str) -> Dict[str, Any]:
  """Requests `url` through the proxy endpoint and returns the decoded JSON body.

  The target URL is appended to Globals.proxyUrl and sent with the proxy API
  key plus a randomly picked referer and user agent. Globals._session must
  have been set before calling this.

  Args:
    url - The URL to retrieve through the proxy.

  Returns: The parsed JSON response, or a dict holding a single "error" entry
    when the response status is not 200.
  """
  agents = [
    "Mozilla/5.0 (Windows NT 10.0 Win64 x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1 Win64 x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36",
    "Mozilla/5.0 (iPad CPU OS 15_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/104.0.5112.99 Mobile/15E148 Safari/604.1",
  ]
  referers = [
    'https://stackoverflow.com/',
    'https://twitter.com/',
    'https://www.google.co.in/',
    'https://gem.gov.in/',
  ]
  fullUrl = f"{Globals.proxyUrl}/{url}"
  apiKey = Globals.keys.SECRET
  referer = random.choice(referers)
  agent = random.choice(agents)
  headers = {
    'Api-Key': apiKey,
    'X-Requested-With': 'XMLHttpRequest',
    'referer': referer,
    'User-Agent': agent,
  }
  session = Globals._session
  assert session
  async with session.get(fullUrl, headers=headers) as response:
    if response.status == 200:
      return await response.json()
    return {
      "error": f"Failed to retrieve data from {fullUrl}. Response: {response}"
    }

async def getLatLong(location: str) -> Union[Location, Dict[str, Any]]:
  """Fetches the lat/long of a location.

  Args:
    location - The geo location. Could be zip code, address, state, etc.

  Returns: The coordinates of the location if we can find it. Otherwise the
    response dict, with an 'error' entry describing why no location was found.
  """
  geoCodeUrl = f"{Globals.geocodingBaseUrl}?key={Globals.keys.MAPQUEST_API_KEY}&location={location.lower()}"
  response = await fetch(geoCodeUrl)
  results: List[Any] = response.get('results') or []
  locations: List[Any] = (results[0].get('locations') or []) if results else []
  if not locations:
    # A successful response without matches carries no 'error' entry yet, so
    # the note is appended to an existing error or stored on its own.
    note = f"Successful response with empty locations for location: {location}"
    previous = response.get('error')
    response['error'] = f"{previous}\n\n{note}" if previous else note
    return response

  return Location(**locations[0]['latLng'])

@dataclasses.dataclass
class LocationBox:
  """The four edges of a bounding box, as produced by boundingBox.

  north/south hold latitudes and east/west hold longitudes.
  """
  north: float
  east: float
  south: float
  west: float

def boundingBox(lat: float, lng: float, side: float) -> LocationBox:
  """
  Builds a box centered on (lat, lng) whose sides are 'side' miles long.

  Each edge is found by travelling half a side length from the center along
  the WGS84 ellipsoid, heading at 0, 90, 180 and 270 degrees.

  Only meant for 'side' values that are small compared to the earth radius
  and for points that are not too close to the poles.

  Args:
    lat - the latitude in degrees as a decimal.
    lng - the longitude in degrees as a decimal.
    side - the side length of the bounding box in miles.

  Returns: A LocationBox holding the latitude of the north/south edges and
    the longitude of the east/west edges, in degrees.
  """
  halfSideInMeters = side * 1.60934 * 1000 / 2
  wgs84 = geodesic.Geodesic.WGS84

  def travel(azimuth: float) -> Any:
    # Geodesic solution for moving half a side from the center at this heading.
    return wgs84.Direct(lat, lng, azimuth, halfSideInMeters)

  northLat = travel(0)['lat2']
  eastLng = travel(90)['lon2']
  southLat = travel(180)['lat2']
  westLng = travel(270)['lon2']
  return LocationBox(north=northLat, east=eastLng, south=southLat, west=westLng)

# Home categories; each value is a human readable label. @enum.unique rejects
# duplicate values at class creation time.
@enum.unique
class HomeType(enum.Enum):
  ALL = 'All'
  SINGLE_FAMILY = 'Single Family'
  LOT = 'Lot'
  MANUFACTURED = 'Manufactured'
  TOWNHOUSE = 'Townhouse'
  MULTI_FAMILY = 'Multi Family'

@dataclasses.dataclass
class Property:
  """A single for-sale listing as assembled by parseResult."""
  # Required fields, always passed by parseResult. `type` is filled from the
  # listing's 'listingType' entry.
  address: str
  detailUrl: str
  imgSrc: str
  price: decimal.Decimal
  statusText: str
  type: str

  # Optional details, filled in when the listing data provides them.
  area: Optional[float] = None
  baths: Optional[int] = None
  beds: Optional[int] = None
  city: Optional[str] = None
  # NOTE(review): addHDPResults stores the response's raw 'homeType' value
  # here without converting it to a HomeType member.
  homeType: Optional[HomeType] = None
  livingArea: Optional[float] = None
  lotArea: Optional[float] = None
  rentzestimate: Optional[decimal.Decimal] = None
  state: Optional[str] = None
  zestimate: Optional[float] = None
  # Kept as a string when the value is not numeric.
  zipCode: Optional[Union[int, str]] = None
  zpid: Optional[int] = None


def addHDPResults(parsedItem: Property, home: Any) -> None:
  """Adds a single HDP property result fetched from the Zillow API for an area.

  City, state and zip code are only replaced when the HDP data actually has a
  value for them, so values already parsed from the detail URL survive a
  sparse HDP record.

  Args:
    parsedItem - The partially parsed item to which we add HDP data.
    home - The HDP home information object.
  """
  parsedItem.baths = home.get('bathrooms')
  parsedItem.beds = home.get('bedrooms')
  parsedItem.homeType = home.get('homeType')
  parsedItem.livingArea = home.get('livingArea')
  parsedItem.rentzestimate = home.get('rentZestimate')
  parsedItem.zestimate = home.get('zestimate')

  # Going through str() keeps fractional prices exact (Decimal(float) would
  # expose the binary representation); `or 0` covers a missing or null price.
  parsedItem.price = decimal.Decimal(str(home.get('price') or 0))

  if home.get('city'):
    parsedItem.city = home['city']
  if home.get('state'):
    parsedItem.state = home['state']

  zipCode = home.get('zipcode')
  if zipCode:
    # Same best-effort rule as parseResult: keep non-numeric codes as text.
    try:
      parsedItem.zipCode = int(zipCode)
    except ValueError:
      parsedItem.zipCode = zipCode

  zpid = home.get('zpid')
  if zpid:
    parsedItem.zpid = int(zpid)



def addResults(parsedItem: Property, item: Mapping[str, Any]) -> None:
  """Adds the same fields as addHDPResults but when we don't have HDP data.

  Baths, beds, price and zpid are read straight from the search result item.
  A price text without a usable number leaves the current price untouched.

  Args:
    parsedItem - The partially parsed item to update in place.
    item - The JSON object of a single search result.
  """
  def toInt(value: Any) -> int:
    # Accepts 3, '3' and fractional counts such as '2.5' (truncated).
    try:
      return int(value)
    except ValueError:
      return int(float(value))

  baths = item.get('baths') or item.get('minBaths')
  if baths:
    parsedItem.baths = toInt(baths)

  beds = item.get('beds') or item.get('minBeds')
  if beds:
    parsedItem.beds = toInt(beds)

  # Keep only digits and the decimal point, e.g. '$1,748,000' -> '1748000'.
  digits = re.sub(r'[^\d.]', '', str(item.get('price') or '0'))
  price: Optional[decimal.Decimal]
  try:
    price = decimal.Decimal(digits)
  except decimal.InvalidOperation:
    price = None
  if price is not None:
    parsedItem.price = price

  zpid = item.get('zpid')
  if zpid:
    parsedItem.zpid = toInt(zpid)


def parseResult(item: Any) -> Property:
  """Parses a single property result fetched from the Zillow API for an area.

  Zip code, state, city and street address are first guessed from the detail
  URL, then refined with the HDP data (when present) or the plain result
  fields.

  Args:
    item - The JSON object corresponding to a single property fetched from Zillow.

  Returns: The parsed Property object.
  """
  parsedItem = Property(
    address=item.get('address'),
    type=item.get('listingType'),
    detailUrl=item.get('detailUrl'),
    imgSrc=item.get('imgSrc'),
    statusText=item.get('statusText'),
    price=decimal.Decimal(0),
  )
  # The URL path looks like /something/address-separated-by-city-state-zip/...
  # Only the path is inspected, so an absolute URL parses like a relative one
  # and a missing or short URL simply yields no components.
  pathParts = parse.urlparse(item.get('detailUrl') or '').path.split('/')
  slug = pathParts[2] if len(pathParts) > 2 else ''
  addressComponents = slug.split('-') if slug else []
  # These are best-effort. If we have more data, it gets replaced later.
  if addressComponents:
    try:
      parsedItem.zipCode = int(addressComponents[-1])
    except ValueError:
      parsedItem.zipCode = addressComponents[-1]
  if len(addressComponents) > 1:
    parsedItem.state = addressComponents[-2]
  if len(addressComponents) > 2:
    # This is not always valid. If a city is two words, we'll only get the
    # last one! :o
    parsedItem.city = addressComponents[-3]
  street = ' '.join(addressComponents[:-3])
  if street:
    # Only replace the listing's own address when the URL gave us a street.
    parsedItem.address = street

  area = item.get('area') or item.get('minArea')
  if area:
    parsedItem.area = float(area)

  lotAreaString = item.get('lotAreaString')
  if lotAreaString:
    try:
      parsedItem.lotArea = float(re.sub(r'[^\d.]', '', str(lotAreaString)))
    except ValueError:
      # No parseable number in the text; leave the lot area unknown.
      parsedItem.lotArea = None

  hdpData = item.get('hdpData')
  if hdpData:
    addHDPResults(parsedItem, hdpData.get('homeInfo') or {})
  else:
    addResults(parsedItem, item)

  return parsedItem

async def fetchProperties(
  geoLocation: str,
  radius: float,
  priceFrom: int,
  priceMost: int,
) -> List[Property]:
  """Fetches a list of properties currently for sale in the area surrounding the location.

  Might return properties outside the specified radius (but not by too much).

  Args:
    geoLocation - The location name as commonly referred (eg, Google Map-able)
    radius - The radius around the location within which we wish to find properties.
    priceFrom - The minimum price of any property returned.
    priceMost - The maximum price of any property returned.

  Returns: The located properties. Empty when the location cannot be geocoded
    or the search response carries no map results.
  """
  coords = await getLatLong(geoLocation)
  if isinstance(coords, dict):
    # getLatLong hands back the raw (error) response when nothing was found.
    return []
  # boundingBox takes a side length, i.e. twice the radius.
  box = boundingBox(coords.lat, coords.lng, radius * 2)
  wants = json.dumps({
    'cat1': ['mapResults'],
  }, separators=(',', ':'))
  searchQueryState = json.dumps({
    'mapBounds': dataclasses.asdict(box),
    'filterState': {
      'price': {
        'min': priceFrom,
        'max': priceMost,
      },
    },
  }, separators=(',', ':'))
  zillowUrl = f"{Globals.zillowBaseUrl}?searchQueryState={searchQueryState}&wants={wants}"
  data = await fetch(zillowUrl)
  # fetch returns {"error": ...} on a failed request, so every level of the
  # expected payload is looked up defensively.
  cat1: Dict[str, Any] = data.get('cat1') or {}
  searchResults: Dict[str, Any] = cat1.get('searchResults') or {}
  propertyListings: List[Any] = searchResults.get('mapResults') or []
  return [parseResult(prop) for prop in propertyListings if prop.get('zpid') and prop.get('price')]


async def getRentBitsEstimate(loc: Location) -> Optional[decimal.Decimal]:
  """Calculates the median known rental values in the given area using the rent bits API.

  Args:
    loc - The point around which to search; a bounding box with 1 mile sides
      centered on it is queried.

  Returns: The estimated price or None if not possible to estimate.
  """
  box = boundingBox(loc.lat, loc.lng, 1)
  url = f"{Globals.rentBitsApiBaseUrl}?bounds={box.south},{box.north},{box.west},{box.east}"
  res = await fetch(url)
  if 'error' in res or not res.get('data'):
    return None

  prices: List[decimal.Decimal] = []
  for item in res['data']:
    if not item.get('price'):
      continue
    # Keep only digits and the decimal point of the price text.
    digits = re.sub(r'[^\d.]', '', str(item['price']))
    try:
      prices.append(decimal.Decimal(digits))
    except decimal.InvalidOperation:
      # One unparseable listing should not discard the whole estimate.
      continue
  if not prices:
    return None

  prices.sort()
  mid = len(prices) // 2
  if len(prices) % 2:
    return prices[mid]
  # Even count: average the two middle values.
  return (prices[mid - 1] + prices[mid]) / 2


async def fetchRentalBitsEstimates(properties: Iterable[Property]) -> Dict[Union[str, int], decimal.Decimal]:
  """Fetches the rental estimates from bits rental API.

  Args:
    properties: The properties for which to try and fetch a rental estimate.

  Returns: A dict from zpid to the estimated rent. Properties whose zip code
    could not be estimated are left out.
  """
  # The input is walked twice, so materialize it in case it is a one-shot
  # iterator.
  props = list(properties)
  # We only do this by zip code to reduce the load on the API.
  zips = {prop.zipCode for prop in props if prop.zipCode}
  rents: Dict[Union[int, str], decimal.Decimal] = {}

  async def fetchZipRent(zipCode: Union[int, str]) -> None:
    loc = await getLatLong(str(zipCode))
    if isinstance(loc, dict):
      return
    rent = await getRentBitsEstimate(loc)
    if rent:
      rents[zipCode] = rent

  # Best effort: all lookups run concurrently (no throttling) and a failing
  # zip code only leaves that zip code without an entry in `rents`.
  await asyncio.gather(*[fetchZipRent(zipCode) for zipCode in zips], return_exceptions=True)

  estimates: Dict[Union[str, int], decimal.Decimal] = {}
  for prop in props:
    zpid, zipCode = prop.zpid, prop.zipCode
    if not zpid or not zipCode:
      continue
    # Not every zip code is guaranteed to have produced an estimate.
    rent = rents.get(zipCode)
    if rent is not None:
      estimates[zpid] = rent
  return estimates


async def attachRentestimates(properties: Iterable[Property]) -> List[Property]:
  """Attaches a rental estimate to each property that is missing one.

  Properties are updated in place.

  Args:
    properties - The properties to which we attach a rental estimate.

  Returns: A list of the same properties, with rental estimates attached
    where one could be found.
  """
  # Materialize once: the input is traversed more than once below and may be
  # a one-shot iterator.
  props = list(properties)
  needRentEstimates = [
    prop for prop in props
    # Properties we can compute a ratio for for which we need estimates.
    if not prop.rentzestimate and prop.zpid and prop.address and prop.zipCode and prop.price
  ]
  if not needRentEstimates:
    return props

  rentBitsEstimates = await fetchRentalBitsEstimates(needRentEstimates)
  for prop in props:
    if prop.zpid:
      # Keep the current value when no estimate came back for this zpid.
      prop.rentzestimate = rentBitsEstimates.get(prop.zpid, prop.rentzestimate)
  return props

async def filterAndFetchProperties(
    geoLocation: str, radius: float, priceFrom: int, priceMost: int
) -> List[Property]:
  """Fetches the for-sale listings around a location and attaches rental estimates.

  Args:
    geoLocation - The location to search around, forwarded to fetchProperties.
    radius - The search radius, forwarded to fetchProperties.
    priceFrom - The minimum listing price.
    priceMost - The maximum listing price.

  Returns: The listings from fetchProperties after attachRentestimates ran
    over them.
  """
  listings = await fetchProperties(geoLocation, radius, priceFrom, priceMost)
  withRents = await attachRentestimates(listings)
  return withRents

In [40]:
#@title Filter Form { display-mode: "form" }
@dataclasses.dataclass
class FetchPropertiesRequest:
  """Search parameters for one run of the notebook.

  main() expands an instance into the keyword arguments of
  filterAndFetchProperties, so the field names must match its parameters.
  """
  # The trailing `#@param` comments are Colab form directives; priceFrom has
  # none and keeps the value written here.
  geoLocation: str = 'Milpitas, CA' #@param { type: 'string'} 
  radius: float = 2 #@param { type: 'slider', min: 0.0, max: 4.0, step: 0.25 }
  priceFrom: int = 0
  priceMost: int = 1800000 #@param { type : 'slider', min: 100000, max: 3000000, step: 25000}


In [41]:
#@title Run { display-mode: "form" }
async def main() -> pd.DataFrame:
  """Runs the search described by FetchPropertiesRequest and tabulates the result.

  A fresh aiohttp session is stored in Globals._session for the duration of
  the fetch; it is closed again once the listings have been retrieved.

  Returns: A DataFrame with one row per property, with the price converted
    to a float.
  """
  request = dataclasses.asdict(FetchPropertiesRequest())
  async with aiohttp.ClientSession() as session:
    Globals._session = session
    props = await filterAndFetchProperties(**request)

  records = []
  for prop in props:
    record = dataclasses.asdict(prop)
    record['price'] = float(record.get('price', 0))
    records.append(record)
  return pd.DataFrame.from_records(data=records)

# Schedule main() on the notebook's running event loop without waiting for
# it; the result is read from `task` in the next cell.
task = asyncio.create_task(main())

In [42]:
#@title Must wait a few seconds before running this for all data to be fetched. { display-mode: "form" }
# Shows the DataFrame returned by main(). Task.result() raises
# InvalidStateError while the task is still running, hence the wait.
task.result()

Unnamed: 0,address,detailUrl,imgSrc,price,statusText,type,area,baths,beds,city,homeType,livingArea,lotArea,rentzestimate,state,zestimate,zipCode,zpid
0,3459 Lisbon Dr San,/homedetails/3459-Lisbon-Dr-San-Jose-CA-95132/...,https://photos.zillowstatic.com/fp/c6be42817d1...,1748000.0,House for sale,,1786.0,3.0,4.0,San Jose,SINGLE_FAMILY,1786.0,,4233,CA,1628500,95132,19486210
1,2036 Trento Loop,/homedetails/2036-Trento-Loop-Milpitas-CA-9503...,https://photos.zillowstatic.com/fp/2a4e1033eb7...,1425000.0,Townhouse for sale,,1981.0,4.0,3.0,Milpitas,TOWNHOUSE,1981.0,,4200,CA,1425648,95035,166730822
2,860 Towne Dr,/homedetails/860-Towne-Dr-Milpitas-CA-95035/51...,https://photos.zillowstatic.com/fp/a54763698fb...,838000.0,Townhouse for sale,,1158.0,3.0,2.0,Milpitas,TOWNHOUSE,1158.0,,3200,CA,878000,95035,51073158
3,1464 Edsel Dr,/homedetails/1464-Edsel-Dr-Milpitas-CA-95035/1...,https://photos.zillowstatic.com/fp/2fd4516e55c...,1228800.0,House for sale,,1100.0,2.0,3.0,Milpitas,SINGLE_FAMILY,1100.0,,3777,CA,1104300,95035,19480881
4,2073 Conway St,/homedetails/2073-Conway-St-Milpitas-CA-95035/...,https://photos.zillowstatic.com/fp/6dda1c2946d...,1388800.0,House for sale,,945.0,2.0,3.0,Milpitas,SINGLE_FAMILY,945.0,,3384,CA,1388812,95035,19473035
5,135 S Gadsden Dr,/homedetails/135-S-Gadsden-Dr-Milpitas-CA-9503...,https://photos.zillowstatic.com/fp/1d2ca113468...,1450000.0,House for sale,,1762.0,3.0,5.0,Milpitas,SINGLE_FAMILY,1762.0,,4200,CA,1450008,95035,19481095
6,1116 N Abbott Ave,/homedetails/1116-N-Abbott-Ave-Milpitas-CA-950...,https://photos.zillowstatic.com/fp/c99fc147dbf...,698000.0,Condo for sale,,1143.0,2.0,3.0,Milpitas,CONDO,1143.0,,3399,CA,699686,95035,19472070
7,2190 Bristolwood Ln San,/homedetails/2190-Bristolwood-Ln-San-Jose-CA-9...,https://photos.zillowstatic.com/fp/ce4b0e435bc...,1199000.0,House for sale,,1585.0,2.0,4.0,San Jose,SINGLE_FAMILY,1585.0,,4099,CA,1200667,95132,19485609
8,712 Carlsbad St,/homedetails/712-Carlsbad-St-Milpitas-CA-95035...,https://photos.zillowstatic.com/fp/30305fd3857...,998888.0,House for sale,,1290.0,2.0,3.0,Milpitas,SINGLE_FAMILY,1290.0,,3580,CA,999500,95035,19481897
9,130 Solar Ct,/homedetails/130-Solar-Ct-Milpitas-CA-95035/19...,https://photos.zillowstatic.com/fp/c1dabc19ee5...,1399888.0,House for sale,,1000.0,3.0,3.0,Milpitas,SINGLE_FAMILY,1000.0,,3460,CA,1399938,95035,19481041
