<a href="https://colab.research.google.com/github/lcqsigi/big-data2/blob/main/063_Zillow_For_Sale_Properties_Python_COPY.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Steps
### Get Keys
1) Get Free [Rapid API Key by creating an account](https://rapidapi.com/marketplace)

Store each key in a central and private location.

### Subscribe to APIs

1) Subscribe to [Zillow on Rapid API](https://rapidapi.com/apimaker/api/zillow-com1/)

## <font color="blue">Install Packages</font>

## <font color="blue">Imports</font>

In [1]:
from google.colab import drive, files # specific to Google Colab
import pandas as pd
import requests
import json
import time

# show all columns
pd.set_option('display.max_columns', None)

## <font color="blue">Functions</font>

## <font color="blue">Local & Constants</font>

In [2]:
############
# OPTIONAL #
############

# mount drive
drive.mount('/content/drive', force_remount=False)

# data location
file_dir = '/content/drive/My Drive/' # optional

############
# REQURIED #
############
city = 'san leandro'
state = 'ca'
search_str = city + ', ' + state
print('Search string:', search_str)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Search string: san leandro, ca


## <font color="blue">Data</font>

### Get API Keys

In [3]:
# read in api key file
df_api_keys = pd.read_csv(file_dir + 'api_keys.csv')

# get keys
rapid_api_key = df_api_keys.loc[df_api_keys['API'] =='rapid']['KEY'].iloc[0] # replace this with your own key

In [4]:
rapid_api_key

'8b63d59cecmsha5a796447533ec2p11f828jsnf7fd2c8395a5'

In [5]:
url = "https://zillow-com1.p.rapidapi.com/propertyExtendedSearch"

In [6]:
headers = {
    'x-rapidapi-host': "zillow-com1.p.rapidapi.com",
    'x-rapidapi-key': rapid_api_key
    }
import requests
req = requests.request('GET', url, headers=headers)

## <font color="green">Zillow</font>

### <font color="purple">Endpoint: Property</font>

In [7]:
# get data
url = "https://zillow-com1.p.rapidapi.com/propertyExtendedSearch"

querystring = {"location":search_str,
               "home_type":"HOUSES",
               "minPrice": "800000",
               "maxPrice": "1000000",
               "sqftMin": "1000",
               "bedsMax": "3",
               "buildYearMin": "1980"}

headers = {
    'x-rapidapi-host': "zillow-com1.p.rapidapi.com",
    'x-rapidapi-key': rapid_api_key
    }

z_for_sale_resp = requests.request("GET", url, headers=headers, params=querystring)

# transform to json
z_for_sale_resp_json = z_for_sale_resp.json()
z_for_sale_resp_json

{'resultsPerPage': 41,
 'totalPages': 0,
 'schools': {},
 'totalResultCount': 0,
 'currentPage': 1}

In [8]:
# view data
df_z_for_sale = pd.json_normalize(data=z_for_sale_resp_json['props'])
print('Num of rows:', len(df_z_for_sale))
print('Num of cols:', len(df_z_for_sale.columns))
df_z_for_sale.head()

KeyError: 'props'

In [None]:
# download file


In [None]:
# get zpids to a list
zpid_list = df_z_for_sale['zpid'].tolist()
zpid_list

In [None]:
# get property detail

# create empty list
prop_detail_list = []

# iterate through list of properties
for zpid in zpid_list:

  # end point
  url = "https://zillow-com1.p.rapidapi.com/property"

  querystring = {"zpid":zpid}

  # header
  headers = {
      'x-rapidapi-host': "zillow-com1.p.rapidapi.com",
      'x-rapidapi-key': rapid_api_key
      }

  # get property detail
  z_prop_detail_resp = requests.request("GET", url, headers=headers, params=querystring)
  z_prop_detail_resp_json = z_prop_detail_resp.json()

  # wait 1 sec based on limit
  time.sleep(1.5)

  prop_detail_list.append(z_prop_detail_resp_json)

In [None]:
# convert to dataframe
df_z_prop_detail = pd.json_normalize(prop_detail_list)
print('Num of rows:', len(df_z_prop_detail))
print('Num of cols:', len(df_z_prop_detail.columns))
df_z_prop_detail.head(2)

In [None]:
# columns of interest
detail_cols = ['streetAddress',
 'city',
 'county',
 'zipcode',
 'state',
 'price',
 'homeType',
 'timeOnZillow',
 'zestimate',
 'rentZestimate',
 'livingArea',
 'bedrooms',
 'bathrooms',
 'yearBuilt',
 'description',
 'priceHistory',
 'taxHistory',
 'zpid'
 ]

# retain limited columns for output
df_z_prop_detail_output = df_z_prop_detail[detail_cols]
df_z_prop_detail_output.head()

In [None]:
# download file
df_z_prop_detail_output.to_csv('df_z_prop_detail_output.csv')
files.download('df_z_prop_detail_output.csv')

## Zillow ZPID
Useful to retrieve zillow detailed data for ANY address

In [None]:
data = """Name,Address
house_00,"10521 Stella St; OAKLAND, CA 94605"
house_01,"6684 Banning Dr; OAKLAND, CA 94611"
house_02,"1678 Trestle Glen Rd.; OAKLAND, CA 94610"
house_03,"1402 Glendale Ave; BERKELEY, CA 94708"
house_04,"1244 Kains Ave.; BERKELEY, CA 94706"
house_05,"1904 Blake Street; BERKELEY, CA 94704"
house_06,"1829 Channing Way; BERKELEY, CA 94702 "
house_07,"1340 8th St; BERKELEY, CA 94710"
house_08,"1195 Walnut Street; BERKELEY, CA 94707"
house_09,"2316 Acton St; BERKELEY, CA 94702"
house_10,"405 Cape Cod Drive; SAN LEANDRO, CA 94578"
house_11,"4355 Terrabella Pl; OAKLAND, CA 94619"
house_12,"3910 Malcolm Ave; Oakland, CA 94605"
house_13,"3594 Kimball Way; CONCORD, CA 94518"
house_14,"1326 Babel Ln; CONCORD, CA 94518"
house_15,"5145 Garaventa DR; CONCORD, CA 94521"
house_16,"1979 Marta Dr; PLEASANT HILL, CA 94523"
"""

In [None]:
import pandas as pd
import requests
import json
import urllib
import io

In [None]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="example app")

In [None]:
df = pd.read_csv(io.StringIO(data))
df

In [None]:
df["loc"] = df["Address"].apply(geolocator.geocode)

In [None]:
df["point"]= df["loc"].apply(lambda loc: tuple(loc.point) if loc else None)

In [None]:
df[['lat', 'lon', 'altitude']] = pd.DataFrame(df['point'].to_list(), index=df.index)

In [None]:
# import the library and its Marker clusterization service
import folium
from folium.plugins import MarkerCluster
# Create a map object and center it to the avarage coordinates to m
m = folium.Map(location=df[["lat", "lon"]].mean().to_list(), zoom_start=2)
# if the points are too close to each other, cluster them, create a cluster overlay with MarkerCluster, add to m
marker_cluster = MarkerCluster().add_to(m)
# draw the markers and assign popup and hover texts
# add the markers the the cluster layers so that they are automatically clustered
for i,r in df.iterrows():
    location = (r["lat"], r["lon"])
    folium.Marker(location=location,
                      popup = r['Name'],
                      tooltip=r['Name'])\
    .add_to(marker_cluster)
# display the map
m

In [None]:
#https://towardsdatascience.com/pythons-geocoding-convert-a-list-of-addresses-into-a-map-f522ef513fd6#5352

# End Notebook