<a href="https://colab.research.google.com/github/lcqsigi/big-data2/blob/main/063_Zillow_For_Sale_Properties_Python3.092324.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Steps
### Get Keys
1) Get a free [RapidAPI key by creating an account](https://rapidapi.com/marketplace)

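Store each key in a central and private location. This notebook reads the key from a CSV file named `api_keys.csv` in your Google Drive folder; the file needs an `API` column and a `KEY` column, with the RapidAPI key in the row whose `API` value is `rapid`. Never display or commit the key itself.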

### Subscribe to APIs

1) Subscribe to [Zillow on Rapid API](https://rapidapi.com/apimaker/api/zillow-com1/)

## <font color="blue">Install Packages</font>

## <font color="blue">Imports</font>

In [1]:
from google.colab import drive, files # specific to Google Colab
import pandas as pd
import requests
import json
import time

# never truncate the column list when a DataFrame is displayed
pd.options.display.max_columns = None

## <font color="blue">Functions</font>

## <font color="blue">Local & Constants</font>

In [2]:
############
# OPTIONAL #
############

# Mount Google Drive so files stored there can be read from this runtime
drive.mount('/content/drive', force_remount=False)

# Folder on the mounted drive that holds the input files
file_dir = '/content/drive/My Drive/' # optional

############
# REQUIRED #
############
city, state = 'hayward', 'ca'
# NOTE(review): search_str is not referenced by the later cells visible in
# this notebook (the searches below use ZIP codes) -- confirm it is still needed.
search_str = ', '.join([city, state])
print('Search string:', search_str)

Mounted at /content/drive
Search string: hayward, ca


## <font color="blue">Data</font>

### Get API Keys

In [3]:
# Read the private key file: a CSV with an 'API' column naming the service
# and a 'KEY' column holding the matching secret.
df_api_keys = pd.read_csv(file_dir + 'api_keys.csv')

# Select the RapidAPI key with a single .loc call (no chained indexing).
rapid_key_rows = df_api_keys.loc[df_api_keys['API'] == 'rapid', 'KEY']
if rapid_key_rows.empty:
    # Fail with an actionable message instead of an out-of-bounds IndexError.
    raise ValueError("api_keys.csv has no row where API == 'rapid'")

# Strip stray whitespace so the value is safe to send as an HTTP header.
rapid_api_key = str(rapid_key_rows.iloc[0]).strip() # replace this with your own key

In [4]:
# Confirm that a key was loaded without echoing the secret itself; cell
# outputs are saved with the notebook and would expose the key to readers.
bool(rapid_api_key)

True

In [5]:
# RapidAPI endpoint queried by the requests below
url = 'https://zillow-com1.p.rapidapi.com/propertyExtendedSearch'

In [6]:
# Headers RapidAPI uses to route and authenticate the call.
headers = {
    'x-rapidapi-host': "zillow-com1.p.rapidapi.com",
    'x-rapidapi-key': rapid_api_key
    }
# Bare request to the endpoint with no query parameters; the response is kept
# in `req` for inspection. `requests` is already imported in the imports cell.
# The timeout keeps the cell from hanging forever if the server never answers.
req = requests.request('GET', url, headers=headers, timeout=30)

## <font color="green">Zillow</font>

### <font color="purple">Endpoint: propertyExtendedSearch</font>

In [7]:
# get data
url = "https://zillow-com1.p.rapidapi.com/propertyExtendedSearch"

# Filters shared by every search; only the ZIP code ("location") differs.
shared_filters = {
    "home_type": "Single_Family",
    "minPrice": "800000",
    "maxPrice": "1150000",
    "sqftMin": "1500",
    "bedsMin": "3",
}


def _build_querystring(zip_code):
    """Return the search parameters for one ZIP code, with "location" first."""
    return {"location": zip_code, **shared_filters}


querystring1 = _build_querystring("94611")
querystring2 = _build_querystring("94605")
querystring3 = _build_querystring("94619")

headers = {
    'x-rapidapi-host': "zillow-com1.p.rapidapi.com",
    'x-rapidapi-key': rapid_api_key
    }


def _fetch_for_sale(querystring):
    """Send one GET search request and return the response object.

    Raises requests.HTTPError on a 4xx/5xx status so a rejected call fails
    here rather than later when the payload is parsed. The timeout stops the
    cell from hanging forever if the server never answers.
    """
    response = requests.request(
        "GET", url, headers=headers, params=querystring, timeout=30
    )
    response.raise_for_status()
    return response


z_for_sale_resp1 = _fetch_for_sale(querystring1)
z_for_sale_resp2 = _fetch_for_sale(querystring2)
z_for_sale_resp3 = _fetch_for_sale(querystring3)

# transform to json
z_for_sale_resp_json1 = z_for_sale_resp1.json()
z_for_sale_resp_json2 = z_for_sale_resp2.json()
z_for_sale_resp_json3 = z_for_sale_resp3.json()

In [8]:
# number of listings returned by the first search (0 if 'props' is absent)
first_query_props = z_for_sale_resp_json1.get('props', [])
len(first_query_props)

12

In [9]:
# Flatten each search response into a DataFrame, one per query.
# A payload without a 'props' entry yields an empty frame and a warning
# instead of a KeyError.
frames = []
for query_no, payload in enumerate(
    (z_for_sale_resp_json1, z_for_sale_resp_json2, z_for_sale_resp_json3), start=1
):
    props = payload.get('props', [])
    if not props:
        print(f'Warning: query {query_no} returned no listings')
    frames.append(pd.json_normalize(data=props))

df_z_for_sale1, df_z_for_sale2, df_z_for_sale3 = frames

# Stack the per-query frames into one table with a fresh 0..n-1 index.
df_z_for_sale = pd.concat(frames, ignore_index=True)

print('Num of rows:', len(df_z_for_sale))
print('Num of cols:', len(df_z_for_sale.columns))

Num of rows: 53
Num of cols: 29


In [None]:
# Optional export: order the listings by days on Zillow, write them to a CSV
# in the runtime's working directory, then hand the file to the browser.
export_name = 'df_z_for_sale.csv'
df_z_for_sale = df_z_for_sale.sort_values(by='daysOnZillow')
df_z_for_sale.to_csv(export_name)
files.download(export_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [10]:
import pandas as pd
import requests
import json
import urllib
import io

In [11]:
from geopy.geocoders import Nominatim

# Geocoder client used below to turn address strings into locations
nominatim_user_agent = "example app"
geolocator = Nominatim(user_agent=nominatim_user_agent)

In [12]:
# Work on a copy so the columns added and rows dropped in the cells below do
# not alter df_z_for_sale, and so re-running this cell restores a clean frame.
df = df_z_for_sale.copy()

In [13]:
# Nominatim's public service allows at most one request per second, so wrap
# the geocoder in geopy's RateLimiter. By default it also retries failed
# lookups and returns None when they keep failing, rather than aborting the
# whole apply on a single timeout.
from geopy.extra.rate_limiter import RateLimiter

geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)
df["loc"] = df['address'].apply(geocode)



In [14]:
# discard, in place, every listing whose 'loc' value is missing
df.dropna(axis='index', how='any', subset=['loc'], inplace=True)

In [15]:
def _price_in_millions(price):
    """Format a price as millions with one decimal, e.g. 875000 -> '0.9M'."""
    return f'{price/10**6:.1f}M'


df["price2"] = df['price'].apply(_price_in_millions)

In [16]:
# Label each remaining row with its 0-based position, stored as text so it
# can be concatenated into the marker label.
row_positions = pd.RangeIndex(len(df))
df['Index'] = row_positions.astype(str)

In [17]:
# marker label: "<position>, <address>, <price in millions>"
separator = ', '
df["address_price"] = (
    df['Index']
    .add(separator)
    .add(df['address'])
    .add(separator)
    .add(df['price2'])
)

In [18]:
def _point_or_none(location):
    """Return the location's point as a tuple, or None for a falsy location."""
    if not location:
        return None
    return tuple(location.point)


df["point"] = df["loc"].apply(_point_or_none)

In [19]:
# expand each point tuple into its own columns, aligned on df's index
point_columns = pd.DataFrame(df['point'].to_list(), index=df.index)
df[['lat', 'lon', 'altitude']] = point_columns

In [20]:
# import the library and its marker clustering plugin
import folium
from folium.plugins import MarkerCluster

# Create a map object centered on the average coordinates of the listings
coords = df[["lat", "lon"]]
m = folium.Map(location=coords.mean().to_list(), zoom_start=2)
# zoom_start=2 is a world view; fit the viewport to the listings' bounding
# box (south-west corner, north-east corner) so they are visible on load.
m.fit_bounds([coords.min().to_list(), coords.max().to_list()])

# Markers that sit close together are grouped by a cluster overlay on the map
marker_cluster = MarkerCluster().add_to(m)

# Draw one marker per listing, with the address/price label as hover text.
# Markers are added to the cluster layer so they are clustered automatically.
for lat, lon, label in zip(df["lat"], df["lon"], df["address_price"]):
    folium.Marker(location=(lat, lon), tooltip=label).add_to(marker_cluster)

# display the map
m


In [24]:
# inspect the listing at position 19 (matches the 'Index' label on the map)
df.iloc[19]

Unnamed: 0,19
dateSold,
propertyType,SINGLE_FAMILY
lotAreaValue,6272.64
address,"3487 Margarita Ave, Oakland, CA 94605"
variableData,
priceChange,-24000.0
zestimate,
imgSrc,https://photos.zillowstatic.com/fp/356565ac68c...
price,875000
detailUrl,/homedetails/3487-Margarita-Ave-Oakland-CA-946...


In [25]:
# inspect the listing at position 46 (matches the 'Index' label on the map)
df.iloc[46]

Unnamed: 0,46
dateSold,
propertyType,SINGLE_FAMILY
lotAreaValue,5227.2
address,"4433 Fair Ave, Oakland, CA 94619"
variableData,
priceChange,
zestimate,
imgSrc,https://photos.zillowstatic.com/fp/0c5d3d7bb90...
price,995000
detailUrl,/homedetails/4433-Fair-Ave-Oakland-CA-94619/24...


In [None]:
#https://towardsdatascience.com/pythons-geocoding-convert-a-list-of-addresses-into-a-map-f522ef513fd6#5352

# End Notebook