In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.formula.api as smf
import matplotlib.pylab as plt
import xmltodict, json
import requests
%pylab inline

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [2]:
# Load the Zillow API key from a local text file so it is not hardcoded in the notebook.
with open('zillow_key.txt', 'r') as f:
    # readline() returns the line including its trailing newline; strip it so
    # the key is sent to the API without stray whitespace appended.
    myAPI = f.readline().strip()

In [3]:
# Query parameters as listed in the API documentation: https://www.zillow.com/howto/api/GetDeepComps.htm
# NOTE(review): the link above is the GetDeepComps page, while the request below
# calls GetDeepSearchResults -- confirm the parameter list against the matching page.
params = {
    'zws-id': myAPI,
    'address': '220 East 73rd Street',
    'citystatezip': '10021',
    'rentzestimate': 'True',
}
# Test request. requests applies no timeout by default, so set one to keep the
# cell from hanging indefinitely if the service does not answer.
result = requests.get(
    'http://www.zillow.com/webservice/GetDeepSearchResults.htm',
    params=params,
    timeout=30,
)
# Fail here on an HTTP error status instead of parsing an error page further down.
result.raise_for_status()

In [4]:
# display the raw response text, skipping its first 100 characters
result.text[100:]

'llow.com/static/xsd/SearchResults.xsd https://www.zillowstatic.com/vstatic/6ce354c/static/xsd/SearchResults.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:SearchResults="http://www.zillow.com/static/xsd/SearchResults.xsd"><request><address>220 East 73rd Street</address><citystatezip>10021</citystatezip></request><message><text>Request successfully processed</text><code>0</code></message><response><results><result><zpid>31537173</zpid><links><homedetails>https://www.zillow.com/homedetails/220-E-73rd-St-New-York-NY-10021/31537173_zpid/</homedetails><graphsanddata>http://www.zillow.com/homedetails/220-E-73rd-St-New-York-NY-10021/31537173_zpid/#charts-and-data</graphsanddata><mapthishome>http://www.zillow.com/homes/31537173_zpid/</mapthishome><comparables>http://www.zillow.com/homes/comps/31537173_zpid/</comparables></links><address><street>220 E 73rd St</street><zipcode>10021</zipcode><city>New York</city><state>NY</state><latitude>40.76972</latitude><longitude>-73.9590

In [5]:
# parse the XML response body into nested dict-like objects keyed by tag name
d = xmltodict.parse(result.text)

In [6]:
# The building address is read from the <request> element, i.e. the address
# echoed back from the query rather than one taken from a matched result.
search_request = d['SearchResults:searchresults']['request']
address = search_request['address']

In [7]:
# The response holds several 'result' entries for the same address.
# Bedroom and bathroom counts are nested in the first entry.
first_listing = d['SearchResults:searchresults']['response']['results']['result'][0]
bedrooms = first_listing['bedrooms']
bathrooms = first_listing['bathrooms']

summary_template = 'This apartment at {} has {} bedrooms and {} bathrooms'
print(summary_template.format(address, bedrooms, bathrooms))

This apartment at 220 East 73rd Street has 2 bedrooms and 2.0 bathrooms


In [8]:
# Each entry can carry the date and price of its last sale; here they are
# pulled from the third entry (index 2) of the result list.
third_listing = d['SearchResults:searchresults']['response']['results']['result'][2]
date_sold = third_listing['lastSoldDate']
# lastSoldPrice is an element with an attribute, so the parsed value exposes
# the amount under '#text' and the currency under '@currency'.
last_sold_price = third_listing['lastSoldPrice']
price = last_sold_price['#text']
currency = last_sold_price['@currency']

sale_template = 'Sold on {} for {}{}'
print(sale_template.format(date_sold, price, currency))

Sold on 07/22/2011 for 763687USD


In [9]:
# within 'result', the first position includes info about the unit (above)
# each successive item is a record for each time it's been sold, but doesn't seem to be for different units (just any unit at that address)
d['SearchResults:searchresults']['response']['results']['result'][0].keys()

odict_keys(['zpid', 'links', 'address', 'FIPScounty', 'useCode', 'taxAssessmentYear', 'taxAssessment', 'yearBuilt', 'lotSizeSqFt', 'finishedSqFt', 'bathrooms', 'bedrooms', 'zestimate', 'rentzestimate', 'localRealEstate'])

In [10]:
# All three fields below come from the first entry of the result list.
unit_record = d['SearchResults:searchresults']['response']['results']['result'][0]

# building type (e.g. cooperative)
building_type = unit_record['useCode']

# finished square footage - for the building, not at unit level
building_sqft = unit_record['finishedSqFt']

# lot square footage - only one per result, also not at unit level
lot_sqft = unit_record['lotSizeSqFt']

___

In [11]:
# Repeat the lookup for a second address (1735 York Ave).
params = {
    'zws-id': myAPI,
    'address': '1735 York Ave',
    'citystatezip': '10128',
    'rentzestimate': 'True',
}
# requests applies no timeout by default, so set one to keep the cell from
# hanging indefinitely if the service does not answer.
york = requests.get(
    'http://www.zillow.com/webservice/GetDeepSearchResults.htm',
    params=params,
    timeout=30,
)
# Fail here on an HTTP error status instead of parsing an error page further down.
york.raise_for_status()


In [12]:
# Parse the York Ave XML response into nested dict-like objects.
# `york` is rebound from the HTTP response to the parsed result, so a second
# run of this cell would fail on `.text`; only parse while it is still a response.
if isinstance(york, requests.Response):
    york = xmltodict.parse(york.text)

In [13]:
# Pull the same fields as for the first address out of the York Ave response
# (repeated by hand rather than through a shared helper).
york_root = york['SearchResults:searchresults']
address = york_root['request']['address']

york_listing = york_root['response']['results']['result'][0]
bedrooms = york_listing['bedrooms']
bathrooms = york_listing['bathrooms']
# Sale-history lookups from the third entry are left disabled for this address:
# date_sold = york['SearchResults:searchresults']['response']['results']['result'][2]['lastSoldDate']
# price = york['SearchResults:searchresults']['response']['results']['result'][2]['lastSoldPrice']['#text']
# currency = york['SearchResults:searchresults']['response']['results']['result'][2]['lastSoldPrice']['@currency']
building_type = york_listing['useCode']


In [14]:
# Report the building type, address and room counts gathered for the York Ave lookup.
yorkville_template = 'This Yorkville {} at {} has {} bedroom(s) and {} bathroom(s)'
print(yorkville_template.format(building_type, address, bedrooms, bathrooms))

This Yorkville MultiFamily2To4 at 1735 York Ave has 2 bedroom(s) and 2.0 bathroom(s)


___

In [15]:
# d = {'result': {'list': {'one_more_annoying_thing': ['a', 'b', 'c']}}}
# # step 1 -- you should iteratively run the next few lines to see the outputs come up
# # as you go
# d.keys()
# # ok, "result" is some key we can just skip through

# # so then we just filter in to "result" and look at the keys of that dict
# d["result"].keys()
# # and after running this one, we see 'list' so let's just keep filtering down
# d["result"]["list"].keys()

# # and again...
# d["result"]["list"]["one_more_annoying_thing"].keys()
# #aha! this line raises an exception about a "list" not having "keys"
# # so we know we've found a list that we probably care about
# # in your case, this may be a string object instead of a list, depending
# # on the output format

# # after running this, index into the list to pull out the individual items
# filtered = d["result"]["list"]["one_more_annoying_thing"]
# a = filtered[0]
# b = filtered[1]