# STAR PERFORMERS

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from bs4 import BeautifulSoup
import xmltodict
from pprint import pprint
import os
import csv
from api_keys import z_keys

In [2]:
# Load the 2018 NY property sales export into a DataFrame.
ny18_df = pd.read_csv(filepath_or_buffer="Files/2018NYSales.csv")

In [3]:
# Number of rows in the raw sales file.
ny18_df.shape[0]

16369

In [5]:
# Drop the padding spaces around the sale-price column label so later cells
# can refer to it simply as 'SALE PRICE'.
ny18_df = ny18_df.rename({' SALE PRICE ': 'SALE PRICE'}, axis='columns')

In [6]:
# Show the column labels to confirm the 'SALE PRICE' rename took effect.
ny18_df.columns

Index(['BOROUGH', 'NEIGHBORHOOD', 'BUILDING CLASS CATEGORY',
       'TAX CLASS AT PRESENT', 'BLOCK', 'LOT', 'EASE-MENT',
       'BUILDING CLASS AT PRESENT', 'ADDRESS', 'APARTMENT NUMBER', 'ZIP CODE',
       'RESIDENTIAL UNITS', 'COMMERCIAL UNITS', 'TOTAL UNITS',
       'LAND SQUARE FEET', 'GROSS SQUARE FEET', 'YEAR BUILT',
       'TAX CLASS AT TIME OF SALE', 'BUILDING CLASS AT TIME OF SALE',
       'SALE PRICE', 'SALE DATE'],
      dtype='object')

In [None]:
# Summary statistics for the frame (pandas' default describe() output).
ny18_df.describe()

In [7]:
# The address is later interpolated into a URL query string, so trim the
# surrounding whitespace and swap every space for '+'.
# A literal .str.replace gives the same result as split(' ') + '+'.join for
# strings, and passes missing values through instead of raising TypeError.
ny18_df["ADDRESS"] = (
    ny18_df["ADDRESS"]
    .str.strip()
    .str.replace(' ', '+', regex=False)
)
# Non-null count per column: a quick check that no rows were lost.
ny18_df.count()

BOROUGH                           16369
NEIGHBORHOOD                      16369
BUILDING CLASS CATEGORY           16369
TAX CLASS AT PRESENT              16369
BLOCK                             16369
LOT                               16369
EASE-MENT                         16369
BUILDING CLASS AT PRESENT         16369
ADDRESS                           16369
APARTMENT NUMBER                  16369
ZIP CODE                          16369
RESIDENTIAL UNITS                 16369
COMMERCIAL UNITS                  16369
TOTAL UNITS                       16369
LAND SQUARE FEET                  16369
GROSS SQUARE FEET                 16369
YEAR BUILT                        16369
TAX CLASS AT TIME OF SALE         16369
BUILDING CLASS AT TIME OF SALE    16369
SALE PRICE                        16369
SALE DATE                         16369
dtype: int64

In [8]:
# Strip the currency formatting ('$' and ',') from the price text, then turn
# any '-' (presumably the "no price" placeholder -- confirm against the raw
# file) into '0' so the column can be cast to a number later.
# Raw-string patterns avoid the invalid '\$' escape sequence; '$' needs no
# escaping inside a character class. The second pattern no longer lists ','
# because the first replace has already removed every comma.
ny18_df["SALE PRICE"] = (
    ny18_df["SALE PRICE"]
    .replace(r'[$,]', '', regex=True)
    .replace(r'-', '0', regex=True)
)

In [9]:
# Keep only sales above $100K. Preliminary research showed that the lower
# prices represent things like $0 internal family transfers or garage sales,
# which would bias the analysis. Then keep the first row for each address.
ny18_df = (
    ny18_df
    .loc[lambda sales: sales['SALE PRICE'].astype(float) > 100000]
    .drop_duplicates(subset=['ADDRESS'], keep='first')
)

In [10]:
# Number of rows left after the price filter and address de-duplication.
ny18_df.shape[0]

8951

In [11]:
# Preview the first rows of the filtered, de-duplicated frame.
ny18_df.head()

Unnamed: 0,BOROUGH,NEIGHBORHOOD,BUILDING CLASS CATEGORY,TAX CLASS AT PRESENT,BLOCK,LOT,EASE-MENT,BUILDING CLASS AT PRESENT,ADDRESS,APARTMENT NUMBER,...,RESIDENTIAL UNITS,COMMERCIAL UNITS,TOTAL UNITS,LAND SQUARE FEET,GROSS SQUARE FEET,YEAR BUILT,TAX CLASS AT TIME OF SALE,BUILDING CLASS AT TIME OF SALE,SALE PRICE,SALE DATE
0,1,ALPHABET CITY,01 ONE FAMILY DWELLINGS,1,390,61,,A4,189+EAST+7TH+STREET,,...,1,0,1,987,2183,1860,1,A4,4844809,5/22/2018
2,1,ALPHABET CITY,01 ONE FAMILY DWELLINGS,1,400,19,,A4,526+EAST+5TH+STREET,,...,1,0,1,1883,5200,1900,1,A4,6100000,12/3/2018
6,1,ALPHABET CITY,07 RENTALS - WALKUP APARTMENTS,2,379,37,,C7,133+AVENUE+D,,...,20,2,22,3481,14588,1930,2,C7,8300000,12/10/2018
7,1,ALPHABET CITY,07 RENTALS - WALKUP APARTMENTS,2,385,2,,C7,21-23+AVENUE+B,,...,12,4,16,4186,10588,1900,2,C7,10350000,9/7/2018
8,1,ALPHABET CITY,07 RENTALS - WALKUP APARTMENTS,2,389,18,,C1,200+EAST+7TH+STREET,,...,18,0,18,2271,10650,1910,2,C1,5000000,9/5/2018


In [None]:
# Smoke-test the Zillow GetDeepSearchResults endpoint with one fixed address
# before looping over the whole sales file.
# requests has no default timeout, so set one to keep a stalled connection
# from hanging the kernel indefinitely.
response = requests.get(
    f"https://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id={z_keys}&address=635+west+42+street&citystatezip=10036&rentzestimate=true",
    timeout=30,
)

In [None]:
# Quick look at the Response object returned by the test request.
print(response)

In [None]:
# Use BeautifulSoup to pretty-print the XML response text for inspection.
# Naming the parser avoids bs4's "no parser was explicitly specified" warning
# and stops the output from depending on which optional parsers are installed;
# "html.parser" ships with Python, so no extra dependency is needed.
soup = BeautifulSoup(response.text, "html.parser")
print(soup.prettify())

In [None]:
# Drill down to the matching properties in the sample response.
# force_list keeps 'result' a list even when only one match comes back
# (otherwise a lone match is a dict and len() would count its keys); this is
# the same option the main lookup loop passes when it parses responses.
doc = xmltodict.parse(response.text, force_list={'result': True})['SearchResults:searchresults']['response']['results']['result']
len(doc)

In [12]:
def need_rent_estimate(address):
    """Tell whether a Zillow result has a rent estimate but no last sold price.

    `address` is one parsed result mapping. The return value is truthy only
    when 'rentzestimate' is present (and truthy) and 'lastSoldPrice' is not.
    When there is no rent estimate, the falsy lookup result itself (e.g. None)
    is returned rather than False.
    """
    rent_estimate = address.get('rentzestimate')
    if not rent_estimate:
        return rent_estimate
    return not address.get('lastSoldPrice')

In [13]:
def need_sale_estimate(address):
    """Tell whether a Zillow result has a last sold price but no rent estimate.

    `address` is one parsed result mapping. Returns False when a rent estimate
    is present; otherwise returns whatever is stored under 'lastSoldPrice'
    (truthy when a sale is recorded, None when the key is absent).
    """
    if address.get('rentzestimate'):
        return False
    return address.get('lastSoldPrice')

In [14]:
def get_rent_estimate(address):
    """Extract the rent estimate and street from one parsed Zillow result.

    Reads address['rentzestimate']['amount']['#text'] and
    address['address']['street']; a missing key raises KeyError.

    Returns a pandas Series with entries 'rent', 'address' and 'var', where
    'var' is the constant 2 that tags the row as a rent record. The zipcode is
    not included.
    """
    print("Entering get_rent_estimate")
    rent_amount = address['rentzestimate']['amount']['#text']
    street = address['address']['street']
    record = {'rent': rent_amount, 'address': street, 'var': 2}
    return pd.Series(record)

In [15]:
def get_property_estimate(address):
    """Extract the last sold price from one parsed Zillow result.

    Reads only address['lastSoldPrice']['#text']; a missing key raises
    KeyError. Earlier versions also looked up 'lastSoldDate', 'finishedSqFt'
    and the street without using them, which made records lacking any of
    those fields fail even though the returned data never depended on them.

    Returns a pandas Series with entries 'sold' and 'var', where 'var' is the
    constant 1 that tags the row as a sale record.
    """
    print("Entering get_property_estimate")
    sold = address['lastSoldPrice']['#text']
    return pd.Series({'sold': sold, 'var': 1})

In [16]:
# Renumber the rows 0..n-1 after filtering; the previous row labels are kept
# in a new 'index' column rather than discarded.
# NOTE(review): this cell looks non-idempotent -- re-running it on the
# already-reset frame should insert a further saved-index column; confirm.
ny18_df = ny18_df.reset_index(drop=False)
ny18_df.head()

Unnamed: 0,index,BOROUGH,NEIGHBORHOOD,BUILDING CLASS CATEGORY,TAX CLASS AT PRESENT,BLOCK,LOT,EASE-MENT,BUILDING CLASS AT PRESENT,ADDRESS,...,RESIDENTIAL UNITS,COMMERCIAL UNITS,TOTAL UNITS,LAND SQUARE FEET,GROSS SQUARE FEET,YEAR BUILT,TAX CLASS AT TIME OF SALE,BUILDING CLASS AT TIME OF SALE,SALE PRICE,SALE DATE
0,0,1,ALPHABET CITY,01 ONE FAMILY DWELLINGS,1,390,61,,A4,189+EAST+7TH+STREET,...,1,0,1,987,2183,1860,1,A4,4844809,5/22/2018
1,2,1,ALPHABET CITY,01 ONE FAMILY DWELLINGS,1,400,19,,A4,526+EAST+5TH+STREET,...,1,0,1,1883,5200,1900,1,A4,6100000,12/3/2018
2,6,1,ALPHABET CITY,07 RENTALS - WALKUP APARTMENTS,2,379,37,,C7,133+AVENUE+D,...,20,2,22,3481,14588,1930,2,C7,8300000,12/10/2018
3,7,1,ALPHABET CITY,07 RENTALS - WALKUP APARTMENTS,2,385,2,,C7,21-23+AVENUE+B,...,12,4,16,4186,10588,1900,2,C7,10350000,9/7/2018
4,8,1,ALPHABET CITY,07 RENTALS - WALKUP APARTMENTS,2,389,18,,C1,200+EAST+7TH+STREET,...,18,0,18,2271,10650,1910,2,C1,5000000,9/5/2018


In [17]:
# Spot-check label-based access after the index reset: BLOCK of row 0.
ny18_df.loc[0,'BLOCK']

390

In [18]:
# Look up every sale address on Zillow and collect one Series per usable result.
#
# Fixes relative to the earlier version of this cell:
#   * the `else` branch was missing its colon, so the cell did not parse;
#   * a result matching neither predicate re-appended the previous Series
#     (or hit a NameError on the very first record);
#   * a bare `except:` hid every failure, KeyboardInterrupt included;
#   * the result frame was rebuilt with pd.concat on every address;
#   * the full request URL, API key included, was printed into the output.

ZILLOW_SEARCH_URL = "https://www.zillow.com/webservice/GetDeepSearchResults.htm"
REQUEST_TIMEOUT_SECONDS = 30   # requests never times out unless told to
REQUEST_DELAY_SECONDS = 30     # pause between calls, as in the original loop

# Kept for compatibility with any later cell that may still refer to them.
block = []
lot = []
ny_sale_amt = []
ny_sale_date = []
series = []
curdoc = {}

try:
    for address, zipcode in zip(ny18_df['ADDRESS'], ny18_df['ZIP CODE']):
        query = f"address={address}&citystatezip={zipcode}&rentzestimate=true"
        # Log the query without the zws-id so the key never lands in saved output.
        print(f"{ZILLOW_SEARCH_URL}?{query}")
        try:
            response_main = requests.get(
                f"{ZILLOW_SEARCH_URL}?zws-id={z_keys}&{query}",
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            # force_list keeps 'result' a list even when there is a single match.
            doc = xmltodict.parse(response_main.text, force_list={'result': True})['SearchResults:searchresults']['response']['results']['result']

            for i, record in enumerate(doc):
                print(f"Record {i}: {record['address']['street']}")

                # Append only when a Series was actually built for this record.
                if need_rent_estimate(record):
                    series.append(get_rent_estimate(record))
                elif need_sale_estimate(record):
                    series.append(get_property_estimate(record))
                else:
                    print("NO ESTIMATED DEFINITION ENTERED!!")

                print(f'End of record {i + 1}')
                print('-------------------------------\n')
        except Exception as exc:
            # Best effort per address: report and move on. Only the exception
            # type is printed because requests' error messages can embed the
            # full URL, which contains the API key. KeyboardInterrupt is not an
            # Exception subclass, so interrupting the cell still works.
            print(f"A EXCEPTION HAS OCCURRED for {address}: {type(exc).__name__}")
        # Throttle after every address, including failed ones.
        time.sleep(REQUEST_DELAY_SECONDS)
finally:
    # Build the frame once, even if the run is interrupted part-way through;
    # pd.concat raises on an empty list, so fall back to an empty frame.
    df = pd.concat(series, axis=1, sort=True).T if series else pd.DataFrame()

https://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id=<REDACTED>&address=189+EAST+7TH+STREET&citystatezip=10009&rentzestimate=true
189 E 7th St 

Entering get_property_estimate
End of record 1
-------------------------------
189 E 7th St 

End of record 2
-------------------------------
189 E 7th St 

Entering get_rent_estimate
End of record 3
-------------------------------
https://www.zillow.com/webservice/GetDeepSearchResults.htm?zws-id=<REDACTED>&address=526+EAST+5TH+STREET&citystatezip=10009&rentzestimate=true
526 E 5th St 

End of record 1
-------------------------------


KeyboardInterrupt: 

In [None]:
# xmltodict.parse(response_main.text)['SearchResults:searchresults'].keys()