# Download Zillow data
This code downloads files from Zillow Research and keeps the raw files locally, so that we always have access to the same original data for reproducibility.

In [23]:
from urllib.request import urlretrieve
import os
import sys
sys.executable # show the path of the Python interpreter running this notebook (environment check)

'/Users/pelly/opt/miniconda3/envs/geo_plotnine/bin/python'

In [2]:
# base url is the same for rentals and prices
url_base = 'http://files.zillowstatic.com/research/public/'

# geography-specific prefixes of the house price index (Zhvi) files
url_county = 'County/County_Zhvi_'
url_state  = 'State/State_Zhvi_'

# Build the URL prefixes by plain concatenation: url_base already ends with
# '/', and os.path.join is a filesystem helper whose separator depends on the
# operating system, so it should not be used to assemble URLs.
url_county_base = url_base + url_county
url_state_base = url_base + url_state

## House Prices

Files downloaded on April 25, 2020, from Zillow Research.

In [3]:
# check url
url_county_base 

'http://files.zillowstatic.com/research/public/County/County_Zhvi_'

In [4]:
# file-name suffixes shared by the county and the state downloads
files = [
    'BottomTier.csv',
    'SingleFamilyResidence.csv',
    'TopTier.csv',
    'AllHomes.csv',
]

# complete download URLs: county files first, then state files
zhvi_county = [url_county_base + suffix for suffix in files]
zhvi_state = [url_state_base + suffix for suffix in files]
zhvi_all = [*zhvi_county, *zhvi_state]

# the local file name is the last component of each URL
file_names = [url.rsplit('/', 1)[-1] for url in zhvi_all]

# destination paths inside the target directory
target_dir = '../input/zhvi/'
file_path = [target_dir + name for name in file_names]

file_path

['../input/zhvi/County_Zhvi_BottomTier.csv',
 '../input/zhvi/County_Zhvi_SingleFamilyResidence.csv',
 '../input/zhvi/County_Zhvi_TopTier.csv',
 '../input/zhvi/County_Zhvi_AllHomes.csv',
 '../input/zhvi/State_Zhvi_BottomTier.csv',
 '../input/zhvi/State_Zhvi_SingleFamilyResidence.csv',
 '../input/zhvi/State_Zhvi_TopTier.csv',
 '../input/zhvi/State_Zhvi_AllHomes.csv']

In [5]:
# Create the target directory (and any missing parents). exist_ok=True makes
# this a no-op when the directory is already there and avoids the race in a
# separate "check, then create" sequence.
os.makedirs(target_dir, exist_ok=True)

In [7]:
# preview each download URL next to the local path it will be saved to
list(zip(zhvi_all, file_path))

[('http://files.zillowstatic.com/research/public/County/County_Zhvi_BottomTier.csv',
  '../input/zhvi/County_Zhvi_BottomTier.csv'),
 ('http://files.zillowstatic.com/research/public/County/County_Zhvi_SingleFamilyResidence.csv',
  '../input/zhvi/County_Zhvi_SingleFamilyResidence.csv'),
 ('http://files.zillowstatic.com/research/public/County/County_Zhvi_TopTier.csv',
  '../input/zhvi/County_Zhvi_TopTier.csv'),
 ('http://files.zillowstatic.com/research/public/County/County_Zhvi_AllHomes.csv',
  '../input/zhvi/County_Zhvi_AllHomes.csv'),
 ('http://files.zillowstatic.com/research/public/State/State_Zhvi_BottomTier.csv',
  '../input/zhvi/State_Zhvi_BottomTier.csv'),
 ('http://files.zillowstatic.com/research/public/State/State_Zhvi_SingleFamilyResidence.csv',
  '../input/zhvi/State_Zhvi_SingleFamilyResidence.csv'),
 ('http://files.zillowstatic.com/research/public/State/State_Zhvi_TopTier.csv',
  '../input/zhvi/State_Zhvi_TopTier.csv'),
 ('http://files.zillowstatic.com/research/public/State/State_Zhvi_AllHomes.csv',
  '../input/zhvi/State_Zhvi_AllHomes.csv')]

In [8]:
# fetch every Zhvi file and store it at its matching local path
for source_url, destination in zip(zhvi_all, file_path):
    urlretrieve(url=source_url, filename=destination)

## Rentals

In [9]:
# geography-specific prefixes of the rental index (Zri) files
url_county = 'County/County_Zri_'
url_state  = 'State/State_Zri_'

# Build the URL prefixes by plain concatenation: url_base already ends with
# '/', and os.path.join is a filesystem helper whose separator depends on the
# operating system, so it should not be used to assemble URLs.
url_county_base = url_base + url_county
url_state_base = url_base + url_state

In [10]:
# file-name suffixes shared by the county and the state downloads
files = [
    'AllHomesPlusMultifamily_Summary.csv',
    'AllHomesPlusMultifamily.csv',
    'MultiFamilyResidenceRental.csv',
]

# complete download URLs: county files first, then state files
zri_county = [url_county_base + suffix for suffix in files]
zri_state = [url_state_base + suffix for suffix in files]
zri_all = [*zri_county, *zri_state]

# the local file name is the last component of each URL
file_names = [url.rsplit('/', 1)[-1] for url in zri_all]

# destination paths inside the target directory
target_dir = '../input/zri/'
file_path = [target_dir + name for name in file_names]

In [11]:
# Create the target directory (and any missing parents). exist_ok=True makes
# this a no-op when the directory is already there and avoids the race in a
# separate "check, then create" sequence.
os.makedirs(target_dir, exist_ok=True)

In [12]:
# fetch every Zri file, echoing each local path once it has been saved
for source_url, destination in zip(zri_all, file_path):
    urlretrieve(url=source_url, filename=destination)
    print(destination)

../input/zri/County_Zri_AllHomesPlusMultifamily_Summary.csv
../input/zri/County_Zri_AllHomesPlusMultifamily.csv
../input/zri/County_Zri_MultiFamilyResidenceRental.csv
../input/zri/State_Zri_AllHomesPlusMultifamily_Summary.csv
../input/zri/State_Zri_AllHomesPlusMultifamily.csv
../input/zri/State_Zri_MultiFamilyResidenceRental.csv


## Sale counts (transactions)

In [26]:
# base url is the same for rentals and prices
url_base = 'http://files.zillowstatic.com/research/public/'
target_dir = '../input2/'

# Create the target directory (and any missing parents). exist_ok=True makes
# this a no-op when the directory is already there and avoids the race in a
# separate "check, then create" sequence.
os.makedirs(target_dir, exist_ok=True)

def download(geography='County', item='Sale_Counts'):
    """Download one Zillow csv file and save it inside ``target_dir``.

    The file name is ``<item>_<geography>.csv``. It is fetched from
    ``url_base + geography + '/'`` and written to ``target_dir``; both are
    module-level variables. After the download, the source URL and the local
    path are printed.

    Args:
        geography: Folder on the server and suffix of the file name,
            e.g. 'County' or 'State'.
        item: Prefix of the file name, e.g. 'Sale_Counts'.
    """
    file_name = '_'.join([item, geography]) + '.csv'
    source_url = ''.join([url_base, geography, '/', file_name])
    destination = target_dir + file_name  # path inside the target directory
    urlretrieve(source_url, destination)
    for location in (source_url, destination):
        print(location)

# Download the sale-count file for each geography. The helper defined in this
# cell is `download`; no `make_url` is defined anywhere in the notebook, so
# calling it raises NameError in a fresh kernel.
for geo in ['County', 'State']:
    download(geo, 'Sale_Counts')
    

In [None]:
# NOTE(review): unexecuted scratch cell; url_county still holds the Zri prefix
# from the Rentals section - confirm whether these assignments are needed.
url_county = 'County/County_Zri_'
url_state  = 'State/Sale_Counts_State.csv'

http://files.zillowstatic.com/research/public/County/Sale_Counts_County.csv
http://files.zillowstatic.com/research/public/State/Sale_Counts_State.csv