In [43]:
# Standard imports. Note: You must pip install nasdaqdatalink 1st
import os
import pandas as pd
import nasdaqdatalink
import requests
from pathlib import Path
import shutil

In [44]:
# A function to retrieve a dataframe of counties, zips, etc
def get_regions(regions):
    """Fetch the Zillow regions table for a single region type.

    Args:
        regions: Value passed as the ``region_type`` filter of the
            ``ZILLOW/REGIONS`` datatable (this notebook uses ``'county'``).

    Returns:
        The dataframe returned by ``nasdaqdatalink.get_table``.
    """
    # paginate=True asks the client to follow result pages; without it only
    # the first page comes back, which truncates large region types.
    region_df = nasdaqdatalink.get_table(
        'ZILLOW/REGIONS', region_type=regions, paginate=True
    )
    return region_df

# 1. Get the regions data from Zillow REST APIs.   
This contains a list of all counties in the US.

In [45]:
# Using get_regions to retrieve a list of counties
region_df = get_regions('county')

# A 'region' value looks like "Durham County; NC; Durham-Chapel Hill":
# county name, state and (optionally) a metro area, separated by ';'.
# `n` is passed by keyword because pandas 2.x no longer accepts it positionally.
region_parts = region_df["region"].str.split(";", n=2, expand=True)

#
# Clean up regions data
# Remove ' County' so that we can match the Zillow data with Wikipedia data.
# regex=False keeps this a literal substring replacement on every pandas version.
region_df["county"] = region_parts[0].str.replace(" County", "", regex=False)

# Strip the whitespace left around the state abbreviation by the split.
region_df["state"] = region_parts[1].str.strip()

# Clean up region_id datatype.
region_df['region_id'] = region_df['region_id'].astype(int)

# Check data for region_df
display(region_df.head())
display(region_df.tail())

Unnamed: 0_level_0,region_id,region_type,region,county,state
None,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,999,county,Durham County; NC; Durham-Chapel Hill,Durham,NC
1,998,county,Duplin County; NC,Duplin,NC
2,997,county,Dubois County; IN; Jasper,Dubois,IN
3,995,county,Donley County; TX,Donley,TX
4,993,county,Dimmit County; TX,Dimmit,TX


Unnamed: 0_level_0,region_id,region_type,region,county,state
None,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2886,1003,county,Elmore County; AL; Montgomery,Elmore,AL
2887,1002,county,Elbert County; GA,Elbert,GA
2888,1001,county,Elbert County; CO; Denver-Aurora-Lakewood,Elbert,CO
2889,1000,county,Echols County; GA; Valdosta,Echols,GA
2890,100,county,Bibb County; AL; Birmingham-Hoover,Bibb,AL


# 2. Get the Zillow sales data.  
In this example, we read in Zillow sales data in the form of a CSV file.  

In [46]:
# Download the Zillow sales data with the Nasdaq Data Link SDK.
# Instructions: https://data.nasdaq.com/databases/ZILLOW/usage/quickstart/python
# (where the instructions say 'quandl', use 'nasdaqdatalink' instead).
# NOTE(review): the saved output of this cell is a "Status 500" DataLinkError;
# confirm that the export succeeds when the notebook is re-run.
data = nasdaqdatalink.export_table(
    'ZILLOW/DATA',
    indicator_id='ZSFH',
    region_id=list(region_df['region_id']),
    filename='db.zip',
)

DataLinkError: (Status 500) Something went wrong. Please try again. If you continue to have problems, please contact us at connect@data.nasdaq.com.

In [47]:
# Extract the archive written by the export call into the working directory
shutil.unpack_archive(filename='db.zip')

In [48]:
# Reading in Database
# The extracted file is named 'ZILLOW_DATA_<suffix>.csv', and the suffix looks
# export-specific. Look the file up by pattern instead of hard-coding one
# suffix, so that a fresh export can still be read.
zillow_csv_candidates = sorted(
    Path('.').glob('ZILLOW_DATA_*.csv'),
    key=lambda csv_path: csv_path.stat().st_mtime,
)
if not zillow_csv_candidates:
    raise FileNotFoundError(
        "No 'ZILLOW_DATA_*.csv' file found in the working directory; "
        "unpack 'db.zip' first."
    )

# When several exports are present, read the most recently modified one.
zillow_data = pd.read_csv(zillow_csv_candidates[-1])

# Check the Zillow sales data
display(zillow_data.head())
display(zillow_data.tail())

Unnamed: 0,indicator_id,region_id,date,value
0,ZSFH,100,2007-11-30,123760.0
1,ZSFH,100,2007-12-31,123754.0
2,ZSFH,100,2008-01-31,123605.0
3,ZSFH,100,2008-02-29,123393.0
4,ZSFH,100,2008-03-31,123095.0


Unnamed: 0,indicator_id,region_id,date,value
669311,ZSFH,999,2022-02-28,390111.0
669312,ZSFH,999,2022-03-31,401621.0
669313,ZSFH,999,2022-04-30,411421.0
669314,ZSFH,999,2022-05-31,422028.0
669315,ZSFH,999,2022-06-30,430509.0


In [49]:
# Attach the region attributes to every Zillow sales row via the shared region_id
zillow_merge_df = region_df.merge(zillow_data, how='inner', on='region_id')

# Preview the merged Zillow data
zillow_merge_df.head()

Unnamed: 0,region_id,region_type,region,county,state,indicator_id,date,value
0,999,county,Durham County; NC; Durham-Chapel Hill,Durham,NC,ZSFH,1997-02-28,139430.0
1,999,county,Durham County; NC; Durham-Chapel Hill,Durham,NC,ZSFH,1997-03-31,139459.0
2,999,county,Durham County; NC; Durham-Chapel Hill,Durham,NC,ZSFH,1997-04-30,139659.0
3,999,county,Durham County; NC; Durham-Chapel Hill,Durham,NC,ZSFH,1997-05-31,139887.0
4,999,county,Durham County; NC; Durham-Chapel Hill,Durham,NC,ZSFH,1997-06-30,140303.0


# 3. Get the county coordinates data.
Zillow does not provide county coordinates, so we sourced them from Wikipedia. We then merge this data with the Zillow data on county and state.

In [50]:
# Load the county coordinates and, in the same step, rename the two columns
# used as merge keys so they match the Zillow frame ('county' and 'state').
county_coordinates_df = pd.read_csv(Path('counties_w_coordinates.csv')).rename(
    columns={"County\xa0[2]": "county", "State": "state"}
)

# Preview the county coordinates data
county_coordinates_df.head()

Unnamed: 0,Sort [1],state,FIPS,county,County Seat(s) [3],Population,Land Area,Land Area.1,Water Area,Water Area.1,Total Area,Total Area.1,Latitude,Longitude
0,,,,,,-2010,km²,mi²,km²,mi²,km²,mi²,,
1,1.0,AL,1001.0,Autauga,Prattville,54571,1539.58,594.436,25.776,9.952,1565.36,604.388,+32.536382°,–86.644490°
2,2.0,AL,1003.0,Baldwin,Bay Minette,182265,4117.52,1589.78,1133.19,437.527,5250.71,2027.31,+30.659218°,–87.746067°
3,3.0,AL,1005.0,Barbour,Clayton,27457,2291.82,884.876,50.865,19.639,2342.68,904.515,+31.870670°,–85.405456°
4,4.0,AL,1007.0,Bibb,Centreville,22915,1612.48,622.582,9.289,3.587,1621.77,626.169,+33.015893°,–87.127148°


In [51]:
# Join the Zillow frame to the county coordinates on the shared county/state keys
master_df = zillow_merge_df.merge(
    county_coordinates_df,
    how='inner',
    on=['county', 'state'],
)

# Show the combined master data
master_df

Unnamed: 0,region_id,region_type,region,county,state,indicator_id,date,value,Sort [1],FIPS,County Seat(s) [3],Population,Land Area,Land Area.1,Water Area,Water Area.1,Total Area,Total Area.1,Latitude,Longitude
0,999,county,Durham County; NC; Durham-Chapel Hill,Durham,NC,ZSFH,1997-02-28,139430.0,1922.0,37063.0,Durham,267587,740.673,285.975,30.798,11.891,771.471,297.866,+36.036589°,–78.877919°
1,999,county,Durham County; NC; Durham-Chapel Hill,Durham,NC,ZSFH,1997-03-31,139459.0,1922.0,37063.0,Durham,267587,740.673,285.975,30.798,11.891,771.471,297.866,+36.036589°,–78.877919°
2,999,county,Durham County; NC; Durham-Chapel Hill,Durham,NC,ZSFH,1997-04-30,139659.0,1922.0,37063.0,Durham,267587,740.673,285.975,30.798,11.891,771.471,297.866,+36.036589°,–78.877919°
3,999,county,Durham County; NC; Durham-Chapel Hill,Durham,NC,ZSFH,1997-05-31,139887.0,1922.0,37063.0,Durham,267587,740.673,285.975,30.798,11.891,771.471,297.866,+36.036589°,–78.877919°
4,999,county,Durham County; NC; Durham-Chapel Hill,Durham,NC,ZSFH,1997-06-30,140303.0,1922.0,37063.0,Durham,267587,740.673,285.975,30.798,11.891,771.471,297.866,+36.036589°,–78.877919°
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
628666,100,county,Bibb County; AL; Birmingham-Hoover,Bibb,AL,ZSFH,2022-02-28,161462.0,4.0,1007.0,Centreville,22915,1612.48,622.582,9.289,3.587,1621.77,626.169,+33.015893°,–87.127148°
628667,100,county,Bibb County; AL; Birmingham-Hoover,Bibb,AL,ZSFH,2022-03-31,162369.0,4.0,1007.0,Centreville,22915,1612.48,622.582,9.289,3.587,1621.77,626.169,+33.015893°,–87.127148°
628668,100,county,Bibb County; AL; Birmingham-Hoover,Bibb,AL,ZSFH,2022-04-30,163859.0,4.0,1007.0,Centreville,22915,1612.48,622.582,9.289,3.587,1621.77,626.169,+33.015893°,–87.127148°
628669,100,county,Bibb County; AL; Birmingham-Hoover,Bibb,AL,ZSFH,2022-05-31,164684.0,4.0,1007.0,Centreville,22915,1612.48,622.582,9.289,3.587,1621.77,626.169,+33.015893°,–87.127148°
