# Download link 

https://redfin-public-data.s3.us-west-2.amazonaws.com/redfin_market_tracker/zip_code_market_tracker.tsv000.gz

RHPI = [Redfin Home Price Index](https://www.redfin.com/news/redfin-home-price-index/)

### Benefits over Zillow data:
- Price per square foot data
- Number of homes sold in a period in each zip code
- Current amount of inventory per zip code
- Number of pending sales per zip
- Percent of homes selling above list price
- Percent of homes off market within two weeks
- Percent of homes with price reductions
- More property type options
- Data's all in one place

### Cons compared to Zillow data:
- Data is quarterly (Zillow is monthly)
- Unclear whether the data removes outliers such as the ZHVI (I assume so as they use median values)
- Only have data since 2012 (Zillow is since 2000)


In [2]:
import pandas as pd
import os
pd.options.display.max_columns = 60

In [2]:
data = pd.read_csv('../data/raw/zip_code_market_tracker.tsv000', sep='\t')

In [3]:
data.region = data[['region']].drop_duplicates().apply(lambda x:int(x.region[-5:]), axis=1)
data.to_csv('../data/processed/redfin_housing_data.csv.gz',index=False, compression='gzip')

In [None]:
# Cache unique zip_codes
zip_codes = data[['region']].drop_duplicates().apply(lambda x:x.region[-5:], axis=1)
zip_codes.columns = ['zip_codes']
zc_path = '../data/raw/all_zip_codes.csv'
if not os.path.exists(zc_path):
    zip_codes.to_csv(zc_path, index=False)

In [None]:
data.shape

In [None]:
data.head()

In [None]:
len(data.region.unique())

In [None]:
data.period_begin.min()

In [None]:
data.period_end.max()

In [None]:
sorted(data.columns)

In [3]:
df = pd.read_csv('../data/processed/redfin_housing_data.csv.gz')

In [4]:
df.head()

Unnamed: 0,period_begin,period_end,period_duration,region_type,region_type_id,table_id,is_seasonally_adjusted,region,city,state,state_code,property_type,property_type_id,median_sale_price,median_sale_price_mom,median_sale_price_yoy,median_list_price,median_list_price_mom,median_list_price_yoy,median_ppsf,median_ppsf_mom,median_ppsf_yoy,median_list_ppsf,median_list_ppsf_mom,median_list_ppsf_yoy,homes_sold,homes_sold_mom,homes_sold_yoy,pending_sales,pending_sales_mom,pending_sales_yoy,new_listings,new_listings_mom,new_listings_yoy,inventory,inventory_mom,inventory_yoy,months_of_supply,months_of_supply_mom,months_of_supply_yoy,median_dom,median_dom_mom,median_dom_yoy,avg_sale_to_list,avg_sale_to_list_mom,avg_sale_to_list_yoy,sold_above_list,sold_above_list_mom,sold_above_list_yoy,price_drops,price_drops_mom,price_drops_yoy,off_market_in_two_weeks,off_market_in_two_weeks_mom,off_market_in_two_weeks_yoy,parent_metro_region,parent_metro_region_metro_code,last_updated
0,2016-06-01,2016-08-31,90,zip code,2,5140,f,14425.0,,New York,NY,Townhouse,13,124200.0,-0.037209,-0.065814,129450.0,0.003488,0.013704,85.191764,-0.066823,-0.091852,94.691972,0.03804,0.055639,8.0,0.142857,-0.428571,2.0,-0.5,-0.333333,10.0,-0.090909,0.25,4.0,-0.333333,-0.2,,,,63.5,23.5,-21.5,0.994246,-0.010308,-0.002847,0.5,-0.214286,0.214286,,,,0.0,-0.5,0.0,"Rochester, NY",40380,2024-06-09 16:11:52
1,2022-12-01,2023-02-28,90,zip code,2,35093,f,81005.0,,Colorado,CO,All Residential,-1,287500.0,0.026786,-0.046276,299900.0,0.0,-0.076947,151.194292,-0.056548,-0.061303,165.93613,-0.017842,-0.031716,76.0,0.101449,-0.432836,22.0,0.1,-0.371429,79.0,-0.081395,-0.20202,72.0,-0.162791,0.058824,,,,56.0,11.0,7.0,0.970796,-0.007811,-0.026712,0.210526,-0.006865,-0.177533,,,,0.227273,-0.072727,0.141558,"Pueblo, CO",39380,2024-06-09 16:11:52
2,2023-02-01,2023-04-30,90,zip code,2,344,f,1355.0,,Massachusetts,MA,Single Family Residential,6,415000.0,1.618297,-0.087912,,,,202.8348,0.087635,-0.195793,,,,1.0,-0.5,0.0,,,,,,,,,,,,,133.0,11.5,41.0,0.976473,0.050223,-0.023747,0.0,0.0,-1.0,,,,,,,"Springfield, MA",44140,2024-06-09 16:11:52
3,2023-07-01,2023-09-30,90,zip code,2,9641,f,24176.0,,Virginia,VA,Condo/Co-op,3,575500.0,0.0,0.874593,575000.0,0.0,0.649928,322.228443,0.0,0.074795,321.948488,0.0,0.012498,1.0,0.0,0.0,,,,1.0,0.0,-0.5,,,,,,,7.0,0.0,-91.0,1.00087,0.0,0.039993,1.0,0.0,1.0,,,,,,,"Roanoke, VA",40220,2024-06-09 16:11:52
4,2017-03-01,2017-05-31,90,zip code,2,41207,f,98663.0,,Washington,WA,Townhouse,13,235000.0,0.0,0.032967,,,,169.797688,0.0,0.175073,,,,1.0,0.0,-0.75,,,,,,,,,,,,,2.0,0.0,-3.0,1.044909,0.0,0.042517,1.0,0.0,0.75,,,,,,,"Portland, OR",38900,2024-06-09 16:11:52


In [6]:
df.loc[df.region == 501]

Unnamed: 0,period_begin,period_end,period_duration,region_type,region_type_id,table_id,is_seasonally_adjusted,region,city,state,state_code,property_type,property_type_id,median_sale_price,median_sale_price_mom,median_sale_price_yoy,median_list_price,median_list_price_mom,median_list_price_yoy,median_ppsf,median_ppsf_mom,median_ppsf_yoy,median_list_ppsf,median_list_ppsf_mom,median_list_ppsf_yoy,homes_sold,homes_sold_mom,homes_sold_yoy,pending_sales,pending_sales_mom,pending_sales_yoy,new_listings,new_listings_mom,new_listings_yoy,inventory,inventory_mom,inventory_yoy,months_of_supply,months_of_supply_mom,months_of_supply_yoy,median_dom,median_dom_mom,median_dom_yoy,avg_sale_to_list,avg_sale_to_list_mom,avg_sale_to_list_yoy,sold_above_list,sold_above_list_mom,sold_above_list_yoy,price_drops,price_drops_mom,price_drops_yoy,off_market_in_two_weeks,off_market_in_two_weeks_mom,off_market_in_two_weeks_yoy,parent_metro_region,parent_metro_region_metro_code,last_updated
41144,2023-10-01,2023-12-31,90,zip code,2,1,f,501.0,,New York,NY,Single Family Residential,6,565000.0,,0.164948,540000.0,,-0.279039,,,,,,,1.0,,0.0,,,,1.0,,0.0,,,,,,,25.0,,-14.0,1.046296,,0.054476,1.0,,1.0,,,,,,,"Nassau County, NY",35004,2024-06-09 16:11:52
