In [1]:
from hdbscan import HDBSCAN
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.metrics import silhouette_score
from load_data import load
from clean_data import clean

In [2]:
# Calendar years 2019 through 2023 (the upper bound of range() is exclusive).
years = list(range(2019, 2024))

# Request the same year span for every dataset keyword passed to load() here.
df = load(
    zhvi_years=years,
    zori_years=years,
    sale_listings_years=years,
    sales_years=years,
    days_on_market_years=years,
    new_cons_years=years,
)

In [3]:
# Run clean() on the loaded data once per region type, in the order
# zip -> metro -> county, and bind each result to its own name.
zip_df, metro_df, county_df = (
    clean(df, region_type=level) for level in ('zip', 'metro', 'county')
)

In [4]:
# Preview the first rows of the county-level result of clean(); as the last
# expression in the cell, the returned value is rendered as the cell output.
county_df.head()

Unnamed: 0,State,CountyName,ZHVI 2019,ZHVI 2020,ZHVI 2021,ZHVI 2022,ZHVI 2023,ZHVF 2024-02-29,ZHVF 2024-04-30,ZHVF 2025-01-31,...,3yr pct change Sales,1yr pct change Sale Listings,2yr pct change Sale Listings,3yr pct change Sale Listings,1yr pct change Days on Market,2yr pct change Days on Market,3yr pct change Days on Market,1yr pct change New Construction,2yr pct change New Construction,3yr pct change New Construction
0,SC,Abbeville County,105677.886596,115845.025873,140619.655294,162451.845663,169776.474991,,,,...,,0.035337,-0.021664,-0.314002,0.557483,0.25344,-0.399266,0.001354,0.173734,0.730914
1,VA,Accomack County,150579.892495,162239.538138,187085.016104,201765.598498,220096.457741,,,,...,,-0.111236,-0.215472,-0.419259,0.167708,-0.021481,-0.518827,-0.005541,-0.059307,1.007198
2,ID,Ada County,337018.459037,380146.143065,517107.331426,572448.199052,522272.92006,0.166667,1.326667,6.013333,...,-0.440906,-0.144142,0.070952,-0.25419,0.515759,1.507109,0.546784,-0.100299,-0.211189,0.130827
3,IA,Adair County,126379.939725,134099.52917,152937.43598,174119.483633,178424.932857,0.3,1.7,2.1,...,,-0.122256,-0.137794,-0.292052,0.233359,0.024885,-0.442576,-0.208611,-0.29266,-0.107234
4,KY,Adair County,78161.88984,84777.070068,101102.74635,120672.335816,123952.445044,,,,...,-0.309034,-0.05387,0.010147,-0.108652,0.408762,0.50138,-0.308473,0.017627,0.648889,0.455426


In [5]:
# Preview the first rows of the metro-level result of clean(); as the last
# expression in the cell, the returned value is rendered as the cell output.
metro_df.head()

Unnamed: 0,Metro,ZHVI 2019,ZHVI 2020,ZHVI 2021,ZHVI 2022,ZHVI 2023,ZHVF 2024-02-29,ZHVF 2024-04-30,ZHVF 2025-01-31,ZORI 2019,...,3yr pct change Sales,1yr pct change Sale Listings,2yr pct change Sale Listings,3yr pct change Sale Listings,1yr pct change Days on Market,2yr pct change Days on Market,3yr pct change Days on Market,1yr pct change New Construction,2yr pct change New Construction,3yr pct change New Construction
0,"Akron, OH",153220.647431,162116.686062,184312.604016,202026.064127,210599.278112,0.418182,1.334091,3.675,825.888428,...,-0.191732,-0.125733,-0.166561,-0.40654,0.227848,0.069853,-0.498276,-0.57582,-0.191406,0.101064
1,"Albany-Schenectady-Troy, NY",198562.402767,209114.939344,236677.350008,261505.064555,273536.793794,0.327778,1.209259,2.793519,1205.374692,...,0.111924,-0.210396,-0.238854,-0.115167,0.079646,0.073314,-0.462555,-0.386892,-0.578488,0.102662
2,"Albuquerque, NM",232045.227078,252210.767909,289339.415054,330667.67702,332603.579518,0.042105,0.892105,4.502632,927.59025,...,-0.406599,-0.032004,-0.136764,-0.332168,0.471963,0.451613,-0.237288,0.27356,0.073951,0.23949
3,"Allentown-Bethlehem-Easton, PA-NJ",197984.948776,211334.60834,244413.855307,275745.683202,290579.795955,0.305263,1.401754,4.745614,1502.667335,...,-0.220318,-0.183163,-0.292251,-0.429299,0.183206,0.087719,-0.645714,-0.167598,-0.09697,0.40566
4,"Bakersfield, CA",197040.032884,209423.737718,245103.109015,281410.581969,284636.817264,0.2175,0.9775,3.97,1270.493917,...,-0.293087,-0.125573,-0.021591,-0.141293,0.438806,0.746377,-0.357333,-0.245325,-0.035162,0.573394


In [6]:
# Report the number of rows at each region level, one "<label><count>" line
# per level, in the order zip, county, metro.
print(
    *(
        f'{prefix}{len(frame)}'
        for prefix, frame in (
            ('Zips: ', zip_df),
            ('Counties: ', county_df),
            ('Metro: ', metro_df),
        )
    ),
    sep='\n',
)

Zips: 124315
Counties: 1623
Metro: 83
