# Imports

In [29]:
# Import libraries
import pandas as pd
import numpy as np
from pathlib import Path

In [30]:
# Locate the project folders relative to the notebook's working directory
PROJECT_ROOT = Path(".").resolve().parent  # one level above the current working directory
DATA_RAW = PROJECT_ROOT.joinpath("data", "raw")

In [31]:
# Read the county-level ZHVI CSV from the raw-data folder
zhvi_path = DATA_RAW.joinpath("County_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv")
df = pd.read_csv(filepath_or_buffer=zhvi_path)

# Review DataFrame

In [32]:
# Preview the first five rows of the raw table
df.head(n=5)

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,2000-01-31,...,2025-04-30,2025-05-31,2025-06-30,2025-07-31,2025-08-31,2025-09-30,2025-10-31,2025-11-30,2025-12-31,2026-01-31
0,3101,0,Los Angeles County,county,CA,CA,"Los Angeles-Long Beach-Anaheim, CA",6,37,207378.139086,...,867632.681133,862661.764204,858520.151316,856622.139882,856586.892712,858660.997572,861457.187977,864644.681242,868121.048715,869779.688397
1,139,1,Cook County,county,IL,IL,"Chicago-Naperville-Elgin, IL-IN-WI",17,31,145233.876893,...,305530.329774,305517.804006,305528.69637,306245.743513,307136.890573,308465.410472,309734.510842,311204.053706,312760.209619,314157.004441
2,1090,2,Harris County,county,TX,TX,"Houston-The Woodlands-Sugar Land, TX",48,201,110482.270951,...,281842.195255,280848.87355,279813.187274,278960.376724,278213.144299,277666.065467,277216.424846,276987.919005,277033.758614,276889.152202
3,2402,3,Maricopa County,county,AZ,AZ,"Phoenix-Mesa-Chandler, AZ",4,13,144511.068258,...,463419.137829,461388.955195,459510.62871,457492.403994,455839.626275,454769.654599,454342.106269,454563.676008,455339.96157,456184.043231
4,2841,4,San Diego County,county,CA,CA,"San Diego-Chula Vista-Carlsbad, CA",6,73,210703.817869,...,919396.983969,914664.101695,909523.90246,905227.788816,901869.469439,900047.881347,900016.924724,900997.603262,903114.130925,904312.219896


In [33]:
# (rows, columns) of the raw table as loaded
df.shape

(3073, 322)

## Check for completeness and duplicates

In [34]:
# Count rows per RegionType; dropna=False keeps missing labels as their own bucket
df["RegionType"].value_counts(dropna=False)

RegionType
county    3073
Name: count, dtype: int64

In [35]:
# List each distinct (State, StateName) pairing found in the data
df.loc[:, ["State", "StateName"]].drop_duplicates()

Unnamed: 0,State,StateName
0,CA,CA
1,IL,IL
2,TX,TX
3,AZ,AZ
6,NY,NY
7,FL,FL
11,WA,WA
12,NV,NV
18,MI,MI
21,MA,MA


# Filter to Florida counties

In [36]:
# Keep only the rows whose State code is "FL"; copy so the result is independent of df
is_florida = df["State"].eq("FL")
df_fl = df.loc[is_florida].copy()

In [37]:
# (rows, columns) after keeping only the rows whose State is "FL"
df_fl.shape

(67, 322)

In [38]:
# Peek at the first five region names in the Florida subset
df_fl.loc[:, "RegionName"].head(n=5)

7       Miami-Dade County
17         Broward County
25      Palm Beach County
27    Hillsborough County
28          Orange County
Name: RegionName, dtype: object

# Identify the date columns

In [39]:
# Reference column names: list the headers to tell the identifier columns
# apart from the date-named columns
df_fl.columns

Index(['RegionID', 'SizeRank', 'RegionName', 'RegionType', 'StateName',
       'State', 'Metro', 'StateCodeFIPS', 'MunicipalCodeFIPS', '2000-01-31',
       ...
       '2025-04-30', '2025-05-31', '2025-06-30', '2025-07-31', '2025-08-31',
       '2025-09-30', '2025-10-31', '2025-11-30', '2025-12-31', '2026-01-31'],
      dtype='object', length=322)

In [40]:
# Identifier (non-date) columns, listed in the order they appear in the table
id_cols = [
    "RegionID",
    "SizeRank",
    "RegionName",
    "RegionType",
    "StateName",
    "State",
    "Metro",
    "StateCodeFIPS",
    "MunicipalCodeFIPS",
]

In [41]:
# Every column that is not an identifier column is treated as a date column
date_cols = [col for col in df_fl.columns if col not in id_cols]

# Reshape wide format to long format

In [42]:
# Goal: move the dates from column headers into rows, so that time becomes a variable (one row per county per date)

In [43]:
# Stack the date columns into rows: one row per region per date, with the
# former column header stored in "date" and the cell value stored in "zhvi"
zhvi_long = pd.melt(
    df_fl,
    id_vars=id_cols,
    value_vars=date_cols,
    var_name="date",
    value_name="zhvi",
)

# Convert date column from string to datetime object

In [44]:
# Parse the date strings (the former column headers) into datetime values
zhvi_long = zhvi_long.assign(date=pd.to_datetime(zhvi_long["date"]))

In [45]:
# Verify change: the dtype listed for "date" should now be a datetime type;
# the non-null counts also show which columns contain missing values
zhvi_long.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20971 entries, 0 to 20970
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   RegionID           20971 non-null  int64         
 1   SizeRank           20971 non-null  int64         
 2   RegionName         20971 non-null  object        
 3   RegionType         20971 non-null  object        
 4   StateName          20971 non-null  object        
 5   State              20971 non-null  object        
 6   Metro              15963 non-null  object        
 7   StateCodeFIPS      20971 non-null  int64         
 8   MunicipalCodeFIPS  20971 non-null  int64         
 9   date               20971 non-null  datetime64[ns]
 10  zhvi               19709 non-null  float64       
dtypes: datetime64[ns](1), float64(1), int64(4), object(5)
memory usage: 1.8+ MB


# Sort Data

In [46]:
# Order rows by region name, then chronologically within each region
sort_keys = ["RegionName", "date"]
zhvi_long = zhvi_long.sort_values(by=sort_keys)

In [47]:
# Preview the first five rows after sorting
zhvi_long.head(n=5)

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,date,zhvi
22,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-01-31,105280.379237
89,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-02-29,105518.438659
156,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-03-31,105833.697224
223,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-04-30,106220.608362
290,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-05-31,106499.65454


# Create a year column (for Census dataset merge)

In [48]:
# Derive the calendar year from each date
zhvi_long = zhvi_long.assign(year=zhvi_long["date"].dt.year)

In [49]:
# Preview the first twenty rows to confirm the new year column matches the dates
zhvi_long.head(n=20)

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,date,zhvi,year
22,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-01-31,105280.379237,2000
89,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-02-29,105518.438659,2000
156,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-03-31,105833.697224,2000
223,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-04-30,106220.608362,2000
290,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-05-31,106499.65454,2000
357,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-06-30,106592.765411,2000
424,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-07-31,106842.281871,2000
491,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-08-31,107241.186029,2000
558,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-09-30,107775.546358,2000
625,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2000-10-31,108326.137802,2000


# Filter to 2020+

In [50]:
# Keep observations dated 2020 or later; copy so the result is independent of zhvi_long
from_2020 = zhvi_long["year"].ge(2020)
zhvi_recent = zhvi_long.loc[from_2020].copy()

# Check for duplicates and missing values

In [51]:
# Number of rows in the 2020+ subset with a missing zhvi value
zhvi_recent["zhvi"].isnull().sum()

np.int64(0)

In [52]:
# Number of rows that repeat an earlier (RegionName, date) combination
key_cols = ["RegionName", "date"]
zhvi_recent.duplicated(subset=key_cols).sum()

np.int64(0)

In [53]:
# Sanity check

In [54]:
# Display the final table; the rendered output is truncated to the first and last rows
zhvi_recent

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS,date,zhvi,year
16102,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2020-01-31,205945.224353,2020
16169,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2020-02-29,207639.389278,2020
16236,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2020-03-31,209186.222049,2020
16303,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2020-04-30,210416.781373,2020
16370,1509,251,Alachua County,county,FL,FL,"Gainesville, FL",12,1,2020-05-31,211241.441421,2020
...,...,...,...,...,...,...,...,...,...,...,...,...
20688,3050,1653,Washington County,county,FL,FL,,12,133,2025-09-30,212851.950315,2025
20755,3050,1653,Washington County,county,FL,FL,,12,133,2025-10-31,211852.240228,2025
20822,3050,1653,Washington County,county,FL,FL,,12,133,2025-11-30,210730.593741,2025
20889,3050,1653,Washington County,county,FL,FL,,12,133,2025-12-31,208866.861365,2025


# Exports

In [55]:
# Write the Florida 2020+ long-format ZHVI table to data/processed.
# The output folder is built from PROJECT_ROOT (the same way DATA_RAW is) instead of
# a bare "../" string, so the destination does not depend on the working directory
# at the moment this cell runs.
DATA_PROCESSED = PROJECT_ROOT / "data" / "processed"
# to_csv does not create missing parent folders, so make sure the folder exists first.
DATA_PROCESSED.mkdir(parents=True, exist_ok=True)
zhvi_recent.to_csv(DATA_PROCESSED / "zhvi_fl_long.csv", index=False)