### Data Dictionary

In [35]:
# Load the data dictionary and show it with the long definitions untruncated.
# NOTE(review): this cell sits above the "Import Packages" cell, so on a fresh
# kernel `pd` is not defined yet when running top to bottom - confirm cell order.
data_dict = pd.read_csv('../references/DataDictionary.csv', encoding='latin-1')

# option_context widens the column only for this display and then restores the
# previous setting (even if display raises); reset_option would instead force
# the pandas default and could be skipped entirely on an error.
with pd.option_context('display.max_colwidth', 1000):
    display(data_dict)

Unnamed: 0,Variable,Definition
0,AgeOfInventory,"Each Wednesday, age of inventory is calculated as the median number of days all active listings as of that Wednesday have been current. These medians are then aggregated into the number reported by taking the median across weekly values."
1,DaysOnZillow,"The median days on market of homes sold within a given month, including foreclosure re-sales. The latest data is for one month prior to the current ZHVI (e.g., if the most current month for ZHVI data is January, the most current month for Days on Zillow data will be December)."
2,HomesSoldAsForeclosuresRatio,"The number of homes (per 10,000 homes) that were foreclosed upon in a given month. A foreclosure occurs when a homeowner loses their home to their lending institution or it is sold to a third party at an auction."
3,InventorySeasonallyAdjusted,A seasonally adjusted measure of the median of weekly snapshot of for-sale homes within a region for a given month
4,InventoryRaw,Median of weekly snapshot of for-sale homes within a region for a given month
5,MedianListingPricePerSqft,Median of list prices divided by the square footage of a home
6,MedianListingPrice,Median of the list price (or asking price) for homes listed on Zillow
7,MedianPctOfPriceReduction,Median of the percentage price reduction for homes with a price reduction during the month
8,MedianPriceCutDollar,Median of the price reduction for homes with a price reduction during the month
9,MedianRentalPricePerSqft,Median of the rental price per square foot of homes listed for rent on Zillow in a given region


### Import Packages

In [45]:
import pandas as pd 
import numpy as np
from zipfile import ZipFile
import json

import os

# Append the parent of the current working directory to sys.path so that packages there (e.g. `src`) can be imported in this notebook
import sys
import pathlib
sys.path.append(str(pathlib.Path().absolute().parent))

from src import helper

### Load Data

In [2]:
# Show the entries of the raw data directory, sorted by name, to see what is available to load.
sorted(entry.name for entry in os.scandir('../data/raw'))

['City_time_series.csv.zip',
 'CountyCrossWalk_Zillow.csv',
 'County_time_series.csv.zip',
 'Metro_time_series.csv.zip',
 'Neighborhood_time_series.csv.zip',
 'State_time_series.csv.zip',
 'Zip_time_series.csv.zip',
 'all_available_metrics.json',
 'cities_crosswalk.csv.zip',
 'fields_per_level.json',
 'unzipped']

In [19]:
# Load every raw CSV into `csv_dfs`, keyed by the file name without its '.csv' suffix.
RAW_DIR = '../data/raw'
UNZIPPED_DIR = f'{RAW_DIR}/unzipped'
ZIP_SUFFIX = '.zip'
CSV_SUFFIX = '.csv'

csv_dfs = {}

# Pass 1: unpack each '<name>.csv.zip' into 'unzipped/<name>.csv/' and read
# plain '<name>.csv' files directly.
for filename in sorted(os.listdir(RAW_DIR)):
    # endswith (rather than a substring test) guarantees the suffix really is
    # at the end, so the slicing below always strips exactly that suffix.
    if filename.endswith(CSV_SUFFIX + ZIP_SUFFIX):
        with ZipFile(f'{RAW_DIR}/{filename}', 'r') as a_zip:
            a_zip.extractall(f'{UNZIPPED_DIR}/{filename[:-len(ZIP_SUFFIX)]}')

    elif filename.endswith(CSV_SUFFIX):
        csv_dfs[filename[:-len(CSV_SUFFIX)]] = pd.read_csv(f'{RAW_DIR}/{filename}')

# Make sure the extraction directory exists even when there was nothing to unzip.
os.makedirs(UNZIPPED_DIR, exist_ok=True)

# Pass 2: read the extracted CSVs. Each archive is assumed to contain a single
# member named like the archive minus '.zip' (i.e. 'unzipped/<name>.csv/<name>.csv')
# - TODO confirm for any newly added archive.
for extracted_name in sorted(os.listdir(UNZIPPED_DIR)):
    csv_path = f'{UNZIPPED_DIR}/{extracted_name}/{extracted_name}'
    # Skip stray entries (e.g. OS metadata files) instead of crashing in read_csv.
    if not (extracted_name.endswith(CSV_SUFFIX) and os.path.isfile(csv_path)):
        continue
    csv_dfs[extracted_name[:-len(CSV_SUFFIX)]] = pd.read_csv(csv_path, engine='python')

In [20]:
# Sanity check: list the names under which the loaded DataFrames are stored.
csv_dfs.keys()

dict_keys(['CountyCrossWalk_Zillow', 'City_time_series', 'County_time_series', 'Metro_time_series', 'Neighborhood_time_series', 'State_time_series', 'Zip_time_series', 'cities_crosswalk'])

### Inspect Data
- CountyCrosswalk
- cities_crosswalk


- State_TS
- County_TS
- Metro_TS
- City_TS
- Neighborhood_TS
- Zip_TS

In [47]:
# Give each loaded DataFrame its own name, in the order they are inspected below.

# Crosswalk (lookup) tables
county_crosswalk = csv_dfs['CountyCrossWalk_Zillow']
cities_crosswalk = csv_dfs['cities_crosswalk']

# Time series, from the coarsest to the finest region level
state_ts = csv_dfs['State_time_series']
county_ts = csv_dfs['County_time_series']
metro_ts = csv_dfs['Metro_time_series']
city_ts = csv_dfs['City_time_series']
neighborhood_ts = csv_dfs['Neighborhood_time_series']
zip_ts = csv_dfs['Zip_time_series']

#### County Crosswalk

In [46]:
# High-level look at the county crosswalk. In the saved output: 3,144 rows, and the
# metro/CBSA columns are filled for only ~1,800 of them (MetroName_Zillow has 4 fewer
# non-null values than CBSAName).
helper.high_level_inspect_df(county_crosswalk)

Unnamed: 0,CountyName,StateName,StateFIPS,CountyFIPS,MetroName_Zillow,CBSAName,CountyRegionID_Zillow,MetroRegionID_Zillow,FIPS,CBSACode
0,Pike,Pennsylvania,42,103,"New York, NY","New York-Newark-Jersey City, NY-NJ-PA",280,394913.0,42103,35620.0
1,Bronx,New York,36,5,"New York, NY","New York-Newark-Jersey City, NY-NJ-PA",401,394913.0,36005,35620.0
2,Essex,New Jersey,34,13,"New York, NY","New York-Newark-Jersey City, NY-NJ-PA",504,394913.0,34013,35620.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3144 entries, 0 to 3143
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   CountyName             3144 non-null   object 
 1   StateName              3144 non-null   object 
 2   StateFIPS              3144 non-null   int64  
 3   CountyFIPS             3144 non-null   int64  
 4   MetroName_Zillow       1804 non-null   object 
 5   CBSAName               1808 non-null   object 
 6   CountyRegionID_Zillow  3144 non-null   int64  
 7   MetroRegionID_Zillow   1808 non-null   float64
 8   FIPS                   3144 non-null   int64  
 9   CBSACode               1808 non-null   float64
dtypes: float64(2), int64(4), object(4)
memory usage: 245.8+ KB


None

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
StateFIPS,3144.0,30.293257,15.148534,1.0,18.0,29.0,45.0,56.0
CountyFIPS,3144.0,103.888995,108.263119,1.0,35.0,79.0,133.0,840.0
CountyRegionID_Zillow,3144.0,2115.513041,17602.486078,66.0,868.75,1669.5,2478.25,698720.0
MetroRegionID_Zillow,1808.0,407681.163717,66877.217822,394297.0,394520.75,394798.0,395030.0,753929.0
FIPS,3144.0,30397.145992,15167.003099,1001.0,18178.5,29178.0,45083.5,56045.0
CBSACode,1808.0,29644.441372,11478.65735,10100.0,19100.0,29800.0,39510.0,49820.0


CountyName : 1853 unique values.
StateName : 51 unique values.
MetroName_Zillow : 914 unique values.
CBSAName : 917 unique values.


#### Cities Crosswalk

In [48]:
# High-level look at the cities crosswalk. In the saved output: 25,341 rows with no
# nulls; Unique_City_ID is unique per row while City names repeat (16,609 distinct).
helper.high_level_inspect_df(cities_crosswalk)

Unnamed: 0,Unique_City_ID,City,County,State
0,oak_grovechristianky,Oak Grove,Christian,KY
1,jarvisburgcurritucknc,Jarvisburg,Currituck,NC
2,mcminnvilleyamhillor,McMinnville,Yamhill,OR


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25341 entries, 0 to 25340
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Unique_City_ID  25341 non-null  object
 1   City            25341 non-null  object
 2   County          25341 non-null  object
 3   State           25341 non-null  object
dtypes: object(4)
memory usage: 792.0+ KB


None

Unnamed: 0,count,unique,top,freq
Unique_City_ID,25341,25341,maquonknoxil,1
City,25341,16609,Franklin,28
County,25341,1652,Jefferson,392
State,25341,51,PA,2692


Unique_City_ID : 25341 unique values.
City : 16609 unique values.
County : 1652 unique values.
State : 51 unique values.


#### State time series

In [49]:
# High-level look at the state time series. In the saved output: 13,212 rows x 82
# columns, Date is read as object (not parsed as datetime), 52 distinct RegionName values.
helper.high_level_inspect_df(state_ts)

Unnamed: 0,Date,RegionName,DaysOnZillow_AllHomes,InventorySeasonallyAdjusted_AllHomes,InventoryRaw_AllHomes,MedianListingPricePerSqft_1Bedroom,MedianListingPricePerSqft_2Bedroom,MedianListingPricePerSqft_3Bedroom,MedianListingPricePerSqft_4Bedroom,MedianListingPricePerSqft_5BedroomOrMore,...,ZHVI_BottomTier,ZHVI_CondoCoop,ZHVI_MiddleTier,ZHVI_SingleFamilyResidence,ZHVI_TopTier,ZRI_AllHomes,ZRI_AllHomesPlusMultifamily,ZriPerSqft_AllHomes,Zri_MultiFamilyResidenceRental,Zri_SingleFamilyResidenceRental
0,1996-04-30,Alabama,,,,,,,,,...,45600.0,99500.0,79500.0,79000.0,140200.0,,,,,
1,1996-04-30,Arizona,,,,,,,,,...,67100.0,78900.0,103600.0,107500.0,168700.0,,,,,
2,1996-04-30,Arkansas,,,,,,,,,...,38400.0,70300.0,64400.0,64500.0,115200.0,,,,,


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13212 entries, 0 to 13211
Data columns (total 82 columns):
 #   Column                                                         Non-Null Count  Dtype  
---  ------                                                         --------------  -----  
 0   Date                                                           13212 non-null  object 
 1   RegionName                                                     13212 non-null  object 
 2   DaysOnZillow_AllHomes                                          4845 non-null   float64
 3   InventorySeasonallyAdjusted_AllHomes                           4896 non-null   float64
 4   InventoryRaw_AllHomes                                          4896 non-null   float64
 5   MedianListingPricePerSqft_1Bedroom                             3586 non-null   float64
 6   MedianListingPricePerSqft_2Bedroom                             4534 non-null   float64
 7   MedianListingPricePerSqft_3Bedroom                        

None

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
DaysOnZillow_AllHomes,4845.0,110.117363,27.474195,49.250000,90.250000,108.500000,126.750000,251.625000
InventorySeasonallyAdjusted_AllHomes,4896.0,33292.662786,34926.704422,972.000000,9828.500000,21712.500000,47453.000000,260687.000000
InventoryRaw_AllHomes,4896.0,33299.013685,35014.163714,911.000000,9756.250000,21289.000000,46891.000000,268055.000000
MedianListingPricePerSqft_1Bedroom,3586.0,182.470536,99.809488,57.142857,125.688504,162.754210,202.629803,627.551020
MedianListingPricePerSqft_2Bedroom,4534.0,135.490657,74.250919,60.000000,92.158518,121.300723,152.242031,550.639135
...,...,...,...,...,...,...,...,...
ZRI_AllHomes,4254.0,1321.329807,371.006175,799.000000,1047.000000,1210.000000,1474.000000,2690.000000
ZRI_AllHomesPlusMultifamily,4336.0,1318.055581,368.783157,799.000000,1036.000000,1210.000000,1477.000000,2653.000000
ZriPerSqft_AllHomes,4336.0,0.929661,0.284986,0.560000,0.728000,0.862000,1.074000,2.294000
Zri_MultiFamilyResidenceRental,4336.0,1233.050277,369.426763,713.000000,959.750000,1126.000000,1399.500000,2606.000000


Date : 261 unique values.
RegionName : 52 unique values.


In [60]:
# Print how many rows each state has, in order of first appearance.
states = state_ts['RegionName'].unique().tolist()

# Count all states in a single pass instead of re-filtering the whole frame once
# per state; sort=False keeps the groups in order of first appearance.
records_per_state = state_ts.groupby('RegionName', sort=False).size()

for state in states:
    # .get(..., 0) reports 0 for a label that groupby drops (e.g. a missing name).
    print(f'There are {records_per_state.get(state, 0)} records in {state}.')

There are 261 records in Alabama.
There are 261 records in Arizona.
There are 261 records in Arkansas.
There are 261 records in California.
There are 261 records in Colorado.
There are 261 records in Connecticut.
There are 261 records in Delaware.
There are 261 records in Florida.
There are 261 records in Georgia.
There are 261 records in Hawaii.
There are 261 records in Idaho.
There are 261 records in Illinois.
There are 261 records in Indiana.
There are 261 records in Iowa.
There are 261 records in Kansas.
There are 261 records in Kentucky.
There are 261 records in Louisiana.
There are 261 records in Maine.
There are 261 records in Maryland.
There are 261 records in Massachusetts.
There are 261 records in Michigan.
There are 261 records in Minnesota.
There are 261 records in Mississippi.
There are 261 records in Missouri.
There are 261 records in Nebraska.
There are 261 records in Nevada.
There are 261 records in NewHampshire.
There are 261 records in NewJersey.
There are 261 records

#### County time series

In [50]:
# High-level look at the county time series. In the saved output RegionName is read as
# int64 (values such as 1003, max 56045) - presumably county FIPS codes whose leading
# zeros were dropped by the integer parse; confirm before joining on it.
helper.high_level_inspect_df(county_ts)

Unnamed: 0,Date,RegionName,DaysOnZillow_AllHomes,InventorySeasonallyAdjusted_AllHomes,InventoryRaw_AllHomes,MedianListingPricePerSqft_1Bedroom,MedianListingPricePerSqft_2Bedroom,MedianListingPricePerSqft_3Bedroom,MedianListingPricePerSqft_4Bedroom,MedianListingPricePerSqft_5BedroomOrMore,...,ZHVI_BottomTier,ZHVI_CondoCoop,ZHVI_MiddleTier,ZHVI_SingleFamilyResidence,ZHVI_TopTier,ZRI_AllHomes,ZRI_AllHomesPlusMultifamily,ZriPerSqft_AllHomes,Zri_MultiFamilyResidenceRental,Zri_SingleFamilyResidenceRental
0,1996-04-30,10001,,,,,,,,,...,65600.0,,85700.0,86000.0,129700.0,,,,,
1,1996-04-30,10003,,,,,,,,,...,81900.0,,116800.0,116900.0,181400.0,,,,,
2,1996-04-30,1003,,,,,,,,,...,70600.0,,110600.0,106800.0,192300.0,,,,,


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 518791 entries, 0 to 518790
Data columns (total 82 columns):
 #   Column                                                         Non-Null Count   Dtype  
---  ------                                                         --------------   -----  
 0   Date                                                           518791 non-null  object 
 1   RegionName                                                     518791 non-null  int64  
 2   DaysOnZillow_AllHomes                                          43692 non-null   float64
 3   InventorySeasonallyAdjusted_AllHomes                           177024 non-null  float64
 4   InventoryRaw_AllHomes                                          177024 non-null  float64
 5   MedianListingPricePerSqft_1Bedroom                             9432 non-null    float64
 6   MedianListingPricePerSqft_2Bedroom                             66693 non-null   float64
 7   MedianListingPricePerSqft_3Bedroom             

None

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
RegionName,518791.0,30740.207658,15197.121398,1001.000000,18087.00000,29209.000000,45075.000000,56045.000000
DaysOnZillow_AllHomes,43692.0,112.776313,33.901225,10.000000,89.25000,110.125000,131.750000,827.000000
InventorySeasonallyAdjusted_AllHomes,177024.0,853.768884,1830.864948,6.000000,131.00000,320.000000,813.000000,39750.000000
InventoryRaw_AllHomes,177024.0,853.934393,1835.198584,6.000000,130.00000,319.000000,810.000000,41502.000000
MedianListingPricePerSqft_1Bedroom,9432.0,246.774545,185.323088,42.471591,137.48622,187.849166,293.333333,1768.488746
...,...,...,...,...,...,...,...,...
ZRI_AllHomes,150077.0,1163.598939,347.136641,567.000000,938.00000,1096.000000,1278.000000,4445.000000
ZRI_AllHomesPlusMultifamily,151251.0,1157.171807,339.960046,559.000000,936.00000,1091.000000,1271.000000,4548.000000
ZriPerSqft_AllHomes,139194.0,0.790006,0.264212,0.352000,0.64200,0.738000,0.864000,5.212000
Zri_MultiFamilyResidenceRental,97008.0,1083.978713,341.784842,423.000000,856.00000,1013.000000,1219.000000,4796.000000


Date : 261 unique values.


#### Metro time series

In [54]:
# High-level look at the metro time series. In the saved output: 211,182 rows x 95
# columns; RegionName is read as object even though the first rows show numeric-looking
# codes (911 distinct values).
helper.high_level_inspect_df(metro_ts)

Unnamed: 0,Date,RegionName,AgeOfInventory,DaysOnZillow_AllHomes,InventorySeasonallyAdjusted_AllHomes,InventoryRaw_AllHomes,InventorySeasonallyAdjusted_BottomTier,InventorySeasonallyAdjusted_MiddleTier,InventorySeasonallyAdjusted_TopTier,MedianListingPricePerSqft_1Bedroom,...,ZHVI_BottomTier,ZHVI_CondoCoop,ZHVI_MiddleTier,ZHVI_SingleFamilyResidence,ZHVI_TopTier,ZRI_AllHomes,ZRI_AllHomesPlusMultifamily,ZriPerSqft_AllHomes,Zri_MultiFamilyResidenceRental,Zri_SingleFamilyResidenceRental
0,1996-04-30,10180,,,,,,,,,...,,,,,,,,,,
1,1996-04-30,10220,,,,,,,,,...,,,28600.0,28600.0,,,,,,
2,1996-04-30,10300,,,,,,,,,...,53000.0,,78500.0,78600.0,124800.0,,,,,


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211182 entries, 0 to 211181
Data columns (total 95 columns):
 #   Column                                                         Non-Null Count   Dtype  
---  ------                                                         --------------   -----  
 0   Date                                                           211182 non-null  object 
 1   RegionName                                                     211182 non-null  object 
 2   AgeOfInventory                                                 24696 non-null   float64
 3   DaysOnZillow_AllHomes                                          24982 non-null   float64
 4   InventorySeasonallyAdjusted_AllHomes                           68352 non-null   float64
 5   InventoryRaw_AllHomes                                          68352 non-null   float64
 6   InventorySeasonallyAdjusted_BottomTier                         39467 non-null   float64
 7   InventorySeasonallyAdjusted_MiddleTier         

None

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
AgeOfInventory,24696.0,90.707766,28.587343,15.000,70.00,88.000,109.000,221.000
DaysOnZillow_AllHomes,24982.0,112.709322,31.178244,25.000,91.00,111.125,131.250,827.000
InventorySeasonallyAdjusted_AllHomes,68352.0,4508.221983,65064.674744,19.000,272.00,618.000,1553.000,2316024.000
InventoryRaw_AllHomes,68352.0,4509.104269,65138.626469,18.000,271.00,615.000,1546.000,2444011.000
InventorySeasonallyAdjusted_BottomTier,39467.0,1764.748803,20065.256544,8.000,119.00,246.000,657.000,586866.000
...,...,...,...,...,...,...,...,...
ZRI_AllHomes,58028.0,1135.414645,315.022427,567.000,935.00,1080.000,1240.000,3716.000
ZRI_AllHomesPlusMultifamily,58221.0,1128.008348,307.588897,559.000,933.00,1073.000,1234.000,3674.000
ZriPerSqft_AllHomes,56812.0,0.775399,0.212946,0.392,0.64,0.730,0.854,2.618
Zri_MultiFamilyResidenceRental,46731.0,1019.547859,287.047627,498.000,835.00,959.000,1130.000,3153.000


Date : 261 unique values.
RegionName : 911 unique values.


#### City time series

In [51]:
# High-level look at the city time series. In the saved output: 3,762,566 rows x 81
# columns; RegionName values (e.g. 'aberdeenharfordmd') look like the Unique_City_ID
# format of the cities crosswalk - presumably the join key, TODO confirm.
helper.high_level_inspect_df(city_ts)

Unnamed: 0,Date,RegionName,InventorySeasonallyAdjusted_AllHomes,InventoryRaw_AllHomes,MedianListingPricePerSqft_1Bedroom,MedianListingPricePerSqft_2Bedroom,MedianListingPricePerSqft_3Bedroom,MedianListingPricePerSqft_4Bedroom,MedianListingPricePerSqft_5BedroomOrMore,MedianListingPricePerSqft_AllHomes,...,ZHVI_BottomTier,ZHVI_CondoCoop,ZHVI_MiddleTier,ZHVI_SingleFamilyResidence,ZHVI_TopTier,ZRI_AllHomes,ZRI_AllHomesPlusMultifamily,ZriPerSqft_AllHomes,Zri_MultiFamilyResidenceRental,Zri_SingleFamilyResidenceRental
0,1996-04-30,abbottstownadamspa,,,,,,,,,...,,,,,108700.0,,,,,
1,1996-04-30,aberdeenbinghamid,,,,,,,,,...,,,,,168400.0,,,,,
2,1996-04-30,aberdeenharfordmd,,,,,,,,,...,81300.0,137900.0,109600.0,108600.0,147900.0,,,,,


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3762566 entries, 0 to 3762565
Data columns (total 81 columns):
 #   Column                                                         Dtype  
---  ------                                                         -----  
 0   Date                                                           object 
 1   RegionName                                                     object 
 2   InventorySeasonallyAdjusted_AllHomes                           float64
 3   InventoryRaw_AllHomes                                          float64
 4   MedianListingPricePerSqft_1Bedroom                             float64
 5   MedianListingPricePerSqft_2Bedroom                             float64
 6   MedianListingPricePerSqft_3Bedroom                             float64
 7   MedianListingPricePerSqft_4Bedroom                             float64
 8   MedianListingPricePerSqft_5BedroomOrMore                       float64
 9   MedianListingPricePerSqft_AllHomes            

None

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
InventorySeasonallyAdjusted_AllHomes,771744.0,163.438201,451.979813,3.000000,37.000000,72.000000,154.000000,29395.000000
InventoryRaw_AllHomes,771744.0,163.469371,452.845056,2.000000,37.000000,72.000000,154.000000,30031.000000
MedianListingPricePerSqft_1Bedroom,7670.0,260.912038,169.798120,26.422764,152.631579,211.220755,326.499714,1129.518072
MedianListingPricePerSqft_2Bedroom,74574.0,167.764079,118.391115,18.564356,95.744681,133.341651,195.039238,1313.762626
MedianListingPricePerSqft_3Bedroom,250633.0,145.076803,93.495189,19.561061,93.259047,120.592248,163.556465,2071.403891
...,...,...,...,...,...,...,...,...
ZRI_AllHomes,1098167.0,1402.930083,750.948172,470.000000,997.000000,1227.000000,1555.000000,23325.000000
ZRI_AllHomesPlusMultifamily,1100850.0,1391.670705,728.289036,470.000000,993.000000,1222.000000,1545.000000,22846.000000
ZriPerSqft_AllHomes,1009397.0,0.910898,0.336117,0.344000,0.698000,0.832000,1.020000,6.496000
Zri_MultiFamilyResidenceRental,450219.0,1322.360111,528.027246,465.000000,963.000000,1220.000000,1529.000000,15852.000000


Date : 261 unique values.
RegionName : 16636 unique values.


#### Neighborhood time series

In [52]:
# High-level look at the neighborhood time series. In the saved output: 1,695,038 rows
# x 76 columns; RegionName is read as object although the first rows show
# numeric-looking IDs (7,449 distinct values).
helper.high_level_inspect_df(neighborhood_ts)

Unnamed: 0,Date,RegionName,InventorySeasonallyAdjusted_AllHomes,InventoryRaw_AllHomes,MedianListingPricePerSqft_1Bedroom,MedianListingPricePerSqft_2Bedroom,MedianListingPricePerSqft_3Bedroom,MedianListingPricePerSqft_4Bedroom,MedianListingPricePerSqft_5BedroomOrMore,MedianListingPricePerSqft_AllHomes,...,ZHVI_BottomTier,ZHVI_CondoCoop,ZHVI_MiddleTier,ZHVI_SingleFamilyResidence,ZHVI_TopTier,ZRI_AllHomes,ZRI_AllHomesPlusMultifamily,ZriPerSqft_AllHomes,Zri_MultiFamilyResidenceRental,Zri_SingleFamilyResidenceRental
0,1996-04-30,10007,,,,,,,,,...,,,,,147300.0,,,,,
1,1996-04-30,10329,,,,,,,,,...,231300.0,,,,,,,,,
2,1996-04-30,104898,,,,,,,,,...,50500.0,,72500.0,72500.0,106200.0,,,,,


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1695038 entries, 0 to 1695037
Data columns (total 76 columns):
 #   Column                                                         Dtype  
---  ------                                                         -----  
 0   Date                                                           object 
 1   RegionName                                                     object 
 2   InventorySeasonallyAdjusted_AllHomes                           float64
 3   InventoryRaw_AllHomes                                          float64
 4   MedianListingPricePerSqft_1Bedroom                             float64
 5   MedianListingPricePerSqft_2Bedroom                             float64
 6   MedianListingPricePerSqft_3Bedroom                             float64
 7   MedianListingPricePerSqft_4Bedroom                             float64
 8   MedianListingPricePerSqft_5BedroomOrMore                       float64
 9   MedianListingPricePerSqft_AllHomes            

None

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
InventorySeasonallyAdjusted_AllHomes,201408.0,78.207301,116.279778,3.000000,28.000000,46.000000,84.000000,2514.000000
InventoryRaw_AllHomes,201408.0,78.220940,116.704730,3.000000,28.000000,46.000000,84.000000,2617.000000
MedianListingPricePerSqft_1Bedroom,2780.0,485.845341,448.100647,68.869835,217.983871,319.072749,517.241379,2364.491150
MedianListingPricePerSqft_2Bedroom,11473.0,353.179001,416.904876,34.556230,133.858268,233.004670,390.674821,3129.560400
MedianListingPricePerSqft_3Bedroom,34393.0,195.918099,260.892989,24.088094,97.716438,133.602869,196.592398,4617.817244
...,...,...,...,...,...,...,...,...
ZRI_AllHomes,554742.0,1609.351226,1007.138431,407.000000,1035.000000,1347.000000,1885.000000,35922.000000
ZRI_AllHomesPlusMultifamily,554828.0,1581.790773,974.824301,409.000000,1022.000000,1332.000000,1847.000000,34955.000000
ZriPerSqft_AllHomes,546398.0,1.129954,0.592957,0.212000,0.742000,0.944000,1.334000,9.062000
Zri_MultiFamilyResidenceRental,335732.0,1431.808993,689.340929,397.000000,935.000000,1256.000000,1739.000000,7765.000000


Date : 261 unique values.
RegionName : 7449 unique values.


#### Zip time series

In [53]:
# High-level look at the zip time series. In the saved output: 4,383,885 rows x 76
# columns; RegionName is read as int64 (values such as 1001) - presumably ZIP codes
# whose leading zeros were dropped by the integer parse; confirm before joining on it.
helper.high_level_inspect_df(zip_ts)

Unnamed: 0,Date,RegionName,InventorySeasonallyAdjusted_AllHomes,InventoryRaw_AllHomes,MedianListingPricePerSqft_1Bedroom,MedianListingPricePerSqft_2Bedroom,MedianListingPricePerSqft_3Bedroom,MedianListingPricePerSqft_4Bedroom,MedianListingPricePerSqft_5BedroomOrMore,MedianListingPricePerSqft_AllHomes,...,ZHVI_BottomTier,ZHVI_CondoCoop,ZHVI_MiddleTier,ZHVI_SingleFamilyResidence,ZHVI_TopTier,ZRI_AllHomes,ZRI_AllHomesPlusMultifamily,ZriPerSqft_AllHomes,Zri_MultiFamilyResidenceRental,Zri_SingleFamilyResidenceRental
0,1996-04-30,1001,,,,,,,,,...,68700.0,67000.0,101900.0,107000.0,124800.0,,,,,
1,1996-04-30,1002,,,,,,,,,...,97000.0,81300.0,135200.0,145800.0,213600.0,,,,,
2,1996-04-30,1005,,,,,,,,,...,85400.0,,101200.0,101200.0,125700.0,,,,,


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4383885 entries, 0 to 4383884
Data columns (total 76 columns):
 #   Column                                                         Dtype  
---  ------                                                         -----  
 0   Date                                                           object 
 1   RegionName                                                     int64  
 2   InventorySeasonallyAdjusted_AllHomes                           float64
 3   InventoryRaw_AllHomes                                          float64
 4   MedianListingPricePerSqft_1Bedroom                             float64
 5   MedianListingPricePerSqft_2Bedroom                             float64
 6   MedianListingPricePerSqft_3Bedroom                             float64
 7   MedianListingPricePerSqft_4Bedroom                             float64
 8   MedianListingPricePerSqft_5BedroomOrMore                       float64
 9   MedianListingPricePerSqft_AllHomes            

None

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
RegionName,4383885.0,46837.261770,28833.342816,745.000000,21830.000000,44669.000000,72718.000000,99901.000000
InventorySeasonallyAdjusted_AllHomes,1051104.0,123.049029,117.287520,3.000000,46.000000,88.000000,160.000000,2560.000000
InventoryRaw_AllHomes,1051104.0,123.072143,118.017261,2.000000,46.000000,88.000000,160.000000,2639.000000
MedianListingPricePerSqft_1Bedroom,8194.0,322.914946,301.027216,24.793388,155.056896,230.835830,380.292972,2128.129602
MedianListingPricePerSqft_2Bedroom,86554.0,188.769142,186.646266,21.045918,97.826937,139.332358,215.616002,2746.913580
...,...,...,...,...,...,...,...,...
ZRI_AllHomes,1337362.0,1429.687799,719.716319,411.000000,1006.000000,1246.000000,1613.000000,18375.000000
ZRI_AllHomesPlusMultifamily,1339353.0,1414.117267,691.326536,411.000000,1000.000000,1238.000000,1598.000000,17808.000000
ZriPerSqft_AllHomes,1253969.0,0.948460,0.424044,0.276000,0.698000,0.834000,1.052000,6.496000
Zri_MultiFamilyResidenceRental,723542.0,1325.079733,568.827402,414.000000,933.000000,1194.000000,1549.000000,15891.000000


Date : 261 unique values.
