## **Zillow Housing Data Statistics**

### **Data & Dependencies load**

In [1]:
import pandas as pd
import numpy as np

import warnings
# Silence all warnings for the rest of the session: with no category/message
# arguments the 'ignore' filter matches every warning that is raised.
warnings.filterwarnings(action='ignore')

# DataFrame rendering limits: show at most 100 rows, and never truncate columns
# (None removes the column cap).
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

In [2]:
# Data Load
#
# Read the two pickled DataFrames from the sibling ``data`` directory (paths are
# relative to the notebook's working directory).
# NOTE(review): unpickling can execute arbitrary code, so these files should only
# ever come from a trusted source.
zhvi = pd.read_pickle(filepath_or_buffer='../data/zhvi.pkl')
zori = pd.read_pickle(filepath_or_buffer='../data/zori.pkl')

In [3]:
# Add year, month, week number, day number, and day name
def add_date_parts(frame, date_col='Date'):
    """Add calendar breakdown columns derived from a datetime column, in place.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame to extend. It is modified in place.
    date_col : str, default 'Date'
        Name of the column read through the ``.dt`` accessor, so it must hold
        datetime-like values.

    Returns
    -------
    pandas.DataFrame
        The same ``frame`` object, now carrying ``Year``, ``Month``, ``Week``,
        ``DayNumber`` and ``DayName`` columns (added in that order).
    """
    dates = frame[date_col].dt
    frame['Year'] = dates.year
    frame['Month'] = dates.month
    # ISO-8601 week number. Near January 1 / December 31 the ISO week can belong
    # to a different ISO year than the calendar ``Year`` stored above.
    frame['Week'] = dates.isocalendar().week
    frame['DayNumber'] = dates.day
    frame['DayName'] = dates.day_name()
    return frame


# Apply the same derivation to both datasets. The helper returns the frame it
# mutated, so rebinding keeps the names pointing at the same objects and the
# cell produces no displayed output.
zhvi = add_date_parts(zhvi)
zori = add_date_parts(zori)

In [4]:
# ZHVI Head
# Preview the first five rows to confirm the derived date columns were added.
zhvi.head(n=5)

Unnamed: 0,Region,State,Date,ZHVI,Type,Year,Month,Week,DayNumber,DayName
1,"New York, NY",NY,2000-01-31,190767.9,Condo,2000,1,5,31,Monday
2,"Los Angeles, CA",CA,2000-01-31,177240.45,Condo,2000,1,5,31,Monday
3,"Chicago, IL",IL,2000-01-31,126942.5,Condo,2000,1,5,31,Monday
4,"Dallas, TX",TX,2000-01-31,88627.72,Condo,2000,1,5,31,Monday
5,"Houston, TX",TX,2000-01-31,83915.41,Condo,2000,1,5,31,Monday


In [5]:
# ZORI Head
# Preview the first five rows to confirm the derived date columns were added.
zori.head(n=5)

Unnamed: 0,Region,State,Date,ZORI,Type,Year,Month,Week,DayNumber,DayName
1,"New York, NY",NY,2015-01-31,2440.7,Single Family,2015,1,5,31,Saturday
2,"Los Angeles, CA",CA,2015-01-31,2577.07,Single Family,2015,1,5,31,Saturday
3,"Chicago, IL",IL,2015-01-31,1527.58,Single Family,2015,1,5,31,Saturday
4,"Dallas, TX",TX,2015-01-31,1372.17,Single Family,2015,1,5,31,Saturday
5,"Houston, TX",TX,2015-01-31,1496.71,Single Family,2015,1,5,31,Saturday


In [6]:
# Insights

def print_unique_values(label, frame):
    """Print a banner for one dataset followed by its distinct State and Type values.

    Parameters
    ----------
    label : str
        Dataset name shown inside the ``====`` banner line.
    frame : pandas.DataFrame
        Frame with ``State`` and ``Type`` columns to summarise.

    Returns
    -------
    None
        Output goes to stdout only.
    """
    print(f"================== {label} ==================\n")
    for column in ('State', 'Type'):
        print(f" ==> Unique values in {column} column are {frame[column].unique()}\n")


# ZHVI
print_unique_values('ZHVI', zhvi)

# ZORI
print_unique_values('ZORI', zori)


 ==> Unique values in State column are ['NY' 'CA' 'IL' 'TX' 'VA' 'PA' 'FL' 'GA' 'MA' 'AZ' 'MI' 'WA' 'MN' 'CO'
 'MD' 'MO' 'NC' 'OR' 'OH' 'NV' 'TN' 'RI' 'WI' 'OK' 'KY' 'LA' 'UT' 'CT'
 'HI' 'NE' 'SC' 'AR' 'IA' 'ME' 'DE' 'AK' 'NJ' 'AL' 'WV' 'KS' 'VT' 'ID'
 'NH' 'IN' 'NM' 'WY' 'MT' 'SD' 'MS' 'ND']

 ==> Unique values in Type column are ['Condo' 'One Bedroom' 'Single Family' 'Two Bedroom' 'Three Bedroom']


 ==> Unique values in State column are ['NY' 'CA' 'IL' 'TX' 'VA' 'PA' 'FL' 'GA' 'MA' 'AZ' 'MI' 'WA' 'MN' 'CO'
 'MD' 'MO' 'NC' 'OR' 'OH' 'NV' 'IN' 'TN' 'RI' 'WI' 'OK' 'KY' 'LA' 'UT'
 'CT' 'AL' 'HI' 'NE' 'SC' 'NM' 'ID' 'AR' 'IA' 'KS' 'MS' 'DE' 'AK' 'NJ'
 'MT' 'WV' 'SD' 'WY' 'ME' 'NH' 'VT' 'ND']

 ==> Unique values in Type column are ['Single Family' 'Multi Family']



### **Exploratory Data Analysis**

#### **1. Line Plot of ZHVI and ZORI over Time**

In [7]:
import plotly.express as px

# Line plot for ZHVI
# fig_zhvi = px.line(zhvi, x='Date', y='ZHVI', title='ZHVI over Time',
#                    labels={'Date': 'Date', 'ZHVI': 'ZHVI Value'}, height=600)
# fig_zhvi.update_xaxes(rangeslider_visible=True)
# fig_zhvi.update_layout(title_x=0.5, xaxis_title='Date', yaxis_title='ZHVI Value')
# fig_zhvi.update_layout(
#     margin=dict(l=20, r=20, t=20, b=20),
# )
# fig_zhvi.show()

In [8]:
# Line plot for ZORI
# fig_zori = px.line(zori, x='Date', y='ZORI', title='ZORI over Time',
#                    labels={'Date': 'Date', 'ZORI': 'ZORI Value'}, height=600)
# fig_zori.update_xaxes(rangeslider_visible=True)
# fig_zori.update_layout(title_x=0.5, xaxis_title='Date', yaxis_title='ZORI Value')
# fig_zori.update_layout(
#     margin=dict(l=20, r=20, t=20, b=20),
# )
# fig_zori.show()

#### **2. Violin Plot of ZHVI and ZORI by State**

In [9]:
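# Violin plot for ZHVI by State
# NOTE(review): `sample_data_state` is not defined in any cell shown in this notebook.
# It must be created first (presumably a per-state sample carrying both ZHVI and
# ZORI values — TODO confirm) before this cell can be re-enabled.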
# fig_zhvi_state = px.violin(sample_data_state, x='State', y='ZHVI', title='ZHVI Distribution by State',
#                            box=True, points='outliers', height=600)
# fig_zhvi_state.update_layout(title_x=0.5, xaxis_title='State', yaxis_title='ZHVI Value')

# fig_zhvi_state.update_layout(
#     margin=dict(l=20, r=20, t=20, b=20),
# )

In [10]:
# Violin plot for ZORI by State
# fig_zori_state = px.violin(sample_data_state, x='State', y='ZORI', title='ZORI Distribution by State',
#                            box=True, points='outliers', height=600)
# fig_zori_state.update_layout(title_x=0.5, xaxis_title='State', yaxis_title='ZORI Value')

# fig_zori_state.update_layout(
#     margin=dict(l=20, r=20, t=20, b=20),
# )

#### **3. Bar Chart of Average ZHVI and ZORI by State**

In [11]:
# Calculate average ZHVI and ZORI by State
# avg_zhvi_state = sample_data_state.groupby('State')['ZHVI'].mean().reset_index()
# avg_zori_state = sample_data_state.groupby('State')['ZORI'].mean().reset_index()

# Bar chart for average ZHVI by State
# fig_avg_zhvi_state = px.bar(avg_zhvi_state, x='State', y='ZHVI', title='Average ZHVI by State',
#                             height=600)
# fig_avg_zhvi_state.update_layout(title_x=0.5, xaxis_title='State', yaxis_title='Average ZHVI Value')

In [12]:
# Bar chart for average ZORI by State
# fig_avg_zori_state = px.bar(avg_zori_state, x='State', y='ZORI', title='Average ZORI by State',
#                             height=600)
# fig_avg_zori_state.update_layout(title_x=0.5, xaxis_title='State', yaxis_title='Average ZORI Value')

#### **4. Scatter Plot of ZHVI v/s ZORI**

In [13]:
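# Scatter plot of ZHVI vs. ZORI
# NOTE(review): `sample_data_state` is not defined in any cell shown in this notebook,
# and trendline='ols' needs the optional `statsmodels` package to be installed.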
# fig_scatter = px.scatter(sample_data_state, x='ZHVI', y='ZORI', title='ZHVI v/s ZORI',
#                          height=600, trendline='ols')

# fig_scatter.update_layout(title_x=0.5, xaxis_title='ZHVI Value', yaxis_title='ZORI Value')

#### **5. Box Plot of ZHVI and ZORI by Type**

In [14]:
# Box plot for ZHVI by Type
# fig_zhvi_type = px.box(zhvi, x='Type', y='ZHVI', title='ZHVI Distribution by Type',
#                        height=600)
# fig_zhvi_type.update_layout(title_x=0.5, xaxis_title='Type', yaxis_title='ZHVI Value')

In [15]:
# Box plot for ZORI by Type
# fig_zori_type = px.box(zori, x='Type', y='ZORI', title='ZORI Distribution by Type',
#                        height=600)
# fig_zori_type.update_layout(title_x=0.5, xaxis_title='Type', yaxis_title='ZORI Value')

#### **6. Heatmap of Average ZHVI or ZORI by Year and Month**

In [16]:
# Calculate average ZHVI by Year and Month
# avg_zhvi_ym = zhvi.groupby([pd.Grouper(key='Date', freq='M')])['ZHVI'].mean().reset_index()
# avg_zhvi_ym['Year'] = avg_zhvi_ym['Date'].dt.year
# avg_zhvi_ym['Month'] = avg_zhvi_ym['Date'].dt.month

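# Heatmap for average ZHVI by Year and Month
# NOTE(review): this draft originally called `px.densitity_mapbox`, which is a
# misspelling of `px.density_mapbox`. That function draws points at geographic
# lat/lon coordinates on a map, so feeding it Year/Month will not give a
# year-by-month grid; `px.density_heatmap` (x='Month', y='Year', z='ZHVI',
# histfunc='avg') is probably what was intended — TODO confirm before re-enabling.
# fig_heatmap_zhvi = px.density_mapbox(avg_zhvi_ym, z='ZHVI', lat='Year', lon='Month',
#                                      radius=10, center=dict(lat=0, lon=180), zoom=0,
#                                      mapbox_style='stamen-toner', title='Average ZHVI by Year and Month')
# fig_heatmap_zhvi.update_layout(title_x=0.5)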