## Imports

In [1]:
# Imports

import pandas as pd
import numpy as np
from uszipcode import SearchEngine
from sklearn import preprocessing
import folium
from folium import plugins

from state_heatmapwithtime import *

In [2]:
# Load the provided Zillow CSV into a DataFrame
csv_path = "zillow_data.csv"
df = pd.read_csv(filepath_or_buffer=csv_path)

# Preview the first five rows
df.head(n=5)

Unnamed: 0,RegionID,RegionName,City,State,Metro,CountyName,SizeRank,1996-04,1996-05,1996-06,...,2017-07,2017-08,2017-09,2017-10,2017-11,2017-12,2018-01,2018-02,2018-03,2018-04
0,84654,60657,Chicago,IL,Chicago,Cook,1,334200.0,335400.0,336500.0,...,1005500,1007500,1007800,1009600,1013300,1018700,1024400,1030700,1033800,1030600
1,90668,75070,McKinney,TX,Dallas-Fort Worth,Collin,2,235700.0,236900.0,236700.0,...,308000,310000,312500,314100,315000,316600,318100,319600,321100,321800
2,91982,77494,Katy,TX,Houston,Harris,3,210400.0,212200.0,212200.0,...,321000,320600,320200,320400,320800,321200,321200,323000,326900,329900
3,84616,60614,Chicago,IL,Chicago,Cook,4,498100.0,500900.0,503100.0,...,1289800,1287700,1287400,1291500,1296600,1299000,1302700,1306400,1308500,1307000
4,93144,79936,El Paso,TX,El Paso,El Paso,5,77300.0,77300.0,77300.0,...,119100,119400,120000,120300,120300,120300,120300,120500,121000,121500


## Pre-Processing

In [3]:
# Index the rows by zipcode; drop=False leaves the "RegionName" column in place
df = df.set_index("RegionName", drop=False)

# Hold on to the per-zipcode state labels so they can be mapped back in later
state_list = df.loc[:, "State"]

In [4]:
# Remove the metadata columns so that only the monthly columns remain
metadata_columns = [
    "RegionID", "RegionName", "City", "State",
    "Metro", "CountyName", "SizeRank",
]
df = df.drop(columns=metadata_columns)

In [5]:
# Keep every row, but only the columns from "2009-01" through the last column
first_month = "2009-01"
df = df.loc[:, first_month:]

In [6]:
# Drop every row that still contains at least one null value.
# The result is rebound instead of using `inplace=True`: `df` was produced by a
# `.loc` column slice, and mutating such a frame in place can raise
# SettingWithCopyWarning. Rebinding yields the same rows without that hazard.
df = df.dropna(axis=0)

# Sanity check: total number of nulls left in the frame (expected to be 0)
df.isnull().sum().sum()

0

In [7]:
# Scale each row to unit L2 norm (norm="l2", axis=1 are normalize()'s defaults,
# spelled out here); the result comes back as an array
df_norm = preprocessing.normalize(df, norm="l2", axis=1)

# Wrap that array in a DataFrame again, reusing the original row/column labels
df_scaled = pd.DataFrame(data=df_norm, columns=df.columns, index=df.index)

In [8]:
# Inspect the scaled frame: its (rows, columns) shape first, then the top rows
n_rows, n_cols = df_scaled.shape
print((n_rows, n_cols))
df_scaled.head(n=5)

(14136, 112)


Unnamed: 0_level_0,2009-01,2009-02,2009-03,2009-04,2009-05,2009-06,2009-07,2009-08,2009-09,2009-10,...,2017-07,2017-08,2017-09,2017-10,2017-11,2017-12,2018-01,2018-02,2018-03,2018-04
RegionName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
60657,0.089611,0.089206,0.08868,0.088001,0.087114,0.086172,0.085493,0.085077,0.084836,0.084782,...,0.110111,0.11033,0.110363,0.11056,0.110965,0.111557,0.112181,0.112871,0.11321,0.11286
75070,0.078717,0.078445,0.07825,0.078056,0.077783,0.077667,0.077667,0.077745,0.077822,0.077861,...,0.119787,0.120564,0.121537,0.122159,0.122509,0.123131,0.123715,0.124298,0.124881,0.125154
77494,0.082427,0.082227,0.082127,0.08216,0.08226,0.082461,0.082594,0.082728,0.082828,0.082862,...,0.107252,0.107119,0.106985,0.107052,0.107185,0.107319,0.107319,0.107921,0.109224,0.110226
60614,0.090794,0.090146,0.089388,0.08845,0.087291,0.08609,0.085135,0.084445,0.083976,0.083789,...,0.109918,0.109739,0.109713,0.110062,0.110497,0.110702,0.111017,0.111332,0.111511,0.111383
79936,0.099165,0.098839,0.098431,0.098105,0.097615,0.097126,0.0968,0.096555,0.096392,0.096147,...,0.097126,0.097371,0.09786,0.098105,0.098105,0.098105,0.098105,0.098268,0.098675,0.099083


In [9]:
# Re-attach the saved state labels as a "State" column; pandas aligns the
# Series to the rows by index label
df_scaled = df_scaled.assign(State=state_list)
df_scaled.head(n=5)

Unnamed: 0_level_0,2009-01,2009-02,2009-03,2009-04,2009-05,2009-06,2009-07,2009-08,2009-09,2009-10,...,2017-08,2017-09,2017-10,2017-11,2017-12,2018-01,2018-02,2018-03,2018-04,State
RegionName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
60657,0.089611,0.089206,0.08868,0.088001,0.087114,0.086172,0.085493,0.085077,0.084836,0.084782,...,0.11033,0.110363,0.11056,0.110965,0.111557,0.112181,0.112871,0.11321,0.11286,IL
75070,0.078717,0.078445,0.07825,0.078056,0.077783,0.077667,0.077667,0.077745,0.077822,0.077861,...,0.120564,0.121537,0.122159,0.122509,0.123131,0.123715,0.124298,0.124881,0.125154,TX
77494,0.082427,0.082227,0.082127,0.08216,0.08226,0.082461,0.082594,0.082728,0.082828,0.082862,...,0.107119,0.106985,0.107052,0.107185,0.107319,0.107319,0.107921,0.109224,0.110226,TX
60614,0.090794,0.090146,0.089388,0.08845,0.087291,0.08609,0.085135,0.084445,0.083976,0.083789,...,0.109739,0.109713,0.110062,0.110497,0.110702,0.111017,0.111332,0.111511,0.111383,IL
79936,0.099165,0.098839,0.098431,0.098105,0.097615,0.097126,0.0968,0.096555,0.096392,0.096147,...,0.097371,0.09786,0.098105,0.098105,0.098105,0.098105,0.098268,0.098675,0.099083,TX


## Visualization Time!

In [16]:
# Build the Florida ("FL") input for a Folium HeatMapWithTime layer by calling
# getdata_stateheatmapwithtime, a project helper from state_heatmapwithtime.py
# (brought into scope by the star import in the imports cell).
# NOTE(review): the helper's return format is not visible in this notebook —
# presumably one list of [lat, lon, weight] points per time step; confirm
# against the module.
fl_data = getdata_stateheatmapwithtime(df_scaled, "FL")

In [17]:
# Base map centered on Florida, given as [latitude, longitude]
fl_center = [27.6648, -81.5158]
fl_map = folium.Map(location=fl_center, zoom_start=6, prefer_canvas=True)

# Build the time-animated heat layer from fl_data and attach it to the map
fl_heat_layer = plugins.HeatMapWithTime(fl_data)
fl_heat_layer.add_to(fl_map)

# Display the map as the cell output
fl_map

In [12]:
# Repeat the same preparation for a second state, Georgia ("GA"), using the
# getdata_stateheatmapwithtime helper from state_heatmapwithtime.py.
# NOTE(review): return format is defined in that module, not here — confirm
# it matches what plugins.HeatMapWithTime expects.
ga_data = getdata_stateheatmapwithtime(df_scaled, "GA")

In [15]:
# Base map centered on Georgia, given as [latitude, longitude]
ga_center = [32.3656, -82.9001]
ga_map = folium.Map(location=ga_center, zoom_start=7, prefer_canvas=True)

# Build the time-animated heat layer from ga_data and attach it to the map
ga_heat_layer = plugins.HeatMapWithTime(ga_data)
ga_heat_layer.add_to(ga_map)

# Display the map as the cell output
ga_map