In [1]:
#Import Python Libraries
import pandas as pd
import folium
import geopandas as gpd
from folium.features import GeoJsonPopup, GeoJsonTooltip
import streamlit as st
from streamlit_folium import folium_static

#@st.cache
def read_csv(path):
    """Load a gzip-compressed, tab-separated file into a pandas DataFrame.

    Parameters
    ----------
    path : str or path-like
        Location of the gzip-compressed TSV file.

    Returns
    -------
    pandas.DataFrame
        The parsed table; fields may be wrapped in double quotes.
    """
    parse_options = {'compression': 'gzip', 'sep': '\t', 'quotechar': '"'}
    return pd.read_csv(path, **parse_options)

# Load the state-level market tracker, keep only the columns used downstream,
# and restrict it to rows whose period_begin lies in 2022-01-01 .. 2023-09-01
# (both ends inclusive). period_begin holds ISO-formatted date strings, so a
# plain string comparison orders them chronologically.
housing_price_df = read_csv('../input/state_market_tracker.tsv000.gz')
housing_price_df = housing_price_df.loc[
    :,
    [
        'period_begin',
        'period_end',
        'period_duration',
        'property_type',
        'median_sale_price',
        'median_sale_price_yoy',
        'homes_sold',
        'state_code',
    ],
]
housing_price_df = housing_price_df.loc[
    housing_price_df['period_begin'].between('2022-01-01', '2023-09-01')
].reset_index(drop=True)

In [2]:
# Work on a copy so housing_price_df itself stays untouched by later cells.
hp_df = housing_price_df.copy()

In [3]:
# Inspect column dtypes and non-null counts of the filtered housing data.
hp_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4574 entries, 0 to 4573
Data columns (total 8 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   period_begin           4574 non-null   object 
 1   period_end             4574 non-null   object 
 2   period_duration        4574 non-null   int64  
 3   property_type          4574 non-null   object 
 4   median_sale_price      4574 non-null   int64  
 5   median_sale_price_yoy  4574 non-null   float64
 6   homes_sold             4574 non-null   int64  
 7   state_code             4574 non-null   object 
dtypes: float64(1), int64(3), object(4)
memory usage: 286.0+ KB


In [7]:
#@st.cache
def read_file(path):
    """Read a geospatial file with geopandas and return the resulting frame.

    Parameters
    ----------
    path : str or path-like
        Location of the file to read (a GeoJSON file in this notebook).

    Returns
    -------
    geopandas.GeoDataFrame
        Whatever ``gpd.read_file`` produces for ``path``.
    """
    geo_frame = gpd.read_file(path)
    return geo_frame

#Read the state-boundary geojson file into a GeoDataFrame.
# Its 'ste_stusps_code' column is later used as the join key against the
# housing data's 'state_code'.
gdf = read_file('../input/georef-united-states-of-america-state.geojson')

In [8]:
# Inspect the columns available in the boundary data before merging.
gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype   
---  ------           --------------  -----   
 0   geo_point_2d     56 non-null     object  
 1   year             56 non-null     object  
 2   ste_code         56 non-null     object  
 3   ste_name         56 non-null     object  
 4   ste_area_code    56 non-null     object  
 5   ste_type         56 non-null     object  
 6   ste_stusps_code  56 non-null     object  
 7   ste_fp_code      0 non-null      float64 
 8   ste_gnis_code    56 non-null     object  
 9   geometry         56 non-null     geometry
dtypes: float64(1), geometry(1), object(8)
memory usage: 4.5+ KB


In [9]:
#Merge the housing market data and geojson file into one dataframe.
# The outer join keeps boundary rows that have no housing data; those rows
# have a missing period_begin and are dropped again in the last step.
df_final = (
    gdf
    .merge(hp_df, how="outer", left_on="ste_stusps_code", right_on="state_code")
    .reset_index(drop=True)
)
# Housing columns first, then the boundary attributes and the geometry.
df_final = df_final[[
    'period_begin', 'period_end', 'period_duration', 'property_type',
    'median_sale_price', 'median_sale_price_yoy', 'homes_sold', 'state_code',
    'ste_code', 'ste_name', 'ste_area_code', 'ste_type', 'ste_stusps_code',
    'geometry',
]]
df_final = df_final[df_final['period_begin'].notna()].reset_index(drop=True)

In [10]:
# Working copy of the merged frame; the app cell below narrows it down to the
# user's selection while df_final keeps every period and property type.
df = df_final.copy()

In [None]:
#Add sidebar to the app: app name, a short description with the data source, and author credit
st.sidebar.markdown("### Redfin Housing Data")
st.sidebar.markdown("This app is built using Streamlit to help visualize activity in the U.S. real estate market. All data from: https://www.redfin.com/news/data-center/")
st.sidebar.markdown("Developed by Robert Schell: https://github.com/schellrw")
#Add title and subtitle to the main interface of the app (rendered above the filter widgets)
st.title("U.S. Real Estate Activity Heatmap")
st.markdown("Where are the hottest housing markets in the U.S.? Select the housing market metrics you are interested in and your insights are just a couple clicks away. Hover over the map to view more details.")

#Create three columns/filters, one widget per column
col1, col2, col3 = st.columns(3)

with col1:
    # Most recent snapshot month first, so index=0 selects the latest period.
    period_list = sorted(df_final["period_begin"].unique().tolist(), reverse=True)
    year_month = st.selectbox("Snapshot Month", period_list, index=0)

with col2:
    prop_type = st.selectbox(
        "View by Property Type",
        [
            'All Residential',
            'Single Family Residential',
            'Townhouse',
            'Condo/Co-op',
            'Single Units Only',
            'Multi-Family (2-4 Unit)',
        ],
        index=0,
    )

with col3:
    # The selected value is used directly as a column name of the data frame.
    metrics = st.selectbox(
        "Select Housing Metrics",
        ["median_sale_price", "median_sale_price_yoy", "homes_sold"],
        index=0,
    )

#Update the data frame accordingly based on user input.
# Always start from df_final (never from the previously filtered df) so this
# cell can be re-run with a different month, property type or metric: filtering
# df in place would leave the other metric columns dropped and the other
# periods/property types already removed.
df = df_final[(df_final["period_begin"]==year_month) & (df_final["property_type"]==prop_type)]
# Keep only the selected metric next to the identifying columns and the geometry.
df = df[['period_begin','period_end','period_duration','property_type',metrics,'state_code','ste_code','ste_name','ste_area_code','ste_type','ste_stusps_code','geometry']]

#st.write(df)

#Initiate a folium map at latitude 40, longitude -100 with no default tiles;
# the base tile layer is attached explicitly right after.
m = folium.Map(
    location=[40, -100],
    zoom_start=4,
    tiles=None,
)
folium.TileLayer(
    'CartoDB positron',
    name="Light Map",
    control=False,
).add_to(m)

#Plot Choropleth map using folium
# NOTE(review): only the Choropleth's `.geojson` child is added to the map, and
# `choropleth1` is bound to the return value of that `add_to` call rather than
# to the Choropleth itself. Presumably this is done to leave the colour legend
# off the map (which would make `legend_name` unused) - confirm this is intended.
# NOTE(review): `geo_data` points at './us-state-boundaries.geojson', which is a
# different path from the geojson file loaded through read_file(); verify that
# this file exists and exposes the 'ste_stusps_code' property used in `key_on`.
choropleth1 = folium.Choropleth(
    geo_data='./us-state-boundaries.geojson',       # Geojson file providing the state boundaries
    name='Choropleth Map of U.S. Housing Prices',
    data=df,                                        # df from the data preparation and user selection
    columns=['state_code', metrics],                # key column ('state_code') and value column (the selected metric) used to colour each state
    key_on='feature.properties.ste_stusps_code',    # property in the geojson features that is matched against 'state_code'
    fill_color='YlGn',
    nan_fill_color="White",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Housing Market Metrics',
    highlight=True,
    line_color='black').geojson.add_to(m)

#Add tooltips to the map
# A transparent GeoJson overlay built from df carries the hover tooltip; the
# fill colours come from the choropleth layer underneath.
geojson1 = folium.features.GeoJson(
               data=df,
               name='United States Housing Prices',
               smooth_factor=2,
               style_function=lambda x: {'color':'black','fillColor':'transparent','weight':0.5},
               tooltip=folium.features.GeoJsonTooltip(
                   # Every tooltip field must be a column of df: folium checks the
                   # fields against the feature properties when the map is rendered.
                   # The state name column in df is 'ste_name' (df has no 'name').
                   fields=['period_begin',
                           'period_end',
                           'ste_name',
                           metrics,],
                   # One alias per field, in the same order as `fields`.
                   aliases=["Period Begin:",
                            'Period End:',
                            'State:',
                            metrics+":"],
                   localize=True,
                   sticky=False,
                   labels=True,
                   style="""
                       background-color: #F0EFEF;
                       border: 2px solid black;
                       border-radius: 3px;
                       box-shadow: 3px;
                   """,
                   max_width=800,),
               # Thicker outline and grey fill for the state under the cursor.
               highlight_function=lambda x: {'weight':3,'fillColor':'grey'},
               ).add_to(m)
#folium_static(m)

In [13]:
#Import Python Libraries
import pandas as pd
import folium
import geopandas as gpd
from folium.features import GeoJsonPopup, GeoJsonTooltip
import streamlit as st
from streamlit_folium import folium_static

@st.cache_data
def read_csv(path):
    """Load a gzip-compressed, tab-separated file into a pandas DataFrame.

    The result is cached by Streamlit via ``st.cache_data``.

    Parameters
    ----------
    path : str or path-like
        Location of the gzip-compressed TSV file.

    Returns
    -------
    pandas.DataFrame
        The parsed table; fields may be wrapped in double quotes.
    """
    parse_options = {'compression': 'gzip', 'sep': '\t', 'quotechar': '"'}
    return pd.read_csv(path, **parse_options)

# Load the state-level market tracker, keep only the columns used downstream,
# and restrict it to rows whose period_begin lies in 2022-01-01 .. 2023-07-01
# (both ends inclusive; ISO date strings compare chronologically).
# NOTE(review): the first cell of this notebook uses 2023-09-01 as the upper
# bound and resets the index - confirm which range is intended.
housing_price_df = read_csv('../input/state_market_tracker.tsv000.gz')
housing_price_df = housing_price_df.loc[
    :,
    [
        'period_begin',
        'period_end',
        'period_duration',
        'property_type',
        'median_sale_price',
        'median_sale_price_yoy',
        'homes_sold',
        'state_code',
    ],
]
housing_price_df = housing_price_df.loc[
    housing_price_df['period_begin'].between('2022-01-01', '2023-07-01')
]

@st.cache_data
def read_file(path):
    """Read a geospatial file with geopandas and return the resulting frame.

    The result is cached by Streamlit via ``st.cache_data``.

    Parameters
    ----------
    path : str or path-like
        Location of the file to read (a GeoJSON file in this notebook).

    Returns
    -------
    geopandas.GeoDataFrame
        Whatever ``gpd.read_file`` produces for ``path``.
    """
    geo_frame = gpd.read_file(path)
    return geo_frame

#Read the state-boundary geojson file into a GeoDataFrame.
# The commented-out line is an alternative relative path to the same file name
# (presumably for running from a different working directory - confirm).
# gdf = read_file('./input/georef-united-states-of-america-state.geojson')
gdf = read_file('../input/georef-united-states-of-america-state.geojson')

#Merge the housing market data and geojson file into one dataframe.
# The outer join keeps boundary rows that have no housing data; those rows
# have a missing period_begin and are dropped again in the last step.
df_final = (
    gdf
    .merge(housing_price_df, how="outer", left_on="ste_stusps_code", right_on="state_code")
    .reset_index(drop=True)
)
# Housing columns first, then the boundary attributes and the geometry.
df_final = df_final[[
    'period_begin', 'period_end', 'period_duration', 'property_type',
    'median_sale_price', 'median_sale_price_yoy', 'homes_sold', 'state_code',
    'ste_code', 'ste_name', 'ste_area_code', 'ste_type', 'ste_stusps_code',
    'geometry',
]]
df_final = df_final[df_final['period_begin'].notna()].reset_index(drop=True)

2023-09-03 14:20:31.062 No runtime found, using MemoryCacheStorageManager


2023-09-03 14:20:32.412 No runtime found, using MemoryCacheStorageManager


In [14]:
# Inspect dtypes and non-null counts of the merged frame.
df_final.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 4574 entries, 0 to 4573
Data columns (total 14 columns):
 #   Column                 Non-Null Count  Dtype   
---  ------                 --------------  -----   
 0   period_begin           4574 non-null   object  
 1   period_end             4574 non-null   object  
 2   period_duration        4574 non-null   float64 
 3   property_type          4574 non-null   object  
 4   median_sale_price      4574 non-null   float64 
 5   median_sale_price_yoy  4574 non-null   float64 
 6   homes_sold             4574 non-null   float64 
 7   state_code             4574 non-null   object  
 8   ste_code               4574 non-null   object  
 9   ste_name               4574 non-null   object  
 10  ste_area_code          4574 non-null   object  
 11  ste_type               4574 non-null   object  
 12  ste_stusps_code        4574 non-null   object  
 13  geometry               4574 non-null   geometry
dtypes: float64(4), geometry(1), object(9)

In [15]:
# Preview the first two merged rows.
df_final.head(2)

Unnamed: 0,period_begin,period_end,period_duration,property_type,median_sale_price,median_sale_price_yoy,homes_sold,state_code,ste_code,ste_name,ste_area_code,ste_type,ste_stusps_code,geometry
0,2023-04-01,2023-04-30,30.0,All Residential,597700.0,-0.080654,6814.0,WA,[53],[Washington],USA,state,WA,"POLYGON ((-117.03235 48.99920, -117.13490 48.9..."
1,2022-02-01,2022-02-28,30.0,All Residential,579800.0,0.154859,7148.0,WA,[53],[Washington],USA,state,WA,"POLYGON ((-117.03235 48.99920, -117.13490 48.9..."


In [16]:
# Slimmed-down view of the merged frame: drop the boundary attribute columns
# and give the housing columns human-readable labels.
df_slim = (
    df_final
    .drop(columns=['ste_code', 'ste_name', 'ste_area_code', 'ste_type', 'ste_stusps_code'])
    .rename(columns={
        'property_type': "Type of Property",
        'median_sale_price': "Median Sale Price",
        'median_sale_price_yoy': "Median Sale Price YoY",
        'homes_sold': "Homes Sold",
        'state_code': "State",
    })
)

In [17]:
# Preview the first two rows with the renamed columns.
df_slim.head(2)

Unnamed: 0,period_begin,period_end,period_duration,Type of Property,Median Sale Price,Median Sale Price YoY,Homes Sold,State,geometry
0,2023-04-01,2023-04-30,30.0,All Residential,597700.0,-0.080654,6814.0,WA,"POLYGON ((-117.03235 48.99920, -117.13490 48.9..."
1,2022-02-01,2022-02-28,30.0,All Residential,579800.0,0.154859,7148.0,WA,"POLYGON ((-117.03235 48.99920, -117.13490 48.9..."
