Trying to get an idea of the listings in each area. The main location categories are:
- state: INVH doesn't have rentals in every state
- market_name: This seems to be a custom category they add
- city: INVH only really targets certain cities
- zipcode: Grouping by zipcode is probably too granular
- formattedAddress: This will most likely go unused; it should be unique for each listing

In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

import plotly.io as pio
# Make 'notebook' the default Plotly renderer for every figure shown below.
pio.renderers.default = 'notebook'

In [2]:
# Load the housing dataset from the project's data directory (path is relative
# to the notebook's working directory).
housing_parquet_path = 'data/clean_housing_data.parquet'
raw_rental_df = pd.read_parquet(housing_parquet_path)

In [3]:
# Keep only the address/location fields plus market rent, and key every row
# by the (date, id) pair.
index_keys = ['date', 'id']
listing_fields = ['formattedAddress', 'market_name', 'city', 'state', 'zipcode', 'market_rent']
cleaned_rental_df = raw_rental_df[index_keys + listing_fields].set_index(index_keys)

In [4]:
# Preview the selected columns, indexed by (date, id).
cleaned_rental_df

Unnamed: 0_level_0,Unnamed: 1_level_0,formattedAddress,market_name,city,state,zipcode,market_rent
date,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-06-12 22:51:48.000000,41b87620-08b5-46e9-8f8c-9687d52ed977,"1238 W 55Th St\nLos Angeles, CA 90037",Southern California,Los Angeles,CA,90037.0,3299.0
2022-06-12 22:51:48.000000,98bd5caa-1717-4398-a66d-b1a5452cba1b,"8947 S Denker Ave\nLos Angeles, CA 90047",Southern California,Los Angeles,CA,90047.0,3575.0
2022-06-12 22:51:48.000000,ff6c3e1c-0667-4613-bb42-f77a499cf7c7,"8916 David Ave\nLos Angeles, CA 90034",Southern California,Los Angeles,CA,90034.0,3999.0
2022-06-12 22:51:48.000000,42dbbad0-884b-457d-bde4-6508f8d943a5,"636 N Lamer St\nBurbank, CA 91506",Southern California,Burbank,CA,91506.0,4399.0
2022-06-12 22:51:48.000000,494a5455-db37-4d66-a67f-e8c780c4095e,"11063 Kittridge St\nNorth Hollywood, CA 91606",Southern California,North Hollywood,CA,91606.0,3899.0
...,...,...,...,...,...,...,...
2023-03-27 07:07:32.904720,94b6abf3-a170-4b13-b06d-73204c55b3d7,"9310 SW 166th St\nPalmetto Bay, FL 33157",South Florida/Miami,Palmetto Bay,FL,33157.0,3275.0
2023-03-27 07:07:32.904720,720d8702-9a70-408d-8a3c-9bbbffe1e23c,"8220 SW 132nd St\nMiami, FL 33156",South Florida/Miami,Miami,FL,33156.0,4599.0
2023-03-27 07:07:32.904720,6a2fb105-cad9-4bfe-b3c7-93fddfe4461c,"9364 SW 171st Ter\nPalmetto Bay, FL 33157",South Florida/Miami,Palmetto Bay,FL,33157.0,3565.0
2023-03-27 07:07:32.904720,ea00017f-4488-4f95-ab82-a062b3715532,"20211 NE 15th Ave\nMiami, FL 33179",South Florida/Miami,Miami,FL,33179.0,3165.0


In [5]:
# Reduce to one row per listing id, keeping the last occurrence in row order.
# NOTE(review): that is the most recent observation only if the rows are in
# chronological order -- confirm against the source data.
listing_ids = cleaned_rental_df.index.get_level_values('id')
is_last_occurrence = ~listing_ids.duplicated(keep='last')
unique_rental_df = cleaned_rental_df[is_last_occurrence]

In [6]:
# Preview the de-duplicated listings (one row per id).
unique_rental_df

Unnamed: 0_level_0,Unnamed: 1_level_0,formattedAddress,market_name,city,state,zipcode,market_rent
date,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-06-12 22:51:48.000000,43c4a8db-006f-4cc5-b6e8-a5cab730a2c2,"5201 Fireside Ranch Ave\nLas Vegas, NV 89131",Las Vegas,Las Vegas,NV,89131.0,2121.0
2022-06-12 22:51:48.000000,a0b586d6-4e1d-48c7-8ae0-64ac63328019,"6706 Flamewood Dr\nArlington, TX 76001",Dallas,Arlington,TX,76001.0,2127.0
2022-06-12 22:51:48.000000,f827584e-5a7a-4df1-a9ec-306d2a4b95db,"17532 SW 137th Ct\nMiami, FL 33177",South Florida/Miami,Miami,FL,33177.0,2899.0
2022-06-12 22:51:48.000000,cfc9498e-f79b-4ca4-8524-04796a5d01b9,"2340 SW 68th Ter\nMiramar, FL 33023",South Florida/Miami,Miramar,FL,33023.0,2330.0
2022-06-13 07:22:05.000000,7e5906ca-f127-443e-884d-a6f1ce8de15a,"4763 W Avenue L8\nLancaster, CA 93536",Southern California,Lancaster,CA,93536.0,2495.0
...,...,...,...,...,...,...,...
2023-03-27 07:07:32.904720,94b6abf3-a170-4b13-b06d-73204c55b3d7,"9310 SW 166th St\nPalmetto Bay, FL 33157",South Florida/Miami,Palmetto Bay,FL,33157.0,3275.0
2023-03-27 07:07:32.904720,720d8702-9a70-408d-8a3c-9bbbffe1e23c,"8220 SW 132nd St\nMiami, FL 33156",South Florida/Miami,Miami,FL,33156.0,4599.0
2023-03-27 07:07:32.904720,6a2fb105-cad9-4bfe-b3c7-93fddfe4461c,"9364 SW 171st Ter\nPalmetto Bay, FL 33157",South Florida/Miami,Palmetto Bay,FL,33157.0,3565.0
2023-03-27 07:07:32.904720,ea00017f-4488-4f95-ab82-a062b3715532,"20211 NE 15th Ave\nMiami, FL 33179",South Florida/Miami,Miami,FL,33179.0,3165.0


In [7]:
# Aggregate market rent per (state, market_name, city, zipcode).
# count() tallies the non-null rents in each group; mean() averages them.
# NOTE: groupby leaves out rows whose grouping key is missing (pandas default),
# so listings lacking any of these four fields are not represented.
region_levels = ['state', 'market_name', 'city', 'zipcode']
rent_by_region = unique_rental_df.groupby(region_levels)['market_rent']

region_rental_count = rent_by_region.count()
region_rental_mean = rent_by_region.mean()

In [9]:
# Treemap hierarchy: USA -> state -> market_name -> city.
# The zipcode level (index level 3) is left out on purpose to avoid confusion
# and clutter. plotly.express is used because graph_objects needs a more
# awkward input structure for treemaps.
state_level, market_level, city_level = (
    region_rental_mean.index.get_level_values(depth) for depth in range(3)
)

fig = px.treemap(
    region_rental_mean,
    path=[px.Constant("USA"), state_level, market_level, city_level],
    values=region_rental_count.values,  # sector size: number of listings
    color=region_rental_mean.values,    # sector colour: mean market rent
    maxdepth=4,
)

hover_text = (
    "Location: %{id}<br>"
    "Listings: %{value}<br>"
    "Market Rent: %{color:.2f}<br>"
)

# Each update_* call returns the figure, so the styling steps can be chained.
(
    fig.update_traces(hovertemplate=hover_text)
    .update_coloraxes(colorbar_title="Market Rent")
    .update_layout(title="<b>INVH Listings and Market Rent by Location</b>")
)

# Optional export of the interactive chart:
# fig.write_html("reports/listings_rent_by_location")
fig.show()