In [38]:
#Import libraries
import pandas as pd
import numpy as np
import plotly.graph_objects as go

#Read the cluster data
df_VA=pd.read_csv('df_VA2.csv',index_col=[0])
Fields=[
'region',
'period_begin',
'period_end',
'period_duration',
'parent_metro_region',
'property_type',
'median_sale_price',
'median_sale_price_yoy',
'homes_sold',
'new_listings_yoy',
'median_dom',
'median_dom_yoy',
'avg_sale_to_list',
'cluster_nbr',
'0',
'1',
'2'
]
#Only select fields that are needed in this visualization
df_VA=df_VA[Fields]

In [49]:
#Rename fields to make them easier to understand
df = df_VA.rename({'region': 'Neighborhood', '0': 'PC1','1':'PC2','2':'PC3'}, axis=1)
df['cluster_nbr'] = df['cluster_nbr'].apply(str) #Change data type for cluster_nbr

#Create summary statistics for clusters. Calucating medians for the following metrics at cluster level
df1=df.groupby(['cluster_nbr'])['cluster_nbr'].count()
df2=df.groupby(['cluster_nbr'])['homes_sold','median_sale_price','median_sale_price_yoy','new_listings_yoy','avg_sale_to_list','median_dom','median_dom_yoy'].median()
df_summary=pd.concat([df1, df2],axis=1)
df_summary = df_summary.rename({'cluster_nbr': '# of Neighborhoods','homes_sold':'Median Homes Sold','median_sale_price':'Median Sale Price','median_sale_price_yoy':'Median Sale Price YoY','new_listings_yoy':'Median New Listings YoY','avg_sale_to_list':'Median Sale-to-List Ratio','median_dom':'Median Days on Market','median_dom_yoy':'Median Days on Market YoY (days)'}, axis=1)
df_summary


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0_level_0,# of Neighborhoods,Median Homes Sold,Median Sale Price,Median Sale Price YoY,Median New Listings YoY,Median Sale-to-List Ratio,Median Days on Market,Median Days on Market YoY (days)
cluster_nbr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,39,56.0,305000.0,0.13813,0.121951,1.01812,17.0,-5.0
1,9,28.0,268000.0,0.208571,0.0,1.000201,26.5,-5.5
2,20,30.0,262450.0,0.152129,0.757353,1.014511,19.0,-8.0
3,23,40.0,392000.0,0.023023,-0.055556,1.006661,22.0,-8.0
4,25,30.0,410000.0,0.150442,0.055556,1.031888,8.0,-7.0


In [42]:
#Add cluster description to the dataframe
def f(row):
    if row['cluster_nbr'] == '0':
        val = 'Mid range, Steady appreciation, Inventory rising'
    elif row['cluster_nbr'] == '1':
        val = 'Rising prices, Affordable price, Longest Days on Market'
    elif row['cluster_nbr'] == '2':
        val = 'Increased inventory, Affordable but prices increasing'
    elif row['cluster_nbr'] == '3':
        val = 'Pricey, Value holding steady.'   
    elif row['cluster_nbr'] == '4':
        val = 'Pricey, Values Increasing, Very quick to sell'  
    else:
        val = 'NA'
    return val

df['cluster_desc']= df.apply(f, axis=1)

In [43]:
#Define a function to assign a unique color code to each cluster. 
#You can choose any color code you want by tweaking the val parameter
def color(row):
    if row['cluster_nbr'] == '0':
        val = '#0984BD'
    elif row['cluster_nbr'] == '1':
        val = '#E12906'
    elif row['cluster_nbr'] == '2':
        val = '#08E9E7'
    elif row['cluster_nbr'] == '3':
        val = '#E18A06'   
    elif row['cluster_nbr'] == '4':
        val = '#0C861A'  
    else:
        val = 'NA'
    return val
#Apply the function to each data point in the data frame
df['color']= df.apply(color, axis=1)

In [45]:
#Create the 'size' column for bubble size
from sklearn.preprocessing import MinMaxScaler
minmax_scaler=MinMaxScaler()
scaled_features=minmax_scaler.fit_transform(df[['median_sale_price_yoy']])
df['size']=pd.DataFrame(scaled_features)

#Styling changes to reduce decimal places to 2
df['avg_sale_to_list'] = pd.Series([round(val, 2) for val in df['avg_sale_to_list']], index = df.index)
df['homes_sold'] = pd.Series([round(val, 0) for val in df['homes_sold']], index = df.index)
df['median_dom'] = pd.Series([round(val, 1) for val in df['median_dom']], index = df.index)


#Styling changes to change the data format to percentages
df['median_sale_price_yoy'] = pd.Series(["{0:.1f}%".format(val * 100) for val in df['median_sale_price_yoy']], index = df.index)
df['new_listings_yoy'] = pd.Series(["{0:.1f}%".format(val * 100) for val in df['new_listings_yoy']], index = df.index) 

In [46]:
#Add 'text' column for hover-over tooltips
#You can customize what fields or information you want to show in tooltips in the code below
hover_text = []
for index, row in df.iterrows():
    hover_text.append(('Cluster Description:<br>{cluster_desc}<br><br>'+
                    'Neighborhood: {Neighborhood}<br>'+
                    'Metro: {parent_metro_region}<br>'+ 
                    'Homes Sold: {homes_sold}<br>'+
                    'Median Sales Price: ${median_sale_price}<br>'+
                    'Median Sales Price YoY: {median_sale_price_yoy}<br>'+
                    'New Listings YoY: {new_listings_yoy}<br>'+
                    'Median Days on Market: {median_dom}<br>'+
                    'Avg Sales-to-Listing Price: {avg_sale_to_list}'
                    ).format(
                    cluster_desc=row['cluster_desc'],
                    Neighborhood=row['Neighborhood'],
                    parent_metro_region=row['parent_metro_region'],
                    homes_sold=row['homes_sold'],
                    median_sale_price=row['median_sale_price'],
                    median_sale_price_yoy=row['median_sale_price_yoy'],
                    new_listings_yoy=row['new_listings_yoy'],
                    median_dom=row['median_dom'],
                    avg_sale_to_list=row['avg_sale_to_list']))
df['text'] = hover_text

In [48]:
# Dictionary with dataframes for each cluster
cluster_name=df["cluster_nbr"].unique()
cluster_data = {cluster: df.loc[df["cluster_nbr"] == cluster].copy()
                            for cluster in cluster_name}

layout = go.Layout(
    title_text='NC Neighborhoods Housing Market Segments', 
    title_x=0.5,
    xaxis = go.XAxis(
        title = 'Shorter Days on Market <-------------> Longer Days on Market',
        showticklabels=False),
    yaxis = go.YAxis(
        title = 'Lower Supply <------------> Higher Supply',
        showticklabels=False
    )
)

# Create figure
fig = go.Figure(layout=layout)
for cluster_name, cluster in cluster_data.items():
    fig.add_trace(go.Scatter(
        x=cluster['PC1'], y=cluster['PC2'],
        marker = dict(color=cluster['color']),
        name=cluster_name, text=cluster['text'],
        marker_size=cluster['size']
        ))

# Tune marker appearance and layout
sizeref = 2.*max(df['size'])/(18**2) 
fig.update_traces(mode='markers', marker=dict(sizemode='area',sizeref=sizeref, line_width=2))

fig.update_layout(showlegend=False)
fig.show()


plotly.graph_objs.XAxis is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.XAxis
  - plotly.graph_objs.layout.scene.XAxis



plotly.graph_objs.YAxis is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.layout.YAxis
  - plotly.graph_objs.layout.scene.YAxis


