In [1]:
import pandas as pd
import numpy as np

import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff

from plotly.subplots import make_subplots
from ipywidgets import widgets

In [2]:
# Load the raw training data.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
housing_data = pd.read_csv(r'C:\Users\markc\OneDrive\Documents\Python\100Hours\Regression\House Prices\train.csv')

# Budget threshold used throughout the notebook (median price of a home in the US).
median_price = 250000

# Columns kept for the analysis, split into numeric and categorical features.
num_features = [
    'LotArea', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF',
    'GrLivArea', 'GarageArea', 'SalePrice',
]
cat_features = [
    'OverallQual', 'OverallCond', 'Neighborhood',
    'TotRmsAbvGrd', 'BedroomAbvGr',
]

housing_df = housing_data[num_features + cat_features]

# Drop outliers: very large living areas or lots that still sold below the threshold.
sold_below_median = housing_df['SalePrice'] < median_price
huge_living_area = housing_df['GrLivArea'] > 4000
huge_lot = housing_df['LotArea'] > 150000
housing_df = housing_df.loc[~(sold_below_median & (huge_living_area | huge_lot))]

In [3]:
# Overall distribution of sale prices in the data set.

fig = go.Figure(data=[go.Histogram(x=housing_df['SalePrice'].values)])

# Centre the title; the returned figure is the cell's displayed output.
fig.update_layout(
    title_text='Distribution of Sales Price',
    title_x=0.5,
    title_yanchor='top',
    title_xanchor='center',
    xaxis_title='Sale Price',
)

In [4]:
# Bar charts of sale-price bins: fine-grained bins on the left, budget split on the right.

bins = [0, 100000, 200000, 300000, 400000, 500000, 600000, 700000, np.inf]
bin_labels = ['<100k', '100-200k', '200-300k', '300-400k', '400-500k', '500-600k', '600-700k', '>700k']

bins_u250 = [0, median_price, np.inf]
bin_labels_u250 = ['<250k', '>250k']


def _price_bin_counts(edges, labels):
    """Return a (SalePrice, Count) frame with one row per labelled bin, in bin order."""
    binned = pd.cut(housing_df['SalePrice'], edges, labels=labels)
    counts = binned.value_counts().sort_index().reset_index()
    counts.columns = ['SalePrice', 'Count']
    return counts


def _horizontal_bar(counts, fill, outline):
    """Build a horizontal bar trace from a (SalePrice, Count) frame."""
    return go.Bar(
        x=counts['Count'],
        y=counts['SalePrice'],
        orientation='h',
        marker=dict(color=fill, line=dict(color=outline, width=1)),
    )


bin_df = _price_bin_counts(bins, bin_labels)
bin_df_u250 = _price_bin_counts(bins_u250, bin_labels_u250)

fig = make_subplots(rows=1, cols=2, subplot_titles=['Sale Price', '<250k or >250k'])

# Left panel: one colour for every bin.
fig.add_trace(
    _horizontal_bar(bin_df, 'rgba(100, 172, 232, 0.6)', 'rgba(0, 76, 138, 1.0)'),
    row=1,
    col=1,
)

# Right panel: green for the bin below the threshold, red for the bin above it.
fig.add_trace(
    _horizontal_bar(
        bin_df_u250,
        ['rgba(165, 236, 113, 0.6)', 'rgba(243, 119, 114, 0.6)'],
        ['rgba(77, 181, 0, 1.0)', 'rgba(156, 7, 0, 1.0)'],
    ),
    row=1,
    col=2,
)

fig.update_layout(showlegend=False)

#of the 1460 properties 1243 are within our budget

In [5]:
#What do the houses look like?

#OverallQual
# Take an explicit copy so the relabelling below neither triggers a
# SettingWithCopyWarning nor risks writing through to housing_df.
qual_df = housing_df[['OverallQual', 'SalePrice']].copy()
low_quality_mask = qual_df['OverallQual'] < 5
# The column is about to hold integers alongside the string '<5'; cast to object
# first instead of relying on an implicit dtype change during assignment.
qual_df['OverallQual'] = qual_df['OverallQual'].astype(object)
qual_df.loc[low_quality_mask, 'OverallQual'] = '<5'

# Rating buckets shown on the x axis (everything below 5 is pooled into '<5').
x_box = ['<5', 5, 6, 7, 8, 9, 10]

box_colors = [px.colors.diverging.RdYlBu[0], px.colors.diverging.RdYlBu[2], px.colors.diverging.RdYlBu[4], px.colors.diverging.RdYlGn[7], px.colors.diverging.curl[3], px.colors.diverging.RdYlBu[8], px.colors.diverging.RdYlBu[10]]

# Rating bucket -> colour; reused by later cells to colour scatter markers.
rating_color_mapping = dict(zip(x_box, box_colors))

# One array of sale prices per rating bucket, in x_box order.
yqual_box = [
    qual_df.loc[qual_df['OverallQual'] == qual_rating, 'SalePrice'].values
    for qual_rating in x_box
]

fig = go.Figure()

for xd, yd, col in zip(x_box, yqual_box, box_colors):
    fig.add_trace(
        go.Box(
            y=yd,
            name=xd,
            marker=dict(
                color=col
            )
        )
    )

# Label anchored to the right edge of the plot at the price threshold.
arrow_annotation=dict(
        x=1,
        y=median_price,
        xref="paper",
        yref="y",
        text="Median US House Price",
        showarrow=True,
        font=dict(
            size=10,
            color="black"
            ),
        align="center",
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor="#636363",
        ax=0,
        ay=35,
        bordercolor="black",
        borderwidth=2,
        borderpad=4,
        bgcolor="#ffffff",
        opacity=0.8
        )

fig.update_layout(
    title=dict(
        text='Distribution of Sales Price by Quality',
        x=0.5,
        yanchor='top',
        xanchor='center'
        ),
    xaxis=dict(
        title='House Quality Rating',
        type='category'
        ),
    yaxis=dict(
        tick0=0,
        dtick=100000
        ),
    # Dotted red line across the full plot width at the price threshold.
    shapes=[dict(
        xref='paper',
        x0=0,
        x1=1,
        yref='y',
        y0=median_price,
        y1=median_price,
        line=dict(
            color="Red",
            width=4,
            dash="dot")
        )
    ],
    annotations=[arrow_annotation]
)

In [6]:
#OverallCond
# Take an explicit copy so the relabelling below neither triggers a
# SettingWithCopyWarning nor risks writing through to housing_df.
cond_df = housing_df[['OverallCond', 'SalePrice']].copy()
low_condition_mask = cond_df['OverallCond'] < 5
# The column is about to hold integers alongside the string '<5'; cast to object
# first instead of relying on an implicit dtype change during assignment.
cond_df['OverallCond'] = cond_df['OverallCond'].astype(object)
cond_df.loc[low_condition_mask, 'OverallCond'] = '<5'

# Rating buckets shown on the x axis (everything below 5 is pooled into '<5').
x_box = ['<5', 5, 6, 7, 8, 9, 10]

# One array of sale prices per rating bucket, in x_box order.
ycond_box = [
    cond_df.loc[cond_df['OverallCond'] == cond_rating, 'SalePrice'].values
    for cond_rating in x_box
]

fig = go.Figure()

# box_colors comes from the OverallQual cell so both charts share one palette.
for xd, yd, col in zip(x_box, ycond_box, box_colors):
    fig.add_trace(
        go.Box(
            y=yd,
            name=xd,
            marker=dict(
                color=col
            )
        )
    )

# Label anchored to the right edge of the plot at the price threshold.
arrow_annotation=dict(
        x=1,
        y=median_price,
        xref="paper",
        yref="y",
        text="Median US House Price",
        showarrow=True,
        font=dict(
            size=10,
            color="black"
            ),
        align="center",
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor="#636363",
        ax=0,
        ay=-35,
        bordercolor="black",
        borderwidth=2,
        borderpad=4,
        bgcolor="#ffffff",
        opacity=0.8
        )

fig.update_layout(
    title=dict(
        text='Distribution of Sales Price by Condition',
        x=0.5,
        yanchor='top',
        xanchor='center'
        ),
    xaxis=dict(
        title='House Condition Rating',
        type='category'
        ),
    yaxis=dict(
        tick0=0,
        dtick=100000
        ),
    # Dotted red line across the full plot width at the price threshold.
    shapes=[dict(
        xref='paper',
        x0=0,
        x1=1,
        yref='y',
        y0=median_price,
        y1=median_price,
        line=dict(
            color="Red",
            width=4,
            dash="dot")
        )
    ],
    annotations=[arrow_annotation]
)

In [106]:
 #grouped bar chart of houses <250k and >250k for each rating

def _quality_rating_counts(ratings):
    """Count houses per rating bucket of x_box, as a frame with columns 'index' and 'OverallQual'.

    The column names are set explicitly: what value_counts().reset_index()
    produces by default differs between pandas 1.x and 2.x, so reading
    'index' / 'OverallQual' from the default output is version dependent.
    Buckets with no houses get a count of 0.
    """
    counts = ratings.value_counts().reindex(x_box).fillna(0)
    return counts.rename_axis('index').reset_index(name='OverallQual')


qual_u250_group = _quality_rating_counts(qual_df.loc[qual_df['SalePrice'] <= median_price, 'OverallQual'])
qual_o250_group = _quality_rating_counts(qual_df.loc[qual_df['SalePrice'] > median_price, 'OverallQual'])

fig = go.Figure()

# Houses at or below the threshold (green).
fig.add_trace(
    go.Bar(
        x=qual_u250_group['index'],
        y=qual_u250_group['OverallQual'],
        name='<250k',
        marker=dict(
            color='rgba(165, 236, 113, 0.6)',
            line=dict(
                color='rgba(77, 181, 0, 1.0)',
                width=1
            )
        )
    )
)

# Houses above the threshold (red).
fig.add_trace(
    go.Bar(
        x=qual_o250_group['index'],
        y=qual_o250_group['OverallQual'],
        name='>250k',
        marker=dict(
            color='rgba(243, 119, 114, 0.6)',
            line=dict(
                color='rgba(156, 7, 0, 1.0)',
                width=1
            )
        )
    )
)

fig.update_layout(title=dict(
    text='Overall Quality Above / Below Median US House Price',
    x=0.5),
    xaxis_title='Overall Quality',
    barmode='group')

# Treat the mixed '<5' / integer ratings as categories so they keep x_box order.
fig.update_xaxes(type='category')


In [103]:
# Inspect the per-rating counts of houses priced at or below median_price.
qual_u250_group

Unnamed: 0,index,OverallQual
0,<5,140.0
1,5,396.0
2,6,368.0
3,7,273.0
4,8,62.0
5,9,1.0
6,10,0.0


In [107]:
 #grouped bar chart of houses <250k and >250k for each rating

def _condition_rating_counts(ratings):
    """Count houses per rating bucket of x_box, as a frame with columns 'index' and 'OverallCond'.

    The column names are set explicitly: what value_counts().reset_index()
    produces by default differs between pandas 1.x and 2.x, so reading
    'index' / 'OverallCond' from the default output is version dependent.
    Buckets with no houses get a count of 0.
    """
    counts = ratings.value_counts().reindex(x_box).fillna(0)
    return counts.rename_axis('index').reset_index(name='OverallCond')


cond_u250_group = _condition_rating_counts(cond_df.loc[cond_df['SalePrice'] <= median_price, 'OverallCond'])
cond_o250_group = _condition_rating_counts(cond_df.loc[cond_df['SalePrice'] > median_price, 'OverallCond'])

fig = go.Figure()

# Houses at or below the threshold (green).
fig.add_trace(
    go.Bar(
        x=cond_u250_group['index'],
        y=cond_u250_group['OverallCond'],
        name='<250k',
        marker=dict(
            color='rgba(165, 236, 113, 0.6)',
            line=dict(
                color='rgba(77, 181, 0, 1.0)',
                width=1
            )
        )
    )
)

# Houses above the threshold (red).
fig.add_trace(
    go.Bar(
        x=cond_o250_group['index'],
        y=cond_o250_group['OverallCond'],
        name='>250k',
        marker=dict(
            color='rgba(243, 119, 114, 0.6)',
            line=dict(
                color='rgba(156, 7, 0, 1.0)',
                width=1
            )
        )
    )
)

fig.update_layout(title=dict(
    text='Overall Condition Above / Below Median US House Price',
    x=0.5),
    xaxis_title='Overall Condition',
    barmode='group')

# Treat the mixed '<5' / integer ratings as categories so they keep x_box order.
fig.update_xaxes(type='category')

In [9]:
#size of houses under <250k

# Explicit copy of the affordable houses so adding a column does not write onto a slice.
house_size_df = housing_df.loc[
    housing_df['SalePrice'] <= median_price,
    ['TotRmsAbvGrd', 'BedroomAbvGr', 'SalePrice']
].copy()
house_size_df['Counter'] = 1

# Grid covered by the heatmap: 0-6 bedrooms (rows) by 1-12 total rooms (columns).
total_bedrooms = list(range(0, 7))
total_rooms = list(range(1, 13))

# Number of houses for every (bedrooms, rooms) pair. The previous hand-written loop
# stored the count for TotRmsAbvGrd == j under column j + 1, shifting every column
# by one room; crosstab keys each count by the actual room total. Combinations
# outside the grid are dropped and empty cells are filled with 0.
heatmap_df = pd.crosstab(
    house_size_df['BedroomAbvGr'],
    house_size_df['TotRmsAbvGrd']
).reindex(index=total_bedrooms, columns=total_rooms, fill_value=0)

fig = go.Figure()

fig.add_trace(
    go.Heatmap(
        z=heatmap_df.values,
        x=list(heatmap_df.columns),
        y=list(heatmap_df.index),
        colorscale='Burg',
        showscale=True,
        xgap=1,
        ygap=1
    )
)

# Category axes so every room / bedroom count gets its own evenly spaced cell.
fig.update_xaxes(title='Total Rooms', type='category')
fig.update_yaxes(title='Total Bedrooms', type='category')
fig.update_layout(title=dict(
    text='Bedrooms to Rooms Ratio for Houses <250k',
    x=0.5)
    )


In [10]:
#Top Neighborhoods by Quality

neighborhood_df = housing_df[['Neighborhood', 'OverallQual', 'SalePrice']]

# Mean quality rating per neighbourhood, highest first, as a two-column frame.
mean_quality_by_area = neighborhood_df.groupby(['Neighborhood'])['OverallQual'].mean()
avg_quality = mean_quality_by_area.sort_values(ascending=False).reset_index()

fig = go.Figure(
    data=[
        go.Bar(
            x=avg_quality['Neighborhood'],
            y=avg_quality['OverallQual'],
        )
    ]
)

fig.update_layout(
    title_text='Average Quality by Neighborhood',
    title_x=0.5,
    xaxis_title='Neighborhood',
    yaxis_title='Average Quality',
)

In [11]:
#Average Quality for top 10 neighborhoods

# avg_quality is already sorted best-first, so its first ten rows are the top ten.
top_10_neighborhoods = avg_quality['Neighborhood'].head(10).tolist()
in_top_10 = neighborhood_df['Neighborhood'].isin(top_10_neighborhoods)
neighborhood_top10 = neighborhood_df.loc[in_top_10]

bullet_df = (
    neighborhood_top10
    .groupby(['Neighborhood'])['OverallQual']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)

bullet_y = bullet_df['Neighborhood'].values
bullet_x = bullet_df['OverallQual'].values

# Vertical [bottom, top] paper-coordinate band for each bullet gauge, top gauge first.
pos_y = [[0.84, 0.90], [0.75, 0.82], [0.66, 0.72], [0.57, 0.63], [0.48, 0.54], [0.39, 0.45], [0.30, 0.36], [0.21, 0.27], [0.12, 0.18], [0.03, 0.09]]

step_names = ['Poor', 'Average', 'Good', 'Excellent']

# Background bands of each gauge: one Burg shade per quality range.
step_ranges = [[0, 4], [4, 6], [6, 8], [8, 10]]
steps = [
    {'range': band, 'color': px.colors.sequential.Burg[shade]}
    for shade, band in enumerate(step_ranges)
]

#remove ticklabels from the graphs
gauge = {
    'shape': 'bullet',
    'axis': {'range': [None, 10], 'ticks': "", 'showticklabels': False},
    'steps': steps,
    'bar': {'color': px.colors.sequential.Burg[5]},
}

fig = go.Figure()

# One bullet gauge per neighbourhood, stacked top to bottom in ranking order.
for gauge_value, gauge_label, y_band in zip(bullet_x, bullet_y, pos_y):
    fig.add_trace(
        go.Indicator(
            value=gauge_value,
            domain={'x': [0.1, 1], 'y': y_band},
            title={'text': gauge_label, 'font': {'size': 10}},
            mode='gauge',
            gauge=gauge,
        )
    )

#update the tick labels for the last trace so it shows 1-10
labelled_gauge = {
    'shape': 'bullet',
    'axis': {'range': [None, 10], 'dtick': 1, 'showticklabels': True},
    'steps': steps,
    'bar': {'color': px.colors.sequential.Burg[5]},
}
fig.update_traces(gauge=labelled_gauge, selector=dict(value=bullet_x[-1]))

fig.update_layout(
    title={'text': 'Quality Rating for Top 10 Neighborhoods', 'x': 0.5, 'y': 0.8},
    xaxis_title='Quality Rating 1-10',
    yaxis_title='Neighborhood',
)

#custom legend
# Empty scatter traces exist only to put one legend entry per quality band on the chart.
for legend_name, legend_colour in zip(step_names, px.colors.sequential.Burg):
    fig.add_trace(
        go.Scatter(
            x=[None],
            y=[None],
            mode='lines',
            marker=dict(size=0, color=legend_colour),
            showlegend=True,
            name=legend_name,
        )
    )

fig.update_layout(
    plot_bgcolor='white',
    xaxis={'showticklabels': False},
    yaxis={'showticklabels': False},
    legend_title='<b>Quality Rating</b>',
)


In [12]:
#distribution of neighborhoods

# Reverse the best-first top-10 list, so subplot row 1 holds the lowest rated of the ten.
neighborhoods_dist = top_10_neighborhoods[::-1]
distplot_df = neighborhood_df[['Neighborhood', 'SalePrice']]

# One pastel colour per neighbourhood; this mapping is reused by the interactive cells below.
colours = px.colors.qualitative.Pastel[:10]
neighborhood_colour_mapping = dict(zip(neighborhoods_dist, colours))

# Paper-coordinate y position of each row's text label (row 1 first).
annotations = []  
annotation_pos = [0.97, 0.88, 0.78, 0.66, 0.56, 0.45, 0.32, 0.22, 0.12, 0.02]

# Ten stacked rows, one per neighbourhood, sharing the sale-price x axis.
fig = make_subplots(10, 1, shared_xaxes=True)

for i in range(len(neighborhoods_dist)):
    #plot distplot
    # create_distplot expects a list of samples; only this neighbourhood's prices are passed.
    x = [distplot_df.loc[distplot_df['Neighborhood'].isin([neighborhoods_dist[i]]), 'SalePrice'].values]
    fig_ff = ff.create_distplot(x, [neighborhoods_dist[i]], show_hist=False, show_rug=False)
    
    # Re-use the first trace of the distplot (presumably the density curve, since the
    # histogram and rug are switched off) and recolour it for this neighbourhood.
    fig.add_trace(
        go.Scatter(
            fig_ff['data'][0], 
            marker_color=neighborhood_colour_mapping[neighborhoods_dist[i]]
        ),
        col=1,
        row=i+1
    )

    #create yaxis labels as annotation
    # Label text: neighbourhood name followed by its mean quality rating in bold.
    rating = str(round(avg_quality.loc[avg_quality['Neighborhood'] == neighborhoods_dist[i], 'OverallQual'].item(), 2))
    text = neighborhoods_dist[i] + ' - <b>' + rating + '</b>'

    annotation=dict(
                x=0.1,
                y=annotation_pos[i],
                showarrow=False,
                text=text,
                xref="paper",
                yref="paper"
            )
    annotations.append(annotation)

#add arrow annotation
# Callout at x=250000 on the top edge of the figure, matching the red line drawn below.
arrow_annotaion =dict(
        x=250000,
        y=1,
        xref="x",
        yref="paper",
        text="Median US House Price",
        showarrow=True,
        font=dict(
            size=10,
            color="black"
            ),
        align="center",
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor="#636363",
        ax=100,
        ay=5,
        bordercolor="black",
        borderwidth=2,
        borderpad=4,
        bgcolor="#ffffff",
        opacity=0.8
        )
annotations.append(arrow_annotaion)

# Hide the y tick labels and start every x axis at 0.3 of the width,
# leaving the left part of the figure for the text labels.
fig.update_yaxes(showticklabels=False)
fig.update_xaxes(domain=[0.3, 1], tick0=0, dtick=100000)
fig.update_layout(title={'text' : 'Distribution of House Prices by Neighborhood ranked by OverallQual',
                        'x' : 0.5},
                  showlegend=False, plot_bgcolor='white',
                  annotations=annotations, 
                  #add red line at 250k
                  shapes=[dict(yref= 'paper', y0= 0, y1= 1, xref= 'x', x0= 250000, x1= 250000, line=dict(
                    color="Red",
                    width=4,
                    dash="dot"))])

fig.show()

In [13]:
#interactive graph for each of the top 10 neighborhoods

# Explicit copy: the '<5' relabelling below must not write into (or warn about)
# a slice of housing_df.
interactive_df = housing_df[['Neighborhood', 'GrLivArea', 'GarageArea', 'LotArea', 'TotalBsmtSF', 'TotRmsAbvGrd', 'BedroomAbvGr', 'OverallQual', 'SalePrice']].copy()
below_five_mask = interactive_df['OverallQual'] < 5
# The column will mix integers with the string '<5', so make it object dtype up front.
interactive_df['OverallQual'] = interactive_df['OverallQual'].astype(object)
interactive_df.loc[below_five_mask, 'OverallQual'] = '<5'
interactive_u250 = interactive_df.loc[interactive_df['SalePrice'] <= median_price]

# Initial view: the best-rated neighbourhood. house_num_df holds the number of
# affordable houses per neighbourhood (shown in the title).
plot_df = interactive_u250.loc[interactive_u250['Neighborhood'].isin([top_10_neighborhoods[0]])]
house_num_df = interactive_u250.groupby(['Neighborhood'])['SalePrice'].count().reset_index()

# The last bin is open ended but labelled '200-250k' because the data is capped at median_price.
int_bins = [0, 50000, 100000, 150000, 200000, np.inf]
int_bin_labels = ['<50k', '50-100k', '100-150k', '150-200k', '200-250k']

subplot_titles = ['House Prices', 'GrLivArea', 'TotalBsmtSF', 'Bedrooms',  'GarageArea', 'LotArea']
# Grid position of each area scatter plot; column 1 is taken by the two bar charts.
area_plots = ['GrLivArea', 'TotalBsmtSF', 'GarageArea', 'LotArea']
area_plot_cols = [2, 3, 2, 3]
area_plot_rows = [1, 1, 2, 2]

#create base chart
int_fig = go.FigureWidget(
   make_subplots(2, 3, subplot_titles=subplot_titles)
)

#barchart - house prices (trace 0)
int_bar_price = pd.cut(plot_df['SalePrice'], bins=int_bins, labels=int_bin_labels).value_counts().sort_index().reset_index()
int_bar_price.columns = ['SalePrice', 'Count']

int_fig.add_trace(
    go.Bar(
        x=int_bar_price['Count'],
        y=int_bar_price['SalePrice'],
        orientation='h',
        marker=dict(
            color=neighborhood_colour_mapping[top_10_neighborhoods[0]],
        ),
        showlegend=False
    ),
    col=1,
    row=1
)

#barchart - bedrooms (trace 1)
int_bar_bedrooms = plot_df['BedroomAbvGr'].value_counts().sort_index().reset_index()
int_bar_bedrooms.columns = ['Bedrooms', 'Count']

int_fig.add_trace(
    go.Bar(
        x=int_bar_bedrooms['Count'],
        y=int_bar_bedrooms['Bedrooms'],
        orientation='h',
        marker=dict(
            color=neighborhood_colour_mapping[top_10_neighborhoods[0]]
        ),
        showlegend=False
    ),
    col=1,
    row=2
)

#scatter plots (traces 2-5), coloured by quality rating
for plot, row, col in zip(area_plots, area_plot_rows, area_plot_cols):

    int_fig.add_trace(
        go.Scatter(
            x=plot_df[plot].values,
            y=plot_df['SalePrice'].values,
            marker=dict(
                color=plot_df['OverallQual'].map(rating_color_mapping).values,
                ),
            mode='markers',
            showlegend=False
        ),
        col=col,
        row=row
    )

#custom legend
# Empty traces whose only purpose is one legend entry per quality rating colour.
for i in range(len(box_colors)):
    int_fig.add_trace(go.Scatter(x=[None], y=[None], mode='markers',
                            marker=dict(size=8, color=box_colors[i]),
                            showlegend=True, name=x_box[i]
                        )
    )

int_rating = str(round(avg_quality.loc[avg_quality['Neighborhood'] == top_10_neighborhoods[0], 'OverallQual'].item(), 2))
num_houses = str(house_num_df.loc[house_num_df['Neighborhood'] == top_10_neighborhoods[0], 'SalePrice'].item())

int_fig.update_layout(
    title=dict(
        text='<b>Review of ' + top_10_neighborhoods[0] + ' (' + str(int_rating) + ')<br>Number of Houses: ' + str(num_houses),
        x=0.5),
    showlegend=True,
    legend_title_text='<b>Quality Rating:</b>'
)


#create interactive element

neighborhood_widget = widgets.Dropdown(
    options=top_10_neighborhoods,
    value=top_10_neighborhoods[0],
    description='Neighborhood:'
)

def validate():
    """Return True when the dropdown's current value is one of the top-10 neighbourhoods."""
    return neighborhood_widget.value in top_10_neighborhoods

def response(change):
    """Redraw int_fig for the neighbourhood currently selected in the dropdown.

    Registered as an observer of neighborhood_widget. `change` is the change
    payload handed over by the widget; it is not inspected, the widget value
    is read directly instead.
    """
    if not validate():
        return

    selected = neighborhood_widget.value
    df = interactive_u250.loc[interactive_u250['Neighborhood'].isin([selected])]

    # Recompute both bar-chart tables for the selected neighbourhood.
    price_counts = pd.cut(df['SalePrice'], bins=int_bins, labels=int_bin_labels).value_counts().sort_index().reset_index()
    price_counts.columns = ['SalePrice', 'Count']

    bedroom_counts = df['BedroomAbvGr'].value_counts().sort_index().reset_index()
    bedroom_counts.columns = ['Bedrooms', 'Count']

    bar_colour = neighborhood_colour_mapping[selected]

    with int_fig.batch_update():

        #house prices (trace 0)
        price_bar = int_fig.data[0]
        price_bar['y'] = price_counts['SalePrice']
        price_bar['x'] = price_counts['Count']
        price_bar['marker']['color'] = bar_colour

        #bedrooms (trace 1)
        bedroom_bar = int_fig.data[1]
        bedroom_bar['y'] = bedroom_counts['Bedrooms']
        bedroom_bar['x'] = bedroom_counts['Count']
        bedroom_bar['marker']['color'] = bar_colour

        #scatter plots (traces 2-5, in area_plots order)
        for plot, ix in zip(area_plots, (2, 3, 4, 5)):
            scatter = int_fig.data[ix]
            scatter['x'] = df[plot].values
            scatter['y'] = df['SalePrice'].values
            scatter['marker']['color'] = df['OverallQual'].map(rating_color_mapping).values

        # Title: mean quality rating and number of affordable houses for the selection.
        rating_value = avg_quality.loc[avg_quality['Neighborhood'] == selected, 'OverallQual'].item()
        int_rating = str(round(rating_value, 2))
        num_houses = str(house_num_df.loc[house_num_df['Neighborhood'] == selected, 'SalePrice'].item())

        int_fig.update_layout(
            title=dict(
                text='<b>Review of ' + selected + ' (' + int_rating + ')<br>Number of Houses: ' + num_houses,
                x=0.5,
                y=0.9
            ),
        )

# Call response whenever the dropdown's value changes.
neighborhood_widget.observe(response, names='value')

# Dropdown on top, figure below; the VBox is the cell's displayed output.
container = widgets.HBox([neighborhood_widget])
widgets.VBox([container, int_fig])


VBox(children=(HBox(children=(Dropdown(description='Neighborhood:', options=('NridgHt', 'StoneBr', 'NoRidge', …

In [14]:
#Find your ideal home

bedrooms = [1, 2, 3, 4, 5]
subplot_titles = ['GrLivArea', 'TotalBsmtSF', 'GarageArea', 'LotArea']
# Grid position of each area scatter plot in the 2x2 figure.
area_plots = ['GrLivArea', 'TotalBsmtSF', 'GarageArea', 'LotArea']
area_plot_cols = [1, 2, 1, 2]
area_plot_rows = [1, 1, 2, 2]

# One distinct marker symbol per neighbourhood. 'square' used to appear twice,
# which made two neighbourhoods indistinguishable on the charts.
symbols = ['circle', 'square', 'diamond', 'cross', 'triangle-up', 'pentagon', 'star', 'diamond-wide', 'hourglass', 'bowtie']

symbol_mapping = dict(zip(top_10_neighborhoods, symbols))

# Multi-select filters; each starts with a single pre-selected option.
house_widget = widgets.SelectMultiple(
    options=top_10_neighborhoods,
    value=[top_10_neighborhoods[0]],
    description='Neighborhood:'
)

rating_widget = widgets.SelectMultiple(
    options=x_box,
    value=[x_box[3]],
    description='Rating:'
)

bedroom_widget = widgets.SelectMultiple(
    options=bedrooms,
    value=[bedrooms[2]],
    description='Bedrooms:'
)

#create base chart
int_fig2 = go.FigureWidget(
   make_subplots(2, 2, subplot_titles=subplot_titles)
)

# Affordable houses matching all three current selections.
house_df = interactive_u250.loc[(interactive_u250['Neighborhood'].isin(list(house_widget.value))) & \
                                 (interactive_u250['OverallQual'].isin(list(rating_widget.value))) & \
                                 (interactive_u250['BedroomAbvGr'].isin(list(bedroom_widget.value)))]

#scatter plots
# Colour encodes the quality rating, symbol encodes the neighbourhood; customdata
# carries the per-point values referenced by the hover template.
for plot, row, col in zip(area_plots, area_plot_rows, area_plot_cols):

    int_fig2.add_trace(
        go.Scatter(
            x=house_df[plot].values,
            y=house_df['SalePrice'].values,
            marker=dict(
                color=house_df['OverallQual'].map(rating_color_mapping).values,
                size=12
                ),
            marker_symbol=house_df['Neighborhood'].map(symbol_mapping).values,
            mode='markers',
            showlegend=False,
            customdata = np.stack([house_df['Neighborhood'].values, house_df['OverallQual'].values, house_df['BedroomAbvGr'].values, house_df.index.values]).T,
            hovertemplate=
            "<b>%{customdata[0]}</b><br>" +
            "Price: %{y:$,.0f}<br>" +
            plot + ": %{x:.0f}<br>" +
            "OverallQuality: %{customdata[1]}<br>" +
            "Bedrooms: %{customdata[2]}<br>" +
            "House Index: %{customdata[3]}<extra></extra>"
        ),
        col=col,
        row=row
    )

int_fig2.update_layout(title=dict(text="House Features by Neighborhood / Rating / Bedrooms", x=0.5))
# Usage hint below the plots (the duplicated word "more more" has been removed).
int_fig2.add_annotation(text="<b>Chart Interpretation: </b>Select options from dropdown. Hover on data point to view more information",
                  xref="paper", yref="paper",
                  x=0, y=-0.2, showarrow=False)

def response(change):
    """Refresh the four scatter plots of int_fig2 for the current widget selections.

    Registered as an observer of house_widget, rating_widget and
    bedroom_widget. `change` is the change payload handed over by the widget;
    it is not inspected, the widget values are read directly instead.

    The previous version also computed a rating and a house count from
    neighborhood_widget (a widget belonging to a different cell) and never
    used them; that dead code and the hidden dependency are removed.
    """
    selected = interactive_u250.loc[(interactive_u250['Neighborhood'].isin(list(house_widget.value))) &
                                    (interactive_u250['OverallQual'].isin(list(rating_widget.value))) &
                                    (interactive_u250['BedroomAbvGr'].isin(list(bedroom_widget.value)))]

    # Colours, symbols and hover data depend only on the selection, so build them once.
    marker_colors = selected['OverallQual'].map(rating_color_mapping).values
    marker_symbols = selected['Neighborhood'].map(symbol_mapping).values
    hover_data = np.stack([selected['Neighborhood'].values,
                           selected['OverallQual'].values,
                           selected['BedroomAbvGr'].values,
                           selected.index.values]).T

    with int_fig2.batch_update():

        #scatter plots: traces 0-3 are in area_plots order
        for ix, plot in zip((0, 1, 2, 3), area_plots):
            trace = int_fig2.data[ix]
            trace['x'] = selected[plot].values
            trace['y'] = selected['SalePrice'].values
            trace['marker']['color'] = marker_colors
            trace['marker']['symbol'] = marker_symbols
            trace['customdata'] = hover_data

# Re-run response whenever any of the three selections changes.
house_widget.observe(response, names='value')
rating_widget.observe(response, names='value')
bedroom_widget.observe(response, names='value')

# Filters side by side above the figure; the VBox is the cell's displayed output.
container = widgets.HBox([house_widget, rating_widget, bedroom_widget])
widgets.VBox([container, int_fig2])

VBox(children=(HBox(children=(SelectMultiple(description='Neighborhood:', index=(0,), options=('NridgHt', 'Sto…

In [92]:
# Features shown side by side for the two compared houses.
comparison_cols = ['1stFlrSF', '2ndFlrSF', 'GrLivArea', 'TotalBsmtSF', 'GarageArea', 'LotArea', 'BedroomAbvGr', 'TotRmsAbvGrd', 'OverallQual', 'SalePrice']

#start index
index1 = 1226
index2 = 340

comparison_df_all = housing_df[comparison_cols]
comparison_df_u250 = comparison_df_all.loc[comparison_df_all['SalePrice'] <= median_price]

# Paper-coordinate bands: y_pos delimits the indicator rows, y_annotation_pos
# places the label of each scatter plot.
y_pos = [1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0]
y_annotation_pos = [0.84, 0.56, 0.26, 0]


# Free-text inputs for the two house indices. The `options=` argument that used
# to be passed here is not part of the Text widget (it does not appear in the
# widget's repr), so it has been dropped; input is checked when it changes.
index_widget1 = widgets.Text(
    description='House 1: ',
    value=str(index1)
)

index_widget2 = widgets.Text(
    description='House 2: ',
    value=str(index2)
)

data_widget = widgets.Dropdown(
    description='Data',
    value='Houses <250k',
    options=['Houses <250k', 'All Houses']
)

# Initial state: rows, neighbourhoods and neighbourhood ratings of the two default houses.
house_ix1 = comparison_df_u250.loc[comparison_df_u250.index == int(index_widget1.value)]
house_ix2 = comparison_df_u250.loc[comparison_df_u250.index == int(index_widget2.value)]
neighborhood_ix1 = housing_df.loc[housing_df.index == int(index_widget1.value), 'Neighborhood'].item()
neighborhood_ix2 = housing_df.loc[housing_df.index == int(index_widget2.value), 'Neighborhood'].item()
rating_ix1 = str(round(avg_quality.loc[avg_quality['Neighborhood'] == neighborhood_ix1, 'OverallQual'].item(), 2))
rating_ix2 = str(round(avg_quality.loc[avg_quality['Neighborhood'] == neighborhood_ix2, 'OverallQual'].item(), 2))

# Base figure: the left part is filled with Indicator traces placed through
# explicit domains, the right column (col=2) holds four scatter plots.
# NOTE: response updates traces and annotations by position, so the order in
# which they are added below must not change.
int_fig3 = go.FigureWidget(make_subplots(4, 2), layout=dict(width=500))

# Three indicators per compared column (traces 3*i, 3*i + 1, 3*i + 2).
for i in range(len(comparison_cols)):
    # House 1 value.
    int_fig3.add_trace(go.Indicator(
        mode="number",
        value=house_ix1.loc[:, comparison_cols[i]].item(),
        domain={'x': [0, 0.2], 'y': [y_pos[i+1], y_pos[i]]},
        number={'font' : {'size' : 16}}))

    # Label-only indicator: the number is drawn in white at size 1 so that
    # effectively only the title (the column name) is visible.
    int_fig3.add_trace(go.Indicator(
        mode="number",
        value=0,
        title=dict(text=' - ' + comparison_cols[i] + ' - ', font=dict(size=16)),
        domain={'x': [0, 0.35], 'y': [y_pos[i+1], y_pos[i] - 0.075]},
        number={'font' : {'size' : 1, 'color' : 'white'}}))

    # House 2 value with its relative difference to house 1.
    int_fig3.add_trace(go.Indicator(
        mode="number+delta",
        value=house_ix2.loc[:, comparison_cols[i]].item(),
        domain={'x': [0, 0.55], 'y': [y_pos[i+1], y_pos[i]]},
        delta={'reference': house_ix1.loc[:, comparison_cols[i]].item(), 'relative': True, 'position' : "right", 'font' : {'size' : 14}},
        number={'font' : {'size' : 16}}))

# Three scatter traces per area feature: all houses, then house 1, then house 2.
for i in range(len(area_plots)):
    # Background cloud of every house at or below the price threshold.
    int_fig3.add_trace(
        go.Scatter(
            x=comparison_df_u250[area_plots[i]],
            y=comparison_df_u250['SalePrice'],
            marker=dict(
               color='rgba(100, 172, 232, 0.6)'),
            mode='markers',
            showlegend=False
        ),
    row=i+1,
    col=2
    )

    # House 1 highlighted in its neighbourhood colour with a black outline.
    int_fig3.add_trace(
        go.Scatter(
            x=[house_ix1.loc[:, area_plots[i]].item()],
            y=[house_ix1.loc[:, 'SalePrice'].item()],
            marker=dict(
                color=neighborhood_colour_mapping[neighborhood_ix1], 
                line=dict(
                    color='black', 
                    width=1)),
            mode='markers',
            showlegend=False
        ),
    row=i+1,
    col=2
    )

    # House 2 highlighted in its neighbourhood colour with a black outline.
    int_fig3.add_trace(
        go.Scatter(
            x=[house_ix2.loc[:, area_plots[i]].item()],
            y=[house_ix2.loc[:, 'SalePrice'].item()],
            marker=dict(
                color=neighborhood_colour_mapping[neighborhood_ix2],  
                line=dict(
                    color='black', 
                    width=1)),
            mode='markers',
            showlegend=False
        ),
    row=i+1,
    col=2
    )

    # Label for this scatter plot, placed at the right edge of the figure.
    int_fig3.add_annotation(
        x=1.0,
        y=y_annotation_pos[i],
        xref='paper',
        yref='paper',
        text='<b>' + area_plots[i] + ' vs SalePrice </b>',
        showarrow=False
    )

# Heading for house 1: index, neighbourhood and neighbourhood rating.
int_fig3.add_annotation(
    x=0.08,
    y=1.04,
    xref='paper',
    yref='paper',
    text='<b>' + index_widget1.value + ' in ' + neighborhood_ix1 + ' (' + rating_ix1 + ')',
    font=dict(
        size=16,
        color=neighborhood_colour_mapping[neighborhood_ix1]),
    showarrow=False
)

# Heading for house 2: index, neighbourhood and neighbourhood rating.
int_fig3.add_annotation(
    x=0.24,
    y=1.04,
    xref='paper',
    yref='paper',
    text='<b>' + index_widget2.value + ' in ' + neighborhood_ix2 + ' (' + rating_ix2 + ')',
    font=dict(
        size=16,
        color=neighborhood_colour_mapping[neighborhood_ix2]),
    showarrow=False
)

# Usage note below the figure.
int_fig3.add_annotation(text="<b>Note: </b>If you enter in an index value for a property not in the top 10 neighborhoods the graph will not update",
                  xref="paper", yref="paper",
                  x=0, y=-0.05, showarrow=False)

#Add interactive element

def validate():
    """Return True when both text boxes hold a usable house index.

    An index is usable when it parses as an integer, belongs to a house priced
    at or below the threshold (comparison_df_u250) and that house lies in one
    of the top-10 neighbourhoods. Empty, partially typed or otherwise
    non-numeric text returns False instead of raising ValueError inside the
    widget observer.
    """
    try:
        first_ix = int(index_widget1.value)
        second_ix = int(index_widget2.value)
    except ValueError:
        # Nothing to look up yet (e.g. '' or '12a' while the user is typing).
        return False

    affordable_ix = comparison_df_u250.index
    if first_ix not in affordable_ix or second_ix not in affordable_ix:
        return False

    neighborhood_val1 = housing_df.loc[housing_df.index == first_ix, 'Neighborhood'].item()
    neighborhood_val2 = housing_df.loc[housing_df.index == second_ix, 'Neighborhood'].item()

    return neighborhood_val1 in top_10_neighborhoods and neighborhood_val2 in top_10_neighborhoods

def response(change):
    """Redraw int_fig3 for the two house indices and the data set currently selected.

    Registered as an observer of both index text boxes and the data dropdown.
    `change` is the change payload handed over by the widget; it is not
    inspected. Nothing happens unless validate() accepts the current input.
    """
    if not validate():
        return

    # Data set backing both the background scatter and the per-house lookups.
    if data_widget.value == 'Houses <250k':
        source_df = comparison_df_u250
    else:
        source_df = comparison_df_all

    first_ix = int(index_widget1.value)
    second_ix = int(index_widget2.value)
    first_house = source_df.loc[source_df.index == first_ix]
    second_house = source_df.loc[source_df.index == second_ix]
    data_df = source_df.copy()

    def _neighborhood_of(house_index):
        """Neighbourhood name of the house with the given index."""
        return housing_df.loc[housing_df.index == house_index, 'Neighborhood'].item()

    def _rating_of(neighborhood):
        """Mean quality rating of a neighbourhood, rounded to 2 decimals, as text."""
        return str(round(avg_quality.loc[avg_quality['Neighborhood'] == neighborhood, 'OverallQual'].item(), 2))

    with int_fig3.batch_update():
        first_area = _neighborhood_of(first_ix)
        second_area = _neighborhood_of(second_ix)
        first_rating = _rating_of(first_area)
        second_rating = _rating_of(second_area)

        traces = int_fig3['data']

        # Indicators: trace 3*i is house 1, trace 3*i + 2 is house 2 (with delta vs house 1).
        for i, column in enumerate(comparison_cols):
            first_value = first_house.loc[:, column].item()
            traces[3*i]['value'] = first_value
            traces[2 + 3*i]['value'] = second_house.loc[:, column].item()
            traces[2 + 3*i]['delta']['reference'] = first_value

        # Scatter plots start at trace 30: background cloud, house 1 marker, house 2 marker.
        for i, area in enumerate(area_plots):
            base = 30 + 3*i

            traces[base]['x'] = data_df[area]
            traces[base]['y'] = data_df['SalePrice']

            traces[base + 1]['x'] = [first_house.loc[:, area].item()]
            traces[base + 1]['y'] = [first_house.loc[:, 'SalePrice'].item()]
            traces[base + 1]['marker']['color'] = neighborhood_colour_mapping[first_area]

            traces[base + 2]['x'] = [second_house.loc[:, area].item()]
            traces[base + 2]['y'] = [second_house.loc[:, 'SalePrice'].item()]
            traces[base + 2]['marker']['color'] = neighborhood_colour_mapping[second_area]

        # Headings: annotation 4 belongs to house 1, annotation 5 to house 2.
        headings = int_fig3['layout']['annotations']
        headings[4]['text'] = '<b>' + index_widget1.value + ' in ' + first_area + ' (' + first_rating + ')'
        headings[4]['font']['color'] = neighborhood_colour_mapping[first_area]

        headings[5]['text'] = '<b>' + index_widget2.value + ' in ' + second_area + ' (' + second_rating + ')'
        headings[5]['font']['color'] = neighborhood_colour_mapping[second_area]

# Tight margins, with extra room on the right of the plotting area.
int_fig3.update_layout(margin=dict(t=20, b=20, r=200, l=20))
# Re-run response whenever either index or the data selection changes.
index_widget1.observe(response, names='value')
index_widget2.observe(response, names='value')
data_widget.observe(response, names='value')

# Inputs side by side above the figure; the VBox is the cell's displayed output.
container1 = widgets.HBox([index_widget1, index_widget2, data_widget])
widgets.VBox([container1, int_fig3])


VBox(children=(HBox(children=(Text(value='1226', description='House 1: '), Text(value='340', description='Hous…

In [88]:
def Validate():
    """Return True only when both compared houses lie in a top-10 neighbourhood.

    `a and b in xs` parses as `a and (b in xs)`, so the previous condition
    never tested neighborhood_ix2 for membership; each name is now checked
    explicitly.
    """
    return neighborhood_ix1 in top_10_neighborhoods and neighborhood_ix2 in top_10_neighborhoods

In [100]:
# Check both names for membership; `a and b in xs` would only test `b in xs`.
neighborhood_ix1 in top_10_neighborhoods and neighborhood_ix2 in top_10_neighborhoods

False

In [91]:
# Show the neighbourhood of the second compared house. The previous spelling
# `neighborhood_Ix2` is never assigned in this notebook and fails on a fresh kernel.
neighborhood_ix2

'Timber'