In [3]:
import pandas as pd
import numpy as np

import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff

from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot

# Enable plotly's notebook rendering (connected=True loads plotly.js from the CDN).
init_notebook_mode(connected=True)

# Location of the training CSV.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run as-is on the original author's machine. Consider a path relative to
# the notebook or a configurable data directory.
TRAIN_CSV_PATH = r'C:\Users\markc\OneDrive\Documents\Python\100Hours\Regression\House Prices\train.csv'

housing_data = pd.read_csv(TRAIN_CSV_PATH)

# Columns treated as continuous measurements (areas and the sale price).
num_features = [
    'LotArea',
    'TotalBsmtSF',
    '1stFlrSF',
    '2ndFlrSF',
    'GrLivArea',
    'GarageArea',
    'SalePrice',
]

# Columns treated as categorical / discrete descriptors of each house.
cat_features = [
    'OverallQual',
    'OverallCond',
    'Neighborhood',
    'BldgType',
    'TotRmsAbvGrd',
    'BedroomAbvGr',
    'YearBuilt',
    'MoSold',
    'YrSold',
]

# Working frame: every row, restricted to the selected columns
# (numeric features first, then the categorical ones).
housing_df = housing_data.loc[:, num_features + cat_features]

# Preview the first rows as the cell output.
housing_df.head()

Unnamed: 0,LotArea,TotalBsmtSF,1stFlrSF,2ndFlrSF,GrLivArea,GarageArea,SalePrice,OverallQual,OverallCond,Neighborhood,BldgType,TotRmsAbvGrd,BedroomAbvGr,YearBuilt,MoSold,YrSold
0,8450,856,856,854,1710,548,208500,7,5,CollgCr,1Fam,8,3,2003,2,2008
1,9600,1262,1262,0,1262,460,181500,6,8,Veenker,1Fam,6,3,1976,5,2007
2,11250,920,920,866,1786,608,223500,7,5,CollgCr,1Fam,6,3,2001,9,2008
3,9550,756,961,756,1717,642,140000,7,5,Crawfor,1Fam,7,3,1915,2,2006
4,14260,1145,1145,1053,2198,836,250000,8,5,NoRidge,1Fam,9,4,2000,12,2008


In [4]:
# What does the housing market look like? Histogram of all sale prices.
# NOTE(review): the original comment called this the "boston" housing market;
# nothing in this notebook establishes the city -- confirm against the data source.
x_hist = housing_df['SalePrice'].values

# Build the figure directly from its single histogram trace.
fig = go.Figure(data=[go.Histogram(x=x_hist)])

# Centre the title horizontally and label the price axis.
title_settings = dict(
    text='Distribution of Sales Price',
    x=0.5,
    yanchor='top',
    xanchor='center',
)
fig.update_layout(
    title=title_settings,
    xaxis=dict(title='Sale Price'),
)

iplot(fig)

In [5]:
# What do the houses look like?

# OverallQual: distribution of sale price for each overall quality rating,
# with every rating below 5 pooled into a single '<5' bucket.

# Take an explicit copy: the relabelling below must modify our own frame, not a
# selection of housing_df (which triggers pandas' SettingWithCopyWarning).
qual_df = housing_df[['OverallQual', 'SalePrice']].copy()

# Build the mask while the column is still numeric, then switch the column to
# object dtype so the string label '<5' can sit next to the integer ratings.
# Writing a string straight into an integer column is an incompatible-dtype
# assignment that newer pandas versions warn about.
is_low_quality = qual_df['OverallQual'] < 5
qual_df['OverallQual'] = qual_df['OverallQual'].astype(object)
qual_df.loc[is_low_quality, 'OverallQual'] = '<5'

# Box order on the x axis; one array of sale prices per rating bucket.
x_box = ['<5', 5, 6, 7, 8, 9, 10]
yqual_box = [
    qual_df.loc[qual_df['OverallQual'] == qual_rating, 'SalePrice'].values
    for qual_rating in x_box
]

fig = go.Figure()

# One box trace per rating bucket, named after the bucket.
for xd, yd in zip(x_box, yqual_box):
    fig.add_trace(
        go.Box(
            y=yd,
            name=xd
        )
    )

fig.update_layout(
    title=dict(
        text='Distribution of Sales Price by Quality',
        x=0.5,
        yanchor='top',
        xanchor='center'
        ),
    # Categorical axis keeps the buckets in insertion order ('<5' first).
    xaxis=dict(
        title='House Quality Rating',
        type='category'
        ),
    # Price ticks every 100,000 starting from 0.
    yaxis=dict(
        tick0=0,
        dtick=100000
        )
)

iplot(fig)

In [6]:
# OverallCond: distribution of sale price for each overall condition rating,
# with every rating below 5 pooled into a single '<5' bucket.

# Take an explicit copy: the relabelling below must modify our own frame, not a
# selection of housing_df (which triggers pandas' SettingWithCopyWarning).
cond_df = housing_df[['OverallCond', 'SalePrice']].copy()

# Build the mask while the column is still numeric, then switch the column to
# object dtype so the string label '<5' can sit next to the integer ratings.
# Writing a string straight into an integer column is an incompatible-dtype
# assignment that newer pandas versions warn about.
is_low_condition = cond_df['OverallCond'] < 5
cond_df['OverallCond'] = cond_df['OverallCond'].astype(object)
cond_df.loc[is_low_condition, 'OverallCond'] = '<5'

# Box order on the x axis; one array of sale prices per rating bucket.
# A bucket with no matching houses simply yields an empty array.
x_box = ['<5', 5, 6, 7, 8, 9, 10]
ycond_box = [
    cond_df.loc[cond_df['OverallCond'] == cond_rating, 'SalePrice'].values
    for cond_rating in x_box
]

fig = go.Figure()

# One box trace per rating bucket, named after the bucket.
for xd, yd in zip(x_box, ycond_box):
    fig.add_trace(
        go.Box(
            y=yd,
            name=xd
        )
    )

fig.update_layout(
    title=dict(
        text='Distribution of Sales Price by Condition',
        x=0.5,
        yanchor='top',
        xanchor='center'
        ),
    # Categorical axis keeps the buckets in insertion order ('<5' first).
    xaxis=dict(
        title='House Condition Rating',
        type='category'
        ),
    # Price ticks every 100,000 starting from 0.
    yaxis=dict(
        tick0=0,
        dtick=100000
        )
)

iplot(fig)

In [7]:
# Size of houses: how many houses exist for each combination of
# bedrooms above ground (rows) and total rooms above ground (columns).

# .assign returns a new frame, so the helper 'Counter' column is added without
# writing into a selection of housing_df (avoids SettingWithCopyWarning).
house_size_df = housing_df[['TotRmsAbvGrd', 'BedroomAbvGr']].assign(Counter=1)

# Axis values shown on the heatmap.
# NOTE(review): houses with more than 6 bedrooms or more than 12 rooms fall
# outside these ranges and are not shown -- confirm this is intended.
total_bedrooms = list(range(0, 7))
total_rooms = list(range(1, 13))

# Count houses per (bedrooms, rooms) pair in one vectorised step.
# The previous nested loop wrote the count for TotRmsAbvGrd == j into the
# column labelled j + 1, shifting every count one column to the right of its
# true room total; crosstab keys each count by the actual room value.
# reindex pins the table to the fixed axis ranges and fills absent
# combinations with 0, keeping an integer matrix.
heatmap_df = (
    pd.crosstab(house_size_df['BedroomAbvGr'], house_size_df['TotRmsAbvGrd'])
    .reindex(index=total_bedrooms, columns=total_rooms, fill_value=0)
    .rename_axis(index=None, columns=None)
)

x_heatmap = list(heatmap_df.columns)
y_heatmap = list(heatmap_df.index)
z_heatmap = heatmap_df.values

fig = go.Figure()

fig.add_trace(
    go.Heatmap(
        z=z_heatmap,
        x=x_heatmap,
        y=y_heatmap,
        colorscale='Burg',
        showscale=True,
        # Thin gaps between cells so individual combinations are distinguishable.
        xgap=1,
        ygap=1
    )
)

# Treat both axes as categories so every room/bedroom count gets its own tick.
fig.update_xaxes(title='Total Rooms', type='category')
fig.update_yaxes(title='Total Bedrooms', type='category')
fig.update_layout(height=1000, width=1600)

iplot(fig)