In [None]:
import kaleido
import numpy as np
import pandas as pd
import plotly.colors as cl
import plotly.express as px
import plotly.graph_objects as go

# Load the 'Combined' sheet and normalise 'Floor_Type' casing
# (str.capitalize: first character upper-case, the rest lower-case)
df = pd.read_excel('HouseData-All.xlsx', sheet_name='Combined').assign(
    Floor_Type=lambda frame: frame['Floor_Type'].str.capitalize()
)
df.head()



In [None]:
# Fill missing built years within each ZPID from the preceding row of that ZPID
# (forward fill), then store as int; astype(int) raises if any year is still missing
df['Built Year'] = df.groupby('ZPID')['Built Year'].ffill()
df['Built Year'] = df['Built Year'].astype(int)

# Blank out '*' placeholders in 'Floor_Type'. mask() is used instead of
# replace('*', None): before pandas 1.4 an explicit value=None was ignored and
# the '*' cells were forward-filled from the row above rather than blanked
df['Floor_Type'] = df['Floor_Type'].mask(df['Floor_Type'] == '*')

# Helper column so floor types can be counted by summing
df['Count'] = 1

# Drop rows whose 'Floor_Type' is missing; copy so later column assignments
# act on an independent frame rather than a slice of the unfiltered one
df = df.dropna(subset=['Floor_Type']).copy()

# Collapse every room name starting with 'Bed' into 'Bedroom'
df['Room'] = df['Room'].replace(r'^Bed.*', 'Bedroom', regex=True)


In [None]:
# Label each row by build year: 2020 or later is 'New', anything earlier is 'Old'
# (numpy is imported with the other imports in the first cell)
is_new_build = df['Built Year'] >= 2020
df['New_House'] = np.where(is_new_build, 'New(>=2020)', 'Old(<2020)')
df.head()



In [None]:
# Sum 'Count' for every (room, home type, price, house age, floor type) combination
group_keys = ['Room', 'Home_Type', 'Home_Price', 'New_House']
floor_counts = (
    df.groupby(group_keys + ['Floor_Type'])['Count']
    .sum()
    .reset_index()
)

# Spread floor types into columns (one value per cell after the groupby above);
# combinations that never occur are filled with 0
df_comparison = (
    floor_counts
    .pivot_table(index=group_keys, columns='Floor_Type', values='Count')
    .reset_index()
    .fillna(0)
)
df_comparison

In [None]:
# Keep only bedroom rows; .copy() yields an independent frame so later cells can
# overwrite its columns without triggering pandas' SettingWithCopyWarning
df_comparison_bedroom = df_comparison[df_comparison['Room'] == 'Bedroom'].copy()

# Total floor-type counts for new vs. old houses
floor_types = ['Bare', 'Carpet', 'Rug']
df_comparison_bedroom_newhouse = (
    df_comparison_bedroom.groupby('New_House')[floor_types].sum().reset_index()
)

# Share of each floor type within its New_House group, in percent (1 decimal).
# The row total is computed once instead of being repeated per column.
floor_total = df_comparison_bedroom_newhouse[floor_types].sum(axis=1)
for floor_type in floor_types:
    df_comparison_bedroom_newhouse[f'{floor_type}%'] = (
        df_comparison_bedroom_newhouse[floor_type] / floor_total * 100
    ).round(1)
df_comparison_bedroom_newhouse


In [None]:
# Stacked bar chart of bedroom floor-type percentages for new vs. old houses
fig_new_vs_old = px.bar(
    df_comparison_bedroom_newhouse,
    x='New_House',
    y=['Bare%', 'Rug%', 'Carpet%'],
    barmode='stack',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title='Bedroom Floor Type Comparison between New and Old Houses',
    # labels is keyed by column name, so the x-axis entry must be 'New_House'
    # (a key of 'x' matches no column and is silently ignored)
    labels={
        'value': 'Percentage of Floor Type',
        'variable': 'Floor Type',
        'New_House': 'House Age',
    },
    # True prints each bar's value; 'auto' is not a valid number-format string
    text_auto=True,
)

# Reverse the legend order and centre the title
fig_new_vs_old.update_layout(legend_traceorder='reversed', title_x=0.5)
fig_new_vs_old.show()
# Static export to PNG (needs the kaleido package imported in the first cell)
fig_new_vs_old.write_image('New vs Old Houses.png')

In [None]:
# Group by Home_Type
# Rows with Home_Type 'APARTMENT' are counted as 'CONDO'. assign() returns a new
# frame, so nothing is written into a filtered slice of df_comparison
# (avoids pandas' SettingWithCopyWarning).
df_comparison_bedroom = df_comparison_bedroom.assign(
    Home_Type=df_comparison_bedroom['Home_Type'].replace('APARTMENT', 'CONDO')
)

# Total floor-type counts per home type
floor_types = ['Bare', 'Carpet', 'Rug']
df_comparison_bedroom_hometype = (
    df_comparison_bedroom.groupby('Home_Type')[floor_types].sum().reset_index()
)

# Share of each floor type within its home type, in percent (1 decimal).
# The row total is computed once instead of being repeated per column.
floor_total = df_comparison_bedroom_hometype[floor_types].sum(axis=1)
for floor_type in floor_types:
    df_comparison_bedroom_hometype[f'{floor_type}%'] = (
        df_comparison_bedroom_hometype[floor_type] / floor_total * 100
    ).round(1)
df_comparison_bedroom_hometype.head()

In [None]:
# Stacked bar chart of bedroom floor-type percentages per home type
fig_hometype = px.bar(
    df_comparison_bedroom_hometype,
    x='Home_Type',
    y=['Bare%', 'Rug%', 'Carpet%'],
    barmode='stack',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title='Bedroom Floor Type Comparison between Home Types',
    # labels is keyed by column name, so the x-axis entry must be 'Home_Type'
    # (a key of 'x' matches no column and is silently ignored)
    labels={
        'value': 'Percentage of Floor Type',
        'variable': 'Floor Type',
        'Home_Type': 'Home Type',
    },
    # True prints each bar's value; 'auto' is not a valid number-format string
    text_auto=True,
)

# Reverse the legend order and centre the title
fig_hometype.update_layout(legend_traceorder='reversed', title_x=0.5)
fig_hometype.show()
# Static export to PNG (needs the kaleido package imported in the first cell)
fig_hometype.write_image('hometypes comparison.png')