In [None]:
import pandas as pd
import plotly.express as px

# Load the 'Combined' sheet of the house-data workbook into a DataFrame.
df = pd.read_excel(
    'HouseData-All.xlsx',
    sheet_name='Combined',
)

# Normalise floor-type labels: first character upper-cased, the rest lower-cased.
floor_type_labels = df['Floor_Type'].str.capitalize()
df['Floor_Type'] = floor_type_labels

df.head()


In [None]:
# Fill blank built years by carrying the last non-missing value forward
# within each ZPID group (rows of one ZPID never borrow from another).
df['Built Year'] = df.groupby('ZPID')['Built Year'].ffill()
# The integer cast raises if a ZPID still has a blank year after the fill
# (i.e. its leading rows had no year to carry forward).
df['Built Year'] = df['Built Year'].astype(int)

# '*' values in the 'Floor_Type' column are turned into missing values.
# The dict form is used on purpose: with a scalar and an explicit None
# (`replace('*', None)`), pandas older than 1.4 ignores the None and
# forward-fills the previous row's floor type instead of blanking the cell.
df['Floor_Type'] = df['Floor_Type'].replace({'*': None})

# Every row counts as one observation; summed later to size chart segments.
df['Count'] = 1
df

In [None]:
# Keep only rows that have a floor type. The explicit copy makes `df` an
# independent frame, so the column assignments in later cells write to it
# directly rather than to a filtered slice (avoids SettingWithCopyWarning
# on pandas versions that flag the result of dropna as a potential slice).
df = df.dropna(subset=['Floor_Type']).copy()
df

In [None]:
# Collapse every room label that starts with 'Bed' into the single
# label 'Bedroom' (regex match anchored at the start of the value).
df['Room'] = df['Room'].replace(
    to_replace=r'^Bed.*',
    value='Bedroom',
    regex=True,
)
df

In [None]:
# Sunburst of floor types nested under room, sized by the summed 'Count'.
# Reference: https://plotly.com/python/sunburst-charts/
fig_sunburst_ALL = px.sunburst(
    df,
    path=['Room', 'Floor_Type'],
    values='Count',
    title='Floor Types by Room (All Regions)',
    width=750,
    height=750,
)
fig_sunburst_ALL

In [None]:
# Same sunburst with 'Region' added as the innermost ring.
fig_sunburst_Region = px.sunburst(
    df,
    path=['Region', 'Room', 'Floor_Type'],
    values='Count',
    title='Floor Types by Region',
    width=750,
    height=750,
)
fig_sunburst_Region

In [None]:
# One filtered frame per value of 'Region', used by the per-region charts.
df_Northeast, df_South, df_West, df_Midwest = (
    df[df['Region'] == region_name]
    for region_name in ('Northeast', 'South', 'West', 'Midwest')
)


In [None]:
# US map with each state coloured by its 'Region' value; the colours are
# pinned per region name rather than left to the default colour cycle.
fig_map_All = px.choropleth(
    df,
    locations="State",
    locationmode="USA-states",
    color="Region",
    color_discrete_map={
        'Northeast': '#EF553B',
        'South': '#AB63FA',
        'Midwest': '#636EFA',
        'West': '#00CC96',
    },
    scope="usa",
)
# Remove the outer margins so the map fills the figure.
fig_map_All.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig_map_All.show()

In [None]:
# Northeast rows only: Region > Division > Room > Floor_Type, sized by 'Count'.
fig_sunburst_Northeast = px.sunburst(
    df_Northeast,
    path=['Region', 'Division', 'Room', 'Floor_Type'],
    values='Count',
    title='Northeast Region Floor Types',
    width=750,
    height=750,
)
fig_sunburst_Northeast

In [None]:
# Map of the Northeast rows, states coloured by 'Division' (default colours).
fig_map_NE = px.choropleth(
    df_Northeast,
    locations="State",
    locationmode="USA-states",
    color="Division",
    scope="usa",
)
# Remove the outer margins so the map fills the figure.
fig_map_NE.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig_map_NE.show()

In [None]:
# South rows only: Region > Division > Room > Floor_Type, sized by 'Count'.
fig_sunburst_South = px.sunburst(
    df_South,
    path=['Region', 'Division', 'Room', 'Floor_Type'],
    values='Count',
    title='South Region Floor Types',
    width=750,
    height=750,
)
fig_sunburst_South

In [None]:
# Map of the South rows, states coloured by 'Division' with a fixed
# colour per division name.
fig_map_South = px.choropleth(
    df_South,
    locations="State",
    locationmode="USA-states",
    color="Division",
    color_discrete_map={
        'West South Central': '#EF553B',
        'South Atlantic': '#636EFA',
        'East South Central': '#00CC96',
    },
    scope="usa",
)
# Remove the outer margins so the map fills the figure.
fig_map_South.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig_map_South.show()

In [None]:
# West rows only: Region > Division > Room > Floor_Type, sized by 'Count'.
fig_sunburst_West = px.sunburst(
    df_West,
    path=['Region', 'Division', 'Room', 'Floor_Type'],
    values='Count',
    title='West Region Floor Types',
    width=750,
    height=750,
)
fig_sunburst_West

In [None]:
# Map of the West rows, states coloured by 'Division' (default colours).
fig_map_West = px.choropleth(
    df_West,
    locations="State",
    locationmode="USA-states",
    color="Division",
    scope="usa",
)
# Remove the outer margins so the map fills the figure.
fig_map_West.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig_map_West.show()

In [None]:
# Midwest rows only: Region > Division > Room > Floor_Type, sized by 'Count'.
fig_sunburst_Midwest = px.sunburst(
    df_Midwest,
    path=['Region', 'Division', 'Room', 'Floor_Type'],
    values='Count',
    title='Midwest Region Floor Types',
    width=750,
    height=750,
)
fig_sunburst_Midwest

In [None]:
# Map of the Midwest rows, states coloured by 'Division' (default colours).
fig_map_Midwest = px.choropleth(
    df_Midwest,
    locations="State",
    locationmode="USA-states",
    color="Division",
    scope="usa",
)
# Remove the outer margins so the map fills the figure.
fig_map_Midwest.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig_map_Midwest.show()

In [None]:
# Turn zipcodes into text and left-pad with zeros to a width of 5, so
# values shorter than five characters regain their leading zeros.
df['Zipcode'] = df['Zipcode'].astype(str).str.zfill(5)
df


In [None]:
# Bedroom rows only: total 'Count' for every (Zipcode, Floor_Type) pair,
# returned as a flat frame with one row per pair.
df_bedroom_zip = (
    df[df['Room'] == 'Bedroom']
    .groupby(['Zipcode', 'Floor_Type'])['Count']
    .sum()
    .reset_index()
)
df_bedroom_zip



In [None]:
# Carpeted-bedroom count per zipcode (zipcodes without carpet rows are absent here).
carpet_only = df_bedroom_zip[df_bedroom_zip['Floor_Type'] == 'Carpet']
bed_carpet_count = carpet_only.groupby('Zipcode')['Count'].sum()

# Bedroom count per zipcode across all floor types.
bed_total_count = df_bedroom_zip.groupby('Zipcode')['Count'].sum()

# Share of carpeted bedrooms in percent. The division aligns on zipcode, so
# zipcodes missing from the carpet counts come out as NaN and are set to 0.
bed_carpet_percentage = (100 * (bed_carpet_count / bed_total_count)).fillna(0)
bed_carpet_percentage

In [None]:
# Choropleth of the share of bedrooms with carpet, drawn per state.
#
# The "USA-states" location mode only matches two-letter state abbreviations,
# so the zipcode index of bed_carpet_percentage cannot be drawn with it: no
# location is matched and the map stays empty. The share is therefore
# re-aggregated by 'State' (the column the maps in earlier cells pass as
# `locations` with this same location mode), and the figure is shown
# explicitly because the cell previously ended without displaying it.
# NOTE(review): a true zipcode-level map would need ZIP boundary GeoJSON
# passed through the `geojson` / `featureidkey` arguments.
bedroom_rows = df[df['Room'] == 'Bedroom']
bed_total_by_state = bedroom_rows.groupby('State')['Count'].sum()
bed_carpet_by_state = (
    bedroom_rows[bedroom_rows['Floor_Type'] == 'Carpet']
    .groupby('State')['Count']
    .sum()
)
# States without any carpeted bedroom are absent from the carpet counts;
# the aligned division yields NaN for them, which is reported as 0 %.
bed_carpet_percentage_state = ((bed_carpet_by_state / bed_total_by_state) * 100).fillna(0)

fig = px.choropleth(
    locations=bed_carpet_percentage_state.index,
    locationmode="USA-states",
    color=bed_carpet_percentage_state.values,
    scope="usa",
    color_continuous_scale=px.colors.sequential.YlOrRd,
    labels={'color': 'Percentage of Bedrooms with Carpet'},
)
fig.show()
