In [None]:
import pandas as pd
import plotly.express as px

# Read the 'Combined' worksheet of the survey workbook and normalise the casing
# of the floor-type labels (first character upper-case, the rest lower-case).
df = pd.read_excel('HouseData-All.xlsx', sheet_name='Combined').assign(
    Floor_Type=lambda frame: frame['Floor_Type'].str.capitalize()
)
df.head()


In [None]:
# Forward-fill the built year within each house (ZPID): rows that follow a
# recorded year inherit it.
# NOTE(review): a ZPID whose first row has no year stays NaN and makes the int
# cast below raise -- confirm every house starts with a recorded year.
df['Built Year'] = df.groupby('ZPID')['Built Year'].ffill()
df['Built Year'] = df['Built Year'].astype(int)

# Blank out the '*' placeholder in 'Floor_Type'.
# Series.replace('*', None) is avoided on purpose: depending on the pandas
# version an explicit None either pads with the previous row's value or inserts
# None, whereas mask() always produces a missing value.
df['Floor_Type'] = df['Floor_Type'].mask(df['Floor_Type'] == '*')

# Each row counts as one observation; later cells sum this column.
df['Count'] = 1

# Collapse every room label that starts with 'Bed' into 'Bedroom'.
# Done before the dropna so no column is assigned on the filtered frame.
df['Room'] = df['Room'].replace(r'^Bed.*', 'Bedroom', regex=True)

# Discard rows that have no floor type.
df = df.dropna(subset=['Floor_Type'])

df.head()

In [None]:
# One frame per value of 'Region'; the regional map cells read these.
df_Northeast = df.loc[df['Region'].eq('Northeast')]
df_South = df.loc[df['Region'].eq('South')]
df_West = df.loc[df['Region'].eq('West')]
df_Midwest = df.loc[df['Region'].eq('Midwest')]


In [None]:
# Map of all researched states, coloured by region.
# df repeats each state on many rows; one row per (State, Region) pair draws
# the same map without sending every duplicate location to the figure.
states_by_region = df[['State', 'Region']].drop_duplicates()
fig_map_All = px.choropleth(
    states_by_region,
    color="Region",
    locations="State",
    locationmode="USA-states",
    scope="usa",
    color_discrete_map={'Northeast':'#EF553B','South':'#AB63FA','Midwest':'#636EFA','West':'#00CC96'},
    title='Researched States',
)
# Tight margins, title centred above the map.
fig_map_All.update_layout(margin={"r":0,"t":50,"l":0,"b":0}, title_x=0.5)
fig_map_All.show()
fig_map_All.write_image('Map - All Regions.png')

In [None]:
# Northeast map: states coloured by division, each labelled with its state code.
# df_Northeast repeats every state once per data row; de-duplicate so each
# location is drawn and labelled once instead of stacking identical labels.
northeast_states = df_Northeast[['State', 'Division']].drop_duplicates()
fig_map_NE = px.choropleth(
    northeast_states,
    color="Division",
    locations="State",
    locationmode="USA-states",
    scope="usa",
    title='Northeast Regions',
)
# Tight margins, title centred above the map.
fig_map_NE.update_layout(margin={"r":0,"t":50,"l":0,"b":0}, title_x=0.5)

# Text layer with the state codes on top of the coloured states.
northeast_labels = northeast_states['State'].drop_duplicates()
fig_map_NE.add_scattergeo(
    locations=northeast_labels,
    locationmode="USA-states",
    text=northeast_labels,
    mode='text',
    textfont=dict(
        family="Avenir Light",
        size=10,
    ),
    showlegend=False,  # keep the label layer out of the division legend
)
fig_map_NE.show()
fig_map_NE.write_image('Map - Northeast.png')

In [None]:
# Midwest map: states coloured by division, each labelled with its state code.
# df_Midwest repeats every state once per data row; de-duplicate so each
# location is drawn and labelled once instead of stacking identical labels.
midwest_states = df_Midwest[['State', 'Division']].drop_duplicates()
fig_map_Midwest = px.choropleth(
    midwest_states,
    color="Division",
    locations="State",
    locationmode="USA-states",
    scope="usa",
    title='Midwest Regions',
)
# Tight margins, title centred above the map.
fig_map_Midwest.update_layout(margin={"r":0,"t":50,"l":0,"b":0}, title_x=0.5)

# Text layer with the state codes on top of the coloured states.
midwest_labels = midwest_states['State'].drop_duplicates()
fig_map_Midwest.add_scattergeo(
    locations=midwest_labels,
    locationmode="USA-states",
    text=midwest_labels,
    mode='text',
    textfont=dict(
        family="Avenir Light",
        size=10,
    ),
    showlegend=False,  # keep the label layer out of the division legend
)
fig_map_Midwest.show()
fig_map_Midwest.write_image('Map - Midwest.png')


In [None]:
# South map: states coloured by division, each labelled with its state code.
# df_South repeats every state once per data row; de-duplicate so each
# location is drawn and labelled once instead of stacking identical labels.
south_states = df_South[['State', 'Division']].drop_duplicates()
fig_map_South = px.choropleth(
    south_states,
    color="Division",
    locations="State",
    locationmode="USA-states",
    scope="usa",
    title='South Regions',
)
# Tight margins, title centred above the map.
fig_map_South.update_layout(margin={"r":0,"t":50,"l":0,"b":0}, title_x=0.5)

# Text layer with the state codes on top of the coloured states.
south_labels = south_states['State'].drop_duplicates()
fig_map_South.add_scattergeo(
    locations=south_labels,
    locationmode="USA-states",
    text=south_labels,
    mode='text',
    textfont=dict(
        family="Avenir Light",
        size=10,
    ),
    showlegend=False,  # keep the label layer out of the division legend
)
fig_map_South.show()
fig_map_South.write_image('Map - South.png')


In [None]:
# West map: states coloured by division, each labelled with its state code.
# df_West repeats every state once per data row; de-duplicate so each
# location is drawn and labelled once instead of stacking identical labels.
west_states = df_West[['State', 'Division']].drop_duplicates()
fig_map_West = px.choropleth(
    west_states,
    color="Division",
    locations="State",
    locationmode="USA-states",
    scope="usa",
    title='West Regions',
)
# Tight margins, title centred above the map.
fig_map_West.update_layout(margin={"r":0,"t":50,"l":0,"b":0}, title_x=0.5)

# Text layer with the state codes on top of the coloured states.
west_labels = west_states['State'].drop_duplicates()
fig_map_West.add_scattergeo(
    locations=west_labels,
    locationmode="USA-states",
    text=west_labels,
    mode='text',
    textfont=dict(
        family="Avenir Light",
        size=10,
    ),
    showlegend=False,  # keep the label layer out of the division legend
)
fig_map_West.show()
fig_map_West.write_image('Map - West.png')


In [None]:
# Sunburst charts sized by the number of observations ('Count').
# Both figures share the same value column and canvas size.
sunburst_options = dict(values='Count', width=750, height=750)

# Room -> floor type, all regions pooled.
fig_sunburst_ALL = px.sunburst(
    df,
    path=['Room', 'Floor_Type'],
    title='Floor Types by Room (All Regions)',
    **sunburst_options,
)
fig_sunburst_ALL.write_image('Sunburst_All.png')

# Region -> room -> floor type.
fig_sunburst_Region = px.sunburst(
    df,
    path=['Region', 'Room', 'Floor_Type'],
    title='Floor Types by Region',
    **sunburst_options,
)
fig_sunburst_Region.write_image('Sunburst_Region.png')

In [None]:
# Count observations per room / house and spread the floor types into columns.
house_room_keys = ['Room', 'ZPID', 'Region', 'Division', 'State',
                   'Home_Type', 'Home_Price', 'Sq_ft', 'Built Year']

# Long form: one row per key combination and floor type, with the summed count.
floor_counts = df.groupby(house_room_keys + ['Floor_Type'])['Count'].sum().reset_index()

# Wide form: one column per floor type; combinations never observed become 0.
df_sum = (
    floor_counts
    .pivot_table(index=house_room_keys, columns='Floor_Type', values='Count')
    .reset_index()
    .fillna(0)
)
df_sum



In [None]:
# One frame per room type, all regions pooled.
# .copy() makes each subset an independent frame: later cells add percentage
# columns to these frames, and assigning to a filtered slice of df_sum raises
# SettingWithCopyWarning.
df_bedroom = df_sum[df_sum['Room'] == 'Bedroom'].copy()
df_living = df_sum[df_sum['Room'] == 'Living'].copy()
df_dining = df_sum[df_sum['Room'] == 'Dining'].copy()
df_kitchen = df_sum[df_sum['Room'] == 'Kitchen'].copy()
df_family = df_sum[df_sum['Room'] == 'Family'].copy()
df_basement = df_sum[df_sum['Room'] == 'Basement'].copy()




In [None]:
# Bedroom: share of each floor type per row, in percent with one decimal.
# The denominator is the combined Bare + Carpet + Rug count of the row.
bedroom_total = df_bedroom['Bare'] + df_bedroom['Carpet'] + df_bedroom['Rug']
for floor in ('Bare', 'Carpet', 'Rug'):
    df_bedroom[f'{floor}%'] = (df_bedroom[floor] / bedroom_total * 100).round(1)
df_bedroom.head()

In [None]:
# Bedroom floor-type counts summed per Room / Region / Division / State.
df_bedroom_division = (
    df_bedroom
    .groupby(['Room', 'Region', 'Division', 'State'])[['Bare', 'Carpet', 'Rug']]
    .sum()
    .reset_index()
)

# Share of each floor type within the group, in percent with one decimal.
bedroom_division_total = (
    df_bedroom_division['Bare']
    + df_bedroom_division['Carpet']
    + df_bedroom_division['Rug']
)
for floor in ('Bare', 'Carpet', 'Rug'):
    df_bedroom_division[f'{floor}%'] = (
        df_bedroom_division[floor] / bedroom_division_total * 100
    ).round(1)
df_bedroom_division.head()




In [None]:
# Choropleth of the bedroom carpet share ('Carpet%') per state.
# px `labels` is keyed by column name, so the colour-bar / hover label has to
# be registered under 'Carpet%' (a key that matches no column is ignored).
fig_map_division_carpet = px.choropleth(
    df_bedroom_division,
    color="Carpet%",
    locations="State",
    locationmode="USA-states",
    scope="usa",
    color_continuous_scale=px.colors.sequential.YlOrRd,
    labels={'Carpet%': 'Percentage of Bedrooms with Carpet'},
    title='Carpeted Bedrooms % by State',
)
# Tight margins, title centred above the map.
fig_map_division_carpet.update_layout(margin={"r":0,"t":50,"l":0,"b":0}, title_x=0.5)
fig_map_division_carpet.show()
fig_map_division_carpet.write_image('Map - Carpeted Bedrooms Percent by State.png')

In [None]:
# Living room: share of each floor type per row, in percent with one decimal.
living_total = df_living['Bare'] + df_living['Carpet'] + df_living['Rug']
for floor in ('Bare', 'Carpet', 'Rug'):
    df_living[f'{floor}%'] = (df_living[floor] / living_total * 100).round(1)

# Living-room floor-type counts summed per Room / Region / Division / State.
df_living_division = (
    df_living
    .groupby(['Room', 'Region', 'Division', 'State'])[['Bare', 'Carpet', 'Rug']]
    .sum()
    .reset_index()
)

# Share of each floor type within the group, in percent with one decimal.
living_division_total = (
    df_living_division['Bare']
    + df_living_division['Carpet']
    + df_living_division['Rug']
)
for floor in ('Bare', 'Carpet', 'Rug'):
    df_living_division[f'{floor}%'] = (
        df_living_division[floor] / living_division_total * 100
    ).round(1)
df_living_division




In [None]:
# Choropleth of the living-room carpet share ('Carpet%') per state.
# px `labels` is keyed by column name, so the colour-bar / hover label has to
# be registered under 'Carpet%'; the text names living rooms, matching the
# data frame and the title.
fig_map_division_carpet = px.choropleth(
    df_living_division,
    color="Carpet%",
    locations="State",
    locationmode="USA-states",
    scope="usa",
    color_continuous_scale=px.colors.sequential.YlOrRd,
    labels={'Carpet%': 'Percentage of Living Rooms with Carpet'},
    title='Carpeted Living Room % by State',
)
# Tight margins, title centred above the map.
fig_map_division_carpet.update_layout(margin={"r":0,"t":50,"l":0,"b":0}, title_x=0.5)
fig_map_division_carpet.show()
fig_map_division_carpet.write_image('Map - Carpeted Living Rooms Percent by State.png')

In [None]:
# Basement: share of each floor type per row, in percent with one decimal.
basement_total = df_basement['Bare'] + df_basement['Carpet'] + df_basement['Rug']
for floor in ('Bare', 'Carpet', 'Rug'):
    df_basement[f'{floor}%'] = (df_basement[floor] / basement_total * 100).round(1)

# Basement floor-type counts summed per Room / Region / Division / State.
df_basement_division = (
    df_basement
    .groupby(['Room', 'Region', 'Division', 'State'])[['Bare', 'Carpet', 'Rug']]
    .sum()
    .reset_index()
)

# Share of each floor type within the group, in percent with one decimal.
basement_division_total = (
    df_basement_division['Bare']
    + df_basement_division['Carpet']
    + df_basement_division['Rug']
)
for floor in ('Bare', 'Carpet', 'Rug'):
    df_basement_division[f'{floor}%'] = (
        df_basement_division[floor] / basement_division_total * 100
    ).round(1)
df_basement_division




In [None]:
# Choropleth of the basement carpet share ('Carpet%') per state.
# px `labels` is keyed by column name, so the colour-bar / hover label has to
# be registered under 'Carpet%'; the text names basements, matching the data
# frame and the title.
fig_map_division_carpet = px.choropleth(
    df_basement_division,
    color="Carpet%",
    locations="State",
    locationmode="USA-states",
    scope="usa",
    color_continuous_scale=px.colors.sequential.YlOrRd,
    labels={'Carpet%': 'Percentage of Basements with Carpet'},
    title='Carpeted Basement % by State',
)
# Tight margins, title centred above the map.
fig_map_division_carpet.update_layout(margin={"r":0,"t":50,"l":0,"b":0}, title_x=0.5)
fig_map_division_carpet.show()
fig_map_division_carpet.write_image('Map - Carpeted Basement % by State.png')

In [None]:
# Median home price per state.
# df_sum carries one row per room type of a house, so a house with several
# room types would enter the median several times; keep one row per house.
# NOTE(review): assumes ZPID identifies a single house with a single price --
# confirm against the source workbook.
houses_for_price = df_sum.drop_duplicates(subset=['State', 'ZPID'])
df_median_house_value = houses_for_price.groupby(['State'])['Home_Price'].median().reset_index()
df_median_house_value['Home_Price'] = df_median_house_value['Home_Price'].round(0)
df_median_house_value.head()




In [None]:
# Choropleth of the median home price per state.
# px `labels` is keyed by column name, so the colour-bar / hover label has to
# be registered under 'Home_Price' and describe a price, not a carpet share.
fig_map_median_houseprice = px.choropleth(
    df_median_house_value,
    color="Home_Price",
    locations="State",
    locationmode="USA-states",
    scope="usa",
    color_continuous_scale=px.colors.sequential.YlGnBu,
    labels={'Home_Price': 'Median House Price'},
    title='Median House Price by State',
)
# Tight margins, title centred above the map.
fig_map_median_houseprice.update_layout(margin={"r":0,"t":50,"l":0,"b":0}, title_x=0.5)
fig_map_median_houseprice.show()
fig_map_median_houseprice.write_image('Map - Median House Price by State.png')

In [None]:
# Average built year per state, rounded to a whole year.
# df_sum carries one row per room type of a house, so a house with several
# room types would be weighted several times in the mean; keep one row per
# house.
# NOTE(review): assumes ZPID identifies a single house -- confirm against the
# source workbook.
houses_for_year = df_sum.drop_duplicates(subset=['State', 'ZPID'])
df_avg_built_year = houses_for_year.groupby(['State'])['Built Year'].mean().reset_index()
df_avg_built_year['Built Year'] = df_avg_built_year['Built Year'].round(0)
df_avg_built_year.head()




In [None]:
# Restrict the built-year table to the sixteen states listed below.
states_to_show = [
    'CA', 'TX', 'FL', 'NY', 'IL', 'OH', 'AZ', 'NJ',
    'MA', 'WA', 'MD', 'GA', 'MN', 'MO', 'PA', 'TN',
]
df_avg_built_year = df_avg_built_year.loc[df_avg_built_year['State'].isin(states_to_show)]
df_avg_built_year

In [None]:
# Choropleth of the average built year per state, with the year printed on
# each state.
# px `labels` is keyed by column name, so the colour-bar / hover label has to
# be registered under 'Built Year' and describe a year, not a carpet share.
fig_map_built_year = px.choropleth(
    df_avg_built_year,
    color="Built Year",
    locations="State",
    locationmode="USA-states",
    scope="usa",
    color_continuous_scale=px.colors.sequential.RdBu,
    labels={'Built Year': 'Avg. Built Year'},
    title='Avg. House Built Year by State',
)
# Tight margins, title centred above the map.
fig_map_built_year.update_layout(margin={"r":0,"t":50,"l":0,"b":0}, title_x=0.5)

# Text layer: the rounded average year drawn on top of each state.
fig_map_built_year.add_scattergeo(
    locations=df_avg_built_year['State'],
    locationmode="USA-states",
    text=df_avg_built_year['Built Year'],
    mode='text',
    textfont=dict(
        color='Green',
    ),
)
fig_map_built_year.show()
fig_map_built_year.write_image('Map - Avg. Built Year.png')