In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import re

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Initialise plotly's offline notebook mode so iplot() can draw in output cells.
init_notebook_mode(connected=True) 

# Physical House Occupancy Characteristics

In [113]:
# Full state / territory name -> two-letter postal code.
# Covers the 50 states plus the District of Columbia and Puerto Rico.
state_abbreviations = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
    'Puerto Rico': 'PR',
}

In [114]:
def convert_value(value):
    """Convert one raw table cell into a number.

    Parameters
    ----------
    value : str or any
        A cell as read from the CSV. Strings may carry thousands separators
        ("1,436,137"), a percent sign ("71.2%") and surrounding whitespace.
        Non-string values (ints, floats, NaN, None) are already parsed or
        missing and are returned unchanged.

    Returns
    -------
    int or float or the original object
        * "71.2%"      -> 0.712 (percentage expressed as a fraction)
        * "1,436,137"  -> 1436137 (int)
        * "2016448.0"  -> 2016448.0 (float, when the text is not a valid int)
        * non-strings  -> returned as-is

    Raises
    ------
    ValueError
        If a string cannot be parsed as a number (e.g. "N" or "(X)").
    """
    if not isinstance(value, str):
        # Mixed-dtype columns yield floats/NaN here; `'%' in value` would
        # raise TypeError on them, so pass them straight through.
        return value

    text = value.strip().replace(',', '')
    if '%' in text:
        return float(text.replace('%', '')) / 100  # percentage -> fraction

    try:
        return int(text)
    except ValueError:
        # Counts serialised with a decimal point, e.g. "2016448.0".
        return float(text)

In [115]:
def clean_house_char_headers(val):
    """Shorten a raw column header to '<prefix><suffix>'.

    The prefix is the text before the first "!!" in the header; the suffix is
    chosen by the first marker (in the order listed below) that occurs in the
    header. Headers matching no marker are reduced to the bare prefix, and
    non-string values are returned untouched.
    """
    if not isinstance(val, str):
        return val

    # Order matters and matching is case-sensitive: 'Occupied' (capital O)
    # only hits the plain total header, not 'Owner-occupied' or
    # 'Percent occupied', which are handled by the later, more specific rules.
    suffix_rules = (
        ('Occupied', "_total"),
        ('Percent occupied housing units', "_total_percent"),
        ('Owner-occupied housing', "_owner"),
        ('Percent owner-occupied housing units', "_own_percent"),
        ('Renter-occupied housing units', "_renter"),
        ('Percent renter-occupied', "_rent_percent"),
    )

    prefix = val.split("!!")[0]
    for marker, suffix in suffix_rules:
        if marker in val:
            return prefix + suffix
    return prefix

In [116]:
# Load the housing table (first CSV column becomes the row index) and
# shorten every column header with clean_house_char_headers in one chain.
house_char_data = (
    pd.read_csv('./Data/Physical_Housing_Occup.csv', index_col=0)
    .rename(columns=clean_house_char_headers)
)
house_char_data.head()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Occupied housing units,2016448.0,2016448,1416333.0,1416333,600115.0,600115,274574.0,274574,181586.0,181586,...,176863.0,176863,66458.0,66458,1289311.0,1289311,869635.0,869635,419676.0,419676
UNITS IN STRUCTURE,,,,,,,,,,,...,,,,,,,,,,
"1, detached",1436137.0,71.2%,1205520.0,85.1%,230617.0,38.4%,170997.0,62.3%,149053.0,82.1%,...,147580.0,83.4%,20001.0,30.1%,890441.0,69.1%,703191.0,80.9%,187250.0,44.6%
"1, attached",41268.0,2.0%,23036.0,1.6%,18232.0,3.0%,22604.0,8.2%,13565.0,7.5%,...,7390.0,4.2%,4919.0,7.4%,145191.0,11.3%,83088.0,9.6%,62103.0,14.8%
2 apartments,35683.0,1.8%,1255.0,0.1%,34428.0,5.7%,15026.0,5.5%,4122.0,2.3%,...,873.0,0.5%,3457.0,5.2%,34016.0,2.6%,14680.0,1.7%,19336.0,4.6%


In [117]:
# Rows 2-8 are the categories listed under the "UNITS IN STRUCTURE" header
# row (row 1); row 0 is the overall "Occupied housing units" total.
units_in_struc = house_char_data.iloc[list(range(2, 9))]
units_in_struc.head()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"1, detached",1436137,71.2%,1205520,85.1%,230617,38.4%,170997,62.3%,149053,82.1%,...,147580,83.4%,20001,30.1%,890441,69.1%,703191,80.9%,187250,44.6%
"1, attached",41268,2.0%,23036,1.6%,18232,3.0%,22604,8.2%,13565,7.5%,...,7390,4.2%,4919,7.4%,145191,11.3%,83088,9.6%,62103,14.8%
2 apartments,35683,1.8%,1255,0.1%,34428,5.7%,15026,5.5%,4122,2.3%,...,873,0.5%,3457,5.2%,34016,2.6%,14680,1.7%,19336,4.6%
3 or 4 apartments,57324,2.8%,2986,0.2%,54338,9.1%,20093,7.3%,2783,1.5%,...,198,0.1%,10707,16.1%,33814,2.6%,7645,0.9%,26169,6.2%
5 to 9 apartments,75649,3.8%,2717,0.2%,72932,12.2%,15293,5.6%,2599,1.4%,...,384,0.2%,7442,11.2%,59961,4.7%,16203,1.9%,43758,10.4%


In [147]:
def data_cleanup(df, converter=None, abbreviations=None):
    """Reshape a wide '<State>_<attribute>' table into three long tables.

    Each column of ``df`` is expected to be named '<State>_<attribute>' and
    each row label is a housing category. Only the count attributes
    'total', 'owner' and 'renter' are used; every other column (the
    '*_percent' ones, or any column without an underscore) is skipped
    rather than converted and thrown away.

    Parameters
    ----------
    df : pandas.DataFrame
        Slice of the housing table (rows = categories, columns = state/attribute).
    converter : callable, optional
        Turns one raw cell into a number. Defaults to the notebook's
        ``convert_value``.
    abbreviations : mapping, optional
        State name -> postal code used for the 'Code' column. Defaults to the
        notebook's ``state_abbreviations``.

    Returns
    -------
    (df_total, df_owner, df_renter) : tuple of pandas.DataFrame
        Each has columns ['State', 'Housing Type', 'Count', 'Code'], one row
        per (state, category), in the column/row order of ``df``. A group with
        no matching columns yields an empty frame with the same columns
        instead of raising.
    """
    # Resolve the defaults at call time so re-running the defining cells in
    # the notebook is picked up without re-defining this function.
    if converter is None:
        converter = convert_value
    if abbreviations is None:
        abbreviations = state_abbreviations

    # One list of (state, category, count) records per tenure group.
    tenure_rows = {'total': [], 'owner': [], 'renter': []}
    for column, cells in df.to_dict().items():
        # Split on the FIRST underscore only: 'Alabama_total_percent' ->
        # ('Alabama', 'total_percent'); names without '_' give attribute ''.
        state, _, attribute = str(column).partition("_")
        if attribute not in tenure_rows:
            continue
        for label, raw in cells.items():
            category = label.strip() if isinstance(label, str) else label
            tenure_rows[attribute].append((state, category, converter(raw)))

    def create_df(rows):
        # Build the frame once from records instead of concatenating one
        # small DataFrame per state.
        frame = pd.DataFrame(rows, columns=['State', 'Housing Type', 'Count'])
        frame['Code'] = frame['State'].map(abbreviations)
        return frame

    df_total = create_df(tenure_rows['total'])
    df_owner = create_df(tenure_rows['owner'])
    df_renter = create_df(tenure_rows['renter'])

    return df_total, df_owner, df_renter

In [148]:
# Reshape the units-in-structure slice into three tables returned by
# data_cleanup: all occupied units, owner-occupied and renter-occupied.
df_total, df_owner, df_renter = data_cleanup(units_in_struc)

In [149]:
# Preview the first rows of the all-occupied-units table.
df_total.head()

Unnamed: 0,State,Housing Type,Count,Code
0,Alabama,"1, detached",1436137,AL
1,Alabama,"1, attached",41268,AL
2,Alabama,2 apartments,35683,AL
3,Alabama,3 or 4 apartments,57324,AL
4,Alabama,5 to 9 apartments,75649,AL


In [150]:
# Preview the first rows of the owner-occupied table.
df_owner.head()

Unnamed: 0,State,Housing Type,Count,Code
0,Alabama,"1, detached",1205520,AL
1,Alabama,"1, attached",23036,AL
2,Alabama,2 apartments,1255,AL
3,Alabama,3 or 4 apartments,2986,AL
4,Alabama,5 to 9 apartments,2717,AL


In [130]:
# Rows 10-16: year-built categories (the preview starts at "2020 or later").
year_struc = house_char_data.iloc[list(range(10, 17))]
year_struc.head()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020 or later,45295,2.2%,38381,2.7%,6914,1.2%,2604,0.9%,1822,1.0%,...,2822,1.6%,869,1.3%,904,0.1%,609,0.1%,295,0.1%
2010 to 2019,241552,12.0%,171350,12.1%,70202,11.7%,29643,10.8%,18717,10.3%,...,17123,9.7%,7252,10.9%,48084,3.7%,31575,3.6%,16509,3.9%
2000 to 2009,322821,16.0%,249161,17.6%,73660,12.3%,44414,16.2%,32530,17.9%,...,27518,15.6%,9678,14.6%,180653,14.0%,135080,15.5%,45573,10.9%
1980 to 1999,587134,29.1%,422475,29.8%,164659,27.4%,96638,35.2%,66956,36.9%,...,40947,23.2%,18035,27.1%,406272,31.5%,293055,33.7%,113217,27.0%
1960 to 1979,515788,25.6%,341032,24.1%,174756,29.1%,82183,29.9%,50826,28.0%,...,48307,27.3%,15887,23.9%,475389,36.9%,311542,35.8%,163847,39.0%


In [131]:
# Rows 18-22: room-count categories ("1 room" through "8 or more rooms").
rooms = house_char_data.iloc[list(range(18, 23))]
rooms.head()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1 room,22256,1.1%,4681,0.3%,17575,2.9%,10980,4.0%,2672,1.5%,...,939,0.5%,2354,3.5%,19305,1.5%,5490,0.6%,13815,3.3%
2 or 3 rooms,135880,6.7%,32531,2.3%,103349,17.2%,44248,16.1%,15808,8.7%,...,6101,3.4%,15425,23.2%,138185,10.7%,56309,6.5%,81876,19.5%
4 or 5 rooms,652409,32.4%,357333,25.2%,295076,49.2%,105243,38.3%,67880,37.4%,...,45143,25.5%,33069,49.8%,808988,62.7%,549749,63.2%,259239,61.8%
6 or 7 rooms,708144,35.1%,568715,40.2%,139429,23.2%,68647,25.0%,55125,30.4%,...,56898,32.2%,10411,15.7%,268223,20.8%,208947,24.0%,59276,14.1%
8 or more rooms,497759,24.7%,453073,32.0%,44686,7.4%,45456,16.6%,40101,22.1%,...,67782,38.3%,5199,7.8%,54610,4.2%,49140,5.7%,5470,1.3%


In [132]:
# Rows 24-27: bedroom-count categories ("No bedroom" through "4 or more bedrooms").
bedroom = house_char_data.iloc[list(range(24, 28))]
bedroom.head()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
No bedroom,25446,1.3%,6976,0.5%,18470,3.1%,12100,4.4%,3007,1.7%,...,1503,0.8%,2496,3.8%,19816,1.5%,5928,0.7%,13888,3.3%
1 bedroom,119508,5.9%,19517,1.4%,99991,16.7%,28810,10.5%,11510,6.3%,...,3560,2.0%,14044,21.1%,61854,4.8%,18135,2.1%,43719,10.4%
2 or 3 bedrooms,1383847,68.6%,950850,67.1%,432997,72.2%,173245,63.1%,114575,63.1%,...,105974,59.9%,44039,66.3%,998500,77.4%,676336,77.8%,322164,76.8%
4 or more bedrooms,487647,24.2%,438990,31.0%,48657,8.1%,60419,22.0%,52494,28.9%,...,65826,37.2%,5879,8.8%,209141,16.2%,169236,19.5%,39905,9.5%


In [133]:
# Rows 32-35: vehicles-available categories ("No vehicle available" through
# "3 or more vehicles available").
vehicles = house_char_data.iloc[list(range(32, 36))]
vehicles.head()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
No vehicle available,105803,5.2%,38015,2.7%,67788,11.3%,24877,9.1%,11554,6.4%,...,3213,1.8%,6813,10.3%,166871,12.9%,83863,9.6%,83008,19.8%
1 vehicle available,636767,31.6%,338136,23.9%,298631,49.8%,84808,30.9%,39654,21.8%,...,34868,19.7%,29075,43.7%,517633,40.1%,315782,36.3%,201851,48.1%
2 vehicles available,743303,36.9%,575831,40.7%,167472,27.9%,99642,36.3%,74592,41.1%,...,63076,35.7%,21711,32.7%,401292,31.1%,301060,34.6%,100232,23.9%
3 or more vehicles available,530575,26.3%,464351,32.8%,66224,11.0%,65247,23.8%,55786,30.7%,...,75706,42.8%,8859,13.3%,203515,15.8%,168930,19.4%,34585,8.2%


In [134]:
# Rows 39-45: heating-fuel categories; tail() shows the last five, ending
# with "No fuel used".
house_heat_fuel = house_char_data.iloc[list(range(39, 46))]
house_heat_fuel.tail()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Electricity,1366095,67.7%,904767,63.9%,461328,76.9%,41891,15.3%,15650,8.6%,...,28291,16.0%,27151,40.9%,124002,9.6%,100837,11.6%,23165,5.5%
"Fuel oil, kerosene, etc.",2610,0.1%,1073,0.1%,1537,0.3%,77028,28.1%,53243,29.3%,...,381,0.2%,228,0.3%,0,0.0%,0,0.0%,0,0.0%
Coal or coke,199,0.0%,0,0.0%,199,0.0%,278,0.1%,105,0.1%,...,469,0.3%,27,0.0%,103,0.0%,103,0.0%,0,0.0%
All other fuels,16376,0.8%,13440,0.9%,2936,0.5%,16756,6.1%,12905,7.1%,...,10553,6.0%,2756,4.1%,13631,1.1%,13161,1.5%,470,0.1%
No fuel used,14076,0.7%,8635,0.6%,5441,0.9%,1677,0.6%,669,0.4%,...,1098,0.6%,500,0.8%,1140913,88.5%,746305,85.8%,394608,94.0%
