In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import re

# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
# Initialise plotly's offline notebook mode; connected=True makes plotly load
# plotly.js from the online CDN rather than from the local plotly package.
init_notebook_mode(connected=True) 

# Physical Housing Characteristics for Occupied Housing Units

In [None]:
# Full name -> two-letter postal code for the 50 states, the District of
# Columbia and Puerto Rico (52 entries, alphabetical with Puerto Rico last).
state_abbreviations = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
    'Puerto Rico': 'PR',
}

In [None]:
def convert_value(value):
    """Convert one formatted table cell into a number.

    Parameters
    ----------
    value : str or number
        Text such as ``'45%'``, ``'1,234'`` or ``'1,234.5'``. Anything that is
        not a string (e.g. a NaN float or an already-numeric cell) is
        returned unchanged.

    Returns
    -------
    int or float
        Text containing ``%`` is returned as a fraction (``'45%'`` -> ``0.45``).
        Other text is returned as ``int`` when it is a whole number and as
        ``float`` otherwise. Thousands separators and surrounding whitespace
        are ignored.

    Raises
    ------
    ValueError
        If the text cannot be parsed as a number.
    """
    if not isinstance(value, str):
        # `'%' in value` raises TypeError on floats/ints, so pass those through.
        return value

    # Drop thousands separators up front so they are accepted in percentages too.
    text = value.strip().replace(',', '')

    if '%' in text:
        return float(text.replace('%', '')) / 100  # percentage -> fraction

    try:
        return int(text)
    except ValueError:
        # Not a whole number (e.g. '2.5'); float() still raises ValueError
        # for text that is not numeric at all.
        return float(text)

In [None]:
def clean_house_char_headers(val):
    """Shorten a raw ``'!!'``-delimited column header.

    The result is the text before the first ``'!!'`` plus a suffix chosen by
    the first marker (case-sensitive substring test) found anywhere in the
    header. Headers that match no marker keep only the leading text.
    Non-string values are returned untouched.

    Parameters
    ----------
    val : object
        Column label to clean.

    Returns
    -------
    object
        The shortened label for strings, ``val`` itself otherwise.
    """
    if not isinstance(val, str):
        return val

    # Order matters: the first marker contained in the header wins.
    suffix_rules = (
        ('Occupied', "_total"),
        ('Percent occupied housing units', "_total_percent"),
        ('Owner-occupied housing', "_owner"),
        ('Percent owner-occupied housing units', "_own_percent"),
        ('Renter-occupied housing units', "_renter"),
        ('Percent renter-occupied', "_rent_percent"),
    )

    prefix = val.split("!!")[0]
    for marker, suffix in suffix_rules:
        if marker in val:
            return prefix + suffix
    return prefix

In [None]:
# Read the housing table (first CSV column becomes the row index) and shorten
# the raw column headers with clean_house_char_headers in one chained step.
house_char_data = (
    pd.read_csv('../Data/Physical_Housing_Occup.csv', index_col=0)
    .rename(columns=clean_house_char_headers)
)
house_char_data.head()

In [None]:
# Positional rows 2 through 8 (inclusive) of the housing table; presumably the
# "units in structure" categories — confirm against the CSV row labels.
units_in_struc = house_char_data.iloc[list(range(2, 9))]
units_in_struc.head()

In [None]:
# Column-oriented mapping: {column name: {row label: raw cell}}.
dict_house = units_in_struc.to_dict()

# Same nesting, with whitespace trimmed from the row labels and every raw cell
# parsed by convert_value.
cleaned_dict = {
    column_name: {
        row_label.strip(): convert_value(raw_cell)
        for row_label, raw_cell in raw_cells.items()
    }
    for column_name, raw_cells in dict_house.items()
}

In [None]:
# Pivot cleaned_dict, keyed by "<state>_<metric>" column names, into
# new_dict[row label][state][metric] = value.
new_dict = {}
final_lst = []

for state_attr, attr_values in cleaned_dict.items():
    # Split at the first underscore only, so multi-part metrics such as
    # "total_percent" stay intact. A key without an underscore raises
    # ValueError here.
    state, attribute = state_attr.split("_", 1)

    for attr, value in attr_values.items():
        # setdefault creates the missing nesting levels on first use.
        new_dict.setdefault(attr, {}).setdefault(state, {})[attribute] = value

# Record the finished mapping once. Appending inside the inner loop stored one
# reference to this same dict per processed cell, so the list held nothing but
# duplicates of a single object.
if new_dict:
    final_lst.append(new_dict)

In [None]:
# Positional rows 10 through 16 (inclusive); presumably the "year structure
# built" categories — confirm against the CSV row labels.
year_struc = house_char_data.iloc[list(range(10, 17))]
year_struc.head()

In [None]:
# Positional rows 18 through 22 (inclusive); presumably the room-count
# categories — confirm against the CSV row labels.
rooms = house_char_data.iloc[list(range(18, 23))]
rooms.head()

In [None]:
# Positional rows 24 through 27 (inclusive); presumably the bedroom-count
# categories — confirm against the CSV row labels.
bedroom = house_char_data.iloc[list(range(24, 28))]
bedroom.head()

In [None]:
# Positional rows 32 through 35 (inclusive); presumably the vehicles-available
# categories — confirm against the CSV row labels.
vehicles = house_char_data.iloc[list(range(32, 36))]
vehicles.head()

In [None]:
# Positional rows 39 through 45 (inclusive); presumably the house-heating-fuel
# categories — confirm against the CSV row labels. tail() previews the last
# five of the seven selected rows.
house_heat_fuel = house_char_data.iloc[list(range(39, 46))]
house_heat_fuel.tail()