In [243]:
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import re

%matplotlib inline
init_notebook_mode(connected=True) 

# Physical House Occupancy Characteristics

In [244]:
# Full state/territory name -> two-letter code (52 entries: the 50 states,
# the District of Columbia and Puerto Rico). The cleanup helpers in this
# notebook use it to fill the 'Code' column of their output frames.
state_abbreviations = {
'Alabama': 'AL',
'Alaska': 'AK',
'Arizona': 'AZ',
'Arkansas': 'AR',
'California': 'CA',
'Colorado': 'CO',
'Connecticut': 'CT',
'Delaware': 'DE',
'District of Columbia': 'DC',
'Florida': 'FL',
'Georgia': 'GA',
'Hawaii': 'HI',
'Idaho': 'ID',
'Illinois': 'IL',
'Indiana': 'IN',
'Iowa': 'IA',
'Kansas': 'KS',
'Kentucky': 'KY',
'Louisiana': 'LA',
'Maine': 'ME',
'Maryland': 'MD',
'Massachusetts': 'MA',
'Michigan': 'MI',
'Minnesota': 'MN',
'Mississippi': 'MS',
'Missouri': 'MO',
'Montana': 'MT',
'Nebraska': 'NE',
'Nevada': 'NV',
'New Hampshire': 'NH',
'New Jersey': 'NJ',
'New Mexico': 'NM',
'New York': 'NY',
'North Carolina': 'NC',
'North Dakota': 'ND',
'Ohio': 'OH',
'Oklahoma': 'OK',
'Oregon': 'OR',
'Pennsylvania': 'PA',
'Rhode Island': 'RI',
'South Carolina': 'SC',
'South Dakota': 'SD',
'Tennessee': 'TN',
'Texas': 'TX',
'Utah': 'UT',
'Vermont': 'VT',
'Virginia': 'VA',
'Washington': 'WA',
'West Virginia': 'WV',
'Wisconsin': 'WI',
'Wyoming': 'WY',
'Puerto Rico': 'PR'
}

In [245]:
def convert_value(value):
    """Convert one raw table cell into a number.

    Parameters
    ----------
    value : str or any
        A cell such as ``'1,436,137'`` or ``'71.2%'``. Anything that is not a
        string (an int/float pandas already parsed, NaN, None) is accepted
        and passed through untouched.

    Returns
    -------
    float
        The percentage as a fraction (``'71.2%'`` -> ``0.712``) when the
        string contains ``'%'``.
    int
        The integer with thousands separators removed for any other string.
    object
        ``value`` itself when it is not a string.

    Raises
    ------
    ValueError
        If a string cell is not a valid number once ``'%'`` / ``','`` are
        removed.
    """
    if not isinstance(value, str):
        # `'%' in value` raises TypeError on floats/NaN, so a single cell that
        # pandas parsed as a number would abort the whole cleanup.
        return value
    if '%' in value:
        return float(value.replace('%', '')) / 100  # percentage -> fraction
    return int(value.replace(',', ''))  # drop thousands separators

In [246]:
def clean_house_char_headers(val):
    """Shorten a raw housing-table column header.

    String headers are cut at the first ``"!!"`` and given a suffix chosen by
    the first marker (checked in the order below, case-sensitively) that
    occurs anywhere in the full header. Headers matching no marker keep only
    the part before ``"!!"``. Non-string values are returned unchanged.
    """
    if not isinstance(val, str):
        return val

    # Order matters: the first marker found decides the suffix.
    suffix_rules = (
        ('Occupied', "_total"),
        ('Percent occupied housing units', "_total_percent"),
        ('Owner-occupied housing', "_owner"),
        ('Percent owner-occupied housing units', "_own_percent"),
        ('Renter-occupied housing units', "_renter"),
        ('Percent renter-occupied', "_rent_percent"),
    )

    prefix = val.split("!!")[0]
    for marker, suffix in suffix_rules:
        if marker in val:
            return prefix + suffix
    return prefix

In [247]:
# Read the housing table (first CSV column becomes the row index) and shorten
# every column header with clean_house_char_headers in one chained expression.
house_char_data = (
    pd.read_csv('./Data/Physical_Housing_Occup.csv', index_col=0)
    .rename(columns=clean_house_char_headers)
)
house_char_data.head()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Occupied housing units,2016448.0,2016448,1416333.0,1416333,600115.0,600115,274574.0,274574,181586.0,181586,...,176863.0,176863,66458.0,66458,1289311.0,1289311,869635.0,869635,419676.0,419676
UNITS IN STRUCTURE,,,,,,,,,,,...,,,,,,,,,,
"1, detached",1436137.0,71.2%,1205520.0,85.1%,230617.0,38.4%,170997.0,62.3%,149053.0,82.1%,...,147580.0,83.4%,20001.0,30.1%,890441.0,69.1%,703191.0,80.9%,187250.0,44.6%
"1, attached",41268.0,2.0%,23036.0,1.6%,18232.0,3.0%,22604.0,8.2%,13565.0,7.5%,...,7390.0,4.2%,4919.0,7.4%,145191.0,11.3%,83088.0,9.6%,62103.0,14.8%
2 apartments,35683.0,1.8%,1255.0,0.1%,34428.0,5.7%,15026.0,5.5%,4122.0,2.3%,...,873.0,0.5%,3457.0,5.2%,34016.0,2.6%,14680.0,1.7%,19336.0,4.6%


In [248]:
# Positional rows 2-8: the seven rows that follow the 'UNITS IN STRUCTURE'
# heading row (position 1); position 0 is the 'Occupied housing units' total.
# NOTE(review): positions are hard-coded to this CSV's row order - re-check
# them if the source file changes.
units_in_struc = house_char_data.iloc[[2,3,4,5,6,7,8]]
units_in_struc.head()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"1, detached",1436137,71.2%,1205520,85.1%,230617,38.4%,170997,62.3%,149053,82.1%,...,147580,83.4%,20001,30.1%,890441,69.1%,703191,80.9%,187250,44.6%
"1, attached",41268,2.0%,23036,1.6%,18232,3.0%,22604,8.2%,13565,7.5%,...,7390,4.2%,4919,7.4%,145191,11.3%,83088,9.6%,62103,14.8%
2 apartments,35683,1.8%,1255,0.1%,34428,5.7%,15026,5.5%,4122,2.3%,...,873,0.5%,3457,5.2%,34016,2.6%,14680,1.7%,19336,4.6%
3 or 4 apartments,57324,2.8%,2986,0.2%,54338,9.1%,20093,7.3%,2783,1.5%,...,198,0.1%,10707,16.1%,33814,2.6%,7645,0.9%,26169,6.2%
5 to 9 apartments,75649,3.8%,2717,0.2%,72932,12.2%,15293,5.6%,2599,1.4%,...,384,0.2%,7442,11.2%,59961,4.7%,16203,1.9%,43758,10.4%


In [249]:
def data_cleanup(df):
    """Reshape a wide per-state slice into three long DataFrames.

    ``df`` is expected to have column labels of the form
    ``"<state>_<attribute>"`` (split at the first underscore) and one row per
    category. Every cell is passed through ``convert_value``; the columns are
    then regrouped by state and attribute. Only the ``total``, ``owner`` and
    ``renter`` attributes are returned; any other attribute is converted but
    not used.

    Returns
    -------
    tuple of DataFrame
        ``(df_total, df_owner, df_renter)``, each with columns ``State``,
        ``Value`` (the whitespace-stripped row label), ``Count`` and ``Code``
        (``State`` looked up in ``state_abbreviations``).
    """
    # Phase 1: convert every cell, keyed by column label, then by row label.
    converted = {}
    for column, cells in df.to_dict().items():
        converted[column] = {
            label.strip(): convert_value(cell) for label, cell in cells.items()
        }

    # Phase 2: regroup as {state: {attribute: {row label: value}}}.
    by_state = {}
    for column, cells in converted.items():
        state, attribute = column.split("_", 1)
        by_state.setdefault(state, {}).setdefault(attribute, {}).update(cells)

    def long_frame(attribute):
        # One small frame per state that has this attribute, stacked under a
        # (state, row label) index and then flattened into plain columns.
        per_state = {
            state: pd.DataFrame.from_dict(groups.get(attribute), 'index')
            for state, groups in by_state.items()
            if groups.get(attribute) is not None
        }
        frame = pd.concat(per_state, axis=0).reset_index()
        frame.columns = ['State', 'Value', 'Count']
        frame['Code'] = frame['State'].map(state_abbreviations)
        return frame

    # Built left to right: total, then owner, then renter.
    return long_frame('total'), long_frame('owner'), long_frame('renter')

In [250]:
df_total, df_owner, df_renter = data_cleanup(units_in_struc)

In [251]:
df_total.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,"1, detached",1436137,AL
1,Alabama,"1, attached",41268,AL
2,Alabama,2 apartments,35683,AL
3,Alabama,3 or 4 apartments,57324,AL
4,Alabama,5 to 9 apartments,75649,AL


In [252]:
df_owner.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,"1, detached",1205520,AL
1,Alabama,"1, attached",23036,AL
2,Alabama,2 apartments,1255,AL
3,Alabama,3 or 4 apartments,2986,AL
4,Alabama,5 to 9 apartments,2717,AL


In [253]:
df_renter.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,"1, detached",230617,AL
1,Alabama,"1, attached",18232,AL
2,Alabama,2 apartments,34428,AL
3,Alabama,3 or 4 apartments,54338,AL
4,Alabama,5 to 9 apartments,72932,AL


In [254]:
# Positional rows 10-16: seven year-built bands, the first being
# '2020 or later'.
# NOTE(review): presumably these sit under a heading row at position 9 -
# verify against the CSV; the positions are hard-coded.
year_struc = house_char_data.iloc[[10,11,12,13,14,15,16]]
year_struc.head()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020 or later,45295,2.2%,38381,2.7%,6914,1.2%,2604,0.9%,1822,1.0%,...,2822,1.6%,869,1.3%,904,0.1%,609,0.1%,295,0.1%
2010 to 2019,241552,12.0%,171350,12.1%,70202,11.7%,29643,10.8%,18717,10.3%,...,17123,9.7%,7252,10.9%,48084,3.7%,31575,3.6%,16509,3.9%
2000 to 2009,322821,16.0%,249161,17.6%,73660,12.3%,44414,16.2%,32530,17.9%,...,27518,15.6%,9678,14.6%,180653,14.0%,135080,15.5%,45573,10.9%
1980 to 1999,587134,29.1%,422475,29.8%,164659,27.4%,96638,35.2%,66956,36.9%,...,40947,23.2%,18035,27.1%,406272,31.5%,293055,33.7%,113217,27.0%
1960 to 1979,515788,25.6%,341032,24.1%,174756,29.1%,82183,29.9%,50826,28.0%,...,48307,27.3%,15887,23.9%,475389,36.9%,311542,35.8%,163847,39.0%


In [255]:
df_total_yr, df_owner_yr, df_renter_yr = data_cleanup(year_struc)

In [256]:
df_total_yr.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,2020 or later,45295,AL
1,Alabama,2010 to 2019,241552,AL
2,Alabama,2000 to 2009,322821,AL
3,Alabama,1980 to 1999,587134,AL
4,Alabama,1960 to 1979,515788,AL


In [257]:
df_owner_yr.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,2020 or later,38381,AL
1,Alabama,2010 to 2019,171350,AL
2,Alabama,2000 to 2009,249161,AL
3,Alabama,1980 to 1999,422475,AL
4,Alabama,1960 to 1979,341032,AL


In [258]:
df_renter_yr.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,2020 or later,6914,AL
1,Alabama,2010 to 2019,70202,AL
2,Alabama,2000 to 2009,73660,AL
3,Alabama,1980 to 1999,164659,AL
4,Alabama,1960 to 1979,174756,AL


In [259]:
# Positional rows 18-22: the five room-count bands, '1 room' through
# '8 or more rooms'. Positions are hard-coded to this CSV's row order.
rooms = house_char_data.iloc[[18,19,20,21,22]]
rooms.head()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1 room,22256,1.1%,4681,0.3%,17575,2.9%,10980,4.0%,2672,1.5%,...,939,0.5%,2354,3.5%,19305,1.5%,5490,0.6%,13815,3.3%
2 or 3 rooms,135880,6.7%,32531,2.3%,103349,17.2%,44248,16.1%,15808,8.7%,...,6101,3.4%,15425,23.2%,138185,10.7%,56309,6.5%,81876,19.5%
4 or 5 rooms,652409,32.4%,357333,25.2%,295076,49.2%,105243,38.3%,67880,37.4%,...,45143,25.5%,33069,49.8%,808988,62.7%,549749,63.2%,259239,61.8%
6 or 7 rooms,708144,35.1%,568715,40.2%,139429,23.2%,68647,25.0%,55125,30.4%,...,56898,32.2%,10411,15.7%,268223,20.8%,208947,24.0%,59276,14.1%
8 or more rooms,497759,24.7%,453073,32.0%,44686,7.4%,45456,16.6%,40101,22.1%,...,67782,38.3%,5199,7.8%,54610,4.2%,49140,5.7%,5470,1.3%


In [260]:
df_room_total, df_room_owner, df_room_renter = data_cleanup(rooms)

In [261]:
df_room_total.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,1 room,22256,AL
1,Alabama,2 or 3 rooms,135880,AL
2,Alabama,4 or 5 rooms,652409,AL
3,Alabama,6 or 7 rooms,708144,AL
4,Alabama,8 or more rooms,497759,AL


In [262]:
df_room_owner.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,1 room,4681,AL
1,Alabama,2 or 3 rooms,32531,AL
2,Alabama,4 or 5 rooms,357333,AL
3,Alabama,6 or 7 rooms,568715,AL
4,Alabama,8 or more rooms,453073,AL


In [263]:
df_room_renter.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,1 room,17575,AL
1,Alabama,2 or 3 rooms,103349,AL
2,Alabama,4 or 5 rooms,295076,AL
3,Alabama,6 or 7 rooms,139429,AL
4,Alabama,8 or more rooms,44686,AL


In [264]:
# Positional rows 24-27: the four bedroom-count bands, 'No bedroom' through
# '4 or more bedrooms'. Positions are hard-coded to this CSV's row order.
bedroom = house_char_data.iloc[[24,25,26,27]]
bedroom.head()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
No bedroom,25446,1.3%,6976,0.5%,18470,3.1%,12100,4.4%,3007,1.7%,...,1503,0.8%,2496,3.8%,19816,1.5%,5928,0.7%,13888,3.3%
1 bedroom,119508,5.9%,19517,1.4%,99991,16.7%,28810,10.5%,11510,6.3%,...,3560,2.0%,14044,21.1%,61854,4.8%,18135,2.1%,43719,10.4%
2 or 3 bedrooms,1383847,68.6%,950850,67.1%,432997,72.2%,173245,63.1%,114575,63.1%,...,105974,59.9%,44039,66.3%,998500,77.4%,676336,77.8%,322164,76.8%
4 or more bedrooms,487647,24.2%,438990,31.0%,48657,8.1%,60419,22.0%,52494,28.9%,...,65826,37.2%,5879,8.8%,209141,16.2%,169236,19.5%,39905,9.5%


In [265]:
df_bed_total, df_bed_owner, df_bed_renter = data_cleanup(bedroom)

In [266]:
df_bed_total.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,No bedroom,25446,AL
1,Alabama,1 bedroom,119508,AL
2,Alabama,2 or 3 bedrooms,1383847,AL
3,Alabama,4 or more bedrooms,487647,AL
4,Alaska,No bedroom,12100,AK


In [267]:
df_bed_owner.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,No bedroom,6976,AL
1,Alabama,1 bedroom,19517,AL
2,Alabama,2 or 3 bedrooms,950850,AL
3,Alabama,4 or more bedrooms,438990,AL
4,Alaska,No bedroom,3007,AK


In [268]:
df_bed_renter.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,No bedroom,18470,AL
1,Alabama,1 bedroom,99991,AL
2,Alabama,2 or 3 bedrooms,432997,AL
3,Alabama,4 or more bedrooms,48657,AL
4,Alaska,No bedroom,9093,AK


In [269]:
# Positional rows 32-35: the four vehicle-availability bands,
# 'No vehicle available' through '3 or more vehicles available'.
# Positions are hard-coded to this CSV's row order.
vehicles = house_char_data.iloc[[32,33,34,35]]
vehicles.head()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
No vehicle available,105803,5.2%,38015,2.7%,67788,11.3%,24877,9.1%,11554,6.4%,...,3213,1.8%,6813,10.3%,166871,12.9%,83863,9.6%,83008,19.8%
1 vehicle available,636767,31.6%,338136,23.9%,298631,49.8%,84808,30.9%,39654,21.8%,...,34868,19.7%,29075,43.7%,517633,40.1%,315782,36.3%,201851,48.1%
2 vehicles available,743303,36.9%,575831,40.7%,167472,27.9%,99642,36.3%,74592,41.1%,...,63076,35.7%,21711,32.7%,401292,31.1%,301060,34.6%,100232,23.9%
3 or more vehicles available,530575,26.3%,464351,32.8%,66224,11.0%,65247,23.8%,55786,30.7%,...,75706,42.8%,8859,13.3%,203515,15.8%,168930,19.4%,34585,8.2%


In [270]:
df_car_total, df_car_owner, df_car_renter = data_cleanup(vehicles)

In [271]:
df_car_total.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,No vehicle available,105803,AL
1,Alabama,1 vehicle available,636767,AL
2,Alabama,2 vehicles available,743303,AL
3,Alabama,3 or more vehicles available,530575,AL
4,Alaska,No vehicle available,24877,AK


In [272]:
df_car_owner.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,No vehicle available,38015,AL
1,Alabama,1 vehicle available,338136,AL
2,Alabama,2 vehicles available,575831,AL
3,Alabama,3 or more vehicles available,464351,AL
4,Alaska,No vehicle available,11554,AK


In [273]:
df_car_renter.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,No vehicle available,67788,AL
1,Alabama,1 vehicle available,298631,AL
2,Alabama,2 vehicles available,167472,AL
3,Alabama,3 or more vehicles available,66224,AL
4,Alaska,No vehicle available,13323,AK


In [274]:
# Positional rows 39-45: the seven heating-fuel categories, 'Utility gas'
# through 'No fuel used'. Positions are hard-coded to this CSV's row order.
house_heat_fuel = house_char_data.iloc[[39,40,41,42,43,44,45]]
# tail() rather than head(): previews the last five of the seven rows.
house_heat_fuel.tail()

Unnamed: 0_level_0,Alabama_total,Alabama_total_percent,Alabama_owner,Alabama_own_percent,Alabama_renter,Alabama_rent_percent,Alaska_total,Alaska_total_percent,Alaska_owner,Alaska_own_percent,...,Wyoming_owner,Wyoming_own_percent,Wyoming_renter,Wyoming_rent_percent,Puerto Rico_total,Puerto Rico_total_percent,Puerto Rico_owner,Puerto Rico_own_percent,Puerto Rico_renter,Puerto Rico_rent_percent
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Electricity,1366095,67.7%,904767,63.9%,461328,76.9%,41891,15.3%,15650,8.6%,...,28291,16.0%,27151,40.9%,124002,9.6%,100837,11.6%,23165,5.5%
"Fuel oil, kerosene, etc.",2610,0.1%,1073,0.1%,1537,0.3%,77028,28.1%,53243,29.3%,...,381,0.2%,228,0.3%,0,0.0%,0,0.0%,0,0.0%
Coal or coke,199,0.0%,0,0.0%,199,0.0%,278,0.1%,105,0.1%,...,469,0.3%,27,0.0%,103,0.0%,103,0.0%,0,0.0%
All other fuels,16376,0.8%,13440,0.9%,2936,0.5%,16756,6.1%,12905,7.1%,...,10553,6.0%,2756,4.1%,13631,1.1%,13161,1.5%,470,0.1%
No fuel used,14076,0.7%,8635,0.6%,5441,0.9%,1677,0.6%,669,0.4%,...,1098,0.6%,500,0.8%,1140913,88.5%,746305,85.8%,394608,94.0%


In [275]:
df_heat_total, df_heat_owner, df_heat_renter = data_cleanup(house_heat_fuel)

In [276]:
df_heat_total.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,Utility gas,510312,AL
1,Alabama,"Bottled, tank, or LP gas",106780,AL
2,Alabama,Electricity,1366095,AL
3,Alabama,"Fuel oil, kerosene, etc.",2610,AL
4,Alabama,Coal or coke,199,AL


In [277]:
df_heat_owner.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,Utility gas,401896,AL
1,Alabama,"Bottled, tank, or LP gas",86522,AL
2,Alabama,Electricity,904767,AL
3,Alabama,"Fuel oil, kerosene, etc.",1073,AL
4,Alabama,Coal or coke,0,AL


In [278]:
df_heat_renter.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,Utility gas,108416,AL
1,Alabama,"Bottled, tank, or LP gas",20258,AL
2,Alabama,Electricity,461328,AL
3,Alabama,"Fuel oil, kerosene, etc.",1537,AL
4,Alabama,Coal or coke,199,AL


In [279]:
df_total.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,"1, detached",1436137,AL
1,Alabama,"1, attached",41268,AL
2,Alabama,2 apartments,35683,AL
3,Alabama,3 or 4 apartments,57324,AL
4,Alabama,5 to 9 apartments,75649,AL


In [280]:
df_total.groupby('State').describe()

Unnamed: 0_level_0,Count,Count,Count,Count,Count,Count,Count,Count
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
State,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Alabama,7.0,288064.0,511128.8,35683.0,49296.0,75649.0,185193.5,1436137.0
Alaska,7.0,39224.86,58271.52,9701.0,15159.5,20093.0,21732.0,170997.0
Arizona,7.0,407196.7,661831.6,39774.0,90819.0,148000.0,296435.5,1888094.0
Arkansas,7.0,173743.9,300473.1,24877.0,40768.5,43527.0,107998.5,850269.0
California,7.0,1935798.0,2698089.0,307658.0,600955.5,745229.0,1728072.5,7839643.0
Colorado,7.0,340654.9,523751.4,35410.0,79470.5,98628.0,298736.0,1494133.0
Connecticut,7.0,204805.0,291015.7,9950.0,78587.5,93688.0,160770.0,851282.0
Delaware,7.0,57476.29,87194.67,3957.0,11028.0,23540.0,51591.5,249598.0
District of Columbia,7.0,46710.0,55121.94,0.0,17098.0,25437.0,53098.5,161140.0
Florida,7.0,1260913.0,1734331.0,181445.0,367773.0,621163.0,1114152.5,5059935.0


In [281]:
# Occupied units per state code: the structure-type rows of df_total are
# summed within each 'Code'.
# 'Count' is selected explicitly because df_total also holds the string
# columns 'State' and 'Value'; a bare groupby(...).sum() silently dropped them
# on older pandas but concatenates the strings on pandas >= 2.0. The double
# brackets keep the result a DataFrame indexed by 'Code'.
total_occupied_housing = df_total.groupby('Code')[['Count']].sum()

In [282]:
total_occupied_housing = total_occupied_housing.reset_index()

In [283]:
total_occupied_housing.head()

Unnamed: 0,Code,Count
0,AK,274574
1,AL,2016448
2,AR,1216207
3,AZ,2850377
4,CA,13550586


In [284]:
def make_map(df, data_year: str):
    """Display a US-states choropleth of ``df['Count']`` inline.

    Parameters
    ----------
    df
        Object indexable by ``'Code'`` (passed as the map locations with
        ``locationmode='USA-states'``) and ``'Count'`` (the colour values).
    data_year : str
        Text placed at the start of the figure title.

    Returns
    -------
    None
        The figure is shown with ``iplot``; nothing is returned.
    """
    trace = {
        'type': 'choropleth',
        'colorscale': 'Portland',
        'locations': df['Code'],
        'locationmode': 'USA-states',
        'z': df['Count'],
        'colorbar': {'title': 'Occupied housing units'},
    }

    figure_layout = {
        'title': {
            'text': f'{data_year} US Occupied housing units',
            'x': 0.5,
            'xanchor': 'center',
        },
        'geo': {'scope': 'usa'},
        'autosize': True,
        'width': 800,
        'height': 600,
    }

    iplot(go.Figure(data=[trace], layout=figure_layout))

In [285]:
make_map(total_occupied_housing, '2022')

## Segment states by predominant housing type - cluster analysis on housing structure types by state

In [286]:
df_total.head()

Unnamed: 0,State,Value,Count,Code
0,Alabama,"1, detached",1436137,AL
1,Alabama,"1, attached",41268,AL
2,Alabama,2 apartments,35683,AL
3,Alabama,3 or 4 apartments,57324,AL
4,Alabama,5 to 9 apartments,75649,AL


In [287]:
# Dominant structure type per state: idxmax() gives, for each State group,
# the index label of the row with the largest Count, and .loc pulls those
# full rows (State, Value, Count, Code) back out of df_total.
df_max = df_total.loc[df_total.groupby('State')['Count'].idxmax()]

print(df_max)

                    State                  Value    Count Code
0                 Alabama            1, detached  1436137   AL
7                  Alaska            1, detached   170997   AK
14                Arizona            1, detached  1888094   AZ
21               Arkansas            1, detached   850269   AR
28             California            1, detached  7839643   CA
35               Colorado            1, detached  1494133   CO
42            Connecticut            1, detached   851282   CT
49               Delaware            1, detached   249598   DE
61   District of Columbia  10 or more apartments   161140   DC
63                Florida            1, detached  5059935   FL
70                Georgia            1, detached  2772474   GA
77                 Hawaii            1, detached   281254   HI
84                  Idaho            1, detached   534334   ID
91               Illinois            1, detached  2994287   IL
98                Indiana            1, detached  20179

In [288]:
fig = px.scatter(df_max, y="Value", x="Count", color="State")
#fig.update_traces(marker_size=10)
fig.show()

## Fertility rates by state

In [289]:
def clean_headers(val):
    """Shorten a raw fertility-table column header.

    String headers are cut at the first ``"!!"``; a suffix is appended
    according to the first of these substrings found in the full header
    (checked in this order): ``'Total'`` -> ``_total``, the
    ``...!!Number!!Estimate`` births column -> ``_births``, the
    ``...!!Rate per 1,000 women!!Estimate`` column -> ``_thou``. Any other
    string keeps only its prefix. Non-string values are returned unchanged.
    """
    if not isinstance(val, str):
        return val

    prefix = val.split("!!")[0]
    if 'Total' in val:
        return prefix + "_total"
    if 'Women with births in the past 12 months!!Number!!Estimate' in val:
        return prefix + "_births"
    if 'Women with births in the past 12 months!!Rate per 1,000 women!!Estimate' in val:
        return prefix + "_thou"
    return prefix

In [290]:
# Read the fertility workbook (first column becomes the row index) and shorten
# every column header with clean_headers in one chained expression.
fert_data = (
    pd.read_excel('./Data/fertility_data.xlsx', index_col=0)
    .rename(columns=clean_headers)
)

fert_data.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022,,,,,,,,,,,...,,,,,,,,,,
Women 15 to 50 years,1184720.0,61506.0,52.0,167711.0,10125.0,60.0,1680785.0,86548.0,51.0,691139.0,...,52.0,1301817.0,66580.0,51.0,126741.0,7469.0,59.0,743480.0,21708.0,29.0
15 to 19 years,171783.0,651.0,4.0,22013.0,124.0,6.0,236982.0,3252.0,14.0,96432.0,...,15.0,185708.0,819.0,4.0,17961.0,220.0,12.0,93630.0,788.0,8.0
20 to 34 years,500263.0,48565.0,97.0,73709.0,7354.0,100.0,741982.0,57861.0,78.0,293111.0,...,94.0,555914.0,49876.0,90.0,51110.0,5354.0,105.0,309106.0,17447.0,56.0
35 to 50 years,512674.0,12290.0,24.0,71989.0,2647.0,37.0,701821.0,25435.0,36.0,301596.0,...,24.0,560195.0,15885.0,28.0,57670.0,1895.0,33.0,340744.0,3473.0,10.0


In [291]:
def fert_data_cleanup(df, year:str):
    """Reshape a wide per-state fertility slice into three long DataFrames.

    ``df`` is expected to have column labels of the form
    ``"<state>_<attribute>"`` (split at the first underscore). Cell values
    are kept as they are; only the row labels are whitespace-stripped.

    Parameters
    ----------
    df : DataFrame
        Slice of the fertility table to reshape.
    year : str
        Accepted for the caller's convenience; not used by this function.

    Returns
    -------
    tuple of DataFrame
        ``(df_total, df_birth, df_thou)`` built from the ``total``,
        ``births`` and ``thou`` attributes, each with columns ``State``,
        ``Value`` (row label), ``Count`` and ``Code`` (``State`` looked up in
        ``state_abbreviations``).

    Notes
    -----
    NOTE(review): ``df.to_dict()`` warns "DataFrame columns are not unique,
    some columns will be omitted" when labels repeat, and the notebook output
    shows that warning for this data - verify that the column which survives
    is the intended one.
    """
    # Phase 1: same cells, keyed by column label, then by stripped row label.
    stripped = {}
    for column, cells in df.to_dict().items():
        stripped[column] = {label.strip(): cell for label, cell in cells.items()}

    # Phase 2: regroup as {state: {attribute: {row label: value}}}.
    by_state = {}
    for column, cells in stripped.items():
        state, attribute = column.split("_", 1)
        by_state.setdefault(state, {}).setdefault(attribute, {}).update(cells)

    def long_frame(attribute):
        # One small frame per state that has this attribute, stacked under a
        # (state, row label) index and then flattened into plain columns.
        per_state = {
            state: pd.DataFrame.from_dict(groups.get(attribute), 'index')
            for state, groups in by_state.items()
            if groups.get(attribute) is not None
        }
        frame = pd.concat(per_state, axis=0).reset_index()
        frame.columns = ['State', 'Value', 'Count']
        frame['Code'] = frame['State'].map(state_abbreviations)
        return frame

    # Built left to right: total, then births, then rate per thousand.
    return long_frame('total'), long_frame('births'), long_frame('thou')

In [292]:
# One single-row frame per survey year. Position 1 is the
# 'Women 15 to 50 years' row that follows the '2022' heading row (position 0);
# every later selection is another 11 rows further down.
# NOTE(review): assumes each year's section has the same 11-row layout, and
# no frame is selected for 2020 - confirm both against the workbook.
births_data_22 = fert_data.iloc[[1]]
births_data_21 = fert_data.iloc[[12]]
births_data_19 = fert_data.iloc[[23]]
births_data_18 = fert_data.iloc[[34]]
births_data_17 = fert_data.iloc[[45]]
births_data_16 = fert_data.iloc[[56]]
births_data_15 = fert_data.iloc[[67]]
births_data_14 = fert_data.iloc[[78]]
births_data_13 = fert_data.iloc[[89]]
births_data_12 = fert_data.iloc[[100]]
births_data_11 = fert_data.iloc[[111]]
births_data_10 = fert_data.iloc[[122]]


In [320]:
# List of data and corresponding years
data_years = [(births_data_22, '2022'), (births_data_21, '2021'), (births_data_19, '2019'), 
              (births_data_18, '2018'), (births_data_17, '2017'), (births_data_16, '2016'), 
              (births_data_15, '2015'), (births_data_14, '2014'), (births_data_13, '2013'), 
              (births_data_12, '2012'), (births_data_11, '2011'), (births_data_10, '2010')]

def consolidate_dataframe(data_years: list):
    """Run ``fert_data_cleanup`` for every year and stack the births frames.

    Parameters
    ----------
    data_years : list
        ``(frame, year)`` pairs; ``year`` is used as the dictionary key and
        as the value of the added ``Year`` column.

    Returns
    -------
    tuple
        ``(all_years_df, fert_pop_dict, birth_dict, birth_thou_dict)``:
        the births frames of all years concatenated with a fresh 0..n-1
        index, followed by three dicts mapping each year to its total,
        births and rate frame respectively.
    """
    fert_pop_dict, birth_dict, birth_thou_dict = {}, {}, {}
    yearly_births = []

    for data, year in data_years:
        fert_pop, births, birth_thou = fert_data_cleanup(data, year)
        fert_pop_dict[year] = fert_pop
        birth_dict[year] = births
        birth_thou_dict[year] = birth_thou

        # Added in place, so the frame stored in birth_dict carries the
        # 'Year' column as well.
        births['Year'] = year
        yearly_births.append(births)

    all_years_df = pd.concat(yearly_births).reset_index(drop=True)

    return all_years_df, fert_pop_dict, birth_dict, birth_thou_dict


In [321]:
df, fert_pop_dict, birth_dict, birth_thou_dict = consolidate_dataframe(data_years)


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.




DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.



In [322]:
df.tail()

Unnamed: 0,State,Value,Count,Code,Year
619,Washington,Women 15 to 50 years,94636.0,WA,2010
620,West Virginia,Women 15 to 50 years,20260.0,WV,2010
621,Wisconsin,Women 15 to 50 years,73724.0,WI,2010
622,Wyoming,Women 15 to 50 years,8314.0,WY,2010
623,Puerto Rico,Women 15 to 50 years,45712.0,PR,2010


In [323]:
fig = px.line(df, x='Year', y='Count', color='State', title='Births by Year and State')
fig.show()

In [324]:
target_state = df[df['State']=='Virginia']

fig = px.line(target_state, x='Year', y='Count', title='Births by Year in Virginia')
fig.show()

In [325]:
# One single-row frame per survey year, starting at position 10 and stepping
# 11 rows per year; the 2010 selection (position 131) is the 'In labor force'
# row.
# NOTE(review): assumes every year's section has the same 11-row layout so
# that each position lands on 'In labor force', and no frame is selected for
# 2020 - confirm both against the workbook.
labor_2022 = fert_data.iloc[[10]]
labor_2021 = fert_data.iloc[[21]]
labor_2019 = fert_data.iloc[[32]]
labor_2018 = fert_data.iloc[[43]]
labor_2017 = fert_data.iloc[[54]]
labor_2016 = fert_data.iloc[[65]]
labor_2015 = fert_data.iloc[[76]]
labor_2014 = fert_data.iloc[[87]]
labor_2013 = fert_data.iloc[[98]]
labor_2012 = fert_data.iloc[[109]]
labor_2011 = fert_data.iloc[[120]]
labor_2010 = fert_data.iloc[[131]]

In [326]:
labor_data_years = [(labor_2022, '2022'), (labor_2021, '2021'), (labor_2019, '2019'), 
              (labor_2018, '2018'), (labor_2017, '2017'), (labor_2016, '2016'), 
              (labor_2015, '2015'), (labor_2014, '2014'), (labor_2013, '2013'), 
              (labor_2012, '2012'), (labor_2011, '2011'), (labor_2010, '2010')]

In [327]:
# Feed the (frame, year) pairs to consolidate_dataframe, which is defined outside
# this section and returns four objects. Only labor_df is used by the cells shown here.
# NOTE(review): the three *_dict names suggest fertile-population / births /
# births-per-thousand lookups -- confirm against the helper before relying on them.
#
# The rename below must stay disabled: the later merge relies on both frames
# sharing a 'Count' column so that the suffixes produce Count_birth / Count_labor,
# and the labor charts plot y='Count'.
labor_df, lab_fert_pop_dict, lab_birth_dict, lab_birth_thou_dict = consolidate_dataframe(labor_data_years)
#labor_df.rename(columns={'Count': 'Employed'}, inplace=True)


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.




DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.


DataFrame columns are not unique, some columns will be omitted.



In [328]:
# Display the last five rows of labor_df to verify the consolidated
# State / Value / Count / Code / Year layout.
labor_df.tail()

Unnamed: 0,State,Value,Count,Code,Year
619,Washington,In labor force,53607.0,WA,2010
620,West Virginia,In labor force,10813.0,WV,2010
621,Wisconsin,In labor force,53240.0,WI,2010
622,Wyoming,In labor force,5181.0,WY,2010
623,Puerto Rico,In labor force,22839.0,PR,2010


In [329]:
# Virginia-only slice of the labor-force frame.
target_state_lab = labor_df[labor_df['State'] == 'Virginia']

# The plotted rows carry Value == 'In labor force', and the labor force also
# counts people who are unemployed but looking for work. The title therefore
# says "labor force" rather than "employment".
fig = px.line(
    target_state_lab,
    x='Year',
    y='Count',
    title='Women who gave birth and are in the labor force, Virginia',
)
fig.show()

In [330]:
# Stack the birth rows (df) on top of the labor-force rows (labor_df) into one long frame.
# ignore_index is not passed, so each frame keeps its own index and labels repeat;
# the cells below only filter on the State column, so this does not affect them.
consol_df = pd.concat([df,labor_df])

In [331]:
# Display the last five rows of the stacked frame; these come from labor_df,
# which was concatenated second.
consol_df.tail()

Unnamed: 0,State,Value,Count,Code,Year
619,Washington,In labor force,53607.0,WA,2010
620,West Virginia,In labor force,10813.0,WV,2010
621,Wisconsin,In labor force,53240.0,WI,2010
622,Wyoming,In labor force,5181.0,WY,2010
623,Puerto Rico,In labor force,22839.0,PR,2010


In [332]:
# Virginia rows of the stacked births + labor-force frame.
target_state = consol_df[consol_df['State'] == 'Virginia']

# One line per Value category (births vs. 'In labor force').
# The title names the single state plotted and says "labor force", matching the
# data, instead of the generic "Employment by Year and State".
fig = px.line(
    target_state,
    x='Year',
    y='Count',
    color='Value',
    title='Births and Labor Force by Year in Virginia',
)
fig.show()

In [333]:
# California rows of the stacked births + labor-force frame.
target_state = consol_df[consol_df['State'] == 'California']

# One line per Value category (births vs. 'In labor force').
# The title names the single state plotted and says "labor force", matching the
# data, instead of the generic "Employment by Year and State".
fig = px.line(
    target_state,
    x='Year',
    y='Count',
    color='Value',
    title='Births and Labor Force by Year in California',
)
fig.show()

In [351]:
# Inner-join the birth and labor frames on their shared keys. The remaining
# same-named columns (Value, Count) get the _birth / _labor suffixes.
# Spread is the birth count minus the labor-force count for each State/Year row.
merged_df = df.merge(
    labor_df,
    on=['State', 'Code', 'Year'],
    suffixes=('_birth', '_labor'),
).assign(Spread=lambda frame: frame['Count_birth'] - frame['Count_labor'])

# One line per state showing how the spread moves over the years.
fig = px.line(
    data_frame=merged_df,
    x='Year',
    y='Spread',
    color='State',
    title='Spread of Births and Labor by Year and State',
)
fig.show()


In [353]:
# California rows of the merged frame (one row per year with the Spread column).
target_state = merged_df[merged_df['State'] == 'California']

# color='State' is kept so the legend still labels the single line.
# The title names the one state actually plotted instead of claiming a by-state view.
fig = px.line(
    target_state,
    x='Year',
    y='Spread',
    color='State',
    title='Spread of Births and Labor by Year in California',
)
fig.show()

In [334]:
# Positions 2-4 of fert_data: the three age-bracket rows
# (15 to 19, 20 to 34, 35 to 50 years) seen in the displayed output.
# NOTE(review): presumably the 2022 table, going by the variable name -- confirm.
fem_age_22 = fert_data.iloc[list(range(2, 5))]
fem_age_22.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15 to 19 years,171783.0,651.0,4.0,22013.0,124.0,6.0,236982.0,3252.0,14.0,96432.0,...,15.0,185708.0,819.0,4.0,17961.0,220.0,12.0,93630.0,788.0,8.0
20 to 34 years,500263.0,48565.0,97.0,73709.0,7354.0,100.0,741982.0,57861.0,78.0,293111.0,...,94.0,555914.0,49876.0,90.0,51110.0,5354.0,105.0,309106.0,17447.0,56.0
35 to 50 years,512674.0,12290.0,24.0,71989.0,2647.0,37.0,701821.0,25435.0,36.0,301596.0,...,24.0,560195.0,15885.0,28.0,57670.0,1895.0,33.0,340744.0,3473.0,10.0


In [335]:
# Positions 13-15 of fert_data: the three age-bracket rows
# (15 to 19, 20 to 34, 35 to 50 years) seen in the displayed output.
# NOTE(review): presumably the 2021 table, going by the variable name -- confirm.
fem_age_21 = fert_data.iloc[list(range(13, 16))]
fem_age_21.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15 to 19 years,170097.0,1408.0,8.0,22382.0,132.0,6.0,235694.0,1363.0,6.0,99501.0,...,10.0,189422.0,1130.0,6.0,19793.0,83.0,4.0,98793.0,1172.0,12.0
20 to 34 years,483785.0,42509.0,88.0,72194.0,6892.0,95.0,720415.0,60973.0,85.0,288081.0,...,90.0,548767.0,46155.0,84.0,49880.0,6114.0,123.0,310354.0,13953.0,45.0
35 to 50 years,517551.0,11676.0,23.0,73735.0,1653.0,22.0,703539.0,21263.0,30.0,302659.0,...,21.0,569388.0,16536.0,29.0,58531.0,988.0,17.0,348179.0,4091.0,12.0


In [336]:
# Positions 24-26 of fert_data: the three age-bracket rows
# (15 to 19, 20 to 34, 35 to 50 years) seen in the displayed output.
# NOTE(review): presumably the 2019 table, going by the variable name -- confirm.
fem_age_19 = fert_data.iloc[list(range(24, 27))]
fem_age_19.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15 to 19 years,161722.0,2369.0,15.0,20665.0,109.0,5.0,235308.0,3601.0,15.0,100455.0,...,12.0,184797.0,1370.0,7.0,18591.0,586.0,32.0,102312.0,1090.0,11.0
20 to 34 years,480248.0,47019.0,98.0,75888.0,7221.0,95.0,718095.0,67710.0,94.0,292150.0,...,111.0,547989.0,52695.0,96.0,53299.0,4537.0,85.0,306411.0,14323.0,47.0
35 to 50 years,500550.0,9154.0,18.0,70759.0,3172.0,45.0,708780.0,17708.0,25.0,301998.0,...,20.0,566065.0,14610.0,26.0,54238.0,1782.0,33.0,334991.0,5573.0,17.0


In [337]:
# Positions 35-37 of fert_data: the three age-bracket rows
# (15 to 19, 20 to 34, 35 to 50 years) seen in the displayed output.
# NOTE(review): presumably the 2018 table, going by the variable name -- confirm.
fem_age_18 = fert_data.iloc[list(range(35, 38))]
fem_age_18.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15 to 19 years,161910.0,2636.0,16.0,23716.0,604.0,25.0,234719.0,4119.0,18.0,103480.0,...,23.0,186776.0,1750.0,9.0,19736.0,386.0,20.0,102317.0,2097.0,20.0
20 to 34 years,481003.0,52593.0,109.0,76103.0,7308.0,96.0,706193.0,65389.0,93.0,293131.0,...,91.0,552020.0,51246.0,93.0,52846.0,6678.0,126.0,305223.0,15886.0,52.0
35 to 50 years,500606.0,10841.0,22.0,72171.0,2835.0,39.0,699314.0,23272.0,33.0,298024.0,...,18.0,560701.0,15747.0,28.0,55272.0,1233.0,22.0,337683.0,3124.0,9.0


In [338]:
# Positions 46-48 of fert_data: the three age-bracket rows
# (15 to 19, 20 to 34, 35 to 50 years) seen in the displayed output.
# NOTE(review): presumably the 2017 table, going by the variable name -- confirm.
fem_age_17 = fert_data.iloc[list(range(46, 49))]
fem_age_17.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15 to 19 years,168330.0,2181.0,13.0,21654.0,241.0,11.0,233978.0,3505.0,15.0,101114.0,...,18.0,187706.0,1562.0,8.0,18127.0,621.0,34.0,111821.0,1870.0,17.0
20 to 34 years,482918.0,46216.0,96.0,78570.0,6947.0,88.0,685767.0,64963.0,95.0,285646.0,...,92.0,552470.0,50676.0,92.0,55582.0,5124.0,92.0,320657.0,15784.0,49.0
35 to 50 years,494740.0,10631.0,21.0,71227.0,2773.0,39.0,684272.0,20253.0,30.0,300200.0,...,19.0,562767.0,15045.0,27.0,51644.0,1257.0,24.0,361617.0,4506.0,12.0


In [339]:
# Positions 57-59 of fert_data: the three age-bracket rows
# (15 to 19, 20 to 34, 35 to 50 years) seen in the displayed output.
# NOTE(review): presumably the 2016 table, going by the variable name -- confirm.
fem_age_16 = fert_data.iloc[list(range(57, 60))]
fem_age_16.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15 to 19 years,163021.0,3100.0,19.0,22097.0,279.0,13.0,231747.0,3476.0,15.0,106285.0,...,13.0,187002.0,1313.0,7.0,19001.0,311.0,16.0,114146.0,3195.0,28.0
20 to 34 years,481980.0,45480.0,94.0,82264.0,7712.0,94.0,678738.0,58967.0,87.0,286616.0,...,115.0,555087.0,57494.0,104.0,56479.0,6017.0,107.0,333374.0,19170.0,58.0
35 to 50 years,501915.0,8611.0,17.0,71586.0,1737.0,24.0,676641.0,18854.0,28.0,296185.0,...,12.0,561074.0,11499.0,20.0,53303.0,1058.0,20.0,370287.0,6191.0,17.0


In [340]:
# Positions 68-70 of fert_data: the three age-bracket rows
# (15 to 19, 20 to 34, 35 to 50 years) seen in the displayed output.
# NOTE(review): presumably the 2015 table, going by the variable name -- confirm.
fem_age_15 = fert_data.iloc[list(range(68, 71))]
fem_age_15.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15 to 19 years,166258.0,2811.0,17.0,23568.0,526.0,22.0,227494.0,3806.0,17.0,94229.0,...,26.0,184065.0,1926.0,10.0,20787.0,623.0,30.0,121941.0,2752.0,23.0
20 to 34 years,483984.0,47797.0,99.0,80588.0,8859.0,110.0,675070.0,64595.0,96.0,295494.0,...,91.0,556923.0,51991.0,93.0,58479.0,6101.0,104.0,340903.0,22237.0,65.0
35 to 50 years,508055.0,8766.0,17.0,69404.0,1759.0,25.0,672789.0,17397.0,26.0,296925.0,...,17.0,569618.0,12010.0,21.0,54460.0,1003.0,18.0,378262.0,3939.0,10.0


In [341]:
# Positions 79-81 of fert_data: the three age-bracket rows
# (15 to 19, 20 to 34, 35 to 50 years) seen in the displayed output.
# NOTE(review): presumably the 2014 table, going by the variable name -- confirm.
fem_age_14 = fert_data.iloc[list(range(79, 82))]
fem_age_14.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15 to 19 years,163448.0,3273.0,20.0,24491.0,779.0,32.0,221498.0,2601.0,12.0,99250.0,...,21.0,187334.0,1695.0,9.0,17844.0,273.0,15.0,121990.0,3468.0,28.0
20 to 34 years,486349.0,47875.0,98.0,79325.0,9130.0,115.0,670787.0,64662.0,96.0,291691.0,...,90.0,557112.0,51664.0,93.0,59517.0,5959.0,100.0,353769.0,26081.0,74.0
35 to 50 years,506390.0,7742.0,15.0,71750.0,1794.0,25.0,665297.0,15511.0,23.0,303046.0,...,14.0,576869.0,13639.0,24.0,54979.0,1297.0,24.0,386479.0,6558.0,17.0


In [342]:
# Positions 90-92 of fert_data: the three age-bracket rows
# (15 to 19, 20 to 34, 35 to 50 years) seen in the displayed output.
# NOTE(review): presumably the 2013 table, going by the variable name -- confirm.
fem_age_13 = fert_data.iloc[list(range(90, 93))]
fem_age_13.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15 to 19 years,165283.0,4361.0,26.0,23594.0,372.0,16.0,222019.0,3483.0,16.0,93659.0,...,19.0,188546.0,2003.0,11.0,17569.0,279.0,16.0,131178.0,4263.0,32.0
20 to 34 years,483599.0,46816.0,97.0,79751.0,7975.0,100.0,659097.0,65145.0,99.0,292280.0,...,83.0,553673.0,50299.0,91.0,60470.0,6441.0,107.0,362867.0,24825.0,68.0
35 to 50 years,514443.0,6876.0,13.0,71952.0,2064.0,29.0,660744.0,14383.0,22.0,300714.0,...,9.0,582942.0,11606.0,20.0,55047.0,459.0,8.0,397081.0,4429.0,11.0


In [343]:
# Positions 101-103 of fert_data: the three age-bracket rows
# (15 to 19, 20 to 34, 35 to 50 years) seen in the displayed output.
# NOTE(review): presumably the 2012 table, going by the variable name -- confirm.
fem_age_12 = fert_data.iloc[list(range(101, 104))]
fem_age_12.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15 to 19 years,160449.0,4737.0,30.0,22135.0,423.0,19.0,217771.0,5287.0,24.0,96953.0,...,13.0,189983.0,2599.0,14.0,19863.0,372.0,19.0,135488.0,3773.0,28.0
20 to 34 years,486392.0,49327.0,101.0,79959.0,8083.0,101.0,651745.0,63402.0,97.0,292020.0,...,92.0,551997.0,54606.0,99.0,56595.0,6525.0,115.0,367508.0,26456.0,72.0
35 to 50 years,515539.0,9383.0,18.0,75432.0,2215.0,29.0,666400.0,20983.0,31.0,305632.0,...,12.0,594203.0,13874.0,23.0,55395.0,1040.0,19.0,404721.0,5668.0,14.0


In [344]:
# Positions 112-114 of fert_data: the three age-bracket rows
# (15 to 19, 20 to 34, 35 to 50 years) seen in the displayed output.
# NOTE(review): presumably the 2011 table, going by the variable name -- confirm.
fem_age_11 = fert_data.iloc[list(range(112, 115))]
fem_age_11.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15 to 19 years,164501.0,4937.0,30.0,24175.0,539.0,22.0,222455.0,6549.0,29.0,93381.0,...,23.0,195618.0,3195.0,16.0,18468.0,155.0,8.0,143213.0,5644.0,39.0
20 to 34 years,482901.0,53474.0,111.0,76832.0,9064.0,118.0,643169.0,59582.0,93.0,291619.0,...,83.0,546832.0,54536.0,100.0,56620.0,5824.0,103.0,376109.0,28952.0,77.0
35 to 50 years,526088.0,12190.0,23.0,77685.0,3280.0,42.0,664103.0,18565.0,28.0,308482.0,...,16.0,609365.0,11659.0,19.0,55677.0,1032.0,19.0,409910.0,4851.0,12.0


In [345]:
# Positions 123-125 of fert_data: the three age-bracket rows
# (15 to 19, 20 to 34, 35 to 50 years) seen in the displayed output.
# NOTE(review): presumably the 2010 table, going by the variable name -- confirm.
fem_age_10 = fert_data.iloc[list(range(123, 126))]
fem_age_10.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
15 to 19 years,167894.0,4756.0,28.0,25070.0,746.0,30.0,222457.0,6827.0,31.0,103730.0,...,32.0,197213.0,4170.0,21.0,17806.0,1066.0,60.0,138231.0,6742.0,49.0
20 to 34 years,479361.0,50576.0,106.0,74393.0,9550.0,128.0,635805.0,64576.0,102.0,282848.0,...,96.0,541445.0,55447.0,102.0,56386.0,6167.0,109.0,384133.0,33045.0,86.0
35 to 50 years,526757.0,10911.0,21.0,77818.0,2198.0,28.0,668891.0,14807.0,22.0,310609.0,...,13.0,621080.0,14107.0,23.0,56201.0,1081.0,19.0,410233.0,5925.0,14.0


In [346]:
# Positions 6-7 of fert_data: the 'Native' and 'Foreign born' rows seen in the
# displayed output.
# NOTE(review): presumably the 2022 table, going by the variable name -- confirm.
fem_stat_2022 = fert_data.iloc[list(range(6, 8))]

fem_stat_2022.head()

Unnamed: 0_level_0,Alabama_total,Alabama_births,Alabama_thou,Alaska_total,Alaska_births,Alaska_thou,Arizona_total,Arizona_births,Arizona_thou,Arkansas_total,...,West Virginia_thou,Wisconsin_total,Wisconsin_births,Wisconsin_thou,Wyoming_total,Wyoming_births,Wyoming_thou,Puerto Rico_total,Puerto Rico_births,Puerto Rico_thou
Label (Grouping),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Native,1124176.0,55575.0,49.0,152185.0,8756.0,58.0,1415213.0,68239.0,48.0,644947.0,...,51.0,1212803.0,60199.0,50.0,121847.0,7430.0,61.0,724276.0,21141.0,29.0
Foreign born,60544.0,5931.0,98.0,15526.0,1369.0,88.0,265572.0,18309.0,69.0,46192.0,...,85.0,89014.0,6381.0,72.0,4894.0,39.0,8.0,19204.0,567.0,30.0
